def close_spider(self,spider):
    """Scrapy pipeline hook: release resources when the spider closes.

    Closes the MySQL connection (if one was opened), closes the local db
    file and archives it, then reports how many items were processed.
    """
    if self.conn:
        mysql.close(self.conn)
    if os.path.exists(self.db_file):
        self.db.closeDb()
        # Archive the finished db file so the next run starts clean.
        shutil.move(self.db_file,self.dbfile_move_target)
    # Python 2 print statement; total_cnt is maintained elsewhere in the class.
    print 'total_item:%s'%self.total_cnt
    log.msg('time:%s, links: %s'%(self.time_stamp, self.total_cnt),level=log.INFO)
def close_spider(self, spider):
    """Tear down pipeline resources when the spider finishes.

    Closes the MySQL connection if present, archives the local db file,
    and logs the item total for this run.
    """
    conn = self.conn
    if conn:
        mysql.close(conn)
    db_path = self.db_file
    if os.path.exists(db_path):
        self.db.closeDb()
        shutil.move(db_path, self.dbfile_move_target)
    # Single-argument print with parens behaves identically under Python 2.
    print('total_item:%s' % self.total_cnt)
    log.msg('time:%s, links: %s' % (self.time_stamp, self.total_cnt),
            level=log.INFO)
def process(self):
    """Build one indexable record per raw row and write each to the output file.

    Returns self.total, the running record count maintained by the
    write/fetch helpers elsewhere in this class.
    """
    datas = self.fetch_raw_data()
    try:
        for data in datas:
            userid = data[0]
            # Fresh dict per row: the original reused one mutable dict
            # across iterations, which corrupts earlier records if
            # write_to_file retains a reference instead of serializing
            # immediately.
            res = {
                'DOCNO': userid,
                'author': process_content(data[1]),
                'userid': process_content(userid),
                'abs': process_content(data[2]),
                'hot': self.fetch_hot_words(userid),
                'site': self.site,
                'authenticate': '',
            }
            self.write_to_file(res)
    finally:
        # Release the connection even if a row fails mid-loop
        # (the original leaked it on any exception).
        mysql.close(self.conn)
    return self.total
def fetch_query_results(self, query_string, num=200):
    """Run query_string against the on-disk index and return up to *num* hits.

    The returned dict is keyed by rank (0..num-1); each value carries
    userid, site and a processed relevance score. When any hits are found
    the result set is also cached in the search_result_cache MySQL table
    under the key "<keyword>#$#<site>".
    """
    index_path = "/disk1/kol_search_index/index"
    query_index = QueryEnvironment()
    query_index.addIndex(index_path)
    datas = {}
    try:
        # Query the index, then decode the raw docs.
        docs = query_index.runQuery(query_string, num)
        results = get_query_results(query_index, docs)

        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for rank, result in enumerate(results):
            # Honour the num parameter (it was hard-coded to 200, which
            # silently ignored callers asking for a different limit).
            if rank >= num:
                break
            datas[rank] = {
                "userid": result["userid"],
                "site": result["site"],
                "relevance": process_relevance(result["relevance"]),
            }

        if datas:
            conn = mysql.connect("kol_search")
            try:
                payload = conn.escape_string(json.dumps(datas))
                # Escape the cache key too: keyword/site may contain quotes.
                # The original escaped only the JSON payload.
                query = conn.escape_string(
                    str(self.keyword) + "#$#" + str(self.site))
                sql = ("insert into search_result_cache"
                       "(query,result,update_time) values('%s','%s','%s');"
                       % (query, payload, now))
                mysql.insert(conn, sql)
                mysql.commit(conn)
            finally:
                # Always release the cache connection (was leaked on error).
                mysql.close(conn)
    finally:
        # Close the index even when querying or caching raises
        # (the original leaked it on any exception).
        query_index.close()
    return datas
def fetch_query_results(self, query_string, num=200):
    """Run query_string against the on-disk index and return up to *num* hits.

    The returned dict is keyed by rank (0..num-1); each value carries
    userid, site and a processed relevance score. When any hits are found
    the result set is also cached in the search_result_cache MySQL table
    under the key "<keyword>#$#<site>".
    """
    index_path = '/disk1/kol_search_index/index'
    query_index = QueryEnvironment()
    query_index.addIndex(index_path)
    datas = {}
    try:
        # Query the index, then decode the raw docs.
        docs = query_index.runQuery(query_string, num)
        results = get_query_results(query_index, docs)

        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        for rank, result in enumerate(results):
            # Honour the num parameter (it was hard-coded to 200, which
            # silently ignored callers asking for a different limit).
            if rank >= num:
                break
            datas[rank] = {
                'userid': result['userid'],
                'site': result['site'],
                'relevance': process_relevance(result['relevance']),
            }

        if datas:
            conn = mysql.connect('kol_search')
            try:
                payload = conn.escape_string(json.dumps(datas))
                # Escape the cache key too: keyword/site may contain quotes.
                # The original escaped only the JSON payload.
                query = conn.escape_string(
                    str(self.keyword) + '#$#' + str(self.site))
                sql = ("insert into search_result_cache"
                       "(query,result,update_time) values('%s','%s','%s');"
                       % (query, payload, now))
                mysql.insert(conn, sql)
                mysql.commit(conn)
            finally:
                # Always release the cache connection (was leaked on error).
                mysql.close(conn)
    finally:
        # Close the index even when querying or caching raises
        # (the original leaked it on any exception).
        query_index.close()
    return datas
def close_spider(self,spider):
    """Scrapy pipeline hook: close the open database connections on shutdown.

    NOTE(review): closing self.conn was deliberately commented out here --
    presumably it is closed elsewhere (or shared); confirm before re-enabling.
    """
    # mysql.close(self.conn)
    mysql.close(self.conn_local)
    # The weixin connection is optional, so guard against a falsy handle.
    if self.conn_weixin:
        mysql.close(self.conn_weixin)
def get_appnameid(appname):
    """Look up the numeric id mapped to *appname* in the appname_maps table.

    Returns the id of the first matching row. As before, an unknown
    appname surfaces as a TypeError (subscripting the None returned by
    query_one) -- but the connection is now always released.
    """
    conn = mysql.connect('app_crawler', '192.168.241.17')
    try:
        # Escape the value before interpolating: the raw %-format was an
        # SQL-injection vector. Matches the file's existing
        # conn.escape_string convention (see fetch_query_results).
        sql = 'select id from appname_maps where e_appname="%s";' % (
            conn.escape_string(appname))
        return mysql.query_one(conn, sql)[0]
    finally:
        mysql.close(conn)