Beispiel #1
0
 def close_spider(self, spider):
     """Release resources and report the item total when the spider closes.

     Closes the MySQL connection when ``self.conn`` is truthy. If the file
     at ``self.db_file`` exists, closes ``self.db`` and moves that file to
     ``self.dbfile_move_target``. Finally prints and logs the total count.

     Args:
         spider: The spider being closed; not used by this method.
     """
     if self.conn:
         mysql.close(self.conn)
     if os.path.exists(self.db_file):
         # Close the DB handle before relocating its backing file.
         self.db.closeDb()
         shutil.move(self.db_file, self.dbfile_move_target)
     # Call form with one pre-formatted argument: prints the same text on
     # Python 2 and is valid syntax on Python 3 (the bare print statement
     # is a SyntaxError there).
     print('total_item:%s' % self.total_cnt)
     log.msg('time:%s, links: %s' % (self.time_stamp, self.total_cnt),
             level=log.INFO)
Beispiel #2
0
 def close_spider(self, spider):
     """Tear down the pipeline's connections and report how many items ran.

     Steps, in order:
       1. Close ``self.conn`` through ``mysql.close`` if it is truthy.
       2. If ``self.db_file`` exists on disk, close ``self.db`` and move
          the file to ``self.dbfile_move_target``.
       3. Print and log ``self.total_cnt`` (the log line also carries
          ``self.time_stamp``).

     Args:
         spider: The spider being closed; not used by this method.
     """
     if self.conn:
         mysql.close(self.conn)
     if os.path.exists(self.db_file):
         # The handle is closed first so the file is not moved while open.
         self.db.closeDb()
         shutil.move(self.db_file, self.dbfile_move_target)
     # Parenthesised single-argument form works identically on Python 2
     # and keeps the method importable on Python 3.
     print('total_item:%s' % self.total_cnt)
     log.msg('time:%s, links: %s' % (self.time_stamp, self.total_cnt),
             level=log.INFO)
Beispiel #3
0
 def process(self):
     """Turn every fetched raw row into a record and write it out.

     Rows from ``self.fetch_raw_data()`` are read positionally: index 0 is
     used as the user id (and DOCNO), index 1 as the author and index 2 as
     the abstract. Each record is passed to ``self.write_to_file``.

     The MySQL connection ``self.conn`` is closed when processing ends,
     whether it finished normally or raised.

     Returns:
         The value of ``self.total`` once all rows have been handled.
     """
     try:
         for data in self.fetch_raw_data():
             userid = data[0]
             # Build a fresh dict per row so a record already handed to
             # write_to_file is never mutated by a later iteration.
             res = {
                 'DOCNO': userid,
                 'author': process_content(data[1]),
                 'userid': process_content(userid),
                 'abs': process_content(data[2]),
                 'hot': self.fetch_hot_words(userid),
                 'site': self.site,
                 'authenticate': '',
             }
             self.write_to_file(res)
     finally:
         # Release the connection even if a row fails part-way through.
         mysql.close(self.conn)
     return self.total
Beispiel #4
0
 def process(self):
     """Build one output record per raw row and write each to file.

     For every row returned by ``self.fetch_raw_data()`` the record holds:
     ``DOCNO`` (row[0] as-is), ``author`` / ``userid`` / ``abs`` (row[1],
     row[0], row[2] passed through ``process_content``), ``hot`` (from
     ``self.fetch_hot_words``), ``site`` (``self.site``) and an empty
     ``authenticate`` field.

     ``self.conn`` is always closed before this method exits, including
     when an exception propagates.

     Returns:
         ``self.total`` as it stands after all rows were processed.
     """
     try:
         for data in self.fetch_raw_data():
             userid = data[0]
             # One new dict per row: reusing a single dict would alter
             # records that write_to_file may still be holding on to.
             res = {
                 'DOCNO': userid,
                 'author': process_content(data[1]),
                 'userid': process_content(userid),
                 'abs': process_content(data[2]),
                 'hot': self.fetch_hot_words(userid),
                 'site': self.site,
                 'authenticate': '',
             }
             self.write_to_file(res)
     finally:
         # Do not leak the connection when a row raises.
         mysql.close(self.conn)
     return self.total
 def fetch_query_results(self, query_string, num=200):
     """Run ``query_string`` against the search index and cache the hits.

     The hits are reduced to ``userid`` / ``site`` / ``relevance`` and, when
     there is at least one, stored as JSON in the ``search_result_cache``
     table of the ``kol_search`` database under the key
     ``"<self.keyword>#$#<self.site>"``.

     Args:
         query_string: Query passed to ``QueryEnvironment.runQuery``.
         num: Maximum number of documents to request and to keep.

     Returns:
         dict mapping each hit's 0-based position to a dict with the keys
         ``userid``, ``site`` and ``relevance``; empty when nothing matched.
     """
     index_path = "/disk1/kol_search_index/index"
     query_index = QueryEnvironment()
     try:
         query_index.addIndex(index_path)
         # Fetch the documents matching query_string.
         docs = query_index.runQuery(query_string, num)
         # Parse the raw documents into result records.
         results = get_query_results(query_index, docs)

         datas = {}
         for rank, result in enumerate(results):
             # Honour the caller's limit instead of a hard-coded 200.
             if rank >= num:
                 break
             datas[rank] = {
                 "userid": result["userid"],
                 "site": result["site"],
                 "relevance": process_relevance(result["relevance"]),
             }

         if datas:
             now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
             conn = mysql.connect("kol_search")
             try:
                 payload = conn.escape_string(json.dumps(datas))
                 # The key is built from keyword/site text, so it is
                 # escaped just like the payload before being inlined.
                 cache_key = conn.escape_string(
                     str(self.keyword) + "#$#" + str(self.site)
                 )
                 sql = "insert into search_result_cache(query,result,update_time) values('%s','%s','%s');" % (
                     cache_key,
                     payload,
                     now,
                 )
                 mysql.insert(conn, sql)
                 mysql.commit(conn)
             finally:
                 # Close the connection even if the insert/commit fails.
                 mysql.close(conn)
     finally:
         # Always release the index handle, including when there are no
         # hits or an error occurred.
         query_index.close()
     return datas
 def fetch_query_results(self, query_string, num=200):
     """Query the search index and write the hits to the result cache.

     Each hit is reduced to its ``userid``, ``site`` and processed
     ``relevance``. If any hits were found they are serialised to JSON and
     inserted into ``search_result_cache`` (database ``kol_search``) with
     the key ``"<self.keyword>#$#<self.site>"`` and the current local time.

     Args:
         query_string: Query passed to ``QueryEnvironment.runQuery``.
         num: Upper bound on the documents requested and kept.

     Returns:
         dict keyed by the 0-based position of each hit, whose values are
         dicts with ``userid``, ``site`` and ``relevance``; empty if the
         query produced no results.
     """
     index_path = '/disk1/kol_search_index/index'
     query_index = QueryEnvironment()
     try:
         query_index.addIndex(index_path)
         # Look up the documents for query_string.
         docs = query_index.runQuery(query_string, num)
         # Parse the query output into result records.
         results = get_query_results(query_index, docs)

         datas = {}
         for rank, result in enumerate(results):
             # Cap at the requested size rather than a fixed 200.
             if rank >= num:
                 break
             datas[rank] = {
                 'userid': result['userid'],
                 'site': result['site'],
                 'relevance': process_relevance(result['relevance']),
             }

         if datas:
             now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
             conn = mysql.connect('kol_search')
             try:
                 payload = conn.escape_string(json.dumps(datas))
                 # Escape the key as well: it is inlined into the SQL text
                 # and may contain quote characters.
                 cache_key = conn.escape_string(
                     str(self.keyword) + '#$#' + str(self.site))
                 sql = "insert into search_result_cache(query,result,update_time) values('%s','%s','%s');" % (
                     cache_key, payload, now)
                 mysql.insert(conn, sql)
                 mysql.commit(conn)
             finally:
                 # The connection must not leak when the insert fails.
                 mysql.close(conn)
     finally:
         # Previously only closed when hits existed; close unconditionally.
         query_index.close()
     return datas
Beispiel #7
0
    def close_spider(self, spider):
        """Close the database connections held by this object.

        ``self.conn_local`` is always closed; ``self.conn_weixin`` is closed
        only when it is truthy. ``self.conn`` is not closed here (an earlier
        call doing so was disabled; the reason is not visible in this code).

        Args:
            spider: The spider being closed; not used by this method.
        """
        mysql.close(self.conn_local)
        if not self.conn_weixin:
            return
        mysql.close(self.conn_weixin)
Beispiel #8
0
def get_appnameid(appname):
    """Return the ``id`` stored in ``appname_maps`` for ``appname``.

    Opens a connection to the ``app_crawler`` database, selects the row
    whose ``e_appname`` equals ``appname`` and returns its first column.
    The connection is closed before returning, even if the query fails.

    Args:
        appname: Value matched against the ``e_appname`` column.

    Returns:
        The first column of the row returned by ``mysql.query_one``.

    Raises:
        Whatever indexing the ``query_one`` result raises when no row is
        available (presumably ``TypeError`` on ``None`` -- confirm against
        the wrapper).
    """
    # NOTE(review): appname is interpolated straight into the SQL text. If
    # it can come from untrusted input it should be escaped or bound as a
    # parameter; the wrapper's API for that is not visible here.
    sql = 'select id from appname_maps where e_appname="%s";' % (appname)
    conn = mysql.connect('app_crawler', '192.168.241.17')
    try:
        row = mysql.query_one(conn, sql)
    finally:
        # Release the connection before the result is inspected so a
        # missing row cannot leak it.
        mysql.close(conn)
    return row[0]