def process(): conn_old = mysql.connect('bsppr', '192.168.241.7') mysql.insert(conn_old, 'set names utf8') conn_new = mysql.connect('bsppr', '192.168.241.32') mysql.insert(conn_new, 'set names utf8') cinfos_old = get_cinfos(conn_old) #print cinfos_old cinfos_new = get_cinfos(conn_new) mongo_conn = get_mongo_conn() tablename = 'weixin' tmpdatas = mongo.find(mongo_conn, tablename, {}, 50) rawdatas = [] for raw in tmpdatas: date = raw['pubtime'] now = datetime.datetime.now() diff = now - date print diff.days rawdatas.append(raw) if len(rawdatas) == 0: time.sleep(10) raw_old_qualified = filter(cinfos_old, rawdatas) old_insert_num = feed_xpost.feed_data_to_xpost(conn_old, raw_old_qualified, 'old') raw_new_qualified = filter(cinfos_new, rawdatas) new_insert_num = feed_xpost.feed_data_to_xpost(conn_new, raw_new_qualified, 'new')
def process(): conn_old = mysql.connect('bsppr', '192.168.241.7') mysql.insert(conn_old,'set names utf8') conn_new = mysql.connect('bsppr', '192.168.241.32') mysql.insert(conn_new,'set names utf8') cinfos_old = get_cinfos_moa(conn_old) cinfos_new = get_cinfos_moa(conn_new) mongo_conn = get_mongo_conn() tablename = 'weixin' while True: tmpdatas = mongo.find(mongo_conn, tablename, {},1000) rawdatas = [] for raw in tmpdatas: url = raw['url'] mongo.delete(mongo_conn, tablename, {'url':url}) date = raw['pubtime'] now = datetime.datetime.now() diff = now - date if diff.days>2: continue rawdatas.append(raw) if len(rawdatas)==0: print 'wait datas...' time.sleep(300) raw_old_qualified = filter(cinfos_old,rawdatas) if raw_old_qualified: old_insert_num = feed_xpost.feed_data_to_xpost(conn_old, raw_old_qualified,'old') print 'old_insert_num: ',old_insert_num raw_new_qualified = filter(cinfos_new,rawdatas) if raw_new_qualified: new_insert_num = feed_xpost.feed_data_to_xpost(conn_new, raw_new_qualified,'new') print 'new_insert_num: ',new_insert_num
def __init__(self):
    """Open the mx_kol DB connection and a timestamped output file."""
    self.conn = mysql.connect('mx_kol')
    self.flag = 1
    stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    # one .dat file per run, named by tag and start time
    # (self.data_path / self.filetag are set elsewhere on the class)
    outpath = '%s/kol_search_%s_%s.dat' % (self.data_path, self.filetag, stamp)
    self.f = open(outpath, 'w')
    self.total = 0
def connect(self):
    """(Re)establish the app_crawler MySQL connection and pin the
    session to utf8/utf8mb4 with long idle timeouts."""
    self.conn = mysql.connect('app_crawler', host='192.168.241.17')
    self.cursor = self.conn.cursor()
    self.conn.set_character_set('utf8')
    for stmt in ('set names utf8mb4',
                 'SET CHARACTER SET utf8;',
                 'SET character_set_connection=utf8;',
                 'set interactive_timeout=24*3600;',
                 'set wait_timeout=24*3600;'):
        self.cursor.execute(stmt)
def connect(self):
    """Connect to the app_crawler database and force a utf8 session."""
    self.conn = mysql.connect('app_crawler', host='192.168.241.17')
    self.cursor = self.conn.cursor()
    cur = self.cursor
    self.conn.set_character_set('utf8')
    # utf8mb4 on the wire so 4-byte characters (emoji) survive inserts
    cur.execute('set names utf8mb4')
    cur.execute('SET CHARACTER SET utf8;')
    cur.execute('SET character_set_connection=utf8;')
    # keep long-lived idle connections from being dropped by the server
    cur.execute('set interactive_timeout=24*3600;')
    cur.execute('set wait_timeout=24*3600;')
def process(terrace): if terrace=='new': conn = mysql.connect('bsppr', '192.168.241.32') else: conn = mysql.connect('bsppr', '192.168.241.7') mysql.insert(conn,'set names utf8') mongo_conn = get_mongo_conn() tablename = 'comgeneral' while True: total = 0 before_total = 0 raw_total = 0 tmpdatas = mongo.find(mongo_conn, tablename, {'terrace':terrace},200) print '...%s get data ...'%(terrace) rawdatas = [] for raw in tmpdatas: raw_total += 1 url = raw['url'] mongo.delete(mongo_conn, tablename, {'url':url}) datestr = raw['pubtime'] if not datestr: continue updatetime = raw.get('updatetime',time.time) try: date = format_time(datestr,updatetime) #open(self.terrace,'a+').write('%s\n'%str(date)) now = datetime.datetime.now() diff = now - date except: open('date_error.dat','a+').write('%s\t%s\n'%(datestr,url)) continue if diff.days>2: continue raw.update({'pubtime':date}) rawdatas.append(raw) before_total += 1 if tmpdatas.count()==0: break insert_num = feed_xpost.feed_data_to_xpost(conn, rawdatas,terrace) total += insert_num open('total_%s.dat'%(terrace),'a+').write('%s\t%s\n'%(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),total)) open('raw_total_%s.dat'%(terrace),'a+').write('%s\t%s\n'%(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),raw_total)) open('before_total_%s.dat'%(terrace),'a+').write('%s\t%s\n'%(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),before_total))
def __init__(self):
    """Open the coopinion and local meta MySQL connections with utf8
    sessions, and resolve this host's meta IP.

    The weixin connection is created lazily elsewhere (placeholder '').
    """
    # shared utf8 session setup, applied to every cursor we open
    session_setup = ['set names utf8mb4',
                     'SET CHARACTER SET utf8;',
                     'SET character_set_connection=utf8;',
                     'set interactive_timeout=24*3600;',
                     'set wait_timeout=24*3600;']
    self.conn = mysql.connect('coopinion', host='192.168.241.31')
    self.cursor = self.conn.cursor()
    self.conn.set_character_set('utf8')
    for stmt in session_setup:
        self.cursor.execute(stmt)
    self.conn_weixin = ''
    self.conn_local = mysql.connect('meta', host='localhost')
    self.conn_local_cursor = self.conn_local.cursor()
    self.conn_local.set_character_set('utf8')
    for stmt in session_setup + ['set innodb_lock_wait_timeout=1000;']:
        self.conn_local_cursor.execute(stmt)
    try:
        self.meta_ip = get_meta_ip(network_card='enp7s0')
    except Exception:
        # BUGFIX: was a bare except, which also swallowed
        # KeyboardInterrupt/SystemExit; fall back to the legacy card name
        self.meta_ip = get_meta_ip(network_card='eth0')
def process(): conn_old = mysql.connect('bsppr', '192.168.241.7') mysql.insert(conn_old,'set names utf8') conn_new = mysql.connect('bsppr', '192.168.241.32') mysql.insert(conn_new,'set names utf8') cinfos_old = get_cinfos(conn_old) #print cinfos_old cinfos_new = get_cinfos(conn_new) mongo_conn = get_mongo_conn() tablename = 'weixin' tmpdatas = mongo.find(mongo_conn, tablename, {},50) rawdatas = [] for raw in tmpdatas: date = raw['pubtime'] now = datetime.datetime.now() diff = now - date print diff.days rawdatas.append(raw) if len(rawdatas)==0: time.sleep(10) raw_old_qualified = filter(cinfos_old,rawdatas) old_insert_num = feed_xpost.feed_data_to_xpost(conn_old, raw_old_qualified,'old') raw_new_qualified = filter(cinfos_new,rawdatas) new_insert_num = feed_xpost.feed_data_to_xpost(conn_new, raw_new_qualified,'new')
def mx_kol(self, result):
    """Upsert one weixin author row into mx_kol.weixin_user_info.

    Lazily opens the mx_kol connection on first use. `result` is a
    crawled record with at least 'id' and 'url' keys ('author' optional).
    """
    if not self.conn_weixin:
        self.conn_weixin = mysql.connect('mx_kol', host='192.168.241.29')
        self.weixin_cousor = self.conn_weixin.cursor()
        self.conn_weixin.set_character_set('utf8')
        self.weixin_cousor.execute('set names utf8mb4')
        self.weixin_cousor.execute('SET CHARACTER SET utf8;')
        self.weixin_cousor.execute('SET character_set_connection=utf8;')
    openid = process_openid(result['id'])
    url = result['url']
    author = result.get('author', '')
    tencent_id = get_tencent_id(url)
    is_v = 0
    # SECURITY FIX: author/ids come from crawled data; let the driver bind
    # and escape the values instead of %-formatting them into the SQL
    # string (the old code broke on quotes and was injectable)
    sql = ("insert into weixin_user_info(userid,openid,tencent_id,screen_name,is_v) "
           "values(%s,%s,%s,%s,%s) on duplicate key update screen_name=%s;")
    self.weixin_cousor.execute(sql, (openid, openid, tencent_id, author, is_v, author))
    self.conn_weixin.commit()
def fetch_query_results(self, query_string, num=200):
    """Run query_string against the on-disk kol_search index and return
    up to `num` results as {rank: {userid, site, relevance}}.

    Non-empty result sets are also cached as a JSON blob in the
    kol_search.search_result_cache MySQL table, keyed by
    '<keyword>#$#<site>'.
    """
    index_path = "/disk1/kol_search_index/index"
    query_index = QueryEnvironment()
    query_index.addIndex(index_path)
    docs = query_index.runQuery(query_string, num)
    results = get_query_results(query_index, docs)
    datas = {}
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    rank = 0
    for result in results:
        # FIX: honour the num parameter (the cap was hard-coded to 200)
        if rank >= num:
            break
        datas[rank] = {
            "userid": result["userid"],
            "site": result["site"],
            "relevance": process_relevance(result["relevance"]),
        }
        rank += 1
    if datas:
        conn = mysql.connect("kol_search")
        payload = conn.escape_string(json.dumps(datas))
        # FIX: escape the cache key as well, not only the payload
        query = conn.escape_string(str(self.keyword) + "#$#" + str(self.site))
        sql = "insert into search_result_cache(query,result,update_time) values('%s','%s','%s');" % (
            query,
            payload,
            now,
        )
        mysql.insert(conn, sql)
        mysql.commit(conn)
        mysql.close(conn)
    query_index.close()
    return datas
def fetch_query_results(self, query_string, num=200):
    """Query the kol_search index and return up to `num` ranked results.

    Returns a dict {rank: {'userid', 'site', 'relevance'}}. When results
    exist they are also cached as JSON in kol_search.search_result_cache
    under the key '<keyword>#$#<site>'.
    """
    index_path = '/disk1/kol_search_index/index'
    query_index = QueryEnvironment()
    query_index.addIndex(index_path)
    docs = query_index.runQuery(query_string, num)
    results = get_query_results(query_index, docs)
    datas = {}
    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    rank = 0
    for result in results:
        # FIX: respect the num argument (previously a hard-coded 200 cap)
        if rank >= num:
            break
        datas[rank] = {'userid': result['userid'],
                       'site': result['site'],
                       'relevance': process_relevance(result['relevance'])}
        rank += 1
    if datas:
        conn = mysql.connect('kol_search')
        payload = conn.escape_string(json.dumps(datas))
        # FIX: escape the cache key too, not just the payload
        query = conn.escape_string(str(self.keyword) + '#$#' + str(self.site))
        sql = "insert into search_result_cache(query,result,update_time) values('%s','%s','%s');" % (
            query, payload, now)
        mysql.insert(conn, sql)
        mysql.commit(conn)
        mysql.close(conn)
    query_index.close()
    return datas
def get_conn():
    """Open and return the (new, old) bsppr database connections."""
    return (mysql.connect('bsppr', '192.168.241.32'),
            mysql.connect('bsppr', '192.168.241.7'))
def get_conn():
    """Connect to both bsppr hosts.

    Returns:
        (conn_new, conn_old) tuple of MySQL connections.
    """
    hosts = {'new': '192.168.241.32', 'old': '192.168.241.7'}
    conn_new = mysql.connect('bsppr', hosts['new'])
    conn_old = mysql.connect('bsppr', hosts['old'])
    return conn_new, conn_old
import mysql_api as mysql conn_local = mysql.connect('meta',host='localhost') conn_local_cursor = conn_local.cursor() conn_local.set_character_set('utf8') conn_local_cursor.execute('set names utf8mb4') conn_local_cursor.execute('SET CHARACTER SET utf8;') conn_local_cursor.execute('SET character_set_connection=utf8;') conn_local_cursor.execute('set interactive_timeout=24*3600;') conn_local_cursor.execute('set wait_timeout=24*3600;') conn_local_cursor.execute('set innodb_lock_wait_timeout=1000;') sql = 'select * from meta_result limit 1;' conn_local_cursor.execute(sql) print conn_local_cursor.fetchall() conn_local.close()
def __init__(self):
    """Connect to mx_kol and open the per-run timestamped data file."""
    self.conn = mysql.connect('mx_kol')
    self.flag = 1
    ts = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    # output file lives under self.data_path, tagged by self.filetag
    # (both attributes are set elsewhere on the class)
    fname = '%s/kol_search_%s_%s.dat' % (self.data_path, self.filetag, ts)
    self.f = open(fname, 'w')
    self.total = 0
def get_appnameid(appname):
    """Look up the numeric id for an app name in app_crawler.appname_maps.

    Args:
        appname: English app name (e_appname column).
    Returns:
        The id of the first matching row (raises TypeError if none).

    NOTE(review): appname is %-formatted into the SQL string; keep inputs
    trusted, or switch to bind parameters if mysql.query_one supports them.
    """
    sql = 'select id from appname_maps where e_appname="%s";' % (appname)
    conn = mysql.connect('app_crawler', '192.168.241.17')
    try:
        # renamed from `id` to avoid shadowing the builtin
        appname_id = mysql.query_one(conn, sql)[0]
    finally:
        # FIX: release the connection even when the query raises
        mysql.close(conn)
    return appname_id