def send_all(f, sender):
    """Send every parsed record whose ``sp_type`` equals 1 through *sender*.

    Each line of *f* is parsed with ``itemLine2Dict``; lines that parse to a
    falsy value are skipped. Parsed items are converted with ``csv2bin`` and
    only those whose ``sp_type`` converts to the integer 1 are sent.

    Progress is printed every 10000 sent records. Every 100000 sent records
    a running total is printed and the loop pauses for two seconds.

    Args:
        f: Iterable yielding raw lines.
        sender: Object exposing ``send_json(record)``.

    Returns:
        None. Any ``Exception`` raised while processing stops the loop and
        is reported on stdout instead of propagating.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-record window
    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin['sp_type']) != 1:
                continue
            sender.send_json(weibo_item_bin)
            count += 1
            if count % 10000 != 0:
                continue
            te = time.time()
            print('[%s] deliver speed: %s sec/per %s' % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'), te - ts, 10000))
            if count % 100000 == 0:
                print('[%s] total deliver %s, cost %s sec [avg %s per/sec]' % (
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    count, te - tb, count / (te - tb)))
                time.sleep(2)
            ts = te
    except Exception as err:
        # Best-effort delivery: keep swallowing the error, but name the
        # concrete exception type rather than the ``Exception`` base class.
        print('%s: %s' % (type(err).__name__, err))
def send_all(f, sender):
    """Send every parsed record from *f* through *sender* and time the run.

    Each line of *f* is parsed with ``itemLine2Dict``; lines that parse to a
    falsy value are skipped. Parsed items are converted with ``csv2bin`` and
    passed to ``sender.send_json``. Progress is printed every 10000 sent
    records, with a running total every 100000.

    Args:
        f: Iterable yielding raw lines.
        sender: Object exposing ``send_json(record)``.

    Returns:
        Tuple ``(count, total_cost)``: the number of records sent and the
        elapsed wall-clock seconds. If an ``Exception`` interrupts the loop
        it is reported on stdout and the partial totals are returned.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-record window
    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            sender.send_json(csv2bin(weibo_item))
            count += 1
            if count % 10000 != 0:
                continue
            te = time.time()
            print('[%s] deliver speed: %s sec/per %s' % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'), te - ts, 10000))
            if count % 100000 == 0:
                print('[%s] total deliver %s, cost %s sec [avg %s per/sec]' % (
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    count, te - tb, count / (te - tb)))
            ts = te
    except Exception as err:
        # Still best-effort, but no longer traps KeyboardInterrupt/SystemExit
        # and now says what actually went wrong.
        print('error: %s: %s' % (type(err).__name__, err))
    total_cost = time.time() - tb
    return count, total_cost
def csv2reposts():
    """Index reposts found in the files under ``./csv/`` by original message id.

    Every line of every file is parsed with ``itemLine2Dict``. For items with
    a non-empty ``retweeted_mid``, the string ``"<_id>_<user>_<timestamp>"``
    is appended to the JSON list stored in ``weibo_repost_bucket`` under
    ``str(retweeted_mid)``. A ``KeyError`` from the bucket lookup is treated
    as "no list stored yet".

    A line counter and the seconds taken by the last 10000 lines are printed
    every 10000 lines read.
    """
    path = './csv/'
    files = os.listdir(path)
    # Two spaces after the colon keep the output identical to the original
    # ``print 'files count: ', n`` statement.
    print('files count:  %s' % len(files))
    count = 0
    ts = time.time()
    for fname in files:
        # ``with`` guarantees the file is closed even if a line fails.
        with open(os.path.join(path, fname), 'r') as f:
            for line in f:
                count += 1
                if count % 10000 == 0:
                    te = time.time()
                    print('%s %s sec' % (count, te - ts))
                    ts = te
                itemdict = itemLine2Dict(line)
                if not itemdict or not itemdict['retweeted_mid']:
                    continue
                key = str(itemdict['retweeted_mid'])
                entry = '_'.join([
                    str(itemdict['_id']),
                    str(itemdict['user']),
                    str(itemdict['timestamp']),
                ])
                try:
                    stored = weibo_repost_bucket.Get(key)
                except KeyError:
                    reposts = []
                else:
                    reposts = json.loads(stored)
                reposts.append(entry)
                weibo_repost_bucket.Put(key, json.dumps(reposts))
def send_all(f, sender):
    """Send every parsed record whose ``sp_type`` equals 1 and time the run.

    Each line of *f* is parsed with ``itemLine2Dict``; lines that parse to a
    falsy value are skipped. Parsed items are converted with ``csv2bin`` and
    only those whose ``sp_type`` converts to the integer 1 are sent. After
    every 10000 sent records the window time is printed and the loop pauses
    for one second.

    Args:
        f: Iterable yielding raw lines.
        sender: Object exposing ``send_json(record)``.

    Returns:
        Tuple ``(count, total_cost)``: the number of records sent and the
        elapsed wall-clock seconds. If an ``Exception`` interrupts the loop
        it is reported on stdout and the partial totals are returned.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-record window
    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin['sp_type']) != 1:
                continue
            sender.send_json(weibo_item_bin)
            count += 1
            if count % 10000 == 0:
                te = time.time()
                print('[%s] read csv speed: %s sec/per %s' % (
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'), te - ts, 10000))
                ts = te
                time.sleep(1)
    except Exception as err:
        # Still best-effort, but no longer traps KeyboardInterrupt/SystemExit
        # and now says what actually went wrong.
        print('send_all aborted: %s: %s' % (type(err).__name__, err))
    total_cost = time.time() - tb
    return count, total_cost
def send_all(f, sender):
    """Send every parsed record whose ``sp_type`` equals 1 and time the run.

    Each line of *f* is parsed with ``itemLine2Dict``; lines that parse to a
    falsy value are skipped. Parsed items are converted with ``csv2bin`` and
    only those whose ``sp_type`` converts to the integer 1 are sent. Window
    time and running totals are printed every 10000 sent records.

    Args:
        f: Iterable yielding raw lines.
        sender: Object exposing ``send_json(record)``.

    Returns:
        Tuple ``(count, total_cost)``: the number of records sent and the
        elapsed wall-clock seconds. If an ``Exception`` interrupts the loop
        it is reported on stdout and the partial totals are returned.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-record window
    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin['sp_type']) != 1:
                continue
            sender.send_json(weibo_item_bin)
            count += 1
            if count % 10000 == 0:
                te = time.time()
                print('[%s] read csv speed: %s sec/per %s' % (
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'), te - ts, 10000))
                # Report the real number sent; the old ``count_send`` local
                # was never updated here and always printed 0.
                print('[%s] total send filter weibo %s, cost %s sec [avg %s per/sec]' % (
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    count, te - tb, count / (te - tb)))
                ts = te
    except Exception as err:
        # Still best-effort, but no longer traps KeyboardInterrupt/SystemExit
        # and now says what actually went wrong.
        print('send_all aborted: %s: %s' % (type(err).__name__, err))
    total_cost = time.time() - tb
    return count, total_cost
def send_all(f, sender):
    """Filter records in batches of 10000 and send the survivors.

    Lines of *f* are parsed with ``itemLine2Dict`` and converted with
    ``csv2bin``; only records whose ``sp_type`` converts to the integer 1 are
    buffered. Each full batch (and the final partial one) is passed to
    ``filter_ad`` and then ``send_filter``, which returns the updated number
    of records sent.

    Args:
        f: Iterable yielding raw lines.
        sender: Passed through to ``send_filter``.

    Returns:
        Tuple ``(count_send, total_cost)``: the value last returned by
        ``send_filter`` (0 if it was never called) and the elapsed seconds.
    """
    started = time.time()
    window_start = started
    accepted = 0
    count_send = 0
    id_text_pairs = []   # [mid, utf-8 text] pairs handed to filter_ad
    pending = []         # full records matching id_text_pairs

    for raw in f:
        parsed = itemLine2Dict(raw)
        if not parsed:
            continue
        record = csv2bin(parsed)
        if int(record['sp_type']) != 1:
            continue

        pending.append(record)
        id_text_pairs.append([record['mid'], record['text'].encode('utf-8')])
        accepted += 1
        if accepted % 10000 != 0:
            continue

        kept = filter_ad(id_text_pairs)
        count_send = send_filter(kept, pending, count_send, sender)
        id_text_pairs = []
        pending = []
        now = time.time()
        print('[%s] read csv speed: %s sec/per %s' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), now - window_start, 10000))
        print('[%s] total send filter weibo %s, cost %s sec [avg %s per/sec]' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), count_send, now - started, accepted / (now - started)))
        window_start = now

    # Flush whatever is left from the last, incomplete batch.
    if id_text_pairs:
        kept = filter_ad(id_text_pairs)
        count_send = send_filter(kept, pending, count_send, sender)

    return count_send, time.time() - started
def send_all(f, sender):
    """Send every parsed record whose ``sp_type`` equals 1 through *sender*.

    Each line of *f* is parsed with ``itemLine2Dict``; lines that parse to a
    falsy value are skipped. Parsed items are converted with ``csv2bin`` and
    only those whose ``sp_type`` converts to the integer 1 are sent.

    Progress is printed every 10000 sent records. Every 100000 sent records
    a running total is printed and the loop pauses for two seconds.

    Args:
        f: Iterable yielding raw lines.
        sender: Object exposing ``send_json(record)``.

    Returns:
        None. Any ``Exception`` raised while processing stops the loop and
        is reported on stdout instead of propagating.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-record window
    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            weibo_item_bin = csv2bin(weibo_item)
            if int(weibo_item_bin["sp_type"]) != 1:
                continue
            sender.send_json(weibo_item_bin)
            count += 1
            if count % 10000 != 0:
                continue
            te = time.time()
            print("[%s] deliver speed: %s sec/per %s" % (
                datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                te - ts,
                10000,
            ))
            if count % 100000 == 0:
                print("[%s] total deliver %s, cost %s sec [avg %s per/sec]" % (
                    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    count,
                    te - tb,
                    count / (te - tb),
                ))
                time.sleep(2)
            ts = te
    except Exception as err:
        # Best-effort delivery: keep swallowing the error, but name the
        # concrete exception type rather than the ``Exception`` base class.
        print("%s: %s" % (type(err).__name__, err))
def send_all(f, sender):
    """Send every parsed record from *f* through *sender*.

    Each line of *f* is parsed with ``itemLine2Dict``; lines that parse to a
    falsy value are skipped. Parsed items are converted with ``csv2bin`` and
    passed to ``sender.send_json``. Progress is printed every 10000 sent
    records, with a running total every 100000.

    Args:
        f: Iterable yielding raw lines.
        sender: Object exposing ``send_json(record)``.

    Returns:
        None. Any ``Exception`` raised while processing stops the loop and
        is reported on stdout instead of propagating.
    """
    count = 0
    tb = time.time()  # start of the whole run
    ts = tb           # start of the current 10000-record window
    try:
        for line in f:
            weibo_item = itemLine2Dict(line)
            if not weibo_item:
                continue
            sender.send_json(csv2bin(weibo_item))
            count += 1
            if count % 10000 != 0:
                continue
            te = time.time()
            print('[%s] deliver speed: %s sec/per %s' % (
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'), te - ts, 10000))
            if count % 100000 == 0:
                print('[%s] total deliver %s, cost %s sec [avg %s per/sec]' % (
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    count, te - tb, count / (te - tb)))
            ts = te
    except Exception as err:
        # The error used to be dropped silently, making a truncated run look
        # like a clean one. Delivery stays best-effort, but is now reported.
        print('send_all aborted after %s records: %s: %s' % (
            count, type(err).__name__, err))
if count_n == 100: break uid_text = file('uid_text.csv', 'wb') writer = csv.writer(uid_text) count = 0 count_f = 0 file_list = set(os.listdir(CSV_FILE_PATH)) print "total file is ", len(file_list) for each in file_list: with open(os.path.join(CSV_FILE_PATH, each), 'rb') as f: try: for line in f: count_f += 1 weibo_item = itemLine2Dict(line) if weibo_item: weibo_item_bin = csv2bin(weibo_item) if int(weibo_item_bin['sp_type']) != 1: continue if not str(weibo_item_bin['uid']) in uid_set: continue text = weibo_item_bin['text'] if weibo_item_bin['message_type'] == 1: write_text = text elif weibo_item_bin['message_type'] == 2: temp = text.split('//@')[0].split(':')[1:] write_text = ''.join(temp) else: continue item = [str(weibo_item_bin['uid']), write_text]
def csv_input_pre_func(item):
    """Return *item* converted by ``itemLine2Dict``."""
    return itemLine2Dict(item)