def scan_es2redis():
    """Queue every user's activity-geo data from Elasticsearch into Redis.

    Iterates over all hits returned by ``scan`` (match_all query, page size
    1000) on ``portrait_index_name`` / ``portrait_index_type`` via
    ``es_user_portrait``.  For each hit, one JSON object of the form
    ``{uid: {'activity_geo_dict': ...}}`` is left-pushed onto the Redis list
    ``UPDATE_DAY_REDIS_KEY`` through ``update_day_redis``.

    Progress is printed after every 1000 documents, and ``all done`` is
    printed once the scan is exhausted.

    Raises:
        Any exception raised while reading a hit or pushing to Redis (for
        example ``KeyError`` when a document lacks ``uid`` or
        ``activity_geo_dict``) propagates unchanged, with its original
        traceback.
    """
    # NOTE(review): `scan` is presumably elasticsearch.helpers.scan -- confirm
    # against the imports at the top of the file.
    s_re = scan(
        es_user_portrait,
        query={'query': {'match_all': {}}, 'size': 1000},
        index=portrait_index_name,
        doc_type=portrait_index_type
    )
    start_ts = time.time()
    count = 0

    # A plain for-loop ends when the scan iterator is exhausted, so neither an
    # explicit StopIteration handler nor a catch-and-re-raise is needed.
    for hit in s_re:
        scan_re = hit['_source']
        count += 1
        uid = scan_re['uid']
        # One Redis entry per user: the payload always holds a single uid.
        user_info = {uid: {'activity_geo_dict': scan_re['activity_geo_dict']}}
        update_day_redis.lpush(UPDATE_DAY_REDIS_KEY, json.dumps(user_info))

        # count is at least 1 here, so the modulo test alone is sufficient.
        if count % 1000 == 0:
            end_ts = time.time()
            # Single-argument print(...) emits the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('%s sec count 1000' % (end_ts - start_ts))
            start_ts = end_ts

    print('all done')
def scan_es2redis_day():
    """Queue every user's daily geo data from Elasticsearch into Redis.

    Iterates over all hits returned by ``scan`` (match_all query, page size
    1000) on ``portrait_index_name`` / ``portrait_index_type`` via
    ``es_user_portrait``.  For each hit, one JSON object of the form
    ``{uid: {'activity_geo_dict': ..., 'sensitive_activity_geo_dict': ...}}``
    is left-pushed onto the Redis list ``UPDATE_DAY_REDIS_KEY`` through
    ``update_day_redis``.

    Progress is printed after every 1000 documents.

    Raises:
        Any exception raised while reading a hit or pushing to Redis (for
        example ``KeyError`` when a document lacks one of the three fields
        read here) propagates unchanged, with its original traceback.
    """
    # NOTE(review): `scan` is presumably elasticsearch.helpers.scan -- confirm
    # against the imports at the top of the file.
    s_re = scan(
        es_user_portrait,
        query={'query': {'match_all': {}}, 'size': 1000},
        index=portrait_index_name,
        doc_type=portrait_index_type
    )
    start_ts = time.time()
    count = 0

    # A plain for-loop ends when the scan iterator is exhausted, so neither an
    # explicit StopIteration handler nor a catch-and-re-raise is needed.
    for hit in s_re:
        scan_re = hit['_source']
        count += 1
        uid = scan_re['uid']
        # One Redis entry per user: the payload always holds a single uid.
        user_info = {
            uid: {
                'activity_geo_dict': scan_re['activity_geo_dict'],
                'sensitive_activity_geo_dict': scan_re['sensitive_activity_geo_dict'],
            }
        }
        update_day_redis.lpush(UPDATE_DAY_REDIS_KEY, json.dumps(user_info))

        # log_should_delete: progress logging, marked for removal in the
        # original source.  count is at least 1 here, so the modulo test
        # alone is sufficient.
        if count % 1000 == 0:
            end_ts = time.time()
            # Single-argument print(...) emits the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('%s sec count 1000' % (end_ts - start_ts))
            start_ts = end_ts
        # log_should_delete
if count % 1000==0 and count!=0: end_ts = time.time() print '%s sec count 1000' % (end_ts - start_ts) start_ts = end_ts except StopIteration: print 'all done' if user_info: update_day_redis.lpush(UPDATE_DAY_REDIS_KEY, json.dumps(user_info)) user_info = {} break except Exception, r: raise r break if user_info: update_day_redis.lpush(UPDATE_DAY_REDIS_KEY, json.dumps(user_info)) print 'count:', count #scan es to redis as a queue for update_week #write in version: 15-12-08 #order time task for every week def scan_es2redis_week(): count = 0 s_re = scan(es_user_portrait, query={'query':{'match_all': {}}, 'size':1000}, index=portrait_index_name, doc_type=portrait_index_type) user_info = {} start_ts = time.time() while True: try: scan_re = s_re.next()['_source']
#log_should_delete if count % 1000 == 0 and count != 0: end_ts = time.time() print '%s sec count 1000' % (end_ts - start_ts) start_ts = end_ts #log_should_delete except StopIteration: if user_info: update_day_redis.lpush(UPDATE_DAY_REDIS_KEY, json.dumps(user_info)) user_info = {} break except Exception, r: raise r break if user_info: update_day_redis.lpush(UPDATE_DAY_REDIS_KEY, json.dumps(user_info)) if __name__ == '__main__': log_time_ts = time.time() log_time_date = ts2datetime(log_time_ts) print 'cron/text_attribute/scan_es2redis_day.py&start&' + log_time_date scan_es2redis_day() log_time_ts = time.time() log_time_date = ts2datetime(log_time_ts) print 'cron/text_attribute/scan_es2redis_day.py&end&' + log_time_date