def save_es(en_name, result): bulk_action = [] count = 0 tb = time.time() for weibos in result: #try: source = weibos['_source'] action = {'index': {'_id': weibos['_id']}} bulk_action.extend([action, source]) count += 1 if count % 1000 == 0: weibo_es.bulk(bulk_action, index=en_name, doc_type=topic_index_type, timeout=100) bulk_action = [] print count if count % 10000 == 0: te = time.time() print "index 10000 per %s second" % (te - tb) tb = ts print "all done" if bulk_action: weibo_es.bulk(bulk_action, index=en_name, doc_type=topic_index_type, timeout=100) return 1
def gexf2es(indexname, value):
    """Store `value` (serialized to one JSON string) as document _id=1 in `indexname`."""
    header = {"index": {"_id": 1}}
    payload = json.dumps(value)
    es.bulk([header, payload], index=indexname, doc_type='text', timeout=600)
def gexf2es(indexname, value):
    """Bulk-index `value` (serialized to a JSON string) as document _id=1 into `indexname`."""
    # NOTE(review): exact duplicate of the gexf2es defined just above; being
    # defined later, this copy is the one that takes effect at import time.
    # One of the two should be removed.
    bulk_action = []
    action = {"index":{"_id":1}}
    #print value
    source = json.dumps(value)
    bulk_action.extend([action,source])
    es.bulk(bulk_action, index=indexname, doc_type='text', timeout=600)
def txt2es(filename, name): weibo = [] f = open(filename, 'r') i = 0 bulk_action = [] for line0 in f: line0 = json.loads(line0) #print line0[-1] count = 0 for line in line0: #weibo.append(line) #print line['_source']['mid'],type(line['_source']['mid']) action = {"index": {"_id": line['_source']['mid']}} source = line['_source'] count += 1 bulk_action.extend([action, source]) if count % 1000 == 0: print es.bulk(bulk_action, index=name, doc_type='text', timeout=600) bulk_action = [] print count #print len(bulk_action) print len(bulk_action) #print bulk_action #print es print name, type(name), name.decode('utf-8') print es.bulk(bulk_action, index=name, doc_type='text', timeout=600)
def txt2es(filename,name ): weibo = [] f = open(filename,'r') i = 0 bulk_action = [] for line0 in f: line0 = json.loads(line0) #print line0[-1] count = 0 for line in line0: #weibo.append(line) #print line['_source']['mid'],type(line['_source']['mid']) action = {"index":{"_id":line['_source']['mid']}} source = line['_source'] count += 1 bulk_action.extend([action,source]) if count % 1000 == 0: print es.bulk(bulk_action, index=name, doc_type='text', timeout=600) bulk_action = [] print count
def find_flow_texts_scan(start_ts, end_ts, topic, en_name, keywords): index_names = get_day_zero(start_ts, end_ts) if len(keywords) == 0: query_body = {'query': {'wildcard': {'text': '*' + topic + '*'}}} else: #keywords_list = [{'wildcard':{'text':'*'+topic+'*'}}] keywords_list = [] for i in keywords: print i keywords_list.append({'wildcard': {'text': '*' + i + '*'}}) query_body = { 'query': { 'bool': { 'should': keywords_list, 'minimum_should_match': '60%' } } } print query_body result = [] index_list = [] for index_name in index_names: index_list.append(flow_text_index_name_pre + index_name) s_re = scan(es_flow_text, index=index_list, doc_type=flow_text_index_type, query=query_body) bulk_action = [] count = 0 tb = time.time() while True: try: if count > 5000: break scan_re = s_re.next() _id = scan_re['_id'] source = scan_re['_source'] source['en_name'] = en_name action = {"index": {"_id": _id}} bulk_action.extend([action, source]) count += 1 if count % 1000 == 0: weibo_es.bulk(bulk_action, index=event_text, doc_type=event_text_type, timeout=100) bulk_action = [] print count if count % 10000 == 0: te = time.time() print "index 10000 per %s second" % (te - tb) tb = te except StopIteration: print "all done" if bulk_action: weibo_es.bulk(bulk_action, index=event_text, doc_type=event_text_type, timeout=100) return 1