def store_results(self, data):
    """Store results back to ES."""
    index_out = self._prep_index_name(self.config.ES_TARGET_INDEX)
    actions = [{"_index": index_out, "_type": "log", "_source": doc}
               for doc in data]
    # split the payload into roughly four chunks (+1 avoids chunk_size=0)
    helpers.bulk(self.es, actions, chunk_size=int(len(data) / 4) + 1)
def process_sessions(self):
    sessions = self.cassandra_session.execute(self.cassandra_sessions_stmt)
    counter = 0
    batch = []
    for s in sessions:
        self.l.debug("processing " + str(s))
        if s['coordinator'] is not None:  # filter out empty traces
            if s['request'] == 'Execute batch of CQL3 queries':
                s['query_type'] = 'BATCH'
                params = {}  # batch statements carry no parsable parameters
            else:
                params = self.parse_params(s['parameters'])
            es_data = self.merge_two_dicts(params, s)
            es_data.pop('parameters', None)
            es_data['session_id'] = str(es_data['session_id'])
            es_data['cluster'] = self.cluster_name
            batch.append({
                '_index': self.es_index_name,
                '_type': self.es_session_doc_type,
                '_id': es_data['session_id'],
                '_source': es_data
            })
            if len(batch) >= self.args.batch_size:
                counter = counter + 1
                print(counter, batch[0])
                helpers.bulk(self.es, batch)
                batch = []
    if batch:  # flush the final partial batch
        helpers.bulk(self.es, batch)
def bulk_index_documents_to_es5(index_name, documents):
    try:
        bulk(es_client5, _prepare_docs_for_bulk_insert(documents),
             index=index_name, doc_type=GENERIC_DOC_TYPE, chunk_size=100)
    except TransportError5:
        # fall back to indexing one document at a time so a single bad
        # document does not sink the whole batch
        index_individual_docs(index_name, documents)
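# A minimal sketch of the per-document fallback referenced above; the real
# index_individual_docs is defined elsewhere, so this body is an assumption:
def index_individual_docs(index_name, documents):
    for doc in _prepare_docs_for_bulk_insert(documents):
        try:
            es_client5.index(index=index_name, doc_type=GENERIC_DOC_TYPE,
                             id=doc.get("_id"), body=doc.get("_source", doc))
        except TransportError5 as e:
            # skip the offending document but keep indexing the rest
            print("Failed to index document %s: %s" % (doc.get("_id"), str(e)))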
def bulk_index_documents_to_es5(index_name, doc_type, documents):
    try:
        bulk(es_client5, _prepare_docs_for_bulk_insert(documents),
             index=index_name, doc_type=doc_type, chunk_size=100)
        return "acknowledged", 200
    except TransportError5 as e:
        # print() needs explicit %-formatting, unlike the logging module
        print("Failed to bulk index the %s documents: %s" % (doc_type, str(e)))
        return str(e), e.status_code
def parse_dir(path, channel2id):
    channel_name = path.name
    channel = channel2id[channel_name]
    actions = []
    for fname in path.iterdir():
        for data in parse_file(fname, channel):
            actions.append({
                '_index': INDEX_NAME,
                '_type': TYPE_NAME,
                '_source': data
            })
    print(channel_name, len(actions))
    helpers.bulk(es, actions)
def process_events_csv(self, filename):
    with open(filename) as f:
        csvreader = csv.DictReader(f)
        counter = 0
        batch = []
        for row in csvreader:
            # extract the event time from the TimeUUID for indexing
            event_uuid = uuid.UUID(row['event_id'])
            row['started_at'] = datetime_from_uuid1(event_uuid)
            # add cluster name to doc
            row['cluster'] = self.cluster_name
            batch.append({
                '_index': self.es_index_name,
                '_type': self.es_event_doc_type,
                '_parent': row['session_id'],
                '_routing': row['session_id'],
                '_id': row['event_id'],
                '_source': row
            })
            if len(batch) >= self.args.batch_size:
                counter = counter + 1
                r = helpers.bulk(self.es, batch)
                print(counter, r)
                batch = []
        if batch:  # flush the final partial batch
            r = helpers.bulk(self.es, batch)
            print(counter + 1, r)
def indexToElastic(es):
    analysisSettings = {}
    index = 'movielens'
    settings = {                                   # A
        "settings": {
            "number_of_shards": 1,                 # B
            "index": {
                "analysis": analysisSettings,      # C
            }
        }
    }
    es.indices.delete(index, ignore=[400, 404])
    es.indices.create(index, body=json.dumps(settings))
    helpers.bulk(es, userBaskets(), chunk_size=250)
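# A hedged usage sketch: connect a client and rebuild the index. The host
# is an assumption, and userBaskets() must yield bulk action dicts:
from elasticsearch import Elasticsearch, helpers

es = Elasticsearch('http://localhost:9200')
indexToElastic(es)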
def process_sessions_csv(self, filename):
    with open(filename) as f:
        sessions = csv.DictReader(f)
        counter = 0
        batch = []
        for s in sessions:
            self.l.debug("processing " + str(s))
            # filter out empty traces
            if s['coordinator'] not in (None, '') and s['parameters'] != '':
                if s['request'] == 'Execute batch of CQL3 queries':
                    s['query_type'] = 'BATCH'
                    params = {}  # batch statements carry no parsable parameters
                else:
                    # the CSV stores the dict with single quotes; JSON needs double
                    params_no_single = s['parameters'].replace("'", '"')
                    try:
                        params_json = json.loads(params_no_single)
                        params = self.parse_params(params_json)
                    except ValueError:
                        continue
                es_data = self.merge_two_dicts(params, s)
                es_data.pop('parameters', None)
                es_data['started_at'] = datetime.datetime.strptime(
                    es_data['started_at'][:-5], "%Y-%m-%d %H:%M:%S")
                es_data['session_id'] = str(es_data['session_id'])
                es_data['cluster'] = self.cluster_name
                batch.append({
                    '_index': self.es_index_name,
                    '_type': self.es_session_doc_type,
                    '_id': es_data['session_id'],
                    '_source': es_data
                })
                if len(batch) >= self.args.batch_size:
                    counter = counter + 1
                    print(counter, batch[0])
                    helpers.bulk(self.es, batch)
                    batch = []
        if batch:  # flush the final partial batch
            helpers.bulk(self.es, batch)
def batch():
    ret = sc.api_call('channels.list')
    channels = [(c['id'], c['name']) for c in ret['channels']]
    for channel_id, channel_name in channels:
        print('channel = ', channel_name)
        count = 100
        latest = str(datetime.now().timestamp())
        has_more = True
        actions = []
        while has_more:
            ret = sc.api_call("channels.history", channel=channel_id,
                              count=count, latest=latest)
            print(ret['ok'], ret.get('error', ''))
            has_more = ret['has_more']
            messages = ret['messages']
            if not messages:
                break
            # page backwards from the oldest message seen so far
            latest = messages[-1]['ts']
            for data in messages:
                data = convert_message(data)
                if data is None:
                    continue
                data['channel'] = channel_id
                # skip messages that are already indexed
                res_msg = fetch_message(es, data['channel'], data['timestamp'])
                if res_msg['total'] != 0:
                    assert res_msg['total'] == 1
                    continue
                actions.append({
                    '_index': INDEX_NAME,
                    '_type': TYPE_NAME,
                    '_source': data
                })
        print(channel_name, len(actions))
        helpers.bulk(es, actions)
        time.sleep(1)  # stay under the Slack API rate limit
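# A minimal sketch of the fetch_message dedup helper used above; the real
# implementation is not shown, so the field names here are assumptions:
def fetch_message(es, channel, timestamp):
    res = es.search(index=INDEX_NAME, body={
        'query': {'bool': {'filter': [
            {'term': {'channel': channel}},
            {'term': {'timestamp': timestamp}},
        ]}}
    })
    return res['hits']  # 'total' is the number of matching documents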
def process_events(self):
    events = self.cassandra_session.execute(self.cassandra_events_stmt)
    counter = 0
    batch = []
    for row in events:
        # add cluster name to doc
        row['cluster'] = self.cluster_name
        batch.append({
            '_index': self.es_index_name,
            '_type': self.es_event_doc_type,
            '_parent': row['session_id'],
            '_routing': row['session_id'],
            '_id': row['event_id'],
            '_source': row
        })
        if len(batch) >= self.args.batch_size:
            counter = counter + 1
            r = helpers.bulk(self.es, batch)
            print(counter, r)
            batch = []
    if batch:  # flush the final partial batch
        r = helpers.bulk(self.es, batch)
        print(counter + 1, r)
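# The manual batch/flush bookkeeping above can also be delegated to the
# client library. A hedged alternative sketch using helpers.streaming_bulk,
# which chunks a generator of actions internally:
def process_events_streaming(self):
    def actions():
        for row in self.cassandra_session.execute(self.cassandra_events_stmt):
            row['cluster'] = self.cluster_name
            yield {
                '_index': self.es_index_name,
                '_type': self.es_event_doc_type,
                '_parent': row['session_id'],
                '_routing': row['session_id'],
                '_id': row['event_id'],
                '_source': row
            }
    for ok, item in helpers.streaming_bulk(self.es, actions(),
                                           chunk_size=self.args.batch_size):
        if not ok:
            print(item)  # report any document the cluster rejected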
def update_obj(id, obj, locale):
    try:
        indexName = "searchautocomplete_" + locale
        obj.pop("locale", None)
        es_update_obj = {
            '_op_type': 'index',
            '_index': indexName,
            '_type': 'autocomplete',
            '_id': id,
            '_source': obj
        }
        return es_update_obj
    except Exception as e:
        print(repr(e))

updateList = []
for item in indexDocs:
    updateList.append(update_obj(item.get("id"), item, item.get("locale")))
    # flush every 1000 docs (the list is emptied after each flush)
    if len(updateList) % 1000 == 0:
        try:
            res = helpers.bulk(esObj, updateList)
            print(str(res))
        except Exception as e:
            print(e)
        updateList = []

if len(updateList) > 0:
    try:
        res = helpers.bulk(esObj, updateList)
        print(str(res))
    except Exception as e:
        print(repr(e))
def getrawtransaction():
    url = "http://127.0.0.1:8332"
    j = 0
    while j < len(txids):
        # fetch raw transactions in slices of up to 1000 via batched JSON-RPC;
        # the slice truncates automatically on the final partial page
        txids1 = txids[j:j + 1000]
        commands = [{"method": "getrawtransaction",
                     "params": [txid, 1],
                     "id": "jsonrpc"} for txid in txids1]
        r = requests.post(url, data=json.dumps(commands),
                          auth=("admin", "admin"),
                          headers={'content-type': "application/json"})
        jrawtransaction = json.loads(r.content.decode("utf-8"))
        toelastic = []
        for i in range(len(jrawtransaction)):
            vinstringarray = []
            voutstringarray = []
            vins = jrawtransaction[i]['result']['vin']
            vouts = jrawtransaction[i]['result']['vout']
            for vin in vins:
                if 'coinbase' not in vin:
                    vinstringarray.append(vin['txid'] + str(vin['vout']))
            for vout in vouts:
                if vout['scriptPubKey']['type'] in ["pubkey", "pubkeyhash"]:
                    try:
                        voutstringarray.append(
                            vout['scriptPubKey']['addresses'][0])
                    except (KeyError, IndexError):
                        print(vout)
            toelastic.append({
                '_op_type': 'index',
                '_index': 'btc',
                '_type': 'info',
                '_id': txids1[i],
                "_source": {
                    "i": ",".join(vinstringarray),
                    "o": ",".join(voutstringarray)
                }
            })
        helpers.bulk(es, toelastic)
        j = j + 1000
        print(str(j))
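# A hedged sketch of the globals this script assumes: an ES client and the
# txids list. The txid source is an assumption (here, a local text file):
from elasticsearch import Elasticsearch, helpers
import json
import requests

es = Elasticsearch('http://localhost:9200')
with open('txids.txt') as f:  # hypothetical input file, one txid per line
    txids = [line.strip() for line in f if line.strip()]
getrawtransaction()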