# Build one partial-update action per task id found in `data` and send the
# actions to the "grid_emulation" index in batches of 500.
for tid in data:
    row = data[tid]
    print(tid, row)
    # A row whose first field (stored as "jobs") equals 0 is flagged with
    # status -1; every other row gets status 1.
    status = -1 if row[0] == 0 else 1
    to_store.append({
        "_op_type": "update",
        "_id": tid,
        "_index": "grid_emulation",
        "_type": "doc",
        "doc": {
            "jobs": row[0],
            "Scores": row[1],
            "Swall_time": row[2],
            "Sinputfiles": row[3],
            "dataset": row[4],
            "status": status,
        },
    })
    count += 1
    if not count % 500:
        print(count)
        res = estools.bulk_index(to_store, es)
        # Drop the batch only when bulk_index returns a truthy result;
        # otherwise the actions stay queued and go out with the next flush.
        if res:
            del to_store[:]

# Flush whatever is left over, through the same `es` client that the in-loop
# flushes use (the original call omitted it).
estools.bulk_index(to_store, es)
print('final updates:', count)
# Turn every row read from `cursor` into an Elasticsearch action for the
# "t0_exesbig" index and ship the actions in batches of 500.
data = []
count = 0
for row in cursor:
    # Pair each column name in `escolumns` with the value at the same
    # position in the row.
    doc = dict(zip(escolumns, row))

    # Non-empty timestamps are stringified and the space is replaced by 'T'
    # (presumably to obtain an ISO-8601 style "dateTtime" value - confirm
    # against the index mapping).
    for time_col in ('CTIME', 'MTIME'):
        if doc[time_col]:
            doc[time_col] = str(doc[time_col]).replace(' ', 'T')

    doc["_index"] = "t0_exesbig"
    doc["pipeline"] = "t0_exesbig"
    doc["_id"] = doc['TASKID']
    data.append(doc)

    # Count the row *before* testing the batch boundary. The original tested
    # first, so 0 % 500 == 0 sent the very first row as a one-document bulk
    # request.
    count += 1
    if not count % 500:
        print(count)
        res = estools.bulk_index(data, es)
        # Keep the batch queued unless bulk_index returned a truthy result.
        if res:
            del data[:]

# Send the remaining rows that did not fill a whole batch.
estools.bulk_index(data, es)
print('final count:', count)
con.close()
def _lookup_dataset(dataset):
    """Look up size information for one dataset identifier.

    Args:
        dataset: A "scope:name" string, or None.

    Returns:
        A ``(ds_files, ds_size, ds_type, status)`` tuple. The first three
        values stay at ``0, 0, ''`` unless the lookup succeeds. ``status`` is:

        *  3 - ``dc.get_did`` and the follow-up info call completed
               (also used when the DID is neither a dataset nor a container)
        * -2 - the identifier contains no ':' separator
        * -3 - ``rex.DataIdentifierNotFound`` was raised
        * -4 - another ``rex.RucioException`` was raised
        * -5 - ``dataset`` is None, or any other exception was raised
    """
    ds_files, ds_size, ds_type = 0, 0, ''

    if dataset is None:
        return ds_files, ds_size, ds_type, -5

    if ':' not in dataset:
        print('* no Scope: \n', dataset)
        return ds_files, ds_size, ds_type, -2

    # Split on the first ':' only, so an identifier with extra colons is
    # reported through the lookup instead of raising ValueError here.
    scope, name = dataset.split(':', 1)
    name = name.rstrip('/')

    try:
        did = dc.get_did(scope, name)
        if did['type'] == 'DATASET':
            ds_files, ds_size, ds_type = get_ds_info(scope, name)
        elif did['type'] == 'CONTAINER':
            ds_files, ds_size, ds_type = get_container_info(scope, name)
        else:
            print("NOT A DATASET OR CONTAINER!")
        status = 3
    except rex.DataIdentifierNotFound:
        status = -3
    except rex.RucioException as rue:
        status = -4
        print(rue)
    except Exception:
        # Deliberately broad best-effort handling, but no longer a bare
        # ``except:`` so KeyboardInterrupt / SystemExit still stop the run.
        status = -5

    return ds_files, ds_size, ds_type, status


def main():
    """Attach dataset size information to "grid_emulation" task documents.

    Repeatedly searches for up to 100 production tasks with ``status == 2``,
    resolves each task's dataset via ``_lookup_dataset`` and queues a partial
    update that stores ``ds_files``, ``ds_size``, ``ds_type`` and the new
    ``status``. The loop ends when the search reports zero matching tasks.
    """
    count = 0
    data = []

    # get data with status = 2 (job info present)
    tasks_query = {
        "size": 100,
        "_source": ["dataset"],
        "query": {
            "bool": {
                "must": [
                    {"term": {"status": 2}},
                    {"term": {"tasktype": "prod"}},
                ]
            }
        }
    }

    while True:
        page = es.search(body=tasks_query, index="grid_emulation")
        remaining = page['hits']['total']
        if remaining == 0:
            break
        print('remaining:', remaining)

        for hit in page['hits']['hits']:
            count += 1
            # .get(): a hit without a "dataset" field is treated like a
            # None dataset instead of raising KeyError.
            ds_files, ds_size, ds_type, status = _lookup_dataset(
                hit['_source'].get('dataset'))
            data.append({
                "_op_type": "update",
                "_id": hit['_id'],
                "_index": "grid_emulation",
                "_type": "doc",
                "doc": {
                    "ds_files": ds_files,
                    "ds_size": ds_size,
                    "ds_type": ds_type,
                    "status": status,
                },
            })

        # Flush after every page. Flushing only on multiples of 100 left a
        # partial page unsent, so the next search returned the same
        # status-2 documents and they were looked up and counted again.
        # NOTE(review): updates become searchable only after an index
        # refresh - confirm the refresh interval is short enough here.
        print(count)
        if estools.bulk_index(data, es):
            del data[:]

    # Retry anything a failed flush left behind.
    estools.bulk_index(data, es)
    print('final count:', count)