Esempio n. 1
0
# Build one partial-update action per task in `data` and send the actions to
# the "grid_emulation" index in batches of 500.
# NOTE(review): `data`, `to_store`, `count`, `es` and `estools` are defined
# outside this excerpt; each row is read positionally (indices 0-4).
for tid in data:
    row = data[tid]
    print(tid, row)
    # A task whose first column (stored as "jobs") equals 0 is flagged with
    # status -1; every other task gets status 1.
    status = -1 if row[0] == 0 else 1
    to_store.append({
        "_op_type": "update",
        "_id": tid,
        "_index": "grid_emulation",
        "_type": "doc",
        "doc": {
            "jobs": row[0],
            "Scores": row[1],
            "Swall_time": row[2],
            "Sinputfiles": row[3],
            "dataset": row[4],
            "status": status
        }
    })
    count += 1
    if not count % 500:
        print(count)
        # Drop the buffered actions only when the bulk call reports success;
        # otherwise they stay queued and are sent again with the next batch.
        res = estools.bulk_index(to_store, es)
        if res:
            del to_store[:]

# Flush whatever is left from the last partial batch, using the same
# connection object as the in-loop flushes.
estools.bulk_index(to_store, es)
print('final updates:', count)
Esempio n. 2
0
# Turn every row from `cursor` into an Elasticsearch document and index the
# documents into "t0_exesbig" in batches of 500.
# NOTE(review): `cursor`, `escolumns`, `es`, `estools` and `con` are defined
# outside this excerpt.
data = []
count = 0
try:
    for row in cursor:
        # Pair each column name with the value at the same position in the row.
        doc = dict(zip(escolumns, row))

        # Replace the spaces in the stringified timestamps with 'T'
        # (presumably to obtain an ISO-8601 style value -- confirm against
        # the column type returned by the cursor).
        for time_col in ('CTIME', 'MTIME'):
            if doc[time_col]:
                doc[time_col] = str(doc[time_col]).replace(' ', 'T')

        doc["_index"] = "t0_exesbig"
        doc["pipeline"] = "t0_exesbig"
        doc["_id"] = doc['TASKID']

        data.append(doc)

        # Count the document before testing the batch boundary so that every
        # flush carries a full batch of 500 instead of sending the very first
        # document on its own.
        count += 1
        if not count % 500:
            print(count)
            # Keep the buffer when the bulk call fails so the documents are
            # sent again with the next batch.
            res = estools.bulk_index(data, es)
            if res:
                del data[:]

    # Send the last, partial batch.
    estools.bulk_index(data, es)
    print('final count:', count)
finally:
    # Close the connection even if reading or indexing raised.
    con.close()
Esempio n. 3
0
def _lookup_dataset(dataset):
    """Resolve *dataset* through the Rucio client and summarise the outcome.

    Returns a ``(ds_files, ds_size, ds_type, status)`` tuple. The first three
    values stay at ``0, 0, ''`` unless the lookup succeeds. ``status`` is:

    *  3 -- ``dc.get_did`` succeeded (info filled in for DATASET/CONTAINER)
    * -2 -- the identifier contains no ``scope:`` prefix
    * -3 -- Rucio reported ``DataIdentifierNotFound``
    * -4 -- any other ``RucioException``
    * -5 -- *dataset* is ``None`` or an unexpected error occurred
    """
    ds_files, ds_size, ds_type = 0, 0, ''

    if dataset is None:
        return ds_files, ds_size, ds_type, -5
    if ':' not in dataset:
        print('* no Scope: \n', dataset)
        return ds_files, ds_size, ds_type, -2

    # partition() splits on the first ':' only, so an identifier with more
    # than one colon is handed to Rucio instead of raising ValueError here.
    scope, _, name = dataset.partition(':')
    name = name.rstrip('/')
    try:
        did = dc.get_did(scope, name)
        if did['type'] == 'DATASET':
            ds_files, ds_size, ds_type = get_ds_info(scope, name)
        elif did['type'] == 'CONTAINER':
            ds_files, ds_size, ds_type = get_container_info(scope, name)
        else:
            print("NOT A DATASET OR CONTAINER!")
        status = 3
    except rex.DataIdentifierNotFound:
        status = -3
    except rex.RucioException as rue:
        status = -4
        print(rue)
    except Exception:
        # Any other failure is recorded as -5; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        status = -5
    return ds_files, ds_size, ds_type, status


def main():
    """Attach dataset information to "grid_emulation" tasks with status 2.

    Repeatedly searches for up to 100 "prod" tasks whose status is 2, resolves
    each task's dataset via ``_lookup_dataset`` and writes ``ds_files``,
    ``ds_size``, ``ds_type`` and the new ``status`` back as partial updates.
    The loop ends when the search reports no remaining hits.
    """
    count = 0
    data = []

    # get data with status = 2 (job info present)
    tasks_query = {
        "size": 100,
        "_source": ["dataset"],
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "status": 2
                    }
                }, {
                    "term": {
                        "tasktype": "prod"
                    }
                }]
            }
        }
    }

    while True:
        res = es.search(body=tasks_query, index="grid_emulation")

        remaining = res['hits']['total']
        if remaining == 0:
            break
        print('remaining:', remaining)

        for doc in res['hits']['hits']:
            tid = doc['_id']
            # A hit that lacks the field is treated like a null dataset.
            dataset = doc['_source'].get('dataset')
            count += 1
            ds_files, ds_size, ds_type, status = _lookup_dataset(dataset)

            data.append({
                "_op_type": "update",
                "_id": tid,
                "_index": "grid_emulation",
                "_type": "doc",
                "doc": {
                    "ds_files": ds_files,
                    "ds_size": ds_size,
                    "ds_type": ds_type,
                    "status": status
                }
            })

        # Flush once per result page. Waiting for count to reach a multiple
        # of 100 would leave a short page unwritten, so the same tasks would
        # be fetched and resolved again on the next iteration.
        print(count)
        # On failure the actions stay buffered and are retried with the
        # next page.
        if estools.bulk_index(data, es):
            del data[:]

    # Retry anything a failed flush left behind.
    if data:
        estools.bulk_index(data, es)
    print('final count:', count)