Esempio n. 1
0
    return table_actions, row_actions, column_actions

def bulk_index(service, actions, unit_name):
    """Submit ``actions`` in bulk through ``service`` and report on stderr.

    Args:
        service: Client object passed to ``helpers.bulk`` as its first
            argument.
        actions: The bulk actions to submit.
        unit_name: Label for the kind of item being indexed; it only
            appears in the printed summary line.
    """
    # With stats_only=True the call is expected to yield a pair of counts,
    # reported below as "indexed" and "failed" respectively.
    indexed, failed = helpers.bulk(service, actions, stats_only=True)
    summary = '{0} {1} indexed, {2} failed.'.format(indexed, unit_name, failed)
    print(summary, file=sys.stderr)


# Client for the Elasticsearch host that stores the index.
es = Elasticsearch([{'host': 'compute-1-32'}])

# Start from a clean slate: drop any existing TABLE index, then create it
# again with the "english" analyzer as the index default. The analyzer is
# supplied at creation time rather than through a close / put_settings /
# open cycle on an already-existing index.
if es.indices.exists(index=TABLE):
    es.indices.delete(index=TABLE)
es.indices.create(
    index=TABLE,
    body=(
        '{"settings":{"index":{"analysis":'
        '{"analyzer":{"default":{"type":"english"}}}}}}'
    ),
)

# Every sub-directory directly under `path` is loaded and indexed on its own.
path = '../data/'

# Keep only the entries of `path` that are directories.
subfolders = []
for entry in os.listdir(path):
    candidate = os.path.join(path, entry)
    if os.path.isdir(candidate):
        subfolders.append(candidate)

for folder in subfolders:
    doclist = Table.load_from_path(folder)
    table_actions, row_actions, column_actions = get_bulk_body(doclist)

    # Index the three action sets in a fixed order: tables, rows, columns.
    batches = (
        (table_actions, 'tables'),
        (row_actions, 'rows'),
        (column_actions, 'columns'),
    )
    for actions, unit_name in batches:
        bulk_index(es, actions, unit_name)

    # An empty line on stderr separates the reports of consecutive folders.
    print(file=sys.stderr)