def store_results(self, data):
    """Store results back to ES."""
    index_out = self._prep_index_name(self.config.ES_TARGET_INDEX)
    actions = [{"_index": index_out, "_type": "log", "_source": doc}
               for doc in data]
    # split the payload into roughly four chunks (+1 avoids chunk_size=0)
    helpers.bulk(self.es, actions, chunk_size=int(len(data) / 4) + 1)
def process_sessions(self):
    sessions = self.cassandra_session.execute(self.cassandra_sessions_stmt)
    counter = 0
    batch = []
    for s in sessions:
        self.l.debug("processing " + str(s))
        if s['coordinator'] is not None:  # filter out empty traces
            if s['request'] == 'Execute batch of CQL3 queries':
                s['query_type'] = 'BATCH'
                params = {}  # batch statements carry no parsable parameters
            else:
                params = self.parse_params(s['parameters'])
            es_data = self.merge_two_dicts(params, s)
            es_data.pop('parameters', None)
            es_data['session_id'] = str(es_data['session_id'])
            es_data['cluster'] = self.cluster_name
            batch.append({
                '_index': self.es_index_name,
                '_type': self.es_session_doc_type,
                '_id': es_data['session_id'],
                '_source': es_data
            })
            if len(batch) >= self.args.batch_size:
                counter = counter + 1
                print(counter, batch[0])
                helpers.bulk(self.es, batch)
                batch = []
    if batch:  # flush the final partial batch
        helpers.bulk(self.es, batch)
def bulk_index_documents_to_es5(index_name, documents):
    try:
        bulk(es_client5, _prepare_docs_for_bulk_insert(documents),
             index=index_name, doc_type=GENERIC_DOC_TYPE, chunk_size=100)
    except TransportError5:
        # fall back to indexing one document at a time so a single bad
        # document does not sink the whole batch
        index_individual_docs(index_name, documents)
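# A minimal sketch of the per-document fallback referenced above; the real
# index_individual_docs is defined elsewhere, so this body is an assumption:
def index_individual_docs(index_name, documents):
    for doc in _prepare_docs_for_bulk_insert(documents):
        try:
            es_client5.index(index=index_name, doc_type=GENERIC_DOC_TYPE,
                             id=doc.get("_id"), body=doc.get("_source", doc))
        except TransportError5 as e:
            # skip the offending document but keep indexing the rest
            print("Failed to index document %s: %s" % (doc.get("_id"), str(e)))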
def bulk_index_documents_to_es5(index_name, doc_type, documents):
    try:
        bulk(es_client5, _prepare_docs_for_bulk_insert(documents),
             index=index_name, doc_type=doc_type, chunk_size=100)
        return "acknowledged", 200
    except TransportError5 as e:
        # print() needs explicit %-formatting, unlike the logging module
        print("Failed to bulk index the %s documents: %s" % (doc_type, str(e)))
        return str(e), e.status_code
def parse_dir(path, channel2id):
    channel_name = path.name
    channel = channel2id[channel_name]
    actions = []
    for fname in path.iterdir():
        for data in parse_file(fname, channel):
            actions.append({
                '_index': INDEX_NAME,
                '_type': TYPE_NAME,
                '_source': data
            })
    print(channel_name, len(actions))
    helpers.bulk(es, actions)
def process_events_csv(self, filename):
    with open(filename) as f:
        csvreader = csv.DictReader(f)
        counter = 0
        batch = []
        for row in csvreader:
            # extract the event time from the TimeUUID for indexing
            event_uuid = uuid.UUID(row['event_id'])
            row['started_at'] = datetime_from_uuid1(event_uuid)
            # add cluster name to doc
            row['cluster'] = self.cluster_name
            batch.append({
                '_index': self.es_index_name,
                '_type': self.es_event_doc_type,
                '_parent': row['session_id'],
                '_routing': row['session_id'],
                '_id': row['event_id'],
                '_source': row
            })
            if len(batch) >= self.args.batch_size:
                counter = counter + 1
                r = helpers.bulk(self.es, batch)
                print(counter, r)
                batch = []
        if batch:  # flush the final partial batch
            r = helpers.bulk(self.es, batch)
            print(counter + 1, r)
def indexToElastic(es):
    analysisSettings = {}
    index = 'movielens'
    settings = {                                   # A
        "settings": {
            "number_of_shards": 1,                 # B
            "index": {
                "analysis": analysisSettings,      # C
            }
        }
    }
    es.indices.delete(index, ignore=[400, 404])
    es.indices.create(index, body=json.dumps(settings))
    helpers.bulk(es, userBaskets(), chunk_size=250)
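# A hedged usage sketch: connect a client and rebuild the index. The host
# is an assumption, and userBaskets() must yield bulk action dicts:
from elasticsearch import Elasticsearch, helpers

es = Elasticsearch('http://localhost:9200')
indexToElastic(es)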
def process_sessions_csv(self, filename):
    with open(filename) as f:
        sessions = csv.DictReader(f)
        counter = 0
        batch = []
        for s in sessions:
            self.l.debug("processing " + str(s))
            # filter out empty traces
            if s['coordinator'] not in (None, '') and s['parameters'] != '':
                if s['request'] == 'Execute batch of CQL3 queries':
                    s['query_type'] = 'BATCH'
                    params = {}  # batch statements carry no parsable parameters
                else:
                    # the CSV stores the dict with single quotes; JSON needs double
                    params_no_single = s['parameters'].replace("'", '"')
                    try:
                        params_json = json.loads(params_no_single)
                        params = self.parse_params(params_json)
                    except ValueError:
                        continue
                es_data = self.merge_two_dicts(params, s)
                es_data.pop('parameters', None)
                es_data['started_at'] = datetime.datetime.strptime(
                    es_data['started_at'][:-5], "%Y-%m-%d %H:%M:%S")
                es_data['session_id'] = str(es_data['session_id'])
                es_data['cluster'] = self.cluster_name
                batch.append({
                    '_index': self.es_index_name,
                    '_type': self.es_session_doc_type,
                    '_id': es_data['session_id'],
                    '_source': es_data
                })
                if len(batch) >= self.args.batch_size:
                    counter = counter + 1
                    print(counter, batch[0])
                    helpers.bulk(self.es, batch)
                    batch = []
        if batch:  # flush the final partial batch
            helpers.bulk(self.es, batch)
def batch():
    ret = sc.api_call('channels.list')
    channels = [(c['id'], c['name']) for c in ret['channels']]
    for channel_id, channel_name in channels:
        print('channel = ', channel_name)
        count = 100
        latest = str(datetime.now().timestamp())
        has_more = True
        actions = []
        while has_more:
            ret = sc.api_call("channels.history", channel=channel_id,
                              count=count, latest=latest)
            print(ret['ok'], ret.get('error', ''))
            has_more = ret['has_more']
            messages = ret['messages']
            if not messages:
                break
            # page backwards from the oldest message seen so far
            latest = messages[-1]['ts']
            for data in messages:
                data = convert_message(data)
                if data is None:
                    continue
                data['channel'] = channel_id
                # skip messages that are already indexed
                res_msg = fetch_message(es, data['channel'], data['timestamp'])
                if res_msg['total'] != 0:
                    assert res_msg['total'] == 1
                    continue
                actions.append({
                    '_index': INDEX_NAME,
                    '_type': TYPE_NAME,
                    '_source': data
                })
        print(channel_name, len(actions))
        helpers.bulk(es, actions)
        time.sleep(1)  # stay under the Slack API rate limit
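# A minimal sketch of the fetch_message dedup helper used above; the real
# implementation is not shown, so the field names here are assumptions:
def fetch_message(es, channel, timestamp):
    res = es.search(index=INDEX_NAME, body={
        'query': {'bool': {'filter': [
            {'term': {'channel': channel}},
            {'term': {'timestamp': timestamp}},
        ]}}
    })
    return res['hits']  # 'total' is the number of matching documents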
def process_events(self):
    events = self.cassandra_session.execute(self.cassandra_events_stmt)
    counter = 0
    batch = []
    for row in events:
        # add cluster name to doc
        row['cluster'] = self.cluster_name
        batch.append({
            '_index': self.es_index_name,
            '_type': self.es_event_doc_type,
            '_parent': row['session_id'],
            '_routing': row['session_id'],
            '_id': row['event_id'],
            '_source': row
        })
        if len(batch) >= self.args.batch_size:
            counter = counter + 1
            r = helpers.bulk(self.es, batch)
            print(counter, r)
            batch = []
    if batch:  # flush the final partial batch
        r = helpers.bulk(self.es, batch)
        print(counter + 1, r)
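# The manual batch/flush bookkeeping above can also be delegated to the
# client library. A hedged alternative sketch using helpers.streaming_bulk,
# which chunks a generator of actions internally:
def process_events_streaming(self):
    def actions():
        for row in self.cassandra_session.execute(self.cassandra_events_stmt):
            row['cluster'] = self.cluster_name
            yield {
                '_index': self.es_index_name,
                '_type': self.es_event_doc_type,
                '_parent': row['session_id'],
                '_routing': row['session_id'],
                '_id': row['event_id'],
                '_source': row
            }
    for ok, item in helpers.streaming_bulk(self.es, actions(),
                                           chunk_size=self.args.batch_size):
        if not ok:
            print(item)  # report any document the cluster rejected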
def update_obj(id, obj, locale):
    try:
        indexName = "searchautocomplete_" + locale
        obj.pop("locale", None)
        es_update_obj = {
            '_op_type': 'index',
            '_index': indexName,
            '_type': 'autocomplete',
            '_id': id,
            '_source': obj
        }
        return es_update_obj
    except Exception as e:
        print(repr(e))

updateList = []
for item in indexDocs:
    updateList.append(update_obj(item.get("id"), item, item.get("locale")))
    # flush every 1000 docs (the list is emptied after each flush)
    if len(updateList) % 1000 == 0:
        try:
            res = helpers.bulk(esObj, updateList)
            print(str(res))
        except Exception as e:
            print(e)
        updateList = []

if len(updateList) > 0:
    try:
        res = helpers.bulk(esObj, updateList)
        print(str(res))
    except Exception as e:
        print(repr(e))
def getrawtransaction():
    url = "http://127.0.0.1:8332"
    j = 0
    while j < len(txids):
        # fetch raw transactions in slices of up to 1000 via batched JSON-RPC;
        # the slice truncates automatically on the final partial page
        txids1 = txids[j:j + 1000]
        commands = [{"method": "getrawtransaction",
                     "params": [txid, 1],
                     "id": "jsonrpc"} for txid in txids1]
        r = requests.post(url, data=json.dumps(commands),
                          auth=("admin", "admin"),
                          headers={'content-type': "application/json"})
        jrawtransaction = json.loads(r.content.decode("utf-8"))
        toelastic = []
        for i in range(len(jrawtransaction)):
            vinstringarray = []
            voutstringarray = []
            vins = jrawtransaction[i]['result']['vin']
            vouts = jrawtransaction[i]['result']['vout']
            for vin in vins:
                if 'coinbase' not in vin:
                    vinstringarray.append(vin['txid'] + str(vin['vout']))
            for vout in vouts:
                if vout['scriptPubKey']['type'] in ["pubkey", "pubkeyhash"]:
                    try:
                        voutstringarray.append(
                            vout['scriptPubKey']['addresses'][0])
                    except (KeyError, IndexError):
                        print(vout)
            toelastic.append({
                '_op_type': 'index',
                '_index': 'btc',
                '_type': 'info',
                '_id': txids1[i],
                "_source": {
                    "i": ",".join(vinstringarray),
                    "o": ",".join(voutstringarray)
                }
            })
        helpers.bulk(es, toelastic)
        j = j + 1000
        print(str(j))
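# A hedged sketch of the globals this script assumes: an ES client and the
# txids list. The txid source is an assumption (here, a local text file):
from elasticsearch import Elasticsearch, helpers
import json
import requests

es = Elasticsearch('http://localhost:9200')
with open('txids.txt') as f:  # hypothetical input file, one txid per line
    txids = [line.strip() for line in f if line.strip()]
getrawtransaction()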