def get_alarm_relevant_events(objs, **kwargs):
    from_es = kwargs["act_from_es"]
    to_es = kwargs["act_to_es"]
    events = []
    for obj in objs:
        alarm = obj["_source"]
        start_date = datetime.datetime.fromtimestamp(int(alarm["start_time"]) / 1000)
        end_date = datetime.datetime.fromtimestamp(int(alarm["end_time"]) / 1000)
        # one daily "event_YYYYMMDD" index for every day the alarm spans
        indices = [("event_" + (start_date + datetime.timedelta(x)).strftime("%Y%m%d"))
                   for x in range(0, (end_date - start_date).days + 1)]
        query_str = {"query": {"terms": {"_id": alarm["event_id"]}}}
        s_res = from_es.search(index=indices, body=query_str, allow_no_indices=True, size=2000)
        events.extend(s_res["hits"]["hits"])
        # flush in batches of 3000; write only the slice that is then dropped,
        # so the remaining buffered events are not indexed twice
        while len(events) > 3000:
            helpers.bulk(client=to_es, actions=events[:3000], chunk_size=3000)
            logging.debug("Write 3000 items into dest ES.")
            del events[:3000]
    # write the remaining items into the destination ES.
    helpers.bulk(client=to_es, actions=events, chunk_size=3000)
    logging.debug("Write the remaining " + str(len(events)) + " items into dest ES.")
    return
def do_search_and_write(from_es, to_es, indices, query, step=5000, **kwargs):
    action = None
    if "action" in kwargs:
        action = kwargs["action"]
    # act_para = None
    # if "act_para" in kwargs:
    #     act_para = kwargs["act_para"]
    if (to_es is None) or (from_es is None):
        logging.log(logging.ERROR, "The source or destination ES has not been correctly specified.")
        return
    else:
        search_res = helpers.scan(client=from_es, query=query, index=indices, scroll="6m", size=step)
        bat = []
        cnt = 0
        for i in search_res:
            bat.append(i)
            cnt += 1
            if (cnt % 30000) == 0:
                if action is not None:
                    action(objs=bat, **kwargs)
                helpers.bulk(client=to_es, actions=bat, chunk_size=step, max_chunk_bytes=209715200)
                del bat[:]
                logging.info("Write 30000 items to dst ES.")
        # insert the rest of the data into dst_es
        if action is not None:
            action(objs=bat, **kwargs)
        helpers.bulk(client=to_es, actions=bat, chunk_size=step, max_chunk_bytes=204857600)
    return
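# Hypothetical usage of the two helpers above (not part of the original code):
# do_search_and_write() scans matching alarms out of the source cluster and, via
# the "action" callback, get_alarm_relevant_events() copies the events each alarm
# references before the alarms themselves are bulk-written. The host names, the
# "alarm_*" index pattern and the match_all query are illustrative assumptions.
from elasticsearch import Elasticsearch

src_es = Elasticsearch(hosts=["src-es:9200"])
dst_es = Elasticsearch(hosts=["dst-es:9200"])
do_search_and_write(from_es=src_es, to_es=dst_es,
                    indices="alarm_*",
                    query={"query": {"match_all": {}}},
                    action=get_alarm_relevant_events,
                    act_from_es=src_es, act_to_es=dst_es)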
def store_results(self, data):
    """Store results back to ES."""
    index_out = self._prep_index_name(self.config.storage.ES_TARGET_INDEX)
    actions = [{
        "_index": index_out,
        "_type": "log",
        "_source": item
    } for item in data]
    helpers.bulk(self.es, actions, chunk_size=int(len(data) / 4) + 1)
def bulk_update_cgm(cgms, actions=None, op='update', index=None):
    index = index or INDEX
    if not actions and cgms:
        actions = ({
            '_op_type': op,
            '_index': index,
            '_id': cgm._id,
            '_type': 'collectionSubmission',
            'doc': serialize_cgm(cgm),
            'doc_as_upsert': True,
        } for cgm in cgms)
    try:
        helpers.bulk(client(), actions or [], refresh=True, raise_on_error=False)
    except helpers.BulkIndexError as e:
        raise exceptions.BulkUpdateError(e.errors)
def set_data(es, input_file, index_name=settings.es_index, doc_type_name=settings.es_type):
    # read in
    with open(input_file, 'r') as fp:
        line_list = fp.readlines()
    # make ACTIONS
    ACTIONS = []
    for line in line_list:
        fields = json.loads(line)
        action = {
            "_index": index_name,
            "_type": doc_type_name,
            "_source": {
                "id": fields["id"],
                "work_id": fields["work_id"],
                "title": fields["title"],
                "creator": fields["creator"],
                "creator_variant": fields["creator_variant"],
                "vol_id": fields["vol_id"],
                "category": fields["category"],
                "sutra_body": fields["sutra_body"],
            }
        }
        ACTIONS.append(action)
    # batch proc
    success, _ = bulk(es, ACTIONS, index=index_name, raise_on_error=True)
    print('Performed %d actions' % success)
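# Hypothetical call to set_data() above (not in the original source): the input
# file is assumed to be JSON-lines, one record per line, carrying the fields the
# action template expects. Host and file name are illustrative.
from elasticsearch import Elasticsearch

es = Elasticsearch(hosts=["localhost:9200"])
set_data(es, "sutras.jsonl")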
def sql_migrate(index, sql, max_id, increment, es_args=None, **kwargs):
    """
    Run provided SQL and send output to elastic.

    :param str index: Elastic index to update (formatted into `sql`)
    :param str sql: SQL to format and run. See __init__.py in this module
    :param int max_id: Last known object id. Indicates when to stop paging
    :param int increment: Page size
    :param dict es_args: Dict or None, to pass to `helpers.bulk`
    :kwargs: Additional format arguments for `sql` arg
    :return int: Number of migrated objects
    """
    if es_args is None:
        es_args = {}
    total_pages = int(ceil(max_id / float(increment)))
    total_objs = 0
    page_start = 0
    page_end = 0
    page = 0
    while page_end <= (max_id + increment):
        page += 1
        page_end += increment
        if page <= total_pages:
            logger.info('Updating page {} / {}'.format(page_end / increment, total_pages))
        else:
            # An extra page is included to cover the edge case where:
            #   max_id == (total_pages * increment) - 1
            # and two additional objects are created during runtime.
            logger.info('Cleaning up...')
        with connection.cursor() as cursor:
            cursor.execute(
                sql.format(index=index, page_start=page_start, page_end=page_end, **kwargs))
            ser_objs = cursor.fetchone()[0]
            if ser_objs:
                total_objs += len(ser_objs)
                helpers.bulk(client(), ser_objs, **es_args)
        page_start = page_end
    return total_objs
def bulk_update_nodes(serialize, nodes, index=None, category=None):
    """Update the given list of nodes in the search index.

    :param function serialize: Callable mapping a Node to its search document (dict)
    :param Node[] nodes: Projects, components, registrations, or preprints
    :param str index: Index of the nodes
    :return: Result of `helpers.bulk`, or None if there was nothing to update
    """
    index = index or INDEX
    actions = []
    for node in nodes:
        serialized = serialize(node)
        if serialized:
            actions.append({
                '_op_type': 'update',
                '_index': index,
                '_id': node._id,
                '_type': category or get_doctype_from_node(node),
                'doc': serialized,
                'doc_as_upsert': True,
            })
    if actions:
        return helpers.bulk(client(), actions)
"response": "/accept", "dst_port": 80, "event_level": 0 } } if __name__ == "__main__": logging.info("==================== Start ====================") dst_es = Elasticsearch(hosts=config["dst_es"], sniff_on_start=True, sniff_on_connection_fail=True, timeout=120) bat = [] _id = 1 while True: item = copy.deepcopy(data) item["_id"] = _id bat.append(item) if (_id % 1000) == 0: helpers.bulk(client=dst_es, actions=bat, chunk_size=1000, max_chunk_bytes=209715200) bat = [] if (_id % 30000) == 0: logging.info("Write 30000 items to ES.") _id += 1 logging.info("==================== End ====================\n\n")