import json
import urllib2

# Module-level configuration (LOGGER, _FETCH_LIMIT, _ES_READ_FILE, _ES_HOST,
# _ES_INDEX, es_conf, mongo_conf, _categories, the _MONGO_* names, and the
# mongo_helpers module) is assumed to be defined elsewhere in the codebase.
def create_mongo_fetch_generator(collection, query_filters):
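    """Yield (offset, fetch_limit) pairs that page through every document in
    collection matching query_filters."""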
    fetch_limit = _FETCH_LIMIT
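    # Padding the matching-record count with one extra fetch_limit guarantees
    # at least one (possibly empty) trailing batch is yielded.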
    records = collection.find(query_filters, {'_id': 0}).count() + fetch_limit
    offset = 0
    while records > 0:
        yield offset, fetch_limit
        offset += fetch_limit
        records -= fetch_limit
        LOGGER.debug("Fetching another batch of {0}".format(fetch_limit))
def push_data_file_to_es():
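    """Read the bulk payload written by dump_data_dict_to_es_readable_file
    and POST it to the Elasticsearch _bulk endpoint."""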
    with open(_ES_READ_FILE, 'rb') as data_file:
        data = data_file.read()
    if not data:
        return
    LOGGER.debug("Pushing {0} bytes of data to ES".format(len(data)))
    req = urllib2.Request(es_conf[_ES_HOST] + '_bulk', data)
    req.add_header('Content-Length', '%d' % len(data))
    req.add_header('Content-Type', 'application/octet-stream')
    res = urllib2.urlopen(req)
    res.close()
    LOGGER.debug("Pushing data to ES succeeded")
def fetch_mongo_data_for_fetch_range(collection, offset, fetch_limit, query_filters):
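    """Fetch one offset/limit batch of documents from Mongo and return those
    that carry a 'category' field."""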
    _mongo_fetched_data_dict = []
    data_cursor = collection.find(query_filters, {'_id': 0}).skip(offset).limit(fetch_limit)
    for document in data_cursor:
        try:
            # Documents without a category cannot be routed to an ES type.
            if 'category' not in document:
                continue
            _mongo_fetched_data_dict.append(document)
        except Exception as e:
            LOGGER.error(
                "For document {0} encountered error {1}".format(document, e))
    return _mongo_fetched_data_dict
def dump_data_dict_to_es_readable_file(mongo_fetched_data_dict):
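    """Write the fetched documents to _ES_READ_FILE in Elasticsearch bulk
    format: an action line followed by the document source, one pair per
    document."""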
    put = {"index": {"_index": es_conf[_ES_INDEX], "_type": "wevent"}}
    events_dump_file = open(_ES_READ_FILE, 'w')
    try:
        for document in mongo_fetched_data_dict:
            put['index']['_type'] = document['category']
            json.dump(put,events_dump_file)
            events_dump_file.write('\n')
            json.dump(document,events_dump_file)
            events_dump_file.write('\n')
    except Exception, e:
        LOGGER.error(
                "For document {0} encountered dumping data error {1} ".format(document, e))
def push_incremental_data_to_es():
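    """Top-level river: resolve the incremental ObjectId range, push the
    matching raw documents to ES via the pipeline, then persist the new
    lastUpdated high-water mark."""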
    params_collection = mongo_helpers.get_mongo_db_con(
        database=_MONGO_PARAMS_DB)[mongo_conf[_MONGO_PARAMS_COLLECTION]]
    timestamp_range = obtain_time_ranges(params_collection)
    LOGGER.debug(
        "Started river to push data to ES for {0}".format(timestamp_range))
    mongo_object_ids_range = mongo_helpers.get_server_object_ids(
        timestamp_range=timestamp_range)
    raw_data_collection = mongo_helpers.get_mongo_db_con(
        database=_MONGO_RAW_DATA_DB)[mongo_conf[_MONGO_RAW_DATA_COLLECTION]]
    query_filters = {
        '_id': {'$gte': mongo_object_ids_range[0],
                '$lte': mongo_object_ids_range[1]},
        'category': {'$in': _categories},
    }
    mongo_fetch_generator = mongo_helpers.create_mongo_fetch_generator(
        raw_data_collection, query_filters)
    process_pipeline(raw_data_collection, mongo_fetch_generator, query_filters)
    # Persist the new high-water mark so the next run starts from here.
    params_collection.update(
        {'elasticsearch.lastUpdated': {'$exists': True}},
        {'$set': {'elasticsearch.lastUpdated': str(timestamp_range[1])}})
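# obtain_time_ranges and process_pipeline are defined elsewhere in the
# codebase. As a rough sketch only (an assumed shape, not the actual
# implementation), process_pipeline plausibly walks the generator and runs
# each batch through the fetch -> dump -> push steps above:
#
#     def process_pipeline(collection, fetch_generator, query_filters):
#         for offset, fetch_limit in fetch_generator:
#             batch = fetch_mongo_data_for_fetch_range(
#                 collection, offset, fetch_limit, query_filters)
#             dump_data_dict_to_es_readable_file(batch)
#             push_data_file_to_es()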