Example #1
0
def execute():
    print 'Started Entity Aggregation... ',

    start_timing()

    map_function = Code(
        open(join(JAVASCRIPT_PATH, MAP_FUNCTION_FILENAME), 'r').read())
    reduce_function = Code(
        open(join(JAVASCRIPT_PATH, REDUCE_FUNCTION_FILENAME), 'r').read())
    aggregate_map_function = Code(
        open(join(JAVASCRIPT_PATH, AGGREGATION_MAP_ADD_FUNCTION_FILENAME),
             'r').read())
    aggregate_reduce_function = Code(
        open(join(JAVASCRIPT_PATH, AGGREGATION_REDUCE_FUNCTION_FILENAME),
             'r').read())

    temp_raw.map_reduce(map_function, reduce_function,
                        TEMP_RESULTS_COLLECTION_NAME)
    temp_results.map_reduce(aggregate_map_function, aggregate_reduce_function,
                            {'reduce': RESULTS_COLLECTION_NAME})

    if temp_raw.count() > 0:
        copy_into_collection(temp_raw.find(no_cursor_timeout=True), coll)

    temp_results.drop()
    temp_raw.drop()

    client.close()

    print 'Finished'
    stop_timing()
def save_to_collection():
    """Copy matching raw documents into per-topic collections.

    For every key of ``entity_pseudos`` and every entity listed under that
    key, the topic id is looked up in ``entity_topic`` (by the key), the
    collection name is derived with ``TOPIC_COLLECTION_NAME``,
    ``check_or_create_collection`` is called for that name, and the
    documents of ``raw_collection`` matching ``{ENTITIES: entity}`` are
    passed to ``copy_into_collection`` with the topic collection as target.
    """
    for lowered in entity_pseudos.keys():
        variants = entity_pseudos[lowered]
        for variant in variants:
            # Everything below stays inside the inner loop on purpose: a
            # key with no entities triggers no lookup and no helper call.
            target_name = TOPIC_COLLECTION_NAME(entity_topic[lowered])
            check_or_create_collection(
                TOPIC_TWEETS_DB_NAME, target_name, Collection.TOPIC)
            target = topic_db[target_name]
            matching_docs = raw_collection.find({ENTITIES: variant})
            copy_into_collection(matching_docs, target)
Example #3
0
def execute():
    print 'Started Entity Aggregation... ',

    start_timing()

    map_function = Code(open(join(JAVASCRIPT_PATH, MAP_FUNCTION_FILENAME), 'r').read())
    reduce_function = Code(open(join(JAVASCRIPT_PATH, REDUCE_FUNCTION_FILENAME), 'r').read())
    aggregate_map_function = Code(open(join(JAVASCRIPT_PATH, AGGREGATION_MAP_ADD_FUNCTION_FILENAME), 'r').read())
    aggregate_reduce_function = Code(open(join(JAVASCRIPT_PATH, AGGREGATION_REDUCE_FUNCTION_FILENAME), 'r').read())

    temp_raw.map_reduce(map_function, reduce_function, TEMP_RESULTS_COLLECTION_NAME)
    temp_results.map_reduce(aggregate_map_function, aggregate_reduce_function, {'reduce': RESULTS_COLLECTION_NAME})

    if temp_raw.count() > 0:
        copy_into_collection(temp_raw.find(no_cursor_timeout=True), coll)

    temp_results.drop()
    temp_raw.drop()

    client.close()
    
    print 'Finished'
    stop_timing()