Exemple #1
0
def main():
    """Reduce the scraped items in bounded batches of ``Reducer`` workers.

    Reads ``.data/items.json``, splits the item positions into chunks of
    roughly 50 entries and hands each chunk to a ``Reducer`` worker
    together with the ``rsbuddy`` Mongo database handle. At most
    ``thread_count`` workers run at the same time, and every started
    worker has been joined by the time this function returns.

    Raises:
        FileNotFoundError: if ``.data/items.json`` does not exist (the
            scraping script has not been run yet).
    """
    items_path = '.data/items.json'
    if not os.path.exists(items_path):
        # FileNotFoundError is still an Exception, so callers catching the
        # previous generic Exception keep working.
        raise FileNotFoundError(
            'The items.json file in the .data folder does not exist. Please run the scraping script before executing this script.'
        )

    collection_name = 'rsbuddy'
    client = MongoClient('localhost', 27017)
    database = client[collection_name]

    items = list(read_json(items_path).items())
    if not items:
        # Nothing to reduce; avoids starting a worker on an empty chunk.
        return

    chunk_size = 50
    thread_count = 2

    # np.array_split rejects 0 sections, which is what len(items) // 50
    # yields for fewer than 50 items, so always ask for at least one.
    section_count = max(1, len(items) // chunk_size)
    indexes = np.array_split(np.arange(len(items)), section_count)

    threads = []
    for indexes_ in indexes:
        worker = Reducer(database, collection_name, indexes_, items)
        worker.start()
        threads.append(worker)

        # Wait for the current batch once it is full so that no more than
        # thread_count workers are ever running concurrently.
        if len(threads) >= thread_count:
            for running in threads:
                running.join()
            threads = []

    # Join the trailing, partially filled batch as well.
    for running in threads:
        running.join()
Exemple #2
0
def main():
    """Set up logging, read the configuration and start the reducer.

    The ``Reducer`` is built from five configuration entries, passed
    positionally in this order: ``aggregated_data_queue``, ``sink_queue``,
    ``aggregators_quantity``, ``unflatten_key`` and ``unflatten_value_key``.
    """
    initialize_log()
    logging.info("Starting reducer.")

    settings = parse_config_params()

    # Keys are looked up in the same order as the Reducer's positional
    # arguments.
    reducer_arg_keys = (
        'aggregated_data_queue',
        'sink_queue',
        'aggregators_quantity',
        'unflatten_key',
        'unflatten_value_key',
    )
    reducer_args = [settings[key] for key in reducer_arg_keys]

    worker = Reducer(*reducer_args)
    worker.start()