def main():
    """Load scraped items from .data/items.json and upsert them into MongoDB.

    Splits the item list into ~50-item chunks and processes them with
    Reducer threads, at most `thread_count` threads in flight at a time.

    Raises:
        Exception: if the scraped items file is missing.
    """
    if not os.path.exists('.data/items.json'):
        raise Exception(
            'The items.json file in the .data folder does not exist. Please run the scraping script before executing this script.'
        )

    collection_name = 'rsbuddy'
    client = MongoClient('localhost', 27017)
    database = client[collection_name]

    items = list(read_json('.data/items.json').items())
    # BUGFIX: with fewer than 50 items, len(items) // 50 == 0 and
    # np.array_split raises ValueError on zero sections — clamp to 1.
    section_count = max(1, len(items) // 50)
    indexes = np.array_split(np.arange(len(items)), section_count)

    thread_count = 2
    threads = []
    for chunk in indexes:
        thread = Reducer(database, collection_name, chunk, items)
        thread.start()
        threads.append(thread)
        # BUGFIX: the original modulo test (index % thread_count == 0 and
        # index != 0) let 3 threads run in the first batch; join as soon as
        # the batch reaches thread_count.
        if len(threads) >= thread_count:
            for running in threads:
                running.join()
            threads = []

    # BUGFIX: threads started after the last full batch were never joined,
    # so main() could return while writes were still in flight.
    for running in threads:
        running.join()
def main():
    """Entry point: set up logging, read config, and run the Reducer."""
    initialize_log()
    logging.info("Starting reducer.")

    # Pull the queue/topology settings from the environment-backed config.
    params = parse_config_params()
    reducer = Reducer(
        params['aggregated_data_queue'],
        params['sink_queue'],
        params['aggregators_quantity'],
        params['unflatten_key'],
        params['unflatten_value_key'],
    )
    reducer.start()