def run_loader(self): """Main function for running loader""" if self.args.verbose: self.logger.warn('DEBUG mode enabled!') time.sleep(3) data_manager = DataFileManager(self.context_info.config_file_location) file_transactor = FileTransactor() file_transactor.start_threads( data_manager.get_file_transactor_thread_settings()) data_manager.download_and_validate() self.logger.debug("finished downloading now doing thread") file_transactor.check_for_thread_errors() self.logger.debug("finished threads waiting for queues") file_transactor.wait_for_queues() self.logger.debug("finished queues waiting for shutdown") file_transactor.shutdown() neo_transactor = Neo4jTransactor() neo_transactor.start_threads( data_manager.get_neo_transactor_thread_settings()) self.logger.debug("finished starting neo threads ") if not self.context_info.env["USING_PICKLE"]: self.logger.info("Creating indices.") Neo4jHelper.create_indices() etl_time_tracker_list = self.run_etl_groups(self.logger, data_manager, neo_transactor) neo_transactor.shutdown() elapsed_time = time.time() - self.start_time for time_item in etl_time_tracker_list: self.logger.info(time_item) self.logger.info('Loader finished. Elapsed time: %s' % time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
def run_preprocessor(self): if args.verbose: logger.warn('DEBUG mode enabled!') time.sleep(3) start_time = time.time() data_manager = DataFileManager(context_info.config_file_location) logger.info("config_file_location %s" % (context_info.config_file_location)) ft = FileTransactor() ft.start_threads(data_manager.get_FT_thread_settings()) data_manager.download_and_validate() logger.info("finished downloading now doing thread") ft.check_for_thread_errors() logger.info("finished threads waiting for queues") ft.wait_for_queues() logger.info("finished queues waiting for shutdown") ft.shutdown() configs_dict = { 'INTERACTION-SOURCE-MOL': ['INTERACTION-SOURCE', 'BGI'], 'INTERACTION-SOURCE-GEN': ['INTERACTION-SOURCE', 'BGI'] } config_dict = { 'INTERACTION-SOURCE-MOL': 'INTERACTION-SOURCE', 'INTERACTION-SOURCE-GEN': 'INTERACTION-SOURCE' } processor_dispatch = { 'INTERACTION-SOURCE-MOL': InteractionMolecularProcessor, 'INTERACTION-SOURCE-GEN': InteractionGeneticProcessor } list_of_processor_groups = [[ 'INTERACTION-SOURCE-MOL', 'INTERACTION-SOURCE-GEN' ]] processor_time_tracker_list = [] for processor_group in list_of_processor_groups: processor_group_start_time = time.time() logger.info("Starting Processor group: %s" % processor_group) thread_pool = [] for processor_name in processor_group: logger.info("Processor Name: %s" % processor_name) configs = [] for config_type in configs_dict[processor_name]: config = data_manager.get_config(config_type) if config is not None: configs.append(config) else: logger.info("No Config found for: %s %s" % (processor_name, config_type)) if len(configs) > 0: processor = processor_dispatch[processor_name](configs) p = multiprocessing.Process(target=processor.run_processor) p.start() thread_pool.append(p) else: logger.info("No Configs found for: %s" % processor_name) Processor.wait_for_threads(thread_pool) logger.info("Waiting for Queues to sync up") processor_elapsed_time = time.time() - processor_group_start_time processor_time_message = ( "Finished Processor group: %s, Elapsed time: %s" % (processor_group, time.strftime("%H:%M:%S", time.gmtime(processor_elapsed_time)))) logger.info(processor_time_message) processor_time_tracker_list.append(processor_time_message) end_time = time.time() elapsed_time = end_time - start_time for time_item in processor_time_tracker_list: logger.info(time_item) logger.info('PreProcess finished. Elapsed time: %s' % time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))