def run_pipeline(msg):
    '''
    Run the pipeline for the current file and use this task as a callback for future tasks
    '''
    file_index = 0
    # if this is not the first run, record the data from the previous run in the database
    # and increment the file index
    if msg.get('batch_guid'):
        record_benchmark_info(msg['batch_guid'], msg['hist_schema'], msg['user'], msg['passwd'],
                              msg['host'], msg['port'], msg['hist_db'], msg['memory'], msg['cpu'])
        file_index = msg['file_index'] + 1

    # exit if all tests have completed
    if file_index >= len(FILES):
        logger.info('All Tests Complete')
        return

    logger.info('**Running Pipeline test %d**' % file_index)

    # construct the new message, carrying forward the previous keys and
    # registering this task as the callback for the next run
    directory = msg['directory']
    new_msg = dict(list(msg.items()) + list({'file_index': file_index, 'callback': run_pipeline}.items()))

    # define the input file for this run
    files = os.path.join(directory, FILES[file_index])

    # run the pipeline with the file and the newly constructed message
    get_pipeline_chain(files, udl2_conf, batch_guid_forced=None, **new_msg)
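
# A minimal sketch of how this benchmark loop might be kicked off; the 'directory'
# value is hypothetical, and leaving out 'batch_guid' is what marks this as the
# first run. Any further keys expected by get_pipeline_chain are assumptions of
# this example.
if __name__ == '__main__':
    initial_msg = {'directory': '/path/to/benchmark/files'}  # hypothetical location of FILES
    run_pipeline(initial_msg)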
def run_pipeline(archive_file=None, batch_guid_forced=None):
    """
    Begin the UDL pipeline process for the file found at path archive_file

    :param archive_file: the file to be processed
    :param batch_guid_forced: this value will be used as the batch_guid for the current run
    """
    if not archive_file:
        raise ValueError('run_pipeline: archive_file is required')
    get_pipeline_chain(archive_file, guid_batch=batch_guid_forced).delay()
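
# Hypothetical usage; the archive path is illustrative only, and the guid is
# borrowed from the tests below. Omitting batch_guid_forced lets the pipeline
# generate its own batch guid.
run_pipeline('/path/to/archive/some_batch.tar.gz.gpg', batch_guid_forced='1234-s5678')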
def schedule_pipeline(archive_file):
    """
    Point-of-entry task to start the pipeline chain

    :param archive_file: path of the file which needs to be run through the pipeline
    """
    if not archive_file or not os.path.exists(archive_file):
        logger.error('W_schedule_pipeline: Scheduling pipeline failed due to invalid file <%s>' % archive_file)
        raise Exception('Scheduling pipeline failed due to invalid file')
    # rename the file to mark it as scheduled for processing before submitting the task
    # to the pipeline; this prevents the udl trigger from rescheduling the pipeline if
    # the task is delayed
    archive_file_for_processing = archive_file + Const.PROCESSING_FILE_EXT
    os.rename(archive_file, archive_file_for_processing)
    logger.info('W_schedule_pipeline: Scheduling pipeline for file <%s>' % archive_file_for_processing)
    udl2_pipeline.get_pipeline_chain(archive_file_for_processing).delay()
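
# A minimal sketch (not the project's actual udl trigger) of how newly arrived
# archives might be handed to schedule_pipeline; the landing-zone path and the
# *.tar.gz.gpg pattern are assumptions.
import glob

def poll_landing_zone(landing_zone):
    # once schedule_pipeline renames a file with Const.PROCESSING_FILE_EXT it no
    # longer matches this pattern (assuming the extension changes the suffix),
    # so each archive is scheduled at most once
    for archive in glob.glob(os.path.join(landing_zone, '*.tar.gz.gpg')):
        schedule_pipeline(archive)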
def test_get_pipeline_chain_check_type(self):
    arch_file = 'path_to_some_file'
    load_type = 'some_load_type'
    file_part = 12
    batch_guid = '1234-s5678'
    pipeline_chain = get_pipeline_chain(arch_file, load_type, file_part, batch_guid)
    self.assertIsInstance(pipeline_chain, chain)
def test_get_pipeline_chain_check_msg(self):
    arch_file = 'path_to_some_file'
    load_type = 'some_load_type'
    file_part = 12
    batch_guid = '1234-s5678'
    pipeline_chain = get_pipeline_chain(arch_file, load_type, file_part, batch_guid)
    msg = pipeline_chain.tasks[0].args[0]
    for mk in MESSAGE_KEYS:
        self.assertIn(mk, msg)
def test_get_pipeline_chain_check_msg_values(self):
    arch_file = 'path_to_some_file'
    load_type = 'some_load_type'
    file_part = 12
    batch_guid = '1234-s5678'
    pipeline_chain = get_pipeline_chain(arch_file, load_type, file_part, batch_guid)
    msg = pipeline_chain.tasks[0].args[0]
    self.assertEqual(msg['guid_batch'], batch_guid)
    self.assertEqual(msg['parts'], file_part)
    self.assertEqual(msg['input_file_path'], arch_file)
    self.assertEqual(msg['load_type'], load_type)
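
# Not the project's implementation: a minimal sketch of a get_pipeline_chain
# factory consistent with what the tests above assert, i.e. a celery chain whose
# first task receives a msg dict carrying guid_batch, parts, input_file_path and
# load_type as its first positional argument. The app and task names are
# placeholders.
from celery import Celery, chain

app = Celery('udl2_sketch')

@app.task(name='udl2_sketch.W_first_step')
def first_step(msg):
    return msg

def get_pipeline_chain_sketch(archive_file, load_type, file_parts, batch_guid):
    msg = {'guid_batch': batch_guid,
           'parts': file_parts,
           'input_file_path': archive_file,
           'load_type': load_type}
    # .si() builds an immutable signature, so tasks[0].args[0] is the msg dict
    return chain(first_step.si(msg))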