Example #1
def run_pipeline(msg):
    '''
    Run the pipeline for the current file and use this task as a callback for future tasks
    '''
    file_index = 0

    # If this is not the first run, record the data from the previous run in the
    # database and increment the file index
    if msg.get('batch_guid'):
        record_benchmark_info(msg['batch_guid'], msg['hist_schema'], msg['user'], msg['passwd'],
                              msg['host'], msg['port'], msg['hist_db'], msg['memory'], msg['cpu'])
        file_index = msg['file_index'] + 1

    # Exit if all tests have completed
    if file_index >= len(FILES):
        logger.info('All Tests Complete')
        return

    logger.info('**Running Pipeline test %d**', file_index)

    # Construct new message
    directory = msg['directory']
    new_msg = dict(msg, file_index=file_index, callback=run_pipeline)

    # resolve the input file for this run
    files = os.path.join(directory, FILES[file_index])

    # run the pipeline with the resolved file and the newly constructed message
    get_pipeline_chain(files, udl2_conf, batch_guid_forced=None, **new_msg)
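A minimal sketch of how this benchmark driver might be kicked off, assuming the module-level names above (FILES, udl2_conf) are configured. The initial message deliberately omits 'batch_guid' so run_pipeline treats it as the first run; the key names mirror the ones record_benchmark_info reads on later runs, and every value here is hypothetical.

initial_msg = {
    'directory': '/opt/benchmarks/data',  # hypothetical location of FILES
    'hist_schema': 'edware_hist',         # hypothetical history-DB settings below
    'user': 'udl2',
    'passwd': 'udl2pass',
    'host': 'localhost',
    'port': 5432,
    'hist_db': 'edware',
    'memory': '4G',                       # resources recorded per run
    'cpu': 4,
}
run_pipeline(initial_msg)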
Example #2
def run_pipeline(archive_file=None, batch_guid_forced=None):
    """
    Begins the UDL Pipeline process for the file found at path archive_file

    :param archive_file: The file to be processed
    :param batch_guid_forced: value to use as the batch_guid for the current run
    """
    if not archive_file:
        raise Exception('Running the pipeline failed due to a missing archive file')
    get_pipeline_chain(archive_file, guid_batch=batch_guid_forced).delay()


def schedule_pipeline(archive_file):
    """Point of entry task to start the pipeline chain

    :param archive_file: path of the file which needs to be run through the pipeline
    """
    if not archive_file or not os.path.exists(archive_file):
        logger.error('W_schedule_pipeline: Scheduling pipeline failed due to invalid file <%s>', archive_file)
        raise Exception('Scheduling pipeline failed due to invalid file')

    # Rename the file to mark it as scheduled for processing before submitting the
    # task to the pipeline; this prevents the udl trigger from rescheduling the
    # pipeline in case of delay.
    archive_file_for_processing = archive_file + Const.PROCESSING_FILE_EXT
    os.rename(archive_file, archive_file_for_processing)
    logger.info('W_schedule_pipeline: Scheduling pipeline for file <%s>', archive_file_for_processing)
    udl2_pipeline.get_pipeline_chain(archive_file_for_processing).delay()
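None of these snippets shows get_pipeline_chain itself. Below is a hedged sketch of the shape it appears to have, inferred from the caller above and the tests in the examples that follow: it packs its arguments into a message dict and returns a Celery chain whose first task receives that dict. The stage names and defaults are placeholders, not the project's real pipeline tasks.

import uuid

from celery import Celery, chain

app = Celery('udl2_sketch')


@app.task
def expand_file(msg):
    # placeholder for the first real UDL stage
    return msg


@app.task
def load_staging(msg):
    # placeholder for a subsequent UDL stage
    return msg


def get_pipeline_chain(archive_file, load_type=None, file_parts=1, guid_batch=None):
    msg = {
        'input_file_path': archive_file,
        'load_type': load_type,
        'parts': file_parts,
        'guid_batch': guid_batch if guid_batch else str(uuid.uuid4()),
    }
    # the first task carries the message dict, as the tests below expect
    return chain(expand_file.s(msg), load_staging.s())

Building the chain is purely local; no broker connection is needed until .delay() is called on it.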
Example #5
    def test_get_pipeline_chain_check_type(self):
        arch_file = 'path_to_some_file'
        load_type = 'some_load_type'
        file_part = 12
        batch_guid = '1234-s5678'
        pipeline_chain = get_pipeline_chain(arch_file, load_type, file_part,
                                            batch_guid)
        self.assertIsInstance(pipeline_chain, chain)
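Using the hypothetical stub sketched under Example #2, the same property can be checked interactively; constructing the chain never touches a broker:

pipeline_chain = get_pipeline_chain('path_to_some_file', 'some_load_type', 12, '1234-s5678')
print(type(pipeline_chain))             # a celery.canvas chain
print(pipeline_chain.tasks[0].args[0])  # the message dict inspected below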
Example #7
    def test_get_pipeline_chain_check_msg(self):
        arch_file = 'path_to_some_file'
        load_type = 'some_load_type'
        file_part = 12
        batch_guid = '1234-s5678'
        pipeline_chain = get_pipeline_chain(arch_file, load_type, file_part,
                                            batch_guid)

        msg = pipeline_chain.tasks[0].args[0]

        for mk in MESSAGE_KEYS:
            self.assertIn(mk, msg)
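MESSAGE_KEYS itself is not shown in these snippets. A plausible minimal definition, covering exactly the keys the next example asserts on (the real project's list may well be longer):

MESSAGE_KEYS = ['guid_batch', 'parts', 'input_file_path', 'load_type']  # assumed subset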
Example #9
    def test_get_pipeline_chain_check_msg_values(self):
        arch_file = 'path_to_some_file'
        load_type = 'some_load_type'
        file_part = 12
        batch_guid = '1234-s5678'
        pipeline_chain = get_pipeline_chain(arch_file, load_type, file_part,
                                            batch_guid)

        msg = pipeline_chain.tasks[0].args[0]
        self.assertEqual(msg['guid_batch'], batch_guid)
        self.assertEqual(msg['parts'], file_part)
        self.assertEqual(msg['input_file_path'], arch_file)
        self.assertEqual(msg['load_type'], load_type)
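These methods presumably live in a unittest.TestCase subclass. A sketch of scaffolding that would let them run stand-alone; the class name is assumed, and the import of get_pipeline_chain is left as a comment because its module path is not shown in these examples.

import unittest

from celery import chain

# get_pipeline_chain would be imported from the project under test here;
# its exact module path is not shown in these examples.


class TestGetPipelineChain(unittest.TestCase):
    # the three test_get_pipeline_chain_* methods shown above belong here
    pass


if __name__ == '__main__':
    unittest.main()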