Beispiel #1
0
 def test_transform_pipeline_process(self, app_with_db):
     """Process a pipeline registered without a sub-directory.

     Registers a ``FakePipeline(1234)`` under the name ``'fake_pipeline'``
     with ``sub_directory=None``, runs ``Manager.pipeline_process`` and
     checks that exactly one ``DatafileRegistryModel`` row exists, marked
     as processed, with no error message, no file name and source
     ``'1234'``.
     """
     manager = Manager(dbi=app_with_db.dbi)
     pipeline_config = PipelineConfig(
         pipeline=FakePipeline(1234),
         sub_directory=None,
         force=False,
         unpack=False,
         trigger_dataflow_dag=False,
     )
     manager._pipelines['fake_pipeline'] = pipeline_config

     manager.pipeline_process('fake_pipeline')

     registry_rows = DatafileRegistryModel.query.all()
     assert len(registry_rows) == 1
     entry = registry_rows[0]
     assert entry.state == DatafileState.PROCESSED.value
     assert entry.error_message is None
     assert entry.file_name is None
     # NOTE(review): exact equality with utcnow() only holds if the clock is
     # frozen (presumably by a decorator outside this view) -- confirm.
     assert entry.created_timestamp == datetime.utcnow()
     assert entry.updated_timestamp == datetime.utcnow()
     assert entry.source == '1234'
Beispiel #2
0
    def test_pipeline_process(
        self,
        mock_read_files,
        mock_get_file_names,
        app_with_db,
        raise_exception,
        expected_state,
        expected_error_message,
    ):
        """Process a pipeline that reads one fake file from storage.

        ``mock_get_file_names`` is set to return ``['fake_file.txt']`` and
        ``mock_read_files`` to yield that same list once. A ``FakePipeline``
        is registered under ``'fake_pipeline'`` with
        ``raise_processing_exception=raise_exception``; after
        ``Manager.pipeline_process`` runs, the single registry row must
        carry ``expected_state`` and ``expected_error_message``.

        NOTE(review): the two mocks and the ``raise_exception`` /
        ``expected_*`` arguments are presumably supplied by patch and
        parametrize decorators outside this view -- confirm.
        """
        def fake_read_files(*args, **kwargs):
            # Single batch containing the one fake file name.
            yield ['fake_file.txt']

        mock_get_file_names.return_value = ['fake_file.txt']
        mock_read_files.side_effect = fake_read_files

        s3_config = app_with_db.config['s3']
        storage_root = os.path.join(
            s3_config['bucket_url'], s3_config['datasets_folder'])

        manager = Manager(storage=storage_root, dbi=app_with_db.dbi)
        pipeline_config = PipelineConfig(
            pipeline=FakePipeline(
                1234, raise_processing_exception=raise_exception),
            sub_directory='/tmp/fake_pipeline',
            force=False,
            unpack=False,
            trigger_dataflow_dag=False,
        )
        manager._pipelines['fake_pipeline'] = pipeline_config

        manager.pipeline_process('fake_pipeline')

        registry_rows = DatafileRegistryModel.query.all()
        assert len(registry_rows) == 1
        entry = registry_rows[0]
        assert entry.state == expected_state
        assert entry.error_message == expected_error_message
        assert entry.file_name == 'fake_file.txt'
        # NOTE(review): exact equality with utcnow() only holds if the clock
        # is frozen (presumably by a decorator outside this view) -- confirm.
        assert entry.created_timestamp == datetime.utcnow()
        assert entry.updated_timestamp == datetime.utcnow()
        assert entry.source == '1234'