Ejemplo n.º 1
0
    def test_run_app_chained_placeholders(self, execute_job_steps, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Single composed job manifest
data: /data
jobs:
  job1:
    - name: downloader1
      transform: morgues-download
      base_url: http://example.com/data
      output: /tmp/data/morgues
    - name: downloader2
      transform: morgues-download
      base_url: $downloader1.base_url
      output: /tmp/data/morgues
    - name: downloader3
      transform: morgues-download
      base_url: $downloader2.base_url
      output: /tmp/data/morgues
        """)

        runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert execute_job_steps.call_count == 1
        actual_steps = execute_job_steps.call_args_list[0][1].get('steps') or execute_job_steps.call_args_list[0][0][1]
        actual_base_urls = [step['base_url'] for step in actual_steps]
        assert actual_base_urls == ['http://example.com/data'] * 3
Ejemplo n.º 2
0
    def test_resolve_variable_previous_output_first_step(self, execute_job_steps, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Single composed job manifest
data: /data
jobs:
  job1:
    - name: splitter
      transform: morgue-splitter
      morgues: $previous.output
      output: /data/output
        """)

        with pytest.raises(Exception) as exc_info:
            runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)
        assert str(exc_info.value) == 'Cannot use $previous placeholder on the first step'
Ejemplo n.º 3
0
    def test_run_app_temp_placeholder(self, tmpfile_mock, tmpdir_mock, execute_transform, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Named placeholder
data: /data
jobs:
  my-job:
    - transform: morgues-download
      some-file: $tmp.file
      output: $tmp.dir
        """)
        runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert execute_transform.call_count == 1, '`execute_transform` was called an unexpected number of times'
        actual_steps = [call[1].get('step') or call[0][0] for call in execute_transform.call_args_list]
        assert actual_steps == [
            {'transform': 'morgues-download', 'some-file': '/data/tmp/file', 'output': '/data/tmp/dir'},
        ]
Ejemplo n.º 4
0
    def test_resolve_variable_previous_output_no_previous_output(self, execute_job_steps, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Single composed job manifest
data: /data
jobs:
  job1:
    - name: downloader
      transform: morgues-download
      base_url: http://example.com/data
    - name: splitter
      transform: morgue-splitter
      morgues: $previous.output
      output: /data/output
        """)

        with pytest.raises(Exception) as exc_info:
            runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)
        assert str(exc_info.value) == "No property named \"output\" defined in previous step. Possible values are: ['name', 'transform', 'base_url']"
Ejemplo n.º 5
0
    def test_run_app_simple_job(self, execute_transform, app_manifest_simple, transforms_fixtures_path):
        """A single-step manifest triggers exactly one ``execute_transform`` call.

        Verifies the step passed to the call, the set of transform names made
        available to it, and that ``dryrun`` is false when ``run_app`` is
        invoked without a ``dryrun`` argument.
        """
        def call_argument(call, name, position):
            # Pick the argument by keyword when present, else by position.
            # Key membership (not truthiness) is required: ``dryrun=False`` is
            # a legitimate keyword value, and ``kwargs.get(name) or args[i]``
            # would wrongly skip it and index into the positional tuple.
            kwargs = call[1]
            return kwargs[name] if name in kwargs else call[0][position]

        manifest = parse_yaml(app_manifest_simple)
        runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert execute_transform.call_count == 1, '`execute_transform` was called an unexpected number of times'
        calls = execute_transform.call_args_list
        actual_steps = [call_argument(call, 'step', 0) for call in calls]
        actual_transforms = [call_argument(call, 'transforms', 1) for call in calls]
        actual_dryruns = [call_argument(call, 'dryrun', 2) for call in calls]

        assert actual_steps == [
            {'transform': 'download', 'base_url': 'http://example.com/data', 'throttle': 1000, 'output': '/tmp/data/morgues'}
        ]
        actual_transform = actual_transforms[0]
        assert all(actual_transform == p for p in actual_transforms), 'Each call to `execute_transform` should have passed the same transforms dict'
        assert sorted(actual_transform.keys()) == [
            'morgue-splitter', 'morgues-download', 'parser'
        ]
        # Exactly one call was asserted above, so this equals "every dryrun == False".
        assert actual_dryruns == [False]
Ejemplo n.º 6
0
    def test_run_app_named_placeholders_value_key_not_found(self, execute_transform, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Single composed job manifest
data: /data
jobs:
  my-job:
    - name: downloader
      transform: morgues-download
      base_url: http://example.com/data
      throttle: 1000
      output: /tmp/data/morgues
    - transform: morgue-splitter
      morgues: $downloader.unknown  # unknown value name
      output: /tmp/data/splits
        """)

        with pytest.raises(AssertionError) as exc:
            runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert str(exc.value) == "Invalid placeholder: $downloader.unknown; valid option names include: ['base_url', 'name', 'output', 'throttle', 'transform']"
Ejemplo n.º 7
0
    def test_run_app_named_placeholders(self, execute_transform, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Single composed job manifest
data: /data
jobs:
  my-job:
    - name: downloader
      transform: morgues-download
      base_url: http://example.com/data
      throttle: 1000
      output: /tmp/data/morgues
    - transform: morgue-splitter
      morgues: $downloader.output  # this should be replaced with the first step's output value
      output: /tmp/data/splits
        """)
        runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert execute_transform.call_count == 2, '`execute_transform` was called an unexpected number of times'
        actual_steps = [call[1].get('step') or call[0][0] for call in execute_transform.call_args_list]
        assert actual_steps[1]['morgues'] == actual_steps[0]['output']
Ejemplo n.º 8
0
    def test_run_app_multiple_single_step_jobs(self, execute_transform, dryrun, app_manifest_multiple_single_step_jobs, transforms_fixtures_path):
        """Two single-step jobs produce two ``execute_transform`` calls.

        Verifies the step passed to each call, that both calls received the
        same transforms dict, and that the ``dryrun`` value given to
        ``run_app`` is forwarded unchanged to every call.
        """
        def call_argument(call, name, position):
            # Pick the argument by keyword when present, else by position.
            # Key membership (not truthiness) is required: when the ``dryrun``
            # fixture is False, ``kwargs.get('dryrun') or args[2]`` would skip
            # the keyword value and index into the positional tuple instead.
            kwargs = call[1]
            return kwargs[name] if name in kwargs else call[0][position]

        manifest = parse_yaml(app_manifest_multiple_single_step_jobs)
        runner.run_app(manifest, dryrun=dryrun, transforms_repo_path=transforms_fixtures_path)

        # TODO review this test, we might need to verify that execute_job_steps is called twice instead
        assert execute_transform.call_count == 2, '`execute_transform` was called an unexpected number of times'
        calls = execute_transform.call_args_list
        actual_steps = [call_argument(call, 'step', 0) for call in calls]
        actual_transforms = [call_argument(call, 'transforms', 1) for call in calls]
        actual_dryruns = [call_argument(call, 'dryrun', 2) for call in calls]

        assert actual_steps == [
            {'transform': 'download', 'base_url': 'http://example.com/data', 'throttle': 1000, 'output': '/tmp/data/morgues'},
            {'transform': 'splitter', 'morgues': '/tmp/data/morgues', 'output': '/tmp/data/splits'}
        ]
        actual_transform = actual_transforms[0]
        assert all(actual_transform == p for p in actual_transforms), 'Each call to `execute_transform` should have passed the same transforms dict'
        assert sorted(actual_transform.keys()) == [
            'morgue-splitter', 'morgues-download', 'parser'
        ]
        # Exactly two calls were asserted above, so this equals "every dryrun == fixture value".
        assert actual_dryruns == [dryrun, dryrun], 'Unexpected dryruns: {}'.format(list(actual_dryruns))
Ejemplo n.º 9
0
    def test_resolve_variable_previous_output(self, execute_job_steps, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Single composed job manifest
data: /data
jobs:
  job1:
    - name: downloader
      transform: morgues-download
      base_url: http://example.com/data
      output: /some/path
    - name: splitter
      transform: morgue-splitter
      morgues: $previous.output
      output: /data/output
        """)

        runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert execute_job_steps.call_count == 1
        actual_steps = execute_job_steps.call_args_list[0][1].get('steps') or execute_job_steps.call_args_list[0][0][1]
        assert actual_steps[1]['morgues'] == actual_steps[0]['output']
Ejemplo n.º 10
0
    def test_resolve_variable_curley_braces(self, execute_job_steps, placeholder, resolved, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Single composed job manifest
data: /data
jobs:
  job1:
    - name: downloader
      transform: morgues-download
      base_url: http://example.com/data
      output: /some/path
    - name: splitter
      transform: morgue-splitter
      morgues: morgues
      output: '{}'
        """.format(placeholder))

        runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert execute_job_steps.call_count == 1
        actual_steps = execute_job_steps.call_args_list[0][1].get('steps') or execute_job_steps.call_args_list[0][0][1]
        assert actual_steps[1]['output'] == resolved
Ejemplo n.º 11
0
    def test_run_app_named_placeholders_circular_reference(self, execute_job_steps, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Single composed job manifest
data: /data
jobs:
  job1:
    - name: downloader
      transform: morgues-download
      base_url: http://example.com/data
      throttle: 1000
      output: $splitter.output
    - name: splitter
      transform: morgue-splitter
      morgues: morgues
      output: $downloader.output
        """)

        with pytest.raises(AssertionError) as exc:
            runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert str(exc.value) == "Invalid placeholder: $splitter; valid names include: []"
Ejemplo n.º 12
0
    def test_run_app_named_placeholders_reference_other_job(self, execute_transform, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Single composed job manifest
data: /data
jobs:
  job1:
    - name: downloader
      transform: morgues-download
      base_url: http://example.com/data
      throttle: 1000
      output: /tmp/data/morgues
  job2:
    - transform: morgue-splitter
      morgues: $downloader.output  # not part of the same job
      output: /tmp/data/splits
        """)

        with pytest.raises(AssertionError) as exc:
            runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert str(exc.value) == "Invalid placeholder: $downloader; valid names include: []"
Ejemplo n.º 13
0
    def test_run_app_named_placeholders_reference_future_step(self, execute_transform, transforms_fixtures_path):
        manifest = parse_yaml("""
name: Single composed job manifest
data: /data
jobs:
  my-job:
    - name: downloader
      transform: morgues-download
      base_url: http://example.com/data
      throttle: 1000
      output: $splitter.morgues  # cannot reference values from future steps
    - name: splitter
      transform: /tmp/data/morgues
      morgues: $downloader.unknown
      output: /tmp/data/splits
        """)

        with pytest.raises(AssertionError) as exc:
            runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert str(exc.value) == "Invalid placeholder: $splitter; valid names include: []"
Ejemplo n.º 14
0
    def test_resolve_variable_previous_output_variable(self, execute_job_steps, transforms_fixtures_path, tmpdir):
        data_path = str(tmpdir.mkdir('data'))
        manifest = parse_yaml("""
name: Single composed job manifest
data: {}
jobs:
  job1:
    - name: downloader
      transform: morgues-download
      base_url: http://example.com/data
      output: $tmp.dir
    - name: splitter
      transform: morgue-splitter
      morgues: $previous.output
      output: /data/output
        """.format(data_path))

        runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert execute_job_steps.call_count == 1
        actual_steps = execute_job_steps.call_args_list[0][1].get('steps') or execute_job_steps.call_args_list[0][0][1]
        assert actual_steps[1]['morgues'] == actual_steps[0]['output']
        assert actual_steps[0]['output'].startswith(data_path)
Ejemplo n.º 15
0
    def test_resolve_tmp_file(self, execute_job_steps, transforms_fixtures_path, tmpdir):
        data_path = str(tmpdir.mkdir('data'))
        manifest = parse_yaml("""
name: Single composed job manifest
data: {}
jobs:
  job1:
    - name: downloader
      transform: morgues-download
      base_url: http://example.com/data
      output: $tmp.file
    - name: splitter
      transform: morgue-splitter
      morgues: morgues
      output: $downloader.output
        """.format(data_path))

        runner.run_app(manifest, transforms_repo_path=transforms_fixtures_path)

        assert execute_job_steps.call_count == 1
        actual_steps = execute_job_steps.call_args_list[0][1].get('steps') or execute_job_steps.call_args_list[0][0][1]
        assert all(step['output'] == actual_steps[0]['output'] for step in actual_steps), 'Every tmp value should be the same value'
        assert actual_steps[0]['output'].startswith(data_path + '/tmp/')
        assert os.path.isfile(actual_steps[0]['output'])