Example #1
0
    def testTasksFileToJobData(self):
        """Check the job data parsed from the sample tasks TSV file.

        The TSV is located at ``../testdata/params_tasks.tsv`` relative to this
        test module. Four job entries are expected; for each one, a single
        element is popped from its 'envs', 'inputs' and 'outputs' collections
        and compared against the values expected for that row index.
        """
        testpath = os.path.dirname(__file__)
        expected_tsv_file = os.path.join(testpath,
                                         '../testdata/params_tasks.tsv')
        input_file_param_util = param_util.InputFileParamUtil('input')
        output_file_param_util = param_util.OutputFileParamUtil('output')
        all_job_data = param_util.tasks_file_to_job_data(
            {'path': expected_tsv_file}, input_file_param_util,
            output_file_param_util)
        self.assertEqual(4, len(all_job_data))

        for i, job_data in enumerate(all_job_data):
            # Locals are named *_param so the builtin `input` is not shadowed.
            env_param = job_data['envs'].pop()
            input_param = job_data['inputs'].pop()
            output_param = job_data['outputs'].pop()

            self.assertEqual('SAMPLE_ID', env_param.name)
            self.assertEqual('sid-00%d' % i, env_param.value)
            self.assertEqual('VCF_FILE', input_param.name)
            self.assertEqual('input/gs/inputs/sid-00%d.vcf' % i,
                             input_param.docker_path)
            self.assertEqual('OUTPUT_PATH', output_param.name)
            self.assertEqual('output/gs/outputs/results-00%d/' % i,
                             output_param.docker_path)
Example #2
0
    def testTasksFileToTaskDescriptors(self):
        """Check the task descriptors built from the sample tasks TSV file.

        Four descriptors are expected. For each, one element is popped from
        the 'envs', 'inputs' and 'outputs' entries of its ``task_params`` and
        checked against the values expected for that row index.
        """
        tsv_path = os.path.join(os.path.dirname(__file__),
                                '../testdata/params_tasks.tsv')
        in_util = param_util.InputFileParamUtil('input')
        out_util = param_util.OutputFileParamUtil('output')
        descriptors = param_util.tasks_file_to_task_descriptors(
            {'path': tsv_path}, 0, in_util, out_util)
        self.assertEqual(4, len(descriptors))

        for row, descriptor in enumerate(descriptors):
            params = descriptor.task_params
            env_param = params['envs'].pop()
            in_param = params['inputs'].pop()
            out_param = params['outputs'].pop()

            # Environment variable carries the sample id for this row.
            self.assertEqual('SAMPLE_ID', env_param.name)
            self.assertEqual('sid-00%d' % row, env_param.value)

            # Input file parameter and its docker path.
            self.assertEqual('VCF_FILE', in_param.name)
            self.assertEqual('input/gs/inputs/sid-00%d.vcf' % row,
                             in_param.docker_path)

            # Output parameter and its docker path.
            self.assertEqual('OUTPUT_PATH', out_param.name)
            self.assertEqual('output/gs/outputs/results-00%d/' % row,
                             out_param.docker_path)
Example #3
0
 def test_out_file_docker_rewrite(self, _, recursive, uri, docker,
                                  provider):
     """Check the docker path and provider of an output param made from `uri`.

     The expected docker path is `docker` joined under the 'output' prefix
     that the OutputFileParamUtil is constructed with.
     """
     expected_docker_path = os.path.join('output', docker)
     out_param = param_util.OutputFileParamUtil('output').make_param(
         'TEST', uri, recursive)
     self.assertIsInstance(out_param, param_util.OutputFileParam)
     self.assertEqual('TEST', out_param.name)
     self.assertEqual(expected_docker_path, out_param.docker_path)
     self.assertEqual(provider, out_param.file_provider)
Example #4
0
 def test_uri_rewrite_out(self, _, recursive, raw_uri, path, bn, provider):
     """Check the uri (path + basename) and provider of an output param."""
     # Prepare the path if local: make it absolute with exactly one
     # trailing slash.
     expected_path = path
     if provider == PL:
         expected_path = os.path.abspath(path).rstrip('/') + '/'
     param = param_util.OutputFileParamUtil('').make_param(
         'TEST', raw_uri, recursive=recursive)
     self.assertEqual(expected_path, param.uri.path)
     self.assertEqual(bn, param.uri.basename)
     self.assertEqual(expected_path + bn, param.uri)
     self.assertEqual(provider, param.file_provider)
Example #5
0
 def test_uri_rewrite_out(self, unused_name, recursive, raw_uri, path, bn,
                          provider):
     """Check how an output URI is split into path and basename.

     For the local provider the expected path is first made absolute and
     normalized to end with a single '/'.
     """
     del unused_name
     is_local = provider == param_util.P_LOCAL
     want_path = (os.path.abspath(path).rstrip('/') + '/') if is_local else path
     util = param_util.OutputFileParamUtil('')
     made = util.make_param('TEST', raw_uri, recursive=recursive)
     self.assertEqual(want_path, made.uri.path)
     self.assertEqual(bn, made.uri.basename)
     self.assertEqual(want_path + bn, made.uri)
     self.assertEqual(provider, made.file_provider)
Example #6
0
    def testTasksFileToJobData(self):
        """Check the job data parsed from 'test/testdata/params_tasks.tsv'.

        Four job entries are expected; the first element of each entry's
        'envs', 'inputs' and 'outputs' lists is compared against the values
        expected for that row index.
        """
        tsv_path = 'test/testdata/params_tasks.tsv'
        in_util = param_util.InputFileParamUtil('input')
        out_util = param_util.OutputFileParamUtil('output')
        parsed = param_util.tasks_file_to_job_data(
            {'path': tsv_path}, in_util, out_util)
        self.assertEqual(4, len(parsed))

        for row, entry in enumerate(parsed):
            first_env = entry['envs'][0]
            first_input = entry['inputs'][0]
            first_output = entry['outputs'][0]

            self.assertEqual('SAMPLE_ID', first_env.name)
            self.assertEqual('sid-00%d' % row, first_env.value)
            self.assertEqual('VCF_FILE', first_input.name)
            self.assertEqual('input/gs/inputs/sid-00%d.vcf' % row,
                             first_input.docker_path)
            self.assertEqual('OUTPUT_PATH', first_output.name)
            self.assertEqual('output/gs/outputs/results-00%d/' % row,
                             first_output.docker_path)
Example #7
0
    def testParseTasksFileHeader(self):
        """Check that a three-column tasks header yields three typed params.

        The header declares an --env, an --input and an --output-recursive
        column; the parsed params are expected in that order, with the
        matching param class, name and `recursive` flag.
        """
        header = '--env SAMPLE_ID\t--input VCF_FILE\t--output-recursive OUTPUT_PATH'
        header = header.split('\t')
        input_file_param_util = param_util.InputFileParamUtil('input')
        output_file_param_util = param_util.OutputFileParamUtil('output')
        job_params = param_util.parse_tasks_file_header(
            header, input_file_param_util, output_file_param_util)
        self.assertEqual(3, len(job_params))

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)), which only says "False is not true".

        # The first one is the SAMPLE env param.
        self.assertIsInstance(job_params[0], param_util.EnvParam)
        self.assertEqual('SAMPLE_ID', job_params[0].name)

        # The second one is the non-recursive VCF_FILE input param.
        self.assertIsInstance(job_params[1], param_util.InputFileParam)
        self.assertEqual('VCF_FILE', job_params[1].name)
        self.assertFalse(job_params[1].recursive)

        # The third one is the recursive OUTPUT_PATH output param.
        self.assertIsInstance(job_params[2], param_util.OutputFileParam)
        self.assertEqual('OUTPUT_PATH', job_params[2].name)
        self.assertTrue(job_params[2].recursive)
Example #8
0
def dsub_start_job(command,
                   job_name=None,
                   envs=None,
                   labels=None,
                   inputs=None,
                   inputs_recursive=None,
                   outputs=None,
                   outputs_recursive=None,
                   wait=False):
    """Build job parameters from plain dicts and launch the job via dsub.run.

    Args:
      command: Passed to dsub.run as `command`.
      job_name: Passed to dsub.run as `name`.
      envs: Optional dict of name -> value turned into EnvParam objects.
      labels: Optional dict of name -> value turned into LabelParam objects.
        The 'test-token' and 'test-name' labels are always added. The
        caller's dict is copied and never modified.
      inputs: Optional dict of name -> value for non-recursive input params.
      inputs_recursive: Optional dict of name -> value for recursive inputs.
      outputs: Optional dict of name -> value for non-recursive output params.
      outputs_recursive: Optional dict of name -> value for recursive outputs.
      wait: Passed to dsub.run as `wait`.

    Returns:
      Whatever dsub.run returns.
    """
    envs = envs or {}
    # Copy before adding the test labels so a dict supplied by the caller is
    # not mutated as a side effect of launching a job.
    labels = dict(labels) if labels else {}
    inputs = inputs or {}
    inputs_recursive = inputs_recursive or {}
    outputs = outputs or {}
    outputs_recursive = outputs_recursive or {}

    labels['test-token'] = test_setup.TEST_TOKEN
    labels['test-name'] = test_setup.TEST_NAME

    # Named logging_param so the standard `logging` module name is not
    # shadowed inside this function.
    logging_param = param_util.build_logging_param(test.LOGGING)
    job_resources = job_model.Resources(image='ubuntu',
                                        logging=logging_param,
                                        zones=['us-central1-*'])

    env_data = {job_model.EnvParam(k, v) for (k, v) in envs.items()}
    label_data = {job_model.LabelParam(k, v) for (k, v) in labels.items()}

    # Non-recursive and recursive inputs end up in the same set; only the
    # `recursive` flag passed to make_param differs.
    input_file_param_util = param_util.InputFileParamUtil('input')
    input_data = set()
    for (recursive, items) in ((False, inputs.items()),
                               (True, inputs_recursive.items())):
        for (name, value) in items:
            name = input_file_param_util.get_variable_name(name)
            input_data.add(
                input_file_param_util.make_param(name, value, recursive))

    # Same treatment for outputs.
    output_file_param_util = param_util.OutputFileParamUtil('output')
    output_data = set()
    for (recursive, items) in ((False, outputs.items()),
                               (True, outputs_recursive.items())):
        for (name, value) in items:
            name = output_file_param_util.get_variable_name(name)
            output_data.add(
                output_file_param_util.make_param(name, value, recursive))

    job_params = {
        'envs': env_data,
        'inputs': input_data,
        'outputs': output_data,
        'labels': label_data,
    }
    # A single task descriptor with no task id and empty per-task params;
    # all parameters are supplied at the job level above.
    task_descriptors = [
        job_model.TaskDescriptor({'task-id': None}, {
            'envs': set(),
            'labels': set(),
            'inputs': set(),
            'outputs': set(),
        }, job_model.Resources())
    ]

    return dsub.run(get_dsub_provider(),
                    job_resources,
                    job_params,
                    task_descriptors,
                    name=job_name,
                    command=command,
                    wait=wait,
                    disable_warning=True)
Example #9
0
 def test_file_provider_err(self, unused_name, uri, regex):
     """Expect parse_file_provider(uri) to raise ValueError matching `regex`."""
     del unused_name
     out_util = param_util.OutputFileParamUtil('output')
     # Callable form of the assertion: the parser is invoked with `uri`
     # by assertRaisesRegexp itself.
     self.assertRaisesRegexp(ValueError, regex,
                             out_util.parse_file_provider, uri)
Example #10
0
 def test_output_val_err(self, unused_name, recursive, uri, regex):
     """Expect parse_uri(uri, recursive) to raise ValueError matching `regex`."""
     del unused_name
     out_util = param_util.OutputFileParamUtil('output')
     # Callable form of the assertion: parse_uri is invoked with
     # (uri, recursive) by assertRaisesRegexp itself.
     self.assertRaisesRegexp(ValueError, regex,
                             out_util.parse_uri, uri, recursive)
Example #11
0
        def start_job(self,
                      command,
                      name=None,
                      envs=None,
                      labels=None,
                      inputs=None,
                      inputs_recursive=None,
                      outputs=None,
                      outputs_recursive=None,
                      task_count=1,
                      wait=False):
            """Build job parameters from plain dicts and launch via dsub.run.

            Args:
              command: Passed to dsub.run as `command`.
              name: Passed to dsub.run as `name`.
              envs: Optional dict of name -> value turned into EnvParam
                objects.
              labels: Optional dict of name -> value turned into LabelParam
                objects.
              inputs: Optional dict of name -> value for non-recursive input
                params.
              inputs_recursive: Optional dict of name -> value for recursive
                input params.
              outputs: Optional dict of name -> value for non-recursive
                output params.
              outputs_recursive: Optional dict of name -> value for recursive
                output params.
              task_count: When greater than 1, one task descriptor is created
                per task with ids 1..task_count, each carrying the job-level
                params; otherwise a single descriptor with no task id and
                empty params is used.
              wait: Passed to dsub.run as `wait`.

            Returns:
              Whatever execute_redirect_stdout returns for the dsub.run call.
            """
            # None defaults instead of `{}`: a mutable default is created
            # once and shared by every call of the function.
            envs = envs or {}
            labels = labels or {}
            inputs = inputs or {}
            inputs_recursive = inputs_recursive or {}
            outputs = outputs or {}
            outputs_recursive = outputs_recursive or {}

            # Named logging_param so the standard `logging` module name is
            # not shadowed inside this method.
            logging_param = param_util.build_logging_param(self.log_path)
            resources = job_model.Resources(image=DOCKER_IMAGE,
                                            logging=logging_param,
                                            zones=['us-central1*'])

            env_data = {param_util.EnvParam(k, v) for (k, v) in envs.items()}
            label_data = {
                job_model.LabelParam(k, v)
                for (k, v) in labels.items()
            }

            # This is mostly an extraction of dsub's argument parsing from:
            # https://github.com/googlegenomics/dsub/blob/master/dsub/lib/param_util.py#L720
            # It is reworked to handle dictionaries rather than a list of
            # items of the form 'key=val'.
            input_file_param_util = param_util.InputFileParamUtil('input')
            input_data = set()
            for (recursive, items) in ((False, inputs.items()),
                                       (True, inputs_recursive.items())):
                for (var_name, value) in items:
                    var_name = input_file_param_util.get_variable_name(
                        var_name)
                    input_data.add(
                        input_file_param_util.make_param(
                            var_name, value, recursive))

            output_file_param_util = param_util.OutputFileParamUtil('output')
            output_data = set()
            for (recursive, items) in ((False, outputs.items()),
                                       (True, outputs_recursive.items())):
                for (var_name, value) in items:
                    var_name = output_file_param_util.get_variable_name(
                        var_name)
                    output_data.add(
                        output_file_param_util.make_param(
                            var_name, value, recursive))

            job_params = {
                'envs': env_data,
                'inputs': input_data,
                'outputs': output_data,
                'labels': label_data,
            }

            if task_count > 1:
                # One descriptor per task, ids starting at 1. `range` works
                # on both Python 2 and 3, unlike `xrange`.
                task_descriptors = [
                    job_model.TaskDescriptor({'task-id': i + 1}, {
                        'envs': env_data,
                        'inputs': input_data,
                        'outputs': output_data,
                        'labels': label_data,
                    }, job_model.Resources()) for i in range(task_count)
                ]
            else:
                task_descriptors = [
                    job_model.TaskDescriptor({'task-id': None}, {
                        'labels': set(),
                        'envs': set(),
                        'inputs': set(),
                        'outputs': set()
                    }, job_model.Resources())
                ]

            return execute_redirect_stdout(
                lambda: dsub.run(self.provider,
                                 resources,
                                 job_params,
                                 task_descriptors,
                                 name=name,
                                 command=command,
                                 wait=wait,
                                 disable_warning=True))