def testTasksFileToJobData(self):
  """Each row of the tasks TSV yields one env, one input, and one output param."""
  testpath = os.path.dirname(__file__)
  expected_tsv_file = os.path.join(testpath, '../testdata/params_tasks.tsv')
  input_file_param_util = param_util.InputFileParamUtil('input')
  output_file_param_util = param_util.OutputFileParamUtil('output')
  all_job_data = param_util.tasks_file_to_job_data(
      {'path': expected_tsv_file}, input_file_param_util,
      output_file_param_util)

  self.assertEqual(4, len(all_job_data))
  for i in range(4):
    job_data = all_job_data[i]
    # Renamed from `input`/`output` to avoid shadowing the Python builtins.
    env = job_data['envs'].pop()
    input_param = job_data['inputs'].pop()
    output_param = job_data['outputs'].pop()

    self.assertEqual('SAMPLE_ID', env.name)
    self.assertEqual('sid-00%d' % i, env.value)
    self.assertEqual('VCF_FILE', input_param.name)
    self.assertEqual('input/gs/inputs/sid-00%d.vcf' % i,
                     input_param.docker_path)
    self.assertEqual('OUTPUT_PATH', output_param.name)
    self.assertEqual('output/gs/outputs/results-00%d/' % i,
                     output_param.docker_path)
def testTasksFileToTaskDescriptors(self):
  """Each row of the tasks TSV yields a descriptor with env/input/output params."""
  test_dir = os.path.dirname(__file__)
  expected_tsv_file = os.path.join(test_dir, '../testdata/params_tasks.tsv')
  in_util = param_util.InputFileParamUtil('input')
  out_util = param_util.OutputFileParamUtil('output')
  all_task_descriptors = param_util.tasks_file_to_task_descriptors(
      {'path': expected_tsv_file}, 0, in_util, out_util)

  self.assertEqual(4, len(all_task_descriptors))
  for i, descriptor in enumerate(all_task_descriptors):
    params = descriptor.task_params
    env_param = params['envs'].pop()
    in_param = params['inputs'].pop()
    out_param = params['outputs'].pop()

    self.assertEqual('SAMPLE_ID', env_param.name)
    self.assertEqual('sid-00%d' % i, env_param.value)
    self.assertEqual('VCF_FILE', in_param.name)
    self.assertEqual('input/gs/inputs/sid-00%d.vcf' % i, in_param.docker_path)
    self.assertEqual('OUTPUT_PATH', out_param.name)
    self.assertEqual('output/gs/outputs/results-00%d/' % i,
                     out_param.docker_path)
def test_input_file_docker_rewrite(self, _, recursive, uri, docker, provider):
  """An input URI's docker path is rewritten under the 'input' mount root."""
  expected_docker_path = os.path.join('input', docker)
  util = param_util.InputFileParamUtil('input')
  result = util.make_param('TEST', uri, recursive)

  self.assertIsInstance(result, param_util.InputFileParam)
  self.assertEqual('TEST', result.name)
  self.assertEqual(expected_docker_path, result.docker_path)
  self.assertEqual(provider, result.file_provider)
def test_uri_rewrite_in(self, _, recursive, uri_raw, path, bn, provider):
  """Input URIs are split into a normalized path plus basename."""
  # For the local provider, the expected path becomes absolute with a
  # single trailing slash.
  if provider == PL:
    path = os.path.abspath(path).rstrip('/') + '/'

  util = param_util.InputFileParamUtil('')
  param = util.make_param('TEST', uri_raw, recursive=recursive)

  self.assertEqual(path, param.uri.path)
  self.assertEqual(bn, param.uri.basename)
  self.assertEqual(path + bn, param.uri)
  self.assertEqual(provider, param.file_provider)
def test_uri_rewrite_in(self, unused_name, recursive, uri_raw, path, bn,
                        provider):
  """Input URIs are split into a normalized path plus basename."""
  del unused_name
  # Local-provider expectations are normalized to an absolute path with a
  # single trailing slash.
  if provider == param_util.P_LOCAL:
    path = os.path.abspath(path).rstrip('/') + '/'

  util = param_util.InputFileParamUtil('')
  param = util.make_param('TEST', uri_raw, recursive=recursive)

  self.assertEqual(path, param.uri.path)
  self.assertEqual(bn, param.uri.basename)
  self.assertEqual(path + bn, param.uri)
  self.assertEqual(provider, param.file_provider)
def testTasksFileToJobData(self):
  """Each row of the tasks TSV yields one env, one input, and one output param.

  NOTE(review): the TSV path is relative to the working directory — this
  assumes tests run from the repository root; confirm against the test runner.
  """
  expected_tsv_file = 'test/testdata/params_tasks.tsv'
  in_util = param_util.InputFileParamUtil('input')
  out_util = param_util.OutputFileParamUtil('output')
  all_job_data = param_util.tasks_file_to_job_data(
      {'path': expected_tsv_file}, in_util, out_util)

  self.assertEqual(4, len(all_job_data))
  for i, job_data in enumerate(all_job_data):
    env_param = job_data['envs'][0]
    in_param = job_data['inputs'][0]
    out_param = job_data['outputs'][0]

    self.assertEqual('SAMPLE_ID', env_param.name)
    self.assertEqual('sid-00%d' % i, env_param.value)
    self.assertEqual('VCF_FILE', in_param.name)
    self.assertEqual('input/gs/inputs/sid-00%d.vcf' % i, in_param.docker_path)
    self.assertEqual('OUTPUT_PATH', out_param.name)
    self.assertEqual('output/gs/outputs/results-00%d/' % i,
                     out_param.docker_path)
def testParseTasksFileHeader(self):
  """Tab-separated header columns map to Env/Input/Output param objects."""
  header = '--env SAMPLE_ID\t--input VCF_FILE\t--output-recursive OUTPUT_PATH'
  columns = header.split('\t')
  in_util = param_util.InputFileParamUtil('input')
  out_util = param_util.OutputFileParamUtil('output')
  job_params = param_util.parse_tasks_file_header(columns, in_util, out_util)

  self.assertEqual(3, len(job_params))
  env_param, in_param, out_param = job_params

  # Column 1: the SAMPLE env param.
  self.assertIsInstance(env_param, param_util.EnvParam)
  self.assertEqual('SAMPLE_ID', env_param.name)
  # Column 2: a non-recursive input.
  self.assertIsInstance(in_param, param_util.InputFileParam)
  self.assertEqual('VCF_FILE', in_param.name)
  self.assertFalse(in_param.recursive)
  # Column 3: a recursive output.
  self.assertIsInstance(out_param, param_util.OutputFileParam)
  self.assertEqual('OUTPUT_PATH', out_param.name)
  self.assertTrue(out_param.recursive)
def dsub_start_job(command,
                   job_name=None,
                   envs=None,
                   labels=None,
                   inputs=None,
                   inputs_recursive=None,
                   outputs=None,
                   outputs_recursive=None,
                   wait=False):
  """Submit a job through dsub.run() tagged with test-identifying labels.

  Args:
    command: shell command for the job to run.
    job_name: optional name for the job.
    envs: dict of environment variable name -> value.
    labels: dict of label name -> value; the test token and test name labels
      are added on top of these.
    inputs: dict of input variable name -> URI.
    inputs_recursive: like `inputs`, but marked recursive.
    outputs: dict of output variable name -> URI.
    outputs_recursive: like `outputs`, but marked recursive.
    wait: whether dsub.run() should wait for job completion.

  Returns:
    The result of dsub.run().
  """
  envs = envs or {}
  # Copy the caller's dict before adding the test labels; the original code
  # mutated the caller's `labels` argument as a side effect.
  labels = dict(labels) if labels else {}
  inputs = inputs or {}
  inputs_recursive = inputs_recursive or {}
  outputs = outputs or {}
  outputs_recursive = outputs_recursive or {}

  labels['test-token'] = test_setup.TEST_TOKEN
  labels['test-name'] = test_setup.TEST_NAME

  logging = param_util.build_logging_param(test.LOGGING)
  job_resources = job_model.Resources(
      image='ubuntu', logging=logging, zones=['us-central1-*'])

  env_data = {job_model.EnvParam(k, v) for (k, v) in envs.items()}
  label_data = {job_model.LabelParam(k, v) for (k, v) in labels.items()}

  input_file_param_util = param_util.InputFileParamUtil('input')
  input_data = set()
  for (recursive, items) in ((False, inputs.items()),
                             (True, inputs_recursive.items())):
    for (name, value) in items:
      name = input_file_param_util.get_variable_name(name)
      input_data.add(input_file_param_util.make_param(name, value, recursive))

  output_file_param_util = param_util.OutputFileParamUtil('output')
  output_data = set()
  for (recursive, items) in ((False, outputs.items()),
                             (True, outputs_recursive.items())):
    for (name, value) in items:
      name = output_file_param_util.get_variable_name(name)
      output_data.add(
          output_file_param_util.make_param(name, value, recursive))

  job_params = {
      'envs': env_data,
      'inputs': input_data,
      'outputs': output_data,
      'labels': label_data,
  }
  # Single unnamed task: the job-level params above carry all the data.
  task_descriptors = [
      job_model.TaskDescriptor({'task-id': None}, {
          'envs': set(),
          'labels': set(),
          'inputs': set(),
          'outputs': set(),
      }, job_model.Resources())
  ]

  return dsub.run(
      get_dsub_provider(),
      job_resources,
      job_params,
      task_descriptors,
      name=job_name,
      command=command,
      wait=wait,
      disable_warning=True)
def start_job(self,
              command,
              name=None,
              envs=None,
              labels=None,
              inputs=None,
              inputs_recursive=None,
              outputs=None,
              outputs_recursive=None,
              task_count=1,
              wait=False):
  """Submit a job (or multi-task job) through dsub.run().

  Fixes over the previous version:
    * Mutable default arguments ({}) were shared across calls; defaults are
      now None and fresh dicts are created per call.
    * The input/output loops used `name` as the loop variable, clobbering
      the `name` parameter — dsub.run() then received the last variable
      name instead of the job name. The loops now use `var_name`.
    * Removed the unused `all_task_data` local and the Python-2-only
      `xrange` (range works on both 2 and 3 for these small counts).

  Args:
    command: shell command for the job to run.
    name: optional job name, forwarded to dsub.run().
    envs: dict of environment variable name -> value.
    labels: dict of label name -> value.
    inputs: dict of input variable name -> URI.
    inputs_recursive: like `inputs`, but marked recursive.
    outputs: dict of output variable name -> URI.
    outputs_recursive: like `outputs`, but marked recursive.
    task_count: number of tasks; >1 builds per-task descriptors.
    wait: whether dsub.run() should wait for job completion.

  Returns:
    The result of dsub.run() (stdout redirected by execute_redirect_stdout).
  """
  envs = envs or {}
  labels = labels or {}
  inputs = inputs or {}
  inputs_recursive = inputs_recursive or {}
  outputs = outputs or {}
  outputs_recursive = outputs_recursive or {}

  logging = param_util.build_logging_param(self.log_path)
  resources = job_model.Resources(
      image=DOCKER_IMAGE, logging=logging, zones=['us-central1*'])

  env_data = {param_util.EnvParam(k, v) for (k, v) in envs.items()}
  label_data = {job_model.LabelParam(k, v) for (k, v) in labels.items()}

  # This is mostly an extraction of dsub's argument parsing:
  # https://github.com/googlegenomics/dsub/blob/master/dsub/lib/param_util.py#L720
  # Reworked to handle dictionaries rather than a list of 'key=val' items.
  input_file_param_util = param_util.InputFileParamUtil('input')
  input_data = set()
  for (recursive, items) in ((False, inputs.items()),
                             (True, inputs_recursive.items())):
    # `var_name`, not `name`: must not clobber the job-name parameter.
    for (var_name, value) in items:
      var_name = input_file_param_util.get_variable_name(var_name)
      input_data.add(
          input_file_param_util.make_param(var_name, value, recursive))

  output_file_param_util = param_util.OutputFileParamUtil('output')
  output_data = set()
  for (recursive, items) in ((False, outputs.items()),
                             (True, outputs_recursive.items())):
    for (var_name, value) in items:
      var_name = output_file_param_util.get_variable_name(var_name)
      output_data.add(
          output_file_param_util.make_param(var_name, value, recursive))

  job_params = {
      'envs': env_data,
      'inputs': input_data,
      'outputs': output_data,
      'labels': label_data,
  }

  if task_count > 1:
    # Per-task descriptors each carry the full param sets.
    task_descriptors = [
        job_model.TaskDescriptor({'task-id': i + 1}, {
            'envs': env_data,
            'inputs': input_data,
            'outputs': output_data,
            'labels': label_data,
        }, job_model.Resources()) for i in range(task_count)
    ]
  else:
    # Single unnamed task: job-level params carry the data.
    task_descriptors = [
        job_model.TaskDescriptor({'task-id': None}, {
            'labels': set(),
            'envs': set(),
            'inputs': set(),
            'outputs': set()
        }, job_model.Resources())
    ]

  return execute_redirect_stdout(
      lambda: dsub.run(
          self.provider,
          resources,
          job_params,
          task_descriptors,
          name=name,
          command=command,
          wait=wait,
          disable_warning=True))