def setUp(self):
    uploads_path = get_mountpoint('uploads')[0][1]
    # Create prep test file to point at
    self.update_fp = join(uploads_path, '1', 'update.txt')
    with open(self.update_fp, 'w') as f:
        f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")
    self._files_to_remove = [self.update_fp]

    # creating temporal files and artifact
    # NOTE: we don't need to remove the artifact created cause it's
    # used to test the delete functionality
    fd, fp = mkstemp(suffix='_seqs.fna')
    close(fd)
    with open(fp, 'w') as f:
        f.write(">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 "
                "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n"
                "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n")
    # 4 -> Demultiplexed
    filepaths_processed = [(fp, 4)]
    # 1 -> default parameters for the input data
    exp_params = Parameters.from_default_params(DefaultParameters(1),
                                                {'input_data': 1})
    self.artifact = Artifact.create(filepaths_processed, "Demultiplexed",
                                    parents=[Artifact(1)],
                                    processing_parameters=exp_params)
def write_demux_files(self, prep_template, generate_hdf5=True):
    """Writes a demux test file to avoid duplication of code"""
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    if generate_hdf5:
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE)
        with File(demux_fp, "w") as f:
            to_hdf5(fna_fp, f)
    else:
        with open(demux_fp, 'w') as f:
            f.write('')

    if prep_template.artifact is None:
        ppd = Artifact.create(
            [(demux_fp, 6)], "Demultiplexed", prep_template=prep_template)
    else:
        params = Parameters.from_default_params(
            DefaultParameters(1),
            {'input_data': prep_template.artifact.id})
        ppd = Artifact.create([(demux_fp, 6)], "Demultiplexed",
                              parents=[prep_template.artifact],
                              processing_parameters=params)
    return ppd
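# Usage sketch (not part of the original suite): assuming a test case that
# defines self.temp_dir and has PrepTemplate in scope, as the snippets above
# do, the helper returns a Demultiplexed Artifact either backed by a real
# HDF5 demux payload or by an empty placeholder file. PrepTemplate(1) is the
# prep template shipped with the Qiita test database; the test name is
# illustrative only.
def test_write_demux_files_usage(self):
    pt = PrepTemplate(1)
    # demux file backed by an HDF5 payload generated from FASTA_EXAMPLE
    hdf5_artifact = self.write_demux_files(pt)
    # empty placeholder demux file, useful for failure-path tests
    empty_artifact = self.write_demux_files(pt, generate_hdf5=False)
    # both calls create distinct artifacts in the test database
    self.assertNotEqual(hdf5_artifact.id, empty_artifact.id)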
def test_generate_param_str(self):
    params = DefaultParameters(10)
    obs = generate_param_str(params)
    exp = ("<b>Reference:</b> Greengenes 13_8<br/>"
           "<b>similarity:</b> 0.97<br/>"
           "<b>sortmerna_e_value:</b> 1<br/>"
           "<b>sortmerna_max_pos:</b> 10000<br/>"
           "<b>threads:</b> 1<br/>"
           "<b>sortmerna_coverage:</b> 0.97")
    self.assertEqual(obs, exp)
def post(self):
    study_id = int(self.get_argument('study_id'))
    preprocessed_data_id = int(self.get_argument('preprocessed_data_id'))
    param_id = self.get_argument('parameter-set-%s' % preprocessed_data_id)

    parameters = Parameters.from_default_params(
        DefaultParameters(param_id), {'input_data': preprocessed_data_id})

    job_id = plugin_submit(self.current_user, parameters)

    self.render('compute_wait.html',
                job_id=job_id, title='Processing',
                completion_redirect='/study/description/%d?top_tab='
                                    'preprocessed_data_tab&sub_tab=%s'
                                    % (study_id, preprocessed_data_id))
def post(self):
    study_id = int(self.get_argument('study_id'))
    prep_template_id = int(self.get_argument('prep_template_id'))
    raw_data = PrepTemplate(prep_template_id).artifact
    param_id = int(self.get_argument('preprocessing_parameters_id'))

    parameters = Parameters.from_default_params(
        DefaultParameters(param_id), {'input_data': raw_data.id})

    job_id = plugin_submit(self.current_user, parameters)

    self.render('compute_wait.html',
                job_id=job_id, title='Preprocessing',
                completion_redirect='/study/description/%d?top_tab='
                                    'prep_template_tab&sub_tab=%s'
                                    % (study_id, prep_template_id))
def test_submit_to_EBI(self):
    # setting up the test
    fna_fp = join(self.temp_dir, 'seqs.fna')
    demux_fp = join(self.temp_dir, 'demux.seqs')
    with open(fna_fp, 'w') as f:
        f.write(FASTA_EXAMPLE)
    with File(demux_fp, "w") as f:
        to_hdf5(fna_fp, f)

    pt = PrepTemplate(1)
    params = Parameters.from_default_params(DefaultParameters(1),
                                            {'input_data': pt.artifact.id})
    artifact = Artifact.create([(demux_fp, 6)], "Demultiplexed",
                               parents=[pt.artifact],
                               processing_parameters=params)

    # submit the job
    job = self._create_job('submit_to_EBI', {'artifact': artifact.id,
                                             'submission_type': 'VALIDATE'})
    job._set_status('in_construction')
    job.submit()

    # wait for the job to fail, and check that the status is submitting
    checked_submitting = True
    while job.status != 'error':
        if checked_submitting:
            self.assertEqual('submitting',
                             artifact.study.ebi_submission_status)
            checked_submitting = False
    # once it fails, wait a few seconds before checking the status again
    sleep(5)
    exp = 'Some artifact submissions failed: %d' % artifact.id
    obs = artifact.study.ebi_submission_status
    self.assertEqual(obs, exp)
    # make sure that the error is correct; we have 2 options
    if environ.get('ASPERA_SCP_PASS', '') != '':
        self.assertIn('1.SKM2.640199', job.log.msg)
    else:
        self.assertIn('ASCP Error:', job.log.msg)
    # wait for everything to finish to avoid DB deadlocks
    sleep(5)
def workflow_handler_post_req(user_id, dflt_params_id, req_params):
    """Creates a new workflow in the system

    Parameters
    ----------
    user_id : str
        The user creating the workflow
    dflt_params_id : int
        The default parameters to use for the first command of the workflow
    req_params : str
        JSON representation of the required parameters for the first
        command of the workflow

    Returns
    -------
    dict of objects
        A dictionary containing the new workflow information
        {'status': str,
         'message': str,
         'workflow_id': int}
    """
    dflt_params = DefaultParameters(dflt_params_id)
    req_params = loads(req_params)
    parameters = Parameters.from_default_params(dflt_params, req_params)
    wf = ProcessingWorkflow.from_scratch(User(user_id), parameters)
    # This is safe as we are creating the workflow for the first time and
    # there is only one node. Remember networkx doesn't guarantee the order
    # of the nodes
    job = wf.graph.nodes()[0]
    inputs = [a.id for a in job.input_artifacts]
    job_cmd = job.command
    return {'status': 'success',
            'message': '',
            'workflow_id': wf.id,
            'job': {'id': job.id,
                    'inputs': inputs,
                    'label': job_cmd.name,
                    'outputs': job_cmd.outputs}}
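# Usage sketch (illustrative, not from the original module): the required
# parameters arrive as a JSON string, so they are serialized with json.dumps
# before the call. The user email is a placeholder for a registered user;
# default parameter set 1 and artifact 1 are the ids already used by the
# snippets above and exist in the Qiita test database.
from json import dumps


def example_workflow_post():
    payload = dumps({'input_data': 1})
    resp = workflow_handler_post_req('user@example.com', 1, payload)
    # resp['workflow_id'] identifies the new ProcessingWorkflow and
    # resp['job'] describes its single starting job
    return resp['workflow_id'], resp['job']['id']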
def workflow_handler_patch_req(req_op, req_path, req_value=None,
                               req_from=None):
    """Patches a workflow

    Parameters
    ----------
    req_op : str
        The operation to perform on the workflow
    req_path : str
        Path parameter with the workflow to patch
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element

    Returns
    -------
    dict of {str: str}
        A dictionary of the form: {'status': str, 'message': str} in which
        status is the status of the request ('error' or 'success') and
        message is a human readable string with the error message in case
        that status is 'error'.
    """
    if req_op == 'add':
        req_path = [v for v in req_path.split('/') if v]
        if len(req_path) != 1:
            return {'status': 'error',
                    'message': 'Incorrect path parameter'}
        req_path = req_path[0]
        try:
            wf = ProcessingWorkflow(req_path)
        except QiitaDBUnknownIDError:
            return {'status': 'error',
                    'message': 'Workflow %s does not exist' % req_path}

        req_value = loads(req_value)
        dflt_params = DefaultParameters(req_value['dflt_params'])
        req_params = req_value.get('req_params', None)
        opt_params = req_value.get('opt_params', None)
        connections = {ProcessingJob(k): v
                       for k, v in req_value['connections'].items()}
        job = wf.add(dflt_params, connections=connections,
                     req_params=req_params, opt_params=opt_params)
        job_cmd = job.command
        return {'status': 'success',
                'message': '',
                'job': {'id': job.id,
                        'inputs': list(req_value['connections'].keys()),
                        'label': job_cmd.name,
                        'outputs': job_cmd.outputs}}
    elif req_op == 'remove':
        req_path = [v for v in req_path.split('/') if v]
        if len(req_path) != 2:
            return {'status': 'error',
                    'message': 'Incorrect path parameter'}
        wf_id = req_path[0]
        job_id = req_path[1]
        wf = ProcessingWorkflow(wf_id)
        job = ProcessingJob(job_id)
        wf.remove(job, cascade=True)
        return {'status': 'success', 'message': ''}
    else:
        return {'status': 'error',
                'message': 'Operation "%s" not supported. Current supported '
                           'operations: add, remove' % req_op}
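# Usage sketch (illustrative, not part of the original module): the 'add'
# operation takes the workflow id as the path and a JSON body naming the
# default parameter set, any required parameters, and how the new job
# connects to existing jobs. The connection mapping shown below
# ({parent_output_name: input_parameter_name}) and the 'demultiplexed' /
# 'input_data' names are assumptions for illustration; the ids are
# placeholders supplied by the caller.
from json import dumps


def example_workflow_patch(wf_id, parent_job_id, dflt_params_id):
    add_value = dumps({
        'dflt_params': dflt_params_id,
        # connect the parent job's output to the new job's input parameter
        'connections': {parent_job_id: {'demultiplexed': 'input_data'}}})
    added = workflow_handler_patch_req('add', '/%s/' % wf_id,
                                       req_value=add_value)
    # 'remove' takes '/<workflow_id>/<job_id>/' and cascades to child jobs
    removed = workflow_handler_patch_req(
        'remove', '/%s/%s/' % (wf_id, added['job']['id']))
    return added, removed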
def test_get_analysis_graph_handler(self):
    response = self.get('/analysis/description/1/graph/')
    self.assertEqual(response.code, 200)
    # The job id is randomly generated in the test environment. Gather
    # it here. There is only 1 job in the first artifact of the analysis
    job_id = Analysis(1).artifacts[0].jobs()[0].id
    obs = loads(response.body)
    exp = {'edges': [[8, job_id], [job_id, 9]],
           'nodes': [['job', 'job', job_id, 'Single Rarefaction', 'success'],
                     ['artifact', 'BIOM', 9, 'noname\n(BIOM)', 'artifact'],
                     ['artifact', 'BIOM', 8, 'noname\n(BIOM)', 'artifact']],
           'workflow': None}
    self.assertItemsEqual(obs, exp)
    self.assertItemsEqual(obs['edges'], exp['edges'])
    self.assertItemsEqual(obs['nodes'], exp['nodes'])
    self.assertIsNone(obs['workflow'])

    # Create a new analysis with 2 starting BIOMs to be able to test
    # the different if statements of the request
    BaseHandler.get_current_user = Mock(
        return_value=User('*****@*****.**'))
    user = User('*****@*****.**')
    dflt_analysis = user.default_analysis
    dflt_analysis.add_samples(
        {4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
         6: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']})
    args = {'name': 'New Test Graph Analysis', 'description': 'Desc'}
    response = self.post('/analysis/create/', args)
    new_id = response.effective_url.split('/')[-2]
    a = Analysis(new_id)
    # Wait until all the jobs are done so the BIOM tables exist
    for j in a.jobs:
        wait_for_processing_job(j.id)
    artifacts = a.artifacts
    self.assertEqual(len(artifacts), 2)

    # Create a new workflow starting on the first artifact
    # Magic number 9 -> Summarize Taxa command
    params = Parameters.load(
        Command(9), values_dict={'metadata_category': 'None',
                                 'sort': 'False',
                                 'biom_table': artifacts[0].id})
    wf = ProcessingWorkflow.from_scratch(user, params)
    # There is only one job in the workflow
    job_id = wf.graph.nodes()[0].id

    response = self.get('/analysis/description/%s/graph/' % new_id)
    self.assertEqual(response.code, 200)
    obs = loads(response.body)
    exp = {'edges': [[artifacts[0].id, job_id],
                     [job_id, '%s:taxa_summary' % job_id]],
           'nodes': [['job', 'job', job_id, 'Summarize Taxa',
                      'in_construction'],
                     ['artifact', 'BIOM', artifacts[0].id, 'noname\n(BIOM)',
                      'artifact'],
                     ['artifact', 'BIOM', artifacts[1].id, 'noname\n(BIOM)',
                      'artifact'],
                     ['type', 'taxa_summary', '%s:taxa_summary' % job_id,
                      'taxa_summary\n(taxa_summary)', 'type']],
           'workflow': wf.id}
    # Check that the keys are the same
    self.assertItemsEqual(obs, exp)
    # Check the edges
    self.assertItemsEqual(obs['edges'], exp['edges'])
    # Check the nodes
    self.assertItemsEqual(obs['nodes'], exp['nodes'])
    # Check the workflow
    self.assertEqual(obs['workflow'], exp['workflow'])

    # Add a job to the second BIOM to make sure that the edges and nodes
    # are respected. Magic number 12 -> Single Rarefaction
    job2 = wf.add(DefaultParameters(16),
                  req_params={'depth': '100',
                              'biom_table': artifacts[1].id})
    job_id_2 = job2.id

    response = self.get('/analysis/description/%s/graph/' % new_id)
    self.assertEqual(response.code, 200)
    obs = loads(response.body)
    exp = {'edges': [[artifacts[0].id, job_id],
                     [job_id, '%s:taxa_summary' % job_id],
                     [artifacts[1].id, job_id_2],
                     [job_id_2, '%s:rarefied_table' % job_id_2]],
           'nodes': [['job', 'job', job_id, 'Summarize Taxa',
                      'in_construction'],
                     ['job', 'job', job_id_2, 'Single Rarefaction',
                      'in_construction'],
                     ['artifact', 'BIOM', artifacts[0].id, 'noname\n(BIOM)',
                      'artifact'],
                     ['artifact', 'BIOM', artifacts[1].id, 'noname\n(BIOM)',
                      'artifact'],
                     ['type', 'taxa_summary', '%s:taxa_summary' % job_id,
                      'taxa_summary\n(taxa_summary)', 'type'],
                     ['type', 'BIOM', '%s:rarefied_table' % job_id_2,
                      'rarefied_table\n(BIOM)', 'type']],
           'workflow': wf.id}
    # Check that the keys are the same
    self.assertItemsEqual(obs, exp)
    # Check the edges
    self.assertItemsEqual(obs['edges'], exp['edges'])
    # Check the nodes
    self.assertItemsEqual(obs['nodes'], exp['nodes'])
    # Check the workflow
    self.assertEqual(obs['workflow'], exp['workflow'])

    # Add a second workflow to the second artifact to force the raise of
    # the error. This situation should never happen when using the interface
    wf.remove(job2)
    params = Parameters.load(
        Command(9), values_dict={'metadata_category': 'None',
                                 'sort': 'False',
                                 'biom_table': artifacts[1].id})
    wf = ProcessingWorkflow.from_scratch(user, params)
    response = self.get('/analysis/description/%s/graph/' % new_id)
    self.assertEqual(response.code, 500)