Example #1
    def setUp(self):
        uploads_path = get_mountpoint('uploads')[0][1]
        # Create prep test file to point at
        self.update_fp = join(uploads_path, '1', 'update.txt')
        with open(self.update_fp, 'w') as f:
            f.write("""sample_name\tnew_col\n1.SKD6.640190\tnew_value\n""")

        self._files_to_remove = [self.update_fp]

        # creating temporary files and an artifact
        # NOTE: we don't need to remove the created artifact because it's
        # used to test the delete functionality
        fd, fp = mkstemp(suffix='_seqs.fna')
        close(fd)
        with open(fp, 'w') as f:
            f.write(">1.sid_r4_0 M02034:17:000000000-A5U18:1:1101:15370:1394 "
                    "1:N:0:1 orig_bc=CATGAGCT new_bc=CATGAGCT bc_diffs=0\n"
                    "GTGTGCCAGCAGCCGCGGTAATACGTAGGG\n")
        # 4 Demultiplexed
        filepaths_processed = [(fp, 4)]
        # 1 for default parameters and input data
        exp_params = Parameters.from_default_params(DefaultParameters(1),
                                                    {'input_data': 1})
        self.artifact = Artifact.create(filepaths_processed, "Demultiplexed",
                                        parents=[Artifact(1)],
                                        processing_parameters=exp_params)
Example #2
    def write_demux_files(self, prep_template, generate_hdf5=True):
        """Writes a demux test file to avoid duplication of code"""
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        if generate_hdf5:
            with open(fna_fp, 'w') as f:
                f.write(FASTA_EXAMPLE)
            with File(demux_fp, "w") as f:
                to_hdf5(fna_fp, f)
        else:
            with open(demux_fp, 'w') as f:
                f.write('')

        if prep_template.artifact is None:
            ppd = Artifact.create([(demux_fp, 6)],
                                  "Demultiplexed",
                                  prep_template=prep_template)
        else:
            params = Parameters.from_default_params(
                DefaultParameters(1),
                {'input_data': prep_template.artifact.id})
            ppd = Artifact.create([(demux_fp, 6)],
                                  "Demultiplexed",
                                  parents=[prep_template.artifact],
                                  processing_parameters=params)
        return ppd
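A hedged usage sketch for the helper above; the prep template id and the artifact_type assertion are assumptions about the test database, not part of the original:

        # Hypothetical call from within the same test class
        pt = PrepTemplate(1)
        artifact = self.write_demux_files(pt)
        # The helper always creates a "Demultiplexed" artifact
        self.assertEqual(artifact.artifact_type, "Demultiplexed")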
Example #3
    def test_generate_param_str(self):
        params = DefaultParameters(10)
        obs = generate_param_str(params)
        exp = ("<b>Reference:</b> Greengenes 13_8<br/>"
               "<b>similarity:</b> 0.97<br/>"
               "<b>sortmerna_e_value:</b> 1<br/>"
               "<b>sortmerna_max_pos:</b> 10000<br/>"
               "<b>threads:</b> 1<br/>"
               "<b>sortmerna_coverage:</b> 0.97")
        self.assertEqual(obs, exp)
Example #4
    def post(self):
        study_id = int(self.get_argument('study_id'))
        preprocessed_data_id = int(self.get_argument('preprocessed_data_id'))
        param_id = int(self.get_argument('parameter-set-%s'
                                         % preprocessed_data_id))

        parameters = Parameters.from_default_params(
            DefaultParameters(param_id), {'input_data': preprocessed_data_id})
        job_id = plugin_submit(self.current_user, parameters)

        self.render('compute_wait.html',
                    job_id=job_id,
                    title='Processing',
                    completion_redirect='/study/description/%d?top_tab='
                    'preprocessed_data_tab&sub_tab=%s' %
                    (study_id, preprocessed_data_id))
Example #5
    def post(self):
        study_id = int(self.get_argument('study_id'))
        prep_template_id = int(self.get_argument('prep_template_id'))
        raw_data = PrepTemplate(prep_template_id).artifact
        param_id = int(self.get_argument('preprocessing_parameters_id'))

        parameters = Parameters.from_default_params(
            DefaultParameters(param_id), {'input_data': raw_data.id})

        job_id = plugin_submit(self.current_user, parameters)

        self.render('compute_wait.html',
                    job_id=job_id,
                    title='Preprocessing',
                    completion_redirect='/study/description/%d?top_tab='
                    'prep_template_tab&sub_tab=%s' %
                    (study_id, prep_template_id))
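Examples #4 and #5 share one pattern: parse the form arguments, build a Parameters object from a default set, submit through plugin_submit, and render a wait page. A minimal sketch of wiring such a handler into a Tornado application; the class name and URL pattern are assumptions, since the real class names were stripped from the snippets and Qiita defines its own routing:

from tornado.web import Application, RequestHandler

class PreprocessHandler(RequestHandler):
    # Stand-in for the handler classes above
    def post(self):
        pass

# Hypothetical routing; Qiita's actual URL spec lives in its webserver setup
application = Application([(r'/study/preprocess/', PreprocessHandler)])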
Example #6
    def test_submit_to_EBI(self):
        # setting up test
        fna_fp = join(self.temp_dir, 'seqs.fna')
        demux_fp = join(self.temp_dir, 'demux.seqs')
        with open(fna_fp, 'w') as f:
            f.write(FASTA_EXAMPLE)
        with File(demux_fp, "w") as f:
            to_hdf5(fna_fp, f)

        pt = PrepTemplate(1)
        params = Parameters.from_default_params(DefaultParameters(1),
                                                {'input_data': pt.artifact.id})
        artifact = Artifact.create([(demux_fp, 6)],
                                   "Demultiplexed",
                                   parents=[pt.artifact],
                                   processing_parameters=params)

        # submit job
        job = self._create_job('submit_to_EBI', {
            'artifact': artifact.id,
            'submission_type': 'VALIDATE'
        })
        job._set_status('in_construction')
        job.submit()

        # wait for the job to fail, and check that the status is submitting
        checked_submitting = True
        while job.status != 'error':
            if checked_submitting:
                self.assertEqual('submitting',
                                 artifact.study.ebi_submission_status)
                checked_submitting = False
            sleep(0.5)  # avoid a tight busy-wait while polling
        # once it fails, wait a few seconds before checking the status again
        sleep(5)
        exp = 'Some artifact submissions failed: %d' % artifact.id
        obs = artifact.study.ebi_submission_status
        self.assertEqual(obs, exp)
        # make sure that the error is correct; we have 2 options
        if environ.get('ASPERA_SCP_PASS', '') != '':
            self.assertIn('1.SKM2.640199', job.log.msg)
        else:
            self.assertIn('ASCP Error:', job.log.msg)
        # wait for everything to finish to avoid DB deadlocks
        sleep(5)
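The wait loop above polls job.status by hand; a small helper like the following sketch (hypothetical, not part of Qiita) makes the pattern reusable and adds a timeout:

from time import sleep, time

def wait_for_status(job, status, timeout=60, poll=0.5):
    """Poll job.status until it equals status or timeout seconds pass."""
    deadline = time() + timeout
    while job.status != status:
        if time() > deadline:
            raise RuntimeError('job never reached status %r' % status)
        sleep(poll)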
Example #7
def workflow_handler_post_req(user_id, dflt_params_id, req_params):
    """Creates a new workflow in the system

    Parameters
    ----------
    user_id : str
        The user creating the workflow
    dflt_params_id : int
        The default parameters to use for the first command of the workflow
    req_params : str
        JSON representation of the required parameters for the first
        command of the workflow

    Returns
    -------
    dict of objects
        A dictionary containing the workflow and job information
        {'status': str,
         'message': str,
         'workflow_id': int,
         'job': dict}
    """
    dflt_params = DefaultParameters(dflt_params_id)
    req_params = loads(req_params)
    parameters = Parameters.from_default_params(dflt_params, req_params)
    wf = ProcessingWorkflow.from_scratch(User(user_id), parameters)
    # this is safe as we are creating the workflow for the first time and
    # there is only one node. Remember networkx doesn't guarantee node order
    job = list(wf.graph.nodes())[0]
    inputs = [a.id for a in job.input_artifacts]
    job_cmd = job.command
    return {
        'status': 'success',
        'message': '',
        'workflow_id': wf.id,
        'job': {
            'id': job.id,
            'inputs': inputs,
            'label': job_cmd.name,
            'outputs': job_cmd.outputs
        }
    }
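A hedged call sketch; the email and ids are placeholders for values that must already exist in the database:

from json import dumps

# Hypothetical invocation of the handler function above
resp = workflow_handler_post_req('user@example.com', 1,
                                 dumps({'input_data': 1}))
assert resp['status'] == 'success'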
Example #8
def workflow_handler_patch_req(req_op,
                               req_path,
                               req_value=None,
                               req_from=None):
    """Patches a workflow

    Parameters
    ----------
    req_op : str
        The operation to perform on the workflow
    req_path : str
        Path parameter with the workflow to patch
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element

    Returns
    -------
    dict of {str: str}
        A dictionary of the form: {'status': str, 'message': str} in which
        status is the status of the request ('error' or 'success') and message
        is a human readable string with the error message in case that status
        is 'error'.
    """
    if req_op == 'add':
        req_path = [v for v in req_path.split('/') if v]
        if len(req_path) != 1:
            return {'status': 'error', 'message': 'Incorrect path parameter'}
        req_path = req_path[0]
        try:
            wf = ProcessingWorkflow(req_path)
        except QiitaDBUnknownIDError:
            return {
                'status': 'error',
                'message': 'Workflow %s does not exist' % req_path
            }

        req_value = loads(req_value)
        dflt_params = DefaultParameters(req_value['dflt_params'])
        req_params = req_value.get('req_params', None)
        opt_params = req_value.get('opt_params', None)
        connections = {
            ProcessingJob(k): v
            for k, v in req_value['connections'].items()
        }
        job = wf.add(dflt_params,
                     connections=connections,
                     req_params=req_params,
                     opt_params=opt_params)
        job_cmd = job.command
        return {
            'status': 'success',
            'message': '',
            'job': {
                'id': job.id,
                'inputs': list(req_value['connections'].keys()),
                'label': job_cmd.name,
                'outputs': job_cmd.outputs
            }
        }
    elif req_op == 'remove':
        req_path = [v for v in req_path.split('/') if v]
        if len(req_path) != 2:
            return {'status': 'error', 'message': 'Incorrect path parameter'}
        wf_id = req_path[0]
        job_id = req_path[1]
        wf = ProcessingWorkflow(wf_id)
        job = ProcessingJob(job_id)
        wf.remove(job, cascade=True)
        return {'status': 'success', 'message': ''}
    else:
        return {'status': 'error',
                'message': 'Operation "%s" not supported. Current supported '
                           'operations: add, remove' % req_op}
Example #9
    def test_get_analysis_graph_handler(self):
        response = self.get('/analysis/description/1/graph/')
        self.assertEqual(response.code, 200)
        # The job id is randomly generated in the test environment. Gather
        # it here. There is only 1 job in the first artifact of the analysis
        job_id = Analysis(1).artifacts[0].jobs()[0].id
        obs = loads(response.body)
        exp = {
            'edges': [[8, job_id], [job_id, 9]],
            'nodes': [['job', 'job', job_id, 'Single Rarefaction', 'success'],
                      ['artifact', 'BIOM', 9, 'noname\n(BIOM)', 'artifact'],
                      ['artifact', 'BIOM', 8, 'noname\n(BIOM)', 'artifact']],
            'workflow': None
        }
        self.assertItemsEqual(obs, exp)
        self.assertItemsEqual(obs['edges'], exp['edges'])
        self.assertItemsEqual(obs['nodes'], exp['nodes'])
        self.assertIsNone(obs['workflow'])

        # Create a new analysis with 2 starting BIOMs to be able to test
        # the different if statements of the request
        BaseHandler.get_current_user = Mock(
            return_value=User('*****@*****.**'))
        user = User('*****@*****.**')
        dflt_analysis = user.default_analysis
        dflt_analysis.add_samples({
            4: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196'],
            6: ['1.SKB8.640193', '1.SKD8.640184', '1.SKB7.640196']
        })
        args = {'name': 'New Test Graph Analysis', 'description': 'Desc'}
        response = self.post('/analysis/create/', args)
        new_id = response.effective_url.split('/')[-2]
        a = Analysis(new_id)
        # Wait until all the jobs are done so the BIOM tables exist
        for j in a.jobs:
            wait_for_processing_job(j.id)

        artifacts = a.artifacts
        self.assertEqual(len(artifacts), 2)

        # Create a new workflow starting on the first artifact
        # Magic number 9 -> Summarize Taxa command
        params = Parameters.load(Command(9),
                                 values_dict={
                                     'metadata_category': 'None',
                                     'sort': 'False',
                                     'biom_table': artifacts[0].id
                                 })
        wf = ProcessingWorkflow.from_scratch(user, params)

        # There is only one job in the workflow
        job_id = list(wf.graph.nodes())[0].id

        response = self.get('/analysis/description/%s/graph/' % new_id)
        self.assertEqual(response.code, 200)
        obs = loads(response.body)
        exp = {
            'edges': [[artifacts[0].id, job_id],
                      [job_id, '%s:taxa_summary' % job_id]],
            'nodes':
            [['job', 'job', job_id, 'Summarize Taxa', 'in_construction'],
             [
                 'artifact', 'BIOM', artifacts[0].id, 'noname\n(BIOM)',
                 'artifact'
             ],
             [
                 'artifact', 'BIOM', artifacts[1].id, 'noname\n(BIOM)',
                 'artifact'
             ],
             [
                 'type', 'taxa_summary',
                 '%s:taxa_summary' % job_id, 'taxa_summary\n(taxa_summary)',
                 'type'
             ]],
            'workflow': wf.id
        }
        # Check that the keys are the same
        self.assertItemsEqual(obs, exp)
        # Check the edges
        self.assertItemsEqual(obs['edges'], exp['edges'])
        # Check the nodes
        self.assertItemsEqual(obs['nodes'], exp['nodes'])
        # Check the workflow
        self.assertEqual(obs['workflow'], exp['workflow'])

        # Add a job to the second BIOM to make sure that the edges and nodes
        # are respected. Magic number 12 -> Single Rarefaction
        job2 = wf.add(DefaultParameters(16),
                      req_params={
                          'depth': '100',
                          'biom_table': artifacts[1].id
                      })
        job_id_2 = job2.id

        response = self.get('/analysis/description/%s/graph/' % new_id)
        self.assertEqual(response.code, 200)
        obs = loads(response.body)
        exp = {
            'edges': [[artifacts[0].id, job_id],
                      [job_id, '%s:taxa_summary' % job_id],
                      [artifacts[1].id, job_id_2],
                      [job_id_2, '%s:rarefied_table' % job_id_2]],
            'nodes':
            [['job', 'job', job_id, 'Summarize Taxa', 'in_construction'],
             ['job', 'job', job_id_2, 'Single Rarefaction', 'in_construction'],
             [
                 'artifact', 'BIOM', artifacts[0].id, 'noname\n(BIOM)',
                 'artifact'
             ],
             [
                 'artifact', 'BIOM', artifacts[1].id, 'noname\n(BIOM)',
                 'artifact'
             ],
             [
                 'type', 'taxa_summary',
                 '%s:taxa_summary' % job_id, 'taxa_summary\n(taxa_summary)',
                 'type'
             ],
             [
                 'type', 'BIOM',
                 '%s:rarefied_table' % job_id_2, 'rarefied_table\n(BIOM)',
                 'type'
             ]],
            'workflow': wf.id
        }
        # Check that the keys are the same
        self.assertItemsEqual(obs, exp)
        # Check the edges
        self.assertItemsEqual(obs['edges'], exp['edges'])
        # Check the nodes
        self.assertItemsEqual(obs['nodes'], exp['nodes'])
        # Check the workflow
        self.assertEqual(obs['workflow'], exp['workflow'])

        # Add a second workflow to the second artifact to force the error
        # to be raised. This situation should never happen when using
        # the interface
        wf.remove(job2)
        params = Parameters.load(Command(9),
                                 values_dict={
                                     'metadata_category': 'None',
                                     'sort': 'False',
                                     'biom_table': artifacts[1].id
                                 })
        wf = ProcessingWorkflow.from_scratch(user, params)
        response = self.get('/analysis/description/%s/graph/' % new_id)
        self.assertEqual(response.code, 500)