Ejemplo n.º 1
0
    def test_create(self):
        """Reference.create adds the reference row and its three filepaths"""
        # The newly created reference is expected to receive id 2
        new_ref = Reference.create(self.name, self.version, self.seqs_fp,
                                   self.tax_fp, self.tree_fp)
        self.assertEqual(new_ref.id, 2)

        # The reference row must point at filepath ids 15, 16 and 17
        ref_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.reference WHERE reference_id=2")
        self.assertEqual(ref_rows,
                         [[2, self.name, self.version, 15, 16, 17]])

        def expected_path(source_fp):
            # Stored files are expected under db_dir, named
            # <name>_<version>_<basename of the original file>
            stored_name = "%s_%s_%s" % (self.name, self.version,
                                        basename(source_fp))
            return join(self.db_dir, stored_name)

        # The three filepath rows must hold the expected locations
        fp_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=15 or "
            "filepath_id=16 or filepath_id=17")
        self.assertEqual(fp_rows,
                         [[15, expected_path(self.seqs_fp), 9, '0', 1],
                          [16, expected_path(self.tax_fp), 10, '0', 1],
                          [17, expected_path(self.tree_fp), 11, '0', 1]])
Ejemplo n.º 2
0
    def get(self):
        """Render the selected-samples page of the user's default analysis"""
        samples_by_study = defaultdict(dict)
        info_by_artifact = {}
        selected = self.current_user.default_analysis.samples
        for artifact_id, sample_ids in viewitems(selected):
            artifact = Artifact(artifact_id)
            # Group the selected samples by study and then by processed data
            samples_by_study[artifact.study][artifact_id] = sample_ids

            # Gather the processing details displayed for this processed data
            proc_params = artifact.processing_parameters
            ref = Reference(proc_params.values['reference'])

            info = {}
            info['processed_date'] = str(artifact.timestamp)
            info['algorithm'] = proc_params.command.name
            info['reference_name'] = ref.name
            info['reference_version'] = ref.version
            info['sequence_filepath'] = ref.sequence_fp
            info['taxonomy_filepath'] = ref.taxonomy_fp
            info['tree_filepath'] = ref.tree_fp
            info['data_type'] = artifact.data_type
            info_by_artifact[artifact_id] = info

        self.render("analysis_selected.html",
                    sel_data=samples_by_study,
                    proc_info=info_by_artifact)
Ejemplo n.º 3
0
    def get(self):
        """Render the selected-samples page of the user's default analysis"""
        samples_by_study = defaultdict(dict)
        info_by_artifact = {}
        selected = self.current_user.default_analysis.samples
        for artifact_id, sample_ids in viewitems(selected):
            artifact = Artifact(artifact_id)
            # Group the selected samples by study and then by artifact
            samples_by_study[artifact.study][artifact_id] = sample_ids

            # Artifacts without processing parameters report no algorithm
            # and no parameters
            shown_params = None
            algorithm = None
            proc_params = artifact.processing_parameters
            if proc_params is not None:
                command = proc_params.command
                shown_params = proc_params.values
                if 'reference' in shown_params:
                    # Replace the raw reference id with its name and version
                    reference = Reference(shown_params['reference'])
                    shown_params.pop('reference')

                    shown_params['reference_name'] = reference.name
                    shown_params['reference_version'] = reference.version
                algorithm = '%s (%s)' % (command.software.name, command.name)

            info = {}
            info['processed_date'] = str(artifact.timestamp)
            info['algorithm'] = algorithm
            info['data_type'] = artifact.data_type
            info['params'] = shown_params
            info_by_artifact[artifact_id] = info

        self.render("analysis_selected.html", sel_data=samples_by_study,
                    proc_info=info_by_artifact)
Ejemplo n.º 4
0
    def test_create(self):
        """Reference.create adds the reference row and its three filepaths"""
        # The newly created reference is expected to receive id 2
        new_ref = Reference.create(self.name, self.version, self.seqs_fp,
                                   self.tax_fp, self.tree_fp)
        self.assertEqual(new_ref.id, 2)

        # The reference row must point at filepath ids 19, 20 and 21
        ref_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.reference WHERE reference_id=2")
        self.assertEqual(ref_rows,
                         [[2, self.name, self.version, 19, 20, 21]])

        def expected_name(source_fp):
            # Stored files are expected to be named
            # <name>_<version>_<basename of the original file>
            return "%s_%s_%s" % (self.name, self.version,
                                 basename(source_fp))

        # The three filepath rows must hold the expected file names
        fp_rows = self.conn_handler.execute_fetchall(
            "SELECT * FROM qiita.filepath WHERE filepath_id=19 or "
            "filepath_id=20 or filepath_id=21")
        self.assertEqual(fp_rows,
                         [[19, expected_name(self.seqs_fp), 10, '0', 1, 6],
                          [20, expected_name(self.tax_fp), 11, '0', 1, 6],
                          [21, expected_name(self.tree_fp), 12, '0', 1, 6]])
Ejemplo n.º 5
0
    def test_purge_filepaths_null_cols(self):
        """Run the common purge test with a reference lacking tree/taxonomy"""
        # Background on the NULL vs NOT IN pitfall that motivates this test:
        # http://www.depesz.com/2008/08/13/nulls-vs-not-in/
        # According to the original note, the reference table is the only
        # place where a filepath id can be null in the current set up, so a
        # reference is created with sequences but no taxonomy and no tree.
        handle, fasta_fp = mkstemp(suffix="_seqs.fna")
        close(handle)
        null_ref = Reference.create("null_db", "13_2", fasta_fp)
        # Track the stored sequence file in the files_to_remove list
        self.files_to_remove.append(null_ref.sequence_fp)

        self._common_purge_filpeaths_test()
Ejemplo n.º 6
0
    def test_purge_filepaths_null_cols(self):
        """Run the common purge test with a reference lacking tree/taxonomy"""
        # Background on the NULL vs NOT IN pitfall that motivates this test:
        # http://www.depesz.com/2008/08/13/nulls-vs-not-in/
        # According to the original note, the reference table is the only
        # place where a filepath id can be null in the current set up, so a
        # reference is created with sequences but no taxonomy and no tree.
        tmp_fd, tmp_seqs_fp = mkstemp(suffix="_seqs.fna")
        close(tmp_fd)
        reference = Reference.create("null_db", "13_2", tmp_seqs_fp)
        # Track the stored sequence file in the files_to_remove list
        self.files_to_remove.append(reference.sequence_fp)

        self._common_purge_filpeaths_test()
Ejemplo n.º 7
0
Archivo: util.py Proyecto: tanaes/qiita
def generate_param_str(param):
    """Build the HTML summary of a parameter set

    Parameters
    ----------
    param : BaseParameters
        The parameter set to summarize; its ``values`` must contain a
        'reference' entry

    Returns
    -------
    str
        The reference name and version, followed by every other parameter
        formatted as ``<b>name:</b> value``, all joined with ``<br/>``
    """
    param_values = param.values
    reference = Reference(param_values['reference'])
    # The reference is always reported first, by name and version
    lines = ["<b>Reference:</b> %s %s" % (reference.name, reference.version)]
    for key, val in viewitems(param_values):
        if key == 'reference':
            # Already reported above
            continue
        lines.append("<b>%s:</b> %s" % (key, val))
    return "<br/>".join(lines)
Ejemplo n.º 8
0
def _build_single_proc_data_info(proc_data_id, data_type, samples):
    """Build the proc data info dict for the child row in datatable

    Parameters
    ----------
    proc_data_id : int
        The id of the processed data (artifact) to describe
    data_type : str
        Data type of the processed data
    samples : list of str
        The samples available in the processed data. Not used by this
        function: the reported samples are read from the first prep template
        of the artifact. Kept so existing callers keep working.

    Returns
    -------
    dict
        The information for the processed data, in the form {info: value, ...}

    Raises
    ------
    KeyError
        If the processing parameters lack 'input_data' or 'reference'
    """
    proc_data = Artifact(proc_data_id)

    # Work on a copy so the values held by the processing parameters object
    # are not modified when entries are removed below
    params = dict(proc_data.processing_parameters.values)
    del params['input_data']
    ref = Reference(params.pop('reference'))

    def _file_name(path):
        # A reference can be created without taxonomy or tree, so only take
        # the basename of an actual path and pass empty values through
        return basename(path) if path else path

    proc_info = {'processed_date': str(proc_data.timestamp)}
    proc_info['pid'] = proc_data_id
    proc_info['data_type'] = data_type
    proc_info['reference_name'] = ref.name
    proc_info['taxonomy_filepath'] = _file_name(ref.taxonomy_fp)
    proc_info['sequence_filepath'] = _file_name(ref.sequence_fp)
    proc_info['tree_filepath'] = _file_name(ref.tree_fp)
    proc_info['reference_version'] = ref.version
    proc_info['algorithm'] = 'sortmerna'
    proc_info['samples'] = sorted(proc_data.prep_templates[0].keys())
    # The remaining processing parameters are reported as-is and override
    # any of the keys above that share their name
    proc_info.update(params)

    return proc_info
Ejemplo n.º 9
0
 def test_tree_fp(self):
     """tree_fp of reference 1 is the GreenGenes tree file under db_dir"""
     expected_fp = join(self.db_dir, "GreenGenes_13_8_97_otus.tree")
     self.assertEqual(Reference(1).tree_fp, expected_fp)
Ejemplo n.º 10
0
 def test_taxonomy_fp(self):
     """taxonomy_fp of reference 1 is the GreenGenes taxonomy under db_dir"""
     expected_fp = join(self.db_dir, "GreenGenes_13_8_97_otu_taxonomy.txt")
     self.assertEqual(Reference(1).taxonomy_fp, expected_fp)
Ejemplo n.º 11
0
 def test_sequence_fp(self):
     """sequence_fp of reference 1 is the GreenGenes fasta under db_dir"""
     expected_fp = join(self.db_dir, "GreenGenes_13_8_97_otus.fasta")
     self.assertEqual(Reference(1).sequence_fp, expected_fp)
Ejemplo n.º 12
0
    def _construct_job_graph(self, analysis, commands, comm_opts=None,
                             rarefaction_depth=None,
                             merge_duplicated_sample_ids=False):
        """Builds the job graph for running an analysis

        Sets the analysis status to "running", builds the analysis files,
        creates a job for every requested command whose data type matches one
        of the analysis' biom tables, and then adds to the job graph one node
        per job, a tgz node with an edge from every job node, and a finish
        node with an edge from the tgz node.

        Parameters
        ----------
        analysis: Analysis object
            Analysis to finalize.
        commands : list of tuples
            Commands to add as jobs in the analysis.
            Format [(data_type, command name), ...]
        comm_opts : dict of dicts, optional
            Options for commands. Format {command name: {opt1: value,...},...}
            Default None (use default options). The given dicts are not
            modified.
        rarefaction_depth : int, optional
            Rarefaction depth for analysis' biom tables. Default None.
        merge_duplicated_sample_ids : bool, optional
            If the duplicated sample ids in the selected studies should be
            merged or prepended with the artifact ids. False (default) prepends
            the artifact id
        """
        self._logger = stderr
        self.analysis = analysis
        analysis_id = analysis.id

        # Add jobs to analysis
        if comm_opts is None:
            comm_opts = {}

        analysis.status = "running"
        # creating bioms at this point cause all this section runs on a worker
        # node, currently an ipython job
        analysis.build_files(rarefaction_depth, merge_duplicated_sample_ids)
        mapping_file = analysis.mapping_file

        tree_commands = ["Beta Diversity", "Alpha Rarefaction"]
        for data_type, biom_fp in viewitems(analysis.biom_tables):
            biom_table = load_table(biom_fp)
            # getting reference_id and software_command_id from the first
            # sample of the biom. This decision was discussed on the qiita
            # meeting on 02/24/16
            metadata = biom_table.metadata(biom_table.ids()[0])
            rid = metadata['reference_id']
            sci = metadata['command_id']

            if rid != 'na':
                reference = Reference(rid)
                tree = reference.tree_fp
            else:
                reference = None
                tree = ''

            cmd = Command(sci) if sci != 'na' else None

            for cmd_data_type, command in commands:
                if data_type != cmd_data_type:
                    continue

                # get opts set by user, else make it empty dict. A copy is
                # used so the caller's dict is left untouched and the
                # per-table options set below (e.g. --tree_fp) do not leak
                # into the job created for another biom table
                opts = dict(comm_opts.get(command, {}))
                opts["--otu_table_fp"] = biom_fp
                opts["--mapping_fp"] = mapping_file

                if command in tree_commands:
                    # Truthiness covers both an empty path and a missing
                    # (None) one
                    if tree:
                        opts["--tree_fp"] = tree
                    else:
                        opts["--parameter_fp"] = join(
                            get_db_files_base_dir(), "reference",
                            "params_qiime.txt")

                if command == "Alpha Rarefaction":
                    opts["-n"] = 4

                Job.create(data_type, command, opts, analysis, reference, cmd,
                           return_existing=True)

        # Add the jobs
        job_nodes = []
        for job in analysis.jobs:
            node_name = "%d_JOB_%d" % (analysis_id, job.id)
            job_nodes.append(node_name)
            job_name = "%s: %s" % (job.datatype, job.command[0])
            self._job_graph.add_node(node_name,
                                     func=system_call_from_job,
                                     args=(job.id,),
                                     job_name=job_name,
                                     requires_deps=False)

        # tgz-ing the analysis results
        tgz_node_name = "TGZ_ANALYSIS_%d" % (analysis_id)
        job_name = "tgz_analysis_%d" % (analysis_id)
        self._job_graph.add_node(tgz_node_name,
                                 func=_generate_analysis_tgz,
                                 args=(analysis,),
                                 job_name=job_name,
                                 requires_deps=False)
        # Adding the dependency edges to the graph
        for job_node_name in job_nodes:
            self._job_graph.add_edge(job_node_name, tgz_node_name)

        # Finalize the analysis.
        node_name = "FINISH_ANALYSIS_%d" % analysis_id
        self._job_graph.add_node(node_name,
                                 func=_finish_analysis,
                                 args=(analysis,),
                                 job_name='Finalize analysis',
                                 requires_deps=False)
        self._job_graph.add_edge(tgz_node_name, node_name)
Ejemplo n.º 13
0
def _get_process_target_gene_cmd(preprocessed_data, params):
    """Generates the pick_closed_reference_otus.py command

    Parameters
    ----------
    preprocessed_data : PreprocessedData
        The preprocessed_data to process
    params : ProcessedSortmernaParams
        The parameters to use for the processing

    Returns
    -------
    tuple (str, str)
        A 2-tuple of strings. The first string is the command to be executed.
        The second string is the path to the command's output directory

    Raises
    ------
    ValueError
        If no sequence file is found on the preprocessed data
    """
    # Use the first filepath of type "preprocessed_fasta" as the input
    seqs_fp = next(
        (path for _, path, path_type in preprocessed_data.get_filepaths()
         if path_type == "preprocessed_fasta"),
        None)
    if not seqs_fp:
        raise ValueError("No sequence file found on the preprocessed data %s" %
                         preprocessed_data.id)

    data_id = preprocessed_data.id
    work_dir = qiita_config.working_dir

    # Reserve a unique output directory name. mkdtemp creates the directory,
    # so it is removed again to let the script create it itself
    output_dir = mkdtemp(dir=work_dir, prefix='pick_otus_otu_%s_' % data_id)
    rmdir(output_dir)

    # Write the parameters file handed to the script through -p
    handle, param_fp = mkstemp(dir=work_dir, prefix='params_%s_' % data_id,
                               suffix='.txt')
    close(handle)
    with open(param_fp, 'w') as param_file:
        params.to_file(param_file)

    reference = Reference(params.reference)
    reference_fp = reference.sequence_fp
    taxonomy_fp = reference.taxonomy_fp
    # The taxonomy option is only passed when the reference has a taxonomy
    taxonomy_opt = "-t %s" % taxonomy_fp if taxonomy_fp else ""

    # Assemble the pick_closed_reference_otus.py command
    cmd = str("pick_closed_reference_otus.py -i %s -r %s -o %s -p %s %s" %
              (seqs_fp, reference_fp, output_dir, param_fp, taxonomy_opt))

    return (cmd, output_dir)