コード例 #1
0
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
        """_get_preprocess_fastq_cmd raises ValueError for a per-sample FASTQ
        raw data made of a forward reads file plus a barcodes file"""
        sample_md = {'run_prefix': "sample1_failure", 'primer': 'A',
                     'barcode': 'A', 'center_name': 'ANL',
                     'platform': 'ILLUMINA',
                     'library_construction_protocol': 'A',
                     'experiment_design_description': 'A'}
        md_template = pd.DataFrame.from_dict({'SKB8.640193': sample_md},
                                             orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # Create the placeholder files; this setup is the one expected to fail
        created = []
        for fname in ('sample1_failure.fastq',
                      'sample1_failure.barcodes.fastq.gz'):
            fp = self.path_builder(fname)
            with open(fp, 'w') as fh:
                fh.write('\n')
            self.files_to_remove.append(fp)
            created.append(fp)

        type_ids = [convert_to_id('raw_forward_seqs', 'filepath_type'),
                    convert_to_id('raw_barcodes', 'filepath_type')]
        fps = list(zip(created, type_ids))

        raw_data = RawData.create(get_filetypes()['per_sample_FASTQ'],
                                  [prep_template], fps)
        candidates = [p for p in PreprocessedIlluminaParams.iter()
                      if p.name == 'per sample FASTQ defaults']
        params = candidates[0]

        self.assertRaises(ValueError, _get_preprocess_fastq_cmd, raw_data,
                          prep_template, params)
コード例 #2
0
ファイル: logger.py プロジェクト: Jorge-C/qiita
    def create(cls, severity, msg, info=None):
        """Adds a new log entry to the database and returns its object

        Parameters
        ----------
        severity : str  {Warning, Runtime, Fatal}
            The severity level of the entry; it is translated to its id
            in the SEVERITY table.
        msg : str
            The message text
        info : dict, optional
            Defaults to ``None``. The dict is stored as the first element of
            a JSON list of information dicts; when ``None``, an empty dict
            is stored instead.

        Returns
        -------
        cls
            The object built from the id returned by the INSERT

        Notes
        -----
        - When `info` is added, keys can be of any type, but upon retrieval,
          they will be of type str
        """
        # The information column holds a JSON list of dicts
        serialized_info = dumps([{} if info is None else info])

        conn_handler = SQLConnectionHandler()
        insert_sql = (
            "INSERT INTO qiita.{} (time, severity_id, msg, information) "
            "VALUES (NOW(), %s, %s, %s) RETURNING logging_id"
        ).format(cls._table)
        sql_args = (convert_to_id(severity, "severity"), msg, serialized_info)
        row = conn_handler.execute_fetchone(insert_sql, sql_args)

        return cls(row[0])
コード例 #3
0
ファイル: test_study.py プロジェクト: MarkBruns/qiita
 def _change_processed_data_status(self, new_status):
     """Sets the status of every row in qiita.processed_data to `new_status`

     The status of the studies follows from the status of their processed
     data, so this is how the tests move studies between statuses.
     """
     sql = "UPDATE qiita.processed_data SET processed_data_status_id = %s"
     status_id = convert_to_id(new_status, 'processed_data_status')
     self.conn_handler.execute(sql, (status_id,))
コード例 #4
0
ファイル: prep_template_tab.py プロジェクト: MarkBruns/qiita
    def render(self, study, full_access):
        """Renders the prep template tab for the given study

        Parameters
        ----------
        study : object
            The study being displayed; its ``id`` is used to look up the
            uploaded files
        full_access : bool
            Forwarded to the prep template generator and to the HTML template

        Returns
        -------
        The value returned by ``self.render_string``
        """
        # Only the txt/tsv uploads are offered to the user
        files = []
        for _, fname in get_files_from_uploads_folders(str(study.id)):
            if fname.endswith(('txt', 'tsv')):
                files.append(fname)

        data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))
        prep_templates_info = list(_template_generator(study, full_access))

        # ENA terms for the investigation type drop down menu; "Other" is
        # always shown as the last option
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ena_terms = ['<option value="%s">%s</option>' % (term, term)
                     for term in sorted(ontology.terms) if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # "New Type" lets the user add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        template_args = {
            'files': files,
            'data_types': data_types,
            'available_prep_templates': prep_templates_info,
            'ena_terms': ena_terms,
            'user_defined_terms': user_defined_terms,
            'study': study,
            'full_access': full_access}
        return self.render_string(
            "study_description_templates/prep_template_tab.html",
            **template_args)
コード例 #5
0
ファイル: description_handlers.py プロジェクト: adamrp/qiita
    def _process_investigation_type(self, inv_type, user_def_type, new_type):
        """Return the investigation_type and add it to the ontology if needed

        Parameters
        ----------
        inv_type : str
            The investigation type
        user_def_type : str
            The user-defined investigation type
        new_type : str
            The new user-defined investigation_type

        Returns
        -------
        str
            The investigation type chosen by the user
        """
        if inv_type == 'None Selected':
            inv_type = None
        elif inv_type == 'Other' and user_def_type == 'New Type':
            # This is a nre user defined investigation type so store it
            inv_type = new_type
            ontology = Ontology(convert_to_id('ENA', 'ontology'))
            ontology.add_user_defined_term(inv_type)
        elif inv_type == 'Other' and user_def_type != 'New Type':
            inv_type = user_def_type
        return inv_type
コード例 #6
0
    def test_insert_processed_data_target_gene(self):
        """The picking outputs are copied to the processed data mountpoint and
        a new row is added to qiita.processed_data"""
        fd, fna_fp = mkstemp(suffix='_seqs.fna')
        close(fd)
        fd, qual_fp = mkstemp(suffix='_seqs.qual')
        close(fd)
        fasta_type = convert_to_id('preprocessed_fasta', 'filepath_type')
        fastq_type = convert_to_id('preprocessed_fastq', 'filepath_type')
        filepaths = [(fna_fp, fasta_type), (qual_fp, fastq_type)]

        preprocessed_data = PreprocessedData.create(
            Study(1), "preprocessed_sequence_illumina_params", 1,
            filepaths, data_type="18S")

        params = ProcessedSortmernaParams(1)
        pick_dir = mkdtemp()
        in_pick_dir = partial(join, pick_dir)
        in_db_dir = partial(join, get_mountpoint('processed_data')[0][1])

        # Placeholder for the otu table
        with open(in_pick_dir('otu_table.biom'), 'w') as biom_fh:
            biom_fh.write('\n')

        # Placeholder for the picked otus directory
        mkdir(in_pick_dir('sortmerna_picked_otus'))

        # Log file
        fd, log_fp = mkstemp(dir=pick_dir, prefix='log_', suffix='.txt')
        close(fd)
        with open(log_fp, 'w') as log_fh:
            log_fh.write('\n')

        _insert_processed_data_target_gene(preprocessed_data, params, pick_dir)

        new_id = get_count('qiita.processed_data')

        # The files must be present in the DB directory, prefixed by the new id
        expected_names = ["%s_otu_table.biom" % new_id,
                          "%s_sortmerna_picked_otus" % new_id,
                          "%s_%s" % (new_id, basename(log_fp))]
        for name in expected_names:
            self.assertTrue(exists(in_db_dir(name)))

        # A row with the new id must exist in qiita.processed_data
        sql = ("SELECT EXISTS(SELECT * FROM qiita.processed_data WHERE "
               "processed_data_id=%s)")
        row = self.conn_handler.execute_fetchone(sql, (new_id, ))
        self.assertTrue(row[0])
コード例 #7
0
ファイル: ebi.py プロジェクト: BrindhaBioinfo/qiita
    def __init__(self, preprocessed_data_id, study_title, study_abstract,
                 investigation_type, empty_value='no_data',
                 new_investigation_type=None, pmids=None, **kwargs):
        """Stores the submission information and validates the investigation
        type

        Raises
        ------
        ValueError
            If `investigation_type` is 'Other' and no `new_investigation_type`
            is given, or if the ENA ontology reports `investigation_type` as
            'not_ontology'
        """
        self.preprocessed_data_id = preprocessed_data_id
        self.study_title = study_title
        self.study_abstract = study_abstract
        self.investigation_type = investigation_type
        self.empty_value = empty_value
        self.new_investigation_type = new_investigation_type
        self.sequence_files = []

        # The XML filepaths are unknown at construction time
        self.study_xml_fp = None
        self.sample_xml_fp = None
        self.experiment_xml_fp = None
        self.run_xml_fp = None
        self.submission_xml_fp = None
        self.pmids = [] if pmids is None else pmids

        self.ebi_dir = self._get_ebi_dir()

        uses_new_type = self.investigation_type == 'Other'
        if uses_new_type and self.new_investigation_type is None:
            raise ValueError("If the investigation_type is 'Other' you have "
                             " to specify a value for new_investigation_type.")

        ena = Ontology(convert_to_id('ENA', 'ontology'))
        term_type = ena.term_type(self.investigation_type)
        if term_type == 'not_ontology':
            raise ValueError("The investigation type must be part of ENA's "
                             "ontology, '%s' is not valid" %
                             self.investigation_type)

        # Library attributes are looked up by investigation type; for 'Other'
        # the value of new_investigation_type is used as the lookup key
        if uses_new_type:
            lookup = self.new_investigation_type
        else:
            lookup = self.investigation_type

        strategy_by_type = {'metagenome': 'POOLCLONE',
                            'mimarks-survey': 'AMPLICON'}
        selection_by_type = {'mimarks-survey': 'PCR'}
        source_by_type = {}

        self.library_strategy = strategy_by_type.get(lookup, "OTHER")
        self.library_source = source_by_type.get(lookup, "METAGENOMIC")
        self.library_selection = selection_by_type.get(lookup, "unspecified")

        # Any other keyword argument is kept as additional study metadata
        self.additional_metadata = self._stringify_kwargs(kwargs)

        # The samples of the submission, keyed by sample name
        self.samples = {}
コード例 #8
0
ファイル: base_metadata_template.py プロジェクト: RNAer/qiita
    def add_filepath(self, filepath, conn_handler=None):
        r"""Populates the DB tables for storing the filepath and connects the
        `self` objects with this filepath

        Parameters
        ----------
        filepath : str
            The filepath to store
        conn_handler : SQLConnectionHandler, optional
            The connection handler to use. A new one is created if it is not
            provided.

        Raises
        ------
        QiitaDBNotImplementedError
            If `self._table` is neither 'required_sample_info' nor
            'common_prep_info'

        Notes
        -----
        Any exception raised while storing the filepath is recorded through
        ``LogEntry.create`` and then re-raised unchanged.
        """
        # Check that this function has been called from a subclass
        self._check_subclass()

        # Check if the connection handler has been provided. Create a new
        # one if not.
        conn_handler = conn_handler if conn_handler else SQLConnectionHandler()

        # Pick the filepath type and the linking table/column from the table
        # that backs this template
        if self._table == 'required_sample_info':
            fp_id = convert_to_id("sample_template", "filepath_type",
                                  conn_handler)
            table = 'sample_template_filepath'
            column = 'study_id'
        elif self._table == 'common_prep_info':
            fp_id = convert_to_id("prep_template", "filepath_type",
                                  conn_handler)
            table = 'prep_template_filepath'
            column = 'prep_template_id'
        else:
            raise QiitaDBNotImplementedError(
                'add_filepath for %s' % self._table)

        try:
            fpp_id = insert_filepaths([(filepath, fp_id)], None, "templates",
                                      "filepath", conn_handler,
                                      move_files=False)[0]
            values = (self._id, fpp_id)
            conn_handler.execute(
                "INSERT INTO qiita.{0} ({1}, filepath_id) "
                "VALUES (%s, %s)".format(table, column), values)
        except Exception as e:
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            # Bare raise keeps the original traceback; `raise e` would
            # restart it from this line on Python 2
            raise
コード例 #9
0
    def render(self, study_id, preprocessed_data):
        """Renders the information tab of a preprocessed data

        Parameters
        ----------
        study_id
            Not used by this method
        preprocessed_data : object
            The preprocessed data whose information is displayed

        Returns
        -------
        The value returned by ``self.render_string``
        """
        user = User(self.current_user)
        ppd_id = preprocessed_data.id
        ebi_status = preprocessed_data.submitted_to_insdc_status()
        ebi_study_accession = preprocessed_data.ebi_study_accession
        ebi_submission_accession = preprocessed_data.ebi_submission_accession
        vamps_status = preprocessed_data.submitted_to_vamps_status()
        filepaths = preprocessed_data.get_filepaths()
        is_local_request = self._is_local()
        # The EBI button is only shown to admin users
        show_ebi_btn = user.level == "admin"

        # ENA terms for the investigation type drop down menu; "Other" is
        # always shown as the last option
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ena_terms = ['<option value="%s">%s</option>' % (term, term)
                     for term in sorted(ontology.terms) if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # "New Type" lets the user add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        # Values used when the prep template does not exist
        prep_template_id = None
        raw_data_id = None
        inv_type = "None Selected"
        if PrepTemplate.exists(preprocessed_data.prep_template):
            prep_template_id = preprocessed_data.prep_template
            prep_template = PrepTemplate(prep_template_id)
            raw_data_id = prep_template.raw_data
            inv_type = prep_template.investigation_type or "None Selected"

        template_args = {
            'ppd_id': ppd_id,
            'show_ebi_btn': show_ebi_btn,
            'ebi_status': ebi_status,
            'ebi_study_accession': ebi_study_accession,
            'ebi_submission_accession': ebi_submission_accession,
            'filepaths': filepaths,
            'is_local_request': is_local_request,
            'prep_template_id': prep_template_id,
            'raw_data_id': raw_data_id,
            'inv_type': inv_type,
            'ena_terms': ena_terms,
            'vamps_status': vamps_status,
            'user_defined_terms': user_defined_terms}
        return self.render_string(
            "study_description_templates/preprocessed_data_info_tab.html",
            **template_args)
コード例 #10
0
ファイル: prep_template.py プロジェクト: ElDeveloper/qiita
def _get_ENA_ontology():
    """Collects the terms of the ENA ontology

    Returns
    -------
    dict of {str: list of strings}
        A dictionary of the form {'ENA': list of str, 'User': list of str}
        with the ENA-defined terms and the User-defined terms, respectively.
        Both lists are sorted, except that 'Other' is moved to the end of
        the ENA list.
    """
    ena = Ontology(convert_to_id('ENA', 'ontology'))

    official = sorted(ena.terms)
    # "Other" must be the last element of the list
    official.remove('Other')
    official.append('Other')

    user_defined = sorted(ena.user_defined_terms)
    return {'ENA': official, 'User': user_defined}
コード例 #11
0
    def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
        """The command generated for a per-sample FASTQ raw data with two
        forward reads files matches the expected split_libraries_fastq.py
        call"""
        metadata_dict = {
            'SKB8.640193': {'run_prefix': "sample1", 'primer': 'A',
                            'barcode': 'A', 'center_name': 'ANL',
                            'platform': 'ILLUMINA',
                            'instrument_model': 'Illumina MiSeq',
                            'library_construction_protocol': 'A',
                            'experiment_design_description': 'A'},
            'SKD8.640184': {'run_prefix': "sample2", 'primer': 'A',
                            'barcode': 'A', 'center_name': 'ANL',
                            'platform': 'ILLUMINA',
                            'instrument_model': 'Illumina MiSeq',
                            'library_construction_protocol': 'A',
                            'experiment_design_description': 'A'}}
        md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
        prep_template = PrepTemplate.create(md_template, Study(1), '16S')

        # One placeholder sequence file per sample
        seq_fps = []
        for fname in ('sample1.fastq', 'sample2.fastq.gz'):
            seq_fp = self.path_builder(fname)
            with open(seq_fp, 'w') as fh:
                fh.write('\n')
            self.files_to_remove.append(seq_fp)
            seq_fps.append(seq_fp)

        filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')
        fps = [(seq_fp, filepath_id) for seq_fp in seq_fps]

        raw_data = RawData.create(get_filetypes()['per_sample_FASTQ'],
                                  [prep_template], fps)
        candidates = [p for p in PreprocessedIlluminaParams.iter()
                      if p.name == 'per sample FASTQ defaults']
        params = candidates[0]

        obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
            raw_data, prep_template, params)

        # The input files are listed in the sorted order of the raw data
        # filepaths
        raw_fps = ','.join(
            fp for _, fp, _ in sorted(raw_data.get_filepaths()))
        cmd_template = (
            "split_libraries_fastq.py --store_demultiplexed_fastq -i "
            "{} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} --barcode_type "
            "not-barcoded --max_bad_run_length 3 --max_barcode_errors 1.5 "
            "--min_per_read_length_fraction 0.75 --phred_quality_threshold 3 "
            "--sequence_max_n 0")
        exp_cmd = cmd_template.format(raw_fps, obs_output_dir)
        self.assertEqual(obs_cmd, exp_cmd)
コード例 #12
0
ファイル: prep_template.py プロジェクト: RNAer/qiita
    def validate_investigation_type(self, investigation_type):
        """Checks that the investigation type is a known ENA ontology term

        Parameters
        ----------
        investigation_type : str
            The investigation type; it must be either one of the terms of the
            ENA ontology or one of its user-defined terms

        Raises
        ------
        QiitaDBColumnError
            The investigation type is not in the ENA ontology
        """
        ena = Ontology(convert_to_id('ENA', 'ontology'))
        valid_terms = ena.terms + ena.user_defined_terms
        if investigation_type in valid_terms:
            return

        raise QiitaDBColumnError(
            "'%s' is Not a valid investigation_type. "
            "Choose from: %s" % (investigation_type, ', '.join(valid_terms)))
コード例 #13
0
ファイル: ontology.py プロジェクト: josenavas/QiiTa
def ontology_patch_handler(req_op, req_path, req_value=None, req_from=None):
    """Applies a patch operation to an ontology

    Only the "add" operation is supported: it adds `req_value` as a
    user-defined term of the ontology named by `req_path`.

    Parameters
    ----------
    req_op : str
        The operation to perform on the ontology
    req_path : str
        The ontology to patch; it must have exactly one non-empty component
        once split on "/"
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element. Not used by this function.

    Returns
    -------
    dict of {str: str}
        A dictionary of the form: {'status': str, 'message': str} in which
        status is the status of the request ('error' or 'success') and message
        is a human readable string with the error message in case that status
        is 'error'.
    """
    # Reject everything but "add" up front
    if req_op != "add":
        return {
            "status": "error",
            "message": 'Operation "%s" not supported. ' "Current supported operations: add" % req_op,
        }

    # Empty components (leading/trailing/double slashes) are ignored
    path_parts = [part for part in req_path.split("/") if part]
    if len(path_parts) != 1:
        return {"status": "error", "message": "Incorrect path parameter"}
    ontology_name = path_parts[0]

    try:
        o_id = convert_to_id(ontology_name, "ontology")
    except QiitaDBLookupError:
        return {"status": "error", "message": 'Ontology "%s" does not exist' % ontology_name}

    Ontology(o_id).add_user_defined_term(req_value)
    return {"status": "success", "message": ""}
コード例 #14
0
ファイル: test_data.py プロジェクト: jenwei/qiita
    def test_create_data_type_only(self):
        """PreprocessedData.create works when only the data type is given"""
        # The id of the new object is the next one in the table
        exp_id = get_count('qiita.preprocessed_data') + 1
        ppd = PreprocessedData.create(
            self.study, self.params_table, self.params_id, self.filepaths,
            data_type="18S")
        self.assertEqual(ppd.id, exp_id)

        # All the information has been initialized correctly
        self.assertEqual(ppd.processed_data, [])
        self.assertEqual(ppd.prep_template, [])
        self.assertEqual(ppd.study, self.study.id)
        self.assertEqual(ppd.data_type(), "18S")
        exp_data_type_id = convert_to_id("18S", "data_type")
        self.assertEqual(ppd.data_type(ret_id=True), exp_data_type_id)
        self.assertEqual(ppd.submitted_to_vamps_status(), "not submitted")
        self.assertEqual(ppd.processing_status, "not_processed")
        self.assertEqual(ppd.status, "sandbox")
        exp_preprocessing_info = (self.params_table, self.params_id)
        self.assertEqual(ppd.preprocessing_info, exp_preprocessing_info)
コード例 #15
0
ファイル: 58.py プロジェクト: ElDeveloper/qiita
def create_command(software, name, description, parameters, outputs=None,
                   analysis_only=False):
    r"""Replicates the Command.create code at the time the patch was written

    Validates the parameters dictionary and then, inside a single ``TRN``
    block, inserts the command, its parameters, its artifact parameters and
    its outputs.

    Parameters
    ----------
    software : object
        The software the command is attached to; its ``id`` and ``type``
        attributes are read
    name : str
        The command name, stored in qiita.software_command
    description : str
        The command description, stored in qiita.software_command
    parameters : dict
        Dictionary of the form {param_name: [parameter_type, default]}.
        ``parameter_type`` is one of the plain supported types or a string
        starting with 'choice', 'mchoice' or 'artifact'; for those three the
        text after the first ':' is parsed with ``loads``.
    outputs : dict, optional
        Dictionary of the form {output_name: artifact_type}
    analysis_only : bool, optional
        Stored in the ``is_analysis`` column of qiita.software_command

    Returns
    -------
    Command
        ``Command`` built from the id of the inserted command

    Raises
    ------
    QiitaDBError
        If `parameters` is empty, if one of its values does not have exactly
        two elements, if a parameter type is not supported or if a default
        value is not listed in the available choices
    QiitaDBDuplicateError
        If the software already has a command named `name`
    """
    # Perform some sanity checks in the parameters dictionary
    if not parameters:
        raise QiitaDBError(
            "Error creating command %s. At least one parameter should "
            "be provided." % name)
    # Rows for qiita.command_parameter: [name, type, required, default]
    sql_param_values = []
    # Artifact parameters are kept apart: [name, 'artifact', artifact types]
    sql_artifact_params = []
    for pname, vals in parameters.items():
        if len(vals) != 2:
            raise QiitaDBError(
                "Malformed parameters dictionary, the format should be "
                "{param_name: [parameter_type, default]}. Found: "
                "%s for parameter name %s" % (vals, pname))

        ptype, dflt = vals
        # Check that the type is one of the supported types
        supported_types = ['string', 'integer', 'float', 'reference',
                           'boolean', 'prep_template', 'analysis']
        if ptype not in supported_types and not ptype.startswith(
                ('choice', 'mchoice', 'artifact')):
            # The prefix-based types are only added for the error message
            supported_types.extend(['choice', 'mchoice', 'artifact'])
            raise QiitaDBError(
                "Unsupported parameters type '%s' for parameter %s. "
                "Supported types are: %s"
                % (ptype, pname, ', '.join(supported_types)))

        if ptype.startswith(('choice', 'mchoice')) and dflt is not None:
            # The available choices are encoded after the ':' in the type
            choices = set(loads(ptype.split(':')[1]))
            dflt_val = dflt
            if ptype.startswith('choice'):
                # In the choice case, the dflt value is a single string,
                # create a list with it the string on it to use the
                # issuperset call below
                dflt_val = [dflt_val]
            else:
                # jsonize the list to store it in the DB
                dflt = dumps(dflt)
            if not choices.issuperset(dflt_val):
                raise QiitaDBError(
                    "The default value '%s' for the parameter %s is not "
                    "listed in the available choices: %s"
                    % (dflt, pname, ', '.join(choices)))

        if ptype.startswith('artifact'):
            # The accepted artifact types are encoded after the ':'
            atypes = loads(ptype.split(':')[1])
            sql_artifact_params.append(
                [pname, 'artifact', atypes])
        else:
            # A parameter is stored as required only when it has no default
            if dflt is not None:
                sql_param_values.append([pname, ptype, False, dflt])
            else:
                sql_param_values.append([pname, ptype, True, None])

    with TRN:
        # Refuse to create a second command with the same name for the
        # same software
        sql = """SELECT EXISTS(SELECT *
                               FROM qiita.software_command
                               WHERE software_id = %s AND name = %s)"""
        TRN.add(sql, [software.id, name])
        if TRN.execute_fetchlast():
            raise QiitaDBDuplicateError(
                "command", "software: %d, name: %s"
                           % (software.id, name))
        # Add the command to the DB
        sql = """INSERT INTO qiita.software_command
                        (name, software_id, description, is_analysis)
                 VALUES (%s, %s, %s, %s)
                 RETURNING command_id"""
        sql_params = [name, software.id, description, analysis_only]
        TRN.add(sql, sql_params)
        c_id = TRN.execute_fetchlast()

        # Add the parameters to the DB
        sql = """INSERT INTO qiita.command_parameter
                    (command_id, parameter_name, parameter_type, required,
                     default_value)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING command_parameter_id"""
        sql_params = [[c_id, pname, p_type, reqd, default]
                      for pname, p_type, reqd, default in sql_param_values]
        TRN.add(sql, sql_params, many=True)
        TRN.execute()

        # Add the artifact parameters
        sql_type = """INSERT INTO qiita.parameter_artifact_type
                        (command_parameter_id, artifact_type_id)
                      VALUES (%s, %s)"""
        # From here on `supported_types` collects artifact type ids; it is
        # no longer the list of parameter type names used in the loop above
        supported_types = []
        for pname, p_type, atypes in sql_artifact_params:
            # `sql` is still the qiita.command_parameter INSERT defined
            # above; artifact parameters are always required, without default
            sql_params = [c_id, pname, p_type, True, None]
            TRN.add(sql, sql_params)
            pid = TRN.execute_fetchlast()
            sql_params = [[pid, convert_to_id(at, 'artifact_type')]
                          for at in atypes]
            TRN.add(sql_type, sql_params, many=True)
            supported_types.extend([atid for _, atid in sql_params])

        # If the software type is 'artifact definition', there are a couple
        # of extra steps
        if software.type == 'artifact definition':
            # If supported types is not empty, link the software with these
            # types
            if supported_types:
                sql = """INSERT INTO qiita.software_artifact_type
                                (software_id, artifact_type_id)
                            VALUES (%s, %s)"""
                sql_params = [[software.id, atid]
                              for atid in supported_types]
                TRN.add(sql, sql_params, many=True)
            # If this is the validate command, we need to add the
            # provenance and name parameters. These are used internally,
            # that's why we are adding them here
            if name == 'Validate':
                sql = """INSERT INTO qiita.command_parameter
                            (command_id, parameter_name, parameter_type,
                             required, default_value)
                         VALUES (%s, 'name', 'string', 'False',
                                 'dflt_name'),
                                (%s, 'provenance', 'string', 'False', NULL)
                         """
                TRN.add(sql, [c_id, c_id])

        # Add the outputs to the command
        # NOTE(review): the only explicit execute after the artifact
        # parameters is the one below, which runs only when `outputs` is
        # given; queries added above are presumably flushed when the TRN
        # block exits - confirm
        if outputs:
            sql = """INSERT INTO qiita.command_output
                        (name, command_id, artifact_type_id)
                     VALUES (%s, %s, %s)"""
            sql_args = [[pname, c_id, convert_to_id(at, 'artifact_type')]
                        for pname, at in outputs.items()]
            TRN.add(sql, sql_args, many=True)
            TRN.execute()

    return Command(c_id)
コード例 #16
0
ファイル: test_util.py プロジェクト: MarkBruns/qiita
 def test_convert_to_id_bad_value(self):
     """Tests that a value missing from the table raises a lookup error"""
     self.assertRaises(QiitaDBLookupError, convert_to_id, "FAKE",
                       "filepath_type")
コード例 #17
0
ファイル: test_util.py プロジェクト: MarkBruns/qiita
 def test_convert_to_id(self):
     """Tests that the expected id is returned for each lookup"""
     # (arguments handed to convert_to_id, expected id)
     cases = [(("directory", "filepath_type"), 8),
              (("running", "analysis_status", "status"), 3),
              (("EMP", "portal_type", "portal"), 2)]
     for args, exp_id in cases:
         self.assertEqual(convert_to_id(*args), exp_id)
コード例 #18
0
ファイル: commands.py プロジェクト: MarkBruns/qiita
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple
        (study_accession, submission_accession); both are None when `send`
        is False

    Raises
    ------
    ValueError
        If the current INSDC status is "submitting" or "success", or if the
        investigation type of the prep template is neither an official nor a
        user-defined term of the ENA ontology
    IOError
        If the output folder already exists
    ComputeError
        If `send` is True and any of the returned accessions is None

    Notes
    -----
    If fastq_dir_fp is passed, it must not contain any empty files, or
    gzipped empty files
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ("submitting", "success"):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        # NOTE(review): the status is switched before the validations below;
        # a failure there leaves it as "submitting", which the check above
        # rejects on a retry - confirm this is intended
        preprocessed_data.update_insdc_status("submitting")

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id("ENA", "ontology"))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = "Other"
        new_investigation_type = current_type
    else:
        # This should never happen
        raise ValueError(
            "Unrecognized investigation type: '%s'. This term "
            "is neither one of the official terms nor one of the "
            "user-defined terms in the ENA ontology" % current_type
        )

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it

        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths() if ftype == "preprocessed_demux"][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh, list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                wrote_sequences = False
                with gzopen(sample_fp, "w") as fh:
                    for record in iterator:
                        fh.write(record)
                        wrote_sequences = True

                # Samples without sequences must not leave an empty gzipped
                # file behind
                if not wrote_sequences:
                    remove(sample_fp)

    output_dir = fastq_dir_fp + "_submission"

    samp_fp = join(fastq_dir_fp, "sample_metadata.txt")
    prep_fp = join(fastq_dir_fp, "prep_metadata.txt")

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp("study.xml")
    sample_fp = get_output_fp("sample.xml")
    experiment_fp = get_output_fp("experiment.xml")
    run_fp = get_output_fp("run.xml")
    submission_fp = get_output_fp("submission.xml")

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError("The output folder already exists: %s" % output_dir)

    with open(samp_fp, "U") as st, open(prep_fp, "U") as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str,
            study.title,
            study.info["study_abstract"],
            investigation_type,
            st,
            pt,
            fastq_dir_fp,
            new_investigation_type=new_investigation_type,
            pmids=study.pmids,
        )

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp, run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status("failed")

            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status("success", study_accession, submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
コード例 #19
0
ファイル: 58.py プロジェクト: yotohoshi/qiita
def create_command(software,
                   name,
                   description,
                   parameters,
                   outputs=None,
                   analysis_only=False):
    r"""Adds a new command to the DB, replicating Command.create

    This is a copy of the Command.create code as it was when the patch was
    written.

    Parameters
    ----------
    software : object
        The software that owns the command. Its `id` and `type` attributes
        are read.
    name : str
        The name of the command
    description : str
        The description of the command
    parameters : dict
        The command parameters, as {param_name: [parameter_type, default]}
    outputs : dict, optional
        The command outputs, as {output_name: artifact_type}
    analysis_only : bool, optional
        Value stored in the `is_analysis` column of the new command

    Returns
    -------
    Command
        The command built from the id of the newly inserted row

    Raises
    ------
    QiitaDBError
        If `parameters` is empty
        If an entry of `parameters` does not have exactly two elements
        If a parameter type is not supported
        If a default value is not listed in the parameter choices
    QiitaDBDuplicateError
        If `software` already has a command named `name`
    """
    if not parameters:
        raise QiitaDBError(
            "Error creating command %s. At least one parameter should "
            "be provided." % name)

    # Types that must match exactly, and types that are recognized by the
    # prefix of the parameter type string
    plain_types = ['string', 'integer', 'float', 'reference', 'boolean',
                   'prep_template', 'analysis']
    prefix_types = ('choice', 'mchoice', 'artifact')

    regular_params = []
    artifact_params = []
    for param_name, spec in parameters.items():
        if len(spec) != 2:
            raise QiitaDBError(
                "Malformed parameters dictionary, the format should be "
                "{param_name: [parameter_type, default]}. Found: "
                "%s for parameter name %s" % (spec, param_name))
        param_type, default = spec

        is_supported = (param_type in plain_types or
                        param_type.startswith(prefix_types))
        if not is_supported:
            raise QiitaDBError(
                "Unsupported parameters type '%s' for parameter %s. "
                "Supported types are: %s" %
                (param_type, param_name,
                 ', '.join(plain_types + list(prefix_types))))

        has_choices = param_type.startswith(('choice', 'mchoice'))
        if has_choices and default is not None:
            options = set(loads(param_type.split(':')[1]))
            if param_type.startswith('choice'):
                # A choice default is a single value; wrap it so the
                # superset check below treats it as one element
                chosen = [default]
            else:
                chosen = default
                # mchoice defaults are stored in the DB as a JSON string
                default = dumps(default)
            if not options.issuperset(chosen):
                raise QiitaDBError(
                    "The default value '%s' for the parameter %s is not "
                    "listed in the available choices: %s" %
                    (default, param_name, ', '.join(options)))

        if param_type.startswith('artifact'):
            artifact_params.append(
                [param_name, 'artifact', loads(param_type.split(':')[1])])
        else:
            # A parameter is required exactly when it has no default
            regular_params.append(
                [param_name, param_type, default is None, default])

    with TRN:
        exists_sql = """SELECT EXISTS(SELECT *
                               FROM qiita.software_command
                               WHERE software_id = %s AND name = %s)"""
        TRN.add(exists_sql, [software.id, name])
        if TRN.execute_fetchlast():
            raise QiitaDBDuplicateError(
                "command", "software: %d, name: %s" % (software.id, name))

        # Insert the command itself and retrieve its id
        command_sql = """INSERT INTO qiita.software_command
                        (name, software_id, description, is_analysis)
                 VALUES (%s, %s, %s, %s)
                 RETURNING command_id"""
        TRN.add(command_sql, [name, software.id, description, analysis_only])
        command_id = TRN.execute_fetchlast()

        # Insert the non-artifact parameters in a single batch
        param_sql = """INSERT INTO qiita.command_parameter
                    (command_id, parameter_name, parameter_type, required,
                     default_value)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING command_parameter_id"""
        TRN.add(param_sql,
                [[command_id] + row for row in regular_params],
                many=True)
        TRN.execute()

        # Insert the artifact parameters one by one, since the id of each
        # new row is needed to link it with its artifact types
        link_sql = """INSERT INTO qiita.parameter_artifact_type
                        (command_parameter_id, artifact_type_id)
                      VALUES (%s, %s)"""
        linked_type_ids = []
        for param_name, param_type, artifact_types in artifact_params:
            TRN.add(param_sql,
                    [command_id, param_name, param_type, True, None])
            parameter_id = TRN.execute_fetchlast()
            links = [[parameter_id, convert_to_id(at, 'artifact_type')]
                     for at in artifact_types]
            TRN.add(link_sql, links, many=True)
            linked_type_ids.extend([type_id for _, type_id in links])

        # 'artifact definition' software needs a couple of extra steps
        if software.type == 'artifact definition':
            # Link the software with the artifact types, if there are any
            if linked_type_ids:
                sql = """INSERT INTO qiita.software_artifact_type
                                (software_id, artifact_type_id)
                            VALUES (%s, %s)"""
                TRN.add(sql,
                        [[software.id, type_id]
                         for type_id in linked_type_ids],
                        many=True)
            # The Validate command gets the name and provenance
            # parameters, which are used internally
            if name == 'Validate':
                sql = """INSERT INTO qiita.command_parameter
                            (command_id, parameter_name, parameter_type,
                             required, default_value)
                         VALUES (%s, 'name', 'string', 'False',
                                 'dflt_name'),
                                (%s, 'provenance', 'string', 'False', NULL)
                         """
                TRN.add(sql, [command_id, command_id])

        # Insert the command outputs
        if outputs:
            sql = """INSERT INTO qiita.command_output
                        (name, command_id, artifact_type_id)
                     VALUES (%s, %s, %s)"""
            output_rows = [
                [output_name, command_id,
                 convert_to_id(artifact_type, 'artifact_type')]
                for output_name, artifact_type in outputs.items()]
            TRN.add(sql, output_rows, many=True)
            TRN.execute()

    return Command(command_id)
コード例 #20
0
    def render(self, study_id, preprocessed_data):
        """Renders the information tab of a preprocessed data object

        Parameters
        ----------
        study_id : object
            Not used; the study passed to the template is read from
            `preprocessed_data.study`
        preprocessed_data : object
            The preprocessed data to show

        Returns
        -------
        The result of `render_string` on the
        "study_description_templates/preprocessed_data_info_tab.html" template
        """
        context = {
            'ppd_id': preprocessed_data.id,
            'ebi_status': preprocessed_data.submitted_to_insdc_status(),
            'ebi_study_accession': preprocessed_data.ebi_study_accession,
            'ebi_submission_accession':
                preprocessed_data.ebi_submission_accession,
            'vamps_status': preprocessed_data.submitted_to_vamps_status(),
            'filepaths': preprocessed_data.get_filepaths(),
            'is_local_request': self._is_local(),
            # The EBI button is shown only to admins
            'show_ebi_btn': self.current_user.level == "admin",
            'processing_status': preprocessed_data.processing_status,
            'processed_data': preprocessed_data.processed_data,
        }

        # ENA terms for the investigation type drop down menu; "Other" is
        # left out of the sorted terms and appended so it shows last
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ena_terms = ['<option value="%s">%s</option>' % (term, term)
                     for term in sorted(ontology.terms) if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # 'New Type' lets users add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        # Values used when the prep template does not exist
        prep_template_id = None
        raw_data_id = None
        inv_type = "None Selected"
        if PrepTemplate.exists(preprocessed_data.prep_template):
            prep_template_id = preprocessed_data.prep_template
            prep_template = PrepTemplate(prep_template_id)
            raw_data_id = prep_template.raw_data
            inv_type = prep_template.investigation_type or "None Selected"

        process_params = {}
        for param in ProcessedSortmernaParams.iter():
            process_params[param.id] = (generate_param_str(param), param.name)

        context.update(
            prep_template_id=prep_template_id,
            raw_data_id=raw_data_id,
            inv_type=inv_type,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            process_params=process_params,
            # The interface only needs an ID for the default parameters to
            # initialize itself
            default_params=1,
            study_id=preprocessed_data.study)

        return self.render_string(
            "study_description_templates/preprocessed_data_info_tab.html",
            **context)
コード例 #21
0
ファイル: prep_template.py プロジェクト: RNAer/qiita
    def create(cls, md_template, raw_data, study, data_type,
               investigation_type=None):
        r"""Stores a new prep template in the database

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by samples Ids
        raw_data : RawData
            The raw_data to which the prep template belongs to.
        study : Study
            The study to which the prep template belongs to.
        data_type : str or int
            The data_type of the prep template, either its name or its id
        investigation_type : str, optional
            The investigation type, if relevant

        Returns
        -------
        A new instance of `cls` to access to the PrepTemplate stored in the DB

        Raises
        ------
        QiitaDBColumnError
            If the investigation_type is not valid
            If a required column is missing in md_template
        QiitaDBExecutionError
            If the creation fails and md_template has samples that are not
            in the sample template of the study
        """
        # Only a supplied investigation type needs to be checked
        if investigation_type is not None:
            cls.validate_investigation_type(investigation_type)

        conn_handler = SQLConnectionHandler()
        queue_name = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
        conn_handler.create_queue(queue_name)

        # data_type can be given by name or by id; resolve the other one
        if not isinstance(data_type, (int, long)):
            data_type_id = convert_to_id(data_type, "data_type", conn_handler)
            data_type_str = data_type
        else:
            data_type_id = data_type
            data_type_str = convert_from_id(data_type, "data_type",
                                            conn_handler)

        md_template = cls._clean_validate_template(md_template, study.id,
                                                   data_type_str, conn_handler)

        # The prep_id is needed for multiple calls below, which the queue
        # system does not currently support, so this insert runs outside the
        # queue
        insert_sql = ("INSERT INTO qiita.prep_template (data_type_id, "
                      "raw_data_id, investigation_type) VALUES (%s, %s, %s) "
                      "RETURNING prep_template_id")
        insert_args = (data_type_id, raw_data.id, investigation_type)
        prep_id = conn_handler.execute_fetchone(insert_sql, insert_args)[0]

        cls._add_common_creation_steps_to_queue(md_template, prep_id,
                                                conn_handler, queue_name)

        try:
            conn_handler.execute_queue(queue_name)
        except Exception:
            # The row inserted outside the queue has to be removed by hand
            delete_sql = ("DELETE FROM qiita.prep_template where "
                          "{0} = %s".format(cls._id_column))
            conn_handler.execute(delete_sql, (prep_id,))

            # Report the samples of the prep template that the sample
            # template of the study does not have, if there are any
            prep_samples = set(md_template.index.values)
            select_sql = ("SELECT sample_id from qiita.required_sample_info "
                          "WHERE study_id = %s")
            study_samples = [
                row[0]
                for row in conn_handler.execute_fetchall(select_sql,
                                                         [study.id])]
            unknown_samples = prep_samples.difference(study_samples)
            if unknown_samples:
                raise QiitaDBExecutionError(
                    'Samples found in prep template but not sample template: '
                    '%s' % ', '.join(unknown_samples))

            # Any other error is propagated unchanged
            raise

        new_template = cls(prep_id)
        new_template.generate_files()
        return new_template
コード例 #22
0
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from random import SystemRandom
from string import ascii_letters, digits
from os.path import exists, join, basename
from tarfile import open as taropen

from qiita_db.sql_connection import TRN
from qiita_db.artifact import Artifact
from qiita_db.util import (insert_filepaths, convert_to_id, get_mountpoint,
                           get_mountpoint_path_by_id)

# Alphabet from which the random client ids and secrets below are drawn
pool = ascii_letters + digits
# NOTE(review): tgz_id and analysis_mp are not used in the statements visible
# here; presumably a later part of the patch uses them - confirm.
tgz_id = convert_to_id("tgz", "filepath_type")
_id, analysis_mp = get_mountpoint('analysis')[0]
with TRN:
    # 2 and 3 are the ids of the 2 new software rows, the BIOM and
    # target gene type plugins
    for i in [2, 3]:
        # SystemRandom draws from the OS randomness source; the client id is
        # 50 characters long and the client secret 255
        client_id = ''.join([SystemRandom().choice(pool) for _ in range(50)])
        client_secret = ''.join(
            [SystemRandom().choice(pool) for _ in range(255)])

        # Queue the insertion of the new credentials
        sql = """INSERT INTO qiita.oauth_identifiers (client_id, client_secret)
                VALUES (%s, %s)"""
        TRN.add(sql, [client_id, client_secret])

        # Statement that links a software row with a client id
        sql = """INSERT INTO qiita.oauth_software (software_id, client_id)
                 VALUES (%s, %s)"""
コード例 #23
0
 def test_convert_to_id_bad_value(self):
     """Tests that an unknown value raises IncompetentQiitaDeveloperError"""
     with self.assertRaises(IncompetentQiitaDeveloperError):
         convert_to_id("FAKE", "filepath_type")
コード例 #24
0
ファイル: ebi.py プロジェクト: jwdebelius/qiita
    def __init__(self,
                 preprocessed_data_id,
                 study_title,
                 study_abstract,
                 investigation_type,
                 empty_value='no_data',
                 new_investigation_type=None,
                 pmids=None,
                 **kwargs):
        """Initializes the submission and validates its investigation type

        Parameters
        ----------
        preprocessed_data_id : object
            The id of the preprocessed data being submitted
        study_title : str
            The title of the study
        study_abstract : str
            The abstract of the study
        investigation_type : str
            The investigation type. It must be part of the ENA ontology
        empty_value : str, optional
            Stored in the `empty_value` attribute
        new_investigation_type : str, optional
            The user-defined investigation type. It is needed when
            `investigation_type` is 'Other'
        pmids : list, optional
            Stored in the `pmids` attribute; an empty list is used when it
            is not provided
        kwargs
            Arbitrary additional study metadata

        Raises
        ------
        ValueError
            If `investigation_type` is 'Other' and `new_investigation_type`
            is not provided
            If `investigation_type` is not part of the ENA ontology
        """
        self.preprocessed_data_id = preprocessed_data_id
        self.study_title = study_title
        self.study_abstract = study_abstract
        self.investigation_type = investigation_type
        self.empty_value = empty_value
        self.new_investigation_type = new_investigation_type
        self.sequence_files = []

        self.study_xml_fp = None
        self.sample_xml_fp = None
        self.experiment_xml_fp = None
        self.run_xml_fp = None
        self.submission_xml_fp = None
        self.pmids = pmids if pmids is not None else []

        self.ebi_dir = self._get_ebi_dir()

        if self.investigation_type == 'Other' and \
                self.new_investigation_type is None:
            # The two literals used to be joined with a doubled space
            # ("you have  to specify")
            raise ValueError("If the investigation_type is 'Other' you have "
                             "to specify a value for new_investigation_type.")

        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        if ontology.term_type(self.investigation_type) == 'not_ontology':
            raise ValueError("The investigation type must be part of ENA's "
                             "ontology, '%s' is not valid" %
                             self.investigation_type)

        # dicts that map investigation_type to library attributes
        lib_strategies = {
            'metagenome': 'POOLCLONE',
            'mimarks-survey': 'AMPLICON'
        }
        lib_selections = {'mimarks-survey': 'PCR'}
        lib_sources = {}

        # if the investigation_type is 'Other' we should use the value in
        # the new_investigation_type attribute to retrieve this information
        if self.investigation_type == 'Other':
            key = self.new_investigation_type
        else:
            key = self.investigation_type

        # Investigation types without an entry fall back to these defaults
        self.library_strategy = lib_strategies.get(key, "OTHER")
        self.library_source = lib_sources.get(key, "METAGENOMIC")
        self.library_selection = lib_selections.get(key, "unspecified")

        # This allows addition of other arbitrary study metadata
        self.additional_metadata = self._stringify_kwargs(kwargs)

        # This will hold the submission's samples, keyed by the sample name
        self.samples = {}
コード例 #25
0
ファイル: test_util.py プロジェクト: Jorge-C/qiita
 def test_convert_to_id_bad_value(self):
     """Tests that an unknown value raises IncompetentQiitaDeveloperError"""
     with self.assertRaises(IncompetentQiitaDeveloperError):
         convert_to_id("FAKE", "filepath_type")
コード例 #26
0
ファイル: test_ontology.py プロジェクト: Jorge-C/qiita
 def testConvertToID(self):
     """Tests that the 'ENA' ontology is converted to its expected id"""
     expected_id = 807481739
     observed_id = convert_to_id('ENA', 'ontology')
     self.assertEqual(observed_id, expected_id)
コード例 #27
0
ファイル: prep_template.py プロジェクト: DarcyMyers/qiita
    def create_qiime_mapping_file(self):
        """This creates the QIIME mapping file and links it in the db.

        The prep template is joined with the latest sample template file of
        the study, the QIIME-required columns are renamed (or created), and
        the result is written as a tab separated file in the templates
        mountpoint.

        Returns
        -------
        filepath : str
            The filepath of the created QIIME mapping file

        Raises
        ------
        ValueError
            If the prep template is not a subset of the sample template
        QiitaDBWarning
            If the QIIME-required columns are not present in the template

        Notes
        -----
        We cannot ensure that the QIIME-required columns are present in the
        metadata map. However, we have to generate a QIIME-compliant mapping
        file. Since the user may need a QIIME mapping file, but not these
        QIIME-required columns, we are going to create them and
        populate them with the value XXQIITAXX.
        """
        rename_cols = {
            'barcode': 'BarcodeSequence',
            'primer': 'LinkerPrimerSequence',
            'description': 'Description',
        }

        # new_cols holds the columns that go first in the mapping file
        if 'reverselinkerprimer' in self.categories():
            rename_cols['reverselinkerprimer'] = 'ReverseLinkerPrimer'
            new_cols = ['BarcodeSequence', 'LinkerPrimerSequence',
                        'ReverseLinkerPrimer']
        else:
            new_cols = ['BarcodeSequence', 'LinkerPrimerSequence']

        # getting the latest sample template
        conn_handler = SQLConnectionHandler()
        sql = """SELECT filepath_id, filepath
                 FROM qiita.filepath
                    JOIN qiita.sample_template_filepath
                    USING (filepath_id)
                 WHERE study_id=%s
                 ORDER BY filepath_id DESC"""
        sample_template_fname = conn_handler.execute_fetchall(
            sql, (self.study_id,))[0][1]
        # The templates mountpoint holds both the sample template that is
        # read here and the mapping file that is written below
        _, templates_dir = get_mountpoint('templates')[0]
        sample_template_fp = join(templates_dir, sample_template_fname)

        # reading files via pandas
        st = load_template_to_dataframe(sample_template_fp)
        pt = self.to_dataframe()

        st_sample_names = set(st.index)
        pt_sample_names = set(pt.index)

        if not pt_sample_names.issubset(st_sample_names):
            raise ValueError(
                "Prep template is not a sub set of the sample template, "
                "file: %s - samples: %s"
                % (sample_template_fp,
                   ', '.join(pt_sample_names - st_sample_names)))

        mapping = pt.join(st, lsuffix="_prep")
        mapping.rename(columns=rename_cols, inplace=True)

        # Pre-populate the QIIME-required columns with the value XXQIITAXX
        index = mapping.index
        placeholder = ['XXQIITAXX'] * len(index)
        missing = []
        for val in viewvalues(rename_cols):
            if val not in mapping:
                missing.append(val)
                mapping[val] = pd.Series(placeholder, index=index)

        if missing:
            warnings.warn(
                "Some columns required to generate a QIIME-compliant mapping "
                "file are not present in the template. A placeholder value "
                "(XXQIITAXX) has been used to populate these columns. Missing "
                "columns: %s" % ', '.join(missing),
                QiitaDBWarning)

        # Gets the original mapping columns and readjust the order to comply
        # with QIIME requirements: the fixed leading columns, then the rest
        # of the columns in their current order, then Description. Every
        # column with a fixed position is left out of the middle section;
        # this includes ReverseLinkerPrimer, which would otherwise be
        # selected twice and show up duplicated in the file
        positioned = set(new_cols)
        positioned.add('Description')
        cols = [col for col in mapping.columns.values.tolist()
                if col not in positioned]
        new_cols.extend(cols)
        new_cols.append('Description')
        mapping = mapping[new_cols]

        # figuring out the filepath for the QIIME map file
        filepath = join(templates_dir,
                        '%d_prep_%d_qiime_%s.txt' % (
                            self.study_id, self.id,
                            strftime("%Y%m%d-%H%M%S")))

        # Save the mapping file
        mapping.to_csv(filepath, index_label='#SampleID', na_rep='',
                       sep='\t')

        # adding the fp to the object
        self.add_filepath(
            filepath,
            fp_id=convert_to_id("qiime_map", "filepath_type"))

        return filepath
コード例 #28
0
ファイル: raw_data_tab.py プロジェクト: RNAer/qiita
    def render(self, study, raw_data, full_access):
        """Renders the editor tab of a raw data object

        Parameters
        ----------
        study : object
            The study being shown
        raw_data : object
            The raw data being shown
        full_access : bool
            If false, only the prep templates whose status is 'public' are
            listed

        Returns
        -------
        The result of `render_string` on the
        "study_description_templates/raw_data_editor_tab.html" template
        """
        study_status = study.status
        user_level = self.current_user.level
        raw_data_id = raw_data.id
        files = [fname
                 for _, fname in get_files_from_uploads_folders(str(study.id))]

        # The available prep template data types, sorted by their second
        # element
        data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

        # ENA terms for the investigation type drop down menu; "Other" is
        # left out of the sorted terms and appended so it shows last
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ena_terms = ['<option value="%s">%s</option>' % (term, term)
                     for term in sorted(ontology.terms) if term != 'Other']
        ena_terms.append('<option value="Other">Other</option>')

        # 'New Type' lets users add a new user-defined investigation type
        user_defined_terms = ontology.user_defined_terms + ['New Type']

        # Collect the prep templates that exist, belong to this study and
        # are visible with the given access level
        available_prep_templates = []
        for pt_id in sorted(raw_data.prep_templates):
            if not PrepTemplate.exists(pt_id):
                continue
            pt = PrepTemplate(pt_id)
            if study.id != pt.study_id:
                continue
            if full_access or pt.status == 'public':
                available_prep_templates.append(pt)

        # Filepath types of the filetypes that have a fixed list; any other
        # filetype gets every raw filepath type
        fixed_filepath_types = {
            'SFF': ['sff'],
            'FASTA': ['fasta', 'qual'],
            'FASTQ': ['barcodes', 'forward seqs', 'reverse seqs'],
        }
        filetype = raw_data.filetype
        if filetype in fixed_filepath_types:
            fts = fixed_filepath_types[filetype]
        else:
            fts = [fp_type.split('_', 1)[1].replace('_', ' ')
                   for fp_type in get_filepath_types()
                   if fp_type.startswith('raw_')]

        # The raw data can be edited (i.e. adding prep templates and files)
        # only if the study is sandboxed or the current user is an admin
        is_editable = study_status == 'sandbox' or user_level == 'admin'

        raw_data_files = raw_data.get_filepaths()
        raw_data_link_status = raw_data.link_filepaths_status

        in_progress_msgs = {'linking': "Linking files...",
                            'unlinking': "Unlinking files..."}
        if raw_data_link_status in in_progress_msgs:
            # While files are being (un)linked the link button is disabled
            # and the unlink button is hidden
            link_msg = in_progress_msgs[raw_data_link_status]
            disable_link_btn = True
            show_unlink_btn = False
        else:
            disable_link_btn = False
            # No (un)linking is in progress here, so the unlink button only
            # depends on the study being editable and on files being attached
            show_unlink_btn = is_editable and raw_data_files
            link_msg = ""
            if raw_data_link_status.startswith('failed'):
                link_msg = "Error (un)linking files: %s" % raw_data_link_status

        raw_data_filetype = raw_data.filetype

        return self.render_string(
            "study_description_templates/raw_data_editor_tab.html",
            study_id=study.id,
            study_status=study_status,
            user_level=user_level,
            raw_data_id=raw_data_id,
            files=files,
            data_types=data_types,
            ena_terms=ena_terms,
            user_defined_terms=user_defined_terms,
            available_prep_templates=available_prep_templates,
            filepath_types=fts,
            is_editable=is_editable,
            show_unlink_btn=show_unlink_btn,
            link_msg=link_msg,
            raw_data_files=raw_data_files,
            raw_data_filetype=raw_data_filetype,
            disable_link_btn=disable_link_btn)
コード例 #29
0
ファイル: prep_template.py プロジェクト: adamrp/qiita
    def create(cls, md_template, study, data_type, investigation_type=None):
        r"""Creates the metadata template in the database

        The prep template row is inserted, the common creation steps are
        run, the template is linked with the study and its files are
        generated, all within a single `TRN` context.

        Parameters
        ----------
        md_template : DataFrame
            The metadata template file contents indexed by samples Ids
        study : Study
            The study to which the prep template belongs to.
        data_type : str or int
            The data_type of the prep template
        investigation_type : str, optional
            The investigation type, if relevant

        Returns
        -------
        A new instance of `cls` to access to the PrepTemplate stored in the DB

        Raises
        ------
        QiitaDBColumnError
            If the investigation_type is not valid
            If a required column is missing in md_template
        QiitaDBExecutionError
            If the common creation steps fail and md_template has samples
            that are not in the sample template of the study
        """
        with TRN:
            # If the investigation_type is supplied, make sure it is one of
            # the recognized investigation types
            if investigation_type is not None:
                cls.validate_investigation_type(investigation_type)

            # Check if the data_type is the id or the string
            if isinstance(data_type, (int, long)):
                data_type_id = data_type
                data_type_str = convert_from_id(data_type, "data_type")
            else:
                data_type_id = convert_to_id(data_type, "data_type")
                data_type_str = data_type

            # Target gene data types are validated against the target gene
            # columns too; the copy keeps PREP_TEMPLATE_COLUMNS unmodified
            pt_cols = PREP_TEMPLATE_COLUMNS
            if data_type_str in TARGET_GENE_DATA_TYPES:
                pt_cols = deepcopy(PREP_TEMPLATE_COLUMNS)
                pt_cols.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)

            md_template = cls._clean_validate_template(md_template, study.id,
                                                       pt_cols)

            # Insert the metadata template
            sql = """INSERT INTO qiita.prep_template
                        (data_type_id, investigation_type)
                     VALUES (%s, %s)
                     RETURNING prep_template_id"""
            TRN.add(sql, [data_type_id, investigation_type])
            prep_id = TRN.execute_fetchlast()

            try:
                cls._common_creation_steps(md_template, prep_id)
            except Exception:
                # Check if sample IDs present here but not in sample template
                sql = """SELECT sample_id from qiita.study_sample
                         WHERE study_id = %s"""
                # Get list of study sample IDs, prep template study IDs,
                # and their intersection
                TRN.add(sql, [study.id])
                prep_samples = set(md_template.index.values)
                unknown_samples = prep_samples.difference(
                    TRN.execute_fetchflatten())
                if unknown_samples:
                    raise QiitaDBExecutionError(
                        'Samples found in prep template but not sample '
                        'template: %s' % ', '.join(unknown_samples))

                # some other error we haven't seen before so raise it
                raise

            # Link the prep template with the study
            sql = """INSERT INTO qiita.study_prep_template
                        (study_id, prep_template_id)
                     VALUES (%s, %s)"""
            TRN.add(sql, [study.id, prep_id])

            TRN.execute()

            # Build the object and generate its files while still inside
            # the TRN context
            pt = cls(prep_id)
            pt.generate_files()

            return pt
コード例 #30
0
ファイル: ebi.py プロジェクト: josenavas/QiiTa
    def __init__(self, artifact_id, action):
        error_msgs = []

        if action not in self.valid_ebi_actions:
            error_msg = ("%s is not a valid EBI submission action, valid "
                         "actions are: %s" %
                         (action, ', '.join(self.valid_ebi_actions)))
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
        self.action = action
        self.artifact = Artifact(artifact_id)
        if not self.artifact.can_be_submitted_to_ebi:
            error_msg = ("Artifact %d cannot be submitted to EBI"
                         % self.artifact.id)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        self.study = self.artifact.study
        self.sample_template = self.study.sample_template
        # If we reach this point, there should be only one prep template
        # attached to the artifact. By design, each artifact has at least one
        # prep template. Artifacts with more than one prep template cannot be
        # submitted to EBI, so the attribute 'can_be_submitted_to_ebi' should
        # be set to false, which is checked in the previous if statement
        self.prep_template = self.artifact.prep_templates[0]

        if self.artifact.is_submitted_to_ebi and action != 'MODIFY':
            error_msg = ("Cannot resubmit! Artifact %d has already "
                         "been submitted to EBI." % artifact_id)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        status = self.study.ebi_submission_status
        if status in self.valid_ebi_submission_states:
            error_msg = ("Cannot perform parallel EBI submission for the same "
                         "study. Current status of the study: %s" % status)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        self.artifact_id = artifact_id
        self.study_title = self.study.title
        self.study_abstract = self.study.info['study_abstract']

        it = self.prep_template.investigation_type
        if it in ena_ontology.terms:
            self.investigation_type = it
            self.new_investigation_type = None
        elif it in ena_ontology.user_defined_terms:
            self.investigation_type = 'Other'
            self.new_investigation_type = it
        else:
            # This should never happen
            error_msgs.append("Unrecognized investigation type: '%s'. This "
                              "term is neither one of the official terms nor "
                              "one of the user-defined terms in the ENA "
                              "ontology." % it)
        _, base_fp = get_mountpoint("preprocessed_data")[0]
        self.ebi_dir = '%d_ebi_submission' % artifact_id
        self.full_ebi_dir = join(base_fp, self.ebi_dir)
        self.ascp_reply = join(self.full_ebi_dir, 'ascp_reply.txt')
        self.curl_reply = join(self.full_ebi_dir, 'curl_reply.xml')
        self.xml_dir = join(self.full_ebi_dir, 'xml_dir')
        self.study_xml_fp = None
        self.sample_xml_fp = None
        self.experiment_xml_fp = None
        self.run_xml_fp = None
        self.submission_xml_fp = None
        self.publications = self.study.publications

        # getting the restrictions
        st_restrictions = [self.sample_template.columns_restrictions['EBI']]
        pt_restrictions = [self.prep_template.columns_restrictions['EBI']]
        if self.artifact.data_type in TARGET_GENE_DATA_TYPES:
            # adding restrictions on primer and barcode as these are
            # conditionally required for target gene
            pt_restrictions.append(
                PREP_TEMPLATE_COLUMNS_TARGET_GENE['demultiplex'])
        st_missing = self.sample_template.check_restrictions(st_restrictions)
        pt_missing = self.prep_template.check_restrictions(pt_restrictions)
        # testing if there are any missing columns
        if st_missing:
            error_msgs.append("Missing column in the sample template: %s" %
                              ', '.join(list(st_missing)))
        if pt_missing:
            error_msgs.append("Missing column in the prep template: %s" %
                              ', '.join(list(pt_missing)))

        # generating all samples from sample template
        self.samples = {}
        self.samples_prep = {}
        self.sample_demux_fps = {}
        get_output_fp = partial(join, self.full_ebi_dir)
        nvp = []
        nvim = []
        for k, v in viewitems(self.sample_template):
            if k not in self.prep_template:
                continue
            sample_prep = self.prep_template[k]

            # validating required fields
            if ('platform' not in sample_prep or
                    sample_prep['platform'] is None):
                nvp.append(k)
            else:
                platform = sample_prep['platform'].upper()
                if platform not in self.valid_platforms:
                    nvp.append(k)
                else:
                    if ('instrument_model' not in sample_prep or
                            sample_prep['instrument_model'] is None):
                        nvim.append(k)
                    else:
                        im = sample_prep['instrument_model'].upper()
                        if im not in self.valid_platforms[platform]:
                            nvim.append(k)

            self.samples[k] = v
            self.samples_prep[k] = sample_prep
            self.sample_demux_fps[k] = get_output_fp("%s.fastq.gz" % k)

        if nvp:
            error_msgs.append("These samples do not have a valid platform "
                              "(instrumet model wasn't checked): %s" % (
                                  ', '.join(nvp)))
        if nvim:
            error_msgs.append("These samples do not have a valid instrument "
                              "model: %s" % (', '.join(nvim)))
        if error_msgs:
            error_msgs = ("Errors found during EBI submission for study #%d, "
                          "artifact #%d and prep template #%d:\n%s"
                          % (self.study.id, artifact_id,
                             self.prep_template.id, '\n'.join(error_msgs)))
            LogEntry.create('Runtime', error_msgs)
            raise EBISubmissionError(error_msgs)

        self._sample_aliases = {}
        self._experiment_aliases = {}
        self._run_aliases = {}

        self._ebi_sample_accessions = \
            self.sample_template.ebi_sample_accessions
        self._ebi_experiment_accessions = \
            self.prep_template.ebi_experiment_accessions
コード例 #31
0
    def setUp(self):
        """Create the SFF prep templates and raw data used by the tests.

        Three prep templates are built from the same three-sample metadata:
        one backed by plain ``.sff`` files, one backed by a gzipped ``.sff``
        file and one in which a sample uses a second run prefix. Each of
        them is attached to its own RawData.

        Every path produced here is recorded in ``self.files_to_remove`` or
        ``self.dirs_to_remove``, including the files of the gzip prep
        template.
        """
        self.db_dir = get_db_files_base_dir()

        # Create a SFF dataset: add prep template and a RawData
        study = Study(1)
        md_dict = {
            'SKB8.640193': {'center_name': 'ANL',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'GTCCGCAAGTTA',
                            'run_prefix': "preprocess_test",
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'AAAA',
                            'experiment_design_description': 'BBBB'},
            'SKD8.640184': {'center_name': 'ANL',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'CGTAGAGCTCTC',
                            'run_prefix': "preprocess_test",
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'AAAA',
                            'experiment_design_description': 'BBBB'},
            'SKB7.640196': {'center_name': 'ANL',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'CCTCTGAGAGCT',
                            'run_prefix': "preprocess_test",
                            'platform': 'ILLUMINA',
                            'library_construction_protocol': 'AAAA',
                            'experiment_design_description': 'BBBB'}
        }
        md = pd.DataFrame.from_dict(md_dict, orient='index')
        self.sff_prep_template = PrepTemplate.create(md, study, "16S")

        tmp_dir = mkdtemp()
        self.path_builder = partial(join, tmp_dir)

        def write_placeholder(name, opener=open):
            # Writes a file holding a single newline inside the temporary
            # directory and returns its path
            fp = self.path_builder(name)
            with opener(fp, 'w') as f:
                f.write('\n')
            return fp

        fp1 = write_placeholder('preprocess_test1.sff')
        fp2 = write_placeholder('preprocess_test2.sff')
        self.raw_sff_id = convert_to_id('raw_sff', 'filepath_type')
        fps = [(fp1, self.raw_sff_id), (fp2, self.raw_sff_id)]

        # Magic number 1: is the filetype id
        self.raw_data = RawData.create(1, [self.sff_prep_template], fps)

        # Same metadata, but the raw data is a single gzipped file
        md = pd.DataFrame.from_dict(md_dict, orient='index')
        self.sff_prep_template_gz = PrepTemplate.create(md, study, "16S")
        fp1_gz = write_placeholder('preprocess_test1.sff.gz', gzip.open)
        fps = [(fp1_gz, self.raw_sff_id)]
        self.raw_data_gz = RawData.create(1, [self.sff_prep_template_gz], fps)

        # Create a SFF dataset with multiple run prefix:
        # add prep template and a RawData
        md_dict['SKD8.640184']['run_prefix'] = "new"
        md_rp = pd.DataFrame.from_dict(md_dict, orient='index')
        self.sff_prep_template_rp = PrepTemplate.create(md_rp, study, "16S")

        # These are the same paths as fp1/fp2; the files are written again
        # at this point, after the first RawData was created
        rp_fp1 = write_placeholder('preprocess_test1.sff')
        rp_fp2 = write_placeholder('preprocess_test2.sff')
        fps = [(rp_fp1, self.raw_sff_id), (rp_fp2, self.raw_sff_id)]

        # Magic number 1: is the filetype id
        self.raw_data_rp = RawData.create(1, [self.sff_prep_template_rp], fps)

        # Make sure that we clean up all created files
        self.files_to_remove = [fp1, fp2, fp1_gz, rp_fp1, rp_fp2]
        self.dirs_to_remove = [tmp_dir]

        # The gzip prep template is included so the files it generated are
        # cleaned up as well
        created_templates = [self.sff_prep_template,
                             self.sff_prep_template_gz,
                             self.sff_prep_template_rp]
        for pt in created_templates:
            for _, fp in pt.get_filepaths():
                self.files_to_remove.append(fp)
コード例 #32
0
 def testConvertToID(self):
     """The 'ENA' entry of the 'ontology' table resolves to 999999999"""
     obs = convert_to_id('ENA', 'ontology')
     exp = 999999999
     self.assertEqual(obs, exp)
コード例 #33
0
    def __init__(self, artifact_id, action):
        """Validate an artifact for EBI submission and set up its state

        Parameters
        ----------
        artifact_id : int
            The id of the artifact to submit
        action : str
            The submission action; it must be one of
            ``self.valid_ebi_actions``

        Raises
        ------
        EBISubmissionError
            - If `action` is not a valid EBI submission action
            - If the artifact cannot be submitted to EBI
            - If the artifact was already submitted and `action` is not
              'MODIFY'
            - If the study submission status is one of
              ``self.valid_ebi_submission_states``
            - If the investigation type is not recognized, a template is
              missing columns, or any sample lacks a valid platform or
              instrument model (reported together in a single error)

        Notes
        -----
        Every error message is stored with ``LogEntry.create`` before the
        exception is raised.
        """
        def _fail(message):
            # Log the problem, then abort the construction
            LogEntry.create('Runtime', message)
            raise EBISubmissionError(message)

        def _invalid_field(prep_info):
            # Returns 'platform' or 'instrument_model' for the first field
            # that fails validation, None when both are valid. The
            # instrument model is only looked at once the platform is valid
            if ('platform' not in prep_info or
                    prep_info['platform'] is None):
                return 'platform'
            platform = prep_info['platform'].upper()
            if platform not in self.valid_platforms:
                return 'platform'
            if ('instrument_model' not in prep_info or
                    prep_info['instrument_model'] is None):
                return 'instrument_model'
            model = prep_info['instrument_model'].upper()
            if model not in self.valid_platforms[platform]:
                return 'instrument_model'
            return None

        # Problems that do not abort right away; reported together below
        problems = []

        if action not in self.valid_ebi_actions:
            _fail("%s is not a valid EBI submission action, valid "
                  "actions are: %s" %
                  (action, ', '.join(self.valid_ebi_actions)))

        ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
        self.action = action
        self.artifact = Artifact(artifact_id)
        if not self.artifact.can_be_submitted_to_ebi:
            _fail("Artifact %d cannot be submitted to EBI" %
                  self.artifact.id)

        self.study = self.artifact.study
        self.sample_template = self.study.sample_template
        # Only one prep template is expected here: every artifact has at
        # least one, and artifacts with more than one should have
        # 'can_be_submitted_to_ebi' set to false, which was checked above
        self.prep_template = self.artifact.prep_templates[0]

        already_submitted = self.artifact.is_submitted_to_ebi
        if already_submitted and action != 'MODIFY':
            _fail("Cannot resubmit! Artifact %d has already "
                  "been submitted to EBI." % artifact_id)

        status = self.study.ebi_submission_status
        if status in self.valid_ebi_submission_states:
            _fail("Cannot perform parallel EBI submission for the same "
                  "study. Current status of the study: %s" % status)

        self.artifact_id = artifact_id
        self.study_title = self.study.title
        self.study_abstract = self.study.info['study_abstract']

        inv_type = self.prep_template.investigation_type
        if inv_type in ena_ontology.terms:
            self.investigation_type = inv_type
            self.new_investigation_type = None
        elif inv_type in ena_ontology.user_defined_terms:
            self.investigation_type = 'Other'
            self.new_investigation_type = inv_type
        else:
            # This should never happen
            problems.append("Unrecognized investigation type: '%s'. This "
                            "term is neither one of the official terms nor "
                            "one of the user-defined terms in the ENA "
                            "ontology." % inv_type)

        # Everything related to the submission lives under full_ebi_dir
        _, base_fp = get_mountpoint("preprocessed_data")[0]
        self.ebi_dir = '%d_ebi_submission' % artifact_id
        self.full_ebi_dir = join(base_fp, self.ebi_dir)
        in_ebi_dir = partial(join, self.full_ebi_dir)
        self.ascp_reply = in_ebi_dir('ascp_reply.txt')
        self.curl_reply = in_ebi_dir('curl_reply.xml')
        self.xml_dir = in_ebi_dir('xml_dir')
        self.study_xml_fp = self.sample_xml_fp = None
        self.experiment_xml_fp = self.run_xml_fp = None
        self.submission_xml_fp = None
        self.publications = self.study.publications

        # Check the EBI column restrictions of both templates
        st_required = [self.sample_template.columns_restrictions['EBI']]
        st_missing = self.sample_template.check_restrictions(st_required)
        pt_required = [self.prep_template.columns_restrictions['EBI']]
        pt_missing = self.prep_template.check_restrictions(pt_required)
        if st_missing:
            problems.append("Missing column in the sample template: %s" %
                            ', '.join(st_missing))
        if pt_missing:
            problems.append("Missing column in the prep template: %s" %
                            ', '.join(pt_missing))

        # Collect the samples that are present in both templates
        self.samples = {}
        self.samples_prep = {}
        self.sample_demux_fps = {}
        no_valid_platform = []
        no_valid_model = []
        for sample_id, sample_info in viewitems(self.sample_template):
            if sample_id not in self.prep_template:
                continue
            prep_info = self.prep_template[sample_id]

            failed = _invalid_field(prep_info)
            if failed == 'platform':
                no_valid_platform.append(sample_id)
            elif failed == 'instrument_model':
                no_valid_model.append(sample_id)

            self.samples[sample_id] = sample_info
            self.samples_prep[sample_id] = prep_info
            self.sample_demux_fps[sample_id] = in_ebi_dir(
                "%s.fastq.gz" % sample_id)

        if no_valid_platform:
            problems.append("These samples do not have a valid platform "
                            "(instrumet model wasn't checked): %s" %
                            (', '.join(no_valid_platform)))
        if no_valid_model:
            problems.append("These samples do not have a valid instrument "
                            "model: %s" % (', '.join(no_valid_model)))
        if problems:
            _fail("Errors found during EBI submission for study #%d, "
                  "artifact #%d and prep template #%d:\n%s" %
                  (self.study.id, artifact_id, self.prep_template.id,
                   '\n'.join(problems)))

        self._sample_aliases = {}
        self._experiment_aliases = {}
        self._run_aliases = {}

        self._ebi_sample_accessions = \
            self.sample_template.ebi_sample_accessions
        self._ebi_experiment_accessions = \
            self.prep_template.ebi_experiment_accessions
コード例 #34
0
ファイル: test_util.py プロジェクト: Jorge-C/qiita
 def test_convert_to_id(self):
     """The 'directory' filepath type resolves to id 7"""
     obs = convert_to_id("directory", "filepath_type")
     exp = 7
     self.assertEqual(obs, exp)
コード例 #35
0
ファイル: 36.py プロジェクト: ElDeveloper/qiita
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from random import SystemRandom
from string import ascii_letters, digits
from os.path import exists, join, basename
from tarfile import open as taropen

from qiita_db.sql_connection import TRN
from qiita_db.artifact import Artifact
from qiita_db.util import (insert_filepaths, convert_to_id, get_mountpoint,
                           get_mountpoint_path_by_id)


# Alphabet (ASCII letters and digits) the random credentials are drawn from
pool = ascii_letters + digits
# Id of the 'tgz' entry in the filepath_type table
tgz_id = convert_to_id("tgz", "filepath_type")
# Id and path of the first mountpoint returned for 'analysis'
_id, analysis_mp = get_mountpoint('analysis')[0]
# NOTE(review): TRN is presumably the shared transaction object, with
# statements queued through TRN.add - confirm against qiita_db.sql_connection
with TRN:
    # 2 and 3 are the ids of the 2 new software rows, the BIOM and
    # target gene type plugins
    for i in [2, 3]:
        # 50-character client id and 255-character secret, each character
        # picked with SystemRandom from `pool`
        client_id = ''.join([SystemRandom().choice(pool) for _ in range(50)])
        client_secret = ''.join(
            [SystemRandom().choice(pool) for _ in range(255)])

        # Store the newly generated credentials
        sql = """INSERT INTO qiita.oauth_identifiers (client_id, client_secret)
                VALUES (%s, %s)"""
        TRN.add(sql, [client_id, client_secret])

        # Statement linking a software row to a client id.
        # NOTE(review): the call that queues this statement is not visible
        # in this fragment
        sql = """INSERT INTO qiita.oauth_software (software_id, client_id)
                 VALUES (%s, %s)"""
コード例 #36
0
def make_environment(load_ontologies, download_reference, add_demo_user):
    r"""Builds a brand new environment from the current configuration

    Parameters
    ----------
    load_ontologies : bool
        Whether or not the ontology information should be loaded
    download_reference : bool
        Whether or not the greengenes reference files should be downloaded
    add_demo_user : bool
        Whether or not a demo user (password "password") and its default
        analysis should be added to the database. Ignored in a test
        environment.

    Raises
    ------
    EnvironmentError
        If `load_ontologies` is true and the configuration describes a test
        environment
    IOError
        If `download_reference` is true but one of the files cannot be
        retrieved
    QiitaEnvironmentError
        If the configured database already exists
    """
    in_test_env = qiita_config.test_environment
    db_name = qiita_config.database

    if load_ontologies and in_test_env:
        raise EnvironmentError("Cannot load ontologies in a test environment! "
                               "Pass --no-load-ontologies, or set "
                               "TEST_ENVIRONMENT = FALSE in your "
                               "configuration")

    # Admin connection to the postgres server, not bound to any database
    admin_handler = SQLConnectionHandler(admin='admin_without_database')

    # Refuse to go on if the database is already there
    if _check_db_exists(db_name, admin_handler):
        raise QiitaEnvironmentError(
            "Database {0} already present on the system. You can drop it "
            "by running 'qiita_env drop'".format(db_name))

    # The database is created with autocommit switched on, and autocommit
    # is switched back off right after
    print('Creating database')
    admin_handler.set_autocommit('on')
    admin_handler.execute('CREATE DATABASE %s' % db_name)
    admin_handler.set_autocommit('off')

    del admin_handler

    # From here on, work against the database that was just created
    db_conn = SQLConnectionHandler()

    print('Inserting database metadata')
    # Build the SQL layout into the database
    with open(SETTINGS_FP, 'U') as settings_file:
        layout_sql = settings_file.read()
    db_conn.execute(layout_sql)

    # Insert the settings values to the database
    settings_values = (in_test_env, qiita_config.base_data_dir,
                       qiita_config.working_dir)
    db_conn.execute(
        "INSERT INTO settings (test, base_data_dir, base_work_dir) "
        "VALUES (%s, %s, %s)",
        settings_values)

    create_layout_and_patch(db_conn, verbose=True)

    if load_ontologies:
        _add_ontology_data(db_conn)

        # This term depends on the ontologies being loaded, so it cannot
        # live in initialize.sql: that would make loading the ontologies
        # mandatory
        ena_id = convert_to_id('ENA', 'ontology')
        Ontology(ena_id).add_user_defined_term('Amplicon Sequencing')

    if download_reference:
        _download_reference_files(db_conn)

    # Skipped in a test environment because populate.sql already adds
    # this user
    if add_demo_user and not in_test_env:
        # NOTE(review): the e-mail literals in the two statements below look
        # masked; confirm the intended demo address before relying on them
        db_conn.execute("""
            INSERT INTO qiita.qiita_user (email, user_level_id, password,
                                          name, affiliation, address, phone)
            VALUES
            ('*****@*****.**', 4,
             '$2a$12$gnUi8Qg.0tvW243v889BhOBhWLIHyIJjjgaG6dxuRJkUM8nXG9Efe',
             'Demo', 'Qitta Dev', '1345 Colorado Avenue', '303-492-1984')""")
        db_conn.execute("""
            INSERT INTO qiita.analysis (email, name, description, dflt,
                                        analysis_status_id)
            VALUES
            ('*****@*****.**', '[email protected]', 'dflt', 't', 1)""")

        print('Demo user successfully created')

    if not in_test_env:
        print('Production environment successfully created')
        return

    _populate_test_db(db_conn)
    print('Test environment successfully created')
コード例 #37
0
 def test_convert_to_id(self):
     """Checks the id returned for the 'directory' filepath type"""
     expected_id = 7
     observed_id = convert_to_id("directory", "filepath_type")
     self.assertEqual(observed_id, expected_id)