def test_get_preprocess_fastq_cmd_per_sample_FASTQ_failure(self):
    metadata_dict = {
        'SKB8.640193': {'run_prefix': "sample1_failure", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'}}
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')

    # This part should fail
    fp1 = self.path_builder('sample1_failure.fastq')
    with open(fp1, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp1)
    fp2 = self.path_builder('sample1_failure.barcodes.fastq.gz')
    with open(fp2, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp2)
    forward_filepath_id = convert_to_id('raw_forward_seqs',
                                        'filepath_type')
    barcode_filepath_id = convert_to_id('raw_barcodes', 'filepath_type')

    fps = [(fp1, forward_filepath_id), (fp2, barcode_filepath_id)]
    filetype_id = get_filetypes()['per_sample_FASTQ']
    raw_data = RawData.create(filetype_id, [prep_template], fps)
    params = [p for p in list(PreprocessedIlluminaParams.iter())
              if p.name == 'per sample FASTQ defaults'][0]

    with self.assertRaises(ValueError):
        _get_preprocess_fastq_cmd(raw_data, prep_template, params)

def create(cls, severity, msg, info=None):
    """Creates a new LogEntry object

    Parameters
    ----------
    severity : str  {Warning, Runtime, Fatal}
        The level of severity to use for the LogEntry. Refers to an entry
        in the SEVERITY table.
    msg : str
        The message text
    info : dict, optional
        Defaults to ``None``. If supplied, the information will be added
        as the first entry in a list of information dicts. If ``None``,
        an empty dict will be added.

    Notes
    -----
    - When `info` is added, keys can be of any type, but upon retrieval,
      they will be of type str
    """
    if info is None:
        info = {}

    info = dumps([info])

    conn_handler = SQLConnectionHandler()
    sql = ("INSERT INTO qiita.{} (time, severity_id, msg, information) "
           "VALUES (NOW(), %s, %s, %s) "
           "RETURNING logging_id".format(cls._table))
    severity_id = convert_to_id(severity, "severity")
    id_ = conn_handler.execute_fetchone(sql, (severity_id, msg, info))[0]

    return cls(id_)

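# A hedged usage sketch for the factory above. The severity string must
# match a row in the SEVERITY table ('Warning', 'Runtime' or 'Fatal' per
# the docstring); the message and the info payload are illustrative only,
# not taken from the source.
entry = LogEntry.create('Warning', 'Sample count lower than expected',
                        info={'study_id': 1})
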
def _change_processed_data_status(self, new_status):
    # Change the status of the studies by changing the status of their
    # processed data
    id_status = convert_to_id(new_status, 'processed_data_status')
    self.conn_handler.execute(
        "UPDATE qiita.processed_data SET processed_data_status_id = %s",
        (id_status,))

def render(self, study, full_access):
    files = [f for _, f in get_files_from_uploads_folders(str(study.id))
             if f.endswith(('txt', 'tsv'))]
    data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))
    prep_templates_info = [
        res for res in _template_generator(study, full_access)]
    # Get all the ENA terms for the investigation type
    ontology = Ontology(convert_to_id('ENA', 'ontology'))
    # make "Other" show at the bottom of the drop down menu
    ena_terms = []
    for v in sorted(ontology.terms):
        if v != 'Other':
            ena_terms.append('<option value="%s">%s</option>' % (v, v))
    ena_terms.append('<option value="Other">Other</option>')

    # New Type is for users to add a new user-defined investigation type
    user_defined_terms = ontology.user_defined_terms + ['New Type']

    return self.render_string(
        "study_description_templates/prep_template_tab.html",
        files=files,
        data_types=data_types,
        available_prep_templates=prep_templates_info,
        ena_terms=ena_terms,
        user_defined_terms=user_defined_terms,
        study=study,
        full_access=full_access)

def _process_investigation_type(self, inv_type, user_def_type, new_type):
    """Return the investigation_type and add it to the ontology if needed

    Parameters
    ----------
    inv_type : str
        The investigation type
    user_def_type : str
        The user-defined investigation type
    new_type : str
        The new user-defined investigation_type

    Returns
    -------
    str
        The investigation type chosen by the user
    """
    if inv_type == 'None Selected':
        inv_type = None
    elif inv_type == 'Other' and user_def_type == 'New Type':
        # This is a new user-defined investigation type, so store it
        inv_type = new_type
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ontology.add_user_defined_term(inv_type)
    elif inv_type == 'Other' and user_def_type != 'New Type':
        inv_type = user_def_type
    return inv_type

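# A hedged behavior sketch for _process_investigation_type above; the
# argument values are placeholders, and the calls assume an instance
# (self) with access to the ENA ontology:
#
#   self._process_investigation_type('None Selected', '', '')    -> None
#   self._process_investigation_type('Other', 'New Type', 'Foo') -> 'Foo'
#       (and 'Foo' is stored as a user-defined ENA term)
#   self._process_investigation_type('Other', 'Bar', '')         -> 'Bar'
#   self._process_investigation_type('Metagenomics', '', '')
#       -> 'Metagenomics'
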
def test_insert_processed_data_target_gene(self):
    fd, fna_fp = mkstemp(suffix='_seqs.fna')
    close(fd)
    fd, qual_fp = mkstemp(suffix='_seqs.qual')
    close(fd)
    filepaths = [
        (fna_fp, convert_to_id('preprocessed_fasta', 'filepath_type')),
        (qual_fp, convert_to_id('preprocessed_fastq', 'filepath_type'))]

    preprocessed_data = PreprocessedData.create(
        Study(1), "preprocessed_sequence_illumina_params", 1,
        filepaths, data_type="18S")

    params = ProcessedSortmernaParams(1)

    pick_dir = mkdtemp()
    path_builder = partial(join, pick_dir)
    db_path_builder = partial(join, get_mountpoint('processed_data')[0][1])

    # Create a placeholder for the otu table
    with open(path_builder('otu_table.biom'), 'w') as f:
        f.write('\n')

    # Create a placeholder for the directory
    mkdir(path_builder('sortmerna_picked_otus'))

    # Create the log file
    fd, fp = mkstemp(dir=pick_dir, prefix='log_', suffix='.txt')
    close(fd)
    with open(fp, 'w') as f:
        f.write('\n')

    _insert_processed_data_target_gene(preprocessed_data, params, pick_dir)

    new_id = get_count('qiita.processed_data')

    # Check that the files have been copied
    db_files = [db_path_builder("%s_otu_table.biom" % new_id),
                db_path_builder("%s_sortmerna_picked_otus" % new_id),
                db_path_builder("%s_%s" % (new_id, basename(fp)))]
    for fp in db_files:
        self.assertTrue(exists(fp))

    # Check that a new processed data row has been created
    self.assertTrue(self.conn_handler.execute_fetchone(
        "SELECT EXISTS(SELECT * FROM qiita.processed_data WHERE "
        "processed_data_id=%s)", (new_id, ))[0])

def __init__(self, preprocessed_data_id, study_title, study_abstract,
             investigation_type, empty_value='no_data',
             new_investigation_type=None, pmids=None, **kwargs):
    self.preprocessed_data_id = preprocessed_data_id
    self.study_title = study_title
    self.study_abstract = study_abstract
    self.investigation_type = investigation_type
    self.empty_value = empty_value
    self.new_investigation_type = new_investigation_type
    self.sequence_files = []
    self.study_xml_fp = None
    self.sample_xml_fp = None
    self.experiment_xml_fp = None
    self.run_xml_fp = None
    self.submission_xml_fp = None
    self.pmids = pmids if pmids is not None else []
    self.ebi_dir = self._get_ebi_dir()

    if self.investigation_type == 'Other' and \
            self.new_investigation_type is None:
        raise ValueError("If the investigation_type is 'Other' you have "
                         "to specify a value for new_investigation_type.")

    ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if ontology.term_type(self.investigation_type) == 'not_ontology':
        raise ValueError("The investigation type must be part of ENA's "
                         "ontology, '%s' is not valid"
                         % self.investigation_type)

    # dicts that map investigation_type to library attributes
    lib_strategies = {'metagenome': 'POOLCLONE',
                      'mimarks-survey': 'AMPLICON'}
    lib_selections = {'mimarks-survey': 'PCR'}
    lib_sources = {}

    # if the investigation_type is 'Other' we should use the value in
    # the new_investigation_type attribute to retrieve this information
    if self.investigation_type == 'Other':
        key = self.new_investigation_type
    else:
        key = self.investigation_type

    self.library_strategy = lib_strategies.get(key, "OTHER")
    self.library_source = lib_sources.get(key, "METAGENOMIC")
    self.library_selection = lib_selections.get(key, "unspecified")

    # This allows addition of other arbitrary study metadata
    self.additional_metadata = self._stringify_kwargs(kwargs)

    # This will hold the submission's samples, keyed by the sample name
    self.samples = {}

def add_filepath(self, filepath, conn_handler=None):
    r"""Populates the DB tables for storing the filepath and connects the
    `self` objects with this filepath"""
    # Check that this function has been called from a subclass
    self._check_subclass()

    # Check if the connection handler has been provided. Create a new
    # one if not.
    conn_handler = conn_handler if conn_handler else SQLConnectionHandler()

    if self._table == 'required_sample_info':
        fp_id = convert_to_id("sample_template", "filepath_type",
                              conn_handler)
        table = 'sample_template_filepath'
        column = 'study_id'
    elif self._table == 'common_prep_info':
        fp_id = convert_to_id("prep_template", "filepath_type",
                              conn_handler)
        table = 'prep_template_filepath'
        column = 'prep_template_id'
    else:
        raise QiitaDBNotImplementedError(
            'add_filepath for %s' % self._table)

    try:
        fpp_id = insert_filepaths([(filepath, fp_id)], None, "templates",
                                  "filepath", conn_handler,
                                  move_files=False)[0]
        values = (self._id, fpp_id)
        conn_handler.execute(
            "INSERT INTO qiita.{0} ({1}, filepath_id) "
            "VALUES (%s, %s)".format(table, column), values)
    except Exception as e:
        LogEntry.create('Runtime', str(e),
                        info={self.__class__.__name__: self.id})
        raise e

def render(self, study_id, preprocessed_data):
    user = User(self.current_user)
    ppd_id = preprocessed_data.id
    ebi_status = preprocessed_data.submitted_to_insdc_status()
    ebi_study_accession = preprocessed_data.ebi_study_accession
    ebi_submission_accession = preprocessed_data.ebi_submission_accession
    vamps_status = preprocessed_data.submitted_to_vamps_status()
    filepaths = preprocessed_data.get_filepaths()
    is_local_request = self._is_local()
    show_ebi_btn = user.level == "admin"

    # Get all the ENA terms for the investigation type
    ontology = Ontology(convert_to_id('ENA', 'ontology'))
    # make "Other" show at the bottom of the drop down menu
    ena_terms = []
    for v in sorted(ontology.terms):
        if v != 'Other':
            ena_terms.append('<option value="%s">%s</option>' % (v, v))
    ena_terms.append('<option value="Other">Other</option>')

    # New Type is for users to add a new user-defined investigation type
    user_defined_terms = ontology.user_defined_terms + ['New Type']

    if PrepTemplate.exists(preprocessed_data.prep_template):
        prep_template_id = preprocessed_data.prep_template
        prep_template = PrepTemplate(prep_template_id)
        raw_data_id = prep_template.raw_data
        inv_type = prep_template.investigation_type or "None Selected"
    else:
        prep_template_id = None
        raw_data_id = None
        inv_type = "None Selected"

    return self.render_string(
        "study_description_templates/preprocessed_data_info_tab.html",
        ppd_id=ppd_id,
        show_ebi_btn=show_ebi_btn,
        ebi_status=ebi_status,
        ebi_study_accession=ebi_study_accession,
        ebi_submission_accession=ebi_submission_accession,
        filepaths=filepaths,
        is_local_request=is_local_request,
        prep_template_id=prep_template_id,
        raw_data_id=raw_data_id,
        inv_type=inv_type,
        ena_terms=ena_terms,
        vamps_status=vamps_status,
        user_defined_terms=user_defined_terms)

def _get_ENA_ontology():
    """Returns the information of the ENA ontology

    Returns
    -------
    dict of {str: list of strings}
        A dictionary of the form {'ENA': list of str, 'User': list of str}
        with the ENA-defined terms and the User-defined terms,
        respectively.
    """
    ontology = Ontology(convert_to_id('ENA', 'ontology'))
    ena_terms = sorted(ontology.terms)
    # make "Other" last on the list
    ena_terms.remove('Other')
    ena_terms.append('Other')
    return {'ENA': ena_terms, 'User': sorted(ontology.user_defined_terms)}

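# A hypothetical illustration of the shape of the dictionary returned by
# _get_ENA_ontology above; the concrete terms depend on the contents of
# the ontology tables, so these values are placeholders.
obs = _get_ENA_ontology()
# obs == {'ENA': ['Metagenomics', ..., 'Other'],
#         'User': ['Amplicon Sequencing', ...]}
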
def test_get_preprocess_fastq_cmd_per_sample_FASTQ(self):
    metadata_dict = {
        'SKB8.640193': {'run_prefix': "sample1", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'},
        'SKD8.640184': {'run_prefix': "sample2", 'primer': 'A',
                        'barcode': 'A', 'center_name': 'ANL',
                        'platform': 'ILLUMINA',
                        'instrument_model': 'Illumina MiSeq',
                        'library_construction_protocol': 'A',
                        'experiment_design_description': 'A'}}
    md_template = pd.DataFrame.from_dict(metadata_dict, orient='index')
    prep_template = PrepTemplate.create(md_template, Study(1), '16S')

    fp1 = self.path_builder('sample1.fastq')
    with open(fp1, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp1)
    fp2 = self.path_builder('sample2.fastq.gz')
    with open(fp2, 'w') as f:
        f.write('\n')
    self.files_to_remove.append(fp2)
    filepath_id = convert_to_id('raw_forward_seqs', 'filepath_type')

    fps = [(fp1, filepath_id), (fp2, filepath_id)]
    filetype_id = get_filetypes()['per_sample_FASTQ']
    raw_data = RawData.create(filetype_id, [prep_template], fps)
    params = [p for p in list(PreprocessedIlluminaParams.iter())
              if p.name == 'per sample FASTQ defaults'][0]

    obs_cmd, obs_output_dir = _get_preprocess_fastq_cmd(
        raw_data, prep_template, params)

    raw_fps = ','.join([fp for _, fp, _ in
                        sorted(raw_data.get_filepaths())])
    exp_cmd = (
        "split_libraries_fastq.py --store_demultiplexed_fastq -i "
        "{} --sample_ids 1.SKB8.640193,1.SKD8.640184 -o {} --barcode_type "
        "not-barcoded --max_bad_run_length 3 --max_barcode_errors 1.5 "
        "--min_per_read_length_fraction 0.75 --phred_quality_threshold 3 "
        "--sequence_max_n 0").format(raw_fps, obs_output_dir)
    self.assertEqual(obs_cmd, exp_cmd)

def validate_investigation_type(self, investigation_type):
    """Simple investigation validation to avoid code duplication

    Parameters
    ----------
    investigation_type : str
        The investigation type, should be part of the ENA ontology

    Raises
    ------
    QiitaDBColumnError
        The investigation type is not in the ENA ontology
    """
    ontology = Ontology(convert_to_id('ENA', 'ontology'))
    terms = ontology.terms + ontology.user_defined_terms
    if investigation_type not in terms:
        raise QiitaDBColumnError(
            "'%s' is not a valid investigation_type. Choose from: %s"
            % (investigation_type, ', '.join(terms)))

def ontology_patch_handler(req_op, req_path, req_value=None, req_from=None):
    """Patches an ontology

    Parameters
    ----------
    req_op : str
        The operation to perform on the ontology
    req_path : str
        The ontology to patch
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element

    Returns
    -------
    dict of {str: str}
        A dictionary of the form: {'status': str, 'message': str} in which
        status is the status of the request ('error' or 'success') and
        message is a human readable string with the error message in case
        that status is 'error'.
    """
    if req_op == "add":
        req_path = [v for v in req_path.split("/") if v]
        if len(req_path) != 1:
            return {"status": "error",
                    "message": "Incorrect path parameter"}
        req_path = req_path[0]

        try:
            o_id = convert_to_id(req_path, "ontology")
        except QiitaDBLookupError:
            return {"status": "error",
                    "message": 'Ontology "%s" does not exist' % req_path}

        ontology = Ontology(o_id)
        ontology.add_user_defined_term(req_value)
        return {"status": "success", "message": ""}
    else:
        return {"status": "error",
                "message": 'Operation "%s" not supported. '
                           "Current supported operations: add" % req_op}

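# Hedged examples of driving ontology_patch_handler above; 'ENA' is
# assumed to exist in the ontology table, and the added term is
# illustrative.
obs = ontology_patch_handler('add', '/ENA', 'Amplicon Sequencing')
# obs == {'status': 'success', 'message': ''}
obs = ontology_patch_handler('remove', '/ENA/Other')
# obs == {'status': 'error',
#         'message': 'Operation "remove" not supported. '
#                    'Current supported operations: add'}
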
def test_create_data_type_only(self):
    # Check that the returned object has the correct id
    new_id = get_count('qiita.preprocessed_data') + 1
    obs = PreprocessedData.create(self.study, self.params_table,
                                  self.params_id, self.filepaths,
                                  data_type="18S")
    self.assertEqual(obs.id, new_id)

    # Check that all the information is initialized correctly
    self.assertEqual(obs.processed_data, [])
    self.assertEqual(obs.prep_template, [])
    self.assertEqual(obs.study, self.study.id)
    self.assertEqual(obs.data_type(), "18S")
    self.assertEqual(obs.data_type(ret_id=True),
                     convert_to_id("18S", "data_type"))
    self.assertEqual(obs.submitted_to_vamps_status(), "not submitted")
    self.assertEqual(obs.processing_status, "not_processed")
    self.assertEqual(obs.status, "sandbox")
    self.assertEqual(obs.preprocessing_info,
                     (self.params_table, self.params_id))

def create_command(software, name, description, parameters, outputs=None,
                   analysis_only=False):
    r"""Replicates the Command.create code at the time the patch was
    written"""
    # Perform some sanity checks in the parameters dictionary
    if not parameters:
        raise QiitaDBError(
            "Error creating command %s. At least one parameter should "
            "be provided." % name)
    sql_param_values = []
    sql_artifact_params = []
    for pname, vals in parameters.items():
        if len(vals) != 2:
            raise QiitaDBError(
                "Malformed parameters dictionary, the format should be "
                "{param_name: [parameter_type, default]}. Found: "
                "%s for parameter name %s" % (vals, pname))

        ptype, dflt = vals
        # Check that the type is one of the supported types
        supported_types = ['string', 'integer', 'float', 'reference',
                           'boolean', 'prep_template', 'analysis']
        if ptype not in supported_types and not ptype.startswith(
                ('choice', 'mchoice', 'artifact')):
            supported_types.extend(['choice', 'mchoice', 'artifact'])
            raise QiitaDBError(
                "Unsupported parameters type '%s' for parameter %s. "
                "Supported types are: %s"
                % (ptype, pname, ', '.join(supported_types)))

        if ptype.startswith(('choice', 'mchoice')) and dflt is not None:
            choices = set(loads(ptype.split(':')[1]))
            dflt_val = dflt
            if ptype.startswith('choice'):
                # In the choice case, the dflt value is a single string;
                # wrap the string in a list so the issuperset call below
                # works for both cases
                dflt_val = [dflt_val]
            else:
                # jsonize the list to store it in the DB
                dflt = dumps(dflt)
            if not choices.issuperset(dflt_val):
                raise QiitaDBError(
                    "The default value '%s' for the parameter %s is not "
                    "listed in the available choices: %s"
                    % (dflt, pname, ', '.join(choices)))

        if ptype.startswith('artifact'):
            atypes = loads(ptype.split(':')[1])
            sql_artifact_params.append([pname, 'artifact', atypes])
        else:
            if dflt is not None:
                sql_param_values.append([pname, ptype, False, dflt])
            else:
                sql_param_values.append([pname, ptype, True, None])

    with TRN:
        sql = """SELECT EXISTS(SELECT *
                               FROM qiita.software_command
                               WHERE software_id = %s AND name = %s)"""
        TRN.add(sql, [software.id, name])
        if TRN.execute_fetchlast():
            raise QiitaDBDuplicateError(
                "command", "software: %d, name: %s" % (software.id, name))
        # Add the command to the DB
        sql = """INSERT INTO qiita.software_command
                    (name, software_id, description, is_analysis)
                 VALUES (%s, %s, %s, %s)
                 RETURNING command_id"""
        sql_params = [name, software.id, description, analysis_only]
        TRN.add(sql, sql_params)
        c_id = TRN.execute_fetchlast()

        # Add the parameters to the DB
        sql = """INSERT INTO qiita.command_parameter
                    (command_id, parameter_name, parameter_type, required,
                     default_value)
                 VALUES (%s, %s, %s, %s, %s)
                 RETURNING command_parameter_id"""
        sql_params = [[c_id, pname, p_type, reqd, default]
                      for pname, p_type, reqd, default in sql_param_values]
        TRN.add(sql, sql_params, many=True)
        TRN.execute()

        # Add the artifact parameters
        sql_type = """INSERT INTO qiita.parameter_artifact_type
                        (command_parameter_id, artifact_type_id)
                      VALUES (%s, %s)"""
        supported_types = []
        for pname, p_type, atypes in sql_artifact_params:
            sql_params = [c_id, pname, p_type, True, None]
            TRN.add(sql, sql_params)
            pid = TRN.execute_fetchlast()
            sql_params = [[pid, convert_to_id(at, 'artifact_type')]
                          for at in atypes]
            TRN.add(sql_type, sql_params, many=True)
            supported_types.extend([atid for _, atid in sql_params])

        # If the software type is 'artifact definition', there are a
        # couple of extra steps
        if software.type == 'artifact definition':
            # If supported types is not empty, link the software with
            # these types
            if supported_types:
                sql = """INSERT INTO qiita.software_artifact_type
                            (software_id, artifact_type_id)
                         VALUES (%s, %s)"""
                sql_params = [[software.id, atid]
                              for atid in supported_types]
                TRN.add(sql, sql_params, many=True)
            # If this is the validate command, we need to add the
            # provenance and name parameters. These are used internally,
            # that's why we are adding them here
            if name == 'Validate':
                sql = """INSERT INTO qiita.command_parameter
                            (command_id, parameter_name, parameter_type,
                             required, default_value)
                         VALUES (%s, 'name', 'string', 'False',
                                 'dflt_name'),
                                (%s, 'provenance', 'string', 'False',
                                 NULL)"""
                TRN.add(sql, [c_id, c_id])

        # Add the outputs to the command
        if outputs:
            sql = """INSERT INTO qiita.command_output
                        (name, command_id, artifact_type_id)
                     VALUES (%s, %s, %s)"""
            sql_args = [[pname, c_id, convert_to_id(at, 'artifact_type')]
                        for pname, at in outputs.items()]
            TRN.add(sql, sql_args, many=True)
            TRN.execute()

    return Command(c_id)

def test_convert_to_id_bad_value(self):
    """Tests that an invalid value raises an error"""
    with self.assertRaises(QiitaDBLookupError):
        convert_to_id("FAKE", "filepath_type")

def test_convert_to_id(self):
    """Tests that ids are returned correctly"""
    self.assertEqual(convert_to_id("directory", "filepath_type"), 8)
    self.assertEqual(convert_to_id("running", "analysis_status", "status"),
                     3)
    self.assertEqual(convert_to_id("EMP", "portal_type", "portal"), 2)

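# The tests above pin down the contract of convert_to_id: look a value up
# in a controlled-vocabulary table under the qiita schema and return its
# id, with an optional third argument naming the text column when it
# differs from the table name, and QiitaDBLookupError on a miss. Below is
# a minimal sketch under those assumptions, not the actual Qiita
# implementation.
from qiita_db.sql_connection import TRN
from qiita_db.exceptions import QiitaDBLookupError


def convert_to_id(value, table, text_col=None):
    """Return the id of `value` in the vocabulary table qiita.<table>.

    Assumes the id column is named <table>_id and, unless `text_col` is
    given, that the text column is named after the table.
    """
    text_col = text_col if text_col is not None else table
    with TRN:
        sql = "SELECT {0}_id FROM qiita.{0} WHERE {1} = %s".format(
            table, text_col)
        TRN.add(sql, [value])
        result = TRN.execute_fetchindex()
        if not result:
            raise QiitaDBLookupError("%s not valid for table %s"
                                     % (value, table))
        return result[0][0]
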
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Notes
    -----
    If fastq_dir_fp is passed, it must not contain any empty files, or
    gzipped empty files
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ("submitting", "success"):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend to actually send the files, then change the status
        # in the database
        preprocessed_data.update_insdc_status("submitting")

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id("ENA", "ontology"))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = "Other"
        new_investigation_type = current_type
    else:
        # This should never happen
        raise ValueError("Unrecognized investigation type: '%s'. This "
                         "term is neither one of the official terms nor "
                         "one of the user-defined terms in the ENA "
                         "ontology" % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it
        # Set demux_samples to None so that MetadataTemplate.to_file will
        # put all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype
                 in preprocessed_data.get_filepaths()
                 if ftype == "preprocessed_demux"][0]

        # Keep track of which files were actually in the demux file so
        # that we can write those rows to the prep and samples templates
        demux_samples = set()

        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(
                    demux_fh, list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                wrote_sequences = False
                with gzopen(sample_fp, "w") as fh:
                    for record in iterator:
                        fh.write(record)
                        wrote_sequences = True

                if not wrote_sequences:
                    remove(sample_fp)

    output_dir = fastq_dir_fp + "_submission"

    samp_fp = join(fastq_dir_fp, "sample_metadata.txt")
    prep_fp = join(fastq_dir_fp, "prep_metadata.txt")

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp("study.xml")
    sample_fp = get_output_fp("sample.xml")
    experiment_fp = get_output_fp("experiment.xml")
    run_fp = get_output_fp("run.xml")
    submission_fp = get_output_fp("submission.xml")

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError("The output folder already exists: %s" % output_dir)

    with open(samp_fp, "U") as st, open(prep_fp, "U") as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str, study.title,
            study.info["study_abstract"], investigation_type, st, pt,
            fastq_dir_fp, new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp,
                                   run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status("failed")
            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status(
                "success", study_accession, submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession

def render(self, study_id, preprocessed_data):
    user = self.current_user
    ppd_id = preprocessed_data.id
    ebi_status = preprocessed_data.submitted_to_insdc_status()
    ebi_study_accession = preprocessed_data.ebi_study_accession
    ebi_submission_accession = preprocessed_data.ebi_submission_accession
    vamps_status = preprocessed_data.submitted_to_vamps_status()
    filepaths = preprocessed_data.get_filepaths()
    is_local_request = self._is_local()
    show_ebi_btn = user.level == "admin"
    processing_status = preprocessed_data.processing_status
    processed_data = preprocessed_data.processed_data

    # Get all the ENA terms for the investigation type
    ontology = Ontology(convert_to_id('ENA', 'ontology'))
    # make "Other" show at the bottom of the drop down menu
    ena_terms = []
    for v in sorted(ontology.terms):
        if v != 'Other':
            ena_terms.append('<option value="%s">%s</option>' % (v, v))
    ena_terms.append('<option value="Other">Other</option>')

    # New Type is for users to add a new user-defined investigation type
    user_defined_terms = ontology.user_defined_terms + ['New Type']

    if PrepTemplate.exists(preprocessed_data.prep_template):
        prep_template_id = preprocessed_data.prep_template
        prep_template = PrepTemplate(prep_template_id)
        raw_data_id = prep_template.raw_data
        inv_type = prep_template.investigation_type or "None Selected"
    else:
        prep_template_id = None
        raw_data_id = None
        inv_type = "None Selected"

    process_params = {param.id: (generate_param_str(param), param.name)
                      for param in ProcessedSortmernaParams.iter()}
    # We just need to provide an ID for the default parameters,
    # so we can initialize the interface
    default_params = 1

    return self.render_string(
        "study_description_templates/preprocessed_data_info_tab.html",
        ppd_id=ppd_id,
        show_ebi_btn=show_ebi_btn,
        ebi_status=ebi_status,
        ebi_study_accession=ebi_study_accession,
        ebi_submission_accession=ebi_submission_accession,
        filepaths=filepaths,
        is_local_request=is_local_request,
        prep_template_id=prep_template_id,
        raw_data_id=raw_data_id,
        inv_type=inv_type,
        ena_terms=ena_terms,
        vamps_status=vamps_status,
        user_defined_terms=user_defined_terms,
        process_params=process_params,
        default_params=default_params,
        study_id=preprocessed_data.study,
        processing_status=processing_status,
        processed_data=processed_data)

def create(cls, md_template, raw_data, study, data_type,
           investigation_type=None):
    r"""Creates the metadata template in the database

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by sample ids
    raw_data : RawData
        The raw_data to which the prep template belongs to.
    study : Study
        The study to which the prep template belongs to.
    data_type : str or int
        The data_type of the prep template
    investigation_type : str, optional
        The investigation type, if relevant

    Returns
    -------
    A new instance of `cls` to access to the PrepTemplate stored in the DB

    Raises
    ------
    QiitaDBColumnError
        If the investigation_type is not valid
        If a required column is missing in md_template
    """
    # If the investigation_type is supplied, make sure it is one of
    # the recognized investigation types
    if investigation_type is not None:
        cls.validate_investigation_type(investigation_type)

    # Get a connection handler
    conn_handler = SQLConnectionHandler()
    queue_name = "CREATE_PREP_TEMPLATE_%d" % raw_data.id
    conn_handler.create_queue(queue_name)

    # Check if the data_type is the id or the string
    if isinstance(data_type, (int, long)):
        data_type_id = data_type
        data_type_str = convert_from_id(data_type, "data_type",
                                        conn_handler)
    else:
        data_type_id = convert_to_id(data_type, "data_type", conn_handler)
        data_type_str = data_type

    md_template = cls._clean_validate_template(md_template, study.id,
                                               data_type_str, conn_handler)

    # Insert the metadata template
    # We need the prep_id for multiple calls below, which currently is
    # not supported by the queue system. Thus, executing this outside
    # the queue
    prep_id = conn_handler.execute_fetchone(
        "INSERT INTO qiita.prep_template (data_type_id, raw_data_id, "
        "investigation_type) VALUES (%s, %s, %s) RETURNING "
        "prep_template_id", (data_type_id, raw_data.id,
                             investigation_type))[0]

    cls._add_common_creation_steps_to_queue(md_template, prep_id,
                                            conn_handler, queue_name)

    try:
        conn_handler.execute_queue(queue_name)
    except Exception:
        # Clean up row from qiita.prep_template
        conn_handler.execute(
            "DELETE FROM qiita.prep_template where "
            "{0} = %s".format(cls._id_column), (prep_id,))

        # Check if sample IDs present here but not in sample template
        sql = ("SELECT sample_id from qiita.required_sample_info WHERE "
               "study_id = %s")
        # Get list of study sample IDs, prep template study IDs,
        # and their intersection
        prep_samples = set(md_template.index.values)
        unknown_samples = prep_samples.difference(
            s[0] for s in conn_handler.execute_fetchall(sql, [study.id]))
        if unknown_samples:
            raise QiitaDBExecutionError(
                'Samples found in prep template but not sample template: '
                '%s' % ', '.join(unknown_samples))

        # some other error we haven't seen before so raise it
        raise

    pt = cls(prep_id)
    pt.generate_files()

    return pt

# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from random import SystemRandom
from string import ascii_letters, digits
from os.path import exists, join, basename
from tarfile import open as taropen

from qiita_db.sql_connection import TRN
from qiita_db.artifact import Artifact
from qiita_db.util import (insert_filepaths, convert_to_id, get_mountpoint,
                           get_mountpoint_path_by_id)

pool = ascii_letters + digits
tgz_id = convert_to_id("tgz", "filepath_type")
_id, analysis_mp = get_mountpoint('analysis')[0]
with TRN:
    # 2 and 3 are the ids of the 2 new software rows, the BIOM and
    # target gene type plugins
    for i in [2, 3]:
        client_id = ''.join([SystemRandom().choice(pool)
                             for _ in range(50)])
        client_secret = ''.join([SystemRandom().choice(pool)
                                 for _ in range(255)])

        sql = """INSERT INTO qiita.oauth_identifiers (client_id,
                                                      client_secret)
                 VALUES (%s, %s)"""
        TRN.add(sql, [client_id, client_secret])

        sql = """INSERT INTO qiita.oauth_software (software_id, client_id)
                 VALUES (%s, %s)"""

def test_convert_to_id_bad_value(self):
    """Tests that an invalid value raises an error"""
    with self.assertRaises(IncompetentQiitaDeveloperError):
        convert_to_id("FAKE", "filepath_type")

def testConvertToID(self):
    self.assertEqual(convert_to_id('ENA', 'ontology'), 807481739)

def create_qiime_mapping_file(self):
    """This creates the QIIME mapping file and links it in the db.

    Returns
    -------
    filepath : str
        The filepath of the created QIIME mapping file

    Raises
    ------
    ValueError
        If the prep template is not a subset of the sample template
    QiitaDBWarning
        If the QIIME-required columns are not present in the template

    Notes
    -----
    We cannot ensure that the QIIME-required columns are present in the
    metadata map. However, we have to generate a QIIME-compliant mapping
    file. Since the user may need a QIIME mapping file, but not these
    QIIME-required columns, we are going to create them and populate them
    with the value XXQIITAXX.
    """
    rename_cols = {
        'barcode': 'BarcodeSequence',
        'primer': 'LinkerPrimerSequence',
        'description': 'Description',
    }

    if 'reverselinkerprimer' in self.categories():
        rename_cols['reverselinkerprimer'] = 'ReverseLinkerPrimer'
        new_cols = ['BarcodeSequence', 'LinkerPrimerSequence',
                    'ReverseLinkerPrimer']
    else:
        new_cols = ['BarcodeSequence', 'LinkerPrimerSequence']

    # getting the latest sample template
    conn_handler = SQLConnectionHandler()
    sql = """SELECT filepath_id, filepath
             FROM qiita.filepath
                JOIN qiita.sample_template_filepath
                USING (filepath_id)
             WHERE study_id=%s
             ORDER BY filepath_id DESC"""
    sample_template_fname = conn_handler.execute_fetchall(
        sql, (self.study_id,))[0][1]
    _, fp = get_mountpoint('templates')[0]
    sample_template_fp = join(fp, sample_template_fname)

    # reading files via pandas
    st = load_template_to_dataframe(sample_template_fp)
    pt = self.to_dataframe()

    st_sample_names = set(st.index)
    pt_sample_names = set(pt.index)

    if not pt_sample_names.issubset(st_sample_names):
        raise ValueError(
            "Prep template is not a subset of the sample template, "
            "files: %s - samples: %s"
            % (sample_template_fp,
               ', '.join(pt_sample_names - st_sample_names)))

    mapping = pt.join(st, lsuffix="_prep")
    mapping.rename(columns=rename_cols, inplace=True)

    # Pre-populate the QIIME-required columns with the value XXQIITAXX
    index = mapping.index
    placeholder = ['XXQIITAXX'] * len(index)
    missing = []
    for val in viewvalues(rename_cols):
        if val not in mapping:
            missing.append(val)
            mapping[val] = pd.Series(placeholder, index=index)

    if missing:
        warnings.warn(
            "Some columns required to generate a QIIME-compliant mapping "
            "file are not present in the template. A placeholder value "
            "(XXQIITAXX) has been used to populate these columns. Missing "
            "columns: %s" % ', '.join(missing), QiitaDBWarning)

    # Gets the original mapping columns and readjusts the order to comply
    # with QIIME requirements
    cols = mapping.columns.values.tolist()
    cols.remove('BarcodeSequence')
    cols.remove('LinkerPrimerSequence')
    cols.remove('Description')
    new_cols.extend(cols)
    new_cols.append('Description')
    mapping = mapping[new_cols]

    # figuring out the filepath for the QIIME map file
    _id, fp = get_mountpoint('templates')[0]
    filepath = join(fp, '%d_prep_%d_qiime_%s.txt'
                        % (self.study_id, self.id,
                           strftime("%Y%m%d-%H%M%S")))

    # Save the mapping file
    mapping.to_csv(filepath, index_label='#SampleID', na_rep='',
                   sep='\t')

    # adding the fp to the object
    self.add_filepath(
        filepath, fp_id=convert_to_id("qiime_map", "filepath_type"))

    return filepath

def render(self, study, raw_data, full_access):
    user = self.current_user
    study_status = study.status
    user_level = user.level
    raw_data_id = raw_data.id
    files = [f for _, f in get_files_from_uploads_folders(str(study.id))]

    # Get the available prep template data types
    data_types = sorted(viewitems(get_data_types()), key=itemgetter(1))

    # Get all the ENA terms for the investigation type
    ontology = Ontology(convert_to_id('ENA', 'ontology'))
    # make "Other" show at the bottom of the drop down menu
    ena_terms = []
    for v in sorted(ontology.terms):
        if v != 'Other':
            ena_terms.append('<option value="%s">%s</option>' % (v, v))
    ena_terms.append('<option value="Other">Other</option>')

    # New Type is for users to add a new user-defined investigation type
    user_defined_terms = ontology.user_defined_terms + ['New Type']

    # Get all the information about the prep templates
    available_prep_templates = []
    for p in sorted(raw_data.prep_templates):
        if PrepTemplate.exists(p):
            pt = PrepTemplate(p)
            # if the prep template doesn't belong to this study, skip it
            if (study.id == pt.study_id and
                    (full_access or pt.status == 'public')):
                available_prep_templates.append(pt)

    # getting filepath_types
    if raw_data.filetype == 'SFF':
        fts = ['sff']
    elif raw_data.filetype == 'FASTA':
        fts = ['fasta', 'qual']
    elif raw_data.filetype == 'FASTQ':
        fts = ['barcodes', 'forward seqs', 'reverse seqs']
    else:
        fts = [k.split('_', 1)[1].replace('_', ' ')
               for k in get_filepath_types() if k.startswith('raw_')]

    # The raw data can be edited (i.e. adding prep templates and files)
    # only if the study is sandboxed or the current user is an admin
    is_editable = study_status == 'sandbox' or user_level == 'admin'

    # Get the files linked with the raw_data
    raw_data_files = raw_data.get_filepaths()

    # Get the status of the data linking
    raw_data_link_status = raw_data.link_filepaths_status

    # By default don't show the unlink button
    show_unlink_btn = False
    # By default disable the link file button
    disable_link_btn = True
    # Define the message for the link status
    if raw_data_link_status == 'linking':
        link_msg = "Linking files..."
    elif raw_data_link_status == 'unlinking':
        link_msg = "Unlinking files..."
    else:
        # The link button is only disabled if the raw data link status is
        # linking or unlinking, so we can enable it here
        disable_link_btn = False

        # The unlink button is only shown if the study is editable, the
        # raw data linking status is not linking or unlinking, and there
        # are files attached to the raw data. At this point, we are sure
        # that the raw data linking status is not linking or unlinking,
        # so we still need to check that the study is editable and that
        # there are files attached
        show_unlink_btn = is_editable and raw_data_files

        if raw_data_link_status.startswith('failed'):
            link_msg = "Error (un)linking files: %s" % raw_data_link_status
        else:
            link_msg = ""

    # Get the raw_data filetype
    raw_data_filetype = raw_data.filetype

    return self.render_string(
        "study_description_templates/raw_data_editor_tab.html",
        study_id=study.id,
        study_status=study_status,
        user_level=user_level,
        raw_data_id=raw_data_id,
        files=files,
        data_types=data_types,
        ena_terms=ena_terms,
        user_defined_terms=user_defined_terms,
        available_prep_templates=available_prep_templates,
        filepath_types=fts,
        is_editable=is_editable,
        show_unlink_btn=show_unlink_btn,
        link_msg=link_msg,
        raw_data_files=raw_data_files,
        raw_data_filetype=raw_data_filetype,
        disable_link_btn=disable_link_btn)

def create(cls, md_template, study, data_type, investigation_type=None):
    r"""Creates the metadata template in the database

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by sample ids
    study : Study
        The study to which the prep template belongs to.
    data_type : str or int
        The data_type of the prep template
    investigation_type : str, optional
        The investigation type, if relevant

    Returns
    -------
    A new instance of `cls` to access to the PrepTemplate stored in the DB

    Raises
    ------
    QiitaDBColumnError
        If the investigation_type is not valid
        If a required column is missing in md_template
    """
    with TRN:
        # If the investigation_type is supplied, make sure it is one of
        # the recognized investigation types
        if investigation_type is not None:
            cls.validate_investigation_type(investigation_type)

        # Check if the data_type is the id or the string
        if isinstance(data_type, (int, long)):
            data_type_id = data_type
            data_type_str = convert_from_id(data_type, "data_type")
        else:
            data_type_id = convert_to_id(data_type, "data_type")
            data_type_str = data_type

        pt_cols = PREP_TEMPLATE_COLUMNS
        if data_type_str in TARGET_GENE_DATA_TYPES:
            pt_cols = deepcopy(PREP_TEMPLATE_COLUMNS)
            pt_cols.update(PREP_TEMPLATE_COLUMNS_TARGET_GENE)

        md_template = cls._clean_validate_template(md_template, study.id,
                                                   pt_cols)

        # Insert the metadata template
        sql = """INSERT INTO qiita.prep_template
                    (data_type_id, investigation_type)
                 VALUES (%s, %s)
                 RETURNING prep_template_id"""
        TRN.add(sql, [data_type_id, investigation_type])
        prep_id = TRN.execute_fetchlast()

        try:
            cls._common_creation_steps(md_template, prep_id)
        except Exception:
            # Check if sample IDs present here but not in sample template
            sql = """SELECT sample_id from qiita.study_sample
                     WHERE study_id = %s"""
            # Get list of study sample IDs, prep template study IDs,
            # and their intersection
            TRN.add(sql, [study.id])
            prep_samples = set(md_template.index.values)
            unknown_samples = prep_samples.difference(
                TRN.execute_fetchflatten())
            if unknown_samples:
                raise QiitaDBExecutionError(
                    'Samples found in prep template but not sample '
                    'template: %s' % ', '.join(unknown_samples))

            # some other error we haven't seen before so raise it
            raise

        # Link the prep template with the study
        sql = """INSERT INTO qiita.study_prep_template
                    (study_id, prep_template_id)
                 VALUES (%s, %s)"""
        TRN.add(sql, [study.id, prep_id])

        TRN.execute()

        pt = cls(prep_id)
        pt.generate_files()

        return pt

def __init__(self, artifact_id, action):
    error_msgs = []

    if action not in self.valid_ebi_actions:
        error_msg = ("%s is not a valid EBI submission action, valid "
                     "actions are: %s"
                     % (action, ', '.join(self.valid_ebi_actions)))
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    self.action = action
    self.artifact = Artifact(artifact_id)
    if not self.artifact.can_be_submitted_to_ebi:
        error_msg = ("Artifact %d cannot be submitted to EBI"
                     % self.artifact.id)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
    self.study = self.artifact.study
    self.sample_template = self.study.sample_template
    # If we reach this point, there should be only one prep template
    # attached to the artifact. By design, each artifact has at least one
    # prep template. Artifacts with more than one prep template cannot be
    # submitted to EBI, so the attribute 'can_be_submitted_to_ebi' should
    # be set to false, which is checked in the previous if statement
    self.prep_template = self.artifact.prep_templates[0]

    if self.artifact.is_submitted_to_ebi and action != 'MODIFY':
        error_msg = ("Cannot resubmit! Artifact %d has already "
                     "been submitted to EBI." % artifact_id)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    status = self.study.ebi_submission_status
    if status in self.valid_ebi_submission_states:
        error_msg = ("Cannot perform parallel EBI submission for the same "
                     "study. Current status of the study: %s" % status)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    self.artifact_id = artifact_id
    self.study_title = self.study.title
    self.study_abstract = self.study.info['study_abstract']

    it = self.prep_template.investigation_type
    if it in ena_ontology.terms:
        self.investigation_type = it
        self.new_investigation_type = None
    elif it in ena_ontology.user_defined_terms:
        self.investigation_type = 'Other'
        self.new_investigation_type = it
    else:
        # This should never happen
        error_msgs.append("Unrecognized investigation type: '%s'. This "
                          "term is neither one of the official terms nor "
                          "one of the user-defined terms in the ENA "
                          "ontology." % it)

    _, base_fp = get_mountpoint("preprocessed_data")[0]
    self.ebi_dir = '%d_ebi_submission' % artifact_id
    self.full_ebi_dir = join(base_fp, self.ebi_dir)
    self.ascp_reply = join(self.full_ebi_dir, 'ascp_reply.txt')
    self.curl_reply = join(self.full_ebi_dir, 'curl_reply.xml')
    self.xml_dir = join(self.full_ebi_dir, 'xml_dir')
    self.study_xml_fp = None
    self.sample_xml_fp = None
    self.experiment_xml_fp = None
    self.run_xml_fp = None
    self.submission_xml_fp = None
    self.publications = self.study.publications

    # getting the restrictions
    st_restrictions = [self.sample_template.columns_restrictions['EBI']]
    pt_restrictions = [self.prep_template.columns_restrictions['EBI']]
    if self.artifact.data_type in TARGET_GENE_DATA_TYPES:
        # adding restrictions on primer and barcode as these are
        # conditionally required for target gene
        pt_restrictions.append(
            PREP_TEMPLATE_COLUMNS_TARGET_GENE['demultiplex'])
    st_missing = self.sample_template.check_restrictions(st_restrictions)
    pt_missing = self.prep_template.check_restrictions(pt_restrictions)
    # testing if there are any missing columns
    if st_missing:
        error_msgs.append("Missing column in the sample template: %s"
                          % ', '.join(list(st_missing)))
    if pt_missing:
        error_msgs.append("Missing column in the prep template: %s"
                          % ', '.join(list(pt_missing)))

    # generating all samples from sample template
    self.samples = {}
    self.samples_prep = {}
    self.sample_demux_fps = {}
    get_output_fp = partial(join, self.full_ebi_dir)
    nvp = []
    nvim = []
    for k, v in viewitems(self.sample_template):
        if k not in self.prep_template:
            continue
        sample_prep = self.prep_template[k]

        # validating required fields
        if ('platform' not in sample_prep or
                sample_prep['platform'] is None):
            nvp.append(k)
        else:
            platform = sample_prep['platform'].upper()
            if platform not in self.valid_platforms:
                nvp.append(k)
            else:
                if ('instrument_model' not in sample_prep or
                        sample_prep['instrument_model'] is None):
                    nvim.append(k)
                else:
                    im = sample_prep['instrument_model'].upper()
                    if im not in self.valid_platforms[platform]:
                        nvim.append(k)

        self.samples[k] = v
        self.samples_prep[k] = sample_prep
        self.sample_demux_fps[k] = get_output_fp("%s.fastq.gz" % k)

    if nvp:
        error_msgs.append("These samples do not have a valid platform "
                          "(instrument model wasn't checked): %s"
                          % ', '.join(nvp))
    if nvim:
        error_msgs.append("These samples do not have a valid instrument "
                          "model: %s" % ', '.join(nvim))
    if error_msgs:
        error_msgs = ("Errors found during EBI submission for study #%d, "
                      "artifact #%d and prep template #%d:\n%s"
                      % (self.study.id, artifact_id,
                         self.prep_template.id, '\n'.join(error_msgs)))
        LogEntry.create('Runtime', error_msgs)
        raise EBISubmissionError(error_msgs)

    self._sample_aliases = {}
    self._experiment_aliases = {}
    self._run_aliases = {}

    self._ebi_sample_accessions = \
        self.sample_template.ebi_sample_accessions
    self._ebi_experiment_accessions = \
        self.prep_template.ebi_experiment_accessions

def setUp(self):
    self.db_dir = get_db_files_base_dir()

    # Create a SFF dataset: add prep template and a RawData
    study = Study(1)
    md_dict = {
        'SKB8.640193': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'GTCCGCAAGTTA',
                        'run_prefix': "preprocess_test",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'},
        'SKD8.640184': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CGTAGAGCTCTC',
                        'run_prefix': "preprocess_test",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'},
        'SKB7.640196': {'center_name': 'ANL',
                        'primer': 'GTGCCAGCMGCCGCGGTAA',
                        'barcode': 'CCTCTGAGAGCT',
                        'run_prefix': "preprocess_test",
                        'platform': 'ILLUMINA',
                        'library_construction_protocol': 'AAAA',
                        'experiment_design_description': 'BBBB'}
    }
    md = pd.DataFrame.from_dict(md_dict, orient='index')
    self.sff_prep_template = PrepTemplate.create(md, study, "16S")

    tmp_dir = mkdtemp()
    self.path_builder = partial(join, tmp_dir)
    fp1 = self.path_builder('preprocess_test1.sff')
    with open(fp1, 'w') as f:
        f.write('\n')
    fp2 = self.path_builder('preprocess_test2.sff')
    with open(fp2, 'w') as f:
        f.write('\n')
    self.raw_sff_id = convert_to_id('raw_sff', 'filepath_type')
    fps = [(fp1, self.raw_sff_id), (fp2, self.raw_sff_id)]

    # Magic number 1: is the filetype id
    self.raw_data = RawData.create(1, [self.sff_prep_template], fps)

    md = pd.DataFrame.from_dict(md_dict, orient='index')
    self.sff_prep_template_gz = PrepTemplate.create(md, study, "16S")
    fp1_gz = self.path_builder('preprocess_test1.sff.gz')
    with gzip.open(fp1_gz, 'w') as f:
        f.write('\n')
    fps = [(fp1_gz, self.raw_sff_id)]
    self.raw_data_gz = RawData.create(1, [self.sff_prep_template_gz], fps)

    # Create a SFF dataset with multiple run prefix:
    # add prep template and a RawData
    md_dict['SKD8.640184']['run_prefix'] = "new"
    md_rp = pd.DataFrame.from_dict(md_dict, orient='index')
    self.sff_prep_template_rp = PrepTemplate.create(md_rp, study, "16S")

    rp_fp1 = self.path_builder('preprocess_test1.sff')
    with open(rp_fp1, 'w') as f:
        f.write('\n')
    rp_fp2 = self.path_builder('preprocess_test2.sff')
    with open(rp_fp2, 'w') as f:
        f.write('\n')
    fps = [(rp_fp1, self.raw_sff_id), (rp_fp2, self.raw_sff_id)]

    # Magic number 1: is the filetype id
    self.raw_data_rp = RawData.create(1, [self.sff_prep_template_rp], fps)

    # Make sure that we clean up all created files
    self.files_to_remove = [fp1, fp2, rp_fp1, rp_fp2]
    self.dirs_to_remove = [tmp_dir]
    for pt in [self.sff_prep_template, self.sff_prep_template_rp]:
        for _, fp in pt.get_filepaths():
            self.files_to_remove.append(fp)

def testConvertToID(self):
    self.assertEqual(convert_to_id('ENA', 'ontology'), 999999999)

def __init__(self, artifact_id, action):
    error_msgs = []

    if action not in self.valid_ebi_actions:
        error_msg = ("%s is not a valid EBI submission action, valid "
                     "actions are: %s"
                     % (action, ', '.join(self.valid_ebi_actions)))
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    self.action = action
    self.artifact = Artifact(artifact_id)
    if not self.artifact.can_be_submitted_to_ebi:
        error_msg = ("Artifact %d cannot be submitted to EBI"
                     % self.artifact.id)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
    self.study = self.artifact.study
    self.sample_template = self.study.sample_template
    # If we reach this point, there should be only one prep template
    # attached to the artifact. By design, each artifact has at least one
    # prep template. Artifacts with more than one prep template cannot be
    # submitted to EBI, so the attribute 'can_be_submitted_to_ebi' should
    # be set to false, which is checked in the previous if statement
    self.prep_template = self.artifact.prep_templates[0]

    if self.artifact.is_submitted_to_ebi and action != 'MODIFY':
        error_msg = ("Cannot resubmit! Artifact %d has already "
                     "been submitted to EBI." % artifact_id)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    status = self.study.ebi_submission_status
    if status in self.valid_ebi_submission_states:
        error_msg = ("Cannot perform parallel EBI submission for the same "
                     "study. Current status of the study: %s" % status)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    self.artifact_id = artifact_id
    self.study_title = self.study.title
    self.study_abstract = self.study.info['study_abstract']

    it = self.prep_template.investigation_type
    if it in ena_ontology.terms:
        self.investigation_type = it
        self.new_investigation_type = None
    elif it in ena_ontology.user_defined_terms:
        self.investigation_type = 'Other'
        self.new_investigation_type = it
    else:
        # This should never happen
        error_msgs.append("Unrecognized investigation type: '%s'. This "
                          "term is neither one of the official terms nor "
                          "one of the user-defined terms in the ENA "
                          "ontology." % it)

    _, base_fp = get_mountpoint("preprocessed_data")[0]
    self.ebi_dir = '%d_ebi_submission' % artifact_id
    self.full_ebi_dir = join(base_fp, self.ebi_dir)
    self.ascp_reply = join(self.full_ebi_dir, 'ascp_reply.txt')
    self.curl_reply = join(self.full_ebi_dir, 'curl_reply.xml')
    self.xml_dir = join(self.full_ebi_dir, 'xml_dir')
    self.study_xml_fp = None
    self.sample_xml_fp = None
    self.experiment_xml_fp = None
    self.run_xml_fp = None
    self.submission_xml_fp = None
    self.publications = self.study.publications

    # getting the restrictions
    st_missing = self.sample_template.check_restrictions(
        [self.sample_template.columns_restrictions['EBI']])
    pt_missing = self.prep_template.check_restrictions(
        [self.prep_template.columns_restrictions['EBI']])
    # testing if there are any missing columns
    if st_missing:
        error_msgs.append("Missing column in the sample template: %s"
                          % ', '.join(list(st_missing)))
    if pt_missing:
        error_msgs.append("Missing column in the prep template: %s"
                          % ', '.join(list(pt_missing)))

    # generating all samples from sample template
    self.samples = {}
    self.samples_prep = {}
    self.sample_demux_fps = {}
    get_output_fp = partial(join, self.full_ebi_dir)
    nvp = []
    nvim = []
    for k, v in viewitems(self.sample_template):
        if k not in self.prep_template:
            continue
        sample_prep = self.prep_template[k]

        # validating required fields
        if ('platform' not in sample_prep or
                sample_prep['platform'] is None):
            nvp.append(k)
        else:
            platform = sample_prep['platform'].upper()
            if platform not in self.valid_platforms:
                nvp.append(k)
            else:
                if ('instrument_model' not in sample_prep or
                        sample_prep['instrument_model'] is None):
                    nvim.append(k)
                else:
                    im = sample_prep['instrument_model'].upper()
                    if im not in self.valid_platforms[platform]:
                        nvim.append(k)

        self.samples[k] = v
        self.samples_prep[k] = sample_prep
        self.sample_demux_fps[k] = get_output_fp("%s.fastq.gz" % k)

    if nvp:
        error_msgs.append("These samples do not have a valid platform "
                          "(instrument model wasn't checked): %s"
                          % ', '.join(nvp))
    if nvim:
        error_msgs.append("These samples do not have a valid instrument "
                          "model: %s" % ', '.join(nvim))
    if error_msgs:
        error_msgs = ("Errors found during EBI submission for study #%d, "
                      "artifact #%d and prep template #%d:\n%s"
                      % (self.study.id, artifact_id,
                         self.prep_template.id, '\n'.join(error_msgs)))
        LogEntry.create('Runtime', error_msgs)
        raise EBISubmissionError(error_msgs)

    self._sample_aliases = {}
    self._experiment_aliases = {}
    self._run_aliases = {}

    self._ebi_sample_accessions = \
        self.sample_template.ebi_sample_accessions
    self._ebi_experiment_accessions = \
        self.prep_template.ebi_experiment_accessions

def test_convert_to_id(self):
    """Tests that ids are returned correctly"""
    self.assertEqual(convert_to_id("directory", "filepath_type"), 7)

def make_environment(load_ontologies, download_reference, add_demo_user):
    r"""Creates the new environment specified in the configuration

    Parameters
    ----------
    load_ontologies : bool
        Whether or not to retrieve and unpack ontology information
    download_reference : bool
        Whether or not to download greengenes reference files
    add_demo_user : bool
        Whether or not to add a demo user to the database with username
        [email protected] and password "password"

    Raises
    ------
    IOError
        If `download_reference` is true but one of the files cannot be
        retrieved
    QiitaEnvironmentError
        If the environment already exists
    """
    if load_ontologies and qiita_config.test_environment:
        raise EnvironmentError("Cannot load ontologies in a test "
                               "environment! Pass --no-load-ontologies, "
                               "or set TEST_ENVIRONMENT = FALSE in your "
                               "configuration")

    # Connect to the postgres server
    admin_conn = SQLConnectionHandler(admin='admin_without_database')

    # Check that it does not already exist
    if _check_db_exists(qiita_config.database, admin_conn):
        raise QiitaEnvironmentError(
            "Database {0} already present on the system. You can drop it "
            "by running 'qiita_env drop'".format(qiita_config.database))

    # Create the database
    print('Creating database')
    admin_conn.set_autocommit('on')
    admin_conn.execute('CREATE DATABASE %s' % qiita_config.database)
    admin_conn.set_autocommit('off')

    del admin_conn

    # Connect to the postgres server, but this time to the just created db
    conn = SQLConnectionHandler()

    print('Inserting database metadata')
    # Build the SQL layout into the database
    with open(SETTINGS_FP, 'U') as f:
        conn.execute(f.read())

    # Insert the settings values to the database
    conn.execute("INSERT INTO settings (test, base_data_dir, base_work_dir)"
                 " VALUES (%s, %s, %s)",
                 (qiita_config.test_environment, qiita_config.base_data_dir,
                  qiita_config.working_dir))

    create_layout_and_patch(conn, verbose=True)

    if load_ontologies:
        _add_ontology_data(conn)

        # these values can only be added if the environment is being
        # loaded with the ontologies, thus this cannot exist inside
        # initialize.sql because otherwise loading the ontologies would
        # be a requirement
        ontology = Ontology(convert_to_id('ENA', 'ontology'))
        ontology.add_user_defined_term('Amplicon Sequencing')

    if download_reference:
        _download_reference_files(conn)

    # we don't do this if it's a test environment because populate.sql
    # already adds this user...
    if add_demo_user and not qiita_config.test_environment:
        conn.execute("""
            INSERT INTO qiita.qiita_user (email, user_level_id, password,
                                          name, affiliation, address,
                                          phone)
            VALUES
            ('*****@*****.**', 4,
             '$2a$12$gnUi8Qg.0tvW243v889BhOBhWLIHyIJjjgaG6dxuRJkUM8nXG9Efe',
             'Demo', 'Qitta Dev', '1345 Colorado Avenue',
             '303-492-1984')""")
        conn.execute("""
            INSERT INTO qiita.analysis (email, name, description, dflt,
                                        analysis_status_id)
            VALUES ('*****@*****.**', '[email protected]', 'dflt',
                    't', 1)""")

        print('Demo user successfully created')

    if qiita_config.test_environment:
        _populate_test_db(conn)
        print('Test environment successfully created')
    else:
        print('Production environment successfully created')