def test_get_filepaths(self):
    """Correctly returns the filepaths to the preprocessed files"""
    observed = PreprocessedData(1).get_filepaths()
    expected = [
        (join(self.db_test_ppd_dir, '1_seqs.fna'), 4),
        (join(self.db_test_ppd_dir, '1_seqs.qual'), 5),
    ]
    self.assertEqual(observed, expected)
def post(self, preprocessed_data_id):
    """Queue a VAMPS submission job for the given preprocessed data.

    Only admins may submit. Refuses to resubmit data whose state is
    already 'submitting'/'success', or that does not have exactly one
    demultiplexed file attached.
    """
    # make sure user is admin and can therefore actually submit to VAMPS
    if self.current_user.level != 'admin':
        raise HTTPError(403, "User %s cannot submit to VAMPS!" %
                        self.current_user.id)

    msg = ''
    msg_level = 'success'
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    state = preprocessed_data.submitted_to_vamps_status()
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)

    if state in ('submitting', 'success'):
        msg = "Cannot resubmit! Current state is: %s" % state
        msg_level = 'danger'
    elif demux_length != 1:
        # BUG FIX: the original did '"..." % state' on a format string with
        # no placeholder, which raises TypeError at runtime.
        msg = "The study doesn't have demux files or have too many"
        msg_level = 'danger'
    else:
        channel = self.current_user.id
        job_id = submit(channel, submit_to_VAMPS, int(preprocessed_data_id))
        # BUG FIX: return after rendering the wait page; falling through to
        # display_template() would attempt a second render on the same
        # (already finished) request.
        return self.render(
            'compute_wait.html', job_id=job_id, title='VAMPS Submission',
            completion_redirect='/compute_complete/%s' % job_id)

    return self.display_template(preprocessed_data_id, msg, msg_level)
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id

    Returns
    -------
    bool
        True if VAMPS acknowledged the upload, False otherwise

    Raises
    ------
    ValueError
        If the data has already been submitted (or is being submitted)
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(targz_folder, '%d_%d_%d.tgz' % (study.id,
                                                    prep_template.id,
                                                    preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')
    targz.close()

    # submitting
    # BUG FIX: the original format string had 3 '%s' placeholders but 4
    # arguments, raising TypeError before curl was ever invoked; the
    # password now has its own placeholder.
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" % (qiita_config.vamps_user,
                                    qiita_config.vamps_pass,
                                    targz_fp,
                                    qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    # VAMPS replies with this exact page on a successful upload
    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")
    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
def submit_VAMPS(preprocessed_data_id):
    """Submit preprocessed data to VAMPS

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id

    Returns
    -------
    bool
        True if VAMPS acknowledged the upload, False otherwise

    Raises
    ------
    ValueError
        If the data has already been submitted (or is being submitted)
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    status = preprocessed_data.submitted_to_vamps_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    preprocessed_data.update_vamps_status('submitting')

    # Generating a tgz
    targz_folder = mkdtemp(prefix=qiita_config.working_dir)
    targz_fp = join(
        targz_folder,
        '%d_%d_%d.tgz' % (study.id, prep_template.id, preprocessed_data.id))
    targz = taropen(targz_fp, mode='w:gz')

    # adding sample/prep
    samp_fp = join(targz_folder, 'sample_metadata.txt')
    sample_template.to_file(samp_fp)
    targz.add(samp_fp, arcname='sample_metadata.txt')
    prep_fp = join(targz_folder, 'prep_metadata.txt')
    prep_template.to_file(prep_fp)
    targz.add(prep_fp, arcname='prep_metadata.txt')

    # adding preprocessed data
    for _, fp, fp_type in preprocessed_data.get_filepaths():
        if fp_type == 'preprocessed_fasta':
            targz.add(fp, arcname='preprocessed_fasta.fna')
    targz.close()

    # submitting
    # BUG FIX: the original format string had 3 '%s' placeholders but 4
    # arguments, raising TypeError before curl was ever invoked; the
    # password now has its own placeholder.
    cmd = ("curl -F user=%s -F pass='%s' -F uploadFile=@%s -F "
           "press=UploadFile %s" % (qiita_config.vamps_user,
                                    qiita_config.vamps_pass,
                                    targz_fp,
                                    qiita_config.vamps_url))
    obs, _, _ = system_call(cmd)

    # VAMPS replies with this exact page on a successful upload
    exp = ("<html>\n<head>\n<title>Process Uploaded File</title>\n</head>\n"
           "<body>\n</body>\n</html>")
    if obs != exp:
        preprocessed_data.update_vamps_status('failure')
        return False
    else:
        preprocessed_data.update_vamps_status('success')
        return True
def test_get_filepaths(self):
    """Correctly returns the filepaths to the preprocessed files"""
    observed = PreprocessedData(1).get_filepaths()
    expected = [
        (join(self.db_test_ppd_dir, '1_seqs.fna'),
         "preprocessed_sequences"),
        (join(self.db_test_ppd_dir, '1_seqs.qual'),
         "preprocessed_sequences_qual"),
    ]
    self.assertEqual(observed, expected)
def test_update_preprocessed_data_from_cmd(self):
    expected_ppd = PreprocessedData(Study(1).preprocessed_data()[0])
    expected_fps = expected_ppd.get_filepaths()
    # The original paths mush exist, but they're not included in the test
    # so create them here
    for _, filepath, _ in expected_fps:
        with open(filepath, 'w') as fh:
            fh.write("")
    next_fp_id = get_count('qiita.filepath') + 1
    log_fp = join(self.db_ppd_dir,
                  "%s_split_library_log.txt" % expected_ppd.id)
    expected_fps.append((next_fp_id, log_fp, 'log'))

    ppd = update_preprocessed_data_from_cmd(self.test_slo, 1)

    # Check that the modified preprocessed data is the correct one
    self.assertEqual(ppd.id, expected_ppd.id)

    # Check that the filepaths returned are correct
    # We need to sort the list returned from the db because the ordering
    # on that list is based on db modification time, rather than id
    observed_fps = sorted(ppd.get_filepaths())
    self.assertEqual(observed_fps, sorted(expected_fps))

    # Check that the checksums have been updated
    sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

    def _checksum(fp_id):
        # Fetch the stored checksum for a single filepath id
        return self.conn_handler.execute_fetchone(sql, (fp_id,))[0]

    # Checksum of the fasta file
    self.assertEqual(_checksum(observed_fps[0][0]), '3532748626')

    # Checksum of the fastq file
    self.assertEqual(_checksum(observed_fps[1][0]), '2958832064')

    # Checksum of the demux file
    # The checksum is generated dynamically, so the checksum changes
    # We are going to test that the checksum is not the one that was
    # before, which corresponds to an empty file
    demux_checksum = _checksum(observed_fps[2][0])
    self.assertTrue(isinstance(demux_checksum, str))
    self.assertNotEqual(demux_checksum, '852952723')
    self.assertTrue(len(demux_checksum) > 0)

    # Checksum of the log file
    self.assertEqual(_checksum(observed_fps[3][0]), '626839734')
def test_get_filepaths(self):
    """Correctly returns the filepaths to the preprocessed files"""
    observed = PreprocessedData(1).get_filepaths()
    expected = [
        (5, join(self.db_test_ppd_dir, '1_seqs.fna'),
         "preprocessed_fasta"),
        (6, join(self.db_test_ppd_dir, '1_seqs.qual'),
         "preprocessed_fastq"),
        (7, join(self.db_test_ppd_dir, '1_seqs.demux'),
         "preprocessed_demux"),
    ]
    self.assertEqual(observed, expected)
def test_update_preprocessed_data_from_cmd(self):
    """update_preprocessed_data_from_cmd replaces files and checksums."""
    exp_ppd = PreprocessedData(Study(1).preprocessed_data()[0])
    exp_fps = exp_ppd.get_filepaths()
    # The original paths mush exist, but they're not included in the test
    # so create them here
    for _, fp, _ in exp_fps:
        with open(fp, 'w') as f:
            f.write("")
    next_fp_id = get_count('qiita.filepath') + 1
    exp_fps.append((next_fp_id,
                    join(self.db_ppd_dir,
                         "%s_split_library_log.txt" % exp_ppd.id),
                    'log'))

    ppd = update_preprocessed_data_from_cmd(self.test_slo, 1)

    # Check that the modified preprocessed data is the correct one
    self.assertEqual(ppd.id, exp_ppd.id)

    # Check that the filepaths returned are correct
    # We need to sort the list returned from the db because the ordering
    # on that list is based on db modification time, rather than id
    obs_fps = sorted(ppd.get_filepaths())
    # CONSISTENCY FIX: sort the expected list too (as the sibling version
    # of this test does); comparing a sorted list against an unsorted one
    # makes the assertion order-dependent.
    self.assertEqual(obs_fps, sorted(exp_fps))

    # Check that the checksums have been updated
    sql = "SELECT checksum FROM qiita.filepath WHERE filepath_id=%s"

    # Checksum of the fasta file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[0][0], ))[0]
    self.assertEqual(obs_checksum, '3532748626')

    # Checksum of the fastq file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[1][0], ))[0]
    self.assertEqual(obs_checksum, '2958832064')

    # Checksum of the demux file
    # The checksum is generated dynamically, so the checksum changes
    # We are going to test that the checksum is not the one that was
    # before, which corresponds to an empty file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[2][0], ))[0]
    self.assertTrue(isinstance(obs_checksum, str))
    self.assertNotEqual(obs_checksum, '852952723')
    self.assertTrue(len(obs_checksum) > 0)

    # Checksum of the log file
    obs_checksum = self.conn_handler.execute_fetchone(
        sql, (obs_fps[3][0], ))[0]
    self.assertEqual(obs_checksum, '626839734')
def _get_template_variables(self, preprocessed_data_id, callback):
    """Generates all the variables needed to render the template

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data identifier
    callback : function
        The callback function to call with the results once the
        processing is done

    Raises
    ------
    HTTPError
        If the preprocessed data does not have a log file
    """
    # Get the objects and check user privileges
    ppd = PreprocessedData(preprocessed_data_id)
    study = Study(ppd.study)
    check_access(self.current_user, study, raise_error=True)

    # Get the return address
    default_path = (
        '/study/description/%d?top_tab=preprocessed_data_tab&sub_tab=%s'
        % (study.id, preprocessed_data_id))
    back_button_path = self.get_argument('back_button_path', default_path)

    # Group every filepath attached to the preprocessed data by its type
    files = defaultdict(list)
    for _, fp, fpt in ppd.get_filepaths():
        files[fpt].append(fp)

    try:
        log_path = files['log'][0]
    except KeyError:
        raise HTTPError(500, "Log file not found in preprocessed data %s"
                        % preprocessed_data_id)

    with open(log_path, 'U') as f:
        contents = f.read()
    # Escape whitespace for HTML display
    contents = contents.replace('\n', '<br/>')
    contents = contents.replace('\t', ' ')

    title = 'Preprocessed Data: %d' % preprocessed_data_id
    callback((title, contents, back_button_path))
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)
    else:
        user = self.current_user
        if user.level != "admin":
            raise HTTPError(403, "No permissions of admin, "
                            "get/VAMPSSubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [
        ("Number of samples", len(prep_template)),
        ("Number of metadata headers", len(sample_template.categories())),
    ]

    # Collect the demultiplexed files attached to this preprocessed data
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == "preprocessed_demux"]
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = "danger"
    elif demux_length > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = "danger"
    else:
        # Exactly one demux file: report its sequence count
        demux_file_stats = demux_stats(demux[0])
        stats.append(("Number of sequences", demux_file_stats.n))
        msg_level = "success"

    self.render(
        "vamps_submission.html",
        study_title=study.title,
        stats=stats,
        message=msg,
        study_id=study.id,
        level=msg_level,
        preprocessed_data_id=preprocessed_data_id,
    )
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)
    else:
        user = User(self.current_user)
        if user.level != 'admin':
            raise HTTPError(403, "No permissions of admin, "
                            "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [
        ('Number of samples', len(prep_template)),
        ('Number of metadata headers',
         len(sample_template.metadata_headers())),
    ]

    # Collect the demultiplexed files attached to this preprocessed data
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = 'danger'
    else:
        # Exactly one demux file: report its sequence count
        demux_file_stats = demux_stats(demux[0])
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'

    self.render('ebi_submission.html',
                user=self.current_user,
                study_title=study.title,
                stats=stats,
                message=msg,
                study_id=study.id,
                level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type)
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)
    else:
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, "No permissions of admin, "
                            "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [
        ('Number of samples', len(prep_template)),
        ('Number of metadata headers',
         len(sample_template.metadata_headers())),
    ]

    # Collect the demultiplexed files attached to this preprocessed data
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = 'danger'
    else:
        # Exactly one demux file: report its sequence count
        demux_file_stats = demux_stats(demux[0])
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'

    self.render('ebi_submission.html',
                study_title=study.title,
                stats=stats,
                message=msg,
                study_id=study.id,
                level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type)
def get(self, preprocessed_data_id):
    """Render the preprocessed-data log file as an HTML text page."""
    ppd_id = int(preprocessed_data_id)
    ppd = PreprocessedData(ppd_id)
    study = Study(ppd.study)
    check_access(User(self.current_user), study, raise_error=True)

    back_button_path = self.get_argument(
        'back_button_path', '/study/description/%d' % study.id)

    # Bucket the attached filepaths by filepath type
    files = defaultdict(list)
    for fpid, fp, fpt in ppd.get_filepaths():
        files[fpt].append(fp)

    with open(files['log'][0], 'U') as f:
        contents = f.read()
    # Escape whitespace for HTML display
    contents = contents.replace('\n', '<br/>')
    contents = contents.replace('\t', ' ')

    self.render('text_file.html',
                title='Preprocessed Data: %d' % ppd_id,
                contents=contents,
                user=self.current_user,
                back_button_path=back_button_path)
def display_template(self, preprocessed_data_id, msg, msg_level):
    """Simple function to avoid duplication of code"""
    preprocessed_data_id = int(preprocessed_data_id)
    try:
        preprocessed_data = PreprocessedData(preprocessed_data_id)
    except QiitaDBUnknownIDError:
        raise HTTPError(404, "PreprocessedData %d does not exist!" %
                        preprocessed_data_id)
    else:
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, "No permissions of admin, "
                            "get/EBISubmitHandler: %s!" % user.id)

    prep_template = PrepTemplate(preprocessed_data.prep_template)
    sample_template = SampleTemplate(preprocessed_data.study)
    study = Study(preprocessed_data.study)

    stats = [
        ('Number of samples', len(prep_template)),
        ('Number of metadata headers',
         len(sample_template.categories())),
    ]

    # Collect the demultiplexed files attached to this preprocessed data
    demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
             if ftype == 'preprocessed_demux']
    demux_length = len(demux)

    if not demux_length:
        msg = ("Study does not appear to have demultiplexed "
               "sequences associated")
        msg_level = 'danger'
    elif demux_length > 1:
        msg = "Study appears to have multiple demultiplexed files!"
        msg_level = 'danger'
    else:
        # Exactly one demux file: report its sequence count
        demux_file_stats = demux_stats(demux[0])
        stats.append(('Number of sequences', demux_file_stats.n))
        msg_level = 'success'

    # Check if the templates have all the required columns for EBI
    pt_missing_cols = prep_template.check_restrictions(
        [PREP_TEMPLATE_COLUMNS['EBI']])
    st_missing_cols = sample_template.check_restrictions(
        [SAMPLE_TEMPLATE_COLUMNS['EBI']])
    allow_submission = (len(pt_missing_cols) == 0 and
                        len(st_missing_cols) == 0)

    if allow_submission:
        ebi_disabled_msg = None
    else:
        # Build the explanation shown to the user when EBI submission is
        # blocked by missing template columns
        msg_list = ["Submission to EBI disabled due to missing columns:"]
        if len(pt_missing_cols) > 0:
            msg_list.append("Columns missing in prep template: %s"
                            % ', '.join(pt_missing_cols))
        if len(st_missing_cols) > 0:
            msg_list.append("Columns missing in sample template: %s"
                            % ', '.join(st_missing_cols))
        ebi_disabled_msg = "<br/>".join(msg_list)

    self.render('ebi_submission.html',
                study_title=study.title,
                stats=stats,
                message=msg,
                study_id=study.id,
                level=msg_level,
                preprocessed_data_id=preprocessed_data_id,
                investigation_type=prep_template.investigation_type,
                allow_submission=allow_submission,
                ebi_disabled_msg=ebi_disabled_msg)
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple of (str or None, str or None)
        The study accession and submission accession (None, None when
        ``send`` is False)

    Raises
    ------
    ValueError
        If the data is already submitting/submitted, or the investigation
        type is not recognized
    IOError
        If the submission output folder already exists

    Notes
    -----
    If fastq_dir_fp is passed, it must not contain any empty files, or
    gzipped empty files
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ("submitting", "success"):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status("submitting")

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id("ENA", "ontology"))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = "Other"
        new_investigation_type = current_type
    else:
        # This should never happen
        # BUG FIX: the original never applied "% current_type", so the
        # error message contained a literal '%s'.
        raise ValueError(
            "Unrecognized investigation type: '%s'. This term "
            "is neither one of the official terms nor one of the "
            "user-defined terms in the ENA ontology" % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it
        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == "preprocessed_demux"][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()
        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                wrote_sequences = False
                with gzopen(sample_fp, "w") as fh:
                    for record in iterator:
                        fh.write(record)
                        wrote_sequences = True
                # EBI rejects empty (gzipped) files, so drop them
                if not wrote_sequences:
                    remove(sample_fp)

    output_dir = fastq_dir_fp + "_submission"

    samp_fp = join(fastq_dir_fp, "sample_metadata.txt")
    prep_fp = join(fastq_dir_fp, "prep_metadata.txt")

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp("study.xml")
    sample_fp = get_output_fp("sample.xml")
    experiment_fp = get_output_fp("experiment.xml")
    run_fp = get_output_fp("run.xml")
    submission_fp = get_output_fp("submission.xml")

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError("The output folder already exists: %s" % output_dir)

    with open(samp_fp, "U") as st, open(prep_fp, "U") as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str, study.title,
            study.info["study_abstract"], investigation_type, st, pt,
            fastq_dir_fp, new_investigation_type=new_investigation_type,
            pmids=study.pmids,
        )

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp,
                                   run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status("failed")
            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status("success",
                                                  study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession
def submit_EBI(preprocessed_data_id, action, send, fastq_dir_fp=None):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocesssed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    fastq_dir_fp : str, optional
        The fastq filepath

    Returns
    -------
    tuple of (str or None, str or None)
        The study accession and submission accession (None, None when
        ``send`` is False)

    Raises
    ------
    ValueError
        If the data is already submitting/submitted, or the investigation
        type is not recognized
    IOError
        If the submission output folder already exists
    """
    preprocessed_data = PreprocessedData(preprocessed_data_id)
    preprocessed_data_id_str = str(preprocessed_data_id)
    study = Study(preprocessed_data.study)
    sample_template = SampleTemplate(study.sample_template)
    prep_template = PrepTemplate(preprocessed_data.prep_template)

    investigation_type = None
    new_investigation_type = None

    status = preprocessed_data.submitted_to_insdc_status()
    if status in ('submitting', 'success'):
        raise ValueError("Cannot resubmit! Current status is: %s" % status)

    if send:
        # If we intend actually to send the files, then change the status in
        # the database
        preprocessed_data.update_insdc_status('submitting')

    # we need to figure out whether the investigation type is a known one
    # or if we have to submit a "new_investigation_type" to EBI
    current_type = prep_template.investigation_type
    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    if current_type in ena_ontology.terms:
        investigation_type = current_type
    elif current_type in ena_ontology.user_defined_terms:
        investigation_type = 'Other'
        new_investigation_type = current_type
    else:
        # This should never happen
        # BUG FIX: the original never applied "% current_type", so the
        # error message contained a literal '%s'.
        raise ValueError(
            "Unrecognized investigation type: '%s'. This term "
            "is neither one of the official terms nor one of the "
            "user-defined terms in the ENA ontology" % current_type)

    if fastq_dir_fp is not None:
        # If the user specifies a FASTQ directory, use it
        # Set demux_samples to None so that MetadataTemplate.to_file will put
        # all samples in the template files
        demux_samples = None
    else:
        # If the user does not specify a FASTQ directory, create one and
        # re-serialize the per-sample FASTQs from the demux file
        fastq_dir_fp = mkdtemp(prefix=qiita_config.working_dir)
        demux = [path for _, path, ftype in preprocessed_data.get_filepaths()
                 if ftype == 'preprocessed_demux'][0]

        # Keep track of which files were actually in the demux file so that we
        # can write those rows to the prep and samples templates
        demux_samples = set()
        with open_file(demux) as demux_fh:
            for samp, iterator in to_per_sample_ascii(demux_fh,
                                                      list(sample_template)):
                demux_samples.add(samp)
                sample_fp = join(fastq_dir_fp, "%s.fastq.gz" % samp)
                with gzopen(sample_fp, 'w') as fh:
                    for record in iterator:
                        fh.write(record)

    output_dir = fastq_dir_fp + '_submission'

    samp_fp = join(fastq_dir_fp, 'sample_metadata.txt')
    prep_fp = join(fastq_dir_fp, 'prep_metadata.txt')

    sample_template.to_file(samp_fp, demux_samples)
    prep_template.to_file(prep_fp, demux_samples)

    # Get specific output directory and set filepaths
    get_output_fp = partial(join, output_dir)
    study_fp = get_output_fp('study.xml')
    sample_fp = get_output_fp('sample.xml')
    experiment_fp = get_output_fp('experiment.xml')
    run_fp = get_output_fp('run.xml')
    submission_fp = get_output_fp('submission.xml')

    if not isdir(output_dir):
        makedirs(output_dir)
    else:
        raise IOError('The output folder already exists: %s' % output_dir)

    with open(samp_fp, 'U') as st, open(prep_fp, 'U') as pt:
        submission = EBISubmission.from_templates_and_per_sample_fastqs(
            preprocessed_data_id_str, study.title,
            study.info['study_abstract'], investigation_type, st, pt,
            fastq_dir_fp, new_investigation_type=new_investigation_type,
            pmids=study.pmids)

    submission.write_all_xml_files(study_fp, sample_fp, experiment_fp,
                                   run_fp, submission_fp, action)

    if send:
        submission.send_sequences()
        study_accession, submission_accession = submission.send_xml()

        if study_accession is None or submission_accession is None:
            preprocessed_data.update_insdc_status('failed')
            raise ComputeError("EBI Submission failed!")
        else:
            preprocessed_data.update_insdc_status('success',
                                                  study_accession,
                                                  submission_accession)
    else:
        study_accession, submission_accession = None, None

    return study_accession, submission_accession