def test_init_bad_parameters(self):
    '''Constructing an ObjectCreator with bad or incomplete arguments must raise.

    Every call passes only the seven positional arguments, so none of the
    type-specific keyword options are supplied.
    '''
    bad_object_types = (
        'not_a_project',  # presumably rejected as an unknown object type
        'project',  # missing project_description
        'sample',  # missing taxon_id
        'experiment',  # missing study_accession, sample_accession, library_name, platform, instrument
        'run',  # missing experiment_accession, reads_1, md5_1, reads_2, md5_2
    )

    for object_type in bad_object_types:
        with self.assertRaises(Exception):
            object_creator.ObjectCreator(
                'ini_file',
                object_type,
                'obj.xml',
                'obj_alias',
                'sub_alias',
                'center 42',
                'title',
            )
def _submit_study_object(self, data_in):
    """Submit this dataset's project (study) object unless it already exists.

    The presence of ``self.project_xml`` on disk is treated as the marker
    that the project was submitted before. When the file is absent, a
    project object is built and run through ``object_creator.ObjectCreator``
    and the accession from its receipt is written into every row of
    ``data_in`` and into the ``Sample`` table.

    Args:
        data_in: rows of this dataset, each indexable by
            ``"ena_study_accession"`` and ``"sample_id"``. Rows are
            updated in place with the new study accession.

    Raises:
        Error: if the rows carry more than one distinct study accession,
            if the submission is not successful, or if the receipt has no
            ``PROJECT`` accession.
        AssertionError: if the accessions already in the rows are
            inconsistent with whether ``self.project_xml`` exists.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(self.project_xml_dir, exist_ok=True)

    study_accessions_from_db = {x["ena_study_accession"] for x in data_in}
    if len(study_accessions_from_db) > 1:
        raise Error(
            "Error! More than one study ID found for dataset "
            + self.dataset_name
            + ". Got: "
            + str(study_accessions_from_db)
        )

    if os.path.exists(self.project_xml):
        # Project XML already on disk: nothing to submit, but the rows
        # are expected to agree on exactly one accession value.
        assert len(study_accessions_from_db) == 1
        return

    # No project XML yet, so no row may already carry a study accession.
    assert study_accessions_from_db == {None}

    project_alias = "project." + self.dataset_name
    submit_alias = "submit." + project_alias
    center_name = DatasetSubmitter._ena_center_name_from_db_data(
        data_in, number_to_name_dict=self.centre_number_to_name
    )
    title = self.study_prefix + ". " + center_name + ". " + self.dataset_name
    project_description = title  # the title doubles as the description

    project_creator = object_creator.ObjectCreator(
        self.ini_file,
        "project",
        self.project_xml,
        project_alias,
        submit_alias,
        center_name,
        title,
        project_description,
        use_test_server=self.use_test_server,
        unit_test=self.unit_test,
        broker_name=self.broker_name,
    )
    project_creator.run()

    if not project_creator.submission_receipt.successful:
        raise Error("Error submitting project to ena. XML file: " + self.project_xml)

    ena_study_accession = project_creator.submission_receipt.accessions.get("PROJECT")
    if ena_study_accession is None:
        raise Error(
            "Error getting project accession from " + project_creator.receipt_xml
        )

    # Record the new accession both in memory and in the database.
    for row in data_in:
        row["ena_study_accession"] = ena_study_accession
        self.db.update_row(
            "Sample",
            {"sample_id": row["sample_id"]},
            {"ena_study_accession": ena_study_accession},
        )
    self.db.commit()
def _submit_sample_objects(self, data_in):
    """Submit a sample object for every row that has no sample accession yet.

    Rows whose ``"ena_sample_accession"`` is already set are skipped. Rows
    sharing a ``"sample_id"`` with a row handled earlier in this call reuse
    that row's result instead of submitting again. For every other row a
    sample object is built and run through ``object_creator.ObjectCreator``;
    the resulting accession, or the string ``"FAIL"``, is stored in the row
    and in the ``Sample`` table and committed straight away.

    Args:
        data_in: rows indexable by ``"ena_sample_accession"``,
            ``"sample_id"``, ``"isolate_id"``, ``"subject_id"`` and
            ``"sample_id_from_lab"``. Rows are updated in place.
    """
    submitted_samples = {}  # sample id -> ena accession

    for row in data_in:
        if row["ena_sample_accession"] is not None:
            continue

        if row["sample_id"] in submitted_samples:
            # Same sample seen earlier in this call: reuse its accession.
            row["ena_sample_accession"] = submitted_samples[row["sample_id"]]
            continue

        iso_dir = isolate_dir.IsolateDir(
            self.pipeline_root, row["sample_id"], row["isolate_id"]
        )
        object_xml = iso_dir.xml_submission_file("sample")
        object_alias = "sample." + str(row["sample_id"])
        submit_alias = "submit." + object_alias
        center_name = DatasetSubmitter._ena_center_name_from_db_data(
            data_in, number_to_name_dict=self.centre_number_to_name
        )
        title = (
            row["subject_id"] + ". " + center_name + ". " + row["sample_id_from_lab"]
        )

        obj_creator = object_creator.ObjectCreator(
            self.ini_file,
            "sample",
            object_xml,
            object_alias,
            submit_alias,
            center_name,
            title,
            taxon_id=self.taxon_id,
            use_test_server=self.use_test_server,
            unit_test=self.unit_test,
            broker_name=self.broker_name,
        )
        obj_creator.run()

        if obj_creator.submission_receipt.successful:
            try:
                sample_accession = obj_creator.submission_receipt.accessions["SAMPLE"]
            except (KeyError, TypeError):
                # Receipt reports success but yields no SAMPLE accession.
                # Only lookup failures are caught, so KeyboardInterrupt and
                # SystemExit are no longer recorded as a failed submission.
                sample_accession = "FAIL"
        else:
            sample_accession = "FAIL"

        row["ena_sample_accession"] = sample_accession
        self.db.update_row(
            "Sample",
            {"sample_id": row["sample_id"]},
            {"ena_sample_accession": sample_accession},
        )
        # Commit per sample so the result is stored before the next submission.
        self.db.commit()
        submitted_samples[row["sample_id"]] = sample_accession
def _submit_experiment_objects(self, data_in):
    '''Submit an experiment object for every row that has no experiment accession yet.

    A row is skipped when its 'ena_experiment_accession' is already set or
    when its 'ena_sample_accession' is the string 'FAIL'. Rows sharing an
    'isolate_id' with a row handled earlier in this call reuse that row's
    result. For every other row an experiment object is built and run
    through object_creator.ObjectCreator; the resulting accession, or the
    string 'FAIL', is stored in the row and in the 'Isolate' table and
    committed straight away.

    Args:
        data_in: rows indexable by 'ena_experiment_accession',
            'ena_sample_accession', 'ena_study_accession', 'isolate_id',
            'sample_id', 'subject_id', 'sample_id_from_lab',
            'isolate_number_from_lab' and 'instrument_model'. Rows are
            updated in place.
    '''
    submitted_isolates = {}  # isolate id -> ena accession

    for row in data_in:
        already_done = row['ena_experiment_accession'] is not None
        if already_done or row['ena_sample_accession'] == 'FAIL':
            continue

        if row['isolate_id'] in submitted_isolates:
            # Same isolate seen earlier in this call: reuse its accession.
            row['ena_experiment_accession'] = submitted_isolates[row['isolate_id']]
            continue

        iso_dir = isolate_dir.IsolateDir(
            self.pipeline_root, row['sample_id'], row['isolate_id'])
        object_xml = iso_dir.xml_submission_file('experiment')
        object_alias = 'experiment.' + str(row['isolate_id'])
        submit_alias = 'submit.' + object_alias
        center_name = DatasetSubmitter._ena_center_name_from_db_data(
            data_in, number_to_name_dict=self.centre_number_to_name)
        title = (row['subject_id'] + '. ' + center_name + '. '
                 + row['sample_id_from_lab'] + '. '
                 + row['isolate_number_from_lab'])
        library_name = title  # the title doubles as the library name

        obj_creator = object_creator.ObjectCreator(
            self.ini_file,
            'experiment',
            object_xml,
            object_alias,
            submit_alias,
            center_name,
            title,
            study_accession=row['ena_study_accession'],
            sample_accession=row['ena_sample_accession'],
            library_name=library_name,
            platform='ILLUMINA',
            instrument=row['instrument_model'],
            use_test_server=self.use_test_server,
            unit_test=self.unit_test,
            broker_name=self.broker_name,
        )
        obj_creator.run()

        if obj_creator.submission_receipt.successful:
            try:
                experiment_accession = obj_creator.submission_receipt.accessions['EXPERIMENT']
            except (KeyError, TypeError):
                # Receipt reports success but yields no EXPERIMENT accession.
                # Only lookup failures are caught, so KeyboardInterrupt and
                # SystemExit are no longer recorded as a failed submission.
                experiment_accession = 'FAIL'
        else:
            experiment_accession = 'FAIL'

        row['ena_experiment_accession'] = experiment_accession
        self.db.update_row(
            'Isolate',
            {'isolate_id': row['isolate_id']},
            {'ena_experiment_accession': experiment_accession})
        # Commit per isolate so the result is stored before the next submission.
        self.db.commit()
        submitted_isolates[row['isolate_id']] = experiment_accession
def test_run_run(self):
    '''A run object created with unit_test='success' submits successfully.

    Also checks that the submission and receipt XML files exist afterwards,
    and removes every file the test produced.
    '''
    run_xml = 'tmp.object_creator.run.obj.xml'
    creator = object_creator.ObjectCreator(
        os.path.join(data_dir, 'conf.ini'),
        'run',
        run_xml,
        'object alias',
        'sub alias',
        'center 42',
        'title',
        experiment_accession='ERX123',
        reads_1='reads1.fq',
        md5_1='md51',
        reads_2='reads2.fq',
        md5_2='md52',
        unit_test='success',
    )
    creator.run()
    self.assertTrue(creator.submission_receipt.successful)

    # os.unlink raises if the object XML is missing, so this doubles as a check.
    os.unlink(run_xml)
    for attribute in ('submission_xml', 'receipt_xml'):
        generated_file = getattr(creator, attribute)
        self.assertTrue(os.path.exists(generated_file))
        os.unlink(generated_file)
def test_run_experiment(self):
    '''An experiment object created with unit_test='success' submits successfully.

    Also checks that the submission and receipt XML files exist afterwards,
    and removes every file the test produced.
    '''
    experiment_xml = 'tmp.object_creator.experiment.obj.xml'
    creator = object_creator.ObjectCreator(
        os.path.join(data_dir, 'conf.ini'),
        'experiment',
        experiment_xml,
        'objct alias',
        'sub alias',
        'center 42',
        'title',
        study_accession='ERP123',
        sample_accession='ERS42',
        library_name='lib name',
        platform='ILLUMINA',
        instrument='HISEQ',
        unit_test='success',
    )
    creator.run()
    self.assertTrue(creator.submission_receipt.successful)

    # os.unlink raises if the object XML is missing, so this doubles as a check.
    os.unlink(experiment_xml)
    for attribute in ('submission_xml', 'receipt_xml'):
        generated_file = getattr(creator, attribute)
        self.assertTrue(os.path.exists(generated_file))
        os.unlink(generated_file)
def test_run_sample(self):
    '''A sample object created with unit_test='success' submits successfully.

    Also checks that the submission and receipt XML files exist afterwards,
    and removes every file the test produced.
    '''
    sample_xml = 'tmp.object_creator.sample.obj.xml'
    creator = object_creator.ObjectCreator(
        os.path.join(data_dir, 'conf.ini'),
        'sample',
        sample_xml,
        'objct alias',
        'sub alias',
        'center 42',
        'title',
        taxon_id=42,
        unit_test='success',
    )
    creator.run()
    self.assertTrue(creator.submission_receipt.successful)

    # os.unlink raises if the object XML is missing, so this doubles as a check.
    os.unlink(sample_xml)
    for attribute in ('submission_xml', 'receipt_xml'):
        generated_file = getattr(creator, attribute)
        self.assertTrue(os.path.exists(generated_file))
        os.unlink(generated_file)
def test_run_project(self):
    '''A project object created with unit_test='success' submits successfully.

    Also checks that the submission and receipt XML files exist afterwards,
    and removes every file the test produced.
    '''
    project_xml = 'tmp.object_creator.project.obj.xml'
    creator = object_creator.ObjectCreator(
        os.path.join(data_dir, 'conf.ini'),
        'project',
        project_xml,
        'objct alias',
        'sub alias',
        'center 42',
        'title',
        project_description='project description',
        unit_test='success',
    )
    creator.run()
    self.assertTrue(creator.submission_receipt.successful)

    # os.unlink raises if the object XML is missing, so this doubles as a check.
    os.unlink(project_xml)
    for attribute in ('submission_xml', 'receipt_xml'):
        generated_file = getattr(creator, attribute)
        self.assertTrue(os.path.exists(generated_file))
        os.unlink(generated_file)
def _submit_study_object(self, data_in):
    '''Submit this dataset's project (study) object unless it already exists.

    The presence of self.project_xml on disk is treated as the marker that
    the project was submitted before. When the file is absent, a project
    object is built and run through object_creator.ObjectCreator and the
    accession from its receipt is written into every row of data_in and
    into the 'Sample' table.

    Args:
        data_in: rows of this dataset, each indexable by
            'ena_study_accession' and 'sample_id'. Rows are updated in
            place with the new study accession.

    Raises:
        Error: if the rows carry more than one distinct study accession,
            if the submission is not successful, or if the receipt has no
            'PROJECT' accession.
        AssertionError: if the accessions already in the rows are
            inconsistent with whether self.project_xml exists.
    '''
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(self.project_xml_dir, exist_ok=True)

    study_accessions_from_db = {x['ena_study_accession'] for x in data_in}
    if len(study_accessions_from_db) > 1:
        raise Error('Error! More than one study ID found for dataset '
                    + self.dataset_name + '. Got: '
                    + str(study_accessions_from_db))

    if os.path.exists(self.project_xml):
        # Project XML already on disk: nothing to submit, but the rows
        # are expected to agree on exactly one accession value.
        assert len(study_accessions_from_db) == 1
        return

    # No project XML yet, so no row may already carry a study accession.
    assert study_accessions_from_db == {None}

    project_alias = 'project.' + self.dataset_name
    submit_alias = 'submit.' + project_alias
    center_name = DatasetSubmitter._ena_center_name_from_db_data(
        data_in, number_to_name_dict=self.centre_number_to_name)
    title = self.study_prefix + '. ' + center_name + '. ' + self.dataset_name
    project_description = title  # the title doubles as the description

    project_creator = object_creator.ObjectCreator(
        self.ini_file,
        'project',
        self.project_xml,
        project_alias,
        submit_alias,
        center_name,
        title,
        project_description,
        use_test_server=self.use_test_server,
        unit_test=self.unit_test,
        broker_name=self.broker_name)
    project_creator.run()

    if not project_creator.submission_receipt.successful:
        raise Error('Error submitting project to ena. XML file: '
                    + self.project_xml)

    ena_study_accession = project_creator.submission_receipt.accessions.get('PROJECT')
    if ena_study_accession is None:
        raise Error('Error getting project accession from '
                    + project_creator.receipt_xml)

    # Record the new accession both in memory and in the database.
    for row in data_in:
        row['ena_study_accession'] = ena_study_accession
        self.db.update_row(
            'Sample',
            {'sample_id': row['sample_id']},
            {'ena_study_accession': ena_study_accession})
    self.db.commit()
def _submit_sample_objects(self, data_in):
    '''Submit a sample object for every row that has no sample accession yet.

    Rows whose 'ena_sample_accession' is already set are skipped. Rows
    sharing a 'sample_id' with a row handled earlier in this call reuse
    that row's result instead of submitting again. For every other row a
    sample object is built and run through object_creator.ObjectCreator;
    the resulting accession, or the string 'FAIL', is stored in the row and
    in the 'Sample' table and committed straight away.

    Args:
        data_in: rows indexable by 'ena_sample_accession', 'sample_id',
            'isolate_id', 'subject_id' and 'sample_id_from_lab'. Rows are
            updated in place.
    '''
    submitted_samples = {}  # sample id -> ena accession

    for row in data_in:
        if row['ena_sample_accession'] is not None:
            continue

        if row['sample_id'] in submitted_samples:
            # Same sample seen earlier in this call: reuse its accession.
            row['ena_sample_accession'] = submitted_samples[row['sample_id']]
            continue

        iso_dir = isolate_dir.IsolateDir(
            self.pipeline_root, row['sample_id'], row['isolate_id'])
        object_xml = iso_dir.xml_submission_file('sample')
        object_alias = 'sample.' + str(row['sample_id'])
        submit_alias = 'submit.' + object_alias
        center_name = DatasetSubmitter._ena_center_name_from_db_data(
            data_in, number_to_name_dict=self.centre_number_to_name)
        title = (row['subject_id'] + '. ' + center_name + '. '
                 + row['sample_id_from_lab'])

        obj_creator = object_creator.ObjectCreator(
            self.ini_file,
            'sample',
            object_xml,
            object_alias,
            submit_alias,
            center_name,
            title,
            taxon_id=self.taxon_id,
            use_test_server=self.use_test_server,
            unit_test=self.unit_test,
            broker_name=self.broker_name)
        obj_creator.run()

        if obj_creator.submission_receipt.successful:
            try:
                sample_accession = obj_creator.submission_receipt.accessions['SAMPLE']
            except (KeyError, TypeError):
                # Receipt reports success but yields no SAMPLE accession.
                # Only lookup failures are caught, so KeyboardInterrupt and
                # SystemExit are no longer recorded as a failed submission.
                sample_accession = 'FAIL'
        else:
            sample_accession = 'FAIL'

        row['ena_sample_accession'] = sample_accession
        self.db.update_row(
            'Sample',
            {'sample_id': row['sample_id']},
            {'ena_sample_accession': sample_accession})
        # Commit per sample so the result is stored before the next submission.
        self.db.commit()
        submitted_samples[row['sample_id']] = sample_accession
def _submit_runs(self, data_in):
    '''Upload each row's pair of reads files, then submit one run object per row.

    All read pairs are uploaded first, in parallel, through
    _upload_fastq_file_pair using a pool of self.fq_upload_threads worker
    processes. Afterwards a run object is built for each row and run
    through object_creator.ObjectCreator; the resulting accession, or the
    string 'FAIL', is stored in the row and in the 'Seqrep' table.

    Args:
        data_in: rows indexable by 'seqrep_id', 'sample_id', 'isolate_id',
            'sequence_replicate_number', 'remove_contam_reads_file_1_md5',
            'remove_contam_reads_file_2_md5', 'ena_run_accession' and
            'ena_experiment_accession'. Rows are updated in place.

    Raises:
        AssertionError: if a row already has a run accession, has no
            experiment accession, or has no upload result.
    '''
    # Note: reads have to be in the dropbox before submitting the Run object.
    # Upload all the reads first, in parallel, then submit the runs.
    # Each entry: (seqrep, full path on disk1, dropbox name1, full path on disk2, dropbox name2)
    fq_pairs_to_upload = []
    for row in data_in:
        iso_dir = isolate_dir.IsolateDir(
            self.pipeline_root, row['sample_id'], row['isolate_id'])
        fq_pairs_to_upload.append((
            row['seqrep_id'],
            iso_dir.reads_filename('remove_contam', row['sequence_replicate_number'], 1),
            str(row['seqrep_id']) + '.1.' + row['remove_contam_reads_file_1_md5'] + '.fq.gz',
            iso_dir.reads_filename('remove_contam', row['sequence_replicate_number'], 2),
            str(row['seqrep_id']) + '.2.' + row['remove_contam_reads_file_2_md5'] + '.fq.gz',
        ))

    self.pool = multiprocessing.Pool(self.fq_upload_threads)
    try:
        upload_return_values = self.pool.starmap(
            _upload_fastq_file_pair,
            zip(fq_pairs_to_upload,
                itertools.repeat(self.ini_file),
                itertools.repeat(self.unit_test)))
    finally:
        # Release the worker processes; the pool was previously never
        # closed, leaving them alive after the uploads had finished.
        self.pool.close()
        self.pool.join()

    # NOTE(review): the second element of each return value is stored here but
    # never inspected below; only the presence of the seqrep id is asserted.
    # Confirm whether a failed upload should prevent the run submission.
    upload_success = {x[0]: x[1] for x in upload_return_values}
    fq_pairs_to_upload = {x[0]: x for x in fq_pairs_to_upload}

    # Fastqs are uploaded, now submit the xmls and update the database
    for row in data_in:
        assert row['seqrep_id'] in fq_pairs_to_upload
        assert row['seqrep_id'] in upload_success
        assert row['ena_run_accession'] is None
        assert row['ena_experiment_accession'] is not None

        iso_dir = isolate_dir.IsolateDir(
            self.pipeline_root, row['sample_id'], row['isolate_id'])
        object_xml = iso_dir.xml_submission_file(
            'run', sequence_replicate=row['sequence_replicate_number'])
        # NOTE(review): the alias is built from isolate_id, not seqrep_id, so
        # two sequence replicates of one isolate get the same alias. Confirm
        # that this is intended.
        object_alias = 'run.' + str(row['isolate_id'])
        submit_alias = 'submit.' + object_alias
        center_name = DatasetSubmitter._ena_center_name_from_db_data(
            data_in, number_to_name_dict=self.centre_number_to_name)
        title = None  # not needed for a run
        uploaded_pair = fq_pairs_to_upload[row['seqrep_id']]

        obj_creator = object_creator.ObjectCreator(
            self.ini_file,
            'run',
            object_xml,
            object_alias,
            submit_alias,
            center_name,
            title,
            experiment_accession=row['ena_experiment_accession'],
            reads_1=uploaded_pair[2],
            md5_1=row['remove_contam_reads_file_1_md5'],
            reads_2=uploaded_pair[4],
            md5_2=row['remove_contam_reads_file_2_md5'],
            use_test_server=self.use_test_server,
            unit_test=self.unit_test,
            broker_name=self.broker_name,
        )
        obj_creator.run()

        if obj_creator.submission_receipt.successful:
            try:
                run_accession = obj_creator.submission_receipt.accessions['RUN']
            except (KeyError, TypeError):
                # Receipt reports success but yields no RUN accession.
                # Only lookup failures are caught, so KeyboardInterrupt and
                # SystemExit are no longer recorded as a failed submission.
                run_accession = 'FAIL'
        else:
            run_accession = 'FAIL'

        row['ena_run_accession'] = run_accession
        self.db.update_row(
            'Seqrep',
            {'seqrep_id': row['seqrep_id']},
            {'ena_run_accession': run_accession})
        self.db.commit()
def _submit_experiment_objects(self, data_in):
    """Submit an experiment object for every row that has no experiment accession yet.

    A row is skipped when its ``"ena_experiment_accession"`` is already set
    or when its ``"ena_sample_accession"`` is the string ``"FAIL"``. Rows
    sharing an ``"isolate_id"`` with a row handled earlier in this call
    reuse that row's result. For every other row an experiment object is
    built and run through ``object_creator.ObjectCreator``; the resulting
    accession, or the string ``"FAIL"``, is stored in the row and in the
    ``Isolate`` table and committed straight away.

    Args:
        data_in: rows indexable by ``"ena_experiment_accession"``,
            ``"ena_sample_accession"``, ``"ena_study_accession"``,
            ``"isolate_id"``, ``"sample_id"``, ``"subject_id"``,
            ``"sample_id_from_lab"``, ``"isolate_number_from_lab"`` and
            ``"instrument_model"``. Rows are updated in place.
    """
    submitted_isolates = {}  # isolate id -> ena accession

    for row in data_in:
        already_done = row["ena_experiment_accession"] is not None
        if already_done or row["ena_sample_accession"] == "FAIL":
            continue

        if row["isolate_id"] in submitted_isolates:
            # Same isolate seen earlier in this call: reuse its accession.
            row["ena_experiment_accession"] = submitted_isolates[row["isolate_id"]]
            continue

        iso_dir = isolate_dir.IsolateDir(
            self.pipeline_root, row["sample_id"], row["isolate_id"]
        )
        object_xml = iso_dir.xml_submission_file("experiment")
        object_alias = "experiment." + str(row["isolate_id"])
        submit_alias = "submit." + object_alias
        center_name = DatasetSubmitter._ena_center_name_from_db_data(
            data_in, number_to_name_dict=self.centre_number_to_name
        )
        title = (
            row["subject_id"]
            + ". "
            + center_name
            + ". "
            + row["sample_id_from_lab"]
            + ". "
            + row["isolate_number_from_lab"]
        )
        library_name = title  # the title doubles as the library name

        obj_creator = object_creator.ObjectCreator(
            self.ini_file,
            "experiment",
            object_xml,
            object_alias,
            submit_alias,
            center_name,
            title,
            study_accession=row["ena_study_accession"],
            sample_accession=row["ena_sample_accession"],
            library_name=library_name,
            platform="ILLUMINA",
            instrument=row["instrument_model"],
            use_test_server=self.use_test_server,
            unit_test=self.unit_test,
            broker_name=self.broker_name,
        )
        obj_creator.run()

        if obj_creator.submission_receipt.successful:
            try:
                experiment_accession = obj_creator.submission_receipt.accessions[
                    "EXPERIMENT"
                ]
            except (KeyError, TypeError):
                # Receipt reports success but yields no EXPERIMENT accession.
                # Only lookup failures are caught, so KeyboardInterrupt and
                # SystemExit are no longer recorded as a failed submission.
                experiment_accession = "FAIL"
        else:
            experiment_accession = "FAIL"

        row["ena_experiment_accession"] = experiment_accession
        self.db.update_row(
            "Isolate",
            {"isolate_id": row["isolate_id"]},
            {"ena_experiment_accession": experiment_accession},
        )
        # Commit per isolate so the result is stored before the next submission.
        self.db.commit()
        submitted_isolates[row["isolate_id"]] = experiment_accession