예제 #1
0
    def test_init_bad_parameters(self):
        '''ObjectCreator construction must raise when given bad parameters'''
        # Positional arguments that follow the object type; identical in
        # every case, and no optional keyword arguments are ever supplied.
        trailing_args = ('obj.xml', 'obj_alias', 'sub_alias', 'center 42',
                         'title')

        # Cases are tried in this order; the first one that fails to raise
        # fails the whole test.
        object_types = (
            'not_a_project',  # not one of the object types used below
            'project',  # missing project_description
            'sample',  # missing taxon_id
            # missing study_accession, sample_accession, library_name,
            # platform, instrument
            'experiment',
            # missing experiment_accession, reads_1, md5_1, reads_2, md5_2
            'run',
        )

        for object_type in object_types:
            with self.assertRaises(Exception):
                object_creator.ObjectCreator('ini_file', object_type,
                                             *trailing_args)
예제 #2
0
    def _submit_study_object(self, data_in):
        """Submit the project (study) object for this dataset, unless already done.

        When self.project_xml does not exist, a "project" object is built and
        submitted through object_creator.ObjectCreator. The accession found
        under "PROJECT" in the receipt is then written to the
        "ena_study_accession" entry of every row in data_in and to the
        matching rows of the "Sample" table, followed by a single commit.
        When self.project_xml already exists nothing is submitted.

        Args:
            data_in: collection of mutable dict-like rows. It is iterated
                more than once. Each row is read for "ena_study_accession"
                and "sample_id".

        Raises:
            Error: if the rows carry more than one distinct study accession,
                if the submission receipt is not successful, or if the
                receipt has no "PROJECT" accession.
            AssertionError: if the project XML is absent but the set of
                accessions in data_in is not exactly {None}, or if the
                project XML is present and data_in yields no accession
                values at all.
        """
        if not os.path.exists(self.project_xml_dir):
            os.mkdir(self.project_xml_dir)

        study_accessions_from_db = {x["ena_study_accession"] for x in data_in}
        if len(study_accessions_from_db) > 1:
            raise Error(
                "Error! More than one study ID found for dataset "
                + self.dataset_name
                + ". Got: "
                + str(study_accessions_from_db)
            )

        if not os.path.exists(self.project_xml):
            # No project XML on disk, so no row may hold an accession yet.
            assert study_accessions_from_db == {None}
            project_alias = "project." + self.dataset_name
            submit_alias = "submit." + project_alias
            center_name = DatasetSubmitter._ena_center_name_from_db_data(
                data_in, number_to_name_dict=self.centre_number_to_name
            )
            title = self.study_prefix + ". " + center_name + ". " + self.dataset_name
            # The title doubles as the project description.
            project_description = title
            project_creator = object_creator.ObjectCreator(
                self.ini_file,
                "project",
                self.project_xml,
                project_alias,
                submit_alias,
                center_name,
                title,
                project_description,
                use_test_server=self.use_test_server,
                unit_test=self.unit_test,
                broker_name=self.broker_name,
            )
            project_creator.run()
            if not project_creator.submission_receipt.successful:
                raise Error(
                    "Error submitting project to ena. XML file: " + self.project_xml
                )

            ena_study_accession = project_creator.submission_receipt.accessions.get(
                "PROJECT", None
            )
            if ena_study_accession is None:
                raise Error(
                    "Error getting project accession from "
                    + project_creator.receipt_xml
                )

            # Record the new accession on every row and in the database;
            # all updates are committed together after the loop.
            for row in data_in:
                row["ena_study_accession"] = ena_study_accession
                self.db.update_row(
                    "Sample",
                    {"sample_id": row["sample_id"]},
                    {"ena_study_accession": ena_study_accession},
                )
            self.db.commit()
        else:
            assert len(study_accessions_from_db) == 1
예제 #3
0
    def _submit_sample_objects(self, data_in):
        """Submit a "sample" object for every sample in data_in that lacks one.

        Rows whose "ena_sample_accession" is not None are skipped. The first
        row seen for a given "sample_id" triggers a submission through
        object_creator.ObjectCreator; later rows with the same "sample_id"
        reuse that result without resubmitting. The outcome is either the
        accession stored under "SAMPLE" in the receipt, or the string "FAIL"
        when the receipt is unsuccessful or the accession cannot be read.
        It is written to the row and to the "Sample" table, and committed
        after each submission.

        Args:
            data_in: collection of mutable dict-like rows. Each row is read
                for "ena_sample_accession", "sample_id", "isolate_id",
                "subject_id" and "sample_id_from_lab"; the last two are
                concatenated with strings, so they must be str.
        """
        submitted_samples = {}  # sample id -> ena accession

        for row in data_in:
            if row["ena_sample_accession"] is not None:
                continue
            elif row["sample_id"] in submitted_samples:
                row["ena_sample_accession"] = submitted_samples[row["sample_id"]]
            else:
                assert row["ena_sample_accession"] is None
                iso_dir = isolate_dir.IsolateDir(
                    self.pipeline_root, row["sample_id"], row["isolate_id"]
                )
                object_xml = iso_dir.xml_submission_file("sample")
                object_alias = "sample." + str(row["sample_id"])
                submit_alias = "submit." + object_alias
                center_name = DatasetSubmitter._ena_center_name_from_db_data(
                    data_in, number_to_name_dict=self.centre_number_to_name
                )
                title = (
                    row["subject_id"]
                    + ". "
                    + center_name
                    + ". "
                    + row["sample_id_from_lab"]
                )
                obj_creator = object_creator.ObjectCreator(
                    self.ini_file,
                    "sample",
                    object_xml,
                    object_alias,
                    submit_alias,
                    center_name,
                    title,
                    taxon_id=self.taxon_id,
                    use_test_server=self.use_test_server,
                    unit_test=self.unit_test,
                    broker_name=self.broker_name,
                )
                obj_creator.run()
                if obj_creator.submission_receipt.successful:
                    try:
                        sample_accession = obj_creator.submission_receipt.accessions[
                            "SAMPLE"
                        ]
                    except (AttributeError, LookupError, TypeError):
                        # Receipt has no usable SAMPLE accession. Only
                        # lookup-type failures are treated as "FAIL", so that
                        # KeyboardInterrupt/SystemExit are not swallowed.
                        sample_accession = "FAIL"
                else:
                    sample_accession = "FAIL"

                row["ena_sample_accession"] = sample_accession
                self.db.update_row(
                    "Sample",
                    {"sample_id": row["sample_id"]},
                    {"ena_sample_accession": sample_accession},
                )
                self.db.commit()
                # Remember the outcome (including "FAIL") so that other rows
                # of the same sample are not submitted again.
                submitted_samples[row["sample_id"]] = sample_accession
예제 #4
0
    def _submit_experiment_objects(self, data_in):
        '''Submit an "experiment" object for every isolate in data_in lacking one.

        Rows are skipped when their 'ena_experiment_accession' is not None or
        when their 'ena_sample_accession' is the string 'FAIL'. The first row
        seen for a given 'isolate_id' triggers a submission through
        object_creator.ObjectCreator; later rows with the same 'isolate_id'
        reuse that result. The outcome is either the accession stored under
        'EXPERIMENT' in the receipt, or the string 'FAIL' when the receipt is
        unsuccessful or the accession cannot be read. It is written to the
        row and to the 'Isolate' table, and committed after each submission.

        Args:
            data_in: collection of mutable dict-like rows. Each row is read
                for 'ena_experiment_accession', 'ena_sample_accession',
                'ena_study_accession', 'isolate_id', 'sample_id',
                'subject_id', 'sample_id_from_lab', 'isolate_number_from_lab'
                and 'instrument_model'. The three *_id/_from_lab fields used
                in the title are concatenated with strings, so they must be
                str.
        '''
        submitted_isolates = {}  # isolate id -> ena accession

        for row in data_in:
            if row['ena_experiment_accession'] is not None or row[
                    'ena_sample_accession'] == 'FAIL':
                continue
            elif row['isolate_id'] in submitted_isolates:
                row['ena_experiment_accession'] = submitted_isolates[
                    row['isolate_id']]
            else:
                assert row['ena_experiment_accession'] is None
                iso_dir = isolate_dir.IsolateDir(self.pipeline_root,
                                                 row['sample_id'],
                                                 row['isolate_id'])
                object_xml = iso_dir.xml_submission_file('experiment')
                object_alias = 'experiment.' + str(row['isolate_id'])
                submit_alias = 'submit.' + object_alias
                center_name = DatasetSubmitter._ena_center_name_from_db_data(
                    data_in, number_to_name_dict=self.centre_number_to_name)
                title = row['subject_id'] + '. ' + center_name + '. ' + row[
                    'sample_id_from_lab'] + '. ' + row[
                        'isolate_number_from_lab']
                # The title is reused verbatim as the library name.
                library_name = title
                obj_creator = object_creator.ObjectCreator(
                    self.ini_file,
                    'experiment',
                    object_xml,
                    object_alias,
                    submit_alias,
                    center_name,
                    title,
                    study_accession=row['ena_study_accession'],
                    sample_accession=row['ena_sample_accession'],
                    library_name=library_name,
                    platform='ILLUMINA',
                    instrument=row['instrument_model'],
                    use_test_server=self.use_test_server,
                    unit_test=self.unit_test,
                    broker_name=self.broker_name,
                )
                obj_creator.run()
                if obj_creator.submission_receipt.successful:
                    try:
                        experiment_accession = obj_creator.submission_receipt.accessions[
                            'EXPERIMENT']
                    except (AttributeError, LookupError, TypeError):
                        # Receipt has no usable EXPERIMENT accession. Only
                        # lookup-type failures are treated as 'FAIL', so that
                        # KeyboardInterrupt/SystemExit are not swallowed.
                        experiment_accession = 'FAIL'
                else:
                    experiment_accession = 'FAIL'

                row['ena_experiment_accession'] = experiment_accession
                self.db.update_row(
                    'Isolate', {'isolate_id': row['isolate_id']},
                    {'ena_experiment_accession': experiment_accession})
                self.db.commit()
                # Remember the outcome (including 'FAIL') so that other rows
                # of the same isolate are not submitted again.
                submitted_isolates[row['isolate_id']] = experiment_accession
예제 #5
0
 def test_run_run(self):
     '''run() on a 'run' object with unit_test='success' succeeds and writes its XML files'''
     object_xml = 'tmp.object_creator.run.obj.xml'
     config_file = os.path.join(data_dir, 'conf.ini')
     run_options = {
         'experiment_accession': 'ERX123',
         'reads_1': 'reads1.fq',
         'md5_1': 'md51',
         'reads_2': 'reads2.fq',
         'md5_2': 'md52',
         'unit_test': 'success',
     }
     creator = object_creator.ObjectCreator(
         config_file, 'run', object_xml, 'object alias', 'sub alias',
         'center 42', 'title', **run_options)
     creator.run()
     self.assertTrue(creator.submission_receipt.successful)

     # Clean up: the object XML is removed unconditionally, whereas the
     # submission and receipt XMLs must each exist before being removed.
     os.unlink(object_xml)
     for attribute_name in ('submission_xml', 'receipt_xml'):
         written_xml = getattr(creator, attribute_name)
         self.assertTrue(os.path.exists(written_xml))
         os.unlink(written_xml)
예제 #6
0
 def test_run_experiment(self):
     '''run() on an 'experiment' object with unit_test='success' succeeds and writes its XML files'''
     object_xml = 'tmp.object_creator.experiment.obj.xml'
     config_file = os.path.join(data_dir, 'conf.ini')
     experiment_options = {
         'study_accession': 'ERP123',
         'sample_accession': 'ERS42',
         'library_name': 'lib name',
         'platform': 'ILLUMINA',
         'instrument': 'HISEQ',
         'unit_test': 'success',
     }
     creator = object_creator.ObjectCreator(
         config_file, 'experiment', object_xml, 'objct alias', 'sub alias',
         'center 42', 'title', **experiment_options)
     creator.run()
     self.assertTrue(creator.submission_receipt.successful)

     # Clean up: the object XML is removed unconditionally, whereas the
     # submission and receipt XMLs must each exist before being removed.
     os.unlink(object_xml)
     for attribute_name in ('submission_xml', 'receipt_xml'):
         written_xml = getattr(creator, attribute_name)
         self.assertTrue(os.path.exists(written_xml))
         os.unlink(written_xml)
예제 #7
0
 def test_run_sample(self):
     '''run() on a 'sample' object with unit_test='success' succeeds and writes its XML files'''
     object_xml = 'tmp.object_creator.sample.obj.xml'
     config_file = os.path.join(data_dir, 'conf.ini')
     creator = object_creator.ObjectCreator(
         config_file,
         'sample',
         object_xml,
         'objct alias',
         'sub alias',
         'center 42',
         'title',
         taxon_id=42,
         unit_test='success',
     )
     creator.run()
     self.assertTrue(creator.submission_receipt.successful)

     # Clean up: the object XML is removed unconditionally, whereas the
     # submission and receipt XMLs must each exist before being removed.
     os.unlink(object_xml)
     for attribute_name in ('submission_xml', 'receipt_xml'):
         written_xml = getattr(creator, attribute_name)
         self.assertTrue(os.path.exists(written_xml))
         os.unlink(written_xml)
예제 #8
0
 def test_run_project(self):
     '''run() on a 'project' object with unit_test='success' succeeds and writes its XML files'''
     object_xml = 'tmp.object_creator.project.obj.xml'
     config_file = os.path.join(data_dir, 'conf.ini')
     creator = object_creator.ObjectCreator(
         config_file,
         'project',
         object_xml,
         'objct alias',
         'sub alias',
         'center 42',
         'title',
         project_description='project description',
         unit_test='success',
     )
     creator.run()
     self.assertTrue(creator.submission_receipt.successful)

     # Clean up: the object XML is removed unconditionally, whereas the
     # submission and receipt XMLs must each exist before being removed.
     os.unlink(object_xml)
     for attribute_name in ('submission_xml', 'receipt_xml'):
         written_xml = getattr(creator, attribute_name)
         self.assertTrue(os.path.exists(written_xml))
         os.unlink(written_xml)
예제 #9
0
    def _submit_study_object(self, data_in):
        '''Submit the project (study) object for this dataset, unless already done.

        When self.project_xml does not exist, a 'project' object is built and
        submitted through object_creator.ObjectCreator. The accession found
        under 'PROJECT' in the receipt is then written to the
        'ena_study_accession' entry of every row in data_in and to the
        matching rows of the 'Sample' table, followed by a single commit.
        When self.project_xml already exists nothing is submitted.

        Args:
            data_in: collection of mutable dict-like rows. It is iterated
                more than once. Each row is read for 'ena_study_accession'
                and 'sample_id'.

        Raises:
            Error: if the rows carry more than one distinct study accession,
                if the submission receipt is not successful, or if the
                receipt has no 'PROJECT' accession.
            AssertionError: if the project XML is absent but the set of
                accessions in data_in is not exactly {None}, or if the
                project XML is present and data_in yields no accession
                values at all.
        '''
        if not os.path.exists(self.project_xml_dir):
            os.mkdir(self.project_xml_dir)

        study_accessions_from_db = {x['ena_study_accession'] for x in data_in}
        if len(study_accessions_from_db) > 1:
            raise Error('Error! More than one study ID found for dataset ' +
                        self.dataset_name + '. Got: ' +
                        str(study_accessions_from_db))

        if not os.path.exists(self.project_xml):
            # No project XML on disk, so no row may hold an accession yet.
            assert study_accessions_from_db == {None}
            project_alias = 'project.' + self.dataset_name
            submit_alias = 'submit.' + project_alias
            center_name = DatasetSubmitter._ena_center_name_from_db_data(
                data_in, number_to_name_dict=self.centre_number_to_name)
            title = self.study_prefix + '. ' + center_name + '. ' + self.dataset_name
            # The title doubles as the project description.
            project_description = title
            project_creator = object_creator.ObjectCreator(
                self.ini_file,
                'project',
                self.project_xml,
                project_alias,
                submit_alias,
                center_name,
                title,
                project_description,
                use_test_server=self.use_test_server,
                unit_test=self.unit_test,
                broker_name=self.broker_name)
            project_creator.run()
            if not project_creator.submission_receipt.successful:
                raise Error('Error submitting project to ena. XML file: ' +
                            self.project_xml)

            ena_study_accession = project_creator.submission_receipt.accessions.get(
                'PROJECT', None)
            if ena_study_accession is None:
                raise Error('Error getting project accession from ' +
                            project_creator.receipt_xml)

            # Record the new accession on every row and in the database;
            # all updates are committed together after the loop.
            for row in data_in:
                row['ena_study_accession'] = ena_study_accession
                self.db.update_row(
                    'Sample', {'sample_id': row['sample_id']},
                    {'ena_study_accession': ena_study_accession})
            self.db.commit()
        else:
            assert len(study_accessions_from_db) == 1
예제 #10
0
    def _submit_sample_objects(self, data_in):
        '''Submit a 'sample' object for every sample in data_in that lacks one.

        Rows whose 'ena_sample_accession' is not None are skipped. The first
        row seen for a given 'sample_id' triggers a submission through
        object_creator.ObjectCreator; later rows with the same 'sample_id'
        reuse that result without resubmitting. The outcome is either the
        accession stored under 'SAMPLE' in the receipt, or the string 'FAIL'
        when the receipt is unsuccessful or the accession cannot be read.
        It is written to the row and to the 'Sample' table, and committed
        after each submission.

        Args:
            data_in: collection of mutable dict-like rows. Each row is read
                for 'ena_sample_accession', 'sample_id', 'isolate_id',
                'subject_id' and 'sample_id_from_lab'; the last two are
                concatenated with strings, so they must be str.
        '''
        submitted_samples = {}  # sample id -> ena accession

        for row in data_in:
            if row['ena_sample_accession'] is not None:
                continue
            elif row['sample_id'] in submitted_samples:
                row['ena_sample_accession'] = submitted_samples[
                    row['sample_id']]
            else:
                assert row['ena_sample_accession'] is None
                iso_dir = isolate_dir.IsolateDir(self.pipeline_root,
                                                 row['sample_id'],
                                                 row['isolate_id'])
                object_xml = iso_dir.xml_submission_file('sample')
                object_alias = 'sample.' + str(row['sample_id'])
                submit_alias = 'submit.' + object_alias
                center_name = DatasetSubmitter._ena_center_name_from_db_data(
                    data_in, number_to_name_dict=self.centre_number_to_name)
                title = row['subject_id'] + '. ' + center_name + '. ' + row[
                    'sample_id_from_lab']
                obj_creator = object_creator.ObjectCreator(
                    self.ini_file,
                    'sample',
                    object_xml,
                    object_alias,
                    submit_alias,
                    center_name,
                    title,
                    taxon_id=self.taxon_id,
                    use_test_server=self.use_test_server,
                    unit_test=self.unit_test,
                    broker_name=self.broker_name)
                obj_creator.run()
                if obj_creator.submission_receipt.successful:
                    try:
                        sample_accession = obj_creator.submission_receipt.accessions[
                            'SAMPLE']
                    except (AttributeError, LookupError, TypeError):
                        # Receipt has no usable SAMPLE accession. Only
                        # lookup-type failures are treated as 'FAIL', so that
                        # KeyboardInterrupt/SystemExit are not swallowed.
                        sample_accession = 'FAIL'
                else:
                    sample_accession = 'FAIL'

                row['ena_sample_accession'] = sample_accession
                self.db.update_row('Sample', {'sample_id': row['sample_id']},
                                   {'ena_sample_accession': sample_accession})
                self.db.commit()
                # Remember the outcome (including 'FAIL') so that other rows
                # of the same sample are not submitted again.
                submitted_samples[row['sample_id']] = sample_accession
예제 #11
0
    def _submit_runs(self, data_in):
        '''Upload the read pairs of every row, then submit one 'run' object per row.

        All fastq pairs are uploaded first, in parallel, through a
        multiprocessing pool of self.fq_upload_threads workers calling
        _upload_fastq_file_pair. The pool is stored on self.pool and is
        closed and joined once the uploads return. Afterwards a 'run' object
        is submitted for each row through object_creator.ObjectCreator. The
        outcome is either the accession stored under 'RUN' in the receipt,
        or the string 'FAIL' when the receipt is unsuccessful or the
        accession cannot be read. It is written to the row and to the
        'Seqrep' table, and committed after each submission.

        Args:
            data_in: collection of mutable dict-like rows. It is iterated
                more than once. Each row is read for 'seqrep_id',
                'sample_id', 'isolate_id', 'sequence_replicate_number',
                'remove_contam_reads_file_1_md5',
                'remove_contam_reads_file_2_md5', 'ena_run_accession' and
                'ena_experiment_accession'. The md5 fields are concatenated
                with strings, so they must be str.

        Raises:
            AssertionError: if a row's seqrep is missing from the upload
                bookkeeping, if it already has a run accession, or if it has
                no experiment accession.
        '''
        # Note: reads have to be in the dropbox before submitting the Run object.
        # Upload all the reads first, in parallel, then submit the runs.
        # Each tuple is:
        # (seqrep, full path on disk1, dropbox name1, full path on disk2, dropbox name2)
        fq_pairs_to_upload = []
        for row in data_in:
            iso_dir = isolate_dir.IsolateDir(self.pipeline_root,
                                             row['sample_id'],
                                             row['isolate_id'])
            fq_pairs_to_upload.append((
                row['seqrep_id'],
                iso_dir.reads_filename('remove_contam',
                                       row['sequence_replicate_number'], 1),
                str(row['seqrep_id']) + '.1.' +
                row['remove_contam_reads_file_1_md5'] + '.fq.gz',
                iso_dir.reads_filename('remove_contam',
                                       row['sequence_replicate_number'], 2),
                str(row['seqrep_id']) + '.2.' +
                row['remove_contam_reads_file_2_md5'] + '.fq.gz',
            ))

        self.pool = multiprocessing.Pool(self.fq_upload_threads)
        try:
            upload_return_values = self.pool.starmap(
                _upload_fastq_file_pair,
                zip(fq_pairs_to_upload, itertools.repeat(self.ini_file),
                    itertools.repeat(self.unit_test)))
        finally:
            # The pool is not used again in this method: release its worker
            # processes whether or not the uploads raised.
            self.pool.close()
            self.pool.join()

        # NOTE(review): only the presence of each seqrep in upload_success is
        # checked below; the second element of each return value is never
        # inspected. Confirm whether runs should still be submitted when an
        # upload reports failure.
        upload_success = {x[0]: x[1] for x in upload_return_values}
        # Re-key the upload tuples by seqrep id for lookup during submission.
        fq_pairs_to_upload = {x[0]: x for x in fq_pairs_to_upload}

        # Fastqs are uploaded, now submit the xmls and update the database
        for row in data_in:
            assert row['seqrep_id'] in fq_pairs_to_upload
            assert row['seqrep_id'] in upload_success
            assert row['ena_run_accession'] is None
            assert row['ena_experiment_accession'] is not None

            iso_dir = isolate_dir.IsolateDir(self.pipeline_root,
                                             row['sample_id'],
                                             row['isolate_id'])
            object_xml = iso_dir.xml_submission_file(
                'run', sequence_replicate=row['sequence_replicate_number'])
            # NOTE(review): the alias is built from isolate_id only, so two
            # sequence replicates of one isolate get the same alias. Confirm
            # this is intended.
            object_alias = 'run.' + str(row['isolate_id'])
            submit_alias = 'submit.' + object_alias
            center_name = DatasetSubmitter._ena_center_name_from_db_data(
                data_in, number_to_name_dict=self.centre_number_to_name)
            title = None  # not needed for a run

            obj_creator = object_creator.ObjectCreator(
                self.ini_file,
                'run',
                object_xml,
                object_alias,
                submit_alias,
                center_name,
                title,
                experiment_accession=row['ena_experiment_accession'],
                # Indexes 2 and 4 of the upload tuple are the dropbox names.
                reads_1=fq_pairs_to_upload[row['seqrep_id']][2],
                md5_1=row['remove_contam_reads_file_1_md5'],
                reads_2=fq_pairs_to_upload[row['seqrep_id']][4],
                md5_2=row['remove_contam_reads_file_2_md5'],
                use_test_server=self.use_test_server,
                unit_test=self.unit_test,
                broker_name=self.broker_name,
            )
            obj_creator.run()

            if obj_creator.submission_receipt.successful:
                try:
                    run_accession = obj_creator.submission_receipt.accessions[
                        'RUN']
                except (AttributeError, LookupError, TypeError):
                    # Receipt has no usable RUN accession. Only lookup-type
                    # failures are treated as 'FAIL', so that
                    # KeyboardInterrupt/SystemExit are not swallowed.
                    run_accession = 'FAIL'
            else:
                run_accession = 'FAIL'

            row['ena_run_accession'] = run_accession
            self.db.update_row('Seqrep', {'seqrep_id': row['seqrep_id']},
                               {'ena_run_accession': run_accession})
            self.db.commit()
예제 #12
0
    def _submit_experiment_objects(self, data_in):
        """Submit an "experiment" object for every isolate in data_in lacking one.

        Rows are skipped when their "ena_experiment_accession" is not None or
        when their "ena_sample_accession" is the string "FAIL". The first row
        seen for a given "isolate_id" triggers a submission through
        object_creator.ObjectCreator; later rows with the same "isolate_id"
        reuse that result. The outcome is either the accession stored under
        "EXPERIMENT" in the receipt, or the string "FAIL" when the receipt is
        unsuccessful or the accession cannot be read. It is written to the
        row and to the "Isolate" table, and committed after each submission.

        Args:
            data_in: collection of mutable dict-like rows. Each row is read
                for "ena_experiment_accession", "ena_sample_accession",
                "ena_study_accession", "isolate_id", "sample_id",
                "subject_id", "sample_id_from_lab", "isolate_number_from_lab"
                and "instrument_model". The three fields used in the title
                are concatenated with strings, so they must be str.
        """
        submitted_isolates = {}  # isolate id -> ena accession

        for row in data_in:
            if (
                row["ena_experiment_accession"] is not None
                or row["ena_sample_accession"] == "FAIL"
            ):
                continue
            elif row["isolate_id"] in submitted_isolates:
                row["ena_experiment_accession"] = submitted_isolates[row["isolate_id"]]
            else:
                assert row["ena_experiment_accession"] is None
                iso_dir = isolate_dir.IsolateDir(
                    self.pipeline_root, row["sample_id"], row["isolate_id"]
                )
                object_xml = iso_dir.xml_submission_file("experiment")
                object_alias = "experiment." + str(row["isolate_id"])
                submit_alias = "submit." + object_alias
                center_name = DatasetSubmitter._ena_center_name_from_db_data(
                    data_in, number_to_name_dict=self.centre_number_to_name
                )
                title = (
                    row["subject_id"]
                    + ". "
                    + center_name
                    + ". "
                    + row["sample_id_from_lab"]
                    + ". "
                    + row["isolate_number_from_lab"]
                )
                # The title is reused verbatim as the library name.
                library_name = title
                obj_creator = object_creator.ObjectCreator(
                    self.ini_file,
                    "experiment",
                    object_xml,
                    object_alias,
                    submit_alias,
                    center_name,
                    title,
                    study_accession=row["ena_study_accession"],
                    sample_accession=row["ena_sample_accession"],
                    library_name=library_name,
                    platform="ILLUMINA",
                    instrument=row["instrument_model"],
                    use_test_server=self.use_test_server,
                    unit_test=self.unit_test,
                    broker_name=self.broker_name,
                )
                obj_creator.run()
                if obj_creator.submission_receipt.successful:
                    try:
                        experiment_accession = obj_creator.submission_receipt.accessions[
                            "EXPERIMENT"
                        ]
                    except (AttributeError, LookupError, TypeError):
                        # Receipt has no usable EXPERIMENT accession. Only
                        # lookup-type failures are treated as "FAIL", so that
                        # KeyboardInterrupt/SystemExit are not swallowed.
                        experiment_accession = "FAIL"
                else:
                    experiment_accession = "FAIL"

                row["ena_experiment_accession"] = experiment_accession
                self.db.update_row(
                    "Isolate",
                    {"isolate_id": row["isolate_id"]},
                    {"ena_experiment_accession": experiment_accession},
                )
                self.db.commit()
                # Remember the outcome (including "FAIL") so that other rows
                # of the same isolate are not submitted again.
                submitted_isolates[row["isolate_id"]] = experiment_accession