예제 #1
0
    def _make_family(self, external_id='FAM01'):
        """
        Build a two-participant family under a newly saved study.

        The study (external id 'phs001') holding both participants is
        added to the session and committed. The family is attached to the
        same two participants and handed back to the caller.

        :param external_id: external id given to the new family
        :returns: the Family containing both participants
        """
        members = [
            Participant(external_id=member_id, is_proband=False)
            for member_id in ("CASE01", "CASE02")
        ]

        study = Study(external_id='phs001')
        study.participants.extend(members)

        family = Family(external_id=external_id, family_type='Duo+')
        family.participants.extend(members)

        db.session.add(study)
        db.session.commit()
        return family
    def _create_entities(self):
        """
        Save one participant with a biospecimen and four genomic files.

        A "Baylor" sequencing center is looked up and, when missing,
        created and committed. Two sequencing experiments are built from
        ``self._make_seq_exp``; odd-numbered genomic files are appended to
        the first experiment and even-numbered ones to the second. All
        four files are assigned to the single biospecimen before the
        participant is added and committed.
        """
        study = Study(external_id='phs001')
        participant = Participant(external_id='p1',
                                  is_proband=True, study=study)

        # Reuse the sequencing center when it is already stored
        center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
        if center is None:
            center = SequencingCenter(name="Baylor")
            db.session.add(center)
            db.session.commit()

        # Both experiments point at the same center
        first_exp = SequencingExperiment(**self._make_seq_exp('se1'),
                                         sequencing_center_id=center.kf_id)
        second_exp = SequencingExperiment(**self._make_seq_exp('se2'),
                                          sequencing_center_id=center.kf_id)

        biospecimen = Biospecimen(external_sample_id='bio1',
                                  analyte_type='dna',
                                  participant_id=participant.kf_id,
                                  sequencing_center_id=center.kf_id)

        files = []
        for idx in range(4):
            name = 'file_{}'.format(idx)
            genomic_file = GenomicFile(
                file_name=name,
                data_type='submitted aligned read',
                file_format='.cram',
                urls=['s3://{}'.format(name)],
                hashes={'md5': str(uuid.uuid4())},
                controlled_access=True,
                is_harmonized=True,
                reference_genome='Test01',
                sequencing_experiment_id=first_exp.kf_id)
            # Odd indexes go to the first experiment, even to the second
            target = first_exp if idx % 2 else second_exp
            target.genomic_files.append(genomic_file)
            files.append(genomic_file)

        biospecimen.genomic_files = files
        participant.biospecimens = [biospecimen]
        db.session.add(participant)
        db.session.commit()
예제 #3
0
 def _create_participants_and_studies(self, total):
     """
     Generate ``total`` participants, each wrapped in a family of its own.

     Studies and sequencing centers are produced first by their helper
     methods. Each participant receives a random number of diagnoses,
     biospecimens, outcomes and phenotypes, bounded by the matching
     ``min_*`` / ``max_*`` attributes, and is assigned to a randomly
     picked study. A single commit is issued after the loop.

     :param total: number of participants to create
     """
     studies = self._create_studies_investigators()
     # The value returned by this helper is not needed here
     self._create_sequencing_centers()

     for idx in range(total):
         diagnoses = self._create_diagnoses(
             random.randint(self.min_diagnoses, self.max_diagnoses))
         self._get_unique_sites(diagnoses)
         biospecimens = self._create_biospecimens(
             random.randint(self.min_biospecimens, self.max_biospecimens))
         outcomes = self._create_outcomes(
             random.randint(self.min_outcomes, self.max_outcomes))
         phenotypes = self._create_phenotypes(
             random.randint(self.min_phenotypes, self.max_phenotypes))

         # Random picks are drawn in the same order as the kwargs below
         proband_flag = random.choice(self.is_proband_list)
         consent = random.choice(self.consent_type_list)
         chosen_study_id = random.choice(studies).kf_id

         participant = Participant(
             external_id='participant_{}'.format(idx),
             is_proband=proband_flag,
             consent_type=consent,
             biospecimens=biospecimens,
             diagnoses=diagnoses,
             outcomes=outcomes,
             phenotypes=phenotypes,
             study_id=chosen_study_id)
         db.session.add(participant)
         db.session.add(Family(participants=[participant]))

     db.session.commit()
    def _create_save_to_db(self):
        """
        Save a participant that carries a single phenotype.

        The phenotype is passed to the participant through the
        ``phenotypes`` kwarg; the participant is then added and committed.

        :returns: the phenotype kwargs, extended with the ``participant_id``
            and ``kf_id`` read back after the commit
        """
        phenotype_fields = {
            'external_id': 'test_phenotype_0',
            'source_text_phenotype': 'Hand Tremor',
            'hpo_id_phenotype': 'HP:0002378',
            'snomed_id_phenotype': '38033009',
            'observed': 'Positive',
            'age_at_event_days': 365
        }
        phenotype = Phenotype(**phenotype_fields)

        participant = Participant(external_id='Test subject 0',
                                  phenotypes=[phenotype],
                                  is_proband=True,
                                  study=Study(external_id='phs001'))
        db.session.add(participant)
        db.session.commit()

        # Ids are only read after the commit
        phenotype_fields.update(participant_id=participant.kf_id,
                                kf_id=phenotype.kf_id)
        return phenotype_fields
    def create_participant_biospecimen(self):
        """
        Save a study with one participant that owns one biospecimen.

        :returns: tuple of the external ids that were used, in the order
            (participant_id, sample_id, aliquot_id)
        """
        participant_id = "Test_Subject_0"
        sample_id = "Test_Sample_0"
        aliquot_id = "Test_aliquot_0"

        biospecimen_fields = self._make_biospecimen(
            external_sample_id=sample_id,
            external_aliquot_id=aliquot_id)
        center_id = self.create_seqexp()['sequencing_center_id']
        biospecimen = Biospecimen(**biospecimen_fields,
                                  sequencing_center_id=center_id)

        participant = Participant(external_id=participant_id,
                                  is_proband=True,
                                  biospecimens=[biospecimen])

        # The study is the object that gets added to the session
        study = Study(external_id='phs001')
        study.participants.append(participant)
        db.session.add(study)
        db.session.commit()

        return participant_id, sample_id, aliquot_id
    def test_create_and_find_biospecimen(self):
        """
        A saved biospecimen can be queried back with its fields intact
        """
        study = Study(external_id='phs001')
        db.session.add(study)
        db.session.commit()

        # Reference time taken before the participant and biospecimen exist
        started = datetime.now()

        participant = Participant(external_id="Test_Subject_0",
                                  is_proband=True,
                                  study_id=study.kf_id)
        db.session.add(participant)
        db.session.commit()

        fields = self._make_biospecimen(
            external_sample_id="Test_Sample_0",
            external_aliquot_id="Test_Aliquot_0")
        seqexp_ids = self.create_seqexp()
        biospecimen = Biospecimen(
            **fields,
            participant_id=participant.kf_id,
            sequencing_center_id=seqexp_ids['sequencing_center_id'])
        db.session.add(biospecimen)
        db.session.commit()

        self.assertEqual(Biospecimen.query.count(), 1)
        found = Biospecimen.query.first()
        for attr, expected in fields.items():
            self.assertEqual(expected, getattr(found, attr))
        self.assertGreater(found.created_at, started)
        self.assertGreater(found.modified_at, started)
        # uuid.UUID() raises when the stored value is not a valid uuid
        self.assertIs(type(uuid.UUID(found.uuid)), uuid.UUID)
    def _create_participants_and_dependents(self):
        """
        Build four participants, each with one biospecimen and two
        genomic files (an input file and an 'aligned read' output file).

        Each participant is committed as soon as it is created. The
        "Baylor" sequencing center is fetched, or created and committed
        when it is not stored yet. This method issues no commit after the
        final biospecimen / genomic file assignments.

        :returns: list of the created participants
        """
        study = Study(external_id='phs001')

        created = []
        for idx, name in enumerate(['Fred', 'Wilma', 'Pebbles', 'Dino']):
            participant = Participant(
                external_id=name,
                is_proband=random.choice([True, False]),
                study=study)
            db.session.add(participant)
            db.session.commit()

            # Get-or-create the shared sequencing center
            center = SequencingCenter.query.filter_by(
                name="Baylor").one_or_none()
            if center is None:
                center = SequencingCenter(name="Baylor")
                db.session.add(center)
                db.session.commit()

            experiment = self._create_experiment(
                'se_{}'.format(idx),
                sequencing_center_id=center.kf_id)
            biospecimen = self._create_biospecimen(
                's_{}'.format(idx),
                sequencing_center_id=center.kf_id,
                participant_id=participant.kf_id)

            input_file = self._create_genomic_file(
                'gf_{}_in'.format(idx),
                sequencing_experiment_id=experiment.kf_id)
            output_file = self._create_genomic_file(
                'gf_{}_out'.format(idx),
                data_type='aligned read',
                sequencing_experiment_id=experiment.kf_id)

            biospecimen.genomic_files = [input_file, output_file]
            participant.biospecimens = [biospecimen]
            created.append(participant)

        return created
예제 #8
0
    def _create_all_entities(self):
        """
        Save 2 studies, each with 3 genomic files and 2 sequencing
        experiments.

        Within a study the first experiment holds the first two genomic
        files and the second experiment holds the first and the last one.

        :returns: tuple (ses, gfs, studies) where ``ses`` and ``gfs`` map
            'study<j>' to that study's experiments / genomic files and
            ``studies`` is the list of Study objects
        """
        center = SequencingCenter(name='sc')
        studies = []
        ses = {}
        gfs = {}
        for study_idx in range(2):
            key = 'study{}'.format(study_idx)
            study = Study(external_id='s{}'.format(study_idx))
            participant = Participant(external_id='p{}'.format(study_idx))
            study.participants.append(participant)

            # One biospecimen per genomic file
            files = gfs.setdefault(key, [])
            for file_idx in range(3):
                biospecimen = Biospecimen(
                    external_sample_id='b{}'.format(file_idx),
                    analyte_type='DNA',
                    sequencing_center=center,
                    participant=participant)
                genomic_file = GenomicFile(
                    external_id='{}-gf{}'.format(key, file_idx),
                    urls=['s3://mybucket/key'],
                    hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
                files.append(genomic_file)
                biospecimen.genomic_files.append(genomic_file)

            experiments = ses.setdefault(key, [])
            now = datetime.now()
            experiment_fields = {
                'experiment_date': str(now.replace(tzinfo=tz.tzutc())),
                'experiment_strategy': 'WXS',
                'library_name': 'Test_library_name_1',
                'library_strand': 'Unstranded',
                'is_paired_end': False,
                'platform': 'Illumina',
                'instrument_model': '454 GS FLX Titanium',
                'max_insert_size': 600,
                'mean_insert_size': 500,
                'mean_depth': 40,
                'total_reads': 800,
                'mean_read_length': 200
            }
            # (external id suffix, genomic files assigned to the experiment)
            layout = (
                ('se0', files[0:2]),
                ('se1', [files[0], files[-1]]),
            )
            for suffix, members in layout:
                experiment = SequencingExperiment(
                    **experiment_fields,
                    sequencing_center=center,
                    external_id='{}-{}'.format(key, suffix))
                experiment.genomic_files.extend(members)
                experiments.append(experiment)

            studies.append(study)

        db.session.add_all(studies)
        db.session.commit()

        return ses, gfs, studies
    def test_modified_at(self):
        """
        modified_at moves forward when a saved model is changed
        """
        study = Study(external_id='phs001')
        participant = Participant(study=study,
                                  external_id='test01',
                                  is_proband=True)
        db.session.add(participant)
        db.session.commit()

        # Right after creation both timestamps should be nearly equal
        gap = participant.modified_at - participant.created_at
        assert gap < timedelta(seconds=0.01)

        # Pause so that the update below is measurably later
        time.sleep(0.5)

        participant.external_id = 'test02'
        db.session.add(study)
        db.session.commit()

        gap = participant.modified_at - participant.created_at
        assert gap > timedelta(seconds=0.50)
    def _create_entities(self):
        """
        Save a study whose participant has a biospecimen with four genomic
        files, plus one cavatica task.

        Every genomic file is appended to both the biospecimen and a
        single sequencing experiment; odd-numbered files are flagged as
        harmonized. The "Baylor" sequencing center is created and
        committed only when it is not already stored.
        """
        center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
        if center is None:
            center = SequencingCenter(name="Baylor")
            db.session.add(center)
            db.session.commit()

        study = Study(external_id='phs001')
        participant = Participant(external_id='p0',
                                  is_proband=True,
                                  study=study)
        biospecimen = Biospecimen(analyte_type='dna',
                                  sequencing_center=center,
                                  participant=participant)
        experiment = SequencingExperiment(external_id='se',
                                          experiment_strategy='wgs',
                                          is_paired_end=True,
                                          platform='platform',
                                          sequencing_center=center)

        for idx in range(4):
            genomic_file = GenomicFile(
                file_name='gf_{}'.format(idx),
                data_type='submitted aligned read',
                file_format='.cram',
                urls=['s3://file_{}'.format(idx)],
                hashes={'md5': str(uuid.uuid4())},
                is_harmonized=bool(idx % 2))
            biospecimen.genomic_files.append(genomic_file)
            experiment.genomic_files.append(genomic_file)

        task = self._create_cavatica_task('ct1')
        db.session.add(task)
        db.session.add(study)
        db.session.commit()
def _create_all_entities():
    """
    Save 2 studies, each with 3 genomic files, 2 read groups and
    2 sequencing experiments.

    Within a study, the first read group / experiment holds the first two
    genomic files and the second holds the first and the last one.

    :returns: tuple (ses, rgs, gfs, studies); the three dicts map
        'study<j>' to that study's sequencing experiments, read groups and
        genomic files, and ``studies`` is the list of Study objects
    """
    center = SequencingCenter(name='sc')
    studies = []
    ses, rgs, gfs = {}, {}, {}
    for study_idx in range(2):
        key = 'study{}'.format(study_idx)
        study = Study(external_id='s{}'.format(study_idx))
        participant = Participant(external_id='p{}'.format(study_idx))
        study.participants.append(participant)

        # One biospecimen per genomic file
        files = gfs.setdefault(key, [])
        for file_idx in range(3):
            biospecimen = Biospecimen(
                external_sample_id='b{}'.format(file_idx),
                analyte_type='DNA',
                sequencing_center=center,
                participant=participant)
            genomic_file = GenomicFile(
                external_id='{}-gf{}'.format(key, file_idx),
                urls=['s3://mybucket/key', 'https://gen3.something.com/did'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            files.append(genomic_file)
            biospecimen.genomic_files.append(genomic_file)

        # File selections shared by the read groups and the experiments
        selections = (files[0:2], [files[0], files[-1]])

        read_groups = []
        for number, members in enumerate(selections):
            read_group = ReadGroup(
                external_id='{}-rg{}'.format(key, number))
            read_group.genomic_files.extend(members)
            read_groups.append(read_group)

        experiments = []
        for number, members in enumerate(selections):
            experiment = SequencingExperiment(
                external_id='{}-se{}'.format(key, number),
                experiment_strategy='WGS',
                is_paired_end=True,
                platform='platform',
                sequencing_center=center)
            experiment.genomic_files.extend(members)
            experiments.append(experiment)

        rgs.setdefault(key, []).extend(read_groups)
        ses.setdefault(key, []).extend(experiments)
        studies.append(study)

    db.session.add_all(studies)
    db.session.commit()

    return ses, rgs, gfs, studies
예제 #12
0
    def _create_save_to_db(self):
        """
        Save four participants in one study and a 'father' relationship
        from the first participant (Fred) to the third (Pebbles).

        After the relationship is committed its ``external_id`` is set to
        ``str(fr)`` and committed again.

        :returns: tuple (p1, p2, p3, p4, study, kwargs) where ``kwargs``
            holds the relationship fields plus its ``kf_id`` and the
            ``participant2_to_participant1_relation`` read back from the
            saved object
        """
        study = Study(external_id='phs001')

        # (external id, is_proband) for each participant
        people = (
            ('Fred', False),
            ('Wilma', False),
            ('Pebbles', True),
            ('Dino', True),
        )
        p1, p2, p3, p4 = [
            Participant(external_id=name, is_proband=proband)
            for name, proband in people
        ]
        study.participants.extend([p1, p2, p3, p4])
        db.session.add(study)
        db.session.commit()

        kwargs = {
            'participant1_id': p1.kf_id,
            'participant2_id': p3.kf_id,
            'participant1_to_participant2_relation': 'father',
            'source_text_notes': 'Notes 1'
        }
        relationship = FamilyRelationship(**kwargs)
        db.session.add(relationship)
        db.session.commit()

        # Values read back from the saved relationship
        kwargs['kf_id'] = relationship.kf_id
        kwargs['participant2_to_participant1_relation'] = (
            relationship.participant2_to_participant1_relation)

        relationship.external_id = str(relationship)
        db.session.commit()

        return p1, p2, p3, p4, study, kwargs
    def _create_save_to_db(self):
        """
        Save a participant with one diagnosis, then a 'washu' sequencing
        center and a biospecimen for that participant.

        :returns: the diagnosis kwargs, extended with the ``participant_id``
            and ``kf_id`` read back after the commits
        """
        diagnosis_fields = {
            'external_id': 'd1',
            'source_text_diagnosis': 'flu',
            'diagnosis_category': 'Cancer',
            'source_text_tumor_location': 'Brain',
            'age_at_event_days': 365,
            'mondo_id_diagnosis': 'DOID:8469',
            'icd_id_diagnosis': 'J10.01',
            'uberon_id_tumor_location': 'UBERON:0000955',
            'spatial_descriptor': 'left side'
        }
        diagnosis = Diagnosis(**diagnosis_fields)

        # The diagnosis is handed to the participant as a kwarg
        participant = Participant(external_id='Test subject 0',
                                  diagnoses=[diagnosis],
                                  is_proband=True,
                                  study=Study(external_id='phs001'))
        db.session.add(participant)
        db.session.commit()

        # The center is committed first so its kf_id can be used below
        center = SequencingCenter(name='washu')
        db.session.add(center)
        db.session.commit()

        biospecimen = Biospecimen(analyte_type='DNA',
                                  sequencing_center_id=center.kf_id,
                                  participant=participant)
        db.session.add_all([center, biospecimen])
        db.session.commit()

        diagnosis_fields.update(participant_id=participant.kf_id,
                                kf_id=diagnosis.kf_id)
        return diagnosis_fields
예제 #14
0
    def _create_save_dependents(self):
        """
        Save the entities that genomic files depend on.

        A participant (external id 'p1') is created under study 'phs001'
        with the biospecimens returned by ``self._create_biospecimens()``,
        then added to the session and committed.
        """
        study = Study(external_id='phs001')
        biospecimens = self._create_biospecimens()
        participant = Participant(external_id='p1',
                                  biospecimens=biospecimens,
                                  is_proband=True,
                                  study=study)
        db.session.add(participant)
        db.session.commit()
    def test_delete_orphans(self):
        """
        Orphaned alias groups (groups with 0 participants) are deleted,
        while groups that still have a participant are kept
        """
        # First alias group comes from the shared helper
        self._create_save_to_db()

        # Second alias group: two new participants aliased to each other
        study = Study.query.first()
        p6, p7 = [
            Participant(external_id=ext_id,
                        is_proband=True,
                        study_id=study.kf_id)
            for ext_id in ('p6', 'p7')
        ]
        p6.add_alias(p7)
        db.session.add_all([p6, p7])
        db.session.commit()
        self.assertEqual(2, AliasGroup.query.count())

        # Empty the first group by deleting all of its participants
        groups = AliasGroup.query.all()
        emptied, remaining = groups[0], groups[1]
        for member in emptied.participants:
            db.session.delete(member)
        db.session.commit()

        # The emptied group is gone, the other one is untouched
        self.assertEqual(1, AliasGroup.query.count())
        self.assertEqual(len(remaining.participants),
                         len(AliasGroup.query.first().participants))

        # A group that keeps at least 1 participant must survive
        db.session.delete(remaining.participants[0])
        db.session.commit()
        self.assertEqual(1, AliasGroup.query.count())
 def test_link_biospecimen_diagnosis(self):
     """
     Test linking a biospecimen to a diagnosis

     A link between a biospecimen and a diagnosis of the same participant
     is saved and checked. A second link, between a biospecimen of a
     different participant and that same diagnosis, is then expected to
     raise DatabaseValidationError on commit.
     """
     # create a participant with a biospecimen
     # (the returned external ids are not used further in this test)
     (participant_id,
      sample_id,
      aliquot_id) = self.create_participant_biospecimen()
     p = Participant.query.first()
     # Create diagnosis
     kwargs = {
         'external_id': 'id_1',
         'source_text_diagnosis': 'diagnosis_1',
         'age_at_event_days': 365,
         'diagnosis_category': 'cancer',
         'source_text_tumor_location': 'Brain',
         'mondo_id_diagnosis': 'DOID:8469',
         'uberon_id_tumor_location': 'UBERON:0000955',
         'icd_id_diagnosis': 'J10.01',
         'spatial_descriptor': 'left side',
         'participant_id': p.kf_id
     }
     dg = Diagnosis(**kwargs)
     db.session.add(dg)
     # NOTE(review): dg is added but not committed at this point; dg.kf_id
     # below presumably relies on this query autoflushing the session -
     # confirm before reordering these statements
     biospecimen = Biospecimen.query.first()
     # create link between the biospecimen and the diagnosis
     bs_ds = BiospecimenDiagnosis(biospecimen_id=biospecimen.kf_id,
                                  diagnosis_id=dg.kf_id)
     db.session.add(bs_ds)
     db.session.commit()
     # Exactly one link exists and it points at both entities
     self.assertEqual(BiospecimenDiagnosis.query.count(), 1)
     self.assertEqual(bs_ds.biospecimen_id, biospecimen.kf_id)
     self.assertEqual(bs_ds.diagnosis_id, dg.kf_id)
     # Second participant, in a new study, with a biospecimen of its own
     s = Study(external_id="study")
     sc = SequencingCenter.query.first()
     p1 = Participant(external_id='p1', study=s)
     b1 = Biospecimen(analyte_type='RNA', participant=p1,
                      sequencing_center_id=sc.kf_id)
     db.session.add(s)
     db.session.commit()
     # Participant 1 - Link their biospecimen b1 to Participant 0's
     # diagnosis dg
     bd1 = BiospecimenDiagnosis(biospecimen_id=b1.kf_id,
                                diagnosis_id=dg.kf_id)
     db.session.add(bd1)
     # The commit of this cross-participant link is expected to fail
     with self.assertRaises(DatabaseValidationError):
         db.session.commit()
     # Discard the failed transaction
     db.session.rollback()
예제 #17
0
    def _create_outcomes(self):
        """
        Save a participant with two outcomes ('Deceased' and 'Alive').

        :returns: tuple (outcomes, participant) where ``outcomes`` is the
            list of the two Outcome objects in creation order
        """
        study = Study(external_id='phs001')
        outcomes = [
            Outcome(vital_status=status)
            for status in ('Deceased', 'Alive')
        ]
        participant = Participant(external_id='p1',
                                  is_proband=True,
                                  study=study)

        # Attach the outcomes, then save through the participant
        participant.outcomes.extend(outcomes)
        db.session.add(participant)
        db.session.commit()

        return outcomes, participant
 def _create_participant(self, external_id='Test_Participant_0'):
     """
     Save a participant under study 'phs001' and return it.

     :param external_id: external id given to the participant
     :returns: the saved Participant
     """
     participant = Participant(
         external_id=external_id,
         is_proband=False,
         race='asian',
         ethnicity='not hispanic',
         gender='female',
         affected_status=False,
         diagnosis_category='Cancer')

     # The participant is saved through its study
     study = Study(external_id='phs001')
     study.participants.append(participant)
     db.session.add(study)
     db.session.commit()
     return participant
    def _create_save_participants(self, n=5):
        """
        Save ``n`` participants under study 'phs001'.

        :param n: number of participants to create
        :returns: dict keyed by external id ('p0', 'p1', ...); each value
            is a dict with the participant's ``external_id``, its randomly
            chosen ``is_proband`` flag and the Participant itself under
            ``obj``
        """
        study = Study(external_id='phs001')

        records = {}
        for idx in range(n):
            key = 'p{}'.format(idx)
            record = {
                'external_id': key,
                'is_proband': random.choice([True, False])
            }
            participant = Participant(**record)
            # The object is stored only after it was built from the record
            record['obj'] = participant
            records[key] = record
            study.participants.append(participant)

        db.session.add(study)
        db.session.commit()

        return records
    def _create_phenotypes(self):
        """
        Save a participant with two phenotypes.

        Both phenotypes share the external id 'test_phenotype_0' and
        differ only in their source text.

        :returns: tuple (phenotypes, participant, pheno) where ``pheno``
            is the list of source texts used
        """
        pheno = ['test phenotype 1', 'test phenotype 2']

        study = Study(external_id='phs001')
        phenotypes = [
            Phenotype(source_text_phenotype=text,
                      external_id='test_phenotype_0')
            for text in pheno
        ]
        participant = Participant(external_id='p1',
                                  is_proband=True,
                                  study=study)

        # Attach the phenotypes, then save through the participant
        participant.phenotypes.extend(phenotypes)
        db.session.add(participant)
        db.session.commit()

        return phenotypes, participant, pheno
    def _create_all_entities(self):
        """
        Save 2 studies, each with 3 genomic files and 2 read groups.

        Within a study the first read group holds the first two genomic
        files and the second read group holds the first and the last one.

        :returns: tuple (rgs, gfs, studies) where ``rgs`` and ``gfs`` map
            'study<j>' to that study's read groups / genomic files and
            ``studies`` is the list of Study objects
        """
        center = SequencingCenter(name='sc')
        studies = []
        rgs = {}
        gfs = {}
        for study_idx in range(2):
            key = 'study{}'.format(study_idx)
            study = Study(external_id='s{}'.format(study_idx))
            participant = Participant(external_id='p{}'.format(study_idx))
            study.participants.append(participant)

            # One biospecimen per genomic file
            files = gfs.setdefault(key, [])
            for file_idx in range(3):
                biospecimen = Biospecimen(
                    external_sample_id='b{}'.format(file_idx),
                    analyte_type='DNA',
                    sequencing_center=center,
                    participant=participant)
                genomic_file = GenomicFile(
                    external_id='{}-gf{}'.format(key, file_idx),
                    urls=['s3://mybucket/key'],
                    hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
                files.append(genomic_file)
                biospecimen.genomic_files.append(genomic_file)

            read_groups = rgs.setdefault(key, [])
            # (external id suffix, genomic files assigned to the read group)
            layout = (
                ('rg0', files[0:2]),
                ('rg1', [files[0], files[-1]]),
            )
            for suffix, members in layout:
                read_group = ReadGroup(
                    external_id='{}-{}'.format(key, suffix))
                read_group.genomic_files.extend(members)
                read_groups.append(read_group)

            studies.append(study)

        db.session.add_all(studies)
        db.session.commit()

        return rgs, gfs, studies
    def test_create_and_find(self):
        """
        Saved phenotypes can be queried back with their fields intact
        """
        # Reference time taken before anything is created
        started = datetime.now()

        study = Study(external_id='phs001')
        participant = Participant(external_id='Test subject 0',
                                  is_proband=True,
                                  study=study)
        db.session.add(participant)
        db.session.commit()

        # Expected field values, keyed by phenotype external id
        expected_by_id = {}
        for idx in range(2):
            fields = {
                'external_id': 'test_phenotype_{}'.format(idx),
                'source_text_phenotype': 'test phenotype_{}'.format(idx),
                'hpo_id_phenotype': 'HP:0000118',
                'snomed_id_phenotype': '38033009',
                'age_at_event_days': 120,
                'participant_id': participant.kf_id
            }
            phenotype = Phenotype(**fields)
            expected_by_id[phenotype.external_id] = fields
            db.session.add(phenotype)
        db.session.commit()

        self.assertEqual(Phenotype.query.count(), 2)
        for ext_id, fields in expected_by_id.items():
            found = Phenotype.query.filter_by(external_id=ext_id).one()
            for attr, expected in fields.items():
                self.assertEqual(expected, getattr(found, attr))
            self.assertGreater(found.created_at, started)
            self.assertGreater(found.modified_at, started)
            # uuid.UUID() raises when the stored value is not a valid uuid
            self.assertIs(type(uuid.UUID(found.uuid)), uuid.UUID)
예제 #23
0
    def test_create(self):
        """
        Test create outcome

        Two outcomes are saved for one participant; the second one
        ('Deceased') is then read back and its timestamps, uuid and
        updated fields are checked.
        """
        # Create study
        study = Study(external_id='phs001')
        # Create and save participant
        participant_id = 'Test subject 0'
        p = Participant(external_id=participant_id,
                        is_proband=True,
                        study=study)
        db.session.add(p)
        db.session.commit()

        # Create outcomes
        data = {
            'external_id': 'test_0',
            'vital_status': 'Alive',
            'disease_related': False,
            'age_at_event_days': 120,
            'participant_id': p.kf_id
        }
        # Reference time taken before either outcome is created
        dt = datetime.now()
        o1 = Outcome(**data)
        db.session.add(o1)
        # The second outcome reuses the kwargs with two fields changed
        data['vital_status'] = 'Deceased'
        data['disease_related'] = 'True'
        o2 = Outcome(**data)
        db.session.add(o2)
        db.session.commit()

        self.assertEqual(Outcome.query.count(), 2)
        # Select the second outcome by its distinguishing field instead of
        # by position: a query without ORDER BY has no guaranteed row order
        new_outcome = Outcome.query.filter_by(
            vital_status=data['vital_status']).one()
        self.assertGreater(new_outcome.created_at, dt)
        self.assertGreater(new_outcome.modified_at, dt)
        # uuid.UUID() raises when the stored value is not a valid uuid
        self.assertIsInstance(uuid.UUID(new_outcome.uuid), uuid.UUID)

        self.assertEqual(new_outcome.vital_status, data['vital_status'])
        self.assertEqual(new_outcome.disease_related, data['disease_related'])
    def test_post(self):
        """
        POSTing phenotype data creates a new phenotype
        """
        study = Study(external_id='phs001')
        participant = Participant(external_id='Test subject 0',
                                  is_proband=True,
                                  study=study)
        db.session.add(participant)
        db.session.commit()

        # Request payload
        kwargs = {
            'external_id': 'test_phenotype_0',
            'source_text_phenotype': 'Hand tremor',
            'age_at_event_days': 365,
            'hpo_id_phenotype': 'HP:0002378',
            'observed': 'Positive',
            'participant_id': participant.kf_id
        }

        # Send POST request
        response = self.client.post(url_for(PHENOTYPES_LIST_URL),
                                    data=json.dumps(kwargs),
                                    headers=self._api_headers())
        self.assertEqual(response.status_code, 201)

        # The returned phenotype must match what is stored
        body = json.loads(response.data.decode('utf-8'))
        phenotype = body['results']
        stored = Phenotype.query.get(phenotype.get('kf_id'))
        for field in kwargs:
            # participant_id is not compared
            if field != 'participant_id':
                self.assertEqual(phenotype[field], getattr(stored, field))
예제 #25
0
    def test_add_invalid_biospecimen(self):
        """
        A diagnosis cannot be linked with a biospecimen when the two refer
        to different participants
        """
        # Only the saved rows are needed, not the returned values
        self._create_diagnoses()

        study = Study.query.first()
        center = SequencingCenter.query.first()

        # New participant with a biospecimen of its own
        other = Participant(external_id='p1',
                            is_proband=True,
                            study_id=study.kf_id)
        biospecimen = Biospecimen(analyte_type='DNA',
                                  sequencing_center_id=center.kf_id,
                                  participant=other)
        db.session.add(biospecimen)
        db.session.commit()

        # Linking to an existing diagnosis must be rejected on commit
        diagnosis = Diagnosis.query.first()
        biospecimen.diagnoses.append(diagnosis)
        with self.assertRaises(DatabaseValidationError):
            db.session.commit()
        db.session.rollback()
예제 #26
0
    def _create_diagnoses(self, total=2):
        """
        Save ``total`` diagnoses together with the entities they need.

        A 'washu' sequencing center is committed first, then a participant
        with one biospecimen, and finally the diagnoses built by
        ``self._create_diagnosis``.

        :param total: number of diagnoses to create
        :returns: tuple (diagnoses, kwarg_dict) where ``kwarg_dict`` maps
            each diagnosis external id to the kwargs returned alongside it
        """
        study = Study(external_id='phs001')
        participant = Participant(external_id='Test subject 0',
                                  is_proband=True,
                                  study=study)

        # The center is committed first so its kf_id can be used below
        center = SequencingCenter(name='washu')
        db.session.add(center)
        db.session.commit()

        biospecimen = Biospecimen(analyte_type='DNA',
                                  sequencing_center_id=center.kf_id,
                                  participant=participant)
        db.session.add_all([participant, biospecimen])
        db.session.commit()

        diagnoses = []
        kwarg_dict = {}
        for idx in range(total):
            diagnosis, fields = self._create_diagnosis(
                idx, participant_id=participant.kf_id)
            kwarg_dict[diagnosis.external_id] = fields
            diagnoses.append(diagnosis)

        db.session.add_all(diagnoses)
        db.session.commit()

        return diagnoses, kwarg_dict
예제 #27
0
    def test_special_filter_param(self):
        """
        Test the special filter param participant_id

        /family-relationships?participant_id
        """
        def fetch_results(query_string):
            # GET the list endpoint with the query string appended,
            # require a 200 and return the 'results' entry of the body
            reply = self.client.get(
                url_for(FAMILY_RELATIONSHIPS_LIST_URL) + query_string,
                headers=self._api_headers())
            self.assertEqual(reply.status_code, 200)
            body = json.loads(reply.data.decode("utf-8"))
            return body.get('results')

        # Seed participants and a study, then add three more relationships
        # NOTE(review): the counts asserted below imply the helper already
        # saved one relationship - confirm against _create_save_to_db
        p1, p2, p3, p4, s1, _kwargs = self._create_save_to_db()
        extra_relationships = [
            FamilyRelationship(
                participant1=first,
                participant2=second,
                participant1_to_participant2_relation=relation)
            for first, second, relation in ((p1, p4, 'father'),
                                            (p2, p3, 'mother'),
                                            (p2, p4, 'mother'))
        ]
        db.session.add_all(extra_relationships)
        db.session.commit()

        # Case 1 - Participant with no family defined
        content = fetch_results('?participant_id={}'.format(p3.kf_id))
        # Only immediate family relationships returned
        self.assertEqual(len(content), 2)

        # Same participant combined with additional filter parameters
        content = fetch_results(
            '?participant_id={}'
            '&study_id={}&participant1_to_participant2_relation={}'.format(
                p3.kf_id, s1.kf_id, 'father'))
        self.assertEqual(len(content), 1)

        # Case 2 - Participant with a family defined
        family = Family(external_id='phs001-family')
        family.participants.extend([p1, p2, p3, p4])
        db.session.add(family)
        db.session.commit()

        content = fetch_results('?participant_id={}'.format(p3.kf_id))
        # All family relationships returned
        self.assertEqual(len(content), 4)

        # Add another study with its own family and relationships
        second_study = Study(external_id='phs002')
        second_family = Family(external_id='phs002-family')
        fred = Participant(external_id='Fred_1', is_proband=False)
        wilma = Participant(external_id='Wilma_1', is_proband=False)
        pebbles = Participant(external_id='Pebbles_1', is_proband=True)

        parent_links = [
            FamilyRelationship(
                participant1=fred,
                participant2=pebbles,
                participant1_to_participant2_relation='father'),
            FamilyRelationship(
                participant1=wilma,
                participant2=pebbles,
                participant1_to_participant2_relation='mother'),
        ]

        members = [fred, wilma, pebbles]
        second_study.participants.extend(members)
        second_family.participants.extend(members)
        db.session.add(second_study)
        db.session.add(second_family)
        db.session.add_all(parent_links)
        db.session.commit()

        # The new study must not change what is returned for p3
        content = fetch_results('?participant_id={}'.format(p3.kf_id))
        # All family relationships returned
        self.assertEqual(len(content), 4)
    def _create_save_to_db(self):
        """
        Create and save a biospecimen along with what it depends on

        Saves a study, reuses or creates the "Baylor" sequencing center,
        saves a sequencing experiment, then saves a participant that owns
        the new biospecimen.

        :returns: dict of the kwargs used to build the biospecimen, with
            'participant_id' and 'kf_id' of the saved rows added
        """
        shipped_at = datetime.now()

        study = Study(external_id='phs001')
        db.session.add(study)
        db.session.commit()

        # Reuse the "Baylor" sequencing center when one is already saved
        center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
        if center is None:
            center = SequencingCenter(name="Baylor")
            db.session.add(center)
            db.session.commit()

        experiment = SequencingExperiment(
            external_id="Test_seq_ex_o",
            experiment_strategy="WGS",
            is_paired_end="True",
            platform="Test_platform",
            sequencing_center_id=center.kf_id)
        db.session.add(experiment)
        db.session.commit()

        # Field values for the biospecimen; this dict is also what the
        # caller gets back
        kwargs = {
            'external_sample_id': 's1',
            'external_aliquot_id': 'a1',
            'source_text_tissue_type': 'Normal',
            'composition': 'composition1',
            'source_text_anatomical_site': 'Brain',
            'age_at_event_days': 365,
            'source_text_tumor_descriptor': 'Metastatic',
            'shipment_origin': 'CORIELL',
            'analyte_type': 'DNA',
            'concentration_mg_per_ml': 100,
            'volume_ul': 12.67,
            'shipment_date': shipped_at,
            'spatial_descriptor': 'left side',
            'ncit_id_tissue_type': 'Test',
            'ncit_id_anatomical_site': 'C12439',
            'uberon_id_anatomical_site': 'UBERON:0000955',
            'consent_type': 'GRU-IRB',
            'dbgap_consent_code': 'phs00000.c1',
            'sequencing_center_id': center.kf_id
        }
        biospecimen = Biospecimen(**kwargs)

        # Only the participant is added to the session explicitly; the
        # biospecimen is handed over through its biospecimens list
        participant = Participant(external_id='Test subject 0',
                                  biospecimens=[biospecimen],
                                  is_proband=True,
                                  study_id=study.kf_id)
        db.session.add(participant)
        db.session.commit()

        # Report the ids that exist after the commit
        kwargs.update(participant_id=participant.kf_id,
                      kf_id=biospecimen.kf_id)
        return kwargs
    def participants(client):
        """
        Fill the database with linked entities, enough for pagination

        Saves 101 studies, 101 Cavatica apps, 101 study files, 102
        investigators, 101 families and 102 participants. Every
        participant gets a diagnosis, an outcome, a phenotype, a
        biospecimen, a sequencing experiment, a genomic file, a read
        group and a Cavatica task. Family relationships are then created
        for the pairs yielded by iterate_pairwise and everything is
        committed.

        :param client: not used in the body
        """
        # Add a bunch of studies for pagination
        db.session.add_all(
            [Study(external_id='Study_{}'.format(n)) for n in range(101)])

        # Cavatica apps; the last one created receives every task below
        apps = [CavaticaApp(name='app', revision=0) for _ in range(101)]
        db.session.add_all(apps)
        last_app = apps[-1]

        # Add a bunch of study files, all on the first study
        first_study = Study.query.filter_by(external_id='Study_0').one()
        second_study = Study.query.filter_by(external_id='Study_1').one()
        db.session.add_all(
            [StudyFile(file_name='blah', study_id=first_study.kf_id)
             for _ in range(101)])

        # Add a bunch of investigators, each on both studies
        for _ in range(102):
            investigator = Investigator(name='test')
            investigator.studies.extend([first_study, second_study])
            db.session.add(investigator)

        # Add a bunch of families
        families = [Family(external_id='Family_{}'.format(n))
                    for n in range(101)]
        db.session.add_all(families)
        db.session.flush()

        first_family = Family.query.filter_by(external_id='Family_0').one()
        second_family = Family.query.filter_by(external_id='Family_1').one()

        # Field values shared by every participant and every sequencing
        # experiment
        participant_fields = {
            'external_id': "test",
            'is_proband': True,
            'race': 'Asian',
            'ethnicity': 'Hispanic or Latino',
            'diagnosis_category': 'Cancer',
            'gender': 'Male'
        }
        experiment_fields = {
            'external_id': 'Seq_0',
            'experiment_strategy': 'WXS',
            'library_name': 'Test_library_name_1',
            'library_strand': 'Unstranded',
            'is_paired_end': False,
            'platform': 'Test_platform_name_1'
        }

        created = []
        for index in range(102):
            # The first 50 participants go to the first family and study,
            # the rest to the second
            if index < 50:
                family, study = first_family, first_study
            else:
                family, study = second_family, second_study

            person = Participant(**participant_fields,
                                 study_id=study.kf_id,
                                 family_id=family.kf_id)
            diagnosis = Diagnosis()
            person.diagnoses = [diagnosis]
            person.outcomes = [Outcome()]
            person.phenotypes = [Phenotype()]
            created.append(person)
            db.session.add(person)
            db.session.flush()

            # Built per participant because the md5 must be fresh each time
            genomic_file_fields = {
                'external_id': 'gf_0',
                'file_name': 'hg38.fq',
                'data_type': 'Aligned Reads',
                'file_format': 'fastq',
                'size': 1000,
                'urls': ['s3://bucket/key'],
                'hashes': {
                    'md5': str(uuid.uuid4())
                },
                'controlled_access': False
            }

            # Reuse the "Baylor" sequencing center, creating it on first use
            center = SequencingCenter.query.filter_by(
                name="Baylor").one_or_none()
            if center is None:
                center = SequencingCenter(external_id='SC_0', name="Baylor")
                db.session.add(center)
                db.session.flush()

            experiment = SequencingExperiment(
                **experiment_fields,
                sequencing_center_id=center.kf_id)
            db.session.add(experiment)

            sample = Biospecimen(analyte_type='an analyte',
                                 sequencing_center_id=center.kf_id,
                                 participant=person)
            db.session.add(sample)
            person.biospecimens = [sample]

            genomic_file = GenomicFile(
                **genomic_file_fields,
                sequencing_experiment_id=experiment.kf_id)
            db.session.add(genomic_file)
            sample.genomic_files.append(genomic_file)
            sample.diagnoses.append(diagnosis)

            db.session.flush()

            # The read group and task are never added to the session
            # directly; presumably they are saved through relationship
            # cascade - TODO confirm
            read_group = ReadGroup(lane_number=4, flow_cell='FL0123')
            read_group.genomic_files.append(genomic_file)

            task = CavaticaTask(name='task_{}'.format(index))
            task.genomic_files.append(genomic_file)
            last_app.cavatica_tasks.append(task)

        # Family relationships
        # NOTE(review): participants are created with gender 'Male', so
        # unless the model lower-cases it this comparison with 'male'
        # never matches and every relation is 'mother' - confirm intent
        for first, second in iterate_pairwise(created):
            relation = 'father' if first.gender == 'male' else 'mother'
            db.session.add(
                FamilyRelationship(
                    participant1=first,
                    participant2=second,
                    participant1_to_participant2_relation=relation))

        db.session.commit()
    def _create_all_entities(self):
        """
        Create 2 studies with same content
        Content: 3 participants, 4 biospecimens, 4 diagnoses

        Per study: participant 0 has 2 biospecimens and 3 diagnoses,
        participant 1 has 1 biospecimen and 1 diagnosis, participant 2
        has 1 biospecimen and no diagnoses.

        After the studies are committed, 5 BiospecimenDiagnosis links are
        saved, all between entities of the first study (studies[0]).
        """
        # Create entities
        # Reuse the sequencing center named 'sc' when it already exists
        sc = SequencingCenter.query.filter_by(name='sc').first()
        if not sc:
            # Not added to the session here; presumably it is saved via
            # cascade from the biospecimens that reference it - TODO confirm
            sc = SequencingCenter(name='sc')
        studies = []

        # Two studies
        for j in range(2):
            s = Study(external_id='s{}'.format(j))
            p0 = Participant(external_id='study{}-p0'.format(j))
            p1 = Participant(external_id='study{}-p1'.format(j))
            p2 = Participant(external_id='study{}-p2'.format(j))

            # Participant 0
            # Has 2 Biospecimens
            for i in range(2):
                b = Biospecimen(external_sample_id='study{}-p0-b{}'.format(
                    j, i),
                                analyte_type='DNA',
                                sequencing_center=sc)

                # First pass (i == 0): add diagnoses d0 and d1 to participant 0
                if i == 0:
                    for k in range(2):
                        d = Diagnosis(
                            external_id='study{}-p0-d{}'.format(j, k))
                        p0.diagnoses.append(d)

                # Second pass (i == 1): add diagnosis d2 to participant 0
                else:
                    # k still holds 1 from the loop that ran when i == 0,
                    # so k + 1 gives the suffix 2
                    d = Diagnosis(
                        external_id='study{}-p0-d{}'.format(j, k + 1))
                    p0.diagnoses.append(d)
                p0.biospecimens.append(b)

            # Participant 1
            # Has 1 biospecimen, 1 diagnosis
            b = Biospecimen(external_sample_id='study{}-p1-b0'.format(j),
                            analyte_type='DNA',
                            sequencing_center=sc)
            d = Diagnosis(external_id='study{}-p1-d0'.format(j))
            p1.biospecimens.append(b)
            p1.diagnoses.append(d)

            # Participant 2
            # Has 1 biospecimen
            b = Biospecimen(external_sample_id='study{}-p2-b0'.format(j),
                            analyte_type='DNA',
                            sequencing_center=sc)
            p2.biospecimens.append(b)

            s.participants.extend([p0, p1, p2])
            studies.append(s)

        # Only the studies are added; the commit gives every entity the
        # kf_id used for the links below
        db.session.add_all(studies)
        db.session.commit()

        # Create links between bios and diags
        # Only entities of the first study are linked
        # NOTE(review): the indexes below assume the collections come back
        # in the order the items were appended - TODO confirm
        bs_dgs = []

        # Participant 0
        p0 = studies[0].participants[0]
        # b0-d0
        bs_dgs.append(
            BiospecimenDiagnosis(biospecimen_id=p0.biospecimens[0].kf_id,
                                 diagnosis_id=p0.diagnoses[0].kf_id))
        # b0-d1
        bs_dgs.append(
            BiospecimenDiagnosis(biospecimen_id=p0.biospecimens[0].kf_id,
                                 diagnosis_id=p0.diagnoses[1].kf_id))
        # b1-d2
        bs_dgs.append(
            BiospecimenDiagnosis(biospecimen_id=p0.biospecimens[1].kf_id,
                                 diagnosis_id=p0.diagnoses[2].kf_id))
        # b0-d2
        bs_dgs.append(
            BiospecimenDiagnosis(biospecimen_id=p0.biospecimens[0].kf_id,
                                 diagnosis_id=p0.diagnoses[2].kf_id))

        # Participant 1
        p1 = studies[0].participants[1]
        # b0-d0
        bs_dgs.append(
            BiospecimenDiagnosis(biospecimen_id=p1.biospecimens[0].kf_id,
                                 diagnosis_id=p1.diagnoses[0].kf_id))

        db.session.add_all(bs_dgs)
        db.session.commit()