def _make_family(self, external_id='FAM01'):
    """
    Build a family of two non-proband participants and commit it.

    Both participants are attached to a new study ('phs001') and to a new
    'Duo+' family carrying the given external id.

    :param external_id: external id assigned to the family
    :returns: the Family object
    """
    study = Study(external_id='phs001')
    members = [
        Participant(external_id=ext_id, is_proband=False)
        for ext_id in ("CASE01", "CASE02")
    ]
    study.participants.extend(members)

    family = Family(external_id=external_id, family_type='Duo+')
    family.participants.extend(members)

    # Only the study is added explicitly; the participants and the family
    # are presumably persisted through relationship cascades - TODO confirm
    db.session.add(study)
    db.session.commit()
    return family
def _create_entities(self):
    """
    Create and commit one participant with a biospecimen, two sequencing
    experiments and four genomic files.

    Odd-indexed files are appended to experiment 'se1', even-indexed files
    to 'se2'; all four files are assigned to the single biospecimen.
    """
    study = Study(external_id='phs001')
    participant = Participant(external_id='p1', is_proband=True,
                              study=study)

    # Reuse the "Baylor" sequencing center when it exists, else create it
    center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if center is None:
        center = SequencingCenter(name="Baylor")
        db.session.add(center)
        db.session.commit()

    exp_one = SequencingExperiment(**self._make_seq_exp('se1'),
                                   sequencing_center_id=center.kf_id)
    exp_two = SequencingExperiment(**self._make_seq_exp('se2'),
                                   sequencing_center_id=center.kf_id)

    biospecimen = Biospecimen(external_sample_id='bio1',
                              analyte_type='dna',
                              participant_id=participant.kf_id,
                              sequencing_center_id=center.kf_id)

    files = []
    for idx in range(4):
        genomic_file = GenomicFile(
            file_name='file_{}'.format(idx),
            data_type='submitted aligned read',
            file_format='.cram',
            urls=['s3://file_{}'.format(idx)],
            hashes={'md5': str(uuid.uuid4())},
            controlled_access=True,
            is_harmonized=True,
            reference_genome='Test01',
            sequencing_experiment_id=exp_one.kf_id)
        # Alternate the owning experiment by index parity
        owner = exp_one if idx % 2 else exp_two
        owner.genomic_files.append(genomic_file)
        files.append(genomic_file)

    biospecimen.genomic_files = files
    participant.biospecimens = [biospecimen]
    db.session.add(participant)
    db.session.commit()
def _create_participants_and_studies(self, total):
    """
    Create `total` participants, each with a random number of diagnoses,
    biospecimens, outcomes and phenotypes, and each in a family of its own.

    Every participant is assigned to a randomly chosen study from the ones
    returned by _create_studies_investigators.

    :param total: number of participants to create
    """
    studies = self._create_studies_investigators()

    for idx in range(total):
        # The order of the random draws below is kept stable on purpose so
        # that seeded runs generate the same data
        diagnoses = self._create_diagnoses(
            random.randint(self.min_diagnoses, self.max_diagnoses))
        # Return value is not used here
        self._get_unique_sites(diagnoses)
        biospecimens = self._create_biospecimens(
            random.randint(self.min_biospecimens, self.max_biospecimens))
        outcomes = self._create_outcomes(
            random.randint(self.min_outcomes, self.max_outcomes))
        phenotypes = self._create_phenotypes(
            random.randint(self.min_phenotypes, self.max_phenotypes))

        is_proband = random.choice(self.is_proband_list)
        consent_type = random.choice(self.consent_type_list)
        participant = Participant(
            external_id='participant_{}'.format(idx),
            is_proband=is_proband,
            consent_type=consent_type,
            biospecimens=biospecimens,
            diagnoses=diagnoses,
            outcomes=outcomes,
            phenotypes=phenotypes,
            study_id=random.choice(studies).kf_id)
        db.session.add(participant)

        # One single-member family per participant
        db.session.add(Family(participants=[participant]))

    db.session.commit()
def _create_save_to_db(self):
    """
    Create a phenotype attached to a new participant and commit both.

    :returns: dict of the phenotype's constructor kwargs, extended with
        the 'participant_id' and 'kf_id' values read back after the commit
    """
    study = Study(external_id='phs001')

    kwargs = {
        'external_id': 'test_phenotype_0',
        'source_text_phenotype': 'Hand Tremor',
        'hpo_id_phenotype': 'HP:0002378',
        'snomed_id_phenotype': '38033009',
        'observed': 'Positive',
        'age_at_event_days': 365
    }
    phenotype = Phenotype(**kwargs)

    # The phenotype is handed to the participant as a constructor kwarg
    participant = Participant(external_id='Test subject 0',
                              phenotypes=[phenotype],
                              is_proband=True,
                              study=study)
    db.session.add(participant)
    db.session.commit()

    kwargs.update(participant_id=participant.kf_id, kf_id=phenotype.kf_id)
    return kwargs
def create_participant_biospecimen(self):
    """
    Create a study with one participant that has one biospecimen, and
    commit them.

    :returns: tuple (participant external id, external sample id,
        external aliquot id) of the created entities
    """
    participant_id = "Test_Subject_0"
    sample_id = "Test_Sample_0"
    aliquot_id = "Test_aliquot_0"

    biospecimen_fields = self._make_biospecimen(
        external_sample_id=sample_id,
        external_aliquot_id=aliquot_id)
    seqexp_ids = self.create_seqexp()
    biospecimen = Biospecimen(
        **biospecimen_fields,
        sequencing_center_id=seqexp_ids['sequencing_center_id'])

    participant = Participant(external_id=participant_id,
                              is_proband=True,
                              biospecimens=[biospecimen])

    study = Study(external_id='phs001')
    study.participants.append(participant)
    db.session.add(study)
    db.session.commit()

    return participant_id, sample_id, aliquot_id
def test_create_and_find_biospecimen(self):
    """
    Test that a biospecimen can be created and read back with the same
    field values, fresh timestamps and a valid uuid
    """
    study = Study(external_id='phs001')
    db.session.add(study)
    db.session.commit()

    # Reference time for the created_at / modified_at checks
    started = datetime.now()

    participant = Participant(external_id="Test_Subject_0",
                              is_proband=True,
                              study_id=study.kf_id)
    db.session.add(participant)
    db.session.commit()

    fields = self._make_biospecimen(external_sample_id="Test_Sample_0",
                                    external_aliquot_id="Test_Aliquot_0")
    seqexp_ids = self.create_seqexp()
    biospecimen = Biospecimen(
        **fields,
        participant_id=participant.kf_id,
        sequencing_center_id=seqexp_ids['sequencing_center_id'])
    db.session.add(biospecimen)
    db.session.commit()

    self.assertEqual(Biospecimen.query.count(), 1)
    stored = Biospecimen.query.first()
    for attr, expected in fields.items():
        self.assertEqual(expected, getattr(stored, attr))
    self.assertGreater(stored.created_at, started)
    self.assertGreater(stored.modified_at, started)
    # uuid.UUID() raises if the stored value is not a valid uuid string
    self.assertIs(type(uuid.UUID(stored.uuid)), uuid.UUID)
def _create_participants_and_dependents(self):
    """
    Create four participants, each with a sequencing experiment, a
    biospecimen and an input/output pair of genomic files.

    All participants belong to one study ('phs001') and share the "Baylor"
    sequencing center. The center is fetched (or created) once before the
    loop rather than being re-queried for every participant.

    Each participant is committed as it is created; the biospecimen and
    genomic file assignments made afterwards are not committed by this
    method itself.

    :returns: list of the created Participant objects, in creation order
    """
    study = Study(external_id='phs001')
    names = ['Fred', 'Wilma', 'Pebbles', 'Dino']
    proband = [True, False]

    # Sequencing center is the same for every participant: resolve it once
    sc = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if sc is None:
        sc = SequencingCenter(name="Baylor")
        db.session.add(sc)
        db.session.commit()

    participants = []
    for i, _name in enumerate(names):
        # Participant
        p = Participant(external_id=_name,
                        is_proband=random.choice(proband),
                        study=study)
        db.session.add(p)
        db.session.commit()

        # SequencingExperiment
        se = self._create_experiment('se_{}'.format(i),
                                     sequencing_center_id=sc.kf_id)
        # Biospecimen
        s = self._create_biospecimen('s_{}'.format(i),
                                     sequencing_center_id=sc.kf_id,
                                     participant_id=p.kf_id)
        # Input GF
        gf_in = self._create_genomic_file(
            'gf_{}_in'.format(i),
            sequencing_experiment_id=se.kf_id)
        # Output GF
        gf_out = self._create_genomic_file(
            'gf_{}_out'.format(i),
            data_type='aligned read',
            sequencing_experiment_id=se.kf_id)

        s.genomic_files = [gf_in, gf_out]
        p.biospecimens = [s]
        participants.append(p)

    return participants
def _create_all_entities(self):
    """
    Create and commit 2 studies, each with 1 participant, 3 biospecimens,
    3 genomic files and 2 sequencing experiments.

    In each study, experiment se0 gets the first two genomic files and se1
    gets the first and the last one, so the first file is linked to both.

    :returns: tuple (ses, gfs, studies) where ses and gfs are dicts keyed
        by 'study<j>' holding that study's sequencing experiments and
        genomic files, and studies is the list of Study objects
    """
    center = SequencingCenter(name='sc')
    studies = []
    ses = {}
    gfs = {}

    for j in range(2):
        study_key = 'study{}'.format(j)
        study = Study(external_id='s{}'.format(j))
        participant = Participant(external_id='p{}'.format(j))
        study.participants.append(participant)

        # One genomic file per biospecimen
        files = []
        for i in range(3):
            biospecimen = Biospecimen(
                external_sample_id='b{}'.format(i),
                analyte_type='DNA',
                sequencing_center=center,
                participant=participant)
            genomic_file = GenomicFile(
                external_id='study{}-gf{}'.format(j, i),
                urls=['s3://mybucket/key'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            files.append(genomic_file)
            biospecimen.genomic_files.append(genomic_file)
        gfs[study_key] = files

        # Both experiments of a study share the same field values
        now = datetime.now()
        shared_fields = {
            'experiment_date': str(now.replace(tzinfo=tz.tzutc())),
            'experiment_strategy': 'WXS',
            'library_name': 'Test_library_name_1',
            'library_strand': 'Unstranded',
            'is_paired_end': False,
            'platform': 'Illumina',
            'instrument_model': '454 GS FLX Titanium',
            'max_insert_size': 600,
            'mean_insert_size': 500,
            'mean_depth': 40,
            'total_reads': 800,
            'mean_read_length': 200
        }
        first_exp = SequencingExperiment(
            **shared_fields,
            sequencing_center=center,
            external_id='study{}-se0'.format(j))
        first_exp.genomic_files.extend(files[0:2])
        second_exp = SequencingExperiment(
            **shared_fields,
            sequencing_center=center,
            external_id='study{}-se1'.format(j))
        second_exp.genomic_files.extend([files[0], files[-1]])

        ses[study_key] = [first_exp, second_exp]
        studies.append(study)

    db.session.add_all(studies)
    db.session.commit()
    return ses, gfs, studies
def test_modified_at(self):
    """
    Test that modified_at moves forward when a model is updated while
    created_at stays put
    """
    study = Study(external_id='phs001')
    participant = Participant(study=study, external_id='test01',
                              is_proband=True)
    db.session.add(participant)
    db.session.commit()

    # Right after creation both timestamps should be practically equal
    elapsed = participant.modified_at - participant.created_at
    assert elapsed < timedelta(seconds=0.01)

    # Wait, then change the participant and commit again
    time.sleep(0.5)
    participant.external_id = 'test02'
    db.session.add(study)
    db.session.commit()

    # The gap must now be larger than the time slept
    elapsed = participant.modified_at - participant.created_at
    assert elapsed > timedelta(seconds=0.50)
def _create_entities(self):
    """
    Create and commit a participant with its required entities plus one
    cavatica task.

    Creates a study, a participant, a biospecimen, a sequencing experiment
    and four genomic files. Every genomic file is appended to both the
    biospecimen and the sequencing experiment; odd-indexed files are
    marked as harmonized. The "Baylor" sequencing center is reused when it
    already exists.
    """
    # Sequencing center
    sc = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if sc is None:
        sc = SequencingCenter(name="Baylor")
        db.session.add(sc)
        db.session.commit()

    # Create study
    study = Study(external_id='phs001')

    # Participant
    p = Participant(external_id='p0', is_proband=True, study=study)

    # Biospecimen
    bs = Biospecimen(analyte_type='dna',
                     sequencing_center=sc,
                     participant=p)

    # SequencingExperiment
    se = SequencingExperiment(external_id='se',
                              experiment_strategy='wgs',
                              is_paired_end=True,
                              platform='platform',
                              sequencing_center=sc)

    # Genomic files
    for i in range(4):
        gf = GenomicFile(
            file_name='gf_{}'.format(i),
            data_type='submitted aligned read',
            file_format='.cram',
            urls=['s3://file_{}'.format(i)],
            hashes={'md5': str(uuid.uuid4())},
            is_harmonized=bool(i % 2))
        bs.genomic_files.append(gf)
        se.genomic_files.append(gf)

    ct = self._create_cavatica_task('ct1')
    db.session.add(ct)
    db.session.add(study)
    db.session.commit()
def _create_all_entities():
    """
    Create and commit 2 studies, each with 1 participant, 3 biospecimens,
    3 genomic files, 2 read groups and 2 sequencing experiments.

    In each study, the first read group / experiment gets the first two
    genomic files and the second gets the first and the last one.

    :returns: tuple (ses, rgs, gfs, studies); ses, rgs and gfs are dicts
        keyed by 'study<j>' holding that study's sequencing experiments,
        read groups and genomic files; studies is the list of Study objects
    """
    center = SequencingCenter(name='sc')
    studies = []
    ses = {}
    rgs = {}
    gfs = {}

    for j in range(2):
        study_key = 'study{}'.format(j)
        study = Study(external_id='s{}'.format(j))
        participant = Participant(external_id='p{}'.format(j))
        study.participants.append(participant)

        # One genomic file per biospecimen
        files = []
        for i in range(3):
            biospecimen = Biospecimen(
                external_sample_id='b{}'.format(i),
                analyte_type='DNA',
                sequencing_center=center,
                participant=participant)
            genomic_file = GenomicFile(
                external_id='study{}-gf{}'.format(j, i),
                urls=['s3://mybucket/key',
                      'https://gen3.something.com/did'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            files.append(genomic_file)
            biospecimen.genomic_files.append(genomic_file)
        gfs[study_key] = files

        # Read groups
        first_rg = ReadGroup(external_id='study{}-rg0'.format(j))
        first_rg.genomic_files.extend(files[0:2])
        second_rg = ReadGroup(external_id='study{}-rg1'.format(j))
        second_rg.genomic_files.extend([files[0], files[-1]])

        # Sequencing experiments
        first_exp = SequencingExperiment(
            external_id='study{}-se0'.format(j),
            experiment_strategy='WGS',
            is_paired_end=True,
            platform='platform',
            sequencing_center=center)
        first_exp.genomic_files.extend(files[0:2])
        second_exp = SequencingExperiment(
            external_id='study{}-se1'.format(j),
            experiment_strategy='WGS',
            is_paired_end=True,
            platform='platform',
            sequencing_center=center)
        second_exp.genomic_files.extend([files[0], files[-1]])

        rgs[study_key] = [first_rg, second_rg]
        ses[study_key] = [first_exp, second_exp]
        studies.append(study)

    db.session.add_all(studies)
    db.session.commit()
    return ses, rgs, gfs, studies
def _create_save_to_db(self):
    """
    Create a study with four participants and one committed family
    relationship (Fred is 'father' of Pebbles).

    After the first commit the relationship's external_id is set to its
    string representation and committed again.

    :returns: tuple (p1, p2, p3, p4, study, kwargs) where kwargs holds the
        relationship's constructor arguments plus its 'kf_id' and
        'participant2_to_participant1_relation' as read from the object
    """
    study = Study(external_id='phs001')

    roster = (('Fred', False), ('Wilma', False),
              ('Pebbles', True), ('Dino', True))
    members = [Participant(external_id=name, is_proband=flag)
               for name, flag in roster]
    fred, wilma, pebbles, dino = members

    study.participants.extend(members)
    db.session.add(study)
    db.session.commit()

    kwargs = {
        'participant1_id': fred.kf_id,
        'participant2_id': pebbles.kf_id,
        'participant1_to_participant2_relation': 'father',
        'source_text_notes': 'Notes 1'
    }
    relationship = FamilyRelationship(**kwargs)
    db.session.add(relationship)
    db.session.commit()

    # Values only available from the saved object
    kwargs['kf_id'] = relationship.kf_id
    kwargs['participant2_to_participant1_relation'] = (
        relationship.participant2_to_participant1_relation)

    relationship.external_id = str(relationship)
    db.session.commit()

    return fred, wilma, pebbles, dino, study, kwargs
def _create_save_to_db(self):
    """
    Create a diagnosis attached to a new participant, plus a sequencing
    center ('washu') and a biospecimen for that participant, and commit
    them all.

    :returns: dict of the diagnosis' constructor kwargs, extended with the
        'participant_id' and 'kf_id' values read back after the commits
    """
    study = Study(external_id='phs001')

    kwargs = {
        'external_id': 'd1',
        'source_text_diagnosis': 'flu',
        'diagnosis_category': 'Cancer',
        'source_text_tumor_location': 'Brain',
        'age_at_event_days': 365,
        'mondo_id_diagnosis': 'DOID:8469',
        'icd_id_diagnosis': 'J10.01',
        'uberon_id_tumor_location': 'UBERON:0000955',
        'spatial_descriptor': 'left side'
    }
    diagnosis = Diagnosis(**kwargs)

    # The diagnosis is handed to the participant as a constructor kwarg
    participant = Participant(external_id='Test subject 0',
                              diagnoses=[diagnosis],
                              is_proband=True,
                              study=study)
    db.session.add(participant)
    db.session.commit()

    center = SequencingCenter(name='washu')
    db.session.add(center)
    db.session.commit()

    biospecimen = Biospecimen(analyte_type='DNA',
                              sequencing_center_id=center.kf_id,
                              participant=participant)
    db.session.add_all([center, biospecimen])
    db.session.commit()

    kwargs.update(participant_id=participant.kf_id, kf_id=diagnosis.kf_id)
    return kwargs
def _create_save_dependents(self):
    """
    Create and commit the entities a genomic file depends on: a study and
    a participant carrying the biospecimens built by _create_biospecimens
    """
    study = Study(external_id='phs001')
    biospecimens = self._create_biospecimens()

    participant = Participant(external_id='p1',
                              biospecimens=biospecimens,
                              is_proband=True,
                              study=study)
    db.session.add(participant)
    db.session.commit()
def test_delete_orphans(self):
    """
    Test that orphaned alias groups are deleted

    An orphan is an alias group with 0 participants. A group that still
    has at least one participant must survive.
    """
    # First alias group comes from the shared helper
    self._create_save_to_db()

    # Second alias group: two new participants aliased to each other
    study = Study.query.first()
    p6 = Participant(external_id='p6', is_proband=True,
                     study_id=study.kf_id)
    p7 = Participant(external_id='p7', is_proband=True,
                     study_id=study.kf_id)
    p6.add_alias(p7)
    db.session.add_all([p6, p7])
    db.session.commit()
    self.assertEqual(2, AliasGroup.query.count())

    # Orphan the first group by deleting all of its participants
    groups = AliasGroup.query.all()
    emptied, untouched = groups[0], groups[1]
    for member in emptied.participants:
        db.session.delete(member)
    db.session.commit()

    # The orphan is gone and the other group kept all of its participants
    self.assertEqual(1, AliasGroup.query.count())
    self.assertEqual(len(untouched.participants),
                     len(AliasGroup.query.first().participants))

    # Removing one participant leaves the group non-empty, so it stays
    db.session.delete(untouched.participants[0])
    db.session.commit()
    self.assertEqual(1, AliasGroup.query.count())
def test_link_biospecimen_diagnosis(self):
    """
    Test linking a biospecimen to a diagnosis

    A link between a biospecimen and a diagnosis of the same participant
    is saved. Linking another participant's biospecimen to that diagnosis
    must raise DatabaseValidationError on commit.
    """
    # Participant 0 with a biospecimen
    self.create_participant_biospecimen()
    owner = Participant.query.first()

    # Diagnosis for participant 0
    diagnosis = Diagnosis(
        external_id='id_1',
        source_text_diagnosis='diagnosis_1',
        age_at_event_days=365,
        diagnosis_category='cancer',
        source_text_tumor_location='Brain',
        mondo_id_diagnosis='DOID:8469',
        uberon_id_tumor_location='UBERON:0000955',
        icd_id_diagnosis='J10.01',
        spatial_descriptor='left side',
        participant_id=owner.kf_id)
    db.session.add(diagnosis)

    # NOTE(review): the diagnosis kf_id is read below before any explicit
    # commit; presumably the query autoflushes the pending diagnosis first
    biospecimen = Biospecimen.query.first()
    link = BiospecimenDiagnosis(biospecimen_id=biospecimen.kf_id,
                                diagnosis_id=diagnosis.kf_id)
    db.session.add(link)
    db.session.commit()

    self.assertEqual(BiospecimenDiagnosis.query.count(), 1)
    self.assertEqual(link.biospecimen_id, biospecimen.kf_id)
    self.assertEqual(link.diagnosis_id, diagnosis.kf_id)

    # Participant 1 in a second study, with a biospecimen of its own
    other_study = Study(external_id="study")
    center = SequencingCenter.query.first()
    other_participant = Participant(external_id='p1', study=other_study)
    other_biospecimen = Biospecimen(analyte_type='RNA',
                                    participant=other_participant,
                                    sequencing_center_id=center.kf_id)
    db.session.add(other_study)
    db.session.commit()

    # Linking participant 1's biospecimen to participant 0's diagnosis
    # must be rejected
    bad_link = BiospecimenDiagnosis(
        biospecimen_id=other_biospecimen.kf_id,
        diagnosis_id=diagnosis.kf_id)
    db.session.add(bad_link)
    with self.assertRaises(DatabaseValidationError):
        db.session.commit()
    db.session.rollback()
def _create_outcomes(self):
    """
    Create and commit a participant with two outcomes
    ('Deceased' and 'Alive')

    :returns: tuple (list of the two Outcome objects, the Participant)
    """
    study = Study(external_id='phs001')

    outcomes = [Outcome(vital_status=status)
                for status in ('Deceased', 'Alive')]

    participant = Participant(external_id='p1', is_proband=True,
                              study=study)
    participant.outcomes.extend(outcomes)

    db.session.add(participant)
    db.session.commit()
    return outcomes, participant
def _create_participant(self, external_id='Test_Participant_0'):
    """
    Create and commit a participant in a new study ('phs001')

    :param external_id: external id assigned to the participant
    :returns: the Participant object
    """
    participant = Participant(
        external_id=external_id,
        is_proband=False,
        race='asian',
        ethnicity='not hispanic',
        gender='female',
        affected_status=False,
        diagnosis_category='Cancer')

    study = Study(external_id='phs001')
    study.participants.append(participant)
    db.session.add(study)
    db.session.commit()
    return participant
def _create_save_participants(self, n=5):
    """
    Create and commit n participants in a single new study

    :param n: number of participants to create
    :returns: dict keyed by external id ('p0', 'p1', ...); each value is a
        dict with the participant's 'external_id', 'is_proband' and the
        Participant object itself under 'obj'
    """
    study = Study(external_id='phs001')
    records = {}

    for idx in range(n):
        key = 'p{}'.format(idx)
        fields = {
            'external_id': key,
            'is_proband': random.choice([True, False])
        }
        participant = Participant(**fields)
        # Object is stored only after construction so that it is not
        # passed to the constructor as a kwarg
        fields['obj'] = participant
        records[key] = fields
        study.participants.append(participant)

    db.session.add(study)
    db.session.commit()
    return records
def _create_phenotypes(self):
    """
    Create and commit a participant with two phenotypes

    :returns: tuple (list of the two Phenotype objects, the Participant,
        list of the two source text values)
    """
    study = Study(external_id='phs001')

    pheno = ['test phenotype 1', 'test phenotype 2']
    # NOTE(review): both phenotypes get the same external id - confirm
    # whether this is intended or a copy/paste slip
    phenotypes = [
        Phenotype(source_text_phenotype=text,
                  external_id='test_phenotype_0')
        for text in pheno
    ]

    participant = Participant(external_id='p1', is_proband=True,
                              study=study)
    participant.phenotypes.extend(phenotypes)

    db.session.add(participant)
    db.session.commit()
    return phenotypes, participant, pheno
def _create_all_entities(self):
    """
    Create and commit 2 studies, each with 1 participant, 3 biospecimens,
    3 genomic files and 2 read groups.

    In each study, read group rg0 gets the first two genomic files and rg1
    gets the first and the last one, so the first file is linked to both.

    :returns: tuple (rgs, gfs, studies) where rgs and gfs are dicts keyed
        by 'study<j>' holding that study's read groups and genomic files,
        and studies is the list of Study objects
    """
    center = SequencingCenter(name='sc')
    studies = []
    rgs = {}
    gfs = {}

    for j in range(2):
        study_key = 'study{}'.format(j)
        study = Study(external_id='s{}'.format(j))
        participant = Participant(external_id='p{}'.format(j))
        study.participants.append(participant)

        # One genomic file per biospecimen
        files = []
        for i in range(3):
            biospecimen = Biospecimen(
                external_sample_id='b{}'.format(i),
                analyte_type='DNA',
                sequencing_center=center,
                participant=participant)
            genomic_file = GenomicFile(
                external_id='study{}-gf{}'.format(j, i),
                urls=['s3://mybucket/key'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            files.append(genomic_file)
            biospecimen.genomic_files.append(genomic_file)
        gfs[study_key] = files

        first_rg = ReadGroup(external_id='study{}-rg0'.format(j))
        first_rg.genomic_files.extend(files[0:2])
        second_rg = ReadGroup(external_id='study{}-rg1'.format(j))
        second_rg.genomic_files.extend([files[0], files[-1]])

        rgs[study_key] = [first_rg, second_rg]
        studies.append(study)

    db.session.add_all(studies)
    db.session.commit()
    return rgs, gfs, studies
def test_create_and_find(self):
    """
    Test that phenotypes can be created and read back with the same field
    values, fresh timestamps and a valid uuid
    """
    # Reference time for the created_at / modified_at checks
    started = datetime.now()

    study = Study(external_id='phs001')
    participant = Participant(external_id='Test subject 0',
                              is_proband=True,
                              study=study)
    db.session.add(participant)
    db.session.commit()

    # Create two phenotypes, remembering the fields used for each
    expected = {}
    for i in range(2):
        fields = {
            'external_id': 'test_phenotype_{}'.format(i),
            'source_text_phenotype': 'test phenotype_{}'.format(i),
            'hpo_id_phenotype': 'HP:0000118',
            'snomed_id_phenotype': '38033009',
            'age_at_event_days': 120,
            'participant_id': participant.kf_id
        }
        phenotype = Phenotype(**fields)
        expected[phenotype.external_id] = fields
        db.session.add(phenotype)
    db.session.commit()

    self.assertEqual(Phenotype.query.count(), 2)
    for ext_id, fields in expected.items():
        stored = Phenotype.query.filter_by(external_id=ext_id).one()
        for attr, value in fields.items():
            self.assertEqual(value, getattr(stored, attr))
        self.assertGreater(stored.created_at, started)
        self.assertGreater(stored.modified_at, started)
        # uuid.UUID() raises if the stored value is not a valid uuid
        self.assertIs(type(uuid.UUID(stored.uuid)), uuid.UUID)
def test_create(self):
    """
    Test create outcome

    Two outcomes are saved for one participant. The second one
    ('Deceased') is read back and checked for fresh timestamps, a valid
    uuid and the field values it was created with.
    """
    # Create study
    study = Study(external_id='phs001')

    # Create and save participant
    participant_id = 'Test subject 0'
    p = Participant(external_id=participant_id, is_proband=True,
                    study=study)
    db.session.add(p)
    db.session.commit()

    # Create outcomes
    data = {
        'external_id': 'test_0',
        'vital_status': 'Alive',
        'disease_related': False,
        'age_at_event_days': 120,
        'participant_id': p.kf_id
    }
    dt = datetime.now()
    o1 = Outcome(**data)
    db.session.add(o1)

    # Second outcome reuses the same dict with two fields changed
    data['vital_status'] = 'Deceased'
    data['disease_related'] = 'True'
    o2 = Outcome(**data)
    db.session.add(o2)
    db.session.commit()

    self.assertEqual(Outcome.query.count(), 2)

    # Select the second outcome by a distinguishing field: a query with
    # no ORDER BY gives no guarantee about the position of a row
    new_outcome = Outcome.query.filter_by(
        vital_status=data['vital_status']).one()
    self.assertGreater(new_outcome.created_at, dt)
    self.assertGreater(new_outcome.modified_at, dt)
    # uuid.UUID() raises if the stored value is not a valid uuid string
    self.assertIs(type(uuid.UUID(new_outcome.uuid)), uuid.UUID)

    self.assertEqual(new_outcome.vital_status, data['vital_status'])
    self.assertEqual(new_outcome.disease_related,
                     data['disease_related'])
def test_post(self):
    """
    Test creating a new phenotype through a POST request
    """
    study = Study(external_id='phs001')
    participant = Participant(external_id='Test subject 0',
                              is_proband=True,
                              study=study)
    db.session.add(participant)
    db.session.commit()

    # Request body
    body = {
        'external_id': 'test_phenotype_0',
        'source_text_phenotype': 'Hand tremor',
        'age_at_event_days': 365,
        'hpo_id_phenotype': 'HP:0002378',
        'observed': 'Positive',
        'participant_id': participant.kf_id
    }

    # Send post request
    response = self.client.post(url_for(PHENOTYPES_LIST_URL),
                                data=json.dumps(body),
                                headers=self._api_headers())
    self.assertEqual(response.status_code, 201)

    # Every posted field except participant_id must match between the
    # response and the stored phenotype
    phenotype = json.loads(response.data.decode('utf-8'))['results']
    stored = Phenotype.query.get(phenotype.get('kf_id'))
    for field in body:
        if field != 'participant_id':
            self.assertEqual(phenotype[field], getattr(stored, field))
def test_add_invalid_biospecimen(self):
    """
    Test that a diagnosis cannot be linked with a biospecimen if they
    refer to different participants
    """
    # Sets up a participant with diagnoses, a study and a sequencing
    # center; the returned values are not needed here
    self._create_diagnoses()

    study = Study.query.first()
    center = SequencingCenter.query.first()

    # A second participant with a biospecimen of its own
    other_participant = Participant(external_id='p1', is_proband=True,
                                    study_id=study.kf_id)
    biospecimen = Biospecimen(analyte_type='DNA',
                              sequencing_center_id=center.kf_id,
                              participant=other_participant)
    db.session.add(biospecimen)
    db.session.commit()

    # Linking an existing diagnosis to that biospecimen must fail
    diagnosis = Diagnosis.query.first()
    biospecimen.diagnoses.append(diagnosis)
    with self.assertRaises(DatabaseValidationError):
        db.session.commit()
    db.session.rollback()
def _create_diagnoses(self, total=2):
    """
    Create and commit diagnoses together with the entities they need:
    a study, a participant, a sequencing center ('washu') and a
    biospecimen

    :param total: number of diagnoses to create
    :returns: tuple (list of Diagnosis objects, dict mapping each
        diagnosis' external id to the kwargs returned by
        _create_diagnosis)
    """
    study = Study(external_id='phs001')
    participant = Participant(external_id='Test subject 0',
                              is_proband=True,
                              study=study)

    center = SequencingCenter(name='washu')
    db.session.add(center)
    db.session.commit()

    biospecimen = Biospecimen(analyte_type='DNA',
                              sequencing_center_id=center.kf_id,
                              participant=participant)
    db.session.add_all([participant, biospecimen])
    db.session.commit()

    diagnoses = []
    kwarg_dict = {}
    for idx in range(total):
        diagnosis, fields = self._create_diagnosis(
            idx, participant_id=participant.kf_id)
        kwarg_dict[diagnosis.external_id] = fields
        diagnoses.append(diagnosis)

    db.session.add_all(diagnoses)
    db.session.commit()
    return diagnoses, kwarg_dict
def test_special_filter_param(self):
    """
    Test special filter param participant_id

    /family-relationships?participant_id
    """
    def fetch_results(url):
        # GET the url, require a 200 and return the 'results' payload
        response = self.client.get(url, headers=self._api_headers())
        self.assertEqual(response.status_code, 200)
        body = json.loads(response.data.decode("utf-8"))
        return body.get('results')

    def pebbles_url():
        # Relationship list filtered by participant p3 only
        return (url_for(FAMILY_RELATIONSHIPS_LIST_URL) +
                '?participant_id={}'.format(p3.kf_id))

    # Add some family relationships on top of the one made by the helper
    p1, p2, p3, p4, s1, _ = self._create_save_to_db()
    db.session.add_all([
        FamilyRelationship(
            participant1=p1, participant2=p4,
            participant1_to_participant2_relation='father'),
        FamilyRelationship(
            participant1=p2, participant2=p3,
            participant1_to_participant2_relation='mother'),
        FamilyRelationship(
            participant1=p2, participant2=p4,
            participant1_to_participant2_relation='mother'),
    ])
    db.session.commit()

    # Case 1 - Participant with no family defined
    # Only immediate family relationships returned
    self.assertEqual(len(fetch_results(pebbles_url())), 2)

    # Test with additional filter parameters
    filtered_url = (
        url_for(FAMILY_RELATIONSHIPS_LIST_URL) +
        '?participant_id={}'
        '&study_id={}&participant1_to_participant2_relation={}'.format(
            p3.kf_id, s1.kf_id, 'father'))
    self.assertEqual(len(fetch_results(filtered_url)), 1)

    # Case 2 - Participant with a family defined
    family = Family(external_id='phs001-family')
    family.participants.extend([p1, p2, p3, p4])
    db.session.add(family)
    db.session.commit()

    # All family relationships returned
    self.assertEqual(len(fetch_results(pebbles_url())), 4)

    # Add another study with a family and relationships
    second_study = Study(external_id='phs002')
    second_family = Family(external_id='phs002-family')
    fred_1 = Participant(external_id='Fred_1', is_proband=False)
    wilma_1 = Participant(external_id='Wilma_1', is_proband=False)
    pebbles_1 = Participant(external_id='Pebbles_1', is_proband=True)
    father_rel = FamilyRelationship(
        participant1=fred_1, participant2=pebbles_1,
        participant1_to_participant2_relation='father')
    mother_rel = FamilyRelationship(
        participant1=wilma_1, participant2=pebbles_1,
        participant1_to_participant2_relation='mother')
    second_study.participants.extend([fred_1, wilma_1, pebbles_1])
    second_family.participants.extend([fred_1, wilma_1, pebbles_1])
    db.session.add(second_study)
    db.session.add(second_family)
    db.session.add_all([father_rel, mother_rel])
    db.session.commit()

    # Should see same results for p3: the other study's family must not
    # leak into the result
    self.assertEqual(len(fetch_results(pebbles_url())), 4)
def _create_save_to_db(self):
    """
    Create a biospecimen attached to a new participant and commit it,
    along with a study, the "Baylor" sequencing center (reused when
    present) and a sequencing experiment

    :returns: dict of the biospecimen's constructor kwargs, extended with
        the 'participant_id' and 'kf_id' values read back after the commit
    """
    # Also used as the biospecimen's shipment date
    now = datetime.now()

    study = Study(external_id='phs001')
    db.session.add(study)
    db.session.commit()

    center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if center is None:
        center = SequencingCenter(name="Baylor")
        db.session.add(center)
        db.session.commit()

    experiment = SequencingExperiment(external_id="Test_seq_ex_o",
                                      experiment_strategy="WGS",
                                      is_paired_end="True",
                                      platform="Test_platform",
                                      sequencing_center_id=center.kf_id)
    db.session.add(experiment)
    db.session.commit()

    kwargs = {
        'external_sample_id': 's1',
        'external_aliquot_id': 'a1',
        'source_text_tissue_type': 'Normal',
        'composition': 'composition1',
        'source_text_anatomical_site': 'Brain',
        'age_at_event_days': 365,
        'source_text_tumor_descriptor': 'Metastatic',
        'shipment_origin': 'CORIELL',
        'analyte_type': 'DNA',
        'concentration_mg_per_ml': 100,
        'volume_ul': 12.67,
        'shipment_date': now,
        'spatial_descriptor': 'left side',
        'ncit_id_tissue_type': 'Test',
        'ncit_id_anatomical_site': 'C12439',
        'uberon_id_anatomical_site': 'UBERON:0000955',
        'consent_type': 'GRU-IRB',
        'dbgap_consent_code': 'phs00000.c1',
        'sequencing_center_id': center.kf_id
    }
    biospecimen = Biospecimen(**kwargs)

    # The biospecimen is handed to the participant as a constructor kwarg
    participant = Participant(external_id='Test subject 0',
                              biospecimens=[biospecimen],
                              is_proband=True,
                              study_id=study.kf_id)
    db.session.add(participant)
    db.session.commit()

    kwargs.update(participant_id=participant.kf_id,
                  kf_id=biospecimen.kf_id)
    return kwargs
def participants(client):
    """
    Fill the database with more than 100 rows of each entity so that list
    endpoints have several pages of results.

    Creates 101 studies, cavatica apps, study files and families, 102
    investigators and 102 participants. Each participant gets a diagnosis,
    outcome, phenotype, biospecimen, sequencing experiment, genomic file,
    read group and cavatica task. The first 50 participants go to
    Study_0 / Family_0, the rest to Study_1 / Family_1. A family
    relationship is added for each pair yielded by iterate_pairwise, and
    everything is committed once at the end.

    :param client: not used in the body
    """
    # Add a bunch of studies for pagination
    for i in range(101):
        db.session.add(Study(external_id='Study_{}'.format(i)))

    # `ca` stays bound to the last app created; all tasks are added to it
    for i in range(101):
        ca = CavaticaApp(name='app', revision=0)
        db.session.add(ca)

    # Add a bunch of study files
    s0 = Study.query.filter_by(external_id='Study_0').one()
    s1 = Study.query.filter_by(external_id='Study_1').one()
    for i in range(101):
        db.session.add(StudyFile(file_name='blah', study_id=s0.kf_id))

    # Add a bunch of investigators
    for _ in range(102):
        investigator = Investigator(name='test')
        investigator.studies.extend([s0, s1])
        db.session.add(investigator)

    # Add a bunch of families
    families = [Family(external_id='Family_{}'.format(i))
                for i in range(101)]
    db.session.add_all(families)
    db.session.flush()

    created = []
    f0 = Family.query.filter_by(external_id='Family_0').one()
    f1 = Family.query.filter_by(external_id='Family_1').one()
    seq_cen = None
    for i in range(102):
        if i < 50:
            family, study = f0, s0
        else:
            family, study = f1, s1

        person = Participant(external_id="test",
                             is_proband=True,
                             race='Asian',
                             ethnicity='Hispanic or Latino',
                             diagnosis_category='Cancer',
                             gender='Male',
                             study_id=study.kf_id,
                             family_id=family.kf_id)
        diag = Diagnosis()
        person.diagnoses = [diag]
        person.outcomes = [Outcome()]
        person.phenotypes = [Phenotype()]
        created.append(person)
        db.session.add(person)
        db.session.flush()

        seq_data = {
            'external_id': 'Seq_0',
            'experiment_strategy': 'WXS',
            'library_name': 'Test_library_name_1',
            'library_strand': 'Unstranded',
            'is_paired_end': False,
            'platform': 'Test_platform_name_1'
        }
        gf_kwargs = {
            'external_id': 'gf_0',
            'file_name': 'hg38.fq',
            'data_type': 'Aligned Reads',
            'file_format': 'fastq',
            'size': 1000,
            'urls': ['s3://bucket/key'],
            'hashes': {'md5': str(uuid.uuid4())},
            'controlled_access': False
        }

        # Reuse the "Baylor" sequencing center when present
        seq_cen = (SequencingCenter.query
                   .filter_by(name="Baylor")
                   .one_or_none())
        if seq_cen is None:
            seq_cen = SequencingCenter(external_id='SC_0', name="Baylor")
            db.session.add(seq_cen)
            db.session.flush()

        seq_exp = SequencingExperiment(**seq_data,
                                       sequencing_center_id=seq_cen.kf_id)
        db.session.add(seq_exp)

        samp = Biospecimen(analyte_type='an analyte',
                           sequencing_center_id=seq_cen.kf_id,
                           participant=person)
        db.session.add(samp)
        person.biospecimens = [samp]

        gf = GenomicFile(**gf_kwargs,
                         sequencing_experiment_id=seq_exp.kf_id)
        db.session.add(gf)
        samp.genomic_files.append(gf)
        samp.diagnoses.append(diag)
        db.session.flush()

        rg = ReadGroup(lane_number=4, flow_cell='FL0123')
        rg.genomic_files.append(gf)

        ct = CavaticaTask(name='task_{}'.format(i))
        ct.genomic_files.append(gf)
        ca.cavatica_tasks.append(ct)

    # Family relationships
    for participant1, participant2 in iterate_pairwise(created):
        # NOTE(review): participants above are created with gender 'Male'
        # while this compares against 'male', so the relation comes out as
        # 'mother' - confirm whether that is intended
        rel = 'father' if participant1.gender == 'male' else 'mother'
        db.session.add(FamilyRelationship(
            participant1=participant1,
            participant2=participant2,
            participant1_to_participant2_relation=rel))

    db.session.commit()
def _create_all_entities(self):
    """
    Create 2 studies with same content and link biospecimens to diagnoses
    in the first study.

    Content of each study:
        participant p0 - 2 biospecimens, 3 diagnoses (d0, d1, d2)
        participant p1 - 1 biospecimen, 1 diagnosis
        participant p2 - 1 biospecimen

    After the studies are committed, BiospecimenDiagnosis links are
    created for the first study only:
        p0: b0-d0, b0-d1, b1-d2, b0-d2
        p1: b0-d0

    The sequencing center named 'sc' is reused when it already exists.
    """
    # Create entities
    sc = SequencingCenter.query.filter_by(name='sc').first()
    if not sc:
        sc = SequencingCenter(name='sc')

    studies = []
    # Two studies
    for j in range(2):
        s = Study(external_id='s{}'.format(j))
        p0 = Participant(external_id='study{}-p0'.format(j))
        p1 = Participant(external_id='study{}-p1'.format(j))
        p2 = Participant(external_id='study{}-p2'.format(j))

        # Participant 0
        # Has 2 biospecimens; 2 diagnoses are created alongside b0 and 1
        # alongside b1. Diagnoses are numbered with an explicit counter
        # instead of a variable left over from an earlier loop.
        diagnosis_number = 0
        for i, diagnosis_count in enumerate((2, 1)):
            b = Biospecimen(
                external_sample_id='study{}-p0-b{}'.format(j, i),
                analyte_type='DNA',
                sequencing_center=sc)
            for _ in range(diagnosis_count):
                d = Diagnosis(
                    external_id='study{}-p0-d{}'.format(
                        j, diagnosis_number))
                p0.diagnoses.append(d)
                diagnosis_number += 1
            p0.biospecimens.append(b)

        # Participant 1
        # Has 1 biospecimen, 1 diagnosis
        b = Biospecimen(external_sample_id='study{}-p1-b0'.format(j),
                        analyte_type='DNA',
                        sequencing_center=sc)
        d = Diagnosis(external_id='study{}-p1-d0'.format(j))
        p1.biospecimens.append(b)
        p1.diagnoses.append(d)

        # Participant 2
        # Has 1 biospecimen
        b = Biospecimen(external_sample_id='study{}-p2-b0'.format(j),
                        analyte_type='DNA',
                        sequencing_center=sc)
        p2.biospecimens.append(b)

        s.participants.extend([p0, p1, p2])
        studies.append(s)

    db.session.add_all(studies)
    db.session.commit()

    # Create links between bios and diags (first study only)
    bs_dgs = []

    # Participant 0: (biospecimen index, diagnosis index) pairs
    p0 = studies[0].participants[0]
    for bio_idx, diag_idx in ((0, 0), (0, 1), (1, 2), (0, 2)):
        bs_dgs.append(
            BiospecimenDiagnosis(
                biospecimen_id=p0.biospecimens[bio_idx].kf_id,
                diagnosis_id=p0.diagnoses[diag_idx].kf_id))

    # Participant 1: b0-d0
    p1 = studies[0].participants[1]
    bs_dgs.append(
        BiospecimenDiagnosis(biospecimen_id=p1.biospecimens[0].kf_id,
                             diagnosis_id=p1.diagnoses[0].kf_id))

    db.session.add_all(bs_dgs)
    db.session.commit()