def _create_entities(self):
    """
    Create participant with required entities

    Builds (or reuses) the "Baylor" sequencing center, one study, one
    participant, two biospecimens (dna and rna) and one sequencing
    experiment holding four genomic files, then commits the session.
    """
    # Sequencing center: reuse the existing "Baylor" row when there is one
    sc = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if sc is None:
        sc = SequencingCenter(name="Baylor")
        db.session.add(sc)
        db.session.commit()

    # Create study
    study = Study(external_id='phs001')

    # Participants
    p = Participant(external_id='p0', is_proband=True, study=study)

    # Biospecimen
    bs = Biospecimen(analyte_type='dna',
                     sequencing_center=sc,
                     participant=p)

    # SequencingExperiment
    data = {
        'external_id': 'se',
        'experiment_strategy': 'wgs',
        'is_paired_end': True,
        'platform': 'platform',
        'sequencing_center': sc
    }
    se = SequencingExperiment(**data)

    # Genomic Files: files with an odd index are flagged as harmonized
    for i in range(4):
        data = {
            'file_name': 'gf_{}'.format(i),
            'data_type': 'submitted aligned read',
            'file_format': '.cram',
            'urls': ['s3://file_{}'.format(i)],
            'hashes': {'md5': str(uuid.uuid4())},
            'is_harmonized': bool(i % 2)
        }
        se.genomic_files.append(GenomicFile(**data))

    bs2 = Biospecimen(analyte_type='rna',
                      sequencing_center=sc,
                      participant=p)

    # Session.add() takes a single instance, so both biospecimens are
    # registered through add_all() instead of add(bs, bs2)
    db.session.add_all([bs, bs2])
    db.session.add(study)
    db.session.commit()
def _create_entities(self):
    """
    Create one participant with a biospecimen, two sequencing experiments
    and four genomic files, then commit them.

    Genomic files with an odd index are attached to experiment 'se1', the
    others to 'se2'; all four files are attached to the biospecimen.
    """
    # Study and participant
    parent_study = Study(external_id='phs001')
    participant = Participant(external_id='p1', is_proband=True,
                              study=parent_study)

    # Sequencing center: reuse "Baylor" when it already exists
    center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if center is None:
        center = SequencingCenter(name="Baylor")
        db.session.add(center)
        db.session.commit()

    # Sequencing experiments
    odd_exp = SequencingExperiment(**self._make_seq_exp('se1'),
                                   sequencing_center_id=center.kf_id)
    even_exp = SequencingExperiment(**self._make_seq_exp('se2'),
                                    sequencing_center_id=center.kf_id)

    # Biospecimen
    # NOTE(review): the participant has not been added or flushed yet, so
    # its kf_id may still be unset here; the actual link is made through
    # `biospecimens` below -- confirm
    specimen = Biospecimen(external_sample_id='bio1',
                           analyte_type='dna',
                           participant_id=participant.kf_id,
                           sequencing_center_id=center.kf_id)

    # Genomic files
    files = []
    for index in range(4):
        gf = GenomicFile(file_name='file_{}'.format(index),
                         data_type='submitted aligned read',
                         file_format='.cram',
                         urls=['s3://file_{}'.format(index)],
                         hashes={'md5': str(uuid.uuid4())},
                         controlled_access=True,
                         is_harmonized=True,
                         reference_genome='Test01',
                         sequencing_experiment_id=odd_exp.kf_id)
        owner = odd_exp if index % 2 else even_exp
        owner.genomic_files.append(gf)
        files.append(gf)

    specimen.genomic_files = files
    participant.biospecimens = [specimen]

    db.session.add(participant)
    db.session.commit()
def test_create_and_find_biospecimen(self):
    """
    Test creation of biospecimen

    Saves a study, a participant and one biospecimen, then checks that the
    stored biospecimen carries the submitted values, has timestamps later
    than the reference time taken before it was created, and exposes a
    uuid that parses as a UUID.
    """
    parent_study = Study(external_id='phs001')
    db.session.add(parent_study)
    db.session.commit()

    # Reference time for the created_at / modified_at checks
    started = datetime.now()

    # Participant that owns the biospecimen
    participant = Participant(external_id="Test_Subject_0",
                              is_proband=True,
                              study_id=parent_study.kf_id)
    db.session.add(participant)
    db.session.commit()

    # Biospecimen under test
    payload = self._make_biospecimen(external_sample_id="Test_Sample_0",
                                     external_aliquot_id="Test_Aliquot_0")
    seq_ids = self.create_seqexp()
    specimen = Biospecimen(
        **payload,
        participant_id=participant.kf_id,
        sequencing_center_id=seq_ids['sequencing_center_id'])
    db.session.add(specimen)
    db.session.commit()

    self.assertEqual(Biospecimen.query.count(), 1)
    stored = Biospecimen.query.first()
    for field in payload:
        self.assertEqual(payload[field], getattr(stored, field))
    self.assertGreater(stored.created_at, started)
    self.assertGreater(stored.modified_at, started)
    self.assertIs(type(uuid.UUID(stored.uuid)), uuid.UUID)
def test_one_to_many_relationship_create(self):
    """
    Test adding multiple biospecimens to the same Participant
    """
    # Start from a participant that already has one biospecimen
    participant_id, _, _ = self.create_participant_biospecimen()
    owner = Participant.query.filter_by(
        external_id=participant_id).one_or_none()

    # Attach a second biospecimen to that participant
    payload = self._make_biospecimen(external_sample_id='Test_Sample_1',
                                     external_aliquot_id='Test_Aliquot_id')
    seq_ids = self.create_seqexp()
    second = Biospecimen(
        **payload,
        participant_id=owner.kf_id,
        sequencing_center_id=seq_ids['sequencing_center_id'])
    db.session.add(second)
    db.session.commit()

    matches = Participant.query.filter_by(external_id=participant_id).all()
    self.assertEqual(Participant.query.count(), 1)

    found = matches[0]
    self.assertEqual(found.biospecimens[0].external_sample_id,
                     'Test_Sample_0')
    self.assertEqual(found.biospecimens[1].external_sample_id,
                     'Test_Sample_1')
    self.assertEqual(found.kf_id, second.participant_id)
    self.assertEqual(Biospecimen.query.count(), 2)
def create_participant_biospecimen(self):
    """
    Create a participant with one biospecimen inside a new study and
    save them to the db

    Returns the tuple (participant_id, sample_id, aliquot_id) holding the
    external ids that were used.
    """
    identifiers = ("Test_Subject_0", "Test_Sample_0", "Test_aliquot_0")
    participant_id, sample_id, aliquot_id = identifiers

    payload = self._make_biospecimen(external_sample_id=sample_id,
                                     external_aliquot_id=aliquot_id)
    seq_ids = self.create_seqexp()
    specimen = Biospecimen(
        **payload,
        sequencing_center_id=seq_ids['sequencing_center_id'])
    owner = Participant(external_id=participant_id,
                        is_proband=True,
                        biospecimens=[specimen])

    # Only the study is added explicitly; the participant is attached to it
    parent_study = Study(external_id='phs001')
    parent_study.participants.append(owner)
    db.session.add(parent_study)
    db.session.commit()

    return identifiers
def _create_biospecimens(self, total):
    """
    Create biospecimens with genomic_files

    Builds `total` Biospecimen objects with randomized attribute values,
    a random number of genomic files each and a randomly chosen existing
    sequencing center, and returns them as a list. Nothing is added to the
    session explicitly here.
    """
    created = []
    now = datetime.now()
    for index in range(total):
        fields = {
            'external_sample_id': 'sample_{}'.format(index),
            'tissue_type': random.choice(self.tissue_type_list),
            'composition': random.choice(self.composition_list),
            'anatomical_site': random.choice(self.anatomical_site_list),
            'age_at_event_days': random.randint(0, 32872),
            'tumor_descriptor': random.choice(self.tumor_descriptor_list),
            'external_aliquot_id': 'aliquot_{}'.format(index),
            'shipment_origin': random.choice(self.shipment_origin_list),
            'analyte_type': random.choice(self.analyte_type_list),
            'concentration_mg_per_ml': (random.randint(700, 4000)) / 10,
            'volume_ml': (random.randint(200, 400)) / 10,
        }

        # Shipment date: 1-2 years and 1-12 months back from now, then
        # 1-30 days forward (drawn in this order)
        years_back = relativedelta.relativedelta(
            years=random.randint(1, 2))
        months_back = relativedelta.relativedelta(
            months=random.randint(1, 12))
        days_forward = relativedelta.relativedelta(
            days=random.randint(1, 30))
        fields['shipment_date'] = (
            now - years_back - months_back + days_forward)

        file_count = random.randint(self.min_gen_files, self.max_gen_files)
        files = self._create_genomic_files(file_count)
        center = random.choice(SequencingCenter.query.all())

        created.append(Biospecimen(**fields,
                                   genomic_files=files,
                                   sequencing_center_id=center.kf_id))
    return created
def _create_all_entities(self):
    """
    Create 2 studies with genomic files and sequencing experiments

    Each study gets one participant, three biospecimens with one genomic
    file each, and two sequencing experiments: the first holds the first
    two files, the second holds the first and the last file.

    Returns (ses, gfs, studies); ses and gfs are dicts keyed by
    'study<j>'.
    """
    center = SequencingCenter(name='sc')
    studies = []
    ses = {}
    gfs = {}

    for j in range(2):
        label = 'study{}'.format(j)
        study = Study(external_id='s{}'.format(j))
        participant = Participant(external_id='p{}'.format(j))
        study.participants.append(participant)

        # One biospecimen per genomic file
        files = gfs.setdefault(label, [])
        for i in range(3):
            specimen = Biospecimen(external_sample_id='b{}'.format(i),
                                   analyte_type='DNA',
                                   sequencing_center=center,
                                   participant=participant)
            gf = GenomicFile(
                external_id='{}-gf{}'.format(label, i),
                urls=['s3://mybucket/key'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            files.append(gf)
            specimen.genomic_files.append(gf)

        experiments = ses.setdefault(label, [])
        stamp = datetime.now()
        kwargs = {
            'experiment_date': str(stamp.replace(tzinfo=tz.tzutc())),
            'experiment_strategy': 'WXS',
            'library_name': 'Test_library_name_1',
            'library_strand': 'Unstranded',
            'is_paired_end': False,
            'platform': 'Illumina',
            'instrument_model': '454 GS FLX Titanium',
            'max_insert_size': 600,
            'mean_insert_size': 500,
            'mean_depth': 40,
            'total_reads': 800,
            'mean_read_length': 200
        }

        # se0 -> first two files, se1 -> first and last file
        file_groups = (files[0:2], [files[0], files[-1]])
        for n, members in enumerate(file_groups):
            se = SequencingExperiment(
                **kwargs,
                sequencing_center=center,
                external_id='{}-se{}'.format(label, n))
            se.genomic_files.extend(members)
            experiments.append(se)

        studies.append(study)

    db.session.add_all(studies)
    db.session.commit()
    return ses, gfs, studies
def _create_all_entities():
    """
    Create 2 studies with genomic files, read groups and sequencing
    experiments

    Each study gets one participant, three biospecimens with one genomic
    file each, two read groups and two sequencing experiments. Index 0 of
    each pair holds the first two files, index 1 holds the first and the
    last file.

    Returns (ses, rgs, gfs, studies); the first three are dicts keyed by
    'study<j>'.
    """
    center = SequencingCenter(name='sc')
    studies = []
    ses = {}
    rgs = {}
    gfs = {}

    for j in range(2):
        label = 'study{}'.format(j)
        study = Study(external_id='s{}'.format(j))
        participant = Participant(external_id='p{}'.format(j))
        study.participants.append(participant)

        # One biospecimen per genomic file
        files = gfs.setdefault(label, [])
        for i in range(3):
            specimen = Biospecimen(external_sample_id='b{}'.format(i),
                                   analyte_type='DNA',
                                   sequencing_center=center,
                                   participant=participant)
            gf = GenomicFile(
                external_id='{}-gf{}'.format(label, i),
                urls=['s3://mybucket/key',
                      'https://gen3.something.com/did'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            files.append(gf)
            specimen.genomic_files.append(gf)

        # Index 0 -> first two files, index 1 -> first and last file
        file_groups = (files[0:2], [files[0], files[-1]])

        # Read groups
        groups = rgs.setdefault(label, [])
        new_groups = []
        for n, members in enumerate(file_groups):
            rg = ReadGroup(external_id='{}-rg{}'.format(label, n))
            rg.genomic_files.extend(members)
            new_groups.append(rg)

        # Sequencing experiments
        experiments = ses.setdefault(label, [])
        new_experiments = []
        for n, members in enumerate(file_groups):
            se = SequencingExperiment(
                external_id='{}-se{}'.format(label, n),
                experiment_strategy='WGS',
                is_paired_end=True,
                platform='platform',
                sequencing_center=center)
            se.genomic_files.extend(members)
            new_experiments.append(se)

        groups.extend(new_groups)
        experiments.extend(new_experiments)
        studies.append(study)

    db.session.add_all(studies)
    db.session.commit()
    return ses, rgs, gfs, studies
def _create_biospecimens(self, total=2):
    """
    Create biospecimens

    Returns a list of `total` Biospecimen objects (external_sample_id
    's0', 's1', ...) that reference the "Baylor" sequencing center. The
    biospecimens themselves are not added to the session here.
    """
    # Get or create the sequencing center, so that calling this helper
    # more than once reuses the "Baylor" row instead of inserting another
    sc = SequencingCenter.query.filter_by(name='Baylor').first()
    if sc is None:
        sc = SequencingCenter(name='Baylor')
        db.session.add(sc)
        db.session.commit()

    return [Biospecimen(external_sample_id='s{}'.format(i),
                        analyte_type='dna',
                        sequencing_center_id=sc.kf_id)
            for i in range(total)]
def _create_biospecimen(self, _id, genomic_files=None,
                        sequencing_center_id=None, participant_id=None):
    """
    Create a biospecimen with genomic_files, commit it and return it

    `_id` becomes the external_sample_id; a missing or empty
    `genomic_files` is stored as an empty list.
    """
    fields = {
        'external_sample_id': _id,
        'analyte_type': 'dna',
        'genomic_files': genomic_files if genomic_files else [],
        'sequencing_center_id': sequencing_center_id,
        'participant_id': participant_id,
    }
    specimen = Biospecimen(**fields)
    db.session.add(specimen)
    db.session.commit()
    return specimen
def test_one_to_many_realtionship_update(self):
    """
    Test updating one of the biospecimens of a participant
    """
    # Start from a participant that already has one biospecimen
    participant_id, _, _ = self.create_participant_biospecimen()
    owner = Participant.query.filter_by(
        external_id=participant_id).one_or_none()

    # Attach a second biospecimen to that participant
    payload = self._make_biospecimen(external_sample_id='Test_Sample_1',
                                     external_aliquot_id='Test_Aliquot_1')
    seq_ids = self.create_seqexp()
    second = Biospecimen(
        **payload,
        participant_id=owner.kf_id,
        sequencing_center_id=seq_ids['sequencing_center_id'])
    db.session.add(second)
    db.session.commit()

    # Fetch the participant and the second biospecimen again
    matches = Participant.query.filter_by(external_id=participant_id).all()
    second_sample = Biospecimen.query.filter_by(
        external_sample_id='Test_Sample_1')
    second = second_sample.one_or_none()

    # Change one attribute, then read the biospecimen back
    second.source_text_tissue_type = 'Tumor'
    second = second_sample.one_or_none()

    self.assertEqual(second.source_text_tissue_type, 'Tumor')
    self.assertEqual(Participant.query.count(), 1)
    self.assertEqual(matches[0].biospecimens[1].external_sample_id,
                     'Test_Sample_1')
    self.assertEqual(Biospecimen.query.count(), 2)
def _create_save_to_db(self):
    """
    Create and save diagnosis

    Creates a study, a participant carrying one diagnosis, a 'washu'
    sequencing center and a biospecimen for the participant.

    Returns the diagnosis kwargs extended with the participant's kf_id
    ('participant_id') and the diagnosis' kf_id ('kf_id').
    """
    parent_study = Study(external_id='phs001')

    # Diagnosis
    kwargs = dict(
        external_id='d1',
        source_text_diagnosis='flu',
        diagnosis_category='Cancer',
        source_text_tumor_location='Brain',
        age_at_event_days=365,
        mondo_id_diagnosis='DOID:8469',
        icd_id_diagnosis='J10.01',
        uberon_id_tumor_location='UBERON:0000955',
        spatial_descriptor='left side',
    )
    diagnosis = Diagnosis(**kwargs)

    # Participant that owns the diagnosis
    participant = Participant(external_id='Test subject 0',
                              diagnoses=[diagnosis],
                              is_proband=True,
                              study=parent_study)
    db.session.add(participant)
    db.session.commit()

    # Sequencing center
    center = SequencingCenter(name='washu')
    db.session.add(center)
    db.session.commit()

    # Biospecimen
    specimen = Biospecimen(analyte_type='DNA',
                           sequencing_center_id=center.kf_id,
                           participant=participant)
    db.session.add_all([center, specimen])
    db.session.commit()

    kwargs.update(participant_id=participant.kf_id,
                  kf_id=diagnosis.kf_id)
    return kwargs
def test_not_null_constraint(self):
    """
    Test that a biospecimen cannot be saved without required parameters
    such as participant_id
    """
    # No participant_id is passed explicitly to the biospecimen
    payload = self._make_biospecimen(external_sample_id="Test_Sample_0")
    db.session.add(Biospecimen(**payload))

    # Committing the incomplete biospecimen must fail
    self.assertRaises(IntegrityError, db.session.commit)
def test_foreign_key_constraint(self):
    """
    Test that a biospecimen cannot be saved with an empty participant_id
    """
    # The participant_id is an empty string
    payload = self._make_biospecimen(external_sample_id="Test_Sample_0")
    db.session.add(Biospecimen(**payload, participant_id=''))

    # Committing the biospecimen must fail
    self.assertRaises(IntegrityError, db.session.commit)
def test_link_biospecimen_diagnosis(self):
    """
    Test linking a biospecimen to a diagnosis

    A link between a biospecimen and a diagnosis of the same participant
    is saved; linking a biospecimen of another participant to that
    diagnosis must raise DatabaseValidationError on commit.
    """
    # Start from a participant that already has one biospecimen
    self.create_participant_biospecimen()
    owner = Participant.query.first()

    # Diagnosis for that participant
    diagnosis = Diagnosis(
        external_id='id_1',
        source_text_diagnosis='diagnosis_1',
        age_at_event_days=365,
        diagnosis_category='cancer',
        source_text_tumor_location='Brain',
        mondo_id_diagnosis='DOID:8469',
        uberon_id_tumor_location='UBERON:0000955',
        icd_id_diagnosis='J10.01',
        spatial_descriptor='left side',
        participant_id=owner.kf_id)
    db.session.add(diagnosis)
    specimen = Biospecimen.query.first()

    # Link the biospecimen and the diagnosis
    link = BiospecimenDiagnosis(biospecimen_id=specimen.kf_id,
                                diagnosis_id=diagnosis.kf_id)
    db.session.add(link)
    db.session.commit()

    self.assertEqual(BiospecimenDiagnosis.query.count(), 1)
    self.assertEqual(link.biospecimen_id, specimen.kf_id)
    self.assertEqual(link.diagnosis_id, diagnosis.kf_id)

    # Second participant with its own biospecimen in a new study
    other_study = Study(external_id="study")
    center = SequencingCenter.query.first()
    other_participant = Participant(external_id='p1', study=other_study)
    other_specimen = Biospecimen(analyte_type='RNA',
                                 participant=other_participant,
                                 sequencing_center_id=center.kf_id)
    db.session.add(other_study)
    db.session.commit()

    # Linking the second participant's biospecimen to the first
    # participant's diagnosis must be rejected
    bad_link = BiospecimenDiagnosis(biospecimen_id=other_specimen.kf_id,
                                    diagnosis_id=diagnosis.kf_id)
    db.session.add(bad_link)
    self.assertRaises(DatabaseValidationError, db.session.commit)
    db.session.rollback()
def _create_all_entities(self):
    """
    Create 2 studies with genomic files and read groups

    Each study gets one participant, three biospecimens with one genomic
    file each, and two read groups: the first holds the first two files,
    the second holds the first and the last file.

    Returns (rgs, gfs, studies); rgs and gfs are dicts keyed by
    'study<j>'.
    """
    center = SequencingCenter(name='sc')
    studies = []
    rgs = {}
    gfs = {}

    for j in range(2):
        label = 'study{}'.format(j)
        study = Study(external_id='s{}'.format(j))
        participant = Participant(external_id='p{}'.format(j))
        study.participants.append(participant)

        # One biospecimen per genomic file
        files = gfs.setdefault(label, [])
        for i in range(3):
            specimen = Biospecimen(external_sample_id='b{}'.format(i),
                                   analyte_type='DNA',
                                   sequencing_center=center,
                                   participant=participant)
            gf = GenomicFile(
                external_id='{}-gf{}'.format(label, i),
                urls=['s3://mybucket/key'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            files.append(gf)
            specimen.genomic_files.append(gf)

        # rg0 -> first two files, rg1 -> first and last file
        groups = rgs.setdefault(label, [])
        file_groups = (files[0:2], [files[0], files[-1]])
        for n, members in enumerate(file_groups):
            rg = ReadGroup(external_id='{}-rg{}'.format(label, n))
            rg.genomic_files.extend(members)
            groups.append(rg)

        studies.append(study)

    db.session.add_all(studies)
    db.session.commit()
    return rgs, gfs, studies
def test_cascade_delete_via_biospecimen(self):
    """
    Test deleting a biospecimen that is linked to a diagnosis

    Deleting the biospecimen must remove its biospecimen-diagnosis link
    and leave the diagnosis itself in place.
    """
    # Create a diagnosis and its dependent entities; the returned values
    # are not needed here
    self._create_diagnoses(total=1)

    # Add another biospecimen to the existing participant
    owner = Participant.query.first()
    center = SequencingCenter.query.first()
    extra = Biospecimen(analyte_type='DNA',
                        sequencing_center=center,
                        participant=owner)
    db.session.add(extra)
    db.session.commit()

    # Link every biospecimen to the first diagnosis
    specimens = Biospecimen.query.all()
    diagnosis = Diagnosis.query.first()
    db.session.add_all([
        BiospecimenDiagnosis(biospecimen=specimen, diagnosis=diagnosis)
        for specimen in specimens
    ])
    db.session.commit()

    # Counts before the delete
    specimen_total = Biospecimen.query.count()
    diagnosis_total = Diagnosis.query.count()
    link_total = BiospecimenDiagnosis.query.count()

    # Delete the biospecimen added above
    db.session.delete(extra)
    db.session.commit()

    # One link and one biospecimen are gone, the diagnosis remains
    assert BiospecimenDiagnosis.query.count() == link_total - 1
    assert Diagnosis.query.count() == diagnosis_total
    assert Biospecimen.query.count() == specimen_total - 1
def test_add_invalid_biospecimen(self):
    """
    Test that a diagnosis cannot be linked with a biospecimen if they
    refer to different participants
    """
    # Create diagnoses and their dependent entities; the returned values
    # are not needed here
    self._create_diagnoses()

    # Reuse the existing study and sequencing center
    existing_study = Study.query.first()
    center = SequencingCenter.query.first()

    # New participant with its own biospecimen
    other = Participant(external_id='p1',
                        is_proband=True,
                        study_id=existing_study.kf_id)
    specimen = Biospecimen(analyte_type='DNA',
                           sequencing_center_id=center.kf_id,
                           participant=other)
    db.session.add(specimen)
    db.session.commit()

    # Linking an existing diagnosis to that biospecimen must be rejected
    diagnosis = Diagnosis.query.first()
    specimen.diagnoses.append(diagnosis)
    self.assertRaises(DatabaseValidationError, db.session.commit)
    db.session.rollback()
def test_one_to_many_relationship_delete(self):
    """
    Test deleting one of the biospecimens of a participant
    """
    # Start from a participant that already has one biospecimen
    participant_id, _, _ = self.create_participant_biospecimen()
    owner = Participant.query.filter_by(
        external_id=participant_id).one_or_none()

    # Attach a second biospecimen to that participant
    payload = self._make_biospecimen(external_sample_id='Test_Sample_1',
                                     external_aliquot_id='Test_Aliquot_1')
    seq_ids = self.create_seqexp()
    second = Biospecimen(
        **payload,
        participant_id=owner.kf_id,
        sequencing_center_id=seq_ids['sequencing_center_id'])
    db.session.add(second)
    db.session.commit()

    # Remove the second biospecimen again; only the first one remains
    db.session.delete(second)
    db.session.commit()
    self.assertEqual(Biospecimen.query.count(), 1)
def _create_diagnoses(self, total=2):
    """
    Create diagnoses and other required entities

    Creates a study, a participant, a 'washu' sequencing center and a
    biospecimen, then `total` diagnoses for the participant.

    Returns (diagnoses, kwarg_dict), where kwarg_dict maps each diagnosis'
    external_id to the kwargs returned with it by _create_diagnosis.
    """
    # Study and participant
    parent_study = Study(external_id='phs001')
    participant = Participant(external_id='Test subject 0',
                              is_proband=True,
                              study=parent_study)

    # Sequencing center
    center = SequencingCenter(name='washu')
    db.session.add(center)
    db.session.commit()

    # Biospecimen
    specimen = Biospecimen(analyte_type='DNA',
                           sequencing_center_id=center.kf_id,
                           participant=participant)
    db.session.add(participant)
    db.session.add(specimen)
    db.session.commit()

    # Diagnoses
    created = []
    kwargs_by_external_id = {}
    for index in range(total):
        diagnosis, diagnosis_kwargs = self._create_diagnosis(
            index, participant_id=participant.kf_id)
        kwargs_by_external_id[diagnosis.external_id] = diagnosis_kwargs
        created.append(diagnosis)

    db.session.add_all(created)
    db.session.commit()
    return created, kwargs_by_external_id
def _create_save_to_db(self):
    """
    Create and save biospecimen

    Creates a study, the "Baylor" sequencing center (reused when it
    already exists), a sequencing experiment, and a participant that
    carries one biospecimen.

    Returns the biospecimen kwargs extended with the participant's kf_id
    ('participant_id') and the biospecimen's kf_id ('kf_id').
    """
    dt = datetime.now()

    study = Study(external_id='phs001')
    db.session.add(study)
    db.session.commit()

    # Sequencing center: reuse the existing "Baylor" row when there is one
    sc = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if sc is None:
        sc = SequencingCenter(name="Baylor")
        db.session.add(sc)
        db.session.commit()

    # is_paired_end is passed as a real boolean rather than the string
    # "True", matching how the flag is set elsewhere in these tests
    se = SequencingExperiment(external_id="Test_seq_ex_o",
                              experiment_strategy="WGS",
                              is_paired_end=True,
                              platform="Test_platform",
                              sequencing_center_id=sc.kf_id)
    db.session.add(se)
    db.session.commit()

    # Create biospecimen
    kwargs = {
        'external_sample_id': 's1',
        'external_aliquot_id': 'a1',
        'source_text_tissue_type': 'Normal',
        'composition': 'composition1',
        'source_text_anatomical_site': 'Brain',
        'age_at_event_days': 365,
        'source_text_tumor_descriptor': 'Metastatic',
        'shipment_origin': 'CORIELL',
        'analyte_type': 'DNA',
        'concentration_mg_per_ml': 100,
        'volume_ul': 12.67,
        'shipment_date': dt,
        'spatial_descriptor': 'left side',
        'ncit_id_tissue_type': 'Test',
        'ncit_id_anatomical_site': 'C12439',
        'uberon_id_anatomical_site': 'UBERON:0000955',
        'consent_type': 'GRU-IRB',
        'dbgap_consent_code': 'phs00000.c1',
        'sequencing_center_id': sc.kf_id
    }
    d = Biospecimen(**kwargs)

    # Create and save participant with biospecimen
    p = Participant(external_id='Test subject 0',
                    biospecimens=[d],
                    is_proband=True,
                    study_id=study.kf_id)
    db.session.add(p)
    db.session.commit()

    kwargs['participant_id'] = p.kf_id
    kwargs['kf_id'] = d.kf_id
    return kwargs
def _create_all_entities(self):
    """
    Create 2 studies with same content

    Content of each study: 3 participants, 4 biospecimens, 4 diagnoses
      - participant 0: biospecimens b0, b1 and diagnoses d0, d1, d2
      - participant 1: biospecimen b0 and diagnosis d0
      - participant 2: biospecimen b0

    Biospecimen-diagnosis links are created for the first study only.
    """
    # Create entities
    sc = SequencingCenter.query.filter_by(name='sc').first()
    if not sc:
        sc = SequencingCenter(name='sc')
    studies = []

    # Two studies
    for j in range(2):
        s = Study(external_id='s{}'.format(j))
        p0 = Participant(external_id='study{}-p0'.format(j))
        p1 = Participant(external_id='study{}-p1'.format(j))
        p2 = Participant(external_id='study{}-p2'.format(j))

        # Participant 0
        # Has 2 Biospecimens
        for i in range(2):
            b = Biospecimen(
                external_sample_id='study{}-p0-b{}'.format(j, i),
                analyte_type='DNA',
                sequencing_center=sc)
            # Biospecimen b0 comes with diagnoses d0 and d1, biospecimen
            # b1 with diagnosis d2. The indexes are spelled out so that
            # the second pass does not depend on a loop variable left
            # over from the first pass.
            diagnosis_indexes = (0, 1) if i == 0 else (2,)
            for k in diagnosis_indexes:
                d = Diagnosis(
                    external_id='study{}-p0-d{}'.format(j, k))
                p0.diagnoses.append(d)
            p0.biospecimens.append(b)

        # Participant 1
        # Has 1 biospecimen, 1 diagnosis
        b = Biospecimen(external_sample_id='study{}-p1-b0'.format(j),
                        analyte_type='DNA',
                        sequencing_center=sc)
        d = Diagnosis(external_id='study{}-p1-d0'.format(j))
        p1.biospecimens.append(b)
        p1.diagnoses.append(d)

        # Participant 2
        # Has 1 biospecimen
        b = Biospecimen(external_sample_id='study{}-p2-b0'.format(j),
                        analyte_type='DNA',
                        sequencing_center=sc)
        p2.biospecimens.append(b)

        s.participants.extend([p0, p1, p2])
        studies.append(s)

    db.session.add_all(studies)
    db.session.commit()

    # Create links between bios and diags (first study only)
    bs_dgs = []

    # Participant 0: b0-d0, b0-d1, b1-d2, b0-d2
    p0 = studies[0].participants[0]
    for b_index, d_index in ((0, 0), (0, 1), (1, 2), (0, 2)):
        bs_dgs.append(
            BiospecimenDiagnosis(
                biospecimen_id=p0.biospecimens[b_index].kf_id,
                diagnosis_id=p0.diagnoses[d_index].kf_id))

    # Participant 1: b0-d0
    p1 = studies[0].participants[1]
    bs_dgs.append(
        BiospecimenDiagnosis(biospecimen_id=p1.biospecimens[0].kf_id,
                             diagnosis_id=p1.diagnoses[0].kf_id))

    db.session.add_all(bs_dgs)
    db.session.commit()
def participants(client):
    """
    Populate the database with enough entities to exercise pagination

    Adds 101 studies, 101 cavatica apps, 101 study files, 102
    investigators, 101 families and 102 participants. Each participant
    gets a diagnosis, an outcome, a phenotype, a biospecimen, a sequencing
    experiment, a genomic file, a read group and a cavatica task. Family
    relationships are then added for the participant pairs yielded by
    iterate_pairwise, and everything is committed.

    `client` is not used in the body.
    """
    # Add a bunch of studies for pagination
    for i in range(101):
        s = Study(external_id='Study_{}'.format(i))
        db.session.add(s)

    for i in range(101):
        ca = CavaticaApp(name='app', revision=0)
        db.session.add(ca)

    # Add a bunch of study files
    s0 = Study.query.filter_by(external_id='Study_0').one()
    s1 = Study.query.filter_by(external_id='Study_1').one()
    for i in range(101):
        sf = StudyFile(file_name='blah', study_id=s0.kf_id)
        db.session.add(sf)

    # Add a bunch of investigators
    for _ in range(102):
        inv = Investigator(name='test')
        inv.studies.extend([s0, s1])
        db.session.add(inv)

    # Add a bunch of families
    families = []
    for i in range(101):
        families.append(Family(external_id='Family_{}'.format(i)))
    db.session.add_all(families)
    db.session.flush()

    participants = []
    f0 = Family.query.filter_by(external_id='Family_0').one()
    f1 = Family.query.filter_by(external_id='Family_1').one()
    seq_cen = None
    for i in range(102):
        # The first 50 participants go to family 0 / study 0, the rest to
        # family 1 / study 1
        f = f0 if i < 50 else f1
        s = s0 if i < 50 else s1
        data = {
            'external_id': "test",
            'is_proband': True,
            'race': 'Asian',
            'ethnicity': 'Hispanic or Latino',
            'diagnosis_category': 'Cancer',
            'gender': 'Male'
        }
        p = Participant(**data, study_id=s.kf_id, family_id=f.kf_id)
        diag = Diagnosis()
        p.diagnoses = [diag]
        outcome = Outcome()
        p.outcomes = [outcome]
        phen = Phenotype()
        p.phenotypes = [phen]
        participants.append(p)
        db.session.add(p)
        db.session.flush()

        seq_data = {
            'external_id': 'Seq_0',
            'experiment_strategy': 'WXS',
            'library_name': 'Test_library_name_1',
            'library_strand': 'Unstranded',
            'is_paired_end': False,
            'platform': 'Test_platform_name_1'
        }
        gf_kwargs = {
            'external_id': 'gf_0',
            'file_name': 'hg38.fq',
            'data_type': 'Aligned Reads',
            'file_format': 'fastq',
            'size': 1000,
            'urls': ['s3://bucket/key'],
            'hashes': {'md5': str(uuid.uuid4())},
            'controlled_access': False
        }

        # Sequencing center: reuse "Baylor" once it has been created
        seq_cen = SequencingCenter.query.filter_by(name="Baylor")\
            .one_or_none()
        if seq_cen is None:
            seq_cen = SequencingCenter(external_id='SC_0', name="Baylor")
            db.session.add(seq_cen)
            db.session.flush()

        seq_exp = SequencingExperiment(**seq_data,
                                       sequencing_center_id=seq_cen.kf_id)
        db.session.add(seq_exp)

        samp = Biospecimen(analyte_type='an analyte',
                           sequencing_center_id=seq_cen.kf_id,
                           participant=p)
        db.session.add(samp)
        p.biospecimens = [samp]

        gf = GenomicFile(**gf_kwargs,
                         sequencing_experiment_id=seq_exp.kf_id)
        db.session.add(gf)
        samp.genomic_files.append(gf)
        samp.diagnoses.append(diag)

        db.session.flush()

        rg = ReadGroup(lane_number=4, flow_cell='FL0123')
        rg.genomic_files.append(gf)

        # `ca` is the last CavaticaApp created in the loop above; every
        # task is attached to that one app
        ct = CavaticaTask(name='task_{}'.format(i))
        ct.genomic_files.append(gf)
        ca.cavatica_tasks.append(ct)

    # Family relationships
    for participant1, participant2 in iterate_pairwise(participants):
        # Participants are stored with gender 'Male', so the comparison is
        # case-insensitive; comparing against 'male' verbatim never matched
        gender = participant1.gender
        rel = 'father' if (gender or '').lower() == 'male' else 'mother'
        r = FamilyRelationship(participant1=participant1,
                               participant2=participant2,
                               participant1_to_participant2_relation=rel)
        db.session.add(r)

    db.session.commit()