def _create_save_to_db(self):
    """
    Persist one phenotype (via a new participant) and return its attributes.

    A study, a phenotype and a participant owning that phenotype are
    created; the participant is added to the session and committed.

    Returns the dict of keyword arguments used to build the phenotype,
    extended with the 'participant_id' and 'kf_id' read back from the
    saved participant and phenotype.
    """
    # The study has to exist before the participant that references it
    parent_study = Study(external_id='phs001')

    # Attributes of the phenotype under test
    phenotype_attrs = {
        'external_id': 'test_phenotype_0',
        'source_text_phenotype': 'Hand Tremor',
        'hpo_id_phenotype': 'HP:0002378',
        'snomed_id_phenotype': '38033009',
        'observed': 'Positive',
        'age_at_event_days': 365
    }
    phenotype = Phenotype(**phenotype_attrs)

    # The phenotype is handed to the participant at construction time
    owner = Participant(
        external_id='Test subject 0',
        phenotypes=[phenotype],
        is_proband=True,
        study=parent_study,
    )
    db.session.add(owner)
    db.session.commit()

    # Report the identifiers read back after the commit
    phenotype_attrs.update(
        participant_id=owner.kf_id,
        kf_id=phenotype.kf_id,
    )
    return phenotype_attrs
def _create_phenotypes(self):
    """
    Create two phenotypes attached to a single saved participant.

    Returns a 3-tuple: the list of the two phenotypes, the participant
    that owns them, and the list of source texts used to build them.
    """
    # Parent study for the participant
    parent_study = Study(external_id='phs001')

    # One phenotype per source text; both use the same external_id
    source_texts = ['test phenotype 1', 'test phenotype 2']
    phenotypes = [
        Phenotype(source_text_phenotype=text,
                  external_id='test_phenotype_0')
        for text in source_texts
    ]

    owner = Participant(external_id='p1', is_proband=True,
                        study=parent_study)

    # Attach the phenotypes to the participant, then save everything
    owner.phenotypes.extend(phenotypes)
    db.session.add(owner)
    db.session.commit()

    return phenotypes, owner, source_texts
def test_foreign_key_constraint(self):
    """
    Test that a phenotype cannot be created without an existing
    reference Participant. This checks foreign key constraint

    The phenotype is given a blank participant_id and staged in the
    session; committing it is expected to raise IntegrityError.
    """
    # Create phenotype
    data = {
        'source_text_phenotype': 'phenotype_1',
        'participant_id': ''  # empty blank foreign key
    }
    d = Phenotype(**data)

    # Staging the object does not touch the database; the INSERT (and
    # therefore the constraint check) only happens on flush/commit, so
    # that is the call that must be wrapped by assertRaises.
    db.session.add(d)
    with self.assertRaises(IntegrityError):
        db.session.commit()

    # A failed flush leaves the session unusable until it is rolled back
    db.session.rollback()
def test_not_null_constraint(self):
    """
    Test that a phenotype cannot be created without required
    parameters such as participant_id

    The phenotype is built with no participant_id at all and staged in
    the session; committing it is expected to raise IntegrityError.
    """
    # Create phenotype
    data = {
        'source_text_phenotype': 'phenotype_1',
        # non-existent required param: participant_id
    }
    d = Phenotype(**data)

    # Staging the object does not touch the database; the INSERT (and
    # therefore the constraint check) only happens on flush/commit, so
    # that is the call that must be wrapped by assertRaises.
    db.session.add(d)
    with self.assertRaises(IntegrityError):
        db.session.commit()

    # A failed flush leaves the session unusable until it is rolled back
    db.session.rollback()
def _create_phenotypes(self, total):
    """
    Build `total` randomly populated Phenotype objects.

    Each phenotype takes its 'phenotype' and 'hpo_id' from a random
    entry of self.phenotype_chosen_list, its 'observed' value from a
    random entry of self.observed_list, and a random
    'age_at_event_days' between 0 and 32872 inclusive.

    Returns the list of created phenotypes (nothing is saved here).
    """
    def _random_phenotype():
        # Keep the order of random draws: entry, observed, then age
        chosen = random.choice(self.phenotype_chosen_list)
        name = chosen[0]
        hpo_id = chosen[1]
        observed = random.choice(self.observed_list)
        age_days = random.randint(0, 32872)
        return Phenotype(phenotype=name,
                         hpo_id=hpo_id,
                         observed=observed,
                         age_at_event_days=age_days)

    return [_random_phenotype() for _ in range(total)]
def test_create_and_find(self):
    """
    Test create phenotype

    Saves a participant, creates two phenotypes for it, then looks each
    phenotype up by external_id and checks every supplied attribute,
    the created/modified timestamps and that uuid parses as a UUID.
    """
    started_at = datetime.now()

    # Parent study and participant must be saved first
    parent_study = Study(external_id='phs001')
    owner = Participant(external_id='Test subject 0', is_proband=True,
                        study=parent_study)
    db.session.add(owner)
    db.session.commit()

    # Expected attributes of each phenotype, keyed by external_id
    expected_by_external_id = {}
    for index in range(2):
        attributes = {
            'external_id': 'test_phenotype_{}'.format(index),
            'source_text_phenotype': 'test phenotype_{}'.format(index),
            'hpo_id_phenotype': 'HP:0000118',
            'snomed_id_phenotype': '38033009',
            'age_at_event_days': 120,
            'participant_id': owner.kf_id
        }
        phenotype = Phenotype(**attributes)
        expected_by_external_id[phenotype.external_id] = attributes
        db.session.add(phenotype)
    db.session.commit()

    self.assertEqual(Phenotype.query.count(), 2)

    for external_id, expected in expected_by_external_id.items():
        found = Phenotype.query.filter_by(external_id=external_id).one()
        # Every attribute passed at creation must round-trip unchanged
        for field, expected_value in expected.items():
            self.assertEqual(expected_value, getattr(found, field))
        self.assertGreater(found.created_at, started_at)
        self.assertGreater(found.modified_at, started_at)
        self.assertIs(type(uuid.UUID(found.uuid)), uuid.UUID)
def participants(client):
    """
    Fill the database session with a large set of related records and
    commit them.

    Created here: 101 studies, 101 Cavatica apps, 101 study files on
    'Study_0', 102 investigators linked to 'Study_0' and 'Study_1',
    101 families, and 102 participants. Each participant gets one
    diagnosis, outcome, phenotype, biospecimen, sequencing experiment,
    genomic file, read group and Cavatica task. Finally a family
    relationship is added for each pair yielded by iterate_pairwise.

    `client` is not used in the body.
    NOTE(review): presumably it is requested only so the application and
    database are set up first — confirm.

    Nothing is returned explicitly.
    """

    # Add a bunch of studies for pagination
    for i in range(101):
        s = Study(external_id='Study_{}'.format(i))
        db.session.add(s)

    # All 101 apps share the same name and revision
    for i in range(101):
        ca = CavaticaApp(name='app', revision=0)
        db.session.add(ca)

    # Add a bunch of study files
    s0 = Study.query.filter_by(external_id='Study_0').one()
    s1 = Study.query.filter_by(external_id='Study_1').one()
    # Every study file is attached to Study_0
    for i in range(101):
        sf = StudyFile(file_name='blah', study_id=s0.kf_id)
        db.session.add(sf)

    # Add a bunch of investigators
    for _ in range(102):
        inv = Investigator(name='test')
        inv.studies.extend([s0, s1])
        db.session.add(inv)

    # Add a bunch of families
    families = []
    for i in range(101):
        families.append(Family(external_id='Family_{}'.format(i)))
    db.session.add_all(families)
    db.session.flush()

    participants = []
    f0 = Family.query.filter_by(external_id='Family_0').one()
    f1 = Family.query.filter_by(external_id='Family_1').one()
    seq_cen = None
    for i in range(102):
        # First 50 participants go to Family_0/Study_0, the rest to
        # Family_1/Study_1
        f = f0 if i < 50 else f1
        s = s0 if i < 50 else s1
        data = {
            'external_id': "test",
            'is_proband': True,
            'race': 'Asian',
            'ethnicity': 'Hispanic or Latino',
            'diagnosis_category': 'Cancer',
            'gender': 'Male'
        }
        p = Participant(**data, study_id=s.kf_id, family_id=f.kf_id)
        # One empty diagnosis, outcome and phenotype per participant
        diag = Diagnosis()
        p.diagnoses = [diag]
        outcome = Outcome()
        p.outcomes = [outcome]
        phen = Phenotype()
        p.phenotypes = [phen]
        participants.append(p)
        db.session.add(p)
        db.session.flush()

        seq_data = {
            'external_id': 'Seq_0',
            'experiment_strategy': 'WXS',
            'library_name': 'Test_library_name_1',
            'library_strand': 'Unstranded',
            'is_paired_end': False,
            'platform': 'Test_platform_name_1'
        }
        gf_kwargs = {
            'external_id': 'gf_0',
            'file_name': 'hg38.fq',
            'data_type': 'Aligned Reads',
            'file_format': 'fastq',
            'size': 1000,
            'urls': ['s3://bucket/key'],
            'hashes': {
                'md5': str(uuid.uuid4())
            },
            'controlled_access': False
        }
        # Reuse the "Baylor" sequencing center if one already exists,
        # otherwise create it and flush before its kf_id is read below
        seq_cen = SequencingCenter.query.filter_by(name="Baylor")\
            .one_or_none()
        if seq_cen is None:
            seq_cen = SequencingCenter(external_id='SC_0', name="Baylor")
            db.session.add(seq_cen)
            db.session.flush()
        seq_exp = SequencingExperiment(**seq_data,
                                       sequencing_center_id=seq_cen.kf_id)
        db.session.add(seq_exp)
        samp = Biospecimen(analyte_type='an analyte',
                           sequencing_center_id=seq_cen.kf_id,
                           participant=p)
        db.session.add(samp)
        p.biospecimens = [samp]

        gf = GenomicFile(**gf_kwargs,
                         sequencing_experiment_id=seq_exp.kf_id)
        db.session.add(gf)
        samp.genomic_files.append(gf)
        samp.diagnoses.append(diag)

        db.session.flush()

        # NOTE(review): rg and ct are never passed to db.session.add;
        # presumably they are saved through their link to gf — confirm
        # the cascade settings on the models.
        rg = ReadGroup(lane_number=4, flow_cell='FL0123')
        rg.genomic_files.append(gf)

        ct = CavaticaTask(name='task_{}'.format(i))
        ct.genomic_files.append(gf)
        # `ca` is the last CavaticaApp left over from the loop near the
        # top, so every task is attached to that one app
        ca.cavatica_tasks.append(ct)

    # Family relationships
    for participant1, participant2 in iterate_pairwise(participants):
        gender = participant1.gender
        rel = 'mother'
        # NOTE(review): participants above are created with gender
        # 'Male' (capitalised), so this lower-case comparison presumably
        # never matches and rel stays 'mother' — confirm.
        if gender == 'male':
            rel = 'father'
        r = FamilyRelationship(participant1=participant1,
                               participant2=participant2,
                               participant1_to_participant2_relation=rel)
        db.session.add(r)

    db.session.commit()