def _create_entities(self):
    """
    Build and commit one participant with a biospecimen and four
    genomic files.

    A "Baylor" sequencing center is reused when one exists, otherwise it
    is created and committed. Two sequencing experiments are built from
    ``self._make_seq_exp``; odd-indexed genomic files are appended to the
    'se1' experiment and even-indexed ones to the 'se2' experiment. All
    four files are attached to the single biospecimen, which is attached
    to the participant before the participant is added and committed.
    """
    study = Study(external_id='phs001')
    participant = Participant(external_id='p1', is_proband=True,
                              study=study)

    # Get-or-create the sequencing center
    center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if center is None:
        center = SequencingCenter(name="Baylor")
        db.session.add(center)
        db.session.commit()

    # 'se1' receives the odd-indexed files, 'se2' the even-indexed ones
    odd_exp, even_exp = (
        SequencingExperiment(**self._make_seq_exp(label),
                             sequencing_center_id=center.kf_id)
        for label in ('se1', 'se2')
    )

    # NOTE(review): the participant has not been added/flushed at this
    # point, so participant.kf_id may still be unset here; the link is
    # also established through participant.biospecimens below - confirm.
    specimen = Biospecimen(external_sample_id='bio1',
                           analyte_type='dna',
                           participant_id=participant.kf_id,
                           sequencing_center_id=center.kf_id)

    genomic_files = []
    for idx in range(4):
        # Every file is constructed with the 'se1' experiment id, then
        # appended to one of the two experiments depending on parity
        genomic_file = GenomicFile(
            file_name='file_{}'.format(idx),
            data_type='submitted aligned read',
            file_format='.cram',
            urls=['s3://file_{}'.format(idx)],
            hashes={'md5': str(uuid.uuid4())},
            controlled_access=True,
            is_harmonized=True,
            reference_genome='Test01',
            sequencing_experiment_id=odd_exp.kf_id
        )
        experiment = odd_exp if idx % 2 else even_exp
        experiment.genomic_files.append(genomic_file)
        genomic_files.append(genomic_file)

    specimen.genomic_files = genomic_files
    participant.biospecimens = [specimen]
    db.session.add(participant)
    db.session.commit()
def _create_participants_and_dependents(self):
    """
    Create four participants together with the entities they require.

    For each of the external ids 'Fred', 'Wilma', 'Pebbles' and 'Dino' a
    participant (with a randomly chosen ``is_proband`` flag) is added and
    committed. A "Baylor" sequencing center is looked up, and created and
    committed if missing. A sequencing experiment, a biospecimen and an
    input/output pair of genomic files are then built through the
    ``_create_*`` helpers and linked to the participant.

    Returns the list of created participants, in creation order.
    """
    study = Study(external_id='phs001')
    proband_options = [True, False]
    created = []

    for idx, external_id in enumerate(['Fred', 'Wilma', 'Pebbles', 'Dino']):
        participant = Participant(
            external_id=external_id,
            is_proband=random.choice(proband_options),
            study=study)
        db.session.add(participant)
        db.session.commit()

        # Get-or-create the sequencing center (only missing on the first
        # pass unless it already existed beforehand)
        center = SequencingCenter.query.filter_by(
            name="Baylor").one_or_none()
        if center is None:
            center = SequencingCenter(name="Baylor")
            db.session.add(center)
            db.session.commit()

        experiment = self._create_experiment(
            'se_{}'.format(idx),
            sequencing_center_id=center.kf_id)

        specimen = self._create_biospecimen(
            's_{}'.format(idx),
            sequencing_center_id=center.kf_id,
            participant_id=participant.kf_id)

        # One input and one output genomic file per participant
        input_file = self._create_genomic_file(
            'gf_{}_in'.format(idx),
            sequencing_experiment_id=experiment.kf_id)
        output_file = self._create_genomic_file(
            'gf_{}_out'.format(idx),
            data_type='aligned read',
            sequencing_experiment_id=experiment.kf_id)

        specimen.genomic_files = [input_file, output_file]
        participant.biospecimens = [specimen]
        created.append(participant)

    return created
def participants(client):
    """
    Populate the database with a large set of related entities and commit.

    Creates 101 studies, 101 Cavatica apps, 101 study files, 102
    investigators, 101 families and 102 participants. Each participant
    gets a diagnosis, outcome, phenotype, biospecimen, sequencing
    experiment, genomic file, read group and Cavatica task. Consecutive
    participants are then linked by family relationships. The first 50
    participants use 'Study_0'/'Family_0', the rest 'Study_1'/'Family_1'.

    The ``client`` argument is not used by this function body.
    """
    # Studies (a bunch of them, for pagination)
    for idx in range(101):
        db.session.add(Study(external_id='Study_{}'.format(idx)))

    # Cavatica apps; the name stays bound to the last app created, which
    # is the one every Cavatica task below gets appended to
    for _ in range(101):
        cavatica_app = CavaticaApp(name='app', revision=0)
        db.session.add(cavatica_app)

    # Study files, all attached to the first study
    study_a = Study.query.filter_by(external_id='Study_0').one()
    study_b = Study.query.filter_by(external_id='Study_1').one()
    for _ in range(101):
        db.session.add(StudyFile(file_name='blah', study_id=study_a.kf_id))

    # Investigators, each linked to both studies
    for _ in range(102):
        investigator = Investigator(name='test')
        investigator.studies.extend([study_a, study_b])
        db.session.add(investigator)

    # Families
    families = [Family(external_id='Family_{}'.format(idx))
                for idx in range(101)]
    db.session.add_all(families)
    db.session.flush()

    family_a = Family.query.filter_by(external_id='Family_0').one()
    family_b = Family.query.filter_by(external_id='Family_1').one()

    # Field values shared by every participant / sequencing experiment
    participant_fields = {
        'external_id': "test",
        'is_proband': True,
        'race': 'Asian',
        'ethnicity': 'Hispanic or Latino',
        'diagnosis_category': 'Cancer',
        'gender': 'Male'
    }
    experiment_fields = {
        'external_id': 'Seq_0',
        'experiment_strategy': 'WXS',
        'library_name': 'Test_library_name_1',
        'library_strand': 'Unstranded',
        'is_paired_end': False,
        'platform': 'Test_platform_name_1'
    }

    created = []
    for idx in range(102):
        in_first_group = idx < 50
        family = family_a if in_first_group else family_b
        study = study_a if in_first_group else study_b

        participant = Participant(**participant_fields,
                                  study_id=study.kf_id,
                                  family_id=family.kf_id)
        diagnosis = Diagnosis()
        participant.diagnoses = [diagnosis]
        participant.outcomes = [Outcome()]
        participant.phenotypes = [Phenotype()]
        created.append(participant)
        db.session.add(participant)
        db.session.flush()

        # Get-or-create the sequencing center; flushed on creation so
        # that its kf_id can be read right below
        center = SequencingCenter.query.filter_by(name="Baylor")\
            .one_or_none()
        if center is None:
            center = SequencingCenter(external_id='SC_0', name="Baylor")
            db.session.add(center)
            db.session.flush()

        experiment = SequencingExperiment(
            **experiment_fields,
            sequencing_center_id=center.kf_id)
        db.session.add(experiment)

        specimen = Biospecimen(analyte_type='an analyte',
                               sequencing_center_id=center.kf_id,
                               participant=participant)
        db.session.add(specimen)
        participant.biospecimens = [specimen]

        # Each genomic file gets its own freshly generated md5 value
        genomic_file = GenomicFile(
            external_id='gf_0',
            file_name='hg38.fq',
            data_type='Aligned Reads',
            file_format='fastq',
            size=1000,
            urls=['s3://bucket/key'],
            hashes={'md5': str(uuid.uuid4())},
            controlled_access=False,
            sequencing_experiment_id=experiment.kf_id)
        db.session.add(genomic_file)
        specimen.genomic_files.append(genomic_file)
        specimen.diagnoses.append(diagnosis)

        db.session.flush()

        read_group = ReadGroup(lane_number=4, flow_cell='FL0123')
        read_group.genomic_files.append(genomic_file)

        task = CavaticaTask(name='task_{}'.format(idx))
        task.genomic_files.append(genomic_file)
        cavatica_app.cavatica_tasks.append(task)

    # Family relationships between consecutive participants
    for first, second in iterate_pairwise(created):
        # NOTE(review): participants above are created with gender 'Male'
        # while this compares against lowercase 'male' - confirm whether
        # the comparison is expected to match.
        relation = 'father' if first.gender == 'male' else 'mother'
        db.session.add(FamilyRelationship(
            participant1=first,
            participant2=second,
            participant1_to_participant2_relation=relation))

    db.session.commit()