def test_check_project_and_sample(self):
    '''
    Test SampleAdaptor.check_project_and_sample

    Stores four samples under one project, then expects True for a
    project / sample pair that was stored and False for a sample id that
    was not stored.
    '''
    project_igf_id = 'IGFP0001_test_22-8-2017_rna'
    sample_data = [
        {
            'sample_igf_id': sample_id,
            'library_id': sample_id,
            'project_igf_id': project_igf_id,
        }
        for sample_id in ('IGFS001', 'IGFS002', 'IGFS003', 'IGFS004')
    ]
    sa = SampleAdaptor(**{'session_class': self.session_class})
    sa.start_session()
    try:
        sa.store_sample_and_attribute_data(data=sample_data)
        stored_pair = sa.check_project_and_sample(
            project_igf_id=project_igf_id,
            sample_igf_id='IGFS001')
        self.assertEqual(stored_pair, True)
        # 'IGFS0011' is not among the stored sample ids
        missing_pair = sa.check_project_and_sample(
            project_igf_id=project_igf_id,
            sample_igf_id='IGFS0011')
        self.assertEqual(missing_pair, False)
    finally:
        # close the session even when the store call or an assertion fails
        sa.close_session()
def _fetch_project_info_from_db(self):
    '''
    An internal method for fetching data from db

    Reads the connection settings from self.dbconfig_file and runs one
    aggregate query grouped by sample.

    :returns: A dataframe containing following columns

                * project_igf_id
                * sample_igf_id
                * expected read, labelled with self.expected_read_tag
                * total read, labelled with self.total_read_tag; missing
                  values are replaced by 0 and the column is cast to int
    '''
    check_file_path(self.dbconfig_file)
    dbconf = read_dbconf_json(self.dbconfig_file)
    sa = SampleAdaptor(**dbconf)
    sa.start_session()
    try:
        # NOTE(review): only sample_igf_id is in GROUP BY although
        # project_igf_id is selected too; this relies on the backend
        # accepting non-aggregated columns - confirm for the target db
        query = (
            sa.session
            .query(
                Project.project_igf_id,
                Sample.sample_igf_id,
                func.max(Sample_attribute.attribute_value)
                    .label(self.expected_read_tag),
                func.sum(Run_attribute.attribute_value)
                    .label(self.total_read_tag))
            .outerjoin(Sample, Project.project_id == Sample.project_id)
            .outerjoin(
                Sample_attribute,
                Sample.sample_id == Sample_attribute.sample_id)
            .outerjoin(Experiment, Sample.sample_id == Experiment.sample_id)
            .outerjoin(Run, Experiment.experiment_id == Run.experiment_id)
            .outerjoin(Run_attribute, Run.run_id == Run_attribute.run_id)
            # keep rows without any experiment (NULL platform) as well
            .filter(
                (Experiment.platform_name.in_(self.platform_list)) |
                (Experiment.platform_name.is_(None)))
            # no NULL alternative here, so rows lacking the expected read
            # attribute do not pass this filter
            .filter(Sample_attribute.attribute_name == self.expected_read_tag)
            # keep rows without any run attribute (NULL name) as well
            .filter(
                (Run_attribute.attribute_name == self.r1_read_tag) |
                (Run_attribute.attribute_name.is_(None)))
            .group_by(Sample.sample_igf_id)
        )
        records = sa.fetch_records(query=query, output_mode='dataframe')
    finally:
        # release the db session even when the query or the fetch fails
        sa.close_session()
    # rows kept through the NULL branches above have no summed read count
    records[self.total_read_tag] = \
        records[self.total_read_tag].fillna(0).astype(int)
    return records
def setUp(self):
    '''
    Create the database tables and load the fixture records for the tests

    Records are stored in this order: platforms and flowcell barcode rules,
    one project, seven samples, one sequencing run, six experiments and six
    runs. Sample IGF109797_1 is stored without any experiment or run.
    '''
    self.dbconfig = 'data/dbconfig.json'
    dbparam = read_dbconf_json(self.dbconfig)
    base = BaseAdaptor(**dbparam)
    self.engine = base.engine
    self.dbname = dbparam['dbname']
    Base.metadata.create_all(self.engine)
    self.session_class = base.get_session_class()

    def load_records(adaptor_class, *store_calls):
        # Open a session, run each (store_method, rows) pair in order and
        # close the session even when one of the store calls fails
        adaptor = adaptor_class(**{'session_class': base.session_class})
        adaptor.start_session()
        try:
            for store_method, rows in store_calls:
                store_method(adaptor, data=rows)
        finally:
            adaptor.close_session()

    project_igf_id = 'IGFQ000472_avik_28-3-2018_RNA'
    seqrun_igf_id = '180518_K00345_0047_BHV2GJBBXX'
    platform_name = 'HISEQ4000'
    # (sample id, R1 read count) for every sample that has an experiment
    # and a run
    sequenced_samples = [
        ('IGF109792', 288046541),
        ('IGF109793', 14666330),
        ('IGF109794', 5009143),
        ('IGF109795', 1391747),
        ('IGF109796', 1318008),
        ('IGF109797', 1216324),
    ]
    # IGF109797_1 is only stored as a sample
    sample_ids = [sample_id for sample_id, _ in sequenced_samples]
    sample_ids.append('IGF109797_1')

    # load platform data
    platform_data = [
        {"platform_igf_id": "M03291",
         "model_name": "MISEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA1.18.54"},
        {"platform_igf_id": "NB501820",
         "model_name": "NEXTSEQ",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
        {"platform_igf_id": "K00345",
         "model_name": "HISEQ4000",
         "vendor_name": "ILLUMINA",
         "software_name": "RTA",
         "software_version": "RTA2"},
    ]
    flowcell_rule_data = [
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 SR",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
        {"platform_igf_id": "K00345",
         "flowcell_type": "HiSeq 3000/4000 PE",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "NB501820",
         "flowcell_type": "NEXTSEQ",
         "index_1": "NO_CHANGE",
         "index_2": "REVCOMP"},
        {"platform_igf_id": "M03291",
         "flowcell_type": "MISEQ",
         "index_1": "NO_CHANGE",
         "index_2": "NO_CHANGE"},
    ]
    load_records(
        PlatformAdaptor,
        (PlatformAdaptor.store_platform_data, platform_data),
        (PlatformAdaptor.store_flowcell_barcode_rule, flowcell_rule_data))

    # load project data
    project_data = [{'project_igf_id': project_igf_id}]
    load_records(
        ProjectAdaptor,
        (ProjectAdaptor.store_project_and_attribute_data, project_data))

    # load samples
    sample_data = [
        {'project_igf_id': project_igf_id,
         'sample_igf_id': sample_id,
         'expected_read': 40000000}
        for sample_id in sample_ids
    ]
    load_records(
        SampleAdaptor,
        (SampleAdaptor.store_sample_and_attribute_data, sample_data))

    # load seqrun data
    seqrun_data = [
        {'flowcell_id': 'HV2GJBBXX',
         'platform_igf_id': 'K00345',
         'seqrun_igf_id': seqrun_igf_id},
    ]
    load_records(
        SeqrunAdaptor,
        (SeqrunAdaptor.store_seqrun_and_attribute_data, seqrun_data))

    # load experiment data
    experiment_data = [
        {'experiment_igf_id': '{0}_{1}'.format(sample_id, platform_name),
         'library_name': sample_id,
         'platform_name': platform_name,
         'project_igf_id': project_igf_id,
         'sample_igf_id': sample_id}
        for sample_id, _ in sequenced_samples
    ]
    load_records(
        ExperimentAdaptor,
        (ExperimentAdaptor.store_project_and_attribute_data, experiment_data))

    # load run data; run ids are derived from the sample id so that every
    # row follows the same pattern (the IGF109796 row used to carry a stray
    # leading space in its run_igf_id)
    run_data = [
        {'experiment_igf_id': '{0}_{1}'.format(sample_id, platform_name),
         'lane_number': '7',
         'run_igf_id': '{0}_{1}_H2N3MBBXY_7'.format(sample_id, platform_name),
         'seqrun_igf_id': seqrun_igf_id,
         'R1_READ_COUNT': read_count}
        for sample_id, read_count in sequenced_samples
    ]
    load_records(
        RunAdaptor,
        (RunAdaptor.store_run_and_attribute_data, run_data))