def test_common_mutations3(self):
    """Check that mutations common to three patients are correctly retrieved.

    Opens ``<current_func_name>.vcf.gz`` from ``self.data_dir`` for
    chromosome 18 between positions 12510000 and 14515000, registers it as
    a connector, requests the mutations shared by patients "134/06",
    "Co1584" and "Co1591", and verifies which keys are present.
    """
    self.init_test(self.current_func_name)
    abs_vcf_db = self.__create_db_instance()
    test_file = os.path.join(self.data_dir, self.current_func_name + ".vcf.gz")
    test_chrom = 18
    test_begin_pos = 12510000
    test_end_pos = 14515000
    vcf_db = VcfDB()
    vcf_db.open_db(test_file, test_chrom, test_begin_pos, test_end_pos)
    abs_vcf_db.add_connector(vcf_db)
    common_mutations = abs_vcf_db.common_mutations(["134/06", "Co1584", "Co1591"])

    # *************** test keys ******************
    self.assertEqual(len(common_mutations.keys()), 3, "Incorrect number of common mutation keys")
    # assertIn/assertNotIn name the offending key on failure, unlike
    # assertTrue(... in ...), which only reports "False is not true".
    self.assertNotIn("18|12512255", common_mutations, "Incorrect common mutation key")
    self.assertNotIn("18|12512281", common_mutations, "Incorrect common mutation key")
    self.assertNotIn("18|12512294", common_mutations, "Incorrect common mutation key")
    self.assertNotIn("18|12512309", common_mutations, "Incorrect common mutation key")
    self.assertIn("18|12512370", common_mutations, "Incorrect common mutation key")
    self.assertNotIn("18|12512385", common_mutations, "Incorrect common mutation key")
    self.assertNotIn("18|14513526", common_mutations, "Incorrect common mutation key")
    self.assertIn("18|14513529", common_mutations, "Incorrect common mutation key")
    self.assertIn("18|14513535", common_mutations, "Incorrect common mutation key")
    self.assertNotIn("18|14513545", common_mutations, "Incorrect common mutation key")
    self.assertNotIn("18|14513570", common_mutations, "Incorrect common mutation key")
def test_mutations_with_target_patients(self):
    """Check the mutations table when target patients are specified.

    With target patients specified, the mutations table should have only
    mutations from the target patients, and should filter out the blank
    (./.) ones.

    NOTE(review): an individual genotype field may still be blank (see the
    "Co1207" assertion below); presumably only mutations that are blank for
    every target patient are dropped -- confirm against the implementation.
    """
    self.init_test(self.current_func_name)
    target_patients = ["729-05o", "Co1207", "Co1368"]
    abs_vcf_db = self.__create_db_instance(["729-05o", "Co1207", "Co1368"])
    test_file = os.path.join(self.data_dir, self.current_func_name + ".vcf.gz")
    test_chrom = 18
    test_begin_pos = 12702537
    # NOTE(review): the end position is a string while the begin position is
    # an int; presumably deliberate to exercise string input -- confirm.
    test_end_pos = "12703020"
    vcf_db = VcfDB()
    vcf_db.open_db(
        test_file,
        test_chrom,
        test_begin_pos,
        test_end_pos,
        patient_codes=target_patients,
    )
    abs_vcf_db.add_connector(vcf_db)
    mutations = abs_vcf_db.mutations

    # *************** test keys ******************
    self.assertEqual(len(mutations.keys()), 4, "Incorrect number of mutation keys")
    # assertIn/assertNotIn name the offending key on failure, unlike
    # assertTrue(... in ...), which only reports "False is not true".
    self.assertNotIn("18|12702537", mutations, "Incorrect mutation key")
    self.assertIn("18|12702705", mutations, "Incorrect mutation key")
    self.assertNotIn("18|12884315", mutations, "Incorrect mutation key")
    self.assertNotIn("18|12702536", mutations, "Incorrect mutation key")

    # *************** test contents ******************
    self.assertEqual(len(mutations), 4, "Incorrect number of mutations")
    self.assertEqual(
        len(mutations["18|12702705"].genotype_fields),
        3,
        "Incorrect number of genotype fields",
    )
    self.assertEqual(
        mutations["18|12702705"].genotype_fields["Co1207"].raw_content,
        "./.",
        "Invalid data in genotype field",
    )
    self.assertEqual(
        mutations["18|12702610"].genotype_fields["729-05o"].raw_content,
        "0/1:12,16:28:99:271,0,248",
        "Invalid data in genotype field",
    )
    self.assertEqual(
        mutations["18|12702705"].genotype_fields["Co1368"].raw_content,
        "0/1:7,6:13:99:135,0,210",
        "Invalid data in genotype field",
    )
def test_patients(self):
    """Check that patients are correctly retrieved.

    Registers two connectors on the same ``<current_func_name>.vcf.gz``
    file (two different position ranges on chromosome 18) and verifies the
    number of patients and a sample of the patient codes exposed by
    ``abs_vcf_db.patients``.
    """
    self.init_test(self.current_func_name)
    abs_vcf_db = self.__create_db_instance()
    test_file = os.path.join(self.data_dir, self.current_func_name + ".vcf.gz")

    # First connector.
    test_chrom1 = 18
    test_begin_pos1 = 12702537
    # NOTE(review): the end position is a string here but an int for the
    # second connector; presumably deliberate to exercise both -- confirm.
    test_end_pos1 = "12703020"
    vcf_db1 = VcfDB()
    vcf_db1.open_db(test_file, test_chrom1, test_begin_pos1, test_end_pos1)
    abs_vcf_db.add_connector(vcf_db1)

    # Second connector, same file, different region.
    test_chrom2 = 18
    test_begin_pos2 = 12884105
    test_end_pos2 = 12884315
    vcf_db2 = VcfDB()
    vcf_db2.open_db(test_file, test_chrom2, test_begin_pos2, test_end_pos2)
    abs_vcf_db.add_connector(vcf_db2)

    patients = abs_vcf_db.patients

    # *************** test keys ******************
    self.assertEqual(len(patients), 77, "Incorrect number of patients")
    # assertIn names the missing patient code on failure, unlike
    # assertTrue(... in ...), which only reports "False is not true".
    self.assertIn("1052/05", patients.keys(), "Incorrect patient code")
    self.assertIn("398-05o", patients.keys(), "Incorrect patient code")
    self.assertIn("Co866", patients.keys(), "Incorrect patient code")
    self.assertIn("co131", patients.keys(), "Incorrect patient code")
def test_mutations_from_two_VcfDB(self):
    """Check that mutations from two VcfDB connectors are correctly retrieved.

    Registers two connectors on the same ``<current_func_name>.vcf.gz``
    file (two different position ranges on chromosome 18) and verifies the
    keys and a sample of the contents of ``abs_vcf_db.mutations``.
    """
    self.init_test(self.current_func_name)
    abs_vcf_db = self.__create_db_instance()
    test_file = os.path.join(self.data_dir, self.current_func_name + ".vcf.gz")

    # First connector.
    test_chrom1 = 18
    test_begin_pos1 = 12702537
    # NOTE(review): the end position is a string here but an int for the
    # second connector; presumably deliberate to exercise both -- confirm.
    test_end_pos1 = "12703020"
    vcf_db1 = VcfDB()
    vcf_db1.open_db(test_file, test_chrom1, test_begin_pos1, test_end_pos1)
    abs_vcf_db.add_connector(vcf_db1)

    # Second connector, same file, different region.
    test_chrom2 = 18
    test_begin_pos2 = 12884105
    test_end_pos2 = 12884315
    vcf_db2 = VcfDB()
    vcf_db2.open_db(test_file, test_chrom2, test_begin_pos2, test_end_pos2)
    abs_vcf_db.add_connector(vcf_db2)

    mutations = abs_vcf_db.mutations

    # *************** test keys ******************
    self.assertEqual(len(mutations.keys()), 10, "Incorrect number of mutation keys")
    # assertIn/assertNotIn name the offending key on failure, unlike
    # assertTrue(... in ...), which only reports "False is not true".
    self.assertIn("18|12702705", mutations, "Incorrect mutation key")
    self.assertIn("18|12884315", mutations, "Incorrect mutation key")
    self.assertNotIn("18|12702536", mutations, "Incorrect mutation key")

    # *************** test contents ******************
    self.assertEqual(len(mutations), 10, "Incorrect number of mutations")
    self.assertEqual(mutations["18|12884315"].ref, "C", "Incorrect mutation content")
    self.assertEqual(mutations["18|12702610"].vcf_id, "rs4797701", "Incorrect mutation content")
    self.assertEqual(
        mutations["18|12884105"].genotype_fields["354/06"].raw_content,
        "0/0:12,0:12:30.09:0,30,377",
        "Invalid data in genotype field",
    )
    self.assertEqual(
        mutations["18|12884105"].genotype_fields["398-05o"].raw_content,
        "./.",
        "Invalid data in genotype field",
    )
    self.assertEqual(
        mutations["18|12884105"].genotype_fields["Co866"].raw_content,
        "0/0:14,0:14:33.10:0,33,394",
        "Invalid data in genotype field",
    )
def __connect_vcf_db(self, vcf_db_gz_file, chrom, begin_pos, end_pos, patient_codes=None):
    """Open a VcfDB on the given file and region and register it as a connector.

    Reports the file being connected through ``self.info``, opens a fresh
    ``VcfDB`` with the supplied arguments, and adds it to
    ``self.__abs_vcf_db``. ``patient_codes`` is forwarded unchanged to
    ``VcfDB.open_db``.
    """
    self.info("creating vcf db connection to " + vcf_db_gz_file)
    connector = VcfDB()
    connector.open_db(
        vcf_db_gz_file,
        chrom,
        begin_pos,
        end_pos,
        patient_codes=patient_codes,
    )
    self.__abs_vcf_db.add_connector(connector)