def test_invalid_same_variant_defined_in_two_rows_in_hgvs_pro(self):
    """A counts file listing the same protein variant twice must not validate.

    Builds a two-row CSV whose rows both carry the single variant returned
    by ``generate_hgvs(prefix="p")`` in the ``HGVS_PRO_COL`` column, then
    expects validation to fail with exactly one error.
    """
    hgvs = generate_hgvs(prefix="p")
    data = "{},{}\n{},1.0\n{},1.0".format(
        self.HGVS_PRO_COL, "count", hgvs, hgvs
    )

    dataset = MaveDataset.for_counts(StringIO(data))
    dataset.validate()

    self.assertFalse(dataset.is_valid)
    # Attach the collected errors to the assertion instead of printing them
    # unconditionally: they show up only when the count is wrong, and passing
    # runs stay quiet.
    self.assertEqual(len(dataset.errors), 1, msg=dataset.errors)
def test_defines_same_variants(self):
    """Exercise ``match_other`` on pairs of score and count datasets.

    Each case is a ``(scores_csv, counts_csv, expected)`` triple. Both CSV
    strings are loaded and validated, and the result of
    ``scores_dataset.match_other(counts_dataset)`` is compared with
    ``expected``.
    """
    nt_col = self.HGVS_NT_COL
    pro_col = self.HGVS_PRO_COL
    score_col = self.SCORE_COL

    # Score files shared by several cases.
    nt_scores = "{},{}\nc.1A>G,0.0".format(nt_col, score_col)
    nt_pro_scores = "{},{},{}\nc.1A>G,p.Ile1Val,0.0".format(
        nt_col, pro_col, score_col
    )

    cases = [
        (
            nt_scores,
            "{},count\nc.1A>G,0.0".format(nt_col),
            True,
        ),
        (
            nt_scores,
            "{},count\nc.2A>G,0.0".format(nt_col),
            False,
        ),
        (
            nt_pro_scores,
            "{},{},count\nc.1A>G,p.Ile1Val,0.0".format(nt_col, pro_col),
            True,
        ),
        (
            nt_pro_scores,
            "{},{},count\nc.1A>G,p.Ile1Phe,0.0".format(nt_col, pro_col),
            False,
        ),
        # Check returns None if either dataset invalid
        (
            "wrong_columns,{}\nc.1A>G,0.0".format(score_col),
            "{},count\nc.1A>G,0.0".format(nt_col),
            None,
        ),
        (
            nt_scores,
            "wrong_column,count\nc.1A>G,0.0",
            None,
        ),
    ]

    for case in cases:
        scores_csv, counts_csv, expected = case
        with self.subTest(msg=case):
            scores_dataset = MaveDataset.for_scores(StringIO(scores_csv))
            scores_dataset.validate()

            counts_dataset = MaveDataset.for_counts(StringIO(counts_csv))
            counts_dataset.validate()

            outcome = scores_dataset.match_other(counts_dataset)
            self.assertEqual(outcome, expected)
def test_invalid_no_additional_columns_outside_hgvs_ones(self):
    """A counts file containing only HGVS columns must not validate.

    The CSV has the ``HGVS_NT_COL``, ``HGVS_SPLICE_COL`` and
    ``HGVS_PRO_COL`` headers and one row of generated variants, with no
    further data column. Validation is expected to fail with exactly one
    error.
    """
    header = (self.HGVS_NT_COL, self.HGVS_SPLICE_COL, self.HGVS_PRO_COL)
    row = (
        generate_hgvs(prefix="g"),
        generate_hgvs(prefix="c"),
        generate_hgvs(prefix="p"),
    )
    data = "{},{},{}\n{},{},{}".format(*header, *row)

    dataset = MaveDataset.for_counts(StringIO(data))
    dataset.validate()

    self.assertFalse(dataset.is_valid)
    # Attach the collected errors to the assertion instead of printing them
    # unconditionally: they show up only when the count is wrong, and passing
    # runs stay quiet.
    self.assertEqual(len(dataset.errors), 1, msg=dataset.errors)