def testExampleVcfDBAnnotationWithSNPExactMatch(self):
    """Annotate two SNPs with the "vcf_db_exact" test datasource and check the ESP annotations.

    The first SNP (20:1110696 A>T) is expected to carry populated ESP_AF,
    ESP_AC and ESP_H2 annotations.  The second SNP (20:1230237 T>A) is
    expected to carry a populated ESP_NS and an empty ESP_AF.
    """
    ds_dir = os.path.join("testdata", "vcf_db_exact", "hg19")
    datasource = DatasourceFactory.createDatasource(
        os.path.join(ds_dir, "vcf_db_exact.config"), ds_dir)

    def check(annotated_mut, name, **expected_fields):
        # Look up the observed annotation first, then build the expected one and compare.
        observed = annotated_mut.getAnnotation(name)
        expected = Annotation(**expected_fields)
        self.assertTrue(observed.isEqual(expected), "Annotations do not match.")

    # First SNP: every checked annotation is expected to be populated.
    first = MutUtils.initializeMutFromAttributes("20", "1110696", "1110696", "A", "T", "hg19")
    first_annotated = datasource.annotate_mutation(first)
    check(first_annotated, "ESP_AF",
          value="0.667", datasourceName="ESP", dataType="Float",
          description="Allele Frequency",
          tags=[TagConstants.INFO, TagConstants.SPLIT], number=-1)
    check(first_annotated, "ESP_AC",
          value="2,4", datasourceName="ESP", dataType="Integer",
          description="Allele Count",
          tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=None)
    check(first_annotated, "ESP_H2",
          value="False", datasourceName="ESP", dataType="Flag",
          description="HapMap2 membership",
          tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=0)

    # Second SNP: ESP_NS is expected to be populated, ESP_AF to be empty.
    second = MutUtils.initializeMutFromAttributes("20", "1230237", "1230237", "T", "A", "hg19")
    second_annotated = datasource.annotate_mutation(second)
    check(second_annotated, "ESP_NS",
          value="3", datasourceName="ESP", dataType="Integer",
          description="Number of Samples With Data",
          tags=[TagConstants.INFO, TagConstants.NOT_SPLIT], number=1)
    check(second_annotated, "ESP_AF",
          value="", datasourceName="ESP", dataType="Float",
          description="Allele Frequency",
          tags=[TagConstants.INFO, TagConstants.SPLIT], number=-1)
def _is_matching(self, mut, tsv_record):
    """Decide whether a tsv record corresponds to the given mutation.

    In "exact" match mode the chromosome, start and end must agree; when the
    index declares both a "ref" and an "alt" column, the alleles must agree
    as well.  In any other match mode the answer is whatever
    TranscriptProviderUtils.test_overlap returns for the two start/end pairs.

    :param mut: mutation being annotated (chr, start, end, ref_allele and alt_allele are read)
    :param tsv_record: sequence of column values, addressed through self.tsv_index
    :return: True when the record matches; False otherwise in exact mode
    """
    rec_chrom = tsv_record[self.tsv_index["chrom"]]
    rec_start = tsv_record[self.tsv_index["start"]]
    rec_end = tsv_record[self.tsv_index["end"]]

    if self.match_mode != "exact":
        # NOTE: only coordinates are handed to test_overlap; the chromosome
        # is not compared on this path.
        return TranscriptProviderUtils.test_overlap(
            int(mut.start), int(mut.end), int(rec_start), int(rec_end))

    if "ref" not in self.tsv_index or "alt" not in self.tsv_index:
        # No allele columns available: compare position only.
        return (mut.chr == rec_chrom
                and int(mut.start) == int(rec_start)
                and int(mut.end) == int(rec_end))

    # Allele columns are present: build a comparable mutation from the record.
    rec_ref = tsv_record[self.tsv_index["ref"]]
    rec_alt = tsv_record[self.tsv_index["alt"]]
    genome_build = "hg19"
    if "-" in (rec_ref, rec_alt):
        # Mutation Annotation Format style record.
        # TODO: This looks risky to be calling the MutationData constructor directly
        record_mut = MutationData(rec_chrom, rec_start, rec_end, rec_ref, rec_alt, genome_build)
    else:
        # Record that did not come from a Mutation Annotation Format file.
        record_mut = MutUtils.initializeMutFromAttributes(
            rec_chrom, rec_start, rec_end, rec_ref, rec_alt, genome_build)

    return (mut.chr == record_mut.chr
            and mut.ref_allele == record_mut.ref_allele
            and mut.alt_allele == record_mut.alt_allele
            and int(mut.start) == int(record_mut.start)
            and int(mut.end) == int(record_mut.end))
def _is_matching(self, mut, tsv_record):
    """Report whether ``tsv_record`` describes the same event as ``mut``.

    "exact" match mode requires identical chromosome, start and end, plus
    identical ref/alt alleles when the index has both a "ref" and an "alt"
    column.  Every other match mode defers to
    TranscriptProviderUtils.test_overlap on the integer start/end pairs.

    :param mut: mutation being annotated (chr, start, end, ref_allele and alt_allele are read)
    :param tsv_record: sequence of column values, addressed through self.tsv_index
    :return: True when the record matches; False otherwise in exact mode
    """
    columns = self.tsv_index
    chrom = tsv_record[columns["chrom"]]
    start = tsv_record[columns["start"]]
    end = tsv_record[columns["end"]]

    if self.match_mode == "exact":
        alleles_available = "ref" in columns and "alt" in columns
        if not alleles_available:
            # Without allele columns only the position can be compared.
            if mut.chr != chrom:
                return False
            return int(mut.start) == int(start) and int(mut.end) == int(end)

        ref = tsv_record[columns["ref"]]
        alt = tsv_record[columns["alt"]]
        build = "hg19"
        is_maf_style = ref == "-" or alt == "-"
        if is_maf_style:
            # TODO: This looks risky to be calling the MutationData constructor directly
            candidate = MutationData(chrom, start, end, ref, alt, build)
        else:
            # Input that is not a Mutation Annotation Format file.
            candidate = MutUtils.initializeMutFromAttributes(chrom, start, end, ref, alt, build)

        # Alleles are compared before positions, as strings; positions as integers.
        if (mut.chr, mut.ref_allele, mut.alt_allele) != (
                candidate.chr, candidate.ref_allele, candidate.alt_allele):
            return False
        return (int(mut.start) == int(candidate.start)
                and int(mut.end) == int(candidate.end))

    # Non-exact mode: coordinates only (the chromosome is not compared here).
    return TranscriptProviderUtils.test_overlap(
        int(mut.start), int(mut.end), int(start), int(end))
def _createMutation(self, record, alt_index, build):
    """Build an annotated mutation from one VCF record and one of its alternate alleles.

    The mutation starts and ends at the record's POS.  A REF of "." is
    treated as an empty string, and a monomorphic record uses the reference
    allele as its alternate.  The mutation then receives "id", "qual" and
    "alt_allele_seen" annotations, followed by filter and info data.

    :param record: VCF record (CHROM, POS, REF, ALT, ID, QUAL, is_monomorphic are read)
    :param alt_index: index into record.ALT of the alternate allele to use
    :param build: genome build handed to MutUtils.initializeMutFromAttributes
    :return: the annotated mutation
    """
    chrom = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
    position = int(record.POS)

    ref_allele = record.REF.strip()
    if ref_allele == ".":
        ref_allele = ""
    if record.is_monomorphic:
        alt_allele = ref_allele
    else:
        alt_allele = str(record.ALT[alt_index]).strip()

    mutation = MutUtils.initializeMutFromAttributes(
        chrom, position, position, ref_allele, alt_allele, build,
        self._mutation_data_factory)

    record_id = record.ID if record.ID is not None else ""
    mutation.createAnnotation("id", record_id, "INPUT", tags=[TagConstants.ID])
    mutation.createAnnotation("qual", str(record.QUAL), "INPUT", tags=[TagConstants.QUAL])
    mutation.createAnnotation("alt_allele_seen", str(True), "INPUT")

    # Filter data goes in either as a single collapsed field or through the regular path.
    if self.collapse_filter_fields:
        add_filter_data = self._add_filter_data_2_mutation_single_field
    else:
        add_filter_data = self._addFilterData2Mutation
    mutation = add_filter_data(mutation, record)

    return self._addInfoData2Mutation(mutation, record, alt_index)
def _determine_matching_alt_indices(self, mut, record, build):
    """Collect the indices of the record's alternate alleles that match the mutation.

    :param mut: mutation being annotated (chr, start, end, ref_allele, alt_allele are read)
    :param record: VCF record (CHROM, POS, REF, ALT, is_monomorphic are read)
    :param build: genome build handed to MutUtils.initializeMutFromAttributes
    :return: list of matching indices into record.ALT; [-1] for a matching
        monomorphic record; an empty list when nothing matches
    """
    if record.is_monomorphic:
        chrom = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
        position = record.POS
        ref_allele = record.REF
        if self.match_mode == "exact":
            # Exact mode compares chromosome and reference allele only.
            matched = mut.chr == chrom and mut.ref_allele == ref_allele
        else:
            # Otherwise the mutation has to span the record's position.
            matched = (mut.chr == chrom
                       and int(mut.start) <= position
                       and int(mut.end) >= position)
        return [-1] if matched else []

    matching = []
    # Compare the mutation with every alternate allele of the record.
    for alt_idx, alt_value in enumerate(record.ALT):
        chrom = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
        candidate = MutUtils.initializeMutFromAttributes(
            chrom, record.POS, record.POS, str(record.REF), str(alt_value), build)

        if self.match_mode == "exact":
            if (mut.chr == candidate.chr
                    and mut.ref_allele == candidate.ref_allele
                    and mut.alt_allele == candidate.alt_allele
                    and int(mut.start) == int(candidate.start)
                    and int(mut.end) == int(candidate.end)):
                matching.append(alt_idx)
            continue

        # Non-exact mode: nothing to do unless both lie on the same chromosome.
        if mut.chr != candidate.chr:
            continue
        m_start, c_start = int(mut.start), int(candidate.start)
        m_end, c_end = int(mut.end), int(candidate.end)
        # Accepted layouts: identical span, mutation reaching past the
        # candidate's end, mutation covering the candidate, mutation starting
        # before the candidate and ending inside it.
        # NOTE(review): a mutation lying strictly inside the candidate's span
        # satisfies none of these tests -- confirm whether that is intended.
        if ((m_start == c_start and m_end == c_end)
                or (m_start >= c_start and m_end >= c_end and m_start <= c_end)
                or (m_start <= c_start and m_end >= c_end)
                or (m_start <= c_start and m_end <= c_end and m_end >= c_start)):
            matching.append(alt_idx)

    return matching
def _determine_matching_alt_indices(self, mut, record, build):
    """Return which alternate alleles of a VCF record match the given mutation.

    :param mut: mutation being annotated (chr, start, end, ref_allele, alt_allele are read)
    :param record: VCF record (CHROM, POS, REF, ALT, is_monomorphic are read)
    :param build: genome build handed to MutUtils.initializeMutFromAttributes
    :return: list of matching indices into record.ALT; [-1] when a
        monomorphic record matches; an empty list when nothing matches
    """
    hits = []

    if record.is_monomorphic:
        record_chrom = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
        pos = record.POS
        record_ref = record.REF
        if self.match_mode == "exact":
            # Only chromosome and reference allele are compared here.
            if mut.chr == record_chrom and mut.ref_allele == record_ref:
                hits = [-1]
        elif mut.chr == record_chrom and int(mut.start) <= pos and int(mut.end) >= pos:
            # The mutation spans the record's position.
            hits = [-1]
        return hits

    for position_in_alt, alternate in enumerate(record.ALT):
        record_chrom = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
        ds_mut = MutUtils.initializeMutFromAttributes(
            record_chrom, record.POS, record.POS, str(record.REF), str(alternate), build)

        if self.match_mode == "exact":
            is_match = (mut.chr == ds_mut.chr
                        and mut.ref_allele == ds_mut.ref_allele
                        and mut.alt_allele == ds_mut.alt_allele
                        and int(mut.start) == int(ds_mut.start)
                        and int(mut.end) == int(ds_mut.end))
        elif mut.chr != ds_mut.chr:
            is_match = False
        else:
            mut_start = int(mut.start)
            ds_start = int(ds_mut.start)
            mut_end = int(mut.end)
            ds_end = int(ds_mut.end)
            identical = mut_start == ds_start and mut_end == ds_end
            reaches_past_end = mut_start >= ds_start and mut_end >= ds_end and mut_start <= ds_end
            covers = mut_start <= ds_start and mut_end >= ds_end
            reaches_into_start = mut_start <= ds_start and mut_end <= ds_end and mut_end >= ds_start
            # NOTE(review): a mutation strictly inside the alternate's span
            # (later start, earlier end) meets none of the four tests --
            # confirm whether that is intended.
            is_match = identical or reaches_past_end or covers or reaches_into_start

        if is_match:
            hits.append(position_in_alt)

    return hits
def testExampleVcfDBAnnotationWithIndelAvgMatch(self):
    """Annotate an indel with the "vcf_db_avg" test datasource and check the ESP annotations.

    The indel 4:1234567 GTC>GTCTTA is annotated once; ESP_AF, ESP_AC,
    ESP_H2, ESP_AA and ESP_Z are then compared against the expected values.
    """
    ds_dir = os.path.join("testdata", "vcf_db_avg", "hg19")
    datasource = DatasourceFactory.createDatasource(
        os.path.join(ds_dir, "vcf_db_avg.config"), ds_dir)

    indel = MutUtils.initializeMutFromAttributes(
        "4", "1234567", "1234567", "GTC", "GTCTTA", "hg19")
    annotated = datasource.annotate_mutation(indel)

    split_tags = [TagConstants.INFO, TagConstants.SPLIT]
    not_split_tags = [TagConstants.INFO, TagConstants.NOT_SPLIT]
    # (annotation name, value, data type, description, tags, number)
    expectations = [
        ("ESP_AF", "0.5", "Float", "Allele Frequency", split_tags, -1),
        ("ESP_AC", "3.0", "Float", "Allele Count", not_split_tags, None),
        ("ESP_H2", "False|False|False", "String", "HapMap2 membership", not_split_tags, None),
        ("ESP_AA", "T", "String", "Ancestral Allele", not_split_tags, 1),
        ("ESP_Z", "2.0,3.0,3.0", "Float", "A random variable, Z", not_split_tags, 3),
    ]

    for name, value, data_type, description, tags, number in expectations:
        observed = annotated.getAnnotation(name)
        # Each expected annotation gets its own copy of the tag list.
        expected = Annotation(value=value, datasourceName="ESP", dataType=data_type,
                              description=description, tags=list(tags), number=number)
        self.assertTrue(observed.isEqual(expected), "Annotations do not match.")
def testExampleVcfDBAnnotationWithMissingIndelExactMatch(self):
    """Annotate an indel with the "vcf_db_exact" test datasource and expect empty ESP values.

    The indel 21:1234567 AGTC>A is annotated once; ESP_AF, ESP_X, ESP_H2,
    ESP_Y and ESP_Z are each expected to have an empty value while keeping
    their data type, description, tags and number.
    """
    ds_dir = os.path.join("testdata", "vcf_db_exact", "hg19")
    datasource = DatasourceFactory.createDatasource(
        os.path.join(ds_dir, "vcf_db_exact.config"), ds_dir)

    indel = MutUtils.initializeMutFromAttributes(
        "21", "1234567", "1234567", "AGTC", "A", "hg19")
    annotated = datasource.annotate_mutation(indel)

    # (annotation name, data type, description, split tag, number)
    expectations = (
        ("ESP_AF", "Float", "Allele Frequency", TagConstants.SPLIT, -1),
        ("ESP_X", "String", "A random variable, X", TagConstants.NOT_SPLIT, 2),
        ("ESP_H2", "Flag", "HapMap2 membership", TagConstants.NOT_SPLIT, 0),
        ("ESP_Y", "String", "A random variable, Y", TagConstants.NOT_SPLIT, -2),
        ("ESP_Z", "Float", "A random variable, Z", TagConstants.NOT_SPLIT, 3),
    )

    for name, data_type, description, split_tag, number in expectations:
        observed = annotated.getAnnotation(name)
        expected = Annotation(value="", datasourceName="ESP", dataType=data_type,
                              description=description,
                              tags=[TagConstants.INFO, split_tag], number=number)
        self.assertTrue(observed.isEqual(expected), "Annotations do not match.")
def _createMutation(self, record, alt_index, build):
    """Build an annotated mutation from one VCF record and one of its alternate alleles.

    The mutation starts and ends at the record's POS.  A REF of "." is
    treated as an empty string, and a monomorphic record uses the reference
    allele as its alternate.  The mutation then receives "id", "qual" and
    "alt_allele_seen" annotations, followed by filter and info data.

    :param record: VCF record (CHROM, POS, REF, ALT, ID, QUAL, is_monomorphic are read)
    :param alt_index: index into record.ALT of the alternate allele to use
    :param build: genome build handed to MutUtils.initializeMutFromAttributes
    :return: the annotated mutation
    """
    chrom = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
    position = int(record.POS)

    ref_allele = record.REF
    if ref_allele == ".":
        ref_allele = ""
    if record.is_monomorphic:
        alt_allele = ref_allele
    else:
        alt_allele = str(record.ALT[alt_index])

    mutation = MutUtils.initializeMutFromAttributes(
        chrom, position, position, ref_allele, alt_allele, build)

    record_id = record.ID if record.ID is not None else ""
    mutation.createAnnotation("id", record_id, "INPUT", tags=[TagConstants.ID])
    mutation.createAnnotation("qual", str(record.QUAL), "INPUT", tags=[TagConstants.QUAL])
    mutation.createAnnotation("alt_allele_seen", str(True), "INPUT")

    mutation = self._addFilterData2Mutation(mutation, record)
    return self._addInfoData2Mutation(mutation, record, alt_index)
def _is_matching(self, mut, tsv_record):
    """Decide whether a tsv record corresponds to the given mutation.

    In "exact" match mode the chromosome, start and end must agree; when the
    index declares both a "ref" and an "alt" column, the alleles must agree
    as well.  In any other match mode the record matches when it is on the
    mutation's chromosome and the two start/end ranges overlap.

    The overlap test covers every layout the previous four-branch test
    accepted and additionally accepts a mutation lying strictly inside the
    record's range, which the old test silently rejected.

    :param mut: mutation being annotated (chr, start, end, ref_allele and alt_allele are read)
    :param tsv_record: sequence of column values, addressed through self.tsv_index
    :return: True when the record matches, False otherwise
    """
    chrom = tsv_record[self.tsv_index["chrom"]]
    startPos = tsv_record[self.tsv_index["start"]]
    endPos = tsv_record[self.tsv_index["end"]]

    if self.match_mode == "exact":
        if "ref" in self.tsv_index and "alt" in self.tsv_index:
            # ref and alt information is present
            ref = tsv_record[self.tsv_index["ref"]]
            alt = tsv_record[self.tsv_index["alt"]]
            build = "hg19"
            if ref == "-" or alt == "-":
                # addresses Mutation Annotation Format based tsv records
                ds_mut = MutationData(chrom, startPos, endPos, ref, alt, build)
            else:
                # addresses tsv records where the input isn't a Mutation Annotation Format file
                ds_mut = MutUtils.initializeMutFromAttributes(
                    chrom, startPos, endPos, ref, alt, build)
            return (mut.chr == ds_mut.chr
                    and mut.ref_allele == ds_mut.ref_allele
                    and mut.alt_allele == ds_mut.alt_allele
                    and int(mut.start) == int(ds_mut.start)
                    and int(mut.end) == int(ds_mut.end))

        # do not use ref and alt information
        return (mut.chr == chrom
                and int(mut.start) == int(startPos)
                and int(mut.end) == int(endPos))

    # Overlap mode: a record on a different chromosome can never match.
    if mut.chr != chrom:
        return False

    mut_start, mut_end = int(mut.start), int(mut.end)
    rec_start, rec_end = int(startPos), int(endPos)
    # Three clauses: the mutation covers the record, the mutation starts
    # inside the record, or the mutation ends inside the record.
    return ((mut_start <= rec_start and mut_end >= rec_end)
            or rec_start <= mut_start <= rec_end
            or rec_start <= mut_end <= rec_end)
def _is_matching(self, mut, tsv_record):
    """Report whether ``tsv_record`` matches ``mut`` under the configured match mode.

    "exact" match mode requires identical chromosome, start and end, plus
    identical ref/alt alleles when the index has both a "ref" and an "alt"
    column.  Any other match mode requires the same chromosome and
    overlapping start/end ranges.

    The overlap test accepts everything the earlier four-branch test
    accepted, and also a mutation that lies strictly inside the record's
    range -- a case the earlier test did not recognise.

    :param mut: mutation being annotated (chr, start, end, ref_allele and alt_allele are read)
    :param tsv_record: sequence of column values, addressed through self.tsv_index
    :return: True when the record matches, False otherwise
    """
    chrom = tsv_record[self.tsv_index["chrom"]]
    startPos = tsv_record[self.tsv_index["start"]]
    endPos = tsv_record[self.tsv_index["end"]]

    if self.match_mode != "exact":
        # A record on another chromosome cannot overlap the mutation.
        if mut.chr != chrom:
            return False
        m_start, m_end = int(mut.start), int(mut.end)
        r_start, r_end = int(startPos), int(endPos)
        covers_record = m_start <= r_start and m_end >= r_end
        starts_inside = r_start <= m_start <= r_end
        ends_inside = r_start <= m_end <= r_end
        return covers_record or starts_inside or ends_inside

    if "ref" not in self.tsv_index or "alt" not in self.tsv_index:
        # do not use ref and alt information
        return (mut.chr == chrom
                and int(mut.start) == int(startPos)
                and int(mut.end) == int(endPos))

    # ref and alt information is present
    ref = tsv_record[self.tsv_index["ref"]]
    alt = tsv_record[self.tsv_index["alt"]]
    build = "hg19"
    if ref == "-" or alt == "-":
        # addresses Mutation Annotation Format based tsv records
        ds_mut = MutationData(chrom, startPos, endPos, ref, alt, build)
    else:
        # addresses tsv records where the input isn't a Mutation Annotation Format file
        ds_mut = MutUtils.initializeMutFromAttributes(chrom, startPos, endPos, ref, alt, build)

    return (mut.chr == ds_mut.chr
            and mut.ref_allele == ds_mut.ref_allele
            and mut.alt_allele == ds_mut.alt_allele
            and int(mut.start) == int(ds_mut.start)
            and int(mut.end) == int(ds_mut.end))