def testExampleVcfDBAnnotationWithSNPExactMatch(self):
        """Annotate two SNPs with the ``vcf_db_exact`` test datasource and check the ESP annotations.

        The first SNP (20:1110696 A>T) is expected to carry populated ESP_AF,
        ESP_AC and ESP_H2 annotations.  The second SNP (20:1230237 T>A) is
        expected to carry ESP_NS but an empty ESP_AF value.
        """
        db_dir = os.path.join("testdata", "vcf_db_exact", "hg19")
        datasource = DatasourceFactory.createDatasource(os.path.join(db_dir, "vcf_db_exact.config"), db_dir)

        # Each case: (chrom, start, end, ref, alt, build) followed by the expected annotations as
        # (annotation name, value, data type, description, split tag, number).
        cases = [
            (("20", "1110696", "1110696", "A", "T", "hg19"), [
                ("ESP_AF", "0.667", "Float", "Allele Frequency", TagConstants.SPLIT, -1),
                ("ESP_AC", "2,4", "Integer", "Allele Count", TagConstants.NOT_SPLIT, None),
                ("ESP_H2", "False", "Flag", "HapMap2 membership", TagConstants.NOT_SPLIT, 0),
            ]),
            (("20", "1230237", "1230237", "T", "A", "hg19"), [
                ("ESP_NS", "3", "Integer", "Number of Samples With Data", TagConstants.NOT_SPLIT, 1),
                ("ESP_AF", "", "Float", "Allele Frequency", TagConstants.SPLIT, -1),
            ]),
        ]

        for mut_attributes, expectations in cases:
            mutation = MutUtils.initializeMutFromAttributes(*mut_attributes)
            annotated = datasource.annotate_mutation(mutation)

            for name, value, data_type, description, split_tag, number in expectations:
                observed = annotated.getAnnotation(name)
                expected = Annotation(value=value, datasourceName="ESP", dataType=data_type,
                                      description=description, tags=[TagConstants.INFO, split_tag],
                                      number=number)
                self.assertTrue(observed.isEqual(expected), "Annotations do not match.")
    def _is_matching(self, mut, tsv_record):

        chrom = tsv_record[self.tsv_index["chrom"]]
        startPos = tsv_record[self.tsv_index["start"]]
        endPos = tsv_record[self.tsv_index["end"]]
        build = "hg19"

        if self.match_mode == "exact":
            if "ref" in self.tsv_index and "alt" in self.tsv_index:  # ref and alt information is present
                ref = tsv_record[self.tsv_index["ref"]]
                alt = tsv_record[self.tsv_index["alt"]]
                if ref == "-" or alt == "-":  # addresses Mutation Annotation Format based tsv records

                    # TODO: This looks risky to be calling the MutationData constructor directly
                    ds_mut = MutationData(chrom, startPos, endPos, ref, alt, build)
                else:  # addresses tsv records where the input isn't a Mutation Annotation Format file
                    ds_mut = MutUtils.initializeMutFromAttributes(chrom, startPos, endPos, ref, alt, build)

                if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
                    and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
                    and int(mut.end) == int(ds_mut.end):
                    return True
            else:  # do not use ref and alt information
                if mut.chr == chrom and int(mut.start) == int(startPos) and int(mut.end) == int(endPos):
                    return True
        else:
           return TranscriptProviderUtils.test_overlap(int(mut.start), int(mut.end), int(startPos), int(endPos))
        return False
    def _is_matching(self, mut, tsv_record):

        chrom = tsv_record[self.tsv_index["chrom"]]
        startPos = tsv_record[self.tsv_index["start"]]
        endPos = tsv_record[self.tsv_index["end"]]
        build = "hg19"

        if self.match_mode == "exact":
            if "ref" in self.tsv_index and "alt" in self.tsv_index:  # ref and alt information is present
                ref = tsv_record[self.tsv_index["ref"]]
                alt = tsv_record[self.tsv_index["alt"]]
                if ref == "-" or alt == "-":  # addresses Mutation Annotation Format based tsv records

                    # TODO: This looks risky to be calling the MutationData constructor directly
                    ds_mut = MutationData(chrom, startPos, endPos, ref, alt,
                                          build)
                else:  # addresses tsv records where the input isn't a Mutation Annotation Format file
                    ds_mut = MutUtils.initializeMutFromAttributes(
                        chrom, startPos, endPos, ref, alt, build)

                if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
                    and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
                    and int(mut.end) == int(ds_mut.end):
                    return True
            else:  # do not use ref and alt information
                if mut.chr == chrom and int(
                        mut.start) == int(startPos) and int(
                            mut.end) == int(endPos):
                    return True
        else:
            return TranscriptProviderUtils.test_overlap(
                int(mut.start), int(mut.end), int(startPos), int(endPos))
        return False
Exemple #4
0
    def _createMutation(self, record, alt_index, build):
        """Build a mutation, with its INPUT annotations, from one alt allele of a VCF record.

        :param record: VCF record; CHROM, POS, REF, ALT, ID, QUAL and is_monomorphic are read
        :param alt_index: index into ``record.ALT``; not used to pick the alt allele when the
            record is monomorphic, but always forwarded to the info helper
        :param build: genome build forwarded to the mutation initializer
        :return: the mutation after the id/qual/alt_allele_seen annotations were created and the
            filter and info helpers were applied
        """
        chromosome = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
        position = int(record.POS)  # start and end both come from POS

        ref_allele = record.REF.strip()
        if ref_allele == ".":
            # A "." reference is represented as an empty allele.
            ref_allele = ""

        if record.is_monomorphic:
            # Without an alternate allele the reference doubles as the alt.
            alt_allele = ref_allele
        else:
            alt_allele = str(record.ALT[alt_index]).strip()

        mutation = MutUtils.initializeMutFromAttributes(chromosome, position, position,
                                                        ref_allele, alt_allele, build,
                                                        self._mutation_data_factory)

        record_id = record.ID if record.ID is not None else ""
        mutation.createAnnotation("id", record_id, "INPUT", tags=[TagConstants.ID])
        mutation.createAnnotation("qual", str(record.QUAL), "INPUT", tags=[TagConstants.QUAL])
        mutation.createAnnotation("alt_allele_seen", str(True), "INPUT")

        # Choose the filter helper according to the collapse_filter_fields setting.
        if self.collapse_filter_fields:
            add_filter_data = self._add_filter_data_2_mutation_single_field
        else:
            add_filter_data = self._addFilterData2Mutation
        mutation = add_filter_data(mutation, record)

        return self._addInfoData2Mutation(mutation, record, alt_index)
Exemple #5
0
    def _determine_matching_alt_indices(self, mut, record, build):
        """Return the indices of the record's alternate alleles that match the mutation.

        For a monomorphic record the result is ``[-1]`` on a match.  In "exact"
        match mode that requires equal chromosome and reference allele; in any
        other mode the mutation must span the record position on the same
        chromosome.

        For every other record each alternate allele is turned into a
        comparison mutation.  "exact" mode requires equal chromosome, alleles,
        start and end.  Any other mode requires the two closed intervals to
        overlap on the same chromosome; this includes a mutation lying strictly
        inside the comparison interval, which the previous chain of four
        comparisons failed to match.  Intervals are assumed to satisfy
        start <= end.

        :param mut: mutation being annotated (chr, start, end, ref_allele, alt_allele are read)
        :param record: VCF record (CHROM, POS, REF, ALT and is_monomorphic are read)
        :param build: genome build forwarded when building the per-alt comparison mutation
        :return: list of matching indices into ``record.ALT``, ``[-1]`` for a matching
            monomorphic record, or an empty list when nothing matches
        """
        indices = []
        # Loop invariants: the chromosome conversion and the match mode do not depend on the alt.
        chrom = MutUtils.convertChromosomeStringToMutationDataFormat(
            record.CHROM)
        is_exact = self.match_mode == "exact"

        if record.is_monomorphic:
            position = record.POS
            if is_exact:
                # NOTE(review): positions are not compared on this path — presumably the
                # caller already restricted records by position; confirm.
                if mut.chr == chrom and mut.ref_allele == record.REF:
                    indices = [-1]
            elif mut.chr == chrom and int(mut.start) <= position and int(
                    mut.end) >= position:
                indices = [-1]
            return indices

        ref = str(record.REF)
        # enumerate works on both Python 2 and 3, unlike xrange.
        for index, alt_allele in enumerate(record.ALT):
            ds_mut = MutUtils.initializeMutFromAttributes(
                chrom, record.POS, record.POS, ref, str(alt_allele), build)

            if mut.chr != ds_mut.chr:
                continue

            if is_exact:
                if mut.ref_allele == ds_mut.ref_allele \
                    and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
                    and int(mut.end) == int(ds_mut.end):
                    indices.append(index)
            elif int(mut.start) <= int(ds_mut.end) and int(mut.end) >= int(
                    ds_mut.start):
                # Closed-interval overlap: covers equality, partial overlap on either
                # side, and containment in either direction.
                indices.append(index)

        return indices
    def _determine_matching_alt_indices(self, mut, record, build):
        """Collect the indices of the alternate alleles in ``record`` that match ``mut``.

        Monomorphic records yield ``[-1]`` on a match: in "exact" match mode
        when chromosome and reference allele are equal, otherwise when the
        mutation spans the record position on the same chromosome.

        For all other records, one comparison mutation is built per alternate
        allele.  "exact" mode needs equal chromosome, ref/alt alleles, start
        and end.  Any other mode needs the closed intervals to overlap on the
        same chromosome.  The former four-branch comparison missed a mutation
        strictly contained in the comparison interval; the single overlap test
        used here includes it.  Intervals are assumed to satisfy start <= end.

        :param mut: mutation being annotated (chr, start, end, ref_allele, alt_allele are read)
        :param record: VCF record (CHROM, POS, REF, ALT and is_monomorphic are read)
        :param build: genome build forwarded when building the per-alt comparison mutation
        :return: list of matching indices into ``record.ALT``, ``[-1]`` for a matching
            monomorphic record, or an empty list when nothing matches
        """
        indices = []
        # Computed once: neither value depends on the alternate allele being examined.
        chrom = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
        exact_mode = self.match_mode == "exact"

        if record.is_monomorphic:
            pos = record.POS
            if exact_mode:
                # NOTE(review): position is not checked here — verify that the caller has
                # already narrowed the records to the mutation's position.
                if mut.chr == chrom and mut.ref_allele == record.REF:
                    indices = [-1]
            elif mut.chr == chrom and int(mut.start) <= pos and int(mut.end) >= pos:
                indices = [-1]
            return indices

        ref = str(record.REF)
        # enumerate replaces the Python-2-only xrange(len(...)) indexing.
        for index, alt_allele in enumerate(record.ALT):
            ds_mut = MutUtils.initializeMutFromAttributes(chrom, record.POS, record.POS, ref, str(alt_allele), build)

            if mut.chr != ds_mut.chr:
                continue

            if exact_mode:
                if mut.ref_allele == ds_mut.ref_allele and mut.alt_allele == ds_mut.alt_allele \
                    and int(mut.start) == int(ds_mut.start) and int(mut.end) == int(ds_mut.end):
                    indices.append(index)
            elif int(mut.start) <= int(ds_mut.end) and int(mut.end) >= int(ds_mut.start):
                # Closed-interval overlap, including containment in either direction.
                indices.append(index)

        return indices
    def testExampleVcfDBAnnotationWithIndelAvgMatch(self):
        """Annotate an insertion with the ``vcf_db_avg`` test datasource and check the ESP annotations.

        The mutation is 4:1234567 GTC>GTCTTA; ESP_AF, ESP_AC, ESP_H2, ESP_AA and
        ESP_Z are each compared against their expected annotation.
        """
        db_dir = os.path.join("testdata", "vcf_db_avg", "hg19")
        datasource = DatasourceFactory.createDatasource(os.path.join(db_dir, "vcf_db_avg.config"), db_dir)

        mutation = MutUtils.initializeMutFromAttributes("4", "1234567", "1234567", "GTC", "GTCTTA", "hg19")
        annotated = datasource.annotate_mutation(mutation)

        # (annotation name, value, data type, description, split tag, number)
        expectations = [
            ("ESP_AF", "0.5", "Float", "Allele Frequency", TagConstants.SPLIT, -1),
            ("ESP_AC", "3.0", "Float", "Allele Count", TagConstants.NOT_SPLIT, None),
            ("ESP_H2", "False|False|False", "String", "HapMap2 membership", TagConstants.NOT_SPLIT, None),
            ("ESP_AA", "T", "String", "Ancestral Allele", TagConstants.NOT_SPLIT, 1),
            ("ESP_Z", "2.0,3.0,3.0", "Float", "A random variable, Z", TagConstants.NOT_SPLIT, 3),
        ]

        for name, value, data_type, description, split_tag, number in expectations:
            observed = annotated.getAnnotation(name)
            expected = Annotation(value=value, datasourceName="ESP", dataType=data_type,
                                  description=description, tags=[TagConstants.INFO, split_tag],
                                  number=number)
            self.assertTrue(observed.isEqual(expected), "Annotations do not match.")
    def testExampleVcfDBAnnotationWithMissingIndelExactMatch(self):
        """Annotate a deletion with the ``vcf_db_exact`` test datasource and expect empty ESP values.

        The mutation is 21:1234567 AGTC>A; ESP_AF, ESP_X, ESP_H2, ESP_Y and ESP_Z
        are each expected to come back with an empty value while keeping their
        data type, description, tags and number.
        """
        db_dir = os.path.join("testdata", "vcf_db_exact", "hg19")
        datasource = DatasourceFactory.createDatasource(os.path.join(db_dir, "vcf_db_exact.config"), db_dir)

        mutation = MutUtils.initializeMutFromAttributes("21", "1234567", "1234567", "AGTC", "A", "hg19")
        annotated = datasource.annotate_mutation(mutation)

        # (annotation name, data type, description, split tag, number); every value is "".
        expectations = [
            ("ESP_AF", "Float", "Allele Frequency", TagConstants.SPLIT, -1),
            ("ESP_X", "String", "A random variable, X", TagConstants.NOT_SPLIT, 2),
            ("ESP_H2", "Flag", "HapMap2 membership", TagConstants.NOT_SPLIT, 0),
            ("ESP_Y", "String", "A random variable, Y", TagConstants.NOT_SPLIT, -2),
            ("ESP_Z", "Float", "A random variable, Z", TagConstants.NOT_SPLIT, 3),
        ]

        for name, data_type, description, split_tag, number in expectations:
            observed = annotated.getAnnotation(name)
            expected = Annotation(value="", datasourceName="ESP", dataType=data_type,
                                  description=description, tags=[TagConstants.INFO, split_tag],
                                  number=number)
            self.assertTrue(observed.isEqual(expected), "Annotations do not match.")
    def _createMutation(self, record, alt_index, build):
        """Build a mutation, with its INPUT annotations, from one alt allele of a VCF record.

        :param record: VCF record; CHROM, POS, REF, ALT, ID, QUAL and is_monomorphic are read
        :param alt_index: index into ``record.ALT``; not used to pick the alt allele when the
            record is monomorphic, but always forwarded to the info helper
        :param build: genome build forwarded to the mutation initializer
        :return: the mutation after the id/qual/alt_allele_seen annotations were created and the
            filter and info helpers were applied
        """
        chromosome = MutUtils.convertChromosomeStringToMutationDataFormat(record.CHROM)
        position = int(record.POS)  # start and end both come from POS

        ref_allele = record.REF
        if ref_allele == ".":
            # A "." reference is represented as an empty allele.
            ref_allele = ""

        if record.is_monomorphic:
            # Without an alternate allele the reference doubles as the alt.
            alt_allele = ref_allele
        else:
            alt_allele = str(record.ALT[alt_index])

        mutation = MutUtils.initializeMutFromAttributes(chromosome, position, position, ref_allele, alt_allele, build)

        record_id = record.ID if record.ID is not None else ""
        mutation.createAnnotation("id", record_id, "INPUT", tags=[TagConstants.ID])
        mutation.createAnnotation("qual", str(record.QUAL), "INPUT", tags=[TagConstants.QUAL])
        mutation.createAnnotation("alt_allele_seen", str(True), "INPUT")

        mutation = self._addFilterData2Mutation(mutation, record)
        return self._addInfoData2Mutation(mutation, record, alt_index)
    def _is_matching(self, mut, tsv_record):

        chrom = tsv_record[self.tsv_index["chrom"]]
        startPos = tsv_record[self.tsv_index["start"]]
        endPos = tsv_record[self.tsv_index["end"]]
        build = "hg19"

        if self.match_mode == "exact":
            if "ref" in self.tsv_index and "alt" in self.tsv_index:  # ref and alt information is present
                ref = tsv_record[self.tsv_index["ref"]]
                alt = tsv_record[self.tsv_index["alt"]]
                if ref == "-" or alt == "-":  # addresses Mutation Annotation Format based tsv records
                    ds_mut = MutationData(chrom, startPos, endPos, ref, alt,
                                          build)
                else:  # addresses tsv records where the input isn't a Mutation Annotation Format file
                    ds_mut = MutUtils.initializeMutFromAttributes(
                        chrom, startPos, endPos, ref, alt, build)

                if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
                    and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
                    and int(mut.end) == int(ds_mut.end):
                    return True
            else:  # do not use ref and alt information
                if mut.chr == chrom and int(
                        mut.start) == int(startPos) and int(
                            mut.end) == int(endPos):
                    return True
        else:
            if mut.chr == chrom and int(mut.start) == int(startPos) and int(
                    mut.end) == int(endPos):
                return True
            elif mut.chr == chrom and int(mut.start) >= int(startPos) and int(mut.end) >= int(endPos) \
                and int(mut.start) <= int(endPos):
                return True
            elif mut.chr == chrom and int(mut.start) <= int(startPos) and int(
                    mut.end) >= int(endPos):
                return True
            elif mut.chr == chrom and int(mut.start) <= int(startPos) and int(mut.end) <= int(endPos) \
                and int(mut.end) >= int(startPos):
                return True

        return False
    def _is_matching(self, mut, tsv_record):

        chrom = tsv_record[self.tsv_index["chrom"]]
        startPos = tsv_record[self.tsv_index["start"]]
        endPos = tsv_record[self.tsv_index["end"]]
        build = "hg19"

        if self.match_mode == "exact":
            if "ref" in self.tsv_index and "alt" in self.tsv_index:  # ref and alt information is present
                ref = tsv_record[self.tsv_index["ref"]]
                alt = tsv_record[self.tsv_index["alt"]]
                if ref == "-" or alt == "-":  # addresses Mutation Annotation Format based tsv records
                    ds_mut = MutationData(chrom, startPos, endPos, ref, alt, build)
                else:  # addresses tsv records where the input isn't a Mutation Annotation Format file
                    ds_mut = MutUtils.initializeMutFromAttributes(chrom, startPos, endPos, ref, alt, build)

                if mut.chr == ds_mut.chr and mut.ref_allele == ds_mut.ref_allele \
                    and mut.alt_allele == ds_mut.alt_allele and int(mut.start) == int(ds_mut.start) \
                    and int(mut.end) == int(ds_mut.end):
                    return True
            else:  # do not use ref and alt information
                if mut.chr == chrom and int(mut.start) == int(startPos) and int(mut.end) == int(endPos):
                    return True
        else:
            if mut.chr == chrom and int(mut.start) == int(startPos) and int(mut.end) == int(endPos):
                return True
            elif mut.chr == chrom and int(mut.start) >= int(startPos) and int(mut.end) >= int(endPos) \
                and int(mut.start) <= int(endPos):
                return True
            elif mut.chr == chrom and int(mut.start) <= int(startPos) and int(mut.end) >= int(endPos):
                return True
            elif mut.chr == chrom and int(mut.start) <= int(startPos) and int(mut.end) <= int(endPos) \
                and int(mut.end) >= int(startPos):
                return True

        return False