Example #1
0
def test_offtarget_filter(
    test_scheme,
    setup_filter,
    get_test_file,
    get_empty_maf_record,
    vcf_region,
    end_position,
    expected,
):
    """
    Check the off-target filter against two BED region files.

    Only ``vcf_region`` and ``End_Position`` are set on the record obtained
    from the ``get_empty_maf_record`` fixture; the filter verdict must be the
    very object given as ``expected`` (identity comparison).
    """
    bed_names = ("fake_regions.bed.gz", "fake_regions_2.bed.gz")
    filterer = setup_filter([get_test_file(name) for name in bed_names])

    record = get_empty_maf_record
    populated = (("vcf_region", vcf_region), ("End_Position", end_position))
    for column, value in populated:
        record[column] = get_builder(column, test_scheme, value=value)

    assert filterer.filter(record) is expected
Example #2
0
    def fix_depths(self, maf_dic, tumor_only=False):
        """
        Raises the total depth of tumor/normal to the sum of the ref and alt
        count columns whenever that sum is greater than the recorded depth.

        The normal columns are only examined when ``tumor_only`` is ``False``.

        :param maf_dic: ``dict`` of the maf record to format
        :param tumor_only: ``True`` if there is no matched normal else ``False``
        :return: updated maf_dic with formatted depths
        """
        # (ref count column, alt count column, depth column) per sample
        column_sets = [("t_ref_count", "t_alt_count", "t_depth")]
        if tumor_only is False:
            column_sets.append(("n_ref_count", "n_alt_count", "n_depth"))

        for ref_col, alt_col, depth_col in column_sets:
            allele_sum = maf_dic[ref_col].value + maf_dic[alt_col].value
            if allele_sum > maf_dic[depth_col].value:
                maf_dic[depth_col] = get_builder(depth_col,
                                                 self.scheme,
                                                 value=allele_sum)

        return maf_dic
def test_annotate_dbsnp(test_scheme, setup_annotator, get_test_file,
                        get_empty_maf_record):
    """
    Run the dbSNP validation-status annotator through three cases on the
    same record: a known rsID, the value 'novel', and no rsID at all.
    """
    annotator = setup_annotator(test_scheme,
                                source=get_test_file('dbsnp_valstatus.db'))
    record = get_empty_maf_record

    # A known rsID should pick up its validation status.
    record['dbSNP_RS'] = get_builder('dbSNP_RS', test_scheme, value='rs540')
    record = annotator.annotate(record)
    assert record['dbSNP_Val_Status'].value == ['byOtherPop']

    # 'novel' and an empty rsID should both yield an empty list; the status
    # column is cleared before each run so the earlier result cannot leak.
    for rs_value in ('novel', None):
        record['dbSNP_RS'] = get_builder('dbSNP_RS',
                                         test_scheme,
                                         value=rs_value)
        record['dbSNP_Val_Status'] = get_builder('dbSNP_Val_Status',
                                                 test_scheme,
                                                 value=None)
        record = annotator.annotate(record)
        assert record['dbSNP_Val_Status'].value == []
Example #4
0
    def annotate(self, maf_record, vcf_record, var_allele_idx=1):
        """
        Set the ``COSMIC`` column from records fetched at the variant position.

        Records are fetched from ``self.f`` over ``chrom:pos-(pos + 1)``. A
        record counts as a match when its position and reference allele equal
        the variant's and its first alternate allele equals the selected
        alternate. When at least one id matched, a ``dbSNP_RS`` value of
        ``["novel"]`` is reset to ``None`` and ``COSMIC`` receives the unique
        ids, sorted and joined with ``;``. Otherwise ``COSMIC`` is ``None``.

        :param maf_record: record to update in place
        :param vcf_record: variant providing chrom/pos/ref/alleles
        :param var_allele_idx: index into ``vcf_record.alleles`` of the
                               alternate allele to look up
        :return: the updated ``maf_record``
        """
        region = "{0}:{1}-{2}".format(vcf_record.chrom, vcf_record.pos,
                                      vcf_record.pos + 1)
        alt = vcf_record.alleles[var_allele_idx]

        cosmic_ids = []
        for hit in self.f.fetch(region=region):
            try:
                if not (vcf_record.pos == hit.pos
                        and vcf_record.ref == hit.ref
                        and alt == hit.alts[0]):
                    continue
                cosmic_ids.append(hit.id)
            except TypeError:
                # Weirdly formatted COSMIC variants
                continue

        if not cosmic_ids:
            maf_record["COSMIC"] = get_builder("COSMIC",
                                               self.scheme,
                                               value=None)
            return maf_record

        if maf_record["dbSNP_RS"].value == ["novel"]:
            maf_record["dbSNP_RS"] = get_builder("dbSNP_RS",
                                                 self.scheme,
                                                 value=None)
        joined_ids = ";".join(sorted(set(cosmic_ids)))
        maf_record["COSMIC"] = get_builder("COSMIC",
                                           self.scheme,
                                           value=joined_ids)
        return maf_record
Example #5
0
def test_entrez_symbol_and_feature(
    test_scheme,
    setup_annotator,
    get_test_file,
    get_empty_maf_record,
):
    """
    With the symbol and feature columns populated, the Entrez annotator is
    expected to set ``Entrez_Gene_Id`` to 101929983.
    """
    annotator = setup_annotator(
        test_scheme, entrez_json_file=get_test_file("ex_entrez.json"))

    record = get_empty_maf_record
    seeded = ((MAF_SYMBOL, 'PRAMEF27'), (MAF_FEATURE, 'ENST00000436041'))
    for column, value in seeded:
        record[column] = get_builder(column,
                                     test_scheme,
                                     value=value,
                                     default='')

    annotated = annotator.annotate(record)

    assert annotated['Entrez_Gene_Id'].value == 101929983
def test_annotate_dbsnp(test_scheme, setup_annotator, get_test_file,
                        get_empty_maf_record):
    """
    Exercise the dbSNP validation-status annotator with a known rsID, with
    "novel", and with no rsID, reusing one record throughout.
    """
    annotator = setup_annotator(test_scheme,
                                source=get_test_file("dbsnp_valstatus.db"))
    record = get_empty_maf_record

    # should match
    record["dbSNP_RS"] = get_builder("dbSNP_RS", test_scheme, value="rs540")
    record = annotator.annotate(record)
    assert record["dbSNP_Val_Status"].value == ["byOtherPop"]

    # novel, then empty: both should return an empty list once the status
    # column has been reset to None
    for rs_value in ("novel", None):
        record["dbSNP_RS"] = get_builder("dbSNP_RS",
                                         test_scheme,
                                         value=rs_value)
        record["dbSNP_Val_Status"] = get_builder("dbSNP_Val_Status",
                                                 test_scheme,
                                                 value=None)
        record = annotator.annotate(record)
        assert record["dbSNP_Val_Status"].value == []
Example #7
0
    def maf_from_first_element(self,
                               results,
                               callers,
                               star_callers=[],
                               tumor_only=False):
        """
        Build a MAF record from the first record reported by each caller.

        The first caller in ``self.caller_order()`` that also appears in
        ``callers`` supplies every column that is not handled specially:

        * allele columns and ``callers`` are skipped in this loop;
        * average columns take ``do_mean_to_int`` over the first record of
          each caller in ``callers`` that has results;
        * combine columns take ``do_uniq_list`` over the first record of every
          non-empty entry in ``results``;
        * ``RNA_Support`` and the RNA count/depth columns get placeholder
          values.

        ``star_callers`` is only read and forwarded, never mutated here.

        :return: whatever ``format_dic_to_record`` returns for the collected
                 columns
        """
        # Highest-priority caller that actually reported this variant.
        primary = None
        for name in self.caller_order():
            if name in callers:
                primary = results[name][0]
                break

        merged = {}
        for column in self.columns:
            if column in self.allele_columns() or column == "callers":
                continue

            if column in self.average_columns(tumor_only=tumor_only):
                per_caller = (results[name] for name in callers)
                observed = [hits[0][column].value for hits in per_caller
                            if hits]
                cell = get_builder(column,
                                   self.scheme,
                                   value=self.do_mean_to_int(observed))

            elif column in self.combine_columns():
                firsts = [results[key][0] for key in results if results[key]]
                cell = get_builder(column,
                                   self.scheme,
                                   value=self.do_uniq_list(firsts, column))

            # NOTE: Not a solution, just a temp place holder until we fully
            # build out the RNA annotator
            elif column == 'RNA_Support':
                cell = get_builder(column, self.scheme, value='Unknown')

            # NOTE: Not a solution, just a temp place holder until we fully
            # build out the RNA annotator
            elif column in ('RNA_ref_count', 'RNA_alt_count', 'RNA_depth'):
                cell = get_builder(column, self.scheme, value=None)

            else:
                cell = primary[column]

            merged[column] = cell

        return self.format_dic_to_record(merged,
                                         callers,
                                         star_callers=star_callers,
                                         tumor_only=tumor_only)
Example #8
0
def test_nonexonic_filter(test_scheme, setup_filter, get_test_file,
                          get_empty_maf_record, vcf_region, end_position,
                          expected):
    """
    Check the non-exonic filter against a single BED region file.
    """
    filterer = setup_filter(get_test_file("fake_regions.bed.gz"))

    record = get_empty_maf_record
    record['vcf_region'] = get_builder('vcf_region',
                                       test_scheme,
                                       value=vcf_region)
    record['End_Position'] = get_builder('End_Position',
                                         test_scheme,
                                         value=end_position)

    assert filterer.filter(record) is expected
def test_exac_filter_4(test_scheme, setup_filter, get_empty_maf_record):
    """
    ExAC filter returns True when every ``subpops`` column is set to the
    cutoff value and ``nontcga_ExAC_AF_Adj`` is set just above it.
    """
    threshold = 0.0004
    filterer = setup_filter(threshold)

    record = get_empty_maf_record
    for column in subpops:
        record[column] = get_builder(column, test_scheme, value=0.0004)
    # Assigned after the loop so this value wins for that column.
    adj_column = 'nontcga_ExAC_AF_Adj'
    record[adj_column] = get_builder(adj_column, test_scheme, value=0.00041)

    assert filterer.filter(record) is True
Example #10
0
def test_hotspot_annotator_overlap(test_scheme, setup_annotator, get_test_file,
                                   get_empty_maf_record):
    """
    ASXL1 with p.R548fs is expected to be annotated as a hotspot ('Y').
    """
    annotator = setup_annotator(test_scheme,
                                source=get_test_file('fake_hotspot.tsv'))

    record = get_empty_maf_record
    seeded = (('Hugo_Symbol', 'ASXL1'), ('HGVSp_Short', 'p.R548fs'))
    for column, value in seeded:
        record[column] = get_builder(column, test_scheme, value=value)

    annotated = annotator.annotate(record)

    assert annotated['hotspot'].value.value == 'Y'
Example #11
0
def test_hotspot_annotator_no_overlap_1(test_scheme, setup_annotator,
                                        get_test_file, get_empty_maf_record):
    """
    An 'Unknown' gene with no protein change is expected to be annotated as
    not a hotspot ('N').
    """
    annotator = setup_annotator(test_scheme,
                                source=get_test_file('fake_hotspot.tsv'))

    record = get_empty_maf_record
    seeded = (('Hugo_Symbol', 'Unknown'), ('HGVSp_Short', None))
    for column, value in seeded:
        record[column] = get_builder(column, test_scheme, value=value)

    annotated = annotator.annotate(record)

    assert annotated['hotspot'].value.value == 'N'
 def annotate(self, maf_record, vcf_record, tumor_sample):
     """
     Set ``Mutation_Status`` using the mapper registered for ``self.caller``.

     The callable stored in ``self.mapper`` under ``self.caller`` is invoked
     with the VCF record and the tumor sample; its result becomes the column
     value. Returns the updated ``maf_record``.
     """
     resolve_status = self.mapper[self.caller]
     status = resolve_status(vcf_record, tumor_sample)
     maf_record["Mutation_Status"] = get_builder("Mutation_Status",
                                                 self.scheme,
                                                 value=status)
     return maf_record
Example #13
0
    def format_dic_to_record(self,
                             maf_dic,
                             callers,
                             star_callers=[],
                             tumor_only=False):
        """
        Formats the dictionary into a MafRecord.

        Depths and alleles are normalised first, then the ``callers`` column
        is set to the sorted caller names (names from ``star_callers`` get a
        trailing ``*``). Finally every column in ``self.columns`` is copied
        into a fresh record with its ``column_index`` taken from the scheme.
        ``star_callers`` is only read here, never mutated.
        """
        # Depths, then alleles
        maf_dic = self.fix_depths(maf_dic, tumor_only=tumor_only)
        maf_dic = self.standardize_alleles(maf_dic, tumor_only=tumor_only)

        # Callers
        starred = ['{0}*'.format(name) for name in star_callers]
        maf_dic['callers'] = get_builder('callers',
                                         self.scheme,
                                         value=sorted(callers + starred))

        # Create MafRecord
        record = init_empty_maf_record()
        for column in self.columns:
            position = self.scheme.column_index(name=column)
            cell = maf_dic[column]
            cell.column_index = position
            record[column] = cell
        return record
Example #14
0
def test_cosmic_ins(
    test_scheme,
    setup_annotator,
    get_test_file,
    get_empty_maf_record,
    vcf_gen,
    get_test_vcf_record,
):
    """
    An insertion taken from ex2.vcf.gz is expected to be annotated with
    COSM0002, and its "novel" dbSNP_RS value cleared.
    """
    vcf_name = "ex2.vcf.gz"
    annotator = setup_annotator(test_scheme, source=get_test_file(vcf_name))

    # insertion
    insertion = vcf_gen(vcf_name).insertion

    # setup
    copied_fields = ("chrom", "pos", "alleles", "ref", "alts")
    vcf_record = get_test_vcf_record(
        **{field: getattr(insertion, field) for field in copied_fields})

    record = get_empty_maf_record
    record["dbSNP_RS"] = get_builder("dbSNP_RS", test_scheme, value="novel")
    record = annotator.annotate(record, vcf_record, var_allele_idx=1)

    assert record["COSMIC"].value == ["COSM0002"]
    assert record["dbSNP_RS"].value == []
Example #15
0
    def annotate(self, maf_record, vcf_record, var_allele_idx=1):
        """
        Fill the ``nontcga_ExAC_AF*`` columns from the first matching record.

        Records are fetched from ``self.f`` over ``chrom:pos-stop``. A record
        matches when its position and reference allele equal the variant's
        and the selected alternate allele is among its ``alts``; only the
        first match is used. Each frequency is ``AC / AN`` for the matched
        allele. A falsy ``AN``, or no match at all, leaves the value ``None``.

        :param maf_record: record to update in place
        :param vcf_record: variant providing chrom/pos/stop/ref/alleles
        :param var_allele_idx: index into ``vcf_record.alleles`` of the
                               alternate allele to look up
        :return: the updated ``maf_record``
        """
        region = '{0}:{1}-{2}'.format(vcf_record.chrom, vcf_record.pos,
                                      vcf_record.stop)
        alt = vcf_record.alleles[var_allele_idx]
        pop_freqs = {}
        raw_af = adj_af = None

        for hit in self.f.fetch(region=region):
            if not (vcf_record.pos == hit.pos and vcf_record.ref == hit.ref
                    and alt in hit.alts):
                continue

            allele_idx = hit.alts.index(alt)
            info = hit.info
            for pop in self.popkeys:
                count = info['AC_{0}'.format(pop)][allele_idx]
                total = info['AN_{0}'.format(pop)]
                pop_freqs[pop] = count / float(total) if total else None

            if info['AN']:
                raw_af = info['AC'][allele_idx] / float(info['AN'])
            if info['AN_Adj']:
                adj_af = info['AC_Adj'][allele_idx] / float(info['AN_Adj'])
            break

        # Overall
        overall = (("nontcga_ExAC_AF", raw_af),
                   ("nontcga_ExAC_AF_Adj", adj_af))
        for column, freq in overall:
            maf_record[column] = get_builder(column, self.scheme, value=freq)

        # pops
        for pop in self.popkeys:
            column = 'nontcga_ExAC_AF_{0}'.format(pop)
            maf_record[column] = get_builder(column,
                                             self.scheme,
                                             value=pop_freqs.get(pop))

        return maf_record
Example #16
0
    def annotate(self, maf_record, vcf_record, var_allele_idx=1):
        """
        Annotate each variant with AF records from GnomAD

        Records are fetched from ``self.f`` over ``chrom:pos-stop``; the first
        one whose position and reference allele equal the variant's and whose
        ``alts`` contain the selected alternate allele is used. Every
        ``GNOMAD_SRC_TO_MAF`` source field is copied into its MAF column:
        ``POP_MAX_non_cancer_adj`` as a list, other tuple values reduced to
        their first element. With no matching record each column gets an
        empty value (``[]`` for ``POP_MAX_non_cancer_adj``, ``''`` otherwise).

        :return: the updated ``maf_record``
        """
        region = "{0}:{1}-{2}".format(vcf_record.chrom, vcf_record.pos,
                                      vcf_record.stop)
        alt = vcf_record.alleles[var_allele_idx]

        for hit in self.f.fetch(region=region):
            if not (vcf_record.pos == hit.pos and vcf_record.ref == hit.ref
                    and alt in hit.alts):
                continue

            for source_col, maf_col in GNOMAD_SRC_TO_MAF.items():
                value = hit.info.get(source_col)
                default = ''

                if source_col == "POP_MAX_non_cancer_adj" and value is not None:
                    value, default = list(value), []
                elif isinstance(value, tuple):
                    value = value[0]

                maf_record[maf_col] = get_builder(maf_col,
                                                  self.scheme,
                                                  value=value,
                                                  default=default)
            break
        else:
            # No matching record was found: fill every column with its
            # empty value.
            for source_col, maf_col in GNOMAD_SRC_TO_MAF.items():
                empty = [] if source_col == "POP_MAX_non_cancer_adj" else ''
                maf_record[maf_col] = get_builder(maf_col,
                                                  self.scheme,
                                                  value=empty)

        return maf_record
Example #17
0
def test_hotspot_annotator_no_overlap_1(test_scheme, setup_annotator,
                                        get_test_file, get_empty_maf_record):
    """
    An "Unknown" gene with no protein change is expected to be annotated as
    not a hotspot ("N").
    """
    annotator = setup_annotator(test_scheme,
                                source=get_test_file("fake_hotspot.tsv"))

    record = get_empty_maf_record
    seeded = (("Hugo_Symbol", "Unknown"), ("HGVSp_Short", None))
    for column, value in seeded:
        record[column] = get_builder(column, test_scheme, value=value)

    annotated = annotator.annotate(record)

    assert annotated["hotspot"].value.value == "N"
Example #18
0
def test_hotspot_annotator_overlap(test_scheme, setup_annotator, get_test_file,
                                   get_empty_maf_record):
    """
    ASXL1 with p.R548fs is expected to be annotated as a hotspot ("Y").
    """
    annotator = setup_annotator(test_scheme,
                                source=get_test_file("fake_hotspot.tsv"))

    record = get_empty_maf_record
    seeded = (("Hugo_Symbol", "ASXL1"), ("HGVSp_Short", "p.R548fs"))
    for column, value in seeded:
        record[column] = get_builder(column, test_scheme, value=value)

    annotated = annotator.annotate(record)

    assert annotated["hotspot"].value.value == "Y"
Example #19
0
    def maf_from_first_element(self,
                               results,
                               callers,
                               star_callers=[],
                               tumor_only=False):
        """
        Build a MAF record from the first record reported by each caller.

        The first caller in ``self.caller_order()`` that also appears in
        ``callers`` supplies every column that is not handled specially:

        * allele columns and ``callers`` are skipped in this loop;
        * average columns take ``do_mean_to_int`` over the first record of
          each caller in ``callers`` that has results;
        * combine columns take ``do_uniq_list`` over the first record of every
          non-empty entry in ``results``.

        ``star_callers`` is only read and forwarded, never mutated here.

        :return: whatever ``format_dic_to_record`` returns for the collected
                 columns
        """
        # Highest-priority caller that actually reported this variant.
        primary = None
        for name in self.caller_order():
            if name in callers:
                primary = results[name][0]
                break

        merged = {}
        for column in self.columns:
            if column in self.allele_columns() or column == 'callers':
                continue

            if column in self.average_columns():
                per_caller = (results[name] for name in callers)
                observed = [hits[0][column].value for hits in per_caller
                            if hits]
                cell = get_builder(column,
                                   self.scheme,
                                   value=self.do_mean_to_int(observed))

            elif column in self.combine_columns():
                firsts = [results[key][0] for key in results if results[key]]
                cell = get_builder(column,
                                   self.scheme,
                                   value=self.do_uniq_list(firsts, column))

            else:
                cell = primary[column]

            merged[column] = cell

        return self.format_dic_to_record(merged,
                                         callers,
                                         star_callers=star_callers,
                                         tumor_only=tumor_only)
def test_exac_filter_3(test_scheme, setup_filter, get_empty_maf_record):
    """
    ExAC filter returns False when every ``subpops`` column is set to
    exactly the cutoff value.
    """
    threshold = 0.0004
    filterer = setup_filter(threshold)

    record = get_empty_maf_record
    for column in subpops:
        record[column] = get_builder(column, test_scheme, value=0.0004)

    assert filterer.filter(record) is False
def test_normal_depth_filter(
    test_scheme, setup_filter, get_empty_maf_record, normal_depth, expected
):
    """
    Check the normal-depth filter, configured with 7, for one ``n_depth``
    value.
    """
    filterer = setup_filter(7)

    record = get_empty_maf_record
    depth_cell = get_builder("n_depth", test_scheme, value=normal_depth)
    record["n_depth"] = depth_cell

    assert filterer.filter(record) is expected
def test_multiallelic_filter(test_scheme, setup_filter, get_empty_maf_record,
                             vcf_region, expected):
    """
    Check the multiallelic filter for one ``vcf_region`` value.
    """
    filterer = setup_filter()

    record = get_empty_maf_record
    region_cell = get_builder("vcf_region", test_scheme, value=vcf_region)
    record["vcf_region"] = region_cell

    assert filterer.filter(record) is expected
def test_blacklist_filter(test_scheme, setup_filter, get_test_file,
                          get_empty_maf_record, tumor_uuid, expected_bool,
                          expected_tags):
    """
    Check both the blacklist filter verdict and the tags left on the filter
    for one tumor sample UUID.
    """
    filterer = setup_filter(get_test_file('fake_blacklist.tsv'))

    record = get_empty_maf_record
    record['Tumor_Sample_UUID'] = get_builder('Tumor_Sample_UUID',
                                              test_scheme,
                                              value=tumor_uuid)

    verdict = filterer.filter(record)

    assert verdict is expected_bool
    assert filterer.tags == expected_tags
Example #24
0
    def standardize_alleles(self, maf_dic, tumor_only=False):
        """
        Helper utility to standardize all alleles to Ref/Alt tumor and
        Ref/Ref normal. In tumor-only mode both normal alleles are set to
        ``None`` instead.

        :param maf_dic: ``dict`` of the maf record to format
        :param tumor_only: ``True`` if there is no matched normal else ``False``
        :return: updated maf_dic with formatted alleles
        """
        ref = maf_dic["Reference_Allele"].value
        alt = maf_dic["Allele"].value
        normal_allele = ref if tumor_only is False else None

        assignments = (
            ("Tumor_Seq_Allele1", ref),
            ("Tumor_Seq_Allele2", alt),
            ("Match_Norm_Seq_Allele1", normal_allele),
            ("Match_Norm_Seq_Allele2", normal_allele),
        )
        for column, allele in assignments:
            maf_dic[column] = get_builder(column, self.scheme, value=allele)
        return maf_dic
Example #25
0
 def annotate(self, maf_record):
     """
     Set the ``hotspot`` column to 'Y' or 'N'.

     The record is a hotspot when its ``Hugo_Symbol`` is a key of
     ``self.data`` and its normalised ``HGVSp_Short`` is contained in
     ``self.data[gene]``. Normalisation removes a leading ``p.`` and, for
     values containing ``fs*``, drops the character just before ``fs`` and
     everything after it (e.g. ``R548Gfs*12`` becomes ``R548fs``).

     :param maf_record: record to update in place
     :return: the updated ``maf_record``
     """
     gene = maf_record['Hugo_Symbol'].value
     mval = "N"
     if gene in self.data:
         hgvsp = maf_record['HGVSp_Short'].value
         if hgvsp and hgvsp.startswith('p.'):
             # Remove only the literal "p." prefix. str.lstrip('p.') would
             # strip any leading run of 'p' and '.' characters instead.
             hgvsp = hgvsp[2:]
         if hgvsp and 'fs*' in hgvsp:
             idx = hgvsp.index('fs')
             hgvsp = hgvsp[:idx - 1] + 'fs'
         if hgvsp and hgvsp in self.data[gene]:
             mval = "Y"
     maf_record['hotspot'] = get_builder("hotspot", self.scheme, value=mval)
     return maf_record
Example #26
0
 def annotate(self, maf_record):
     """
     Set the ``hotspot`` column to "Y" or "N".

     The record is a hotspot when its ``Hugo_Symbol`` is a key of
     ``self.data`` and its normalised ``HGVSp_Short`` is contained in
     ``self.data[gene]``. Normalisation removes a leading ``p.`` and, for
     values containing ``fs*``, drops the character just before ``fs`` and
     everything after it (e.g. ``R548Gfs*12`` becomes ``R548fs``).

     :param maf_record: record to update in place
     :return: the updated ``maf_record``
     """
     gene = maf_record["Hugo_Symbol"].value
     mval = "N"
     if gene in self.data:
         hgvsp = maf_record["HGVSp_Short"].value
         if hgvsp and hgvsp.startswith("p."):
             # Remove only the literal "p." prefix. str.lstrip("p.") would
             # strip any leading run of "p" and "." characters instead.
             hgvsp = hgvsp[2:]
         if hgvsp and "fs*" in hgvsp:
             idx = hgvsp.index("fs")
             hgvsp = hgvsp[:idx - 1] + "fs"
         if hgvsp and hgvsp in self.data[gene]:
             mval = "Y"
     maf_record["hotspot"] = get_builder("hotspot", self.scheme, value=mval)
     return maf_record
Example #27
0
def test_pon_filter(test_scheme, setup_filter, get_test_file,
                    get_empty_maf_record, vcf_region, expected):
    """
    Check the PON filter, backed by fake_exac.vcf.gz, for one ``vcf_region``
    value.
    """
    filterer = setup_filter(get_test_file("fake_exac.vcf.gz"))

    record = get_empty_maf_record
    region_cell = get_builder("vcf_region", test_scheme, value=vcf_region)
    record["vcf_region"] = region_cell

    assert filterer.filter(record) is expected
def test_filter_gnomad_null(test_scheme, setup_filter, get_empty_maf_record):
    """
    FilterGnomAD returns False when every gnomAD column holds an empty
    value ([] for the max-AF populations column, '' for the rest).
    """
    threshold = 0.0004
    filterer = setup_filter(threshold)

    record = get_empty_maf_record
    for column in GNOMAD_MAF_COLUMNS:
        is_pops_column = column == 'gnomAD_non_cancer_MAX_AF_POPS_adj'
        empty = [] if is_pops_column else ''
        record[column] = get_builder(column, test_scheme, value=empty)

    assert filterer.filter(record) is False
 def annotate(self, maf_record, vcf_record, strip_chr=False):
     """
     Set ``CONTEXT`` to the sequence fetched from ``self.fa`` around the
     variant.

     The fetched region runs from ``pos - context_size`` (never below 1) to
     ``stop + context_size``. With ``strip_chr`` set, ``chrM`` is looked up
     as ``MT`` and any other name has ``chr`` removed.

     :return: the updated ``maf_record``
     """
     # Add reference context
     contig = vcf_record.chrom
     if strip_chr:
         contig = 'MT' if contig == 'chrM' else contig.replace('chr', '')

     start = max(1, vcf_record.pos - self.context_size)
     end = vcf_record.stop + self.context_size
     region = '{0}:{1}-{2}'.format(contig, start, end)

     sequence = self.fa.fetch(region=region)
     maf_record['CONTEXT'] = get_builder("CONTEXT",
                                         self.scheme,
                                         value=sequence)
     return maf_record
 def write_record(self, record):
     """
     Helper function to write out the formatted merged public record.

     The record is first passed to ``self.metrics.collect_output``. A fresh
     record is then built over ``self._columns`` in which the columns named
     in ``nulled`` are replaced by a builder with value ``None`` and all
     others are copied as-is, and the result is added to ``self.maf_writer``.
     """
     self.metrics.collect_output(record)

     nulled = (
         'Match_Norm_Seq_Allele1',
         'Match_Norm_Seq_Allele2',
         'Match_Norm_Validation_Allele1',
         'Match_Norm_Validation_Allele2',
         'n_ref_count',
         'n_alt_count',
     )

     public_record = init_empty_maf_record()
     for column in self._columns:
         if column not in nulled:
             public_record[column] = record[column]
             continue
         public_record[column] = get_builder(column,
                                             self._scheme,
                                             value=None)

     self.maf_writer += public_record