Exemple #1
0
def get_variant_from_vcf_fields(vcf_fields, alt_allele_pos):
    """
    Get a basic variant from vcf_fields, for allele given by alt_allele_pos

    Args:
        vcf_fields: sequence of VCF column strings. Indices 0-4 are read as
            chromosome, position, ID, ref allele and comma-separated alt
            alleles respectively.
        alt_allele_pos: 0-based index into the comma-separated alt alleles.

    Returns:
        A Variant built from the minimal representation of (xpos, ref, alt),
        with the extras 'alt_allele_pos' and 'orig_alt_alleles' set, and
        vcf_id set when the ID column is non-empty and not '.'.
        None when genomeloc.valid_pos rejects the chromosome/position.
    """

    # Normalise to a "chr"-prefixed name. A prefix test is used so that a
    # contig merely containing "chr" somewhere still gets the prefix.
    chrom = vcf_fields[0]
    if not chrom.startswith('chr'):
        chrom = 'chr' + chrom
    pos = int(vcf_fields[1])

    # if we can't get a genomic location, just ignore it and print a message humans will ignore too
    # obviously need a better way to approach this
    if not genomeloc.valid_pos(chrom, pos):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("ERROR: could not figure out coordinates for %s:%d...maybe a nonstandard chromosome?" % (chrom, pos))
        return None

    ref = vcf_fields[3]
    orig_alt_alleles = vcf_fields[4].split(',')
    alt = orig_alt_alleles[alt_allele_pos]

    # The position may shift when ref/alt are reduced to their minimal form.
    xpos = genomeloc.get_single_location(chrom, pos)
    xpos, ref, alt = get_minimal_representation(xpos, ref, alt)

    variant = Variant(xpos, ref, alt)
    # Keep the original allele list so the source record can be traced back.
    variant.set_extra('alt_allele_pos', alt_allele_pos)
    variant.set_extra('orig_alt_alleles', orig_alt_alleles)

    # '.' is the VCF placeholder for a missing ID.
    vcf_id = vcf_fields[2]
    if vcf_id and vcf_id != '.':
        variant.vcf_id = vcf_id

    return variant
Exemple #2
0
def get_variant_from_vcf_fields(vcf_fields, alt_allele_pos):
    """
    Get a basic variant from vcf_fields, for allele given by alt_allele_pos

    Args:
        vcf_fields: sequence of VCF column strings; columns 0 through 4 are
            used (chromosome, position, ID, ref, comma-separated alts).
        alt_allele_pos: 0-based index selecting one of the alt alleles.

    Returns:
        None if genomeloc.valid_pos does not accept the chromosome/position.
        Otherwise a Variant created from the minimal representation of the
        selected allele, carrying the extras 'alt_allele_pos' and
        'orig_alt_alleles', plus vcf_id when the ID column holds a real value.
    """

    # Add the "chr" prefix only when the name does not already start with it
    # (a substring match would skip names that contain "chr" elsewhere).
    raw_chrom = vcf_fields[0]
    chrom = raw_chrom if raw_chrom.startswith('chr') else 'chr' + raw_chrom
    pos = int(vcf_fields[1])

    # if we can't get a genomic location, just ignore it and print a message humans will ignore too
    # obviously need a better way to approach this
    if not genomeloc.valid_pos(chrom, pos):
        # Parenthesised single-argument form works on both Python 2 and 3.
        print("ERROR: could not figure out coordinates for %s:%d...maybe a nonstandard chromosome?" % (
            chrom, pos))
        return None

    ref = vcf_fields[3]
    orig_alt_alleles = vcf_fields[4].split(',')
    alt = orig_alt_alleles[alt_allele_pos]

    # Reduce to the minimal representation; this may adjust xpos as well.
    xpos = genomeloc.get_single_location(chrom, pos)
    xpos, ref, alt = get_minimal_representation(xpos, ref, alt)

    variant = Variant(xpos, ref, alt)
    # Record which allele of the original record this variant came from.
    variant.set_extra('alt_allele_pos', alt_allele_pos)
    variant.set_extra('orig_alt_alleles', orig_alt_alleles)

    # Skip empty IDs and the '.' placeholder.
    record_id = vcf_fields[2]
    if record_id and record_id != '.':
        variant.vcf_id = record_id

    return variant
Exemple #3
0
def get_exac_af(chrom, pos, ref, alt):
    """
    Look up ExAC allele frequencies for a single variant.

    Records are fetched from exac_vcf in a window of len(ref) + len(alt)
    bases on either side of pos. Every alt allele of every fetched record is
    reduced with get_minimal_representation and compared with the query.

    Args:
        chrom: chromosome name; "chr" is stripped before querying exac_vcf.
        pos: integer position of the variant on chrom.
        ref: reference allele, compared against the minimal ExAC ref.
        alt: alternate allele, compared against the minimal ExAC alt.
            NOTE(review): ref/alt are presumably already in minimal
            representation, otherwise nothing can match -- confirm at callers.

    Returns:
        A tuple (global_af, pop_max_af, pop_max_population).
        (None, None, None) when no ExAC allele matches. If no population has
        AN > 0, pop_max_af stays -1 and pop_max_population stays None.
    """
    populations = ['AMR', 'EAS', 'FIN', 'NFE', 'SAS', 'AFR']

    chrom_without_chr = chrom.replace("chr", "")
    xpos = genomeloc.get_single_location(chrom, pos)
    variant_length = len(ref) + len(alt)

    # Clamp the window start: for variants near the beginning of a contig
    # pos - variant_length goes negative, which is not a valid coordinate
    # (tabix-backed readers raise on it -- confirm for the exac_vcf reader).
    fetch_start = max(0, pos - variant_length)
    fetch_end = pos + variant_length

    # check whether the alleles match
    matching_exac_variant = None
    matching_exac_variant_i = None
    for record in exac_vcf.fetch(chrom_without_chr, fetch_start, fetch_end):
        exac_xpos = genomeloc.get_xpos(record.CHROM, record.POS)
        for exac_alt_i, raw_exac_alt in enumerate(record.ALT):
            exac_variant_xpos, exac_ref, exac_alt = get_minimal_representation(
                exac_xpos, str(record.REF), str(raw_exac_alt))
            if exac_variant_xpos == xpos and exac_ref == ref and exac_alt == alt:
                if matching_exac_variant is not None:
                    # Report the ambiguity but carry on; the last match wins.
                    print(
                        "ERROR: multiple exac variants match the variant: %s %s %s %s"
                        % (chrom, pos, ref, alt))
                matching_exac_variant = record
                matching_exac_variant_i = exac_alt_i

    if matching_exac_variant is None:
        return None, None, None

    info = matching_exac_variant.INFO

    # Highest per-population frequency, skipping populations with no
    # called alleles (AN == 0) to avoid dividing by zero.
    pop_max_af = -1
    pop_max_population = None
    for p in populations:
        if info['AN_' + p] > 0:
            pop_af = info['AC_' + p][matching_exac_variant_i] / float(
                info['AN_' + p])
            if pop_af > pop_max_af:
                pop_max_af = pop_af
                pop_max_population = p

    if info['AN_Adj'] != 0:
        global_af = float(info['AC_Adj'][matching_exac_variant_i]) / float(
            info['AN_Adj'])
    else:
        # Sanity check: with no adjusted allele number the count must be 0.
        assert float(info['AC_Adj'][matching_exac_variant_i]) == 0
        global_af = 0

    return global_af, pop_max_af, pop_max_population
def get_exac_af(chrom, pos, ref, alt):
    """
    Return ExAC allele frequencies for the variant (chrom, pos, ref, alt).

    exac_vcf is queried in a window of len(ref) + len(alt) bases around pos.
    Each alt allele of each returned record is passed through
    get_minimal_representation and matched on (xpos, ref, alt).

    Args:
        chrom: chromosome name; occurrences of "chr" are removed for the query.
        pos: integer position of the variant.
        ref: reference allele to match.
        alt: alternate allele to match.
            NOTE(review): matching is done against minimal representations,
            so ref/alt are presumably expected to be minimal -- confirm.

    Returns:
        (global_af, pop_max_af, pop_max_population), or (None, None, None)
        when the variant is not found. pop_max_af is -1 and
        pop_max_population is None if every population has AN == 0.
    """
    populations = ['AMR', 'EAS', 'FIN', 'NFE', 'SAS', 'AFR']

    chrom_without_chr = chrom.replace("chr", "")
    xpos = genomeloc.get_single_location(chrom, pos)
    variant_length = len(ref) + len(alt)

    # Never ask for a negative start coordinate: close to the start of a
    # contig pos - variant_length drops below zero, which is not a valid
    # position (tabix-backed readers raise on it -- confirm for exac_vcf).
    window_start = max(0, pos - variant_length)
    window_end = pos + variant_length

    # check whether the alleles match
    matching_exac_variant = None
    matching_exac_variant_i = None
    for record in exac_vcf.fetch(chrom_without_chr, window_start, window_end):
        exac_xpos = genomeloc.get_xpos(record.CHROM, record.POS)
        for exac_alt_i, record_alt in enumerate(record.ALT):
            exac_variant_xpos, exac_ref, exac_alt = get_minimal_representation(exac_xpos, str(record.REF), str(record_alt))
            if exac_variant_xpos == xpos and exac_ref == ref and exac_alt == alt:
                if matching_exac_variant is not None:
                    # Ambiguous match: report it, then keep the latest one.
                    print("ERROR: multiple exac variants match the variant: %s %s %s %s" % (chrom, pos, ref, alt))
                matching_exac_variant = record
                matching_exac_variant_i = exac_alt_i

    if matching_exac_variant is None:
        return None, None, None

    exac_info = matching_exac_variant.INFO

    # Find the population with the largest allele frequency; populations
    # with AN == 0 are skipped so the division is always defined.
    pop_max_af = -1
    pop_max_population = None
    for p in populations:
        allele_number = exac_info['AN_'+p]
        if allele_number > 0:
            pop_af = exac_info['AC_'+p][matching_exac_variant_i]/float(allele_number)
            if pop_af > pop_max_af:
                pop_max_af = pop_af
                pop_max_population = p

    if exac_info['AN_Adj'] != 0:
        global_af = float(exac_info['AC_Adj'][matching_exac_variant_i])/float(exac_info['AN_Adj'])
    else:
        # Sanity check: a zero adjusted allele number implies a zero count.
        assert float(exac_info['AC_Adj'][matching_exac_variant_i]) == 0
        global_af = 0

    return global_af, pop_max_af, pop_max_population