Beispiel #1
0
 def __init__(self, locus, index, start, stop, seq):
     """Initialize a HomAltAllele.

     ``locus``, ``index``, ``start`` and ``stop`` are stored unchanged;
     ``seq`` is stored after being passed through ``normalize_seq``.
     """
     self.locus, self.index = locus, index
     self.start, self.stop = start, stop
     self.seq = normalize_seq(seq)
Beispiel #2
0
 def __init__(self, locus, index, start, stop, seq, phase=None):
     """Initialize a HetAltAllele.

     ``locus``, ``index``, ``start``, ``stop`` and ``phase`` are stored
     unchanged (``phase`` defaults to ``None``); ``seq`` is stored after
     being passed through ``normalize_seq``.
     """
     self.locus, self.index = locus, index
     self.start, self.stop = start, stop
     self.seq = normalize_seq(seq)
     self.phase = phase
Beispiel #3
0
def build_match_strings(ref, start, stop, allele, mode='sensitive', debug=False):
    """Build the reference and alternate strings used for allele matching.

    Args:
        ref: Sliceable reference sequence; indexed with the same coordinates
            as ``start``/``stop`` and the allele's bounds.
        start: Start of the window being compared.
        stop: End of the window being compared.
        allele: Object providing ``alts``, ``ref``, ``start``, ``stop`` and,
            for sensitive mode, ``min_start`` and ``max_stop``.
        mode: ``'specific'`` embeds each alt in the full reference window;
            ``'sensitive'`` keeps reference bases only between
            ``min_start`` and ``max_stop`` and fills the rest of the window
            with ``'*'``.
        debug: When true, write diagnostic lines to ``sys.stderr``.

    Returns:
        A ``(super_ref, super_alleles)`` tuple: the normalized reference
        window and a list with one normalized string per alt.

    Raises:
        ValueError: If ``mode`` is neither ``'specific'`` nor ``'sensitive'``.
    """
    alts = allele.alts

    if debug:
        summary = '  Allele: start={}, stop={}, size={}, ref={}, seq={}'.format(
            allele.start,
            allele.stop,
            allele.stop - allele.start,
            allele.ref,
            ','.join(alts),
        )
        print(summary, file=sys.stderr)

    super_ref = normalize_seq(ref[start:stop])

    # Require reference matches within the wobble zone + padding built into
    # each normalized allele.
    super_alleles = []
    if mode == 'specific':
        for alt in alts:
            embedded = ref[start:allele.start] + alt + ref[allele.stop:stop]
            super_alleles.append(normalize_seq(embedded))
    elif mode == 'sensitive':
        for alt in alts:
            embedded = (
                '*' * (allele.min_start - start)
                + ref[allele.min_start:allele.start]
                + alt
                + ref[allele.stop:allele.max_stop]
                + '*' * (stop - allele.max_stop)
            )
            super_alleles.append(normalize_seq(embedded))
    else:
        raise ValueError(f'invalid match mode specified: {mode}')

    if debug:
        print('                MODE:', mode, file=sys.stderr)
        print('       SUPER ALLELES:', super_alleles, file=sys.stderr)
        print('        SUPER REF:   ', super_ref, file=sys.stderr)

    # Sanity check: each built string is the window with the allele's ref
    # span swapped for the corresponding alt.
    assert all(
        len(built) == stop - start - len(allele.ref) + len(alt_seq)
        for built, alt_seq in zip(super_alleles, alts)
    )

    return super_ref, super_alleles
Beispiel #4
0
 def __init__(self, start, stop, seq):
     """Store the ``start``/``stop`` bounds and the normalized ``seq``.

     ``seq`` is passed through ``normalize_seq`` before being stored.
     """
     self.start, self.stop = start, stop
     self.seq = normalize_seq(seq)
Beispiel #5
0
 def seq(self):
     """Return ``self.ref[self.start:self.stop]`` after ``normalize_seq``."""
     ref_slice = self.ref[self.start:self.stop]
     return normalize_seq(ref_slice)
Beispiel #6
0
 def __init__(self, start, stop, seq, phase=None):
     """Store the bounds, the normalized ``seq`` and the optional ``phase``.

     ``seq`` is passed through ``normalize_seq`` before being stored;
     ``phase`` is stored as given and defaults to ``None``.
     """
     self.start, self.stop = start, stop
     self.seq = normalize_seq(seq)
     self.phase = phase
Beispiel #7
0
def find_allele(ref, allele, superlocus, debug=False):
    """Determine how many copies of ``allele`` are supported by ``superlocus``.

    Args:
        ref: Sliceable reference sequence.
        allele: Query object providing ``start``, ``stop``, ``min_start``,
            ``max_stop`` and ``alleles`` (index 0 is used as the reference
            allele, index 1 as the alternate being searched for).
        superlocus: Sequence of loci; each provides ``start``, ``stop``,
            ``alleles``, ``allele_indices`` and ``record.filter``.
        debug: When true, write diagnostic lines to ``sys.stderr``.

    Returns:
        On the fast path, the number of entries in the single locus's
        ``allele_indices`` that point at the query alternate.  Otherwise the
        largest summed match value over all generated genotypes, or ``None``
        when ``OverlapError`` is raised while building the graph, or when
        nothing matched and at least one match result was ``None``.
    """
    # FASTPATH: skip graph construction when the superlocus is a single
    #           PASS locus with identical bounds that carries the alternate.
    if len(superlocus) == 1:
        locus = superlocus[0]
        exact_hit = (
            allele.start == locus.start
            and allele.stop == locus.stop
            and allele.alleles[1] in locus.alleles[1:]
            and 'PASS' in locus.record.filter
        )
        if exact_hit:
            allele_index = locus.alleles.index(allele.alleles[1])
            return locus.allele_indices.count(allele_index)

    # Bounds come from normalized extremes
    start, stop = get_superlocus_bounds([[allele], superlocus])

    if debug:
        header = '  Allele: start={}, stop={}, size={}, seq={}'.format(
            allele.start, allele.stop, allele.stop-allele.start, allele.alleles[1])
        print(header, file=sys.stderr)

    # Require reference matches within the wobble zone + padding built into
    # each normalized allele; everything outside is filled with '*'.
    left_fill = '*' * (allele.min_start - start)
    left_flank = ref[allele.min_start:allele.start]
    right_flank = ref[allele.stop:allele.max_stop]
    right_fill = '*' * (stop - allele.max_stop)

    super_allele = normalize_seq(
        left_fill + left_flank + allele.alleles[1] + right_flank + right_fill
    )

    assert len(super_allele) == stop - start - len(allele.alleles[0]) + len(allele.alleles[1])

    # Build the variant graph and enumerate paths through it for the superlocus.
    try:
        graph, constraints = generate_graph(ref, start, stop, superlocus, debug)
        graph = list(graph)

        if debug:
            for n, (node_start, node_stop, node_alleles) in enumerate(graph):
                print('  GRAPH{:02d}: start={}, stop={}, alleles={}'.format(n, node_start, node_stop, node_alleles), file=sys.stderr)
            print(file=sys.stderr)

        paths = generate_paths(graph, debug=debug)

        # NOTE(review): paths are only materialized inside this try block
        # when debugging; if generate_paths is lazy, an OverlapError raised
        # during iteration would escape in non-debug runs -- confirm.
        if debug:
            paths = list(paths)
            for n, path in enumerate(paths):
                print('  PATH{:02d}: {}'.format(n, path), file=sys.stderr)
            print(file=sys.stderr)

    except OverlapError:
        return None

    # Generate the set of diploid genotypes (actually haplotypes)
    genos = set(generate_genotypes(paths, constraints, debug))

    # Run the matcher against both members of every genotype pair.
    matches = []
    for hap1, hap2 in genos:
        matches.append((fancy_match(super_allele, hap1), fancy_match(super_allele, hap2)))

    # Highest zygosity over all genotypes; falsy results count as zero.
    # NOTE(review): max() raises ValueError if no genotypes were generated.
    z = max((m1 or 0) + (m2 or 0) for m1, m2 in matches)

    # Nothing matched: report a no-call if any individual result was None.
    if not z and any(None in pair for pair in matches):
        z = None

    if debug:
        print('   ALLELE:{} {}'.format(len(super_allele), super_allele), file=sys.stderr)
        for n, (geno, outcome) in enumerate(zip(genos, matches)):
            print('   GENO{:02d}:{} {}'.format(n, tuple(map(len, geno)),  geno), file=sys.stderr)
            print('  MATCH{:02d}: {}'.format(n, outcome), file=sys.stderr)
        print(file=sys.stderr)
        print('  ZYGOSITY: {}'.format(z), file=sys.stderr)

    return z
Beispiel #8
0
 def seq(self):
     """Return the normalized sequence of this allele.

     The sequence is the ``self.ref[self.start:self.stop]`` slice passed
     through ``normalize_seq``.
     """
     ref_slice = self.ref[self.start:self.stop]
     return normalize_seq(ref_slice)
Beispiel #9
0
def test_normalize_seq():
    """Check normalize_seq on empty, canonical and mixed-case/ambiguous input."""
    expectations = {
        '': '',
        'ACGT': 'ACGT',
        'ACGTNacgtNRYSWKMBDHV': 'ACGTNACGTNNNNNNNNNNN',
    }
    for raw, normalized in expectations.items():
        assert normalize_seq(raw) == normalized
Beispiel #10
0
def test_normalize_seq():
    """Verify normalize_seq against a small table of input/output pairs."""
    cases = (
        ('', ''),
        ('ACGT', 'ACGT'),
        ('ACGTNacgtNRYSWKMBDHV', 'ACGTNACGTNNNNNNNNNNN'),
    )
    for given, wanted in cases:
        assert normalize_seq(given) == wanted
Beispiel #11
0
 def __init__(self, start, stop, seq):
     """Store the ``start``/``stop`` bounds and the normalized ``seq``.

     ``seq`` is passed through ``normalize_seq`` before being stored.
     """
     self.start, self.stop = start, stop
     self.seq = normalize_seq(seq)
Beispiel #12
0
 def seq(self):
     """Return ``self.ref[self.start:self.stop]`` after ``normalize_seq``."""
     ref_slice = self.ref[self.start:self.stop]
     return normalize_seq(ref_slice)
Beispiel #13
0
 def __init__(self, start, stop, seq, phase=None):
     """Store the bounds, the normalized ``seq`` and the optional ``phase``.

     ``seq`` is passed through ``normalize_seq`` before being stored;
     ``phase`` is stored as given and defaults to ``None``.
     """
     self.start, self.stop = start, stop
     self.seq = normalize_seq(seq)
     self.phase = phase