Ejemplo n.º 1
0
def add_snp(options, snp, position, sequence):
    """Write the call for `snp` into `sequence` at index `position`.

    When ``snp.alt`` is ".", ``snp.ref`` is stored as-is. Otherwise the
    result of ``vcfwrap.get_ml_genotype(snp, options.nth_sample)`` is joined
    into a string and stored after passing through
    ``sequences.encode_genotype``. `sequence` is modified in place.
    """
    if snp.alt == ".":
        sequence[position] = snp.ref
        return

    calls = vcfwrap.get_ml_genotype(snp, options.nth_sample)
    sequence[position] = sequences.encode_genotype("".join(calls))
Ejemplo n.º 2
0
    def filter_singletons(self, to_filter, filter_using):
        """Restrict one record to nucleotides also seen in the included records.

        ``self._group(filter_using, to_filter)`` yields the included records,
        the excluded records and the record to filter. For every column of
        the upper-cased included sequences, the filtered record's code is
        expanded through ``NT_CODES`` and intersected with the expanded codes
        of the included records ("N" and "-" in the included records are
        ignored). Columns where the filtered record holds "N" or "-" are left
        untouched. An empty intersection is replaced by "N" before encoding,
        and a position whose encoded genotype differs from the upper-cased
        original is rewritten in lower case.

        Returns an ``MSA`` built from the rewritten record followed by the
        included and excluded records.
        """
        included, excluded, to_filter \
            = self._group(filter_using, to_filter)

        bases = list(to_filter.sequence)
        references = [record.sequence.upper() for record in included]
        # zip(*references) walks the included sequences column by column.
        for position, column in enumerate(zip(*references)):
            observed = bases[position].upper()
            if observed in "N-":
                continue

            permitted = {
                nt
                for reference_nt in column
                if reference_nt not in "N-"
                for nt in NT_CODES[reference_nt]
            }
            # Fall back to "N" when nothing is shared with the references.
            surviving = (frozenset(NT_CODES[observed]) & permitted) or "N"

            replacement = encode_genotype(surviving)
            if replacement != observed:
                bases[position] = replacement.lower()

        filtered = FASTA(to_filter.name, to_filter.meta, "".join(bases))
        return MSA([filtered] + included + excluded)
Ejemplo n.º 3
0
    def filter_singletons(self, to_filter, filter_using):
        """Filter a record against the nucleotides present in other records.

        The records are partitioned with
        ``self._group(filter_using, to_filter)`` into included records,
        excluded records and the single record being filtered. Each position
        of that record (except ones holding "N" or "-") is reduced to the
        ``NT_CODES`` expansion it shares with the included records at the
        same column; "N" and "-" in the included records contribute nothing.
        If nothing is shared, "N" is encoded instead. Positions whose encoded
        genotype differs from the upper-cased original are stored lower-cased.

        Returns an ``MSA`` of the new record plus the included and excluded
        records, in that order.
        """
        included, excluded, to_filter \
            = self._group(filter_using, to_filter)

        filtered_seq = list(to_filter.sequence)
        upper_cased = [record.sequence.upper() for record in included]
        for index, column in enumerate(zip(*upper_cased)):
            current_nt = filtered_seq[index].upper()
            if current_nt in "N-":
                continue

            # Union of every nucleotide the included records allow here.
            allowed_nts = set().union(
                *(NT_CODES[nt] for nt in column if nt not in "N-")
            )
            shared_nts = frozenset(NT_CODES[current_nt]) & allowed_nts

            genotype = encode_genotype(shared_nts if shared_nts else "N")
            if genotype == current_nt:
                continue
            filtered_seq[index] = genotype.lower()

        new_sequence = "".join(filtered_seq)
        new_record = FASTA(to_filter.name, to_filter.meta, new_sequence)

        return MSA([new_record] + included + excluded)
Ejemplo n.º 4
0
def add_snp(options, snp, position, sequence):
    """Store the encoded call for `snp` in ``sequence[position]``.

    A record whose ``alt`` is "." contributes its ``ref`` value unchanged;
    any other record contributes
    ``sequences.encode_genotype`` applied to the joined output of
    ``vcfwrap.get_ml_genotype(snp, options.nth_sample)``. The assignment
    mutates `sequence` in place and nothing is returned.
    """
    if snp.alt == ".":
        encoded = snp.ref
    else:
        ml_genotype = vcfwrap.get_ml_genotype(snp, options.nth_sample)
        encoded = sequences.encode_genotype("".join(ml_genotype))
    sequence[position] = encoded
Ejemplo n.º 5
0
def test_genotype__bad_input(value):
    """Expect ``encode_genotype`` to raise ``ValueError`` for `value`."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        encode_genotype(value)
Ejemplo n.º 6
0
def test_genotype__bad_input__mixedcase():
    """Call ``encode_genotype`` with the mixed-case genotype "At".

    NOTE(review): the name implies this input is rejected, but nothing
    visible here asserts a failure -- confirm the expectation (e.g. a
    decorator) is declared where this test is defined.
    """
    mixed_case_genotype = "At"
    encode_genotype(mixed_case_genotype)
Ejemplo n.º 7
0
def test_genotype__bad_input__lowercase():
    """Call ``encode_genotype`` with the lower-case genotype "a".

    NOTE(review): the name implies this input is rejected, but nothing
    visible here asserts a failure -- confirm the expectation (e.g. a
    decorator) is declared where this test is defined.
    """
    lower_case_genotype = "a"
    encode_genotype(lower_case_genotype)
Ejemplo n.º 8
0
 def test_function(src, dst):
     """Assert that ``encode_genotype(src)`` equals `dst`."""
     encoded = encode_genotype(src)
     assert_equal(encoded, dst)
Ejemplo n.º 9
0
 def test_function(sequence):
     """Assert that ``encode_genotype(sequence)`` equals "Y"."""
     encoded = encode_genotype(sequence)
     assert_equal(encoded, "Y")
Ejemplo n.º 10
0
def test_genotype__bad_input__non_nucleotide():
    """Call ``encode_genotype`` with the non-nucleotide character "+".

    NOTE(review): the name implies this input is rejected, but nothing
    visible here asserts a failure -- confirm the expectation (e.g. a
    decorator) is declared where this test is defined.
    """
    non_nucleotide = "+"
    encode_genotype(non_nucleotide)
Ejemplo n.º 11
0
def test_genotype__bad_input__non_nucleotide():
    """Pass "+", which is not a nucleotide, to ``encode_genotype``.

    NOTE(review): presumably this is expected to fail, going by the test
    name; the code that checks for the failure is not visible here --
    verify it exists alongside this definition.
    """
    bad_genotype = "+"
    encode_genotype(bad_genotype)
Ejemplo n.º 12
0
def test_genotype__permutations(src, dst):
    """Check that every ordering of the characters in `src` encodes to `dst`.

    Each permutation produced by ``itertools.permutations(src)`` is joined
    back into a string and passed to ``encode_genotype``; the result must
    equal `dst` for all of them.
    """
    for seq in itertools.permutations(src):
        # Join the permuted tuple; joining `src` would re-test the same,
        # unpermuted input on every iteration.
        assert encode_genotype("".join(seq)) == dst
Ejemplo n.º 13
0
def test_comma_or_not(sequence):
    """Assert that ``encode_genotype(sequence)`` returns "Y"."""
    encoded = encode_genotype(sequence)
    assert encoded == "Y"
Ejemplo n.º 14
0
def test_genotype__bad_input__mixedcase():
    """Pass the mixed-case string "At" to ``encode_genotype``.

    NOTE(review): presumably this is expected to fail, going by the test
    name; the code that checks for the failure is not visible here --
    verify it exists alongside this definition.
    """
    genotype = "At"
    encode_genotype(genotype)
Ejemplo n.º 15
0
def test_genotype__bad_input__lowercase():
    """Pass the lower-case string "a" to ``encode_genotype``.

    NOTE(review): presumably this is expected to fail, going by the test
    name; the code that checks for the failure is not visible here --
    verify it exists alongside this definition.
    """
    genotype = "a"
    encode_genotype(genotype)
Ejemplo n.º 16
0
 def test_function(src, dst):
     """Compare the encoding of `src` against the expected value `dst`."""
     actual = encode_genotype(src)
     assert_equal(actual, dst)
Ejemplo n.º 17
0
 def test_function(sequence):
     """Compare the encoding of `sequence` against the expected value "Y"."""
     actual = encode_genotype(sequence)
     assert_equal(actual, "Y")
Ejemplo n.º 18
0
def test_genotype__bad_input__unknown_nucleotide():
    """Call ``encode_genotype`` with the character "Z".

    NOTE(review): the name implies "Z" is rejected as an unknown
    nucleotide, but nothing visible here asserts a failure -- confirm the
    expectation (e.g. a decorator) is declared where this test is defined.
    """
    unknown_nucleotide = "Z"
    encode_genotype(unknown_nucleotide)
Ejemplo n.º 19
0
def test_genotype__bad_input__unknown_nucleotide():
    """Pass "Z" to ``encode_genotype``.

    NOTE(review): presumably this is expected to fail, going by the test
    name; the code that checks for the failure is not visible here --
    verify it exists alongside this definition.
    """
    bad_genotype = "Z"
    encode_genotype(bad_genotype)