def add_snp(options, snp, position, sequence):
    """Store the base call for `snp` in `sequence` at index `position`.

    When `snp.alt` is "." the reference value `snp.ref` is stored unchanged.
    Otherwise the strings returned by `vcfwrap.get_ml_genotype` for the
    sample `options.nth_sample` are joined and converted with
    `sequences.encode_genotype` before being stored.

    `sequence` is modified in place and nothing is returned.
    """
    if snp.alt == ".":
        # No alternative allele is recorded, so keep the reference as-is.
        sequence[position] = snp.ref
        return

    # presumably the most likely genotype for the sample -- verify in vcfwrap
    called = vcfwrap.get_ml_genotype(snp, options.nth_sample)
    sequence[position] = sequences.encode_genotype("".join(called))
Exemple #2
0
    def filter_singletons(self, to_filter, filter_using):
        """Limit the bases of `to_filter` to those seen in `filter_using`.

        `self._group` splits the records into the record to filter, the
        records used for comparison (`included`) and the rest (`excluded`).
        For each column of the comparison records, the nucleotides encoded
        by the filtered record's base (looked up in `NT_CODES`) are
        intersected with the nucleotides encoded by the comparison bases.
        "N" and "-" are skipped on both sides. If the intersection is empty
        the position becomes "N". Any position whose re-encoded base differs
        from the original is written in lower-case.

        Returns a new `MSA` holding the rewritten record followed by the
        `included` and `excluded` records.
        """
        included, excluded, to_filter \
            = self._group(filter_using, to_filter)

        filtered = list(to_filter.sequence)
        references = [record.sequence.upper() for record in included]
        for position, column in enumerate(zip(*references)):
            observed = filtered[position].upper()
            if observed in "N-":
                # Missing data and gaps in the filtered record are kept.
                continue

            # Every nucleotide supported by at least one comparison record.
            supported = set()
            supported.update(
                *(NT_CODES[nt] for nt in column if nt not in "N-"))

            remaining = frozenset(NT_CODES[observed]) & supported
            genotype = encode_genotype(remaining or "N")
            if genotype != observed:
                # Lower-case marks positions altered by the filter.
                filtered[position] = genotype.lower()

        rewritten = FASTA(to_filter.name, to_filter.meta, "".join(filtered))
        return MSA([rewritten] + included + excluded)
Exemple #3
0
    def filter_singletons(self, to_filter, filter_using):
        """Limit the bases of `to_filter` to those seen in `filter_using`.

        `self._group` splits the records into the record to filter, the
        records used for comparison (`included`) and the rest (`excluded`).
        For each column of the comparison records, the nucleotides encoded
        by the filtered record's base (looked up in `NT_CODES`) are
        intersected with the nucleotides encoded by the comparison bases.
        "N" and "-" are skipped on both sides. If the intersection is empty
        the position becomes "N". Any position whose re-encoded base differs
        from the original is written in lower-case.

        Returns a new `MSA` holding the rewritten record followed by the
        `included` and `excluded` records.
        """
        included, excluded, to_filter \
            = self._group(filter_using, to_filter)

        filtered = list(to_filter.sequence)
        references = [record.sequence.upper() for record in included]
        for position, column in enumerate(zip(*references)):
            observed = filtered[position].upper()
            if observed in "N-":
                # Missing data and gaps in the filtered record are kept.
                continue

            # Every nucleotide supported by at least one comparison record.
            supported = set()
            supported.update(
                *(NT_CODES[nt] for nt in column if nt not in "N-"))

            remaining = frozenset(NT_CODES[observed]) & supported
            genotype = encode_genotype(remaining or "N")
            if genotype != observed:
                # Lower-case marks positions altered by the filter.
                filtered[position] = genotype.lower()

        rewritten = FASTA(to_filter.name,
                          to_filter.meta,
                          "".join(filtered))
        return MSA([rewritten] + included + excluded)
Exemple #4
0
def add_snp(options, snp, position, sequence):
    """Store the base call for `snp` in `sequence` at index `position`.

    When `snp.alt` is "." the reference value `snp.ref` is stored unchanged.
    Otherwise the strings returned by `vcfwrap.get_ml_genotype` for the
    sample `options.nth_sample` are joined and converted with
    `sequences.encode_genotype` before being stored.

    `sequence` is modified in place and nothing is returned.
    """
    if snp.alt == ".":
        # No alternative allele is recorded, so keep the reference as-is.
        sequence[position] = snp.ref
        return

    # presumably the most likely genotype for the sample -- verify in vcfwrap
    called = vcfwrap.get_ml_genotype(snp, options.nth_sample)
    sequence[position] = sequences.encode_genotype("".join(called))
Exemple #5
0
def test_genotype__bad_input(value):
    """Check that `encode_genotype(value)` raises `ValueError`.

    NOTE(review): `value` is presumably supplied by a parametrize decorator
    that is not visible in this block -- confirm.
    """
    rejects_input = pytest.raises(ValueError)
    with rejects_input:
        encode_genotype(value)
Exemple #6
0
def test_genotype__bad_input__mixedcase():
    """Call `encode_genotype` with the mixed-case input "At".

    NOTE(review): the name suggests this input should be rejected, but no
    expected-exception check is visible in this block -- confirm that a
    decorator or wrapper supplies it.
    """
    mixed_case = "At"
    encode_genotype(mixed_case)
Exemple #7
0
def test_genotype__bad_input__lowercase():
    """Call `encode_genotype` with the lower-case input "a".

    NOTE(review): the name suggests this input should be rejected, but no
    expected-exception check is visible in this block -- confirm that a
    decorator or wrapper supplies it.
    """
    lower_case = "a"
    encode_genotype(lower_case)
Exemple #8
0
 def test_function(src, dst):
     """Assert that `encode_genotype(src)` equals `dst`."""
     observed = encode_genotype(src)
     assert_equal(observed, dst)
Exemple #9
0
 def test_function(sequence):
     """Assert that `encode_genotype(sequence)` equals "Y"."""
     observed = encode_genotype(sequence)
     assert_equal(observed, "Y")
Exemple #10
0
def test_genotype__bad_input__non_nucleotide():
    """Call `encode_genotype` with the non-nucleotide input "+".

    NOTE(review): the name suggests this input should be rejected, but no
    expected-exception check is visible in this block -- confirm that a
    decorator or wrapper supplies it.
    """
    non_nucleotide = "+"
    encode_genotype(non_nucleotide)
Exemple #11
0
def test_genotype__bad_input__non_nucleotide():
    """Call `encode_genotype` with the non-nucleotide input "+".

    NOTE(review): the name suggests this input should be rejected, but no
    expected-exception check is visible in this block -- confirm that a
    decorator or wrapper supplies it.
    """
    non_nucleotide = "+"
    encode_genotype(non_nucleotide)
Exemple #12
0
def test_genotype__permutations(src, dst):
    """Check that every ordering of `src` encodes to `dst`.

    Each permutation of the items in `src` is joined into a string and
    passed to `encode_genotype`; the result must equal `dst`.

    NOTE(review): `src` and `dst` are presumably supplied by a parametrize
    decorator that is not visible in this block -- confirm.
    """
    for seq in itertools.permutations(src):
        # Join the permuted tuple rather than `src` itself; otherwise the
        # same unpermuted input is re-checked on every iteration.
        assert encode_genotype("".join(seq)) == dst
Exemple #13
0
def test_comma_or_not(sequence):
    """Assert that `encode_genotype(sequence)` equals "Y".

    NOTE(review): `sequence` is presumably supplied by a parametrize
    decorator that is not visible in this block -- confirm.
    """
    observed = encode_genotype(sequence)
    assert observed == "Y"
Exemple #14
0
def test_genotype__bad_input__mixedcase():
    """Call `encode_genotype` with the mixed-case input "At".

    NOTE(review): the name suggests this input should be rejected, but no
    expected-exception check is visible in this block -- confirm that a
    decorator or wrapper supplies it.
    """
    mixed_case = "At"
    encode_genotype(mixed_case)
Exemple #15
0
def test_genotype__bad_input__lowercase():
    """Call `encode_genotype` with the lower-case input "a".

    NOTE(review): the name suggests this input should be rejected, but no
    expected-exception check is visible in this block -- confirm that a
    decorator or wrapper supplies it.
    """
    lower_case = "a"
    encode_genotype(lower_case)
Exemple #16
0
 def test_function(src, dst):
     """Assert that `encode_genotype(src)` equals `dst`."""
     observed = encode_genotype(src)
     assert_equal(observed, dst)
Exemple #17
0
 def test_function(sequence):
     """Assert that `encode_genotype(sequence)` equals "Y"."""
     observed = encode_genotype(sequence)
     assert_equal(observed, "Y")
Exemple #18
0
def test_genotype__bad_input__unknown_nucleotide():
    """Call `encode_genotype` with the input "Z".

    NOTE(review): the name suggests this input should be rejected, but no
    expected-exception check is visible in this block -- confirm that a
    decorator or wrapper supplies it.
    """
    unknown_nucleotide = "Z"
    encode_genotype(unknown_nucleotide)
Exemple #19
0
def test_genotype__bad_input__unknown_nucleotide():
    """Call `encode_genotype` with the input "Z".

    NOTE(review): the name suggests this input should be rejected, but no
    expected-exception check is visible in this block -- confirm that a
    decorator or wrapper supplies it.
    """
    unknown_nucleotide = "Z"
    encode_genotype(unknown_nucleotide)