def add_snp(options, snp, position, sequence):
    """Write the call for ``snp`` into ``sequence[position]``.

    If ``snp.alt`` is ".", ``snp.ref`` is stored unchanged. Otherwise the
    value returned by ``vcfwrap.get_ml_genotype`` for sample
    ``options.nth_sample`` is joined into a string and stored after being
    passed through ``sequences.encode_genotype``.

    ``sequence`` is modified in place; nothing is returned.
    """
    # NOTE(review): "." presumably means "no alternative allele" - confirm.
    if snp.alt == ".":
        sequence[position] = snp.ref
        return

    alleles = vcfwrap.get_ml_genotype(snp, options.nth_sample)
    sequence[position] = sequences.encode_genotype("".join(alleles))
def filter_singletons(self, to_filter, filter_using):
    """Restrict the bases of one record to those seen in other records.

    ``self._group(filter_using, to_filter)`` supplies the included records,
    the excluded records and the record to be filtered. For every column
    of the included records, the filtered record's base is reduced to the
    nucleotides (per ``NT_CODES``) that also occur in that column; "N" and
    "-" are ignored on both sides. A position with no shared nucleotide
    becomes "N". Positions whose code changes are written in lowercase.

    Returns an ``MSA`` built from the new record followed by the included
    and then the excluded records.
    """
    included, excluded, target = self._group(filter_using, to_filter)

    bases = list(target.sequence)
    references = [record.sequence.upper() for record in included]

    for index, column in enumerate(zip(*references)):
        observed = bases[index].upper()
        if observed in "N-":
            continue

        # Collect every nucleotide supported by the included records.
        supported = set()
        for nt in column:
            if nt in "N-":
                continue
            supported.update(NT_CODES[nt])

        shared = frozenset(NT_CODES[observed]) & supported
        genotype = encode_genotype(shared if shared else "N")
        if genotype == observed:
            continue

        # Lowercase marks positions that were changed by the filter.
        bases[index] = genotype.lower()

    filtered_record = FASTA(target.name, target.meta, "".join(bases))
    return MSA([filtered_record] + included + excluded)
def test_genotype__bad_input(value):
    """``encode_genotype`` must raise ``ValueError`` for ``value``."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        encode_genotype(value)
def test_genotype__bad_input__mixedcase():
    """Mixed-case input ("At") must be rejected with ``ValueError``."""
    # Without an explicit expectation this test could only pass when the
    # bad input was (wrongly) accepted.
    with pytest.raises(ValueError):
        encode_genotype("At")
def test_genotype__bad_input__lowercase():
    """Lowercase input ("a") must be rejected with ``ValueError``."""
    # Without an explicit expectation this test could only pass when the
    # bad input was (wrongly) accepted.
    with pytest.raises(ValueError):
        encode_genotype("a")
def test_function(src, dst):
    """Check that ``encode_genotype(src)`` equals ``dst``."""
    observed = encode_genotype(src)
    assert_equal(observed, dst)
def test_function(sequence):
    """Check that ``sequence`` is encoded as "Y"."""
    observed = encode_genotype(sequence)
    assert_equal(observed, "Y")
def test_genotype__bad_input__non_nucleotide():
    """A non-nucleotide character ("+") must be rejected with ``ValueError``."""
    # Without an explicit expectation this test could only pass when the
    # bad input was (wrongly) accepted.
    with pytest.raises(ValueError):
        encode_genotype("+")
def test_genotype__permutations(src, dst):
    """Every ordering of the characters in ``src`` must encode to ``dst``."""
    for seq in itertools.permutations(src):
        # Join the permutation itself; joining ``src`` would re-test the
        # original ordering on every iteration.
        assert encode_genotype("".join(seq)) == dst
def test_comma_or_not(sequence):
    """Check that ``sequence`` is encoded as "Y"."""
    observed = encode_genotype(sequence)
    assert observed == "Y"
def test_genotype__bad_input__unknown_nucleotide():
    """An unknown nucleotide code ("Z") must be rejected with ``ValueError``."""
    # Without an explicit expectation this test could only pass when the
    # bad input was (wrongly) accepted.
    with pytest.raises(ValueError):
        encode_genotype("Z")