Esempio n. 1
0
 def test_complement_ambiguous_dna_values(self):
     # For every ambiguity code, complementing the letters it expands to
     # must give the same set of letters as the expansion of the code that
     # the complement table pairs it with.
     for code, expansion in sorted(ambiguous_dna_values.items()):
         dna = Seq.Seq(expansion, alphabet=IUPAC.ambiguous_dna)
         observed = set(str(dna.complement()))
         partner = ambiguous_dna_complement[code]
         expected = set(ambiguous_dna_values[partner])
         self.assertEqual(observed, expected)
Esempio n. 2
0
def _calc_at_content_values():
    """Build two symbol -> A/T fraction tables from ``ambiguous_dna_values``.

    The fraction for a symbol is the number of "A" and "T" characters in
    its expansion divided by the length of that expansion. Symbols that
    occur in the string "GCTASWN" are stored in the first table, all other
    symbols in the second. Every symbol is stored under both its original
    and its lower-cased spelling.

    Returns:
        A 2-tuple of dicts: (table for symbols in "GCTASWN", table for
        the remaining symbols).
    """
    listed = "GCTASWN"
    in_listed = {}
    not_listed = {}
    for symbol, expansion in ambiguous_dna_values.items():
        target = in_listed if symbol in listed else not_listed
        at_hits = expansion.count("A") + expansion.count("T")
        fraction = float(at_hits) / len(expansion)
        target[symbol] = fraction
        target[symbol.lower()] = fraction
    return in_listed, not_listed
Esempio n. 3
0
def _calc_gc_content_values():
    """Build two symbol -> G/C fraction tables from ``ambiguous_dna_values``.

    For each symbol the fraction is the count of "C" plus "G" characters in
    its expansion, divided by the expansion length. Symbols contained in
    the string "GCTASWN" land in the first table; every other symbol lands
    in the second. Both the original and the lower-cased symbol are stored.

    Returns:
        A 2-tuple of dicts: (table for symbols in "GCTASWN", table for
        the remaining symbols).
    """
    gc_listed, gc_other = {}, {}
    for code, letters in ambiguous_dna_values.items():
        bucket = gc_listed if code in "GCTASWN" else gc_other
        ratio = float(letters.count("C") + letters.count("G")) / len(letters)
        # Same value under the symbol as given and under its lower-case form.
        for key in (code, code.lower()):
            bucket[key] = ratio
    return gc_listed, gc_other
Esempio n. 4
0
File: vcf.py Progetto: jsgounot/PgPy
def load_iupac():
    """Map every ordering of a code's letters to a 1-tuple holding that code.

    Built in two passes over ``ambiguous_dna_values``: first each expansion
    (as a tuple of characters) is mapped to its code, so that codes sharing
    an identical expansion collapse to the one seen last; then every
    permutation of each expansion is mapped to ``(code,)``.

    Returns:
        dict keyed by tuples of letters, with 1-tuples of codes as values.
    """
    code_by_letters = {}
    for code, letters in ambiguous_dna_values.items():
        code_by_letters[tuple(letters)] = code

    lookup = {}
    for letters, code in code_by_letters.items():
        for ordering in permutations(letters):
            lookup[ordering] = (code, )
    return lookup
Esempio n. 5
0
 def test_complement_ambiguous_dna_values(self):
     # Complementing the expansion of each ambiguity code must produce the
     # same letters, with the same counts, as the expansion of the code
     # listed for it in the complement table.
     for letter, expansion in sorted(ambiguous_dna_values.items()):
         complemented = Seq.Seq(expansion).complement()
         partner = ambiguous_dna_complement[letter]
         expected = ambiguous_dna_values[partner]
         self.assertCountEqual(complemented, expected)
Esempio n. 6
0
def complement(sequence):
    """Return ``Seq.reverse_complement(sequence)`` reversed back again.

    Reversing the reverse complement undoes the reversal, which leaves the
    complemented letters in the order of the input.
    """
    # TODO - Add a complement function to Bio/Seq.py?
    # There is already a complement method on the Seq and MutableSeq objects.
    rev_comp = Seq.reverse_complement(sequence)
    return rev_comp[::-1]


def sorted_dict(d):
    """Format *d* like a dict literal, with its items in sorted order.

    Each key and value is rendered with ``repr``; entries are joined by
    ", " and wrapped in braces.
    """
    entries = []
    for key, value in sorted(d.items()):
        entries.append("%r: %r" % (key, value))
    return "{" + ", ".join(entries) + "}"


# Print the DNA ambiguity and complement tables, then for every ambiguity
# code show its expansion next to the complemented expansion and check that
# the latter has the same letters as the expansion of the complementary code.
print("")
print("DNA Ambiguity mapping: %s" % sorted_dict(ambiguous_dna_values))
print("DNA Complement mapping: %s" % sorted_dict(ambiguous_dna_complement))
for ambig_char, values in sorted(ambiguous_dna_values.items()):
    compl_values = complement(values)
    compl_char = ambiguous_dna_complement[ambig_char]
    report = (ambig_char, values, compl_values, compl_char)
    print("%s={%s} --> {%s}=%s" % report)
    expected = set(ambiguous_dna_values[compl_char])
    assert set(compl_values) == expected

print("")
print("RNA Ambiguity mapping: %s" % sorted_dict(ambiguous_rna_values))
print("RNA Complement mapping: %s" % sorted_dict(ambiguous_rna_complement))
for ambig_char, values in sorted(ambiguous_rna_values.items()):
    compl_values = complement(values).replace(
        "T", "U")  # need to help as no alphabet
    print("%s={%s} --> {%s}=%s" % (ambig_char, values, compl_values,
                                   ambiguous_rna_complement[ambig_char]))
    assert set(compl_values) == set(
Esempio n. 7
0
def complement(sequence):
    """Complement *sequence* by reversing its reverse complement.

    ``Seq.reverse_complement`` is applied first and its result is then
    sliced with a step of -1, restoring the original letter order.
    """
    # TODO - Add a complement function to Bio/Seq.py?
    # There is already a complement method on the Seq and MutableSeq objects.
    reversed_complement = Seq.reverse_complement(sequence)
    restored_order = reversed_complement[::-1]
    return restored_order


def sorted_dict(d):
    """Render *d* as ``{key: value, ...}`` text with items sorted.

    Keys and values are shown via ``repr`` and separated by ", ".
    """
    ordered_items = sorted(d.items())
    body = ", ".join(
        "{0!r}: {1!r}".format(key, value) for key, value in ordered_items
    )
    return "{%s}" % body

# DNA section: dump both lookup tables, then walk the ambiguity codes in
# sorted order, printing each expansion with its complement and asserting
# that the complement matches the table entry for the complementary code.
print("")
print("DNA Ambiguity mapping: %s" % sorted_dict(ambiguous_dna_values))
print("DNA Complement mapping: %s" % sorted_dict(ambiguous_dna_complement))
for ambig_char, values in sorted(ambiguous_dna_values.items()):
    compl_values = complement(values)
    compl_char = ambiguous_dna_complement[ambig_char]
    print("%s={%s} --> {%s}=%s"
          % (ambig_char, values, compl_values, compl_char))
    expected_letters = set(ambiguous_dna_values[compl_char])
    assert set(compl_values) == expected_letters

# RNA section: same report and check as for DNA, using the RNA tables.
print("")
print("RNA Ambiguity mapping: %s" % sorted_dict(ambiguous_rna_values))
print("RNA Complement mapping: %s" % sorted_dict(ambiguous_rna_complement))
for ambig_char, values in sorted(ambiguous_rna_values.items()):
    # need to help as no alphabet: rewrite any "T" in the result as "U"
    compl_values = complement(values).replace("T", "U")
    compl_char = ambiguous_rna_complement[ambig_char]
    print("%s={%s} --> {%s}=%s"
          % (ambig_char, values, compl_values, compl_char))
    expected_letters = set(ambiguous_rna_values[compl_char])
    assert set(compl_values) == expected_letters

print("")
Esempio n. 8
0
 def test_complement_ambiguous_dna_values(self):
     # Each ambiguity code's expansion, once complemented, has to contain
     # exactly the letters of the expansion of its complementary code.
     for symbol, letters in sorted(ambiguous_dna_values.items()):
         sequence = Seq.Seq(letters, alphabet=IUPAC.ambiguous_dna)
         complemented = str(sequence.complement())
         partner_letters = ambiguous_dna_values[ambiguous_dna_complement[symbol]]
         self.assertEqual(set(complemented), set(partner_letters))
Esempio n. 9
0
def _calc_base_values(base):
    """Return, per symbol, the fraction of its expansion equal to *base*.

    For every entry of ``ambiguous_dna_values`` the number of occurrences
    of *base* in the expansion is divided by the expansion length. The
    result is stored under the symbol and under its lower-cased form.

    Args:
        base: the substring counted inside each expansion.

    Returns:
        dict mapping symbol (both spellings) to a float fraction.
    """
    fractions = {}
    for symbol, expansion in ambiguous_dna_values.items():
        share = float(expansion.count(base)) / len(expansion)
        fractions[symbol] = share
        fractions[symbol.lower()] = share
    return fractions
Esempio n. 10
0
                if debug:
                    print('yield', segment_smpl_seqs,
                          [(snp_['start'], snp_['stop'] - 1)
                           for snp_ in snps_to_yield])
                yield segment, segment_smpl_seqs, snps_to_yield

    reamining_smpl_seqs = [srfs[pos - offset:] for srfs in sample_ref_seqs]
    #remaining_seq = ref_seq[pos - offset:]
    if reamining_smpl_seqs[0]:
        reg = pos, region[2] if len(region) > 2 else None
        yield reg, reamining_smpl_seqs, []


# Lookup from a sorted tuple of nucleotide letters to the ambiguity symbol
# that expands to them, with the symbol stored as UTF-8 encoded bytes. When
# several symbols expand to the same letters, the one iterated last is kept.
IUPAC = {
    tuple(sorted(letters)): symbol.encode('utf8')
    for (symbol, letters) in ambiguous_dna_values.items()
}


def to_str(bytes_):
    """Decode *bytes_* as UTF-8 when it is a ``bytes`` object.

    Any value that is not ``bytes`` is returned unchanged.
    """
    if not isinstance(bytes_, bytes):
        return bytes_
    return bytes_.decode('utf-8')


def to_bytes(str_):
    """Encode *str_* as UTF-8 unless it already is a ``bytes`` object.

    A ``bytes`` argument is returned unchanged; anything else has its
    ``encode('utf-8')`` method called.
    """
    if isinstance(str_, bytes):
        return str_
    return str_.encode('utf-8')
Esempio n. 11
0
            pos = max([snp_['stop'] for snp_ in snps_to_yield])
            if len(region) < 3 or pos <= region[2]:
                # Are the SNPs inside the region to return?
                if debug:
                    print('yield', segment_smpl_seqs,
                          [(snp_['start'], snp_['stop'] - 1) for snp_ in snps_to_yield])
                yield  segment, segment_smpl_seqs, snps_to_yield

    reamining_smpl_seqs = [srfs[pos - offset:] for srfs in sample_ref_seqs]
    #remaining_seq = ref_seq[pos - offset:]
    if reamining_smpl_seqs[0]:
        reg = pos, region[2] if len(region) > 2 else None
        yield reg, reamining_smpl_seqs, []


# Maps the sorted tuple of letters an ambiguity code expands to onto that
# code as UTF-8 bytes; a later code with the same letters replaces an
# earlier one.
IUPAC = {
    tuple(sorted(expansion)): code.encode('utf8')
    for code, expansion in ambiguous_dna_values.items()
}


def to_str(bytes_):
    """Return text for a ``bytes`` input, and any other input as-is.

    ``bytes`` values are decoded with the 'utf-8' codec.
    """
    return bytes_.decode('utf-8') if isinstance(bytes_, bytes) else bytes_


def to_bytes(str_):
    """Return *str_* as ``bytes``, encoding with 'utf-8' when needed.

    Values that already are ``bytes`` pass through untouched.
    """
    return str_ if isinstance(str_, bytes) else str_.encode('utf-8')