Esempio n. 1
0
    def test_strand_symmetry(self):
        """Check strand_symmetry on DNA, RNA and Aligned sequences.

        Also checks that calling it on a sequence made by the "text" moltype
        raises TypeError, and that motif_length=2 gives per-motif counts.
        """
        from cogent3 import get_moltype
        from cogent3.core.alignment import Aligned

        raw = "ACGGCTGAAGCGCTCCGGGTTTAAAACG"
        expected_observed = [[7, 5], [7, 9]]

        seq = DnaSequence(raw)
        ssym = seq.strand_symmetry(motif_length=1)
        assert_allclose(ssym.observed.array, expected_observed)
        assert_allclose(ssym.expected.array, [[6, 6], [8, 8]])

        # RNA too
        seq = seq.to_rna()
        ssym = seq.strand_symmetry(motif_length=1)
        assert_allclose(ssym.observed.array, expected_observed)

        # Aligned
        seq = DnaSequence(raw)
        m, s = seq.parse_out_gaps()
        seq = Aligned(m, s)
        ssym = seq.strand_symmetry(motif_length=1)
        assert_allclose(ssym.observed.array, expected_observed)

        # Build the fixture outside the context manager: only the call under
        # test may satisfy assertRaises, so a TypeError raised while creating
        # the sequence fails the test instead of passing it by accident.
        text = get_moltype("text")
        _, s = text.make_seq(raw).parse_out_gaps()
        with self.assertRaises(TypeError):
            s.strand_symmetry(motif_length=1)

        # with motif_length=2
        seq = DnaSequence(
            "AC GG CT GA AG CG CT CC GG GT TT AA AA CG".replace(" ", "")
        )
        ssym = seq.strand_symmetry(motif_length=2)
        self.assertLessEqual(len(ssym.observed.keys()), 8)
        assert_allclose(ssym.observed["AA"].to_array(), [2, 1])
        assert_allclose(ssym.observed["CC"].to_array(), [1, 2])
Esempio n. 2
0
def pairwise_to_multiple(pwise, ref_seq, moltype, info=None):
    """
    Merge pairwise alignments against one reference into a multiple alignment.

    Parameters
    ----------
    pwise
        Series of pairwise alignments to ref_seq as
        [(non-refseq name, aligned pair), ...]. It is iterated twice, so it
        must be re-iterable.
    ref_seq
        The sequence common to all pairwise alignments. Must have a ``name``
        attribute.
    moltype
        molecular type for the returned alignment
    info
        info object, passed to the alignment constructor

    Returns
    -------
    ArrayAlign

    Raises
    ------
    TypeError
        If ref_seq has no ``name`` attribute.
    """
    if not hasattr(ref_seq, "name"):
        message = f"ref_seq must be a cogent3 sequence, not {type(ref_seq)}"
        raise TypeError(message)

    ref_name = ref_seq.name

    # first pass: gather every copy of the reference from the pairwise
    # alignments and combine their gaps
    ref_copies = []
    for _, pair in pwise:
        for member in pair.seqs:
            if member.name == ref_name:
                ref_copies.append(member)
    union_gaps = _gap_union(ref_copies)

    ref_map = gap_coords_to_map(union_gaps, len(ref_seq))
    members = [Aligned(ref_map, ref_seq)]

    # second pass: re-map each non-reference sequence where extra gaps are
    # required, otherwise keep it as it was in its pairwise alignment
    for other_name, pair in pwise:
        pair_ref = pair.named_seqs[ref_name]
        pair_ref_gaps = dict(pair_ref.map.get_gap_coordinates())
        other = pair.named_seqs[other_name]
        other_gaps = dict(other.map.get_gap_coordinates())
        extra_gaps = _combined_refseq_gaps(pair_ref_gaps, union_gaps)
        data_length = len(other.data)
        to_inject = _gaps_for_injection(other_gaps, extra_gaps, data_length)
        if to_inject:
            injected_map = gap_coords_to_map(to_inject, data_length)
            other = Aligned(injected_map, other.data)

        members.append(other)

    # default to ArrayAlign
    merged = Alignment(members, moltype=moltype, info=info)
    return merged.to_type(array_align=True, moltype=moltype)
Esempio n. 3
0
 def __init__(self, leaf):
     """Initialise from ``leaf``.

     When ``leaf`` has a ``sequence`` attribute, it is stored as ``self.seq``
     and wrapped in an ``Aligned`` built from a single span covering its
     full length; that is recorded in ``self.aligneds`` keyed by the leaf's
     edge name.
     """
     _Alignable.__init__(self, leaf)
     if hasattr(leaf, "sequence"):
         self.seq = leaf.sequence
         length = len(self.seq)
         # one span, (0, length), covering the whole sequence
         whole_span = [(0, length)]
         wrapped = Aligned(whole_span, self.seq, length)
         self.aligneds = [(self.leaf.edge_name, wrapped)]
     self.max_preds = 1
     self._pog = None
Esempio n. 4
0
def alignment_traceback(seqs, aligned_positions, word_length):
    """Build an Alignment from the traceback of ``aligned_positions``.

    Parameters
    ----------
    seqs
        iterable of (name, sequence) pairs, consumed in step with the
        traceback results
    aligned_positions
        passed unchanged to ``map_traceback``
    word_length
        factor applied to each map and to each start/end before slicing
        the corresponding sequence

    Returns
    -------
    Alignment constructed with ``moltype=None``
    """
    starts, ends, maps = map_traceback(aligned_positions)
    named_aligneds = [
        (
            name,
            Aligned(
                amap * word_length,
                seq[start * word_length : end * word_length],
            ),
        )
        for start, end, amap, (name, seq) in zip(starts, ends, maps, seqs)
    ]
    return Alignment(moltype=None, data=named_aligneds)
Esempio n. 5
0
 def test_convert_input(self):
     """_convert_input accepts both an Aligned instance and a raw string"""
     raw = "ACGGT--A"
     gap_map, bare_seq = DNA.make_seq(raw).parse_out_gaps()

     # Aligned input: the very same map and sequence objects come back
     got_map, got_seq = _convert_input(Aligned(gap_map, bare_seq), None)
     self.assertIs(got_seq.moltype, DNA)
     self.assertIs(got_map, gap_map)
     self.assertIs(got_seq, bare_seq)

     # string input with a moltype: results match by string value
     got_map, got_seq = _convert_input(raw, DNA)
     self.assertEqual(str(got_map), str(gap_map))
     self.assertEqual(str(got_seq), str(bare_seq))
Esempio n. 6
0
    def _make_aligned(self, feature_types=None, where_feature=None):
        """Build the Aligned for the cached region and cache the result.

        The result is stored under ``self._cached["aligned_seq"]``; nothing
        is returned. When the cached region is None, None is stored instead.

        Parameters
        ----------
        feature_types
            when truthy, the sequence is obtained from the region's
            ``get_annotated_seq`` rather than its ``seq`` attribute
        where_feature
            forwarded to ``get_annotated_seq`` together with feature_types
        """
        if self.aln_loc is None or self.aln_map is None:  # is this required?
            self._make_map_func()

        region = self._cached["region"]
        if region is None:
            self._cached["aligned_seq"] = None
            return

        seq = (
            region.get_annotated_seq(feature_types, where_feature)
            if feature_types
            else region.seq
        )

        # we get the seq objects to allow for copying of their annotations
        self._cached["aligned_seq"] = Aligned(self.aln_map, seq)
Esempio n. 7
0
def deserialise_seq(data, aligned=False):
    """deserialises sequence and any annotations

    Parameters
    ----------
    data : dict
        a result of json.loads of a to_rich_dict(). Only a shallow copy is
        consumed, so the caller's dict keeps all of its top-level keys.
    aligned
        whether sequence type is for an Alignment, in which case an Aligned
        instance will be returned. Forced to True when the "seq" value
        contains "-".

    Returns
    -------
    The sequence made by the moltype's ``make_seq`` from the remaining
    entries of data, or, when aligned, an Aligned wrapping that sequence
    after its gaps were parsed out.

    Raises
    ------
    KeyError
        If data lacks a "moltype", "type" or "seq" entry.
    """
    from cogent3.core.moltype import get_moltype

    # Pop from a copy: the entries removed below are not make_seq arguments,
    # but the caller may still want them on its own dict.
    data = dict(data)
    data.pop("version", None)
    moltype = get_moltype(data.pop("moltype"))
    annotations = data.pop("annotations", None)
    # NOTE(review): the returned class is not used; the call is retained in
    # case resolving the type has validation/import effects -- confirm.
    _get_class(data.pop("type"))
    if "-" in data["seq"]:
        aligned = True

    result = moltype.make_seq(**data)
    map_ = None
    if aligned:
        map_, result = result.parse_out_gaps()

    # annotations are attached before any Aligned wrapping
    if annotations:
        deserialise_annotation(annotations, result)

    if aligned:
        result = Aligned(map_, result)

    return result
Esempio n. 8
0
def make_aligned(gaps_lengths, seq, name="seq1"):
    """Return an Aligned for ``seq`` with gaps described by ``gaps_lengths``.

    Parameters
    ----------
    gaps_lengths
        gap description handed to ``gap_coords_to_map``
    seq
        object with a ``moltype`` attribute; it is re-made through that
        moltype's ``make_seq`` so it carries ``name``
    name
        name given to the re-made sequence
    """
    named_seq = seq.moltype.make_seq(seq, name=name)
    gap_map = gap_coords_to_map(gaps_lengths, len(named_seq))
    return Aligned(gap_map, named_seq)