def test_strand_symmetry(self):
    """strand_symmetry gives the expected counts for DNA, RNA and Aligned input"""
    from cogent3 import get_moltype
    from cogent3.core.alignment import Aligned

    raw = "ACGGCTGAAGCGCTCCGGGTTTAAAACG"
    single_nuc_observed = [[7, 5], [7, 9]]

    # plain DNA sequence, motifs of length 1
    dna = DnaSequence(raw)
    result = dna.strand_symmetry(motif_length=1)
    assert_allclose(result.observed.array, single_nuc_observed)
    assert_allclose(result.expected.array, [[6, 6], [8, 8]])

    # RNA too
    rna = dna.to_rna()
    result = rna.strand_symmetry(motif_length=1)
    assert_allclose(result.observed.array, single_nuc_observed)

    # Aligned
    gap_map, ungapped = DnaSequence(raw).parse_out_gaps()
    aligned = Aligned(gap_map, ungapped)
    result = aligned.strand_symmetry(motif_length=1)
    assert_allclose(result.observed.array, single_nuc_observed)

    # a sequence made from the "text" moltype is expected to raise TypeError
    # somewhere in these three steps
    with self.assertRaises(TypeError):
        text = get_moltype("text")
        m, s = text.make_seq(raw).parse_out_gaps()
        s.strand_symmetry(motif_length=1)

    # with motif_length=2
    dinuc_seq = DnaSequence(
        "AC GG CT GA AG CG CT CC GG GT TT AA AA CG".replace(" ", "")
    )
    result = dinuc_seq.strand_symmetry(motif_length=2)
    self.assertLessEqual(len(result.observed.keys()), 8)
    assert_allclose(result.observed["AA"].to_array(), [2, 1])
    assert_allclose(result.observed["CC"].to_array(), [1, 2])
def pairwise_to_multiple(pwise, ref_seq, moltype, info=None):
    """
    turns pairwise alignments to a reference into a multiple alignment

    Parameters
    ----------
    pwise
        Series of pairwise alignments to ref_seq as
        [(non-refseq name, aligned pair), ...]. Any iterable is accepted;
        it is read once into a list because it is traversed twice here.
    ref_seq
        The sequence common in all pairwise alignments
    moltype
        molecular type for the returned alignment
    info
        info object

    Returns
    -------
    ArrayAlign

    Raises
    ------
    TypeError
        if ref_seq has no ``name`` attribute
    """
    if not hasattr(ref_seq, "name"):
        raise TypeError(f"ref_seq must be a cogent3 sequence, not {type(ref_seq)}")

    # pwise is walked twice (collecting the reference copies, then the main
    # loop); without this a generator would be empty on the second pass and
    # only the reference would end up in the result
    pwise = list(pwise)
    ref_name = ref_seq.name

    # every copy of the reference, one per pairwise alignment
    refseqs = [s for _, aln in pwise for s in aln.seqs if s.name == ref_name]
    ref_gaps = _gap_union(refseqs)

    m = gap_coords_to_map(ref_gaps, len(ref_seq))
    aligned = [Aligned(m, ref_seq)]
    for other_name, aln in pwise:
        curr_ref = aln.named_seqs[ref_name]
        curr_ref_gaps = dict(curr_ref.map.get_gap_coordinates())
        other_seq = aln.named_seqs[other_name]
        other_gaps = dict(other_seq.map.get_gap_coordinates())
        diff_gaps = _combined_refseq_gaps(curr_ref_gaps, ref_gaps)
        inject = _gaps_for_injection(other_gaps, diff_gaps, len(other_seq.data))
        if inject:
            # rebuild the non-reference sequence with the injected gaps
            m = gap_coords_to_map(inject, len(other_seq.data))
            other_seq = Aligned(m, other_seq.data)

        aligned.append(other_seq)

    # default to ArrayAlign
    return Alignment(aligned, moltype=moltype, info=info).to_type(
        array_align=True, moltype=moltype
    )
def __init__(self, leaf):
    """Initialise from leaf via _Alignable.

    When leaf has a ``sequence`` attribute it is stored as ``self.seq`` and
    ``self.aligneds`` is set to a single (edge name, Aligned) pair whose
    Aligned is built from one span covering the full sequence length.
    ``max_preds`` and ``_pog`` are always set.
    """
    _Alignable.__init__(self, leaf)

    if hasattr(leaf, "sequence"):
        self.seq = leaf.sequence
        length = len(self.seq)
        whole_seq = Aligned([(0, length)], self.seq, length)
        # presumably self.leaf is assigned by _Alignable.__init__ -- confirm
        self.aligneds = [(self.leaf.edge_name, whole_seq)]

    self.max_preds = 1
    self._pog = None
def alignment_traceback(seqs, aligned_positions, word_length):
    """Alignment object built from the traceback of aligned positions.

    Parameters
    ----------
    seqs
        series of (name, seq) pairs, consumed in step with the starts, ends
        and maps returned by map_traceback
    aligned_positions
        passed unchanged to map_traceback
    word_length
        factor applied to each map and to the start / end of each slice

    Returns
    -------
    Alignment constructed with moltype=None from the (name, Aligned) pairs
    """
    starts, ends, maps = map_traceback(aligned_positions)
    named_aligned = [
        (
            name,
            Aligned(
                amap * word_length,
                seq[begin * word_length : stop * word_length],
            ),
        )
        for begin, stop, amap, (name, seq) in zip(starts, ends, maps, seqs)
    ]
    return Alignment(moltype=None, data=named_aligned)
def test_convert_input(self):
    """_convert_input handles both an Aligned instance and a raw string"""
    raw = "ACGGT--A"
    gap_map, ungapped = DNA.make_seq(raw).parse_out_gaps()

    # an Aligned instance: the very same map and sequence objects come back
    got_map, got_seq = _convert_input(Aligned(gap_map, ungapped), None)
    self.assertIs(got_seq.moltype, DNA)
    self.assertIs(got_map, gap_map)
    self.assertIs(got_seq, ungapped)

    # a plain string plus moltype: compared via their string forms
    got_map, got_seq = _convert_input(raw, DNA)
    self.assertEqual(str(got_map), str(gap_map))
    self.assertEqual(str(got_seq), str(ungapped))
def _make_aligned(self, feature_types=None, where_feature=None):
    """Fill self._cached["aligned_seq"] from the cached region.

    Stores None when the cached region is None, otherwise an Aligned built
    from self.aln_map and the region's sequence. When feature_types is
    truthy the sequence comes from region.get_annotated_seq(feature_types,
    where_feature) instead of region.seq.
    """
    have_maps = self.aln_loc is not None and self.aln_map is not None
    if not have_maps:  # is this required?
        self._make_map_func()

    region = self._cached["region"]
    if region is None:
        self._cached["aligned_seq"] = None
        return

    seq = (
        region.get_annotated_seq(feature_types, where_feature)
        if feature_types
        else region.seq
    )

    # we get the seq objects to allow for copying of their annotations
    self._cached["aligned_seq"] = Aligned(self.aln_map, seq)
def deserialise_seq(data, aligned=False):
    """deserialises sequence and any annotations

    Parameters
    ----------
    data : dict
        a result of json.loads of a to_rich_dict(). It is not modified;
        a shallow copy is consumed instead.
    aligned
        whether sequence type is for an Alignment, in which case an Aligned
        instance will be returned. Treated as True whenever "-" occurs in
        data["seq"].

    Returns
    -------
    The object produced by the moltype's make_seq(), or an Aligned instance
    wrapping the gap-free sequence when aligned applies.

    Raises
    ------
    KeyError
        if data lacks a "moltype", "type" or "seq" entry
    """
    from cogent3.core.moltype import get_moltype

    # work on a shallow copy so the caller's dict keeps all of its keys and
    # can be deserialised again
    data = dict(data)
    data.pop("version", None)
    moltype = get_moltype(data.pop("moltype"))
    annotations = data.pop("annotations", None)
    make_seq = moltype.make_seq
    # the returned class is not needed, but the lookup on the "type" entry
    # has always been performed, so it is kept
    _get_class(data.pop("type"))
    if "-" in data["seq"]:
        aligned = True

    # whatever remains in data is passed as keyword arguments
    result = make_seq(**data)
    if aligned:
        map_, result = result.parse_out_gaps()

    if annotations:
        # annotations go onto the gap-free sequence, before any wrapping
        deserialise_annotation(annotations, result)

    if aligned:
        result = Aligned(map_, result)

    return result
def make_aligned(gaps_lengths, seq, name="seq1"):
    """Build an Aligned from gap coordinates and a sequence.

    Parameters
    ----------
    gaps_lengths
        passed to gap_coords_to_map together with the sequence length
    seq
        object with a ``moltype`` attribute; it is re-made via
        seq.moltype.make_seq so that it carries name
    name
        name given to the re-made sequence

    Returns
    -------
    Aligned
    """
    named_seq = seq.moltype.make_seq(seq, name=name)
    gap_map = gap_coords_to_map(gaps_lengths, len(named_seq))
    return Aligned(gap_map, named_seq)