def __init__(self, moltype, invalid=-9, alignment=None, invalid_raises=False):
    """Set up the calculator state for one molecular type.

    Parameters
    ----------
    moltype
        resolved through get_moltype; its label must be a member of
        self.valid_moltypes
    invalid
        forwarded as ``invalid`` to get_moltype_index_array
    alignment
        when not None, handed to self._convert_seqs_to_indices
    invalid_raises
        stored unchanged on self._invalid_raises

    Raises
    ------
    ValueError
        if the resolved moltype's label is not in self.valid_moltypes
    """
    super(_PairwiseDistance, self).__init__()

    moltype = get_moltype(moltype)
    if moltype.label not in self.valid_moltypes:
        name = self.__class__.__name__
        raise ValueError(
            f"Invalid moltype for {name}: '{moltype.label}' not "
            f"in {self.valid_moltypes}"
        )

    # moltype derived attributes
    self.moltype = moltype
    self.char_to_indices = get_moltype_index_array(moltype, invalid=invalid)
    self._dim = len(list(moltype))

    # placeholders, nothing has been computed or loaded yet
    self._dists = self._dupes = self._duped = None
    self._invalid_raises = invalid_raises
    self.names = self.indexed_seqs = None

    if alignment is not None:
        self._convert_seqs_to_indices(alignment)

    # assigned after the optional conversion, as in the original ordering
    self._func_args = []
def test_strand_symmetry(self):
    """strand_symmetry gives the expected tables for DNA, RNA and Aligned input"""
    from cogent3 import get_moltype
    from cogent3.core.alignment import Aligned

    raw = "ACGGCTGAAGCGCTCCGGGTTTAAAACG"
    mono_observed = [[7, 5], [7, 9]]

    # DNA, single nucleotide motifs
    dna = DnaSequence(raw)
    result = dna.strand_symmetry(motif_length=1)
    assert_allclose(result.observed.array, mono_observed)
    assert_allclose(result.expected.array, [[6, 6], [8, 8]])

    # the RNA version of the same sequence gives the same observed counts
    rna = dna.to_rna()
    result = rna.strand_symmetry(motif_length=1)
    assert_allclose(result.observed.array, mono_observed)

    # an Aligned instance built from the parsed-out map and sequence
    gap_map, ungapped = DnaSequence(raw).parse_out_gaps()
    aligned = Aligned(gap_map, ungapped)
    result = aligned.strand_symmetry(motif_length=1)
    assert_allclose(result.observed.array, mono_observed)

    # a sequence made by the "text" moltype is expected to fail
    with self.assertRaises(TypeError):
        text = get_moltype("text")
        m, s = text.make_seq(raw).parse_out_gaps()
        s.strand_symmetry(motif_length=1)

    # with motif_length=2
    dinuc = DnaSequence(
        "AC GG CT GA AG CG CT CC GG GT TT AA AA CG".replace(" ", "")
    )
    result = dinuc.strand_symmetry(motif_length=2)
    observed = result.observed
    self.assertLessEqual(len(observed.keys()), 8)
    assert_allclose(observed["AA"].to_array(), [2, 1])
    assert_allclose(observed["CC"].to_array(), [1, 2])
def test_align_to_ref_generic_moltype(self):
    """align_to_ref records the moltype and uses a generic scoring dict for it"""
    labels = ("text", "rna", "protein", "protein_with_stop", "bytes", "ab")
    for label in labels:
        app = align_app.align_to_ref(moltype=label)
        self.assertEqual(app._moltype.label, label)

        # the "S" keyword should equal the generic dict with a match score of 10
        got = app._kwargs["S"]
        expect = make_generic_scoring_dict(10, get_moltype(label))
        self.assertEqual(got, expect)
def __init__(self, distance=None, moltype=None, fast_calc=None, slow_calc=None):
    """Resolve the fast and/or slow distance calculators.

    Parameters
    ----------
    distance
        name used for both the fast and the slow calculator; cannot be
        combined with fast_calc / slow_calc
    moltype
        passed to get_moltype unless None; required when "hamming",
        "paralinear" or "logdet" is requested
    fast_calc
        name passed to get_distance_calculator
    slow_calc
        name passed to get_model

    Raises
    ------
    ValueError
        if distance is combined with fast_calc / slow_calc, if a required
        moltype is missing, if neither name resolves to a calculator, or
        if a resolved calculator's moltype differs from the given moltype
    """
    super(fast_slow_dist, self).__init__(
        input_types=ALIGNED_TYPE,
        output_types=(PAIRWISE_DISTANCE_TYPE, SERIALISABLE_TYPE),
        data_types=("ArrayAlignment", "Alignment"),
    )
    # NOTE(review): kept ahead of any new local variable, in case this
    # helper inspects the constructor's frame -- confirm
    self._formatted_params()
    self._moltype = moltype if moltype is None else get_moltype(moltype)
    self._sm = None

    if (fast_calc or slow_calc) and distance:
        raise ValueError("cannot combine distance and fast/slow")

    if distance:
        fast_calc = distance
        slow_calc = distance

    # fast_calc / slow_calc are rebound to calculator objects (or None)
    # below, so remember what was requested for the error message
    fast_name, slow_name = fast_calc, slow_calc

    d = {"hamming", "paralinear", "logdet"} & {slow_name, fast_name}
    if d and not self._moltype:
        raise ValueError(f"you must provide a moltype for {d}")

    # a name that fails to resolve is not an error on its own, only when
    # both fail
    try:
        fast_calc = get_distance_calculator(fast_calc, moltype=self._moltype)
    except (ValueError, AttributeError):
        fast_calc = None

    try:
        slow_calc = get_model(slow_calc)
    except ValueError:
        slow_calc = None

    if not (fast_calc or slow_calc):
        raise ValueError(f"invalid values for {slow_name} or {fast_name}")

    self.fast_calc = fast_calc
    if fast_calc and self._moltype and fast_calc.moltype != self._moltype:
        raise ValueError(
            f"{self._moltype} incompatible moltype with fast calculator {fast_calc.moltype}"
        )
    elif fast_calc:
        # adopt the calculator's moltype when none was given
        self._moltype = fast_calc.moltype

    if slow_calc and self._moltype and slow_calc.moltype != self._moltype:
        raise ValueError("incompatible moltype with slow calculator")
    elif slow_calc:
        self._moltype = slow_calc.moltype
    self._sm = slow_calc
def make_generic_scoring_dict(match, mtype):
    """returns scoring dict for alignment

    Parameters
    ----------
    match : int
        score for a pair of identical states, every other pair scores -1
    mtype
        MolType instance or string that can be used to get_moltype

    Returns
    -------
    dict keyed by (a, b) tuples for every ordered pair of items obtained
    by iterating over the moltype
    """
    from cogent3 import get_moltype

    states = get_moltype(mtype)
    return {
        (row, col): match if row == col else -1
        for row in states
        for col in states
    }
def __init__(self, distance=None, moltype=None, fast_calc=None, slow_calc=None):
    """
    Parameters
    ----------
    moltype : str
        cogent3 moltype
    distance : str
        Name of a distance method available as both fast and slow
        calculator.
    fast_calc
        Name of a fast distance calculator. See cogent3.available_distances().
    slow_calc
        Name of a slow distance calculator. See cogent3.available_models().

    Raises
    ------
    ValueError
        if distance is combined with fast_calc / slow_calc, if a required
        moltype is missing, if neither name resolves to a calculator, or
        if a resolved calculator's moltype differs from the given moltype

    Notes
    -----
    You must specify the moltype if any requested calculator is one of
    "hamming", "percent", "paralinear" or "logdet". Otherwise the moltype
    is taken from the resolved calculator.
    """
    super(fast_slow_dist, self).__init__(
        input_types=self._input_types,
        output_types=self._output_types,
        data_types=self._data_types,
    )
    # NOTE(review): kept ahead of any new local variable, in case this
    # helper inspects the constructor's frame -- confirm
    self._formatted_params()
    self._moltype = moltype if moltype is None else get_moltype(moltype)
    self._sm = None

    if (fast_calc or slow_calc) and distance:
        raise ValueError("cannot combine distance and fast/slow")

    if distance:
        fast_calc = distance
        slow_calc = distance

    # fast_calc / slow_calc are rebound to calculator objects (or None)
    # below, so remember what was requested for the error message
    fast_name, slow_name = fast_calc, slow_calc

    d = {"hamming", "percent", "paralinear", "logdet"} & {slow_name, fast_name}
    if d and not self._moltype:
        raise ValueError(f"you must provide a moltype for {d}")

    # a name that fails to resolve is not an error on its own, only when
    # both fail
    try:
        fast_calc = get_distance_calculator(fast_calc, moltype=self._moltype)
    except (ValueError, AttributeError):
        fast_calc = None

    try:
        slow_calc = get_model(slow_calc)
    except ValueError:
        slow_calc = None

    if not (fast_calc or slow_calc):
        raise ValueError(f"invalid values for {slow_name} or {fast_name}")

    self.fast_calc = fast_calc
    if fast_calc and self._moltype and fast_calc.moltype != self._moltype:
        raise ValueError(
            f"{self._moltype} incompatible moltype with fast calculator {fast_calc.moltype}"
        )
    elif fast_calc:
        # adopt the calculator's moltype when none was given
        self._moltype = fast_calc.moltype

    if slow_calc and self._moltype and slow_calc.moltype != self._moltype:
        raise ValueError("incompatible moltype with slow calculator")
    elif slow_calc:
        self._moltype = slow_calc.moltype
    self._sm = slow_calc
    self.func = self.calc_distance