def test_get_array_type(self):
    """get_array_type should return unsigned type that fits elements."""
    # (argument, expected type) pairs, checked in order. 256 and 65536 are
    # the last values still mapped to the narrower type; 257 and 65537 are
    # the first that require the next wider one.
    expectations = (
        (0, uint8),
        (100, uint8),
        (256, uint8),       # boundary case
        (257, uint16),      # boundary case
        (10000, uint16),
        (65536, uint16),
        (65537, uint32),
    )
    for n_items, expected_type in expectations:
        self.assertEqual(get_array_type(n_items), expected_type)
def fromPair(cls, first, second, Alphabet, average=True):
    """Class method: returns new Counts from two sequences.

    first, second: the two sequences to compare position by position.
        If ``first`` has a ``_data`` attribute, both are treated as
        ModelSequence-like objects and their ``_data`` arrays are used
        (only ``first`` is probed; ``second`` is assumed to match).
    Alphabet: the alphabet the counts are tallied on; it must provide
        ``SubEnumerations``, ``counts`` and ``Shape``.
    average: if True (default), the count matrix is averaged with its
        transpose so the result is symmetric; otherwise the raw
        first-to-second counts are returned.

    Returns ``cls(counts, Alphabet)``.
    """
    size = len(Alphabet.SubEnumerations[-1])
    #if they're ModelSequence objects, use the _data attribute
    if hasattr(first, '_data'):
        first, second = first._data, second._data
    #figure out what size we need the result to go in: note that the
    #result is on a pair alphabet, so the data type of the single
    #alphabet (that the sequence starts off in) might not work.
    #The map is materialised into a list because on Python 3 ``map`` is a
    #lazy iterator, which an array-style product cannot reduce.
    data_type = get_array_type(
        product(list(map(len, Alphabet.SubEnumerations))))
    first = asarray(first, data_type)
    second = asarray(second, data_type)
    #encode each (first, second) pair of symbols as one flat index
    items = first * size + second
    counts = reshape(Alphabet.counts(items), Alphabet.Shape)
    if average:
        return cls((counts + transpose(counts)) / 2.0, Alphabet)
    else:
        return cls(counts, Alphabet)
def fromPair(cls, first, second, Alphabet, average=True):
    """Class method: returns new Counts from two sequences.

    first, second: the two sequences to compare position by position.
        If ``first`` has a ``_data`` attribute, both are treated as
        ModelSequence-like objects and their ``_data`` arrays are used
        (only ``first`` is probed; ``second`` is assumed to match).
    Alphabet: the alphabet the counts are tallied on; it must provide
        ``SubEnumerations``, ``counts`` and ``Shape``.
    average: if True (default), the count matrix is averaged with its
        transpose so the result is symmetric; otherwise the raw
        first-to-second counts are returned.

    Returns ``cls(counts, Alphabet)``.
    """
    size = len(Alphabet.SubEnumerations[-1])
    #if they're ModelSequence objects, use the _data attribute
    if hasattr(first, '_data'):
        first, second = first._data, second._data
    #figure out what size we need the result to go in: note that the
    #result is on a pair alphabet, so the data type of the single
    #alphabet (that the sequence starts off in) might not work.
    #On Python 3 ``map`` yields a lazy iterator, so it is turned into a
    #list before being handed to product().
    data_type = get_array_type(
        product(list(map(len, Alphabet.SubEnumerations))))
    first = asarray(first, data_type)
    second = asarray(second, data_type)
    #flat index of each (first, second) symbol pair
    items = first * size + second
    counts = reshape(Alphabet.counts(items), Alphabet.Shape)
    if average:
        return cls((counts + transpose(counts)) / 2.0, Alphabet)
    else:
        return cls(counts, Alphabet)
def _from_triple_small(cls, first, second, outgroup, Alphabet):
    """Class method: builds Counts for ``first`` from a triple of sequences.

    Arguments are given in the order first, second, outgroup.

    Intended for short sequences and/or small alphabets: it works through
    whole-sequence temporary arrays, which costs memory but is fast when
    the sequences are short.

    A position is tallied as second-state -> first-state when ``second``
    agrees with ``outgroup`` and ``first`` differs from ``second``; it is
    tallied as first-state -> first-state when ``first`` agrees with
    ``second`` or with ``outgroup``. Positions matching neither rule
    (all three differ) are not counted.

    NOTE: the inputs must either all be ModelSequence objects or all be
    plain arrays; only ``first`` is inspected to decide which.
    """
    #an object carrying _data is taken to be a ModelSequence
    if hasattr(first, '_data'):
        first = first._data
        second = second._data
        outgroup = outgroup._data

    #position-wise comparisons, done on the data as supplied
    first_is_second = equal(first, second)
    first_is_out = equal(first, outgroup)
    second_is_out = equal(second, outgroup)
    changed_on_first = logical_and(second_is_out,
                                   logical_not(first_is_second))
    kept_on_first = logical_or(first_is_second, first_is_out)

    #the result lives on a pair alphabet, so the element type has to be
    #wide enough for pair indices rather than single-symbol indices.
    sub_sizes = [len(sub) for sub in Alphabet.SubEnumerations]
    wide_type = get_array_type(product(sub_sizes))
    to_state = asarray(first, wide_type)
    from_state = asarray(second, wide_type)

    #flat pair index is from_state * stride + to_state
    stride = len(Alphabet.SubEnumerations[-1])
    items = concatenate((
        compress(changed_on_first, from_state * stride + to_state),
        compress(kept_on_first, to_state * stride + to_state),
    ))
    return cls(reshape(Alphabet.counts(items), Alphabet.Shape), Alphabet)
def _from_triple_small(cls, first, second, outgroup, Alphabet):
    """Class method: returns new Counts for first from three sequences.

    Sequence order is first, second, outgroup.

    Use this method when the sequences are short and/or the alphabet is
    small: it works on whole-sequence temporary arrays, so it is relatively
    memory intensive. Fast on short sequences, though.

    Counting rule, per position:
      - second -> first is counted when second equals the outgroup and
        first differs from second;
      - first -> first is counted when first equals second or first
        equals the outgroup;
      - positions matching neither rule contribute nothing.

    NOTE: requires input to either all be ModelSequence objects, or all not
    be ModelSequence objects. Could change this if desirable.

    Returns ``cls(counts, Alphabet)``.
    """
    #if they've got data, assume ModelSequence objects. Otherwise, arrays.
    if hasattr(first, '_data'):
        first, second, outgroup = first._data, second._data, outgroup._data

    size = len(Alphabet.SubEnumerations[-1])
    a_eq_b = equal(first, second)
    a_ne_b = logical_not(a_eq_b)
    a_eq_x = equal(first, outgroup)
    b_eq_x = equal(second, outgroup)

    #figure out what size we need the result to go in: note that the
    #result is on a pair alphabet, so the data type of the single
    #alphabet (that the sequence starts off in) might not work.
    #The map is materialised into a list because on Python 3 ``map`` is a
    #lazy iterator, which an array-style product cannot reduce.
    data_type = get_array_type(
        product(list(map(len, Alphabet.SubEnumerations))))
    first = asarray(first, data_type)
    second = asarray(second, data_type)

    #flat pair indices: from-state * size + to-state
    b_to_a = second * size + first
    a_to_a = first * size + first
    b_to_a_items = compress(logical_and(b_eq_x, a_ne_b), b_to_a)
    a_to_a_items = compress(logical_or(a_eq_b, a_eq_x), a_to_a)
    items = concatenate((b_to_a_items, a_to_a_items))
    counts = reshape(Alphabet.counts(items), Alphabet.Shape)

    return cls(counts, Alphabet)