Exemple #1
0
 def test_columnUncertainty(self):
     """columnUncertainty: populated, empty and one-dimensional profiles."""
     # a four-row, two-column profile is expected to give [2, 1]
     filled = Profile(array([[0.25, 0.5], [0.25, 0.5], [0.25, 0], [0.25, 0]]), "AB")
     observed = filled.columnUncertainty()
     self.assertEqual(observed, [2, 1])

     # profiles without any columns give back an empty result
     observed = self.empty.columnUncertainty().tolist()
     self.assertEqual(observed, [])
     no_columns = Profile(array([[], [], []]), "")
     observed = no_columns.columnUncertainty().tolist()
     self.assertEqual(observed, [])

     # one-dimensional data is rejected with a ProfileError
     self.assertRaises(ProfileError, self.oned.columnUncertainty)
Exemple #2
0
 def test_columnUncertainty(self):
     """columnUncertainty: full, empty and one-dimensional profiles."""
     data = array([[.25, .5], [.25, .5], [.25, 0], [.25, 0]])
     self.assertEqual(Profile(data, "AB").columnUncertainty(), [2, 1])
     # an empty profile yields an empty uncertainty vector
     self.assertEqual(self.empty.columnUncertainty().tolist(), [])
     zero_width = Profile(array([[], [], []]), "")
     self.assertEqual(zero_width.columnUncertainty().tolist(), [])
     # a 1D array is not accepted
     self.assertRaises(ProfileError, self.oned.columnUncertainty)
Exemple #3
0
class ProfileTests(TestCase):
    """Tests for Profile object"""

    def setUp(self):
        """Build the Profile fixtures that the test methods share."""
        # small two-character profiles, including degenerate shapes
        self.full = Profile(array([[2, 4], [3, 5], [4, 8]]), "AB")
        self.empty = Profile(array([[]]), "AB")
        self.empty_row = Profile(array([[1, 1], [0, 0]]), "AB")
        self.empty_col = Profile(array([[0, 1], [0, 1]]), "AB")

        # DNA profiles whose columns follow the character order "TCAG"
        consensus_data = array(
            [
                [0.2, 0, 0.8, 0],
                [0, 0.1, 0.2, 0.7],
                [0, 0, 0, 1],
                [0.2, 0.3, 0.4, 0.1],
                [0.5, 0.5, 0, 0],
            ]
        )
        self.consensus = Profile(consensus_data, Alphabet=DNA, CharOrder="TCAG")
        uneven_data = array(
            [
                [0.3, 0.5, 0.1, 0.1],
                [0.4, 0.6, 0, 0.7],
                [0.3, 0.2, 0, 0],
                [0, 0, 4, 0],
            ]
        )
        self.not_same_value = Profile(uneven_data, Alphabet=DNA, CharOrder="TCAG")

        # profile containing zero entries, over a plain string alphabet
        self.zero_entry = Profile(
            array([[0.3, 0.2, 0, 0.5], [0, 0, 0.8, 0.2]]),
            Alphabet="UCAG",
        )

        # profiles used by the scoring tests
        self.score1 = Profile(
            Data=array([[-1, 0, 1, 2], [-2, 2, 0, 0], [-3, 5, 1, 0]]),
            Alphabet=DNA,
            CharOrder="ATGC",
        )
        self.score2 = Profile(
            array([[0.2, 0.4, 0.4, 0], [0.1, 0, 0.9, 0], [0.1, 0.2, 0.3, 0.4]]),
            Alphabet="TCAG",
        )

        # one-dimensional data and a 3x4 integer table for prettyPrint
        self.oned = Profile(array([0.25, 0.25, 0.25, 0.25]), "ABCD")
        self.pp = Profile(array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]), "ABCD")

    def test_init(self):
        """__init__: should set all attributes correctly"""
        # constructing with no arguments, or with data alone, must fail
        for incomplete_args in ((), (array([[2, 3]]),)):
            self.assertRaises(TypeError, Profile, *incomplete_args)

        # alphabet given, character order left out
        alpha_only = Profile(array([[0.2, 0.8], [0.7, 0.3]]), "AB")
        self.assertEqual(alpha_only.Data, [[0.2, 0.8], [0.7, 0.3]])
        self.assertEqual(alpha_only.Alphabet, "AB")
        self.assertEqual(alpha_only.CharOrder, list("AB"))
        encoded = translate("ABBA", alpha_only._translation_table)
        self.assertEqual(encoded, "\x00\x01\x01\x00")

        # alphabet object together with an explicit character order
        with_order = Profile(array([[0.1, 0.2], [0.4, 0.3]]), Alphabet=DNA, CharOrder="AG")
        self.assertEqual(with_order.CharOrder, "AG")
        assert with_order.Alphabet is DNA

        # alphabet and character order made of non-character symbols
        numeric = Profile(array([[0.1, 0.2], [0.4, 0.3]]), Alphabet=[7, 3], CharOrder=[3, 7])
        self.assertEqual(numeric.CharOrder, [3, 7])
        self.assertEqual(numeric.Alphabet, [7, 3])
        self.assertEqual(numeric.Data, [[0.1, 0.2], [0.4, 0.3]])

    def test_str(self):
        """__str__: should match the string form of the underlying data array"""
        rendered = str(self.empty_row)
        expected = str(array([[1, 1], [0, 0]]))
        self.assertEqual(rendered, expected)

    def test_make_translation_table(self):
        """_make_translation_table: table should map chars to CharOrder indices"""
        # the alphabet is wider than the character order "AB"
        profile = Profile(array([[0.2, 0.8], [0.7, 0.3]]), "ABCDE", "AB")
        encoded = translate("ABBA", profile._translation_table)
        self.assertEqual(encoded, "\x00\x01\x01\x00")

    def test_hasValidData(self):
        """hasValidData: True for a normalized profile, False otherwise"""
        normalized = self.full.copy()
        normalized.normalizePositions()
        self.assertEqual(normalized.hasValidData(), True)
        # neither a profile with an all-zero row nor the empty one is valid
        for invalid in (self.empty_row, self.empty):
            self.assertEqual(invalid.hasValidData(), False)

    def test_hasValidAttributes(self):
        """hasValidAttributes: check several alphabet/char order combinations"""
        cases = (
            # len(CharOrder) doesn't match the width of the data
            ("BAC", False),
            # not all chars in CharOrder are in Alphabet
            ("AX", False),
            # should be fine
            ("CB", True),
        )
        for char_order, expected in cases:
            profile = Profile(array([[1, 2], [3, 4]]), Alphabet="ABCD", CharOrder=char_order)
            self.assertEqual(profile.hasValidAttributes(), expected)

    def test_isValid(self):
        """isValid: only True when both data and attributes are valid"""
        all_valid = Profile(array([[0.3, 0.7], [0.8, 0.2]]), Alphabet="AB", CharOrder="AB")
        # rows do not sum to one, attributes are fine
        bad_data = Profile(array([[1, 2], [3, 4]]), Alphabet="ABCD", CharOrder="BA")
        # "F" is not part of the alphabet, data is fine
        bad_attributes = Profile(array([[0.3, 0.7], [0.8, 0.2]]), Alphabet="ABCD", CharOrder="AF")

        for profile, expected in ((all_valid, True), (bad_data, False), (bad_attributes, False)):
            self.assertEqual(profile.isValid(), expected)

    def test_dataAt(self):
        """dataAt: returns the value for valid input, raises otherwise"""
        data = array([[0.2, 0.4, 0.4, 0], [0.1, 0, 0.9, 0], [0.1, 0.2, 0.3, 0.4]])
        profile = Profile(data, Alphabet="TCAG")
        self.assertEqual(profile.dataAt(0, "C"), 0.4)
        self.assertEqual(profile.dataAt(1, "T"), 0.1)
        # unknown character, negative position, position past the end
        for position, character in ((1, "U"), (-2, "T"), (5, "T")):
            self.assertRaises(ProfileError, profile.dataAt, position, character)

    def test_copy(self):
        """copy: should act as expected while rebinding/modifying attributes

        The assertions pin down that copy() hands back a profile sharing the
        same Data, Alphabet and CharOrder objects: in-place changes show up
        through both profiles, while rebinding an attribute affects only
        the profile it was rebound on.
        """
        p = Profile(array([[1, 1], [0.7, 0.3]]), {"A": "A", "G": "G", "R": "AG"}, "AG")
        p_copy = p.copy()
        assert p.Data is p_copy.Data
        assert p.Alphabet is p_copy.Alphabet
        assert p.CharOrder is p_copy.CharOrder

        # modifying p.Data in place modifies p_copy.Data
        # (the original test re-checked the Alphabet here, which said
        # nothing about the Data that had just been changed)
        p.Data[1, 1] = 100
        assert p.Data is p_copy.Data
        assert p_copy.Data[1, 1] == 100

        # normalizing p.Data rebinds it, so p_copy.Data is unchanged
        p.normalizePositions()
        assert p.Data is not p_copy.Data

        # Adding something to the alphabet changes both p and p_copy
        p.Alphabet["Y"] = "TC"
        assert p.Alphabet is p_copy.Alphabet
        assert p_copy.Alphabet["Y"] == "TC"

        # Rebinding the CharOrder does only change the original
        p.CharOrder = "XX"
        assert p.CharOrder is not p_copy.CharOrder

    def test_normalizePositions(self):
        """normalizePositions: rows are normalized or a ProfileError is raised"""
        # every row of the full profile is rescaled so that it sums to 1
        normalized = self.full.copy()
        normalized.normalizePositions()
        expected = array([[2 / 6, 4 / 6], [3 / 8, 5 / 8], [4 / 12, 8 / 12]])
        self.assertEqual(normalized.Data, expected)
        self.assertEqual(sum(normalized.Data, 1), [1, 1, 1])

        # an all-zero column leaves the data as it was
        zero_col = self.empty_col.copy()
        zero_col.normalizePositions()
        self.assertEqual(zero_col.Data, array([[0, 1], [0, 1]]))

        # a row that sums to zero cannot be normalized
        zero_row = self.empty_row.copy()
        self.assertRaises(ProfileError, zero_row.normalizePositions)
        all_zero = Profile(array([[0.0, 0.0]]), "AB")
        self.assertRaises(ProfileError, all_zero.normalizePositions)

        # negative numbers: rows summing to 1 stay as they are,
        # a row cancelling out to 0 raises
        signed = Profile(array([[3, -2], [4, -3]]), "AB")
        signed.normalizePositions()
        self.assertEqual(signed.Data, array([[3, -2], [4, -3]]))
        cancelling = Profile(array([[3, -3], [4, -3]]), "AB")
        self.assertRaises(ProfileError, cancelling.normalizePositions)

    def test_normalizeSequences(self):
        """normalizeSequences: columns are normalized or a ProfileError is raised"""
        # every column of the full profile is rescaled so that it sums to 1
        normalized = self.full.copy()
        normalized.normalizeSequences()
        expected = array([[2 / 9, 4 / 17], [3 / 9, 5 / 17], [4 / 9, 8 / 17]])
        self.assertEqual(normalized.Data, expected)
        self.assertEqual(sum(normalized.Data, axis=0), [1, 1])

        # an all-zero row leaves the data as it was
        zero_row = self.empty_row.copy()
        zero_row.normalizeSequences()
        self.assertEqual(zero_row.Data, array([[1, 1], [0, 0]]))

        # a column that sums to zero cannot be normalized
        zero_col = self.empty_col.copy()
        self.assertRaises(ProfileError, zero_col.normalizeSequences)
        all_zero = Profile(array([[0.0], [0.0]]), "AB")
        self.assertRaises(ProfileError, all_zero.normalizeSequences)

        # negative numbers: columns summing to 1 stay as they are,
        # a column cancelling out to 0 raises
        signed = Profile(array([[3, 4], [-2, -3]]), "AB")
        signed.normalizeSequences()
        self.assertEqual(signed.Data, array([[3, 4], [-2, -3]]))
        cancelling = Profile(array([[3, 4], [-3, -3]]), "AB")
        self.assertRaises(ProfileError, cancelling.normalizeSequences)

    def test_prettyPrint_without_parameters(self):
        """prettyPrint: default call plus header/transpose, no column limit"""
        table = self.full
        plain = table.prettyPrint()
        self.assertEqual(plain, "2\t4\n3\t5\n4\t8")
        with_header = table.prettyPrint(include_header=True)
        self.assertEqual(with_header, "A\tB\n2\t4\n3\t5\n4\t8")
        transposed = table.prettyPrint(transpose_data=True)
        self.assertEqual(transposed, "2\t3\t4\n4\t5\t8")
        both = table.prettyPrint(include_header=True, transpose_data=True)
        self.assertEqual(both, "A\t2\t3\t4\nB\t4\t5\t8")

        # the empty profile prints as an empty string either way
        self.assertEqual(self.empty.prettyPrint(), "")
        self.assertEqual(self.empty.prettyPrint(transpose_data=True), "")

        # printing still works when len(CharOrder) doesn't match the data
        mismatched = self.full.copy()
        mismatched.CharOrder = "ABC"
        padded = mismatched.prettyPrint(include_header=True)
        self.assertEqual(padded, "A\tB\tC\n2\t4\t \n3\t5\t \n4\t8\t ")

        # with transposed data a CharOrder longer than the number of
        # rows in the transposed data gets truncated
        truncated = mismatched.prettyPrint(include_header=True, transpose_data=True)
        self.assertEqual(truncated, "A\t2\t3\t4\nB\t4\t5\t8")

    def test_prettyPrint_four_cases(self):
        """prettyPrint: with/without header/transpose/limit

        Uses the 3x4 ``self.pp`` fixture, whose two-digit entries make the
        column padding visible in the expected strings.
        """
        # the original assigned self.full first and overwrote it right away;
        # only self.pp is ever used
        p = self.pp
        self.assertEqual(p.prettyPrint(), "1\t 2\t 3\t 4\n5\t 6\t 7\t 8\n9\t10\t11\t12")
        self.assertEqual(p.prettyPrint(column_limit=3), "1\t 2\t 3\n5\t 6\t 7\n9\t10\t11")
        self.assertEqual(
            p.prettyPrint(column_limit=3, include_header=True), "A\t B\t C\n1\t 2\t 3\n5\t 6\t 7\n9\t10\t11"
        )
        # with transposed data the limit applies to the transposed columns
        self.assertEqual(
            p.prettyPrint(column_limit=3, include_header=False, transpose_data=True),
            "1\t5\t 9\n2\t6\t10\n3\t7\t11\n4\t8\t12",
        )
        self.assertEqual(
            p.prettyPrint(column_limit=2, include_header=False, transpose_data=True), "1\t5\n2\t6\n3\t7\n4\t8"
        )
        # the header column counts towards the limit when transposed
        self.assertEqual(
            p.prettyPrint(column_limit=3, include_header=True, transpose_data=True),
            "A\t1\t5\nB\t2\t6\nC\t3\t7\nD\t4\t8",
        )

    def test_reduce_wrong_size(self):
        """reduce: profiles of different shapes cannot be combined"""
        two_wide = Profile(array([[1, 0], [0, 1]]), Alphabet="AB")
        three_wide = Profile(array([[1, 0, 0], [1, 0, 0]]), Alphabet="ABC")
        self.assertRaises(ProfileError, two_wide.reduce, three_wide)

    def test_reduce_normalization_error(self):
        """reduce: raises when the input or the output can't be normalized"""
        # input that can't be normalized
        for left, right in ((self.empty, self.empty), (self.full, self.empty_row)):
            self.assertRaises(ProfileError, left.reduce, right, add)

        # input left alone, output normalized:
        # the second rows add up to zero, so normalizing the result fails
        positive = Profile(array([[3, 3], [4, 4]]), "AB")
        mirrored = Profile(array([[3, 3], [-4, -4]]), "AB")
        self.assertRaises(ProfileError, positive.reduce, mirrored, add, False, True)

    def test_reduce_operators(self):
        """reduce: check operators and the normalization switches"""
        # different operators, normalize input, don't normalize output
        first = Profile(array([[1, 0, 0], [0, 1, 0]]), Alphabet="ABC")
        second = Profile(array([[1, 0, 0], [0, 0, 1]]), Alphabet="ABC")

        # default call
        self.assertEqual(first.reduce(second).Data, array([[1, 0, 0], [0, 0.5, 0.5]]))

        operator_cases = (
            (add, array([[2, 0, 0], [0, 1, 1]])),
            (subtract, array([[0, 0, 0], [0, 1, -1]])),
            (multiply, array([[1, 0, 0], [0, 0, 0]])),
        )
        for operator, expected in operator_cases:
            combined = first.reduce(second, operator, normalize_input=True, normalize_output=False)
            self.assertEqual(combined.Data, expected)

        # dividing these profiles must raise
        self.assertRaises(ProfileError, first.reduce, second, divide, normalize_input=True, normalize_output=False)

        # don't normalize and normalize only input
        ascending = Profile(array([[1, 2], [3, 4]]), Alphabet="AB")
        descending = Profile(array([[4, 3], [2, 1]]), Alphabet="AB")

        raw_sum = ascending.reduce(descending, add, normalize_input=False, normalize_output=False)
        self.assertEqual(raw_sum.Data, array([[5, 5], [5, 5]]))
        normalized_sum = ascending.reduce(descending, add, normalize_input=True, normalize_output=False)
        self.assertFloatEqual(
            normalized_sum.Data,
            array([[19 / 21, 23 / 21], [23 / 21, 19 / 21]]),
        )

        # normalize input and output
        wide_a = Profile(array([[1, 1, 0, 0], [1, 1, 1, 1]]), Alphabet="ABCD")
        wide_b = Profile(array([[1, 0, 0, 0], [1, 0, 0, 1]]), Alphabet="ABCD")
        fully_normalized = wide_a.reduce(wide_b, add, normalize_input=True, normalize_output=True)
        self.assertEqual(
            fully_normalized.Data,
            array([[0.75, 0.25, 0, 0], [0.375, 0.125, 0.125, 0.375]]),
        )

        # it can collapse empty profiles when normalizing is turned off
        collapsed = self.empty.reduce(self.empty, normalize_input=False, normalize_output=False)
        self.assertEqual(collapsed.Data.tolist(), [[]])

        # more specific tests of the operators will be in the
        # separate functions

    def test__add_(self):
        """__add__: element-wise sum without any normalization"""
        fractions = Profile(array([[0.3, 0.4, 0.1, 0], [0.1, 0.1, 0.1, 0.7]]), Alphabet="ABCD")
        counts = Profile(array([[1, 0, 0, 0], [1, 0, 0, 1]]), Alphabet="ABCD")
        total = fractions + counts
        self.assertEqual(total.Data, array([[1.3, 0.4, 0.1, 0], [1.1, 0.1, 0.1, 1.7]]))
        # adding a populated profile to the empty one raises
        self.assertRaises(ProfileError, self.empty.__add__, fractions)
        # two empty profiles add up to an empty profile
        empty_total = self.empty + self.empty
        self.assertEqual(empty_total.Data.tolist(), [[]])

    def test__sub_(self):
        """__sub__: element-wise difference without any normalization"""
        fractions = Profile(array([[0.3, 0.4, 0.1, 0], [0.1, 0.1, 0.1, 0.7]]), Alphabet="ABCD")
        counts = Profile(array([[1, 0, 0, 0], [1, 0, 0, 1]]), Alphabet="ABCD")
        difference = fractions - counts
        expected = array([[-0.7, 0.4, 0.1, 0], [-0.9, 0.1, 0.1, -0.3]])
        self.assertFloatEqual(difference.Data, expected)

    def test__mul_(self):
        """__mul__: element-wise product without any normalization"""
        left = Profile(array([[1, -2, 3, 0], [1, 1, 1, 0.5]]), Alphabet="ABCD")
        right = Profile(array([[1, 0, 0, 0], [1, 0, 3, 2]]), Alphabet="ABCD")
        product = left * right
        self.assertEqual(product.Data, array([[1, 0, 0, 0], [1, 0, 3, 1]]))

    def test__div_(self):
        """__div__ and __truediv__: always true division b/c __future__.division
        """
        numerator = Profile(array([[2, 3], [4, 5]]), "AB")
        int_zero = Profile(array([[1, 0], [4, 5]]), "AB")  # contains an int 0
        float_zero = Profile(array([[1, 0.0], [4, 5]]), "AB")  # contains a float 0.0
        no_zero = Profile(array([[1, 2], [8.0, 5]]), "AB")  # no zero entries

        # dividing by a profile containing a zero must raise
        self.assertRaises(ProfileError, numerator.__truediv__, int_zero)
        # infinity in result data
        self.assertRaises(ProfileError, numerator.__div__, float_zero)
        # a divisor without zeros gives the element-wise quotient
        quotient = numerator.__div__(no_zero)
        self.assertFloatEqual(quotient.Data, array([[2, 1.5], [0.5, 1]]))

    def test_distance(self):
        """distance: correct value for matching shapes, error otherwise"""
        base = Profile(array([[2, 4], [3, 1]]), "AB")
        shifted = Profile(array([[4, 6], [5, 3]]), "AB")
        extra_row = Profile(array([[4, 6], [5, 3], [1, 1]]), "AB")
        flat_pair = Profile(array([2, 2]), "AB")
        flat_triple = Profile(array([2, 2, 2]), "AB")
        blank = Profile(array([[]]), "AB")

        # the distance is the same in both directions
        self.assertEqual(base.distance(shifted), 4)
        self.assertEqual(shifted.distance(base), 4)
        # a one-dimensional profile of matching width is accepted
        self.assertEqual(base.distance(flat_pair), sqrt(6))
        self.assertEqual(blank.distance(blank), 0)

        # Raises error when frames are not aligned
        for misaligned in (extra_row, flat_triple):
            self.assertRaises(ProfileError, base.distance, misaligned)

    def test_toOddsMatrix(self):
        """toOddsMatrix: correct odds for valid input, error otherwise"""
        frequencies = array(
            [
                [0.1, 0.3, 0.5, 0.1],
                [0.25, 0.25, 0.25, 0.25],
                [0.05, 0.8, 0.05, 0.1],
                [0.7, 0.1, 0.1, 0.1],
                [0.6, 0.15, 0.05, 0.2],
            ]
        )
        profile = Profile(frequencies, Alphabet="ACTG")
        expected_odds = array(
            [
                [0.4, 1.2, 2, 0.4],
                [1, 1, 1, 1],
                [0.2, 3.2, 0.2, 0.4],
                [2.8, 0.4, 0.4, 0.4],
                [2.4, 0.6, 0.2, 0.8],
            ]
        )
        expected = Profile(expected_odds, Alphabet="ACTG")

        # default background and an explicit uniform one give equal results
        self.assertEqual(profile.toOddsMatrix().Data, expected.Data)
        assert profile.Alphabet is profile.toOddsMatrix().Alphabet
        uniform = [0.25, 0.25, 0.25, 0.25]
        self.assertEqual(profile.toOddsMatrix(uniform).Data, expected.Data)

        # fails if symbol_freqs has wrong size
        too_long = [0.25, 0.25, 0.25, 0.25, 0.25, 0.25]
        self.assertRaises(ProfileError, profile.toOddsMatrix, too_long)
        self.assertRaises(ProfileError, self.zero_entry.toOddsMatrix, [0.1, 0.2, 0.3])

        # works on empty profile
        self.assertEqual(self.empty.toOddsMatrix().Data.tolist(), [[]])

        # works with different input
        default_odds = self.zero_entry.toOddsMatrix()
        self.assertEqual(default_odds.Data, array([[1.2, 0.8, 0, 2], [0, 0, 3.2, 0.8]]))
        skewed_odds = self.zero_entry.toOddsMatrix([0.1, 0.2, 0.3, 0.4])
        self.assertFloatEqual(skewed_odds.Data, array([[3, 1, 0, 1.25], [0, 0, 2.667, 0.5]]), 1e-3)

        # fails when one of the background frequencies is 0
        self.assertRaises(ProfileError, self.zero_entry.toOddsMatrix, [0.1, 0.2, 0.3, 0])

    def test_toLogOddsMatrix(self):
        """toLogOddsMatrix: log-odds values within tolerance, empty input ok"""
        # Kept short: the comment in the original states that this method
        # mainly depends on toOddsMatrix, which has its own thorough test.
        frequencies = array(
            [
                [0.1, 0.3, 0.5, 0.1],
                [0.25, 0.25, 0.25, 0.25],
                [0.05, 0.8, 0.05, 0.1],
                [0.7, 0.1, 0.1, 0.1],
                [0.6, 0.15, 0.05, 0.2],
            ]
        )
        profile = Profile(frequencies, Alphabet="ACTG")
        expected_log_odds = array(
            [
                [-1.322, 0.263, 1.0, -1.322],
                [0.0, 0.0, 0.0, 0.0],
                [-2.322, 1.678, -2.322, -1.322],
                [1.485, -1.322, -1.322, -1.322],
                [1.263, -0.737, -2.322, -0.322],
            ]
        )
        expected = Profile(expected_log_odds, Alphabet="ACTG")

        observed = profile.toLogOddsMatrix()
        self.assertFloatEqual(observed.Data, expected.Data, eps=1e-3)

        # works on empty matrix
        self.assertEqual(self.empty.toLogOddsMatrix().Data.tolist(), [[]])

    def test__score_indices(self):
        """_score_indices: scores index arrays at different offsets"""
        indices = array([0, 1, 1, 3, 0, 3])
        self.assertEqual(self.score1._score_indices(indices, offset=0), [6, 2, -3, 0])

        float_cases = (
            (0, [0.3, 1.4, 0.8, 1.4, 1.7]),
            (3, [1.4, 1.7]),
        )
        for offset, expected in float_cases:
            indices = array([3, 1, 2, 0, 2, 2, 3])
            observed = self.score2._score_indices(indices, offset=offset)
            self.assertFloatEqual(observed, expected)

        # Invalid input is not handled in this method (validation is done
        # elsewhere), so an out-of-range index surfaces as an IndexError
        out_of_range = array([3, 1, 63, 0, 4, 2, 3])
        self.assertRaises(IndexError, self.score2._score_indices, out_of_range, offset=3)

    def test__score_profile(self):
        """_score_profile: scores another profile at different offsets"""
        sharp = Profile(
            array(
                [
                    [1, 0, 0, 0],
                    [0, 1, 0, 0],
                    [0, 0, 0.5, 0.5],
                    [0, 0, 0, 1],
                    [0.25, 0.25, 0.25, 0.25],
                ]
            ),
            "TCAG",
        )
        blurred = Profile(
            array(
                [
                    [0, 1, 0, 0],
                    [0.2, 0, 0.8, 0],
                    [0, 0, 0.5, 0.5],
                    [1 / 3, 1 / 3, 0, 1 / 3],
                    [0.25, 0.25, 0.25, 0.25],
                ]
            ),
            "TCAG",
        )

        scorer = self.score2._score_profile
        self.assertFloatEqual(scorer(sharp, offset=0), [0.55, 1.25, 0.45])
        self.assertFloatEqual(scorer(sharp, offset=2), [0.45])
        self.assertFloatEqual(scorer(blurred, offset=0), [1.49, 1.043, 0.483], 1e-3)

        # Invalid input is not handled in this method (validation is done
        # elsewhere). An offset that is too large does not raise here;
        # the result is simply empty.
        self.assertFloatEqual(scorer(sharp, offset=3).tolist(), [])

    def test_score_sequence(self):
        """score: plain string sequences as input"""
        # works on normal valid data
        observed = self.score1.score("ATTCAC", offset=0)
        self.assertEqual(observed, [6, 2, -3, 0])

        # the same sequence scored from different offsets
        offset_cases = (
            (0, [0.5, 1.6, 1.7, 0.5]),
            (2, [1.7, 0.5]),
            (3, [0.5]),
        )
        for offset, expected in offset_cases:
            self.assertFloatEqual(self.score2.score("TCAAGT", offset=offset), expected)
        # raises error on invalid offset
        self.assertRaises(ProfileError, self.score2.score, "TCAAGT", offset=4)

        # works on seq of minimal length
        self.assertFloatEqual(self.score2.score("AGT", offset=0), [0.5])
        # raises error when sequence is too short
        self.assertRaises(ProfileError, self.score2.score, "", offset=0)
        # raises error on empty profile
        self.assertRaises(ProfileError, self.empty.score, "ACGT")
        # raises error when the sequence contains characters that
        # are not in the character order
        self.assertRaises(ProfileError, self.score2.score, "ACBRT")

    def test_score_sequence_object(self):
        """score: sequence objects as input give the same scores"""
        expected = [6, 2, -3, 0]
        # DnaSequence object
        dna_seq = DNA.Sequence("ATTCAC")
        self.assertEqual(self.score1.score(dna_seq, offset=0), expected)
        # ModelSequence object
        model_seq = ModelSequence("ATTCAC", Alphabet=DNA.Alphabet)
        self.assertEqual(self.score1.score(model_seq, offset=0), expected)

    def test_score_no_trans_table(self):
        """score: still works after the translation table has been removed"""
        score_data = array([[-1, 0, 1, 2], [-2, 2, 0, 0], [-3, 5, 1, 0]])
        profile = Profile(Data=score_data, Alphabet=DNA, CharOrder="ATGC")
        # drop the translation table from the instance
        del profile.__dict__["_translation_table"]
        # scoring must give the usual result regardless
        observed = profile.score(DNA.Sequence("ATTCAC"), offset=0)
        self.assertEqual(observed, [6, 2, -3, 0])

    def test_score_profile(self):
        """score: Profile objects as input"""
        sharp = Profile(
            array(
                [
                    [1, 0, 0, 0],
                    [0, 1, 0, 0],
                    [0, 0, 0.5, 0.5],
                    [0, 0, 0, 1],
                    [0.25, 0.25, 0.25, 0.25],
                ]
            ),
            "TCAG",
        )
        blurred = Profile(
            array(
                [
                    [0, 1, 0, 0],
                    [0.2, 0, 0.8, 0],
                    [0, 0, 0.5, 0.5],
                    [1 / 3, 1 / 3, 0, 1 / 3],
                    [0.25, 0.25, 0.25, 0.25],
                ]
            ),
            "TCAG",
        )
        minimal = Profile(array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1]]), "TCAG")
        too_short = Profile(array([[1, 0, 0, 0], [0, 1, 0, 0]]), "TCAG")
        other_order = Profile(array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1]]), "AGTC")

        # works on normal valid data
        self.assertFloatEqual(self.score2.score(sharp, offset=0), [0.55, 1.25, 0.45])
        self.assertFloatEqual(self.score2.score(blurred, offset=0), [1.49, 1.043, 0.483], 1e-3)
        # works with different offset
        for offset, expected in ((1, [1.25, 0.45]), (2, [0.45])):
            self.assertFloatEqual(self.score2.score(sharp, offset=offset), expected)
        # raises error on invalid offset
        self.assertRaises(ProfileError, self.score2.score, sharp, offset=3)
        # works on profile of minimal length
        self.assertFloatEqual(self.score2.score(minimal, offset=0), [0.6])
        # raises error when profile is too short
        self.assertRaises(ProfileError, self.score2.score, too_short, offset=0)
        # raises error on empty profile
        self.assertRaises(ProfileError, self.empty.score, sharp)
        # raises error when character order doesn't match
        self.assertRaises(ProfileError, self.score2.score, other_order)

    def test_rowUncertainty(self):
        """rowUncertainty: populated, empty and one-dimensional profiles"""
        filled = Profile(array([[0.25, 0.25, 0.25, 0.25], [0.5, 0.5, 0, 0]]), "ABCD")
        self.assertEqual(filled.rowUncertainty(), [2, 1])

        # profiles whose rows hold no values give back an empty result
        self.assertEqual(self.empty.rowUncertainty().tolist(), [])
        no_columns = Profile(array([[], [], []]), "")
        self.assertEqual(no_columns.rowUncertainty().tolist(), [])

        # one-dimensional data is rejected with a ProfileError
        self.assertRaises(ProfileError, self.oned.rowUncertainty)

    def test_columnUncertainty(self):
        """columnUncertainty: populated, empty and one-dimensional profiles"""
        filled = Profile(array([[0.25, 0.5], [0.25, 0.5], [0.25, 0], [0.25, 0]]), "AB")
        self.assertEqual(filled.columnUncertainty(), [2, 1])

        # profiles without any columns give back an empty result
        self.assertEqual(self.empty.columnUncertainty().tolist(), [])
        no_columns = Profile(array([[], [], []]), "")
        self.assertEqual(no_columns.columnUncertainty().tolist(), [])

        # one-dimensional data is rejected with a ProfileError
        self.assertRaises(ProfileError, self.oned.columnUncertainty)

    def test_rowDegeneracy(self):
        """rowDegeneracy: should work as expected"""
        consensus = self.consensus
        self.assertEqual(consensus.rowDegeneracy(), [1, 1, 1, 2, 1])
        consensus_cases = (
            (0.5, [1, 1, 1, 2, 1]),
            (0.75, [1, 2, 1, 3, 2]),
            # when a row seems to add up to the cutoff value, it's not
            # always found because of floating point error. E.g. second
            # row in this example
            (1, [2, 4, 1, 4, 2]),
            # when the cutoff can't be found, the number of columns in
            # the profile is returned (for each row)
            (1.5, [4, 4, 4, 4, 4]),
        )
        for cutoff, expected in consensus_cases:
            self.assertEqual(consensus.rowDegeneracy(cutoff=cutoff), expected)

        uneven = self.not_same_value
        for cutoff, expected in ((0.95, [4, 2, 4, 1]), (1.4, [4, 3, 4, 1])):
            self.assertEqual(uneven.rowDegeneracy(cutoff=cutoff), expected)

        self.assertEqual(self.empty.rowDegeneracy(), [])

    def test_columnDegeneracy(self):
        """columnDegeneracy: should work as expected"""
        # transpose the fixtures so that their former rows become columns
        consensus = self.consensus
        consensus.Data = transpose(consensus.Data)
        uneven = self.not_same_value
        uneven.Data = transpose(uneven.Data)

        self.assertEqual(consensus.columnDegeneracy(), [1, 1, 1, 2, 1])
        consensus_cases = (
            (0.5, [1, 1, 1, 2, 1]),
            (0.75, [1, 2, 1, 3, 2]),
            # when a column seems to add up to the cutoff value, it's not
            # always found because of floating point error. E.g. second
            # column in this example
            (1, [2, 4, 1, 4, 2]),
            # when the cutoff can't be found, the number of rows in the
            # profile is returned (for each column)
            (1.5, [4, 4, 4, 4, 4]),
        )
        for cutoff, expected in consensus_cases:
            self.assertEqual(consensus.columnDegeneracy(cutoff=cutoff), expected)

        for cutoff, expected in ((0.95, [4, 2, 4, 1]), (1.4, [4, 3, 4, 1])):
            self.assertEqual(uneven.columnDegeneracy(cutoff=cutoff), expected)

        self.assertEqual(self.empty.columnDegeneracy(), [])

    def test_rowMax(self):
        """rowMax: should return the largest value of every row"""
        expected = array([0.8, 0.7, 1, 0.4, 0.5])
        self.assertEqual(self.consensus.rowMax(), expected)

    def test_toConsensus(self):
        """toConsensus: should work with all the different options"""
        consensus_cases = (
            ({"fully_degenerate": False}, "AGGAT"),
            ({"fully_degenerate": True}, "WVGNY"),
            ({"cutoff": 0.75}, "ARGHY"),
            ({"cutoff": 0.95}, "WVGNY"),
            ({"cutoff": 2}, "WVGNY"),
        )
        for options, expected in consensus_cases:
            self.assertEqual(self.consensus.toConsensus(**options), expected)

        uneven_cases = (
            ({"fully_degenerate": False}, "CGTA"),
            ({"fully_degenerate": True}, "NBYA"),
            ({"cutoff": 0.75}, "YSYA"),
            ({"cutoff": 2}, "NBYA"),
            ({"cutoff": 5}, "NBYA"),
            # when both fully_degenerate and a cutoff value are given,
            # the cutoff takes priority and is used in the calculation
            ({"cutoff": 0.75, "fully_degenerate": True}, "YSYA"),
        )
        for options, expected in uneven_cases:
            self.assertEqual(self.not_same_value.toConsensus(**options), expected)

        # raises AttributeError when Alphabet doesn't have Degenerates
        plain = Profile(array([[0.2, 0.8], [0.7, 0.3]]), "AB")
        self.assertRaises(AttributeError, plain.toConsensus, cutoff=0.5)

    def test_toConsensus_include_all(self):
        """toConsensus: include_all=True together with a cutoff"""
        first_data = array(
            [
                [0.2, 0, 0.8, 0],
                [0, 0.1, 0.2, 0.7],
                [0, 0, 0, 1],
                [0.2, 0.3, 0.4, 0.1],
                [0.5, 0.5, 0, 0],
            ]
        )
        first = Profile(first_data, Alphabet=DNA, CharOrder="TCAG")
        self.assertEqual(first.toConsensus(cutoff=0.4, include_all=True), "AGGAY")

        second_data = array(
            [
                [0.25, 0.25, 0.25, 0.25],
                [0.1, 0.1, 0.1, 0],
                [0.4, 0, 0.4, 0],
                [0, 0.2, 0.2, 0.3],
            ]
        )
        second = Profile(second_data, Alphabet=DNA, CharOrder="TCAG")
        self.assertEqual(second.toConsensus(cutoff=0.4, include_all=True), "NHWV")

    def test_randomIndices(self):
        """randomIndices: 99% of new frequencies should be within 3*SD

        Draws n sets of random indices from a normalized random profile,
        counts how often each symbol shows up per position, and requires
        that at most 1% of the counts deviate from the expected mean by
        more than three standard deviations.
        """
        r_num, c_num = 100, 20
        num_elements = r_num * c_num
        r = random([r_num, c_num])
        p = Profile(r, "A" * c_num)
        p.normalizePositions()
        d = p.Data
        n = 1000

        # Test only works on normalized profile, b/c of 1-d below
        means = n * d
        three_stds = sqrt(d * (1 - d) * n) * 3
        result = [p.randomIndices() for _ in range(n)]
        a = Alignment(transpose(result))

        def absoluteProfile(alignment, char_order):
            """Return per-column symbol counts of alignment as a matrix.

            Each symbol's count is stored at column ord(symbol) of its row.
            """
            # use the alignment that was passed in; the original read the
            # enclosing variable `a` and silently ignored this parameter
            f = alignment.columnFreqs()
            res = zeros([len(f), len(char_order)])
            for row, freq in enumerate(f):
                for i in freq:
                    res[row, ord(i)] = freq[i]
            return res

        ap = absoluteProfile(a, p.CharOrder)
        failure = abs(ap - means) > three_stds
        assert sum(sum(failure)) / num_elements <= 0.01

    def test_randomSequence(self):
        """randomSequence: 99% of new frequencies should be within 3*SD

        Draws n random sequences from a normalized random profile, counts
        how often each character shows up per position, and requires that
        at most 1% of the counts deviate from the expected mean by more
        than three standard deviations.
        """
        r_num, c_num = 100, 20
        num_elements = r_num * c_num
        alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        r = random([r_num, c_num])
        p = Profile(r, alpha[:c_num])
        p.normalizePositions()
        d = p.Data
        n = 1000

        # Test only works on normalized profile, b/c of 1-d below
        means = n * d
        three_stds = sqrt(d * (1 - d) * n) * 3

        a = Alignment([p.randomSequence() for _ in range(n)])

        def absoluteProfile(alignment, char_order):
            """Return per-column character counts of alignment as a matrix.

            Each character's count is stored at its index in char_order.
            """
            # use the alignment that was passed in; the original read the
            # enclosing variable `a` and silently ignored this parameter
            f = alignment.columnFreqs()
            res = zeros([len(f), len(char_order)])
            for row, freq in enumerate(f):
                for i in freq:
                    col = char_order.index(i)
                    res[row, col] = freq[i]
            return res

        ap = absoluteProfile(a, p.CharOrder)
        failure = abs(ap - means) > three_stds
        assert sum(sum(failure)) / num_elements <= 0.01
Exemple #4
0
class ProfileTests(TestCase):
    """Tests for Profile object"""

    def setUp(self):
        """Create the Profile fixtures used by the tests in this class."""
        # small integer profiles over the two-letter alphabet "AB"
        self.full = Profile(array([[2, 4], [3, 5], [4, 8]]), "AB")
        self.empty = Profile(array([[]]), "AB")
        self.empty_row = Profile(array([[1, 1], [0, 0]]), "AB")
        self.empty_col = Profile(array([[0, 1], [0, 1]]), "AB")

        # DNA profiles with an explicit character order
        consensus_data = array([
            [0.2, 0, 0.8, 0],
            [0, 0.1, 0.2, 0.7],
            [0, 0, 0, 1],
            [0.2, 0.3, 0.4, 0.1],
            [0.5, 0.5, 0, 0],
        ])
        self.consensus = Profile(
            consensus_data, Alphabet=DNA, CharOrder="TCAG")
        not_same_value_data = array([
            [0.3, 0.5, 0.1, 0.1],
            [0.4, 0.6, 0, 0.7],
            [0.3, 0.2, 0, 0],
            [0, 0, 4, 0],
        ])
        self.not_same_value = Profile(
            not_same_value_data, Alphabet=DNA, CharOrder="TCAG")

        # profile containing zero entries, alphabet given as a plain string
        self.zero_entry = Profile(
            array([[0.3, 0.2, 0, 0.5], [0, 0, 0.8, 0.2]]), Alphabet="UCAG")

        # profiles used by the scoring tests
        self.score1 = Profile(
            Data=array([[-1, 0, 1, 2], [-2, 2, 0, 0], [-3, 5, 1, 0]]),
            Alphabet=DNA, CharOrder="ATGC")
        self.score2 = Profile(
            array([[0.2, 0.4, 0.4, 0], [0.1, 0, 0.9, 0], [0.1, 0.2, 0.3, 0.4]]),
            Alphabet="TCAG")

        # one-dimensional data and a 3x4 profile for the prettyPrint tests
        self.oned = Profile(array([0.25, 0.25, 0.25, 0.25]), "ABCD")
        self.pp = Profile(
            array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]), "ABCD")
        
    def test_init(self):
        """__init__: should set all attributes correctly"""
        # constructing without arguments, or with data only, must fail
        self.assertRaises(TypeError, Profile)
        self.assertRaises(TypeError, Profile, array([[2, 3]]))

        # only alphabet
        by_alphabet = Profile(array([[0.2, 0.8], [0.7, 0.3]]), "AB")
        self.assertEqual(by_alphabet.Data, [[0.2, 0.8], [0.7, 0.3]])
        self.assertEqual(by_alphabet.Alphabet, "AB")
        self.assertEqual(by_alphabet.CharOrder, list("AB"))
        encoded = translate("ABBA", by_alphabet._translation_table)
        self.assertEqual(encoded, "\x00\x01\x01\x00")

        # alphabet and char order
        with_order = Profile(
            array([[0.1, 0.2], [0.4, 0.3]]), Alphabet=DNA, CharOrder="AG")
        self.assertEqual(with_order.CharOrder, "AG")
        assert with_order.Alphabet is DNA

        # non-character alphabet
        numeric = Profile(
            array([[0.1, 0.2], [0.4, 0.3]]), Alphabet=[7, 3], CharOrder=[3, 7])
        self.assertEqual(numeric.CharOrder, [3, 7])
        self.assertEqual(numeric.Alphabet, [7, 3])
        self.assertEqual(numeric.Data, [[0.1, 0.2], [0.4, 0.3]])
   
    def test_str(self):
        """__str__: should return string representation of data in profile"""
        # str() of the profile is compared with str() of an equal array
        expected = str(array([[1, 1], [0, 0]]))
        self.assertEqual(str(self.empty_row), expected)

    def test_make_translation_table(self):
        """_make_translation_table: should return correct table from char order

        The profile is built with "ABCDE" and "AB" as second and third
        positional arguments (presumably Alphabet and CharOrder -- confirm
        against Profile.__init__).
        """
        data = array([[0.2, 0.8], [0.7, 0.3]])
        profile = Profile(data, "ABCDE", "AB")
        encoded = translate("ABBA", profile._translation_table)
        self.assertEqual(encoded, "\x00\x01\x01\x00")

    def test_hasValidData(self):
        """hasValidData: should work on full and empty profiles"""
        # the full profile is expected to be valid once normalized
        normalized = self.full.copy()
        normalized.normalizePositions()
        self.assertEqual(normalized.hasValidData(), True)

        # the profile with an all-zero row and the empty profile are not
        for invalid in (self.empty_row, self.empty):
            self.assertEqual(invalid.hasValidData(), False)

    def test_hasValidAttributes(self):
        """hasValidAttributes: should work for different alphabets/char orders
        """
        cases = (
            # CharOrder, expected result
            ("BAC", False),  # self.Data doesn't match len(CharOrder)
            ("AX", False),   # not all chars in CharOrder in Alphabet
            ("CB", True),    # should be fine
        )
        for char_order, expected in cases:
            profile = Profile(
                array([[1, 2], [3, 4]]), Alphabet="ABCD", CharOrder=char_order)
            self.assertEqual(profile.hasValidAttributes(), expected)

    def test_isValid(self):
        """isValid: should work as expected"""
        # everything valid
        all_valid = Profile(
            array([[0.3, 0.7], [0.8, 0.2]]), Alphabet="AB", CharOrder="AB")
        # invalid data, valid attributes
        bad_data = Profile(
            array([[1, 2], [3, 4]]), Alphabet="ABCD", CharOrder="BA")
        # invalid attributes, valid data
        bad_attributes = Profile(
            array([[0.3, 0.7], [0.8, 0.2]]), Alphabet="ABCD", CharOrder="AF")

        self.assertEqual(all_valid.isValid(), True)
        self.assertEqual(bad_data.isValid(), False)
        self.assertEqual(bad_attributes.isValid(), False)

    def test_dataAt(self):
        """dataAt: should work on valid position and character"""
        profile = Profile(
            array([[0.2, 0.4, 0.4, 0], [0.1, 0, 0.9, 0], [0.1, 0.2, 0.3, 0.4]]),
            Alphabet="TCAG")
        self.assertEqual(profile.dataAt(0, 'C'), 0.4)
        self.assertEqual(profile.dataAt(1, 'T'), 0.1)

        # a character outside "TCAG", a negative position and a position
        # beyond the three rows must all raise
        for position, character in ((1, 'U'), (-2, 'T'), (5, 'T')):
            self.assertRaises(ProfileError, profile.dataAt, position, character)

    def test_copy(self):
        """copy: should act as expected while rebinding/modifying attributes

        The copy is expected to reference the same Data, Alphabet and
        CharOrder objects as the original: in-place modifications are
        visible through both, rebinding an attribute affects one only.
        """
        p = Profile(array([[1,1],[.7,.3]]),{'A':'A','G':'G','R':'AG'},"AG")
        p_copy = p.copy()
        assert p.Data is p_copy.Data
        assert p.Alphabet is p_copy.Alphabet
        assert p.CharOrder is p_copy.CharOrder

        #modifying p.Data modifies p_copy.Data
        p.Data[1,1] = 100
        # check the data itself: the array is still shared and the new
        # value is visible through the copy
        assert p.Data is p_copy.Data
        assert p_copy.Data[1,1] == 100
        assert p.Alphabet is p_copy.Alphabet

        #normalizing p.Data rebinds it, so p_copy.Data is unchanged
        p.normalizePositions()
        assert not p.Data is p_copy.Data

        #Adding something to the alphabet changes both p and p_copy
        p.Alphabet['Y']='TC'
        assert p.Alphabet is p_copy.Alphabet

        #Rebinding the CharOrder does only change the original
        p.CharOrder='XX'
        assert not p.CharOrder is p_copy.CharOrder

    def test_normalizePositions(self):
        """normalizePositions: should normalize or raise appropriate error"""
        # each row is divided by its own total (6, 8 and 12)
        normalized = self.full.copy()
        normalized.normalizePositions()
        expected = array([[2/6, 4/6], [3/8, 5/8], [4/12, 8/12]])
        self.assertEqual(normalized.Data, expected)
        self.assertEqual(sum(normalized.Data, 1), [1, 1, 1])

        # an all-zero column is fine as long as every row has a total
        zero_column = self.empty_col.copy()
        zero_column.normalizePositions()
        self.assertEqual(zero_column.Data, array([[0, 1], [0, 1]]))

        # a row that adds up to zero cannot be normalized
        zero_row = self.empty_row.copy()
        self.assertRaises(ProfileError, zero_row.normalizePositions)
        all_zero = Profile(array([[0.0, 0.0]]), "AB")
        self.assertRaises(ProfileError, all_zero.normalizePositions)

        # negative numbers: rows that add up to 1 come out unchanged ...
        rows_sum_to_one = Profile(array([[3, -2], [4, -3]]), "AB")
        rows_sum_to_one.normalizePositions()
        self.assertEqual(rows_sum_to_one.Data, array([[3, -2], [4, -3]]))
        # ... but a row that cancels out to zero raises
        row_cancels = Profile(array([[3, -3], [4, -3]]), "AB")
        self.assertRaises(ProfileError, row_cancels.normalizePositions)

    def test_normalizeSequences(self):
        """normalizeSequences: should normalize or raise appropriate error"""
        # each column is divided by its own total (9 and 17)
        normalized = self.full.copy()
        normalized.normalizeSequences()
        expected = array([[2/9, 4/17], [3/9, 5/17], [4/9, 8/17]])
        self.assertEqual(normalized.Data, expected)
        self.assertEqual(sum(normalized.Data, axis=0), [1, 1])

        # an all-zero row is fine as long as every column has a total
        zero_row = self.empty_row.copy()
        zero_row.normalizeSequences()
        self.assertEqual(zero_row.Data, array([[1, 1], [0, 0]]))

        # a column that adds up to zero cannot be normalized
        zero_column = self.empty_col.copy()
        self.assertRaises(ProfileError, zero_column.normalizeSequences)
        all_zero = Profile(array([[0.0], [0.0]]), "AB")
        self.assertRaises(ProfileError, all_zero.normalizeSequences)

        # negative numbers: columns that add up to 1 come out unchanged ...
        cols_sum_to_one = Profile(array([[3, 4], [-2, -3]]), "AB")
        cols_sum_to_one.normalizeSequences()
        self.assertEqual(cols_sum_to_one.Data, array([[3, 4], [-2, -3]]))
        # ... but a column that cancels out to zero raises
        col_cancels = Profile(array([[3, 4], [-3, -3]]), "AB")
        self.assertRaises(ProfileError, col_cancels.normalizeSequences)

    def test_prettyPrint_without_parameters(self):
        """prettyPrint: should work without a column limit passed in"""
        full = self.full
        self.assertEqual(full.prettyPrint(), "2\t4\n3\t5\n4\t8")
        with_header = full.prettyPrint(include_header=True)
        self.assertEqual(with_header, "A\tB\n2\t4\n3\t5\n4\t8")
        transposed = full.prettyPrint(transpose_data=True)
        self.assertEqual(transposed, "2\t3\t4\n4\t5\t8")
        header_and_transposed = full.prettyPrint(
            include_header=True, transpose_data=True)
        self.assertEqual(header_and_transposed, "A\t2\t3\t4\nB\t4\t5\t8")

        # an empty profile prints as an empty string either way
        self.assertEqual(self.empty.prettyPrint(), "")
        self.assertEqual(self.empty.prettyPrint(transpose_data=True), "")

        # printing still works when the attributes are inconsistent, e.g.
        # when CharOrder has more characters than the data has columns
        mismatched = self.full.copy()
        mismatched.CharOrder = "ABC"
        self.assertEqual(
            mismatched.prettyPrint(include_header=True),
            "A\tB\tC\n2\t4\t \n3\t5\t \n4\t8\t ")
        # with transposed data the surplus character of CharOrder is
        # dropped, because there is no transposed row to go with it
        self.assertEqual(
            mismatched.prettyPrint(include_header=True, transpose_data=True),
            "A\t2\t3\t4\nB\t4\t5\t8")

    def test_prettyPrint_four_cases(self):
        """prettyPrint: with/without header/transpose/limit

        Uses the 3x4 fixture self.pp, whose two-digit entries also exercise
        the padding of narrower values within a column.
        """
        # the former `p = self.full` assignment was dead code: it was
        # overwritten immediately and never used
        p = self.pp
        self.assertEqual(p.prettyPrint(),\
            "1\t 2\t 3\t 4\n5\t 6\t 7\t 8\n9\t10\t11\t12")
        self.assertEqual(p.prettyPrint(column_limit=3),\
            "1\t 2\t 3\n5\t 6\t 7\n9\t10\t11")
        self.assertEqual(p.prettyPrint(column_limit=3, include_header=True),\
            "A\t B\t C\n1\t 2\t 3\n5\t 6\t 7\n9\t10\t11")
        self.assertEqual(p.prettyPrint(column_limit=3, include_header=False,\
            transpose_data=True),\
            "1\t5\t 9\n2\t6\t10\n3\t7\t11\n4\t8\t12")
        self.assertEqual(p.prettyPrint(column_limit=2, include_header=False,\
            transpose_data=True),\
            "1\t5\n2\t6\n3\t7\n4\t8")
        # with a header and transposed data the header column counts
        # towards the limit of three
        self.assertEqual(p.prettyPrint(column_limit=3, include_header=True,\
            transpose_data=True),\
            "A\t1\t5\nB\t2\t6\nC\t3\t7\nD\t4\t8")

    def test_reduce_wrong_size(self):
        """reduce: should fail when profiles have different sizes"""
        two_columns = Profile(array([[1, 0], [0, 1]]), Alphabet="AB")
        three_columns = Profile(array([[1, 0, 0], [1, 0, 0]]), Alphabet="ABC")
        # 2x2 against 2x3: the shapes cannot be combined
        self.assertRaises(ProfileError, two_columns.reduce, three_columns)
        
    def test_reduce_normalization_error(self):
        """reduce: fails when input or output can't be normalized"""
        # input that cannot be normalized raises an error
        self.assertRaises(ProfileError, self.empty.reduce, self.empty, add)
        self.assertRaises(ProfileError, self.full.reduce, self.empty_row, add)

        # skip normalizing the input but normalize the output: the second
        # row of the sum is [0, 0], which cannot be normalized
        positive = Profile(array([[3, 3], [4, 4]]), "AB")
        cancelling = Profile(array([[3, 3], [-4, -4]]), "AB")
        self.assertRaises(
            ProfileError, positive.reduce, cancelling, add, False, True)
    
    def test_reduce_operators(self):
        """reduce: should work fine with different operators"""
        # different operators, normalize input, don't normalize output
        p1 = Profile(array([[1, 0, 0], [0, 1, 0]]), Alphabet="ABC")
        p2 = Profile(array([[1, 0, 0], [0, 0, 1]]), Alphabet="ABC")

        # reduce() called with its defaults
        self.assertEqual(
            p1.reduce(p2).Data, array([[1, 0, 0], [0, 0.5, 0.5]]))

        operator_cases = (
            (add, [[2, 0, 0], [0, 1, 1]]),
            (subtract, [[0, 0, 0], [0, 1, -1]]),
            (multiply, [[1, 0, 0], [0, 0, 0]]),
        )
        for operator, expected in operator_cases:
            reduced = p1.reduce(
                p2, operator, normalize_input=True, normalize_output=False)
            self.assertEqual(reduced.Data, array(expected))

        # dividing by the zeros in p2 is reported as a ProfileError
        self.assertRaises(
            ProfileError, p1.reduce, p2, divide,
            normalize_input=True, normalize_output=False)

        # don't normalize and normalize only input
        p3 = Profile(array([[1, 2], [3, 4]]), Alphabet="AB")
        p4 = Profile(array([[4, 3], [2, 1]]), Alphabet="AB")

        raw_sum = p3.reduce(
            p4, add, normalize_input=False, normalize_output=False)
        self.assertEqual(raw_sum.Data, array([[5, 5], [5, 5]]))
        normalized_sum = p3.reduce(
            p4, add, normalize_input=True, normalize_output=False)
        self.assertFloatEqual(
            normalized_sum.Data,
            array([[19/21, 23/21], [23/21, 19/21]]))

        # normalize input and output
        p5 = Profile(array([[1, 1, 0, 0], [1, 1, 1, 1]]), Alphabet="ABCD")
        p6 = Profile(array([[1, 0, 0, 0], [1, 0, 0, 1]]), Alphabet="ABCD")

        fully_normalized = p5.reduce(
            p6, add, normalize_input=True, normalize_output=True)
        self.assertEqual(
            fully_normalized.Data,
            array([[0.75, 0.25, 0, 0], [0.375, 0.125, 0.125, 0.375]]))

        # it can collapse empty profiles when normalizing is turned off
        collapsed = self.empty.reduce(
            self.empty, normalize_input=False, normalize_output=False)
        self.assertEqual(collapsed.Data.tolist(), [[]])

        # more specific tests of the operators will be in the
        # separate functions

    def test__add_(self):
        """__add__: should not normalize input or output, just add"""
        fractions = Profile(
            array([[0.3, 0.4, 0.1, 0], [0.1, 0.1, 0.1, 0.7]]), Alphabet="ABCD")
        counts = Profile(
            array([[1, 0, 0, 0], [1, 0, 0, 1]]), Alphabet="ABCD")
        total = fractions + counts
        self.assertEqual(
            total.Data, array([[1.3, 0.4, 0.1, 0], [1.1, 0.1, 0.1, 1.7]]))

        # empty + non-empty fails, empty + empty stays empty
        self.assertRaises(ProfileError, self.empty.__add__, fractions)
        self.assertEqual((self.empty + self.empty).Data.tolist(), [[]])

    def test__sub_(self):
        """__sub__: should subtract two profiles, no normalization"""
        fractions = Profile(
            array([[0.3, 0.4, 0.1, 0], [0.1, 0.1, 0.1, 0.7]]), Alphabet="ABCD")
        counts = Profile(
            array([[1, 0, 0, 0], [1, 0, 0, 1]]), Alphabet="ABCD")
        difference = fractions - counts
        # element-wise difference, negative values are kept
        self.assertFloatEqual(
            difference.Data,
            array([[-0.7, 0.4, 0.1, 0], [-0.9, 0.1, 0.1, -0.3]]))

    def test__mul_(self):
        """__mul__: should multiply two profiles, no normalization"""
        left = Profile(
            array([[1, -2, 3, 0], [1, 1, 1, 0.5]]), Alphabet="ABCD")
        right = Profile(
            array([[1, 0, 0, 0], [1, 0, 3, 2]]), Alphabet="ABCD")
        product = left * right
        # element-wise product
        self.assertEqual(product.Data, array([[1, 0, 0, 0], [1, 0, 3, 1]]))

    def test__div_(self):
        """__div__ and __truediv__: always true division b/c __future__.division
        """
        numerator = Profile(array([[2, 3], [4, 5]]), "AB")
        int_zero = Profile(array([[1, 0], [4, 5]]), "AB")      # integer 0
        float_zero = Profile(array([[1, 0.0], [4, 5]]), "AB")  # float 0.0
        no_zero = Profile(array([[1, 2], [8.0, 5]]), "AB")     # no zeros

        # dividing by a zero entry raises, for an integer zero ...
        self.assertRaises(ProfileError, numerator.__truediv__, int_zero)
        # ... and for a float zero (infinity in result data)
        self.assertRaises(ProfileError, numerator.__div__, float_zero)

        quotient = numerator.__div__(no_zero)
        self.assertFloatEqual(quotient.Data, array([[2, 1.5], [0.5, 1]]))
    
    def test_distance(self):
        """distance: should return correct distance between the profiles"""
        base = Profile(array([[2, 4], [3, 1]]), "AB")
        shifted = Profile(array([[4, 6], [5, 3]]), "AB")
        extra_row = Profile(array([[4, 6], [5, 3], [1, 1]]), "AB")
        flat_pair = Profile(array([2, 2]), "AB")
        flat_triple = Profile(array([2, 2, 2]), "AB")
        empty = Profile(array([[]]), "AB")

        # every entry differs by 2: sqrt(4 * 2**2) == 4, in both directions
        self.assertEqual(base.distance(shifted), 4)
        self.assertEqual(shifted.distance(base), 4)
        # a 1D profile of matching width can be compared with a 2D one
        self.assertEqual(base.distance(flat_pair), sqrt(6))
        self.assertEqual(empty.distance(empty), 0)

        # Raises error when frames are not aligned
        self.assertRaises(ProfileError, base.distance, extra_row)
        self.assertRaises(ProfileError, base.distance, flat_triple)

    def test_toOddsMatrix(self):
        """toOddsMatrix: should work on valid data or raise an error"""
        freqs = Profile(
            array([
                [0.1, 0.3, 0.5, 0.1],
                [0.25, 0.25, 0.25, 0.25],
                [0.05, 0.8, 0.05, 0.1],
                [0.7, 0.1, 0.1, 0.1],
                [0.6, 0.15, 0.05, 0.2],
            ]),
            Alphabet="ACTG")
        odds = Profile(
            array([
                [0.4, 1.2, 2, 0.4],
                [1, 1, 1, 1],
                [0.2, 3.2, 0.2, 0.4],
                [2.8, 0.4, 0.4, 0.4],
                [2.4, 0.6, 0.2, 0.8],
            ]),
            Alphabet="ACTG")

        # no symbol_freqs and explicit uniform ones give the same result
        self.assertEqual(freqs.toOddsMatrix().Data, odds.Data)
        assert freqs.Alphabet is freqs.toOddsMatrix().Alphabet
        uniform = [0.25, 0.25, 0.25, 0.25]
        self.assertEqual(freqs.toOddsMatrix(uniform).Data, odds.Data)

        # fails if symbol_freqs has wrong size
        self.assertRaises(ProfileError, freqs.toOddsMatrix, [0.25] * 6)
        self.assertRaises(
            ProfileError, self.zero_entry.toOddsMatrix, [0.1, 0.2, 0.3])

        # works on empty profile
        self.assertEqual(self.empty.toOddsMatrix().Data.tolist(), [[]])

        # works with different input
        self.assertEqual(
            self.zero_entry.toOddsMatrix().Data,
            array([[1.2, 0.8, 0, 2], [0, 0, 3.2, 0.8]]))
        skewed = self.zero_entry.toOddsMatrix([0.1, 0.2, 0.3, 0.4])
        self.assertFloatEqual(
            skewed.Data, array([[3, 1, 0, 1.25], [0, 0, 2.667, 0.5]]), 1e-3)

        # fails when one of the background frequencies is 0
        self.assertRaises(
            ProfileError, self.zero_entry.toOddsMatrix, [0.1, 0.2, 0.3, 0])
        
    def test_toLogOddsMatrix(self):
        """toLogOddsMatrix: should work as expected"""
        # Kept short on purpose: per the original note this mainly depends
        # on toOddsMatrix, which has its own test
        freqs = Profile(
            array([
                [0.1, 0.3, 0.5, 0.1],
                [0.25, 0.25, 0.25, 0.25],
                [0.05, 0.8, 0.05, 0.1],
                [0.7, 0.1, 0.1, 0.1],
                [0.6, 0.15, 0.05, 0.2],
            ]),
            Alphabet="ACTG")
        log_odds = Profile(
            array([
                [-1.322, 0.263, 1., -1.322],
                [0., 0., 0., 0.],
                [-2.322, 1.678, -2.322, -1.322],
                [1.485, -1.322, -1.322, -1.322],
                [1.263, -0.737, -2.322, -0.322],
            ]),
            Alphabet="ACTG")
        # expected values are rounded to three decimals, hence the eps
        self.assertFloatEqual(
            freqs.toLogOddsMatrix().Data, log_odds.Data, eps=1e-3)

        # works on empty matrix
        self.assertEqual(self.empty.toLogOddsMatrix().Data.tolist(), [[]])

    def test__score_indices(self):
        """_score_indices: should work on valid input"""
        observed = self.score1._score_indices(
            array([0, 1, 1, 3, 0, 3]), offset=0)
        self.assertEqual(observed, [6, 2, -3, 0])

        indices = array([3, 1, 2, 0, 2, 2, 3])
        self.assertFloatEqual(
            self.score2._score_indices(indices, offset=0),
            [0.3, 1.4, 0.8, 1.4, 1.7])
        # with offset=3 only the last two scores remain
        self.assertFloatEqual(
            self.score2._score_indices(indices, offset=3), [1.4, 1.7])

        # Invalid input is not handled in this method (validation happens
        # elsewhere), so out-of-range indices surface as an IndexError
        self.assertRaises(
            IndexError, self.score2._score_indices,
            array([3, 1, 63, 0, 4, 2, 3]), offset=3)
        
    def test__score_profile(self):
        """_score_profile: should work on valid input"""
        sharp = Profile(
            array([
                [1, 0, 0, 0],
                [0, 1, 0, 0],
                [0, 0, 0.5, 0.5],
                [0, 0, 0, 1],
                [0.25, 0.25, 0.25, 0.25],
            ]),
            "TCAG")
        mixed = Profile(
            array([
                [0, 1, 0, 0],
                [0.2, 0, 0.8, 0],
                [0, 0, 0.5, 0.5],
                [1/3, 1/3, 0, 1/3],
                [0.25, 0.25, 0.25, 0.25],
            ]),
            "TCAG")

        self.assertFloatEqual(
            self.score2._score_profile(sharp, offset=0), [0.55, 1.25, 0.45])
        self.assertFloatEqual(
            self.score2._score_profile(sharp, offset=2), [0.45])
        self.assertFloatEqual(
            self.score2._score_profile(mixed, offset=0),
            [1.49, 1.043, 0.483], 1e-3)

        # Invalid input is not handled in this method (validation happens
        # elsewhere). An offset that is too large raises nothing here; it
        # just produces an empty result
        too_far = self.score2._score_profile(sharp, offset=3)
        self.assertFloatEqual(too_far.tolist(), [])

    def test_score_sequence(self):
        """score: should work correctly for a plain string as input"""
        score = self.score2.score

        # works on normal valid data
        self.assertEqual(
            self.score1.score("ATTCAC", offset=0), [6, 2, -3, 0])
        self.assertFloatEqual(
            score("TCAAGT", offset=0), [0.5, 1.6, 1.7, 0.5])

        # works with different offset
        self.assertFloatEqual(score("TCAAGT", offset=2), [1.7, 0.5])
        self.assertFloatEqual(score("TCAAGT", offset=3), [0.5])

        # raises error on invalid offset
        self.assertRaises(ProfileError, score, "TCAAGT", offset=4)

        # works on seq of minimal length
        self.assertFloatEqual(score("AGT", offset=0), [0.5])

        # raises error when sequence is too short
        self.assertRaises(ProfileError, score, "", offset=0)

        # raises error on empty profile
        self.assertRaises(ProfileError, self.empty.score, "ACGT")

        # raises error when sequence contains characters that
        # are not in the character order
        self.assertRaises(ProfileError, score, "ACBRT")

    def test_score_sequence_object(self):
        """score: should work correctly on Sequence object as input"""
        expected = [6, 2, -3, 0]

        # sequence created through DNA.Sequence
        dna_seq = DNA.Sequence("ATTCAC")
        self.assertEqual(self.score1.score(dna_seq, offset=0), expected)

        # ModelSequence object
        model_seq = ModelSequence("ATTCAC", Alphabet=DNA.Alphabet)
        self.assertEqual(self.score1.score(model_seq, offset=0), expected)

    def test_score_no_trans_table(self):
        """score: should work when no translation table is present"""
        scores = array([[-1, 0, 1, 2], [-2, 2, 0, 0], [-3, 5, 1, 0]])
        profile = Profile(Data=scores, Alphabet=DNA, CharOrder="ATGC")
        # drop the translation table from the instance before scoring
        del profile.__dict__['_translation_table']
        observed = profile.score(DNA.Sequence("ATTCAC"), offset=0)
        self.assertEqual(observed, [6, 2, -3, 0])

    def test_score_profile(self):
        """score: should work correctly for Profile as input"""
        five_rows = Profile(
            array([
                [1, 0, 0, 0],
                [0, 1, 0, 0],
                [0, 0, 0.5, 0.5],
                [0, 0, 0, 1],
                [0.25, 0.25, 0.25, 0.25],
            ]),
            "TCAG")
        five_rows_mixed = Profile(
            array([
                [0, 1, 0, 0],
                [0.2, 0, 0.8, 0],
                [0, 0, 0.5, 0.5],
                [1/3, 1/3, 0, 1/3],
                [0.25, 0.25, 0.25, 0.25],
            ]),
            "TCAG")
        three_rows = Profile(
            array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1]]), "TCAG")
        two_rows = Profile(array([[1, 0, 0, 0], [0, 1, 0, 0]]), "TCAG")
        other_order = Profile(
            array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1]]), "AGTC")
        score = self.score2.score

        # works on normal valid data
        self.assertFloatEqual(score(five_rows, offset=0), [0.55, 1.25, 0.45])
        self.assertFloatEqual(
            score(five_rows_mixed, offset=0), [1.49, 1.043, 0.483], 1e-3)

        # works with different offset
        self.assertFloatEqual(score(five_rows, offset=1), [1.25, 0.45])
        self.assertFloatEqual(score(five_rows, offset=2), [0.45])

        # raises error on invalid offset
        self.assertRaises(ProfileError, score, five_rows, offset=3)

        # works on profile of minimal length
        self.assertFloatEqual(score(three_rows, offset=0), [0.6])

        # raises error when profile is too short
        self.assertRaises(ProfileError, score, two_rows, offset=0)

        # raises error on empty profile
        self.assertRaises(ProfileError, self.empty.score, five_rows)

        # raises error when character order doesn't match
        self.assertRaises(ProfileError, score, other_order)
 
    def test_rowUncertainty(self):
        """rowUncertainty: should handle full and empty profiles"""
        # four equal entries give 2, two equal entries give 1
        profile = Profile(
            array([[0.25, 0.25, 0.25, 0.25], [0.5, 0.5, 0, 0]]), "ABCD")
        self.assertEqual(profile.rowUncertainty(), [2, 1])

        # profiles without any data yield an empty result
        self.assertEqual(self.empty.rowUncertainty().tolist(), [])
        no_columns = Profile(array([[], [], []]), "")
        self.assertEqual(no_columns.rowUncertainty().tolist(), [])

        # doesn't work on 1D array
        self.assertRaises(ProfileError, self.oned.rowUncertainty)
    
    def test_columnUncertainty(self):
        """columnUncertainty: should handle full and empty profiles"""
        # four equal entries give 2, two equal entries give 1
        profile = Profile(
            array([[0.25, 0.5], [0.25, 0.5], [0.25, 0], [0.25, 0]]), "AB")
        self.assertEqual(profile.columnUncertainty(), [2, 1])

        # for empty cols nothing is returned as the uncertainty
        self.assertEqual(self.empty.columnUncertainty().tolist(), [])
        no_columns = Profile(array([[], [], []]), "")
        self.assertEqual(no_columns.columnUncertainty().tolist(), [])

        # doesn't work on 1D array
        self.assertRaises(ProfileError, self.oned.columnUncertainty)
 
    def test_rowDegeneracy(self):
        """rowDegeneracy: should work as expected"""
        consensus = self.consensus
        uneven = self.not_same_value

        # calling without a cutoff gives the same result as cutoff=.5
        self.assertEqual(consensus.rowDegeneracy(), [1, 1, 1, 2, 1])
        self.assertEqual(
            consensus.rowDegeneracy(cutoff=0.5), [1, 1, 1, 2, 1])
        self.assertEqual(
            consensus.rowDegeneracy(cutoff=0.75), [1, 2, 1, 3, 2])
        # a row that seems to add up to exactly the cutoff is not always
        # recognized as reaching it, because of floating point error; the
        # second row is an example
        self.assertEqual(consensus.rowDegeneracy(cutoff=1), [2, 4, 1, 4, 2])
        # when the cutoff can't be reached, the number of columns in the
        # profile is returned (for each row)
        self.assertEqual(
            consensus.rowDegeneracy(cutoff=1.5), [4, 4, 4, 4, 4])

        self.assertEqual(uneven.rowDegeneracy(cutoff=0.95), [4, 2, 4, 1])
        self.assertEqual(uneven.rowDegeneracy(cutoff=1.4), [4, 3, 4, 1])

        self.assertEqual(self.empty.rowDegeneracy(), [])

    def test_columnDegeneracy(self):
        """columnDegeneracy: should work as expected"""
        # the fixtures are transposed in place, so their columns now hold
        # what their rows held before
        consensus = self.consensus
        consensus.Data = transpose(consensus.Data)
        uneven = self.not_same_value
        uneven.Data = transpose(uneven.Data)

        # calling without a cutoff gives the same result as cutoff=.5
        default_result = consensus.columnDegeneracy()
        self.assertEqual(default_result, [1, 1, 1, 2, 1])
        self.assertEqual(
            consensus.columnDegeneracy(cutoff=0.5), [1, 1, 1, 2, 1])
        self.assertEqual(
            consensus.columnDegeneracy(cutoff=0.75), [1, 2, 1, 3, 2])
        # a column that seems to add up to exactly the cutoff is not
        # always recognized as reaching it, because of floating point
        # error; the second column is an example
        self.assertEqual(
            consensus.columnDegeneracy(cutoff=1), [2, 4, 1, 4, 2])
        # when the cutoff can't be reached, the number of rows in the
        # profile is returned (for each column)
        self.assertEqual(
            consensus.columnDegeneracy(cutoff=1.5), [4, 4, 4, 4, 4])

        self.assertEqual(uneven.columnDegeneracy(cutoff=0.95), [4, 2, 4, 1])
        self.assertEqual(uneven.columnDegeneracy(cutoff=1.4), [4, 3, 4, 1])

        self.assertEqual(self.empty.columnDegeneracy(), [])

    def test_rowMax(self):
        """rowMax should return max value in each row"""
        # largest entry of each of the five rows of the consensus fixture
        expected = array([0.8, 0.7, 1, 0.4, 0.5])
        self.assertEqual(self.consensus.rowMax(), expected)
    
    def test_toConsensus(self):
        """toConsensus: should work with all the different options"""
        consensus_cases = (
            ({'fully_degenerate': False}, "AGGAT"),
            ({'fully_degenerate': True}, "WVGNY"),
            ({'cutoff': 0.75}, "ARGHY"),
            ({'cutoff': 0.95}, "WVGNY"),
            ({'cutoff': 2}, "WVGNY"),
        )
        for options, expected in consensus_cases:
            self.assertEqual(self.consensus.toConsensus(**options), expected)

        uneven_cases = (
            ({'fully_degenerate': False}, "CGTA"),
            ({'fully_degenerate': True}, "NBYA"),
            ({'cutoff': 0.75}, "YSYA"),
            ({'cutoff': 2}, "NBYA"),
            ({'cutoff': 5}, "NBYA"),
            # when both a cutoff and fully_degenerate are given, the
            # cutoff takes priority and is used in the calculation
            ({'cutoff': 0.75, 'fully_degenerate': True}, "YSYA"),
        )
        for options, expected in uneven_cases:
            self.assertEqual(
                self.not_same_value.toConsensus(**options), expected)

        # raises AttributeError when Alphabet doesn't have Degenerates
        plain = Profile(array([[0.2, 0.8], [0.7, 0.3]]), "AB")
        self.assertRaises(AttributeError, plain.toConsensus, cutoff=0.5)

    def test_toConsensus_include_all(self):
        """toConsensus: Should include all possibilities when include_all=True
        """
        first = Profile(
            array([
                [0.2, 0, 0.8, 0],
                [0, 0.1, 0.2, 0.7],
                [0, 0, 0, 1],
                [0.2, 0.3, 0.4, 0.1],
                [0.5, 0.5, 0, 0],
            ]),
            Alphabet=DNA, CharOrder="TCAG")
        self.assertEqual(
            first.toConsensus(cutoff=0.4, include_all=True), "AGGAY")

        # rows containing tied values
        second = Profile(
            array([
                [0.25, 0.25, 0.25, 0.25],
                [0.1, 0.1, 0.1, 0],
                [0.4, 0, 0.4, 0],
                [0, 0.2, 0.2, 0.3],
            ]),
            Alphabet=DNA, CharOrder="TCAG")
        self.assertEqual(
            second.toConsensus(cutoff=0.4, include_all=True), "NHWV")

    def test_randomIndices(self):
        """randomIndices: 99% of new frequencies should be within 3*SD

        Draws n results from a normalized random profile, tabulates the
        observed counts and requires that at most 1% of them deviate from
        the expected count n*p by more than 3*sqrt(n*p*(1-p)).
        """
        r_num, c_num = 100,20
        num_elements = r_num*c_num
        r = random([r_num,c_num])
        p = Profile(r,"A"*c_num)
        p.normalizePositions()
        d = p.Data
        n = 1000

        #Test only works on normalized profile, b/c of 1-d below
        means = n*d
        three_stds = sqrt(d*(1-d)*n)*3
        result = [p.randomIndices() for x in range(n)]
        a = Alignment(transpose(result))

        def absoluteProfile(alignment,char_order):
            """Return an array of absolute counts built from columnFreqs()."""
            #read the alignment that was passed in rather than silently
            #depending on the enclosing variable a
            f = alignment.columnFreqs()
            res = zeros([len(f),len(char_order)])
            for row, freq in enumerate(f):
                for i in freq:
                    #the ordinal of each symbol is used as its column index
                    res[row, ord(i)] = freq[i]
            return res

        ap = absoluteProfile(a,p.CharOrder)
        failure = abs(ap-means) > three_stds
        assert sum(sum(failure))/num_elements <= 0.01

    def test_randomSequence(self):
        """randomSequence: 99% of new frequencies should be within 3*SD

        Draws n sequences from a normalized random profile, tabulates the
        observed character counts and requires that at most 1% of them
        deviate from the expected count n*p by more than 3*sqrt(n*p*(1-p)).
        """
        r_num, c_num = 100,20
        num_elements = r_num*c_num
        alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        r = random([r_num,c_num])
        p = Profile(r,alpha[:c_num])
        p.normalizePositions()
        d = p.Data
        n = 1000

        #Test only works on normalized profile, b/c of 1-d below
        means = n*d
        three_stds = sqrt(d*(1-d)*n)*3

        a = Alignment([p.randomSequence() for x in range(n)])

        def absoluteProfile(alignment,char_order):
            """Return an array of absolute counts built from columnFreqs()."""
            #read the alignment that was passed in rather than silently
            #depending on the enclosing variable a
            f = alignment.columnFreqs()
            res = zeros([len(f),len(char_order)])
            for row, freq in enumerate(f):
                for i in freq:
                    #the column is the character's position in char_order
                    col = char_order.index(i)
                    res[row, col] = freq[i]
            return res

        ap = absoluteProfile(a,p.CharOrder)
        failure = abs(ap-means) > three_stds
        assert sum(sum(failure))/num_elements <= 0.01