def test_composition():
    """Test sequence compositions.

    Exercises ``composition`` on the sequences loaded from the test fixture:
    relative frequencies must sum to 1 for every sequence, ``round_fraction``
    must control the number of decimals, and absolute counts must sum to the
    length of each sequence.
    """
    # load data
    df = txt_to_df(path + '/tests/docs/test_seq.txt', 0)

    # test relative composition
    comp_rel = composition(df, 'relative')

    # test if frequencies add to 1 -- index with the loop variable so that
    # every row is verified, not just the first one
    for i in range(df.shape[0]):
        assert round(comp_rel.iloc[i, :].sum()) == 1

    # test decimal places: the position of '.' in the reversed string equals
    # the number of digits printed after the decimal point
    comp_dec2 = composition(df, 'relative', round_fraction=2)
    x = str(comp_dec2['A'][0])
    assert x[::-1].find('.') == 2

    comp_dec5 = composition(df, 'relative', round_fraction=5)
    y = str(comp_dec5['A'][0])
    assert y[::-1].find('.') == 5

    # test absolute composition
    comp_abs = composition(df, 'absolute')

    # test if frequencies == sequence length
    all_lengths = [6, 9, 7, 6]
    for i in range(df.shape[0]):
        assert comp_abs.iloc[i, :].sum() == all_lengths[i]
def test_aaindex3():
    """Test AAIndex3.

    Checks the shape of the ``aaindex3`` output, a handful of known index
    values (rounded to 3 decimals), that a NaN-containing index is dropped,
    and the 'zscore' / 'minmax' standardization options.
    """
    # load data
    df = txt_to_df(path + '/tests/docs/test_seq.txt', 0)

    # get aaindex3
    aaind3 = aaindex3(df)

    # test shape
    assert aaind3.shape == (4, 43)

    # test some triangular indices
    TANS760101 = np.array([-4.72, -5.975, -4.18333, -4.04])
    ZHAC000106 = np.array([.196, -.34875, .46666, .972])
    np.testing.assert_equal(np.round(aaind3['TANS760101'], 3),
                            np.round(TANS760101, 3))
    # GODA950101 contains NaNs, so it must not appear in the output
    assert 'GODA950101' not in aaind3
    np.testing.assert_equal(np.round(aaind3['ZHAC000106'], 3),
                            np.round(ZHAC000106, 3))

    # test some square indices
    ZHAC000102 = np.array([-.408, -1.415, .475, 1.532])
    ZHAC000103 = np.array([-.052, -.625, .59166, 1.096])
    ZHAC000105 = np.array([-.242, -.72, .17, .952])
    np.testing.assert_equal(np.round(aaind3['ZHAC000102'], 3),
                            np.round(ZHAC000102, 3))
    np.testing.assert_equal(np.round(aaind3['ZHAC000103'], 3),
                            np.round(ZHAC000103, 3))
    np.testing.assert_equal(np.round(aaind3['ZHAC000105'], 3),
                            np.round(ZHAC000105, 3))

    # test standardization (zscore)
    aaind3_z = aaindex3(df, 'zscore')

    # NOTE: the loops below index *columns* but iterate over the number of
    # rows, so only the leading columns are spot-checked (kept as in the
    # original test).

    # test mean = 0 -- use the loop index so each checked column is verified
    for i in range(aaind3_z.shape[0]):
        assert abs(round(aaind3_z.iloc[:, i].mean())) == 0

    # test std --> 1 (every checked column has the same spread as column 0)
    for i in range(aaind3_z.shape[0]):
        assert round(aaind3_z.iloc[:, i].std(), 1) == \
               round(aaind3_z.iloc[:, 0].std(), 1)

    # test standardization (minmax)
    aaind3_mm = aaindex3(df, 'minmax')

    # test minimum and maximum
    for i in range(aaind3_mm.shape[0]):
        assert round(aaind3_mm.iloc[:, i].min()) == 0
        assert round(aaind3_mm.iloc[:, i].max()) == 1
def test_aaindex2():
    """Test AAIndex2.

    Checks the shape of the ``aaindex2`` output, a handful of known index
    values (rounded to 3 decimals), and the 'zscore' / 'minmax'
    standardization options.
    """
    # load data
    df = txt_to_df(path + '/tests/docs/test_seq.txt', 0)

    # get aaindex2
    aaind2 = aaindex2(df)

    # test shape
    assert aaind2.shape == (4, 94)

    # test some triangular indices
    ALTS910101 = np.array([-2, -.125, .333, -2])
    VOGG950101 = np.array([4.28, 5.2, 6.05, 4.32])
    CROG050101 = np.array([-1.8, .625, .5, -.4])
    np.testing.assert_equal(np.round(aaind2['ALTS910101'], 3),
                            np.round(ALTS910101, 3))
    np.testing.assert_equal(np.round(aaind2['VOGG950101'], 3),
                            np.round(VOGG950101, 3))
    np.testing.assert_equal(np.round(aaind2['CROG050101'], 3),
                            np.round(CROG050101, 3))

    # test some square indices
    LINK010101 = np.array([.0266, .0955, .13, .1276])
    KOSJ950108 = np.array([1.62, 18.0875, 16.05, 14.68])
    DOSZ010101 = np.array([1.32, 15.7625, -1.1833, -5.12])
    np.testing.assert_equal(np.round(aaind2['LINK010101'], 3),
                            np.round(LINK010101, 3))
    np.testing.assert_equal(np.round(aaind2['KOSJ950108'], 3),
                            np.round(KOSJ950108, 3))
    np.testing.assert_equal(np.round(aaind2['DOSZ010101'], 3),
                            np.round(DOSZ010101, 3))

    # test standardization (zscore)
    aaind2_z = aaindex2(df, 'zscore')

    # NOTE: the loops below index *columns* but iterate over the number of
    # rows, so only the leading columns are spot-checked (kept as in the
    # original test).

    # test mean = 0 -- use the loop index so each checked column is verified
    for i in range(aaind2_z.shape[0]):
        assert abs(round(aaind2_z.iloc[:, i].mean())) == 0

    # test std --> 1 (every checked column has the same spread as column 0)
    for i in range(aaind2_z.shape[0]):
        assert round(aaind2_z.iloc[:, i].std(), 1) == \
               round(aaind2_z.iloc[:, 0].std(), 1)

    # test standardization (minmax)
    aaind2_mm = aaindex2(df, 'minmax')

    # test minimum and maximum
    for i in range(aaind2_mm.shape[0]):
        assert round(aaind2_mm.iloc[:, i].min()) == 0
        assert round(aaind2_mm.iloc[:, i].max()) == 1
def test_lengths():
    """Test sequence lengths.

    Verifies both output modes of ``length``: plain integer lengths and the
    one-hot-encoded representation.
    """
    # load data
    seq_df = txt_to_df(path + '/tests/docs/test_seq.txt', 0)

    # integer lengths, one entry per sequence
    expected_int = np.array([6, 9, 7, 6])
    observed_int = length(seq_df, 'int')
    assert np.array_equal(observed_int, expected_int)

    # one-hot-encoded lengths; columns: [6, 7, 9]
    expected_ohe = np.array([
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
    ])
    observed_ohe = length(seq_df, 'ohe')
    assert np.array_equal(observed_ohe, expected_ohe)
def test_conversion():
    """Test txt_to_df conversion.

    Confirms the column names, the frame shape, the parsed sequences and
    that every row carries the label 0.
    """
    # load data
    frame = txt_to_df(path + '/tests/docs/test_seq.txt', 0)

    # column names and overall shape
    first_col, second_col = frame.columns[0], frame.columns[1]
    assert first_col == 'Sequence'
    assert second_col == 'Label'
    assert frame.shape == (4, 2)

    # sequences, in file order
    expected_sequences = ['AGTYLK', 'VCIMMMPFP', 'LRSAHHN', 'AQEEWD']
    for row, expected in enumerate(expected_sequences):
        assert frame['Sequence'][row] == expected

    # labels
    row = 0
    while row < frame.shape[0]:
        assert frame['Label'][row] == 0
        row += 1
def test_aaindex1():
    """Test AAIndex1.

    Checks the shape of the ``aaindex1`` output, a handful of known index
    values (rounded to 3 decimals), and the 'zscore' / 'minmax'
    standardization options.
    """
    # load data
    df = txt_to_df(path + '/tests/docs/test_seq.txt', 0)

    # get aaindex1
    aaind1 = aaindex1(df)

    # test shape
    assert aaind1.shape == (4, 553)

    # test some indices
    ANDN920101 = np.array([4.3, 4.40555, 4.48714, 4.46])
    QIAN880126 = np.array([.01166, -.17111, .05857, -.04333])
    KARS160122 = np.array([2.014, 5.48522, 2.789, 1.751])
    np.testing.assert_equal(np.round(aaind1['ANDN920101'], 3),
                            np.round(ANDN920101, 3))
    np.testing.assert_equal(np.round(aaind1['QIAN880126'], 3),
                            np.round(QIAN880126, 3))
    np.testing.assert_equal(np.round(aaind1['KARS160122'], 3),
                            np.round(KARS160122, 3))

    # test standardization (zscore)
    aaind1_z = aaindex1(df, 'zscore')

    # NOTE: the loops below index *columns* but iterate over the number of
    # rows, so only the leading columns are spot-checked (kept as in the
    # original test).

    # test mean = 0 -- use the loop index so each checked column is verified
    for i in range(aaind1_z.shape[0]):
        assert abs(round(aaind1_z.iloc[:, i].mean())) == 0

    # test std --> 1 (every checked column has the same spread as column 0)
    for i in range(aaind1_z.shape[0]):
        assert round(aaind1_z.iloc[:, i].std(), 1) == \
               round(aaind1_z.iloc[:, 0].std(), 1)

    # test standardization (minmax)
    aaind1_mm = aaindex1(df, 'minmax')

    # test minimum and maximum
    for i in range(aaind1_mm.shape[0]):
        assert round(aaind1_mm.iloc[:, i].min()) == 0
        assert round(aaind1_mm.iloc[:, i].max()) == 1
def test_integer_encode():
    """Test integer encoding.

    Checks the unpadded output of ``integer_encode`` (one integer array per
    sequence) and, with ``padding=True``, that the result is a rectangular
    (4, 9) array whose trailing positions are zero for the shorter
    sequences.
    """
    # load data
    df = txt_to_df(path + '/tests/docs/test_seq.txt', 0)
    enc = integer_encode(df)

    # test array shape and type
    assert enc.shape == (4, )
    assert isinstance(enc, np.ndarray)

    # test array contents
    assert np.array_equal(enc[0], np.array([1, 6, 17, 20, 10, 9]))
    assert np.array_equal(enc[1], np.array([18, 2, 8, 11, 11, 11, 13, 5, 13]))
    assert np.array_equal(enc[2], np.array([10, 15, 16, 1, 7, 7, 12]))
    assert np.array_equal(enc[3], np.array([1, 14, 4, 4, 19, 3]))

    # test padding
    enc = integer_encode(df, padding=True)
    assert enc.shape == (4, 9)

    # Asserting a bare list is always true for a non-empty list, so the
    # element-wise comparisons are reduced with all() to actually be checked.
    # NOTE(review): assumes zeros are appended at the end -- confirm.
    assert all(enc[0][i] == 0 for i in [6, 7, 8])
    assert all(enc[2][i] == 0 for i in [7, 8])
    assert all(enc[3][i] == 0 for i in [6, 7, 8])