def test_encoder_index_lists():
    """Check that encode_index_lists returns one index per character.

    Each input string should come back as a list holding, in order, the
    ``index_dict`` entry of every character in that string.
    """
    enc = Encoder()
    s = enc.index_dict["S"]
    a = enc.index_dict["A"]
    encoded = enc.encode_index_lists(["SSS", "AAA", "SAS"])
    expected = [
        [s, s, s],
        [a, a, a],
        [s, a, s],
    ]
    eq_(encoded, expected)
def test_encoder_index_array():
    """Check that encode_index_array pads short sequences with zeros.

    Three sequences (two of length 3, one of length 4) are encoded with
    ``max_peptide_length=4``; the result must equal a 3x4 array whose
    unused trailing slots are 0.
    """
    enc = Encoder()
    s = enc.index_dict["S"]
    a = enc.index_dict["A"]
    # The expected array uses 0 for the slot past the end of the shorter
    # sequences, so real characters must map to strictly positive indices.
    assert s > 0
    assert a > 0
    encoded = enc.encode_index_array(
        ["SSS", "AAA", "SASA"],
        max_peptide_length=4)
    expected = np.array([
        [s, s, s, 0],
        [a, a, a, 0],
        [s, a, s, a],
    ])
    matches = encoded == expected
    assert matches.all()
def test_model_with_fixed_length_context():
    """Fit a small model on four examples and require it to recall them.

    The model takes a peptide (up to 3 characters) plus one upstream and
    one downstream character. After 20 epochs on the four training
    examples, thresholding the first prediction column at 0.5 must
    reproduce the training labels exactly.
    """
    model = make_variable_length_model_with_fixed_length_context(
        n_upstream=1,
        n_downstream=1,
        max_peptide_length=3)
    enc = Encoder()
    peptides = enc.encode_index_array(
        ["SYF", "QQ", "C", "GLL"],
        max_peptide_length=3)
    upstream = enc.encode_index_array(["Q", "A", "L", "I"])
    downstream = enc.encode_index_array(["S"] * 4)
    labels = np.array([True, False, True, False])
    inputs = {
        "upstream": upstream,
        "downstream": downstream,
        "peptide": peptides
    }
    model.fit(inputs, labels, epochs=20)
    predictions = model.predict(inputs)
    # Only the first output column is compared against the labels.
    predicted_labels = predictions[:, 0] > 0.5
    assert (labels == predicted_labels).all(), (labels, predictions)
def test_encoder_FOFE_bidirectional():
    """Check the output shape of bidirectional FOFE encoding.

    Three sequences of differing length must encode to a 2-D array with
    one row per sequence and 40 columns (presumably 20 per direction --
    TODO confirm against the Encoder implementation).
    """
    # turn off the gap character '-' used for ends of shorter sequences
    enc = Encoder(variable_length_sequences=False)
    sequences = ["AAA", "SSS", "SASA"]
    encoded = enc.encode_FOFE(sequences, bidirectional=True)
    eq_(encoded.shape, (3, 40))
def test_encoder_prepare_sequences_stop_token():
    """Check prepare_sequences output when stop tokens are enabled.

    With ``add_stop_tokens=True`` and a length argument of 5, the
    4-character input "SISI" is expected to gain a "$" followed by one
    "-" character.
    """
    enc = Encoder(add_stop_tokens=True)
    prepared = enc.prepare_sequences(["SISI"], 5)
    eq_(prepared, ["SISI$-"])
def test_encoder_prepare_sequences_padding():
    """Check that prepare_sequences pads a short sequence with "-".

    With a default Encoder and a length argument of 5, the 4-character
    input "SISI" is expected to come back as "SISI-".
    """
    enc = Encoder()
    prepared = enc.prepare_sequences(["SISI"], 5)
    eq_(prepared, ["SISI-"])
def test_encoder_onehot_with_positional_features():
    """Check the one-hot output shape when positional features are on.

    Three length-3 sequences must encode to shape (3, 3, 22). The last
    axis is presumably 20 one-hot columns plus one column for each of the
    two enabled positional features -- TODO confirm against Encoder.
    """
    enc = Encoder(
        variable_length_sequences=False,
        add_normalized_position=True,
        add_normalized_centrality=True)
    sequences = ["AAA", "SSS", "EEE"]
    encoded = enc.encode_onehot(sequences)
    eq_(encoded.shape, (3, 3, 22))
def test_encoder_onehot():
    """Check the one-hot output shape for fixed-length sequences.

    Three length-3 sequences encoded with
    ``variable_length_sequences=False`` must produce an array of shape
    (3, 3, 20).
    """
    enc = Encoder(variable_length_sequences=False)
    sequences = ["AAA", "SSS", "EEE"]
    encoded = enc.encode_onehot(sequences)
    eq_(encoded.shape, (3, 3, 20))