def test_embedding_conv_1_layer():
    """Fit a Predictor whose SequenceInput sets `conv_filter_sizes`.

    There is no assertion on predictions; the test passes as long as
    construction and `fit` on two 3-character sequences raise no error.
    """
    sequence_input = SequenceInput(
        length=3,
        variable_length=False,
        conv_filter_sizes={2: 3},
    )
    sigmoid_output = Output(1, activation="sigmoid")
    model = Predictor(inputs=sequence_input, outputs=sigmoid_output)

    sequences = ["SAY", "FFQ"]
    labels = np.array([True, False])
    model.fit(sequences, labels)
def test_predictor_on_more_data():
    """Train on synthetic peptides and compare mean predictions by class.

    Builds a Predictor with one named sequence input ("x") and one named
    sigmoid output ("y"), fits it for 20 epochs on one synthetic dataset,
    predicts on a second one, and asserts that the mean prediction for rows
    flagged `binder` is more than twice the mean for the remaining rows.
    """
    sequence_input = SequenceInput(length=20, name="x", variable_length=True)
    binder_output = Output(dim=1, activation="sigmoid", name="y")
    predictor = Predictor(
        inputs=[sequence_input],
        outputs=[binder_output],
        dense_layer_sizes=[30],
        dense_activation="relu",
    )

    # Training data is generated before test data, as in the original
    # ordering, in case the generator draws from shared random state.
    train_df = synthetic_peptides_by_subsequence(1000)
    test_df = synthetic_peptides_by_subsequence(1000)

    # The DataFrame index is what gets passed as the "x" input.
    train_inputs = {"x": train_df.index.values}
    predictor.fit(train_inputs, train_df.binder.values, epochs=20)

    test_inputs = {"x": test_df.index.values}
    predictions = predictor.predict(test_inputs)
    y_pred = pandas.Series(predictions['y'], index=test_df.index)

    is_binder = test_df.binder
    binder_mean_pred = y_pred[is_binder].mean()
    nonbinder_mean_pred = y_pred[~is_binder].mean()
    print(binder_mean_pred, nonbinder_mean_pred)
    assert binder_mean_pred > nonbinder_mean_pred * 2, (
        binder_mean_pred,
        nonbinder_mean_pred,
    )
def test_model_with_fixed_length_context():
    """Fit a three-input model and check it reproduces its training labels.

    The model takes two fixed-length inputs ("upstream", "downstream") and
    one variable-length input ("peptide"). After 20 epochs on four examples,
    predictions thresholded at 0.5 must equal the boolean training labels.
    """
    model_inputs = {
        "upstream": SequenceInput(length=1, variable_length=False),
        "downstream": SequenceInput(length=1, variable_length=False),
        "peptide": SequenceInput(length=3, variable_length=True),
    }
    model = Predictor(
        inputs=model_inputs,
        outputs=Output(1, activation="sigmoid"),
    )

    labels = np.array([True, False, True, False])
    input_dict = {
        "upstream": ["Q", "A", "L", "I"],
        "downstream": ["S"] * 4,
        "peptide": ["SYF", "QQ", "C", "GLL"],
    }
    model.fit(input_dict, labels, epochs=20)

    # Predict on the same examples the model was trained on.
    predicted = model.predict(input_dict)
    predicted_labels = predicted > 0.5
    matches = labels == predicted_labels
    assert matches.all(), (labels, predicted)
def test_discrete_input_with_str_tokens():
    """Fit a Predictor on string tokens and check prediction ordering.

    Uses a DiscreteInput over the choices "x", "y", "z" with targets
    0, 0, 0.5 and 1.0; after 20 epochs the prediction for "x" must be
    lower than the prediction for "z".
    """
    token_input = DiscreteInput(choices=["x", "y", "z"], embedding_dim=2)
    sigmoid_output = Output(1, "sigmoid")
    pred = Predictor(inputs=token_input, outputs=sigmoid_output)

    tokens = ["x", "x", "y", "z"]
    targets = [0, 0, 0.5, 1.0]
    pred.fit(tokens, targets, epochs=20)

    prediction_for_x = pred.predict(["x"])
    prediction_for_z = pred.predict(["z"])
    assert prediction_for_x < prediction_for_z