def test_evaluation_splitting():
    """Verify that evaluation never asks for predictions on training samples."""

    class TestAlgorithm(pace.PredictionAlgorithm):
        """Remembers its training samples and rejects them at prediction time."""

        def train(self, binders, nonbinders):
            # Stored as sets so predict() can do cheap membership checks.
            self.binders = set(binders)
            self.nonbinders = set(nonbinders)

        def predict(self, samples):
            for sample in samples:
                # A sample seen during training must not come back for scoring.
                assert (sample not in self.binders
                        and sample not in self.nonbinders)
            count = len(samples)
            return [1] * count

    pace.evaluate(TestAlgorithm, nbr_test=2)
def test_evaluation_filtering():
    """Verify that evaluation filters samples for the training and test stages.

    Each scenario hands the algorithm the filter it should observe while
    training and the one it should observe while predicting. The algorithm
    asserts that every sample it receives (by allele and peptide length)
    matches the corresponding filter.
    """
    from itertools import chain
    from pace.evaluation import SampleFilter, matches_filter

    class TestAlgorithm(pace.PredictionAlgorithm):
        """Asserts that incoming samples satisfy the expected filters."""

        def __init__(self, training_filter, test_filter):
            self.training_filter = training_filter
            self.test_filter = test_filter

        def train(self, binders, nonbinders):
            for sample in chain(binders, nonbinders):
                assert matches_filter(
                    self.training_filter, sample.allele, len(sample.peptide))

        def predict(self, samples):
            for sample in samples:
                assert matches_filter(
                    self.test_filter, sample.allele, len(sample.peptide))
            return [1] * len(samples)

    def expecting(train_alleles, train_lengths, test_alleles, test_lengths):
        """Return a zero-argument factory for a TestAlgorithm with these filters."""
        def build():
            # Fresh filters (and fresh sets) are built on every invocation.
            training_filter = SampleFilter(
                alleles=set(train_alleles), lengths=set(train_lengths))
            test_filter = SampleFilter(
                alleles=set(test_alleles), lengths=set(test_lengths))
            return TestAlgorithm(training_filter, test_filter)
        return build

    # Only the allele is restricted; lengths 8 through 11 are expected.
    pace.evaluate(
        expecting(['A0101'], [8, 9, 10, 11], ['A0101'], [8, 9, 10, 11]),
        selected_alleles=['A0101'],
        nbr_test=1)

    # Allele and lengths restricted; the test stage sees the same selection.
    pace.evaluate(
        expecting(['A0101'], [9, 10], ['A0101'], [9, 10]),
        selected_alleles=['A0101'],
        selected_lengths=[9, 10],
        nbr_test=1)

    # A different test allele; the test lengths match the selected lengths.
    pace.evaluate(
        expecting(['A0101'], [8, 11], ['A0201'], [8, 11]),
        selected_alleles=['A0101'],
        selected_lengths=[8, 11],
        test_alleles=['A0201'],
        nbr_test=1)

    # Both the test alleles and the test lengths are overridden.
    pace.evaluate(
        expecting(['A0101'], [8, 11], ['A0101', 'A0201'], [9, 10]),
        selected_alleles=['A0101'],
        selected_lengths=[8, 11],
        test_alleles=['A0101', 'A0201'],
        test_lengths=[9, 10],
        nbr_test=1)
def test_evaluation_nbr():
    """Verify that evaluation honors the requested nonbinder ratio in training."""

    class TestAlgorithm(pace.PredictionAlgorithm):
        """Asserts that training data has the expected nonbinder/binder ratio."""

        def __init__(self, nbr_train):
            self.nbr_train = nbr_train

        def train(self, binders, nonbinders):
            expected_nonbinders = int(len(binders) * self.nbr_train)
            assert len(nonbinders) == expected_nonbinders

        def predict(self, samples):
            return [1] * len(samples)

    def expecting(ratio):
        """Return a zero-argument factory for an algorithm expecting `ratio`."""
        return lambda: TestAlgorithm(ratio)

    # No nbr_train is passed here, so this checks that the default ratio is 1.
    pace.evaluate(expecting(1))

    for ratio in (2, 3):
        pace.evaluate(expecting(ratio), nbr_train=ratio, nbr_test=1)
import pace
import random
import pprint


class PureGuessingAlgorithm(pace.PredictionAlgorithm):
    """Baseline that ignores its training data and scores samples at random."""

    def train(self, binders, nonbinders):
        """Do nothing; this algorithm does not learn from the samples."""

    def predict(self, samples):
        """Return one random score drawn from uniform(0, 1) per sample."""
        guesses = []
        for _ in samples:
            guesses.append(random.uniform(0, 1))
        return guesses


# Evaluate the baseline with PACE's default settings and show the scores.
scores = pace.evaluate(PureGuessingAlgorithm)
pprint.pprint(scores)
import pace
import random
import pprint


# This is the example from the README.
class FairlyPoorAlgorithm(pace.PredictionAlgorithm):
    """Predicts 1 when the allele and the peptide start with the same character."""

    def train(self, binders, nonbinders):
        """Do nothing; the prediction rule is fixed."""

    def predict(self, samples):
        """Return 1 or 0 per sample based on the first-character comparison."""
        predictions = []
        for sample in samples:
            first_chars_match = sample.allele[0] == sample.peptide[0]
            predictions.append(1 if first_chars_match else 0)
        return predictions


# Evaluate our algorithm using PACE.
scores = pace.evaluate(FairlyPoorAlgorithm)
pprint.pprint(scores)
import numpy

# Peptide length used throughout this example: it sizes the one-hot encoder
# and is the only length selected for evaluation below.
PEPTIDE_LENGTH = 9


def _peptide_residues(samples):
    """Split each sample's peptide into a list of its individual characters."""
    return [list(sample.peptide) for sample in samples]


class RidgeAlgorithm(pace.PredictionAlgorithm):
    """Ridge classifier trained on one-hot encoded peptide characters."""

    def train(self, binders, nonbinders):
        """Fit the encoder and the classifier; binders are labelled 1, nonbinders 0."""
        x = _peptide_residues(binders) + _peptide_residues(nonbinders)
        y = [1] * len(binders) + [0] * len(nonbinders)

        # Keep the fitted encoder on the instance so predict() encodes samples
        # with exactly the feature columns the classifier was trained on.
        # NOTE(review): assumes create_one_hot_encoder returns an unfitted
        # scikit-learn style encoder (fit/transform) - confirm.
        self.encoder = pace.sklearn.create_one_hot_encoder(PEPTIDE_LENGTH)
        self.encoder.fit(x)
        encoded_x = self.encoder.transform(x).toarray()

        self.clf = sklearn.linear_model.RidgeClassifier().fit(encoded_x, y)

    def predict(self, samples):
        """Return the classifier's predictions for `samples`.

        Must be called after train(), which creates the encoder and classifier.
        """
        x = _peptide_residues(samples)
        # Reuse the encoder fitted in train(). Re-fitting a fresh encoder on
        # the prediction samples could yield a different column layout.
        encoded_x = self.encoder.transform(x).toarray()
        return self.clf.predict(encoded_x)


# Seed numpy's global RNG before evaluating.
numpy.random.seed(31415)
scores = pace.evaluate(RidgeAlgorithm,
                       selected_lengths=[PEPTIDE_LENGTH],
                       selected_alleles=['B3501'])
pprint.pprint(scores)
y=y, verbose=0, batch_size=batch_size, epochs=nEpochs, shuffle=True, validation_split=0.1, class_weight=None, sample_weight=None, initial_epoch=0, callbacks=callbacks) self.model = model def predict(self, samples): x = [list(s.peptide) for s in samples] encoded_x = pace.featurization.encode(x, self.encoding_name) return self.model.predict(encoded_x).squeeze() ### Evaluate algorithm using PACE. alleles = ['A0203'] for allele in alleles: eval_results = pace.evaluate(lambda: DonJulioBlanco('onehot'), selected_lengths=[9], selected_alleles=[allele], dataset=pace.data.load_dataset(16), nbr_train=1, nbr_test=10) print(allele) pprint.pprint(eval_results)