Ejemplo n.º 1
0
def test_evaluation_splitting():
    """Check that evaluation never asks for predictions on training samples."""

    class TestAlgorithm(pace.PredictionAlgorithm):
        """Stub algorithm that remembers its training data to detect reuse."""

        def train(self, binders, nonbinders):
            # Keep both training sets so predict() can compare against them.
            self.binders, self.nonbinders = set(binders), set(nonbinders)

        def predict(self, samples):
            # None of the samples to score may have been part of training.
            assert self.binders.isdisjoint(samples)
            assert self.nonbinders.isdisjoint(samples)
            count = len(samples)
            return [1] * count

    pace.evaluate(TestAlgorithm, nbr_test=2)
Ejemplo n.º 2
0
def test_evaluation_filtering():
    """Check that evaluation routes only filter-matching samples to each stage.

    Every call below builds an algorithm that knows which alleles and peptide
    lengths it should see while training and while predicting, and asserts
    that each sample it receives matches the corresponding filter.
    """
    from itertools import chain
    from pace.evaluation import SampleFilter, matches_filter

    def check_samples(sample_filter, samples):
        # Fails on the first sample whose allele or peptide length does not
        # match the given filter.
        for sample in samples:
            assert matches_filter(sample_filter, sample.allele,
                                  len(sample.peptide))

    class TestAlgorithm(pace.PredictionAlgorithm):
        """Stub algorithm that validates the samples handed to each stage."""

        def __init__(self, training_filter, test_filter):
            self.training_filter = training_filter
            self.test_filter = test_filter

        def train(self, binders, nonbinders):
            check_samples(self.training_filter, chain(binders, nonbinders))

        def predict(self, samples):
            check_samples(self.test_filter, samples)
            return [1] * len(samples)

    # Only the allele is selected; both stages expect lengths 8 through 11.
    pace.evaluate(
        lambda: TestAlgorithm(
            SampleFilter(alleles={'A0101'}, lengths={8, 9, 10, 11}),
            SampleFilter(alleles={'A0101'}, lengths={8, 9, 10, 11}),
        ),
        selected_alleles=['A0101'],
        nbr_test=1,
    )

    # Allele and lengths selected; the test stage expects the same selection.
    pace.evaluate(
        lambda: TestAlgorithm(
            SampleFilter(alleles={'A0101'}, lengths={9, 10}),
            SampleFilter(alleles={'A0101'}, lengths={9, 10}),
        ),
        selected_alleles=['A0101'],
        selected_lengths=[9, 10],
        nbr_test=1,
    )

    # A different test allele is requested; the lengths are expected to carry
    # over from the training selection.
    pace.evaluate(
        lambda: TestAlgorithm(
            SampleFilter(alleles={'A0101'}, lengths={8, 11}),
            SampleFilter(alleles={'A0201'}, lengths={8, 11}),
        ),
        selected_alleles=['A0101'],
        selected_lengths=[8, 11],
        test_alleles=['A0201'],
        nbr_test=1,
    )

    # Both the test alleles and the test lengths differ from training.
    pace.evaluate(
        lambda: TestAlgorithm(
            SampleFilter(alleles={'A0101'}, lengths={8, 11}),
            SampleFilter(alleles={'A0101', 'A0201'}, lengths={9, 10}),
        ),
        selected_alleles=['A0101'],
        selected_lengths=[8, 11],
        test_alleles=['A0101', 'A0201'],
        test_lengths=[9, 10],
        nbr_test=1,
    )
Ejemplo n.º 3
0
def test_evaluation_nbr():
    """Check that training receives the requested nonbinder ratio.

    Only the training stage is verified; the test-stage ratio is not checked.
    """

    class TestAlgorithm(pace.PredictionAlgorithm):
        """Stub algorithm that validates the size of its nonbinder set."""

        def __init__(self, nbr_train):
            self.nbr_train = nbr_train

        def train(self, binders, nonbinders):
            actual_nonbinders = len(nonbinders)
            expected_nonbinders = int(len(binders) * self.nbr_train)
            assert actual_nonbinders == expected_nonbinders

        def predict(self, samples):
            return len(samples) * [1]

    # With no overrides, the algorithm expects a training ratio of 1.
    pace.evaluate(lambda: TestAlgorithm(1))

    # Explicit ratios; the value is bound per iteration through the lambda's
    # default argument so each factory keeps its own ratio.
    for ratio in (2, 3):
        pace.evaluate(lambda ratio=ratio: TestAlgorithm(ratio),
                      nbr_train=ratio,
                      nbr_test=1)
Ejemplo n.º 4
0
import pace
import random
import pprint


class PureGuessingAlgorithm(pace.PredictionAlgorithm):
    """Baseline that ignores its inputs and scores every sample at random."""

    def train(self, binders, nonbinders):
        """Do nothing: guessing requires no training."""

    def predict(self, samples):
        """Return one uniform random draw between 0 and 1 per sample."""
        guesses = []
        for _ in samples:
            guesses.append(random.uniform(0, 1))
        return guesses


# Evaluate the guessing algorithm with PACE (no keyword overrides are passed)
# and pretty-print whatever the evaluation returns.
scores = pace.evaluate(PureGuessingAlgorithm)
pprint.pprint(scores)
Ejemplo n.º 5
0
import pace
import random
import pprint

# This is the example from the README.


class FairlyPoorAlgorithm(pace.PredictionAlgorithm):
    """Toy predictor that compares the first character of allele and peptide."""

    def train(self, binders, nonbinders):
        """Do nothing: the prediction rule is fixed and needs no training."""

    def predict(self, samples):
        """Return 1 when allele and peptide start with the same character, else 0."""
        predictions = []
        for sample in samples:
            if sample.allele[0] == sample.peptide[0]:
                predictions.append(1)
            else:
                predictions.append(0)
        return predictions


# Evaluate our algorithm using PACE (no keyword overrides are passed), then
# pretty-print the returned scores.
scores = pace.evaluate(FairlyPoorAlgorithm)
pprint.pprint(scores)
Ejemplo n.º 6
0
import numpy


class RidgeAlgorithm(pace.PredictionAlgorithm):
    """Ridge classifier over one-hot encoded peptides.

    The encoder fitted during training is kept on the instance and reused for
    prediction, so both stages are guaranteed to share one feature layout.
    """

    def train(self, binders, nonbinders):
        """Fit the one-hot encoder and the ridge classifier.

        Args:
            binders: samples labelled 1; each must expose a ``peptide``.
            nonbinders: samples labelled 0; each must expose a ``peptide``.
        """
        # Each peptide is split into a list of its individual characters.
        x = [list(s.peptide) for s in binders]
        x += [list(s.peptide) for s in nonbinders]
        y = [1] * len(binders) + [0] * len(nonbinders)

        # NOTE(review): 9 is presumably the peptide length the encoder
        # expects -- confirm against pace.sklearn.create_one_hot_encoder.
        self.encoder = pace.sklearn.create_one_hot_encoder(9)
        self.encoder.fit(x)
        encoded_x = self.encoder.transform(x).toarray()

        self.clf = sklearn.linear_model.RidgeClassifier().fit(encoded_x, y)

    def predict(self, samples):
        """Return the classifier's predictions for ``samples``.

        Must be called after ``train``; the encoder and classifier fitted
        there are reused here.

        Args:
            samples: samples to score; each must expose a ``peptide``.
        """
        x = [list(s.peptide) for s in samples]

        # Reuse the training-time encoder instead of fitting a new one on the
        # prediction samples, so the columns match what the classifier saw.
        encoded_x = self.encoder.transform(x).toarray()

        return self.clf.predict(encoded_x)


# Seed NumPy's global random number generator before evaluating.
# NOTE(review): presumably this is meant to make the evaluation reproducible
# -- confirm that pace draws its randomness from numpy.random.
numpy.random.seed(31415)
# Evaluate on a single allele and on peptides of length 9 only; the algorithm
# above creates its one-hot encoder with the same value, 9.
scores = pace.evaluate(RidgeAlgorithm,
                       selected_lengths=[9],
                       selected_alleles=['B3501'])
pprint.pprint(scores)
Ejemplo n.º 7
0
                  y=y,
                  verbose=0,
                  batch_size=batch_size,
                  epochs=nEpochs,
                  shuffle=True,
                  validation_split=0.1,
                  class_weight=None,
                  sample_weight=None,
                  initial_epoch=0,
                  callbacks=callbacks)

        self.model = model

    def predict(self, samples):
        """Encode the peptides of ``samples`` and return the model's output.

        Each peptide is split into its characters, encoded with the encoding
        named by ``self.encoding_name``, and passed to ``self.model``.
        """
        residues = []
        for sample in samples:
            residues.append(list(sample.peptide))

        features = pace.featurization.encode(residues, self.encoding_name)
        raw_output = self.model.predict(features)

        # NOTE(review): squeeze() is called without an axis; if the model
        # returns a NumPy array of shape (1, 1) for a single sample, the
        # result would be 0-dimensional -- confirm callers handle that.
        return raw_output.squeeze()


### Evaluate algorithm using PACE.
# Each allele in the list is evaluated on its own, and its results are printed
# right after the allele name.
alleles = ['A0203']
for allele in alleles:
    # The lambda is a zero-argument factory that builds a DonJulioBlanco
    # configured with the 'onehot' encoding name.
    # NOTE(review): the meaning of the 16 passed to load_dataset is not
    # visible here -- confirm against pace.data. The dataset is also loaded
    # again on every loop iteration.
    eval_results = pace.evaluate(lambda: DonJulioBlanco('onehot'),
                                 selected_lengths=[9],
                                 selected_alleles=[allele],
                                 dataset=pace.data.load_dataset(16),
                                 nbr_train=1,
                                 nbr_test=10)
    print(allele)
    pprint.pprint(eval_results)