Beispiel #1
0
def setup():
    """Load the allele-specific and pan-allele predictors into module globals."""
    global ALLELE_SPECIFIC_PREDICTOR, PAN_ALLELE_PREDICTOR
    startup()
    allele_specific_path = get_path("models_class1", "models")
    pan_allele_path = get_path("models_class1_pan", "models.with_mass_spec")
    ALLELE_SPECIFIC_PREDICTOR = Class1AffinityPredictor.load(allele_specific_path)
    PAN_ALLELE_PREDICTOR = Class1AffinityPredictor.load(pan_allele_path)
Beispiel #2
0
def setup():
    """Populate the PREDICTORS global with both predictor flavors."""
    global PREDICTORS
    startup()
    allele_specific = Class1AffinityPredictor.load(
        get_path("models_class1", "models"))
    pan_allele = Class1AffinityPredictor.load(
        get_path("models_class1_pan", "models.combined"))
    PREDICTORS = {
        'allele-specific': allele_specific,
        'pan-allele': pan_allele,
    }
def setup():
    """Load the combined pan-allele predictor without optimization."""
    global PAN_ALLELE_PREDICTOR
    startup()
    models_path = get_path("models_class1_pan", "models.combined")
    # optimization_level=0 keeps the individual models separate.
    PAN_ALLELE_PREDICTOR = Class1AffinityPredictor.load(
        models_path,
        optimization_level=0,
    )
Beispiel #4
0
def run_and_check(n_jobs=0, delete=True, additional_args=None):
    """Exercise mhcflurry-calibrate-percentile-ranks end to end.

    Saves a copy of the pan-allele predictor stripped of its percent-rank
    calibration data, runs the calibration command on two alleles, and
    verifies the calibration transforms were written back.

    Parameters
    ----------
    n_jobs : int
        Value passed to --num-jobs.
    delete : bool
        Remove the temporary models directory when done.
    additional_args : list of str, optional
        Extra command-line arguments appended to the invocation.
    """
    # BUG FIX: the original default was a mutable list ([]) shared across
    # calls; use None and create a fresh list per call.
    if additional_args is None:
        additional_args = []
    source_models_dir = get_path("models_class1_pan", "models.combined")
    dest_models_dir = tempfile.mkdtemp(prefix="mhcflurry-test-models")

    # Save a new predictor that has no percent rank calibration data.
    original_predictor = Class1AffinityPredictor.load(source_models_dir)
    print("Loaded predictor", source_models_dir)
    new_predictor = Class1AffinityPredictor(
        class1_pan_allele_models=original_predictor.class1_pan_allele_models,
        allele_to_sequence=original_predictor.allele_to_sequence,
    )
    new_predictor.save(dest_models_dir)
    print("Saved predictor to", dest_models_dir)

    # Reload to confirm the saved copy carries no calibration data.
    new_predictor = Class1AffinityPredictor.load(dest_models_dir)
    assert_equal(len(new_predictor.allele_to_percent_rank_transform), 0)

    args = [
        "mhcflurry-calibrate-percentile-ranks",
        "--models-dir",
        dest_models_dir,
        "--match-amino-acid-distribution-data",
        get_path("data_curated", "curated_training_data.affinity.csv.bz2"),
        "--motif-summary",
        "--num-peptides-per-length",
        "1000",
        "--allele",
        "HLA-A*02:01",
        "HLA-B*07:02",
        "--verbosity",
        "1",
        "--num-jobs",
        str(n_jobs),
    ] + additional_args
    print("Running with args: %s" % args)
    subprocess.check_call(args)

    # Two alleles were calibrated, so two transforms must now exist.
    new_predictor = Class1AffinityPredictor.load(dest_models_dir)
    assert_equal(len(new_predictor.allele_to_percent_rank_transform), 2)

    if delete:
        print("Deleting: %s" % dest_models_dir)
        shutil.rmtree(dest_models_dir)
    else:
        print("Not deleting: %s" % dest_models_dir)
Beispiel #5
0
def initialise_prediction():
    """Load the default MHCflurry predictor into the module-level ``predictor``."""
    global predictor
    # Prevents warnings. No, I'm not building from source
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    from mhcflurry import Class1AffinityPredictor
    predictor = Class1AffinityPredictor.load()
Beispiel #6
0
    def __init__(self,
                 alleles,
                 default_peptide_lengths=None,
                 predictor=None,
                 models_path=None):
        """
        Parameters
        -----------
        alleles : list of str

        default_peptide_lengths : list of int, optional
            Defaults to [9] when not given.

        predictor : mhcflurry.Class1AffinityPredictor (optional)
            MHCflurry predictor to use

        models_path : string
            Models dir to use if predictor argument is None

        Raises
        ------
        UnsupportedAllele
            If any requested allele is not supported by the predictor.
        """
        # BUG FIX: the original default was a mutable list literal ([9]),
        # which is shared across every call to __init__.
        if default_peptide_lengths is None:
            default_peptide_lengths = [9]
        # moving import here since the mhcflurry package imports
        # Keras and its backend (either Theano or TF) which end up
        # slowing down responsive for any CLI application using MHCtools
        from mhcflurry import Class1AffinityPredictor
        BasePredictor.__init__(self,
                               alleles=alleles,
                               default_peptide_lengths=default_peptide_lengths,
                               min_peptide_length=8,
                               max_peptide_length=15)
        if predictor:
            self.predictor = predictor
        elif models_path:
            logging.info("Loading MHCflurry models from %s" % models_path)
            self.predictor = Class1AffinityPredictor.load(models_path)
        else:
            self.predictor = Class1AffinityPredictor.load()

        # relying on BasePredictor and MHCflurry to both normalize
        # allele names the same way using mhcnames
        for allele in self.alleles:
            if allele not in self.predictor.supported_alleles:
                raise UnsupportedAllele(allele)
Beispiel #7
0
        def predict(self, peptides, alleles=None, binary=False, **kwargs):
            """Predict MHCflurry binding affinities for the given peptides.

            peptides may be a single peptide or a list. When alleles is None,
            all supported alleles are used. With binary=True, results are
            1.0/0.0 based on a 500 nM affinity cutoff; otherwise the raw
            affinity is reported. Returns an EpitopePredictionResult indexed
            by (Seq, Method).
            """
            # test whether one peptide or a list
            if not isinstance(peptides, list):
                peptides = [peptides]

            # if no alleles are specified do predictions for all supported alleles
            if alleles is None:
                alleles = self.supportedAlleles
            else:
                # filter for supported alleles
                alleles = filter(lambda a: a in self.supportedAlleles, alleles)

            alleles = self.convert_alleles(alleles)

            # test mhcflurry models are available => download if not
            proc = subprocess.Popen(
                ['mhcflurry-downloads', 'path', 'models_class1'],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            # BUG FIX: the original tested `p is not 0`, which compares the
            # Popen object itself to the int 0 by identity and is therefore
            # always True. Wait for the process and check its exit status.
            if proc.wait() != 0:
                subprocess.call(
                    ['mhcflurry-downloads', 'fetch', 'models_class1'])

            # load model
            predictor = Class1AffinityPredictor.load()

            # predict and assign binding affinities
            result = {}
            for a in alleles:
                allele_repr = self.revert_allele_repr(a)
                result[allele_repr] = {}
                for p in peptides:
                    seq = p.__str__()
                    binding_affinity = predictor.predict(allele=a,
                                                         peptides=[seq])[0]
                    if binary:
                        if binding_affinity <= 500:
                            result[allele_repr][p] = 1.0
                        else:
                            result[allele_repr][p] = 0.0
                    else:
                        result[allele_repr][p] = binding_affinity

            # create EpitopePredictionResult object. This is a multi-indexed DataFrame
            # with Peptide and Method as multi-index and alleles as columns
            df_result = EpitopePredictionResult.from_dict(result)
            df_result.index = pandas.MultiIndex.from_tuples(
                [tuple((i, self.name)) for i in df_result.index],
                names=['Seq', 'Method'])
            return df_result
Beispiel #8
0
    def predict(self, sequence=None, peptides=None, length=11, overlap=1,
                      allele='HLA-A0101', name='', **kwargs):
        """Uses mhcflurry python classes for prediction.

        If peptides is not supplied, fragments of the given length/overlap
        are generated from sequence. Stores and returns the prepared
        prediction dataframe.
        """

        self.sequence = sequence
        from mhcflurry import Class1AffinityPredictor
        predictor = Class1AffinityPredictor.load()
        # BUG FIX: `peptides == None` used equality; identity (`is None`) is
        # the correct test and avoids surprising __eq__ overloads.
        if peptides is None:
            peptides, s = peptutils.create_fragments(seq=sequence,
                                                    length=length, overlap=overlap)
        df = predictor.predict_to_dataframe(peptides=peptides, allele=allele)
        df = self.prepareData(df, name, allele)
        self.data = df
        return df
Beispiel #9
0
def run_and_check(n_jobs=0):
    """Train a small allele-specific model set, calibrate percentile ranks,
    and sanity-check the resulting predictor."""
    models_dir = tempfile.mkdtemp(prefix="mhcflurry-test-models")
    hyperparameters_filename = os.path.join(models_dir, "hyperparameters.yaml")
    # JSON is a subset of YAML, so json.dump into a .yaml file is valid.
    with open(hyperparameters_filename, "w") as fd:
        json.dump(HYPERPARAMETERS, fd)

    train_args = [
        "mhcflurry-class1-train-allele-specific-models",
        "--data",
        get_path("data_curated", "curated_training_data.affinity.csv.bz2"),
        "--hyperparameters",
        hyperparameters_filename,
        "--allele",
        "HLA-A*02:01",
        "HLA-A*03:01",
        "--out-models-dir",
        models_dir,
        "--num-jobs",
        str(n_jobs),
    ]
    print("Running with args: %s" % train_args)
    subprocess.check_call(train_args)

    # Calibrate percentile ranks
    calibrate_args = [
        "mhcflurry-calibrate-percentile-ranks",
        "--models-dir",
        models_dir,
        "--num-peptides-per-length",
        "10000",
        "--num-jobs",
        str(n_jobs),
    ]
    print("Running with args: %s" % calibrate_args)
    subprocess.check_call(calibrate_args)

    trained = Class1AffinityPredictor.load(models_dir)
    predictions = trained.predict(peptides=["SLYNTVATL"],
                                  alleles=["HLA-A*02:01"])
    assert_equal(predictions.shape, (1, ))
    assert_array_less(predictions, 1000)
    df = trained.predict_to_dataframe(peptides=["SLYNTVATL"],
                                      alleles=["HLA-A*02:01"])
    print(df)
    assert "prediction_percentile" in df.columns

    print("Deleting: %s" % models_dir)
    shutil.rmtree(models_dir)
Beispiel #10
0
def test_mhcflurry():
    """Check wrapper predictions agree with the raw MHCflurry predictor."""
    wrapper = MHCflurry(alleles=[DEFAULT_ALLELE])
    binding_predictions = wrapper.predict_subsequences(protein_sequence_dict,
                                                       peptide_lengths=[9])
    eq_(4, len(binding_predictions),
        "Expected 4 binding predictions from %s" % (binding_predictions, ))

    prediction_scores = {}
    for bp in binding_predictions:
        prediction_scores[(bp.peptide, bp.allele)] = bp.affinity

    raw_predictor = Class1AffinityPredictor.load()
    # test one prediction at a time to make sure there's no peptide/allele mixup
    for (peptide, allele), affinity in prediction_scores.items():
        prediction = raw_predictor.predict([peptide], allele=allele)
        assert len(prediction) == 1
        # we've seen results differ a bit so doing an approximate check, not an error condition
        testing.assert_almost_equal(prediction[0], affinity, decimal=0)
Beispiel #11
0
def setup():
    """Load affinity, cleavage, and presentation predictors used by the tests."""
    global AFFINITY_PREDICTOR
    global CLEAVAGE_PREDICTOR
    global CLEAVAGE_PREDICTOR_NO_FLANKING
    global PRESENTATION_PREDICTOR
    startup()
    # max_models=1 keeps these loads fast; optimization_level=0 skips merging.
    AFFINITY_PREDICTOR = Class1AffinityPredictor.load(
        get_path("models_class1_pan", "models.combined"),
        optimization_level=0,
        max_models=1)
    CLEAVAGE_PREDICTOR = Class1ProcessingPredictor.load(
        get_path("models_class1_processing", "models.selected.with_flanks"),
        max_models=1)
    CLEAVAGE_PREDICTOR_NO_FLANKING = Class1ProcessingPredictor.load(
        get_path("models_class1_processing", "models.selected.no_flank"),
        max_models=1)
    PRESENTATION_PREDICTOR = Class1PresentationPredictor.load()
Beispiel #12
0
def do_predictions_mhcflurry(work_item_dicts, constant_data=None):
    """Run MHCflurry affinity predictions for a list of work items.

    Each dict of work items should have keys: work_item_num, peptides, alleles

    Parameters
    ----------
    work_item_dicts : list of dict
    constant_data : dict, optional
        Falls back to the module-level GLOBAL_DATA when omitted.

    Returns
    -------
    list of (work_item_num, result_dict) pairs, where result_dict maps
    "<allele> affinity" to an array of predictions.
    """

    # This may run on the cluster in a way that misses all top level imports,
    # so we have to re-import everything here.
    import time
    from mhcflurry.encodable_sequences import EncodableSequences
    from mhcflurry import Class1AffinityPredictor

    if constant_data is None:
        constant_data = GLOBAL_DATA

    args = constant_data['args']

    assert args.predictor == "mhcflurry"
    assert constant_data['cols'] == ["affinity"]

    predictor = Class1AffinityPredictor.load(args.mhcflurry_models_dir)

    results = []
    for (i, d) in enumerate(work_item_dicts):
        work_item_num = d['work_item_num']
        peptides = d['peptides']
        alleles = d['alleles']

        print("Processing work item", i + 1, "of", len(work_item_dicts))
        result = {}
        results.append((work_item_num, result))
        start = time.time()
        peptides = EncodableSequences.create(peptides)
        # BUG FIX: the inner loop previously reused `i`, shadowing the outer
        # enumerate index; use a distinct name for the allele counter.
        for (allele_num, allele) in enumerate(alleles):
            print("Processing allele %d / %d: %0.2f sec elapsed" %
                  (allele_num + 1, len(alleles), time.time() - start))
            for col in ["affinity"]:
                result["%s %s" % (allele, col)] = predictor.predict(
                    peptides=peptides,
                    allele=allele,
                    throw=False,
                    model_kwargs={
                        'batch_size': args.mhcflurry_batch_size,
                    }).astype(constant_data['args'].result_dtype)
        print("Done predicting in", time.time() - start, "sec")
    return results
Beispiel #13
0
 def predict(self, input_file, allele, epitope_length, iedb_executable_path,
             iedb_retries):
     """Predict MHCflurry affinities for sequences in a FASTA-like input.

     Lines of the form ``>N`` set the current sequence number; other lines
     are tiled into epitopes of epitope_length and predicted. Returns a
     (DataFrame, 'pandas') tuple with ic50/percentile columns.
     """
     predictor = Class1AffinityPredictor.load()
     frames = []
     for line in input_file:
         match = re.search('^>([0-9]+)$', line)
         if match:
             seq_num = match.group(1)
         else:
             epitopes = self.determine_neoepitopes(line.rstrip(),
                                                   epitope_length)
             df = predictor.predict_to_dataframe(allele=allele,
                                                 peptides=epitopes)
             df['seq_num'] = seq_num
             df['start'] = df.index + 1
             df.rename(columns={
                 'prediction': 'ic50',
                 'prediction_percentile': 'percentile'
             },
                       inplace=True)
             frames.append(df)
     # BUG FIX: DataFrame.append was deprecated and removed in pandas 2.0;
     # collect per-sequence frames and concatenate once. An empty frame is
     # returned when no sequence lines were seen, matching the original.
     results = pd.concat(frames) if frames else pd.DataFrame()
     return (results, 'pandas')
# Peptides from PSM rows not already accepted at the smaller q-value cutoff.
seqs = [
    line.split()[1] for line in mztab_read if line.startswith("PSM")
    and line.split()[1] not in seqs_new_smaller_qval
]
# Deduplicate, then keep peptides of length 8-12 containing none of the
# ambiguous/non-standard residues U, X, Z, J, B.
seqs_new_greater_qval = [
    s for s in set(seqs)
    if 7 < len(s) < 13 and not any(c in s for c in "UXZJB")
]

#call mhcflurry
#subprocess.call("mhcflurry-predict --peptides {p} --alleles {a} --out {o}".format(p=" ".join(seqs_new), a=" ".join(alleles), o=sys.argv[-1]))
seqs_filtered = []
# PERF FIX: load the predictor once; the original reloaded the model inside
# the allele loop, repeating expensive model I/O with identical results.
predictor = Class1AffinityPredictor.load()
for allele in alleles:
    print(allele)
    df_pred = predictor.predict_to_dataframe(allele=allele,
                                             peptides=seqs_new_greater_qval)
    # Keep peptides whose predicted affinity passes the CLI threshold.
    seqs_filtered += df_pred[df_pred['prediction'] <= float(
        sys.argv[-5])]['peptide'].values.tolist()

#merge sequence lists and append decoys
seqs_new_all = list(set(seqs_new_smaller_qval + seqs_filtered))
# Decoys are reversed copies of every accepted peptide.
seqs_new_all = seqs_new_all + [s[::-1] for s in seqs_new_all]

#write idXML for filtering
# NOTE(review): the handle is opened here and never explicitly closed in this
# fragment — consider a `with` block; verify downstream code closes it.
op = open(sys.argv[-1], 'w')
op.write(
    '<PeptideIdentification score_type="q-value" higher_score_better="false">'
    + '\n')
for pep in seqs_new_all:
def test_a1_known_epitopes_in_newly_trained_model():
    """Train a small HLA-A*01:01 model and verify a known epitope predicts
    correctly, including after save/load round-trips."""
    allele = "HLA-A*01:01"
    data = pandas.read_csv(
        get_path(
            "data_curated", "curated_training_data.affinity.csv.bz2"))
    mask = (
        (data.allele == allele) &
        (data.peptide.str.len() >= 8) &
        (data.peptide.str.len() <= 15))
    data = data.loc[mask]

    hyperparameters = {
        "max_epochs": 100,
        "patience": 10,
        "early_stopping": True,
        "validation_split": 0.2,
        "random_negative_rate": 0.0,
        "random_negative_constant": 25,
        "peptide_amino_acid_encoding": "BLOSUM62",
        "use_embedding": False,
        "kmer_size": 15,
        "batch_normalization": False,
        "locally_connected_layers": [
            {"filters": 8, "activation": "tanh", "kernel_size": 3},
        ],
        "activation": "relu",
        "output_activation": "sigmoid",
        "layer_sizes": [32],
        "random_negative_affinity_min": 20000.0,
        "random_negative_affinity_max": 50000.0,
        "dense_layer_l1_regularization": 0.001,
        "dropout_probability": 0.0,
    }

    predictor = Class1AffinityPredictor()
    predictor.fit_allele_specific_predictors(
        n_models=2,
        architecture_hyperparameters_list=[hyperparameters],
        allele=allele,
        peptides=data.peptide.values,
        affinities=data.measurement_value.values,
        verbose=0,
    )
    predict_and_check("HLA-A*01:01", "EVDPIGHLY", predictor=predictor)

    def _save_load_check(pred):
        # Round-trip through disk and verify the reloaded predictor agrees.
        models_dir = tempfile.mkdtemp("_models")
        print(models_dir)
        pred.save(models_dir)
        reloaded = Class1AffinityPredictor.load(models_dir)
        predict_and_check("HLA-A*01:01", "EVDPIGHLY", predictor=reloaded)
        shutil.rmtree(models_dir)

    _save_load_check(predictor)

    # A predictor built from just one of the trained models should also work.
    single_model_predictor = Class1AffinityPredictor(
        allele_to_allele_specific_models={
            allele: [predictor.allele_to_allele_specific_models[allele][0]]
        })
    predict_and_check("HLA-A*01:01", "EVDPIGHLY", predictor=single_model_predictor)
    _save_load_check(single_model_predictor)
def setup():
    """Load the downloaded predictor and enable debug logging."""
    global DOWNLOADED_PREDICTOR
    startup()
    DOWNLOADED_PREDICTOR = Class1AffinityPredictor.load()
    logging.basicConfig(level=logging.DEBUG)
import logging
import sys
import traceback

import numpy
import pandas
numpy.random.seed(0)

from mhcflurry import Class1AffinityPredictor

from nose.tools import eq_, assert_raises
from numpy import testing

from mhcflurry.downloads import get_path
from mhcflurry.testing_utils import cleanup, startup

DOWNLOADED_PREDICTOR = Class1AffinityPredictor.load()


def setup():
    """Reload the module-level predictor and enable debug logging."""
    global DOWNLOADED_PREDICTOR
    startup()
    DOWNLOADED_PREDICTOR = Class1AffinityPredictor.load()
    logging.basicConfig(level=logging.DEBUG)


def teardown():
    """Release the module-level predictor and clean up global test state."""
    global DOWNLOADED_PREDICTOR
    DOWNLOADED_PREDICTOR = None
    cleanup()

Beispiel #18
0
    def check_allele_valid(self, allele):
        """Exit the program with a helpful message if *allele* is unsupported."""
        if allele in self.valid_allele_names():
            return
        method = self.__class__.__name__
        sys.exit(
            "Allele %s not valid for method %s. Run `pvacseq valid_alleles %s` for a list of valid allele names."
            % (allele, method, method))


class MHCI(PredictionClass, metaclass=ABCMeta):
    """Abstract base for MHC class I prediction methods."""

    @property
    def needs_epitope_length(self):
        # Class I predictions always require an explicit epitope length.
        return True


mhcflurry_predictor = Class1AffinityPredictor.load()


class MHCflurry(MHCI):
    def valid_allele_names(self):
        """Alleles supported by the shared module-level MHCflurry predictor."""
        return mhcflurry_predictor.supported_alleles

    def check_length_valid_for_allele(self, length, allele):
        """MHCflurry accepts every length in its supported range for any allele."""
        return True

    def valid_lengths_for_allele(self, allele):
        """Peptide lengths (8-14) supported by MHCflurry for any allele."""
        return list(range(8, 15))

    def determine_neoepitopes(self, sequence, length):
        epitopes = []
        for i in range(0, len(sequence) - length + 1):
Beispiel #19
0
def run_and_check(n_jobs=0, delete=True, additional_args=None):
    """Train pan-allele models on an HLA-A subset, run model selection,
    and sanity-check the selected predictor.

    Parameters
    ----------
    n_jobs : int
        Value passed to --num-jobs.
    delete : bool
        Remove the temporary model directories when done.
    additional_args : list of str, optional
        Extra command-line arguments appended to both invocations.
    """
    # BUG FIX: the original default was a mutable list ([]), shared across
    # calls; use None and create a fresh list per call.
    if additional_args is None:
        additional_args = []
    models_dir = tempfile.mkdtemp(prefix="mhcflurry-test-models")
    hyperparameters_filename = os.path.join(models_dir, "hyperparameters.yaml")
    # JSON is a subset of YAML, so json.dump into a .yaml file is valid.
    with open(hyperparameters_filename, "w") as fd:
        json.dump(HYPERPARAMETERS_LIST, fd)

    data_df = pandas.read_csv(
        get_path("data_curated", "curated_training_data.no_mass_spec.csv.bz2"))
    selected_data_df = data_df.loc[data_df.allele.str.startswith("HLA-A")]
    selected_data_df.to_csv(os.path.join(models_dir, "_train_data.csv"),
                            index=False)

    args = [
        "mhcflurry-class1-train-pan-allele-models",
        "--data",
        os.path.join(models_dir, "_train_data.csv"),
        "--allele-sequences",
        get_path("allele_sequences", "allele_sequences.csv"),
        "--hyperparameters",
        hyperparameters_filename,
        "--out-models-dir",
        models_dir,
        "--num-jobs",
        str(n_jobs),
        "--num-folds",
        "2",
        "--verbosity",
        "1",
    ] + additional_args
    print("Running with args: %s" % args)
    subprocess.check_call(args)

    # Run model selection
    models_dir_selected = tempfile.mkdtemp(
        prefix="mhcflurry-test-models-selected")
    args = [
        "mhcflurry-class1-select-pan-allele-models",
        "--data",
        os.path.join(models_dir, "train_data.csv.bz2"),
        "--models-dir",
        models_dir,
        "--out-models-dir",
        models_dir_selected,
        "--max-models",
        "1",
        "--num-jobs",
        str(n_jobs),
    ] + additional_args
    print("Running with args: %s" % args)
    subprocess.check_call(args)

    result = Class1AffinityPredictor.load(models_dir_selected,
                                          optimization_level=0)
    assert_equal(len(result.neural_networks), 2)
    predictions = result.predict(peptides=["SLYNTVATL"],
                                 alleles=["HLA-A*02:01"])
    assert_equal(predictions.shape, (1, ))
    assert_array_less(predictions, 1000)

    if delete:
        print("Deleting: %s" % models_dir)
        shutil.rmtree(models_dir)
        shutil.rmtree(models_dir_selected)
Beispiel #20
0
 def valid_allele_names(self):
     """Alleles supported by a freshly loaded default MHCflurry predictor."""
     loaded = Class1AffinityPredictor.load()
     return loaded.supported_alleles
def setup():
    """Load the default downloaded predictor into the module global."""
    global DOWNLOADED_PREDICTOR
    startup()
    DOWNLOADED_PREDICTOR = Class1AffinityPredictor.load()
Beispiel #22
0
        def predict(self, peptides, alleles=None, binary=False, **kwargs):
            """Predict MHCflurry binding affinities, grouped by peptide length.

            peptides may be a single peptide or a list. When alleles is None,
            all supported alleles are used. With binary=True, results are
            1.0/0.0 based on a 500 nM affinity cutoff. Raises ValueError when
            no prediction could be made for any peptide/allele combination.
            """
            # test whether one peptide or a list
            if not isinstance(peptides, list):
                peptides = [peptides]

            # if no alleles are specified do predictions for all supported alleles
            if alleles is None:
                alleles = self.supportedAlleles
            else:
                # filter for supported alleles
                alleles = filter(lambda a: a in self.supportedAlleles, alleles)
            alleles = self.convert_alleles(alleles)

            # test mhcflurry models are available => download if not
            proc = subprocess.Popen(
                ['mhcflurry-downloads', 'path', 'models_class1'],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            # BUG FIX: the original tested `p is not 0`, which compares the
            # Popen object itself to the int 0 by identity and is therefore
            # always True. Wait for the process and check its exit status.
            if proc.wait() != 0:
                subprocess.call(
                    ['mhcflurry-downloads', 'fetch', 'models_class1'])

            # load model
            predictor = Class1AffinityPredictor.load()

            # prepare results dictionary
            result = defaultdict(defaultdict)

            # group peptides by length
            peptides.sort(key=len)
            for length, peps in itertools.groupby(peptides, key=len):
                if length not in self.supportedLength:
                    # logging.warn is a deprecated alias of logging.warning.
                    logging.warning(
                        "Peptide length must be at least %i or at most %i for %s but is %i"
                        % (min(self.supportedLength), max(
                            self.supportedLength), self.name, length))
                    continue
                peps = list(peps)

                # predict and assign binding affinities
                for a in alleles:
                    allele_repr = self.revert_allele_repr(a)
                    for p in peps:
                        binding_affinity = predictor.predict(allele=a,
                                                             peptides=[str(p)
                                                                       ])[0]
                        if binary:
                            if binding_affinity <= 500:
                                result[allele_repr][p] = 1.0
                            else:
                                result[allele_repr][p] = 0.0
                        else:
                            result[allele_repr][p] = binding_affinity

            if not result:
                raise ValueError(
                    "No predictions could be made with " + self.name +
                    " for given input. Check your epitope length and HLA allele combination."
                )

            # create EpitopePredictionResult object. This is a multi-indexed DataFrame
            # with Peptide and Method as multi-index and alleles as columns
            df_result = EpitopePredictionResult.from_dict(result)
            df_result.index = pandas.MultiIndex.from_tuples(
                [tuple((i, self.name)) for i in df_result.index],
                names=['Seq', 'Method'])
            return df_result
Beispiel #23
0
import numpy as np
import pandas as pd
from pathlib import Path
from mhcflurry import Class1AffinityPredictor
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

# Load data and models - change path as necessary
DATA = Path("./data")
ms_file = DATA / "abelin_peptides.mhcflurry_no_mass_spec.csv"
# NOTE(review): hard-coded per-user model cache path — adjust for your machine.
MODELS = Path("/Users/haemin/Library/Application Support/mhcflurry/4/1.4.0")

no_ms_model = MODELS / "models_class1_selected_no_mass_spec/models"
no_ms_predictor = Class1AffinityPredictor.load(no_ms_model)

# Read data file and initial cleanup
ms = pd.read_csv(ms_file)
# Rename the existing score column so "mhcflurry4" below can hold new scores.
ms = ms.rename(columns={"mhcflurry": "mhcflurry2"})
# Placeholder (-1) scores; presumably filled in by the prediction loop that
# follows this excerpt — TODO confirm.
ms["mhcflurry4"] = np.full_like(ms.mhcflurry2, -1)
# Keep only rows whose allele the loaded predictor supports.
ms = ms.loc[ms.allele.isin(no_ms_predictor.supported_alleles), :]
ms["peptide_len"] = ms.peptide.str.len()
# Bucket every peptide longer than 12 residues into a single "13" bin.
ms.loc[ms["peptide_len"] > 12, "peptide_len"] = 13
'''
    Generate Figure A
'''
# Compute PPV values
models = ["netmhc", "netmhcpan", "mhcflurry2", "mhcflurry4"]
alleles = ms.allele[ms.allele.isin(no_ms_predictor.supported_alleles)].unique()

# Compute predictions for mhcflurry4
Beispiel #24
0
def run_and_check_with_model_selection(n_jobs=1):
    models_dir1 = tempfile.mkdtemp(prefix="mhcflurry-test-models")
    hyperparameters_filename = os.path.join(models_dir1,
                                            "hyperparameters.yaml")

    # Include one architecture that has max_epochs = 0. We check that it never
    # gets selected in model selection.
    hyperparameters = [
        deepcopy(HYPERPARAMETERS[0]),
        deepcopy(HYPERPARAMETERS[0]),
    ]
    hyperparameters[-1]["max_epochs"] = 0
    with open(hyperparameters_filename, "w") as fd:
        json.dump(hyperparameters, fd)

    args = [
        "mhcflurry-class1-train-allele-specific-models",
        "--data",
        get_path("data_curated", "curated_training_data.affinity.csv.bz2"),
        "--hyperparameters",
        hyperparameters_filename,
        "--allele",
        "HLA-A*02:01",
        "HLA-A*03:01",
        "--out-models-dir",
        models_dir1,
        "--num-jobs",
        str(n_jobs),
        "--held-out-fraction-reciprocal",
        "10",
        "--n-models",
        "1",
    ]
    print("Running with args: %s" % args)
    subprocess.check_call(args)

    result = Class1AffinityPredictor.load(models_dir1)
    assert_equal(len(result.neural_networks), 4)

    models_dir2 = tempfile.mkdtemp(prefix="mhcflurry-test-models")
    args = [
        "mhcflurry-class1-select-allele-specific-models",
        "--data",
        get_path("data_curated", "curated_training_data.affinity.csv.bz2"),
        "--exclude-data",
        models_dir1 + "/train_data.csv.bz2",
        "--out-models-dir",
        models_dir2,
        "--models-dir",
        models_dir1,
        "--num-jobs",
        str(n_jobs),
        "--mse-max-models",
        "1",
        "--unselected-accuracy-scorer",
        "combined:mass-spec,mse",
        "--unselected-accuracy-percentile-threshold",
        "95",
    ]
    print("Running with args: %s" % args)
    subprocess.check_call(args)

    result = Class1AffinityPredictor.load(models_dir2)
    assert_equal(len(result.neural_networks), 2)
    assert_equal(len(result.allele_to_allele_specific_models["HLA-A*02:01"]),
                 1)
    assert_equal(len(result.allele_to_allele_specific_models["HLA-A*03:01"]),
                 1)
    assert_equal(
        result.allele_to_allele_specific_models["HLA-A*02:01"]
        [0].hyperparameters["max_epochs"], 500)
    assert_equal(
        result.allele_to_allele_specific_models["HLA-A*03:01"]
        [0].hyperparameters["max_epochs"], 500)

    print("Deleting: %s" % models_dir1)
    print("Deleting: %s" % models_dir2)
    shutil.rmtree(models_dir1)