def setUp(self):
    """
    Prepare the shared fixture: three alleles, a list of peptides, the
    "BIMAS" predictions for them and one threshold per allele name.
    """
    peptide_seqs = """SFSIFLLAL GHRMAWDMM VYEADDVIL CFTPSPVVV FLLLADARV GPADGMVSK YLYDHLAPM GLRDLAVAV GPTPLLYRL TWVLVGGVL IELGGKPAL LAGGVLAAV QYLAGLSTL NFVSGIQYL VLSDFKTWL ARPDYNPPL KLLPRLPGV RHTPVNSWL GLYLFNWAV ALYDVVSTL RRCRASGVL WPLLLLLLA VTYSLTGLW YFVIFFVAA"""
    allele_names = ("HLA-A*01:01", "HLA-B*07:02", "HLA-C*03:01")

    self.proteins = []
    self.alleles = [Allele(allele_name) for allele_name in allele_names]
    # the sequences are whitespace separated inside the literal
    self.peptides = [Peptide(seq) for seq in peptide_seqs.split()]

    predictor = EpitopePredictorFactory("BIMAS")
    self.result = predictor.predict(self.peptides, self.alleles)

    # every allele shares the same threshold value
    self.thresh = dict.fromkeys(("A*01:01", "B*07:02", "C*03:01"), 10)
def test_unsupported_allele_length_combination(self):
    """
    Assemble the fixture epitopes with spacers for two alleles and compare
    the complete solution against the expected epitope/spacer/epitope order.

    The assembly is solved with solver="cbc".
    NOTE(review): the previous docstring said GLPK has to be installed --
    confirm which solver this test really depends on.

    :return:
    """
    epi_pred = EpitopePredictorFactory("Syfpeithi")
    cl_pred = CleavageSitePredictorFactory("PCM")
    alleles = [
        Allele("HLA-A*02:01", prob=0.5),
        Allele("HLA-A*26:01", prob=0.5),
    ]

    sbws = EpitopeAssemblyWithSpacer(self.epis, cl_pred, epi_pred, alleles, solver="cbc")
    sol = sbws.solve()

    # Compare whole lists: zip() stops at the shorter input, so an
    # element-wise all(...) over zip(expected, sol) would also succeed for an
    # empty or truncated solution.
    self.assertEqual(["GHRMAWDMM", "HH", "VYEADDVIL"], [str(part) for part in sol])
def test_unsupported_allele_length_combination_exception(self):
    """
    Solving the spacer assembly for the single allele HLA-A*26:01 has to
    raise a ValueError.

    The assembly is configured with solver="cbc".

    :return:
    """
    only_allele = Allele("HLA-A*26:01", prob=0.5)
    assembler = EpitopeAssemblyWithSpacer(
        self.epis,
        CleavageSitePredictorFactory("PCM"),
        EpitopePredictorFactory("Syfpeithi"),
        [only_allele],
        solver="cbc")

    with self.assertRaises(ValueError):
        assembler.solve()
def revert_allele_repr(self, name):
    """
    Turn a converted allele name back into an allele object.

    Names starting with "H-2-" are wrapped in a MouseAllele unchanged. Any
    other name is split at '-'; a '*' is re-inserted after the first four
    characters of the second (and, if present, third) part. Names with more
    than two parts yield a CombinedAllele, all others an Allele.

    :param str name: the converted allele name
    :return: the allele object built from the restored name
    """
    def with_star(part):
        # re-insert the '*' after the first four characters
        return part[:4] + '*' + part[4:]

    if name.startswith("H-2-"):
        return MouseAllele(name)

    # since we need to support single and double mhc2 alleles
    parts = name.split('-')
    if len(parts) > 2:
        return CombinedAllele('-'.join([parts[0], with_star(parts[1]), with_star(parts[2])]))
    return Allele('-'.join([parts[0], with_star(parts[1])]))
def setUp(self):
    """
    Create the fixture used by the assembly tests: two epitopes and a single
    allele with probability 0.5.
    """
    # split() without an argument separates on any run of whitespace, so the
    # two epitopes are parsed correctly whether the literal separates them by
    # a blank or a line break (splitting on "\n" only would turn a blank
    # separated literal into one bogus peptide). A real list is stored so the
    # fixture can be iterated more than once.
    self.epis = [Peptide(seq) for seq in "GHRMAWDMM VYEADDVIL".split()]
    self.alleles = [Allele("HLA-A*02:01", prob=0.5)]
def predict(self, peptides, alleles=None, **kwargs):
    """
    Returns predictions for given peptides and :class:`~Fred2.Core.Allele.Allele`. If no
    :class:`~Fred2.Core.Allele.Allele` are given, predictions for all available models are made.

    :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
    :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
    :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
    :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or class:`~Fred2.Core.Allele.Allele`
    :param kwargs: optional parameter (not used yet)
    :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
    :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
    :raises ValueError: if an input element is not a Peptide/Allele or if no prediction could be made at all
    """
    def load_allele_model(allele, length):
        # matrices are stored as modules named "<allele>_<length>" that
        # expose an attribute of the same name
        allele_model = "%s_%i" % (allele, length)
        module = __import__("Fred2.Data.pssms." + self.name + ".mat." + allele_model,
                            fromlist=[allele_model])
        return getattr(module, allele_model)

    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    if alleles is None:
        alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
    else:
        if isinstance(alleles, Allele):
            alleles = [alleles]
        if any(not isinstance(a, Allele) for a in alleles):
            raise ValueError("Input is not of type Allele")
    # converted allele name -> original allele object
    alleles_string = dict(zip(self.convert_alleles(alleles), alleles))

    result = {}
    # groupby() only merges *adjacent* items, so the sequences have to be
    # sorted by length first; otherwise one length shows up in several groups.
    for length, group in itertools.groupby(sorted(pep_seqs, key=len), key=len):
        peps = list(group)
        # dynamically import prediction PSSMs for alleles and predict
        if length not in self.supportedLength:
            warnings.warn("Peptide length of %i is not supported by %s" % (length, self.name))
            continue

        for conv_allele, allele in alleles_string.items():
            try:
                pssm = load_allele_model(conv_allele, length)
            except ImportError:
                warnings.warn("No model found for %s with length %i" % (allele, length))
                continue

            # keep the scores already collected for this allele from other
            # peptide lengths instead of starting over with an empty dict
            scores = result.setdefault(allele, {})
            for p in peps:
                # per-position matrix entries plus the optional constant
                # term stored as pssm[-1]["con"]
                score = sum(pssm[i].get(p[i], 0.0) for i in range(length)) + pssm.get(-1, {}).get("con", 0)
                score /= -length
                # rescale with the intercept and slope stored with the matrix
                score -= pssm[-1]["intercept"]
                score /= pssm[-1]["slope"]
                score = math.pow(10, score)
                # clamp to the interval [0.0001, 1e6]
                if score < 0.0001:
                    score = 0.0001
                elif score > 1e6:
                    score = 1e6
                scores[pep_seqs[p]] = score

    if not result:
        raise ValueError("No predictions could be made with " + self.name + " for given input. Check your "
                         "epitope length and HLA allele combination.")

    df_result = EpitopePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples(
        [tuple((i, self.name)) for i in df_result.index], names=['Seq', 'Method'])
    return df_result
def predict(self, peptides, alleles=None, **kwargs):
    """
    Returns predictions for given peptides and :class:`~Fred2.Core.Allele.Allele`. If no
    :class:`~Fred2.Core.Allele.Allele` are given, predictions for all available models are made.

    The score of a peptide is the sum over its positions of the log
    enrichment of the residue times the importance of the position.
    Positions contained in the matrix imported for the allele are skipped;
    if no matrix can be imported, every position contributes.

    :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
    :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
    :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
    :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
    :param kwargs: optional parameter (not used yet)
    :return: Returns a :class:`pandas.DataFrame` object with the prediction results
    :rtype: :class:`pandas.DataFrame`
    :raises ValueError: if an input element is not a Peptide/Allele or if no prediction could be made at all
    """
    def load_allele_model(allele):
        # matrices are stored as modules named after the converted allele
        # that expose an attribute of the same name
        allele_model = "%s" % allele
        module = __import__("Fred2.Data.pssms." + self.name + ".mat." + allele_model,
                            fromlist=[allele_model])
        return getattr(module, allele_model)

    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    if alleles is None:
        alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
    else:
        if isinstance(alleles, Allele):
            alleles = [alleles]
        if any(not isinstance(a, Allele) for a in alleles):
            raise ValueError("Input is not of type Allele")
    # converted allele name -> original allele object
    alleles_string = dict(zip(self.convert_alleles(alleles), alleles))

    result = {}
    # groupby() needs its input sorted by the grouping key
    for length, group in itertools.groupby(sorted(pep_seqs, key=len), key=len):
        if self.supportedLength is not None and length not in self.supportedLength:
            warnings.warn("Peptide length of %i is not supported by %s" % (length, self.name))
            continue
        peps = list(group)

        # The weights only depend on the peptide length: for more than nine
        # residues (length - 9) entries of 0.30 are inserted after the fifth
        # weight.
        if length <= 9:
            importance = self.__importance
        else:
            importance = self.__importance[:5] + ((length - 9) * [0.30]) + self.__importance[5:]

        for conv_allele, allele in alleles_string.items():
            scores = result.setdefault(allele, {})

            # load matrix; without one no position is skipped
            try:
                pssm = load_allele_model(conv_allele)
            except ImportError:
                pssm = []

            for p in peps:
                scores[pep_seqs[p]] = sum(
                    self.__log_enrichment.get(p[i], 0.0) * importance[i]
                    for i in range(length) if i not in pssm)

    if not result:
        raise ValueError("No predictions could be made with " + self.name + " for given input. Check your "
                         "epitope length and HLA allele combination.")

    df_result = EpitopePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples(
        [tuple((i, self.name)) for i in df_result.index], names=['Seq', 'Method'])
    return df_result
def __main__():
    """
    Command line entry point: check whether the requested prediction tools
    support the given alleles and peptide lengths and write the findings to
    "model_report.txt" in the current working directory.

    Peptide lengths are taken from the peptide file (-p) if given, otherwise
    from the inclusive range --min_length .. --max_length.

    NOTE(review): the messages below refer to '--show_supported_models', but
    no such option is defined on this parser.

    :raises ValueError: if a requested tool version is not available, if none
                        of the alleles is supported by any requested tool, or
                        if none of the peptide lengths is supported
    """
    # the text has to be passed as description=; the first positional
    # argument of ArgumentParser is the program name shown in the usage line
    parser = argparse.ArgumentParser(
        description="Write out information about supported models by Fred2 for installed predictor tool versions."
    )
    parser.add_argument('-p', "--peptides", help="File with one peptide per line")
    parser.add_argument('-c', "--mhcclass", default=1, help="MHC class I or II")
    parser.add_argument('-l', "--max_length", help="Maximum peptide length", type=int)
    parser.add_argument('-ml', "--min_length", help="Minimum peptide length", type=int)
    parser.add_argument('-a', "--alleles", help="<Required> MHC Alleles", required=True, type=str)
    parser.add_argument('-t', '--tools', help='Tools requested for peptide predictions', required=True, type=str)
    parser.add_argument('-v', '--versions', help='<Required> File with used software versions.', required=True)
    args = parser.parse_args()

    selected_methods = args.tools.split(',')

    # every row of the versions file has the form "<tool ...>:<version>"
    with open(args.versions, 'r') as versions_file:
        tool_version = [(row[0].split()[0], str(row[1]))
                        for row in csv.reader(versions_file, delimiter=":")]

    # NOTE this needs to be updated, if a newer version will be available via Fred2 and should be used in the future
    tool_version.append(('syfpeithi', '1.0'))  # how to handle this?

    # get for each method the corresponding tool version
    methods = {
        method.strip(): version.strip()
        for tool, version in tool_version
        for method in selected_methods
        if tool.lower() in method.lower()
    }

    # get the alleles
    alleles = [Allele(a) for a in args.alleles.split(";")]

    if args.peptides:
        peptides = read_peptide_input(args.peptides)
        peptide_lengths = set(len(pep) for pep in peptides)
    else:
        # without a peptide file both bounds are needed to build the range
        if args.min_length is None or args.max_length is None:
            parser.error("Either --peptides or both --min_length and --max_length have to be specified.")
        peptide_lengths = range(args.min_length, args.max_length + 1)

    with open("model_report.txt", 'w') as output:
        # check if requested tool versions are supported
        for method, version in methods.items():
            if version not in EpitopePredictorFactory.available_methods()[method.lower()]:
                raise ValueError("The specified version " + version + " for " + method +
                                 " is not supported by Fred2.")

        # build every predictor once instead of once per allele and length
        predictors = [(method, version, EpitopePredictorFactory(method, version=version))
                      for method, version in methods.items()]

        # check if requested alleles are supported
        allele_support = [(method, version, sorted(predictor.supportedAlleles))
                          for method, version, predictor in predictors]
        support_all_alleles = True
        no_allele_support = True
        for a in alleles:
            supported = False
            for method, version, supported_alleles in allele_support:
                if a not in supported_alleles:
                    output.write("Allele " + convert_allele_back(a) + " is not supported by " +
                                 method + " " + version + ".\n")
                else:
                    supported = True
            if not supported:
                msg = "Allele " + convert_allele_back(a) + " is not supported by any of the requested tools."
                output.write(msg + "\n")
                logger.warning(msg)
                support_all_alleles = False
            else:
                no_allele_support = False

        if support_all_alleles:
            output.write("All selected alleles are supported by at least one of the requested tools.\n")
        if no_allele_support:
            output.write(
                "None of the specified alleles is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models.\n"
            )
            raise ValueError(
                "None of the specified alleles is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models."
            )

        output.write("\n")

        # check if requested lengths are supported
        length_support = [(method, version, sorted(predictor.supportedLength))
                          for method, version, predictor in predictors]
        support_all_lengths = True
        no_length_support = True
        for l in peptide_lengths:
            supported = False
            for method, version, supported_lengths in length_support:
                if l not in supported_lengths:
                    output.write("Peptide length " + str(l) + " is not supported by " +
                                 method + " " + version + ".\n")
                else:
                    supported = True
            if not supported:
                msg = "Peptide length " + str(l) + " is not supported by any of the requested tools."
                output.write(msg + "\n")
                logger.warning(msg)
                support_all_lengths = False
            else:
                no_length_support = False

        if support_all_lengths:
            output.write(
                "All selected or provided peptide lengths are supported by at least one of the requested tools.\n"
            )
        if no_length_support:
            output.write(
                "None of the peptide lengths is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models.\n"
            )
            raise ValueError(
                "None of the peptide lengths is supported by any of the requested tools. Specify '--show_supported_models' to write out all supported models."
            )
def predict(self, peptides, alleles=None, **kwargs):
    """
    Returns predictions for given peptides and alleles. If no alleles are given, predictions for all available models
    are made.

    Only peptides of length 9 are scored; other lengths are skipped with a
    warning. Alleles whose name is not listed in ``supportedAlleles`` are
    skipped silently.

    :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
    :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
    :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
    :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
    :param kwargs: optional parameter (not used yet)
    :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
    :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
    :raises ValueError: if an input element is not a Peptide/Allele or if no prediction could be made at all
    """
    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    if alleles is None:
        alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
    else:
        if isinstance(alleles, Allele):
            alleles = [alleles]
        if any(not isinstance(a, Allele) for a in alleles):
            raise ValueError("Input is not of type Allele")
    # converted allele name -> original allele object
    alleles_string = dict(zip(self.convert_alleles(alleles), alleles))

    result = {}

    # load svm model
    model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name, "%s" % self.name)
    model = svmlight.read_model(model_path)

    # group peptides by length; groupby() only merges *adjacent* items, so
    # the sequences are sorted by length first
    for length, group in itertools.groupby(sorted(pep_seqs, key=len), key=len):
        peps = list(group)
        if length != 9:
            warnings.warn("Peptide length of %i is not supported by UniTope" % length)
            continue

        for conv_allele, allele in alleles_string.items():
            if allele.name in self.supportedAlleles:
                encoding = self.encode(peps, conv_allele)
                pred = svmlight.classify(model, list(encoding.values()))
                # add to the scores already stored for this allele rather
                # than replacing them
                scores = result.setdefault(allele, {})
                for pep, score in zip(encoding.keys(), pred):
                    scores[pep_seqs[pep]] = score

    if not result:
        raise ValueError("No predictions could be made for given input. Check your "
                         "epitope length and HLA allele combination.")

    df_result = EpitopePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                    names=['Seq', 'Method'])
    return df_result
def setUp(self):
    """
    Create the peptide and allele fixtures: two 9-mer peptides, one 11-mer
    fragment, two MHC class I alleles and two MHC class II alleles.
    """
    nine_mers = ("SYFPEITHI", "IHTIEPFYS")
    class_one = ("HLA-B*15:01", "HLA-A*02:01")
    class_two = ("HLA-DRB1*07:01", "HLA-DRB1*15:01")

    self.peptides_mhcI = [Peptide(seq) for seq in nine_mers]
    self.peptides_fragment = [Peptide("IHTIEPFYSAA")]
    self.mhcI = [Allele(allele_name) for allele_name in class_one]
    self.mhcII = [Allele(allele_name) for allele_name in class_two]
def revert_allele_repr(self, name):
    """
    Turn a converted allele name back into an allele object.

    Names starting with "H-2-" are wrapped in a MouseAllele unchanged. For
    every other name a '*' is inserted after the fifth character and a ':'
    after the seventh before an Allele is built from it.

    :param str name: the converted allele name
    :return: the allele object built from the restored name
    """
    if name.startswith("H-2-"):
        return MouseAllele(name)

    head, middle, tail = name[:5], name[5:7], name[7:]
    return Allele("%s*%s:%s" % (head, middle, tail))
def revert_allele_repr(self, name):
    """
    Turn an allele name back into an allele object.

    The name is used unchanged; names starting with "H-2-" yield a
    MouseAllele, all others an Allele.

    :param str name: the allele name
    :return: the allele object built from the name
    """
    allele_type = MouseAllele if name.startswith("H-2-") else Allele
    return allele_type(name)
def __main__():
    """
    Command line entry point: annotate the peptide hits of an idXML file with
    their best predicted MHC binder.

    Steps:

    1. read the target alleles, either from the file given with -allele or,
       for ``-allele in``, from ``<dirallele>/<id>.allele`` where ``<id>`` is
       taken from the "BD..-<id>" part of the input file name
    2. collect the unmodified sequences of all hits whose length lies within
       the bounds of the selected MHC class and which contain none of the
       residues U, B, X, Z
    3. predict all sequences against all alleles and keep, per sequence, the
       allele with the maximal score if categorize() returns a category
    4. store allele ('binder') and score (under the category name) as meta
       values on the matching hits and write the result to -out

    Exits with status 1 if required input is missing or the prediction
    fails, and with status 0 if ``-allele in`` is used on an input file
    whose name does not contain "_W_".
    """
    def read_alleles(path):
        # one allele name per line; names are upper-cased before parsing
        with open(path, 'r') as handle:
            return set(Allele(line.strip().upper()) for line in handle)

    parser = argparse.ArgumentParser(version=VERSION)
    parser.add_argument('-c', dest="mhcclass", help='<Required> MHC class', required=True)
    parser.add_argument('-in', dest="inf", help='<Required> full path to the input file', required=True)
    parser.add_argument('-out', dest="out", help="<Required> full path to the output file", required=True)
    parser.add_argument(
        '-allele',
        dest="allele",
        help="<Required> full path to an allele file, if 'in', allele file will be deduced from in file name",
        required=True)
    parser.add_argument(
        '-dirallele',
        dest="dirallele",
        help="for use with '-allele in', describes full base path to the allele files")

    options = parser.parse_args()
    if len(sys.argv) <= 1:
        parser.print_help()
        sys.exit(1)

    # every one of the three values is needed, so a single empty one is
    # already an error
    if not (options.inf and options.out and options.allele):
        parser.print_help()
        sys.exit(1)

    target_alleles_set = set()
    # Fred2.FileReader.read_lines is broken
    # alleles = FileReader.read_lines(options.allele, type=Allele)
    if options.allele == "in" and options.dirallele:
        if "_W_" not in options.inf:
            print("No class 1 type run detected.")
            sys.exit(0)
        # NOTE(review): if the file name holds several "BD" parts the last
        # one wins -- confirm this is the intended behaviour.
        af = None
        for sp in options.inf.split("_"):
            if sp.startswith("BD"):
                af = join(options.dirallele, sp.split("-")[1] + ".allele")
        # Without a "BD" part there is no file to open; the set stays empty
        # and the check below prints the help and exits.
        if af is not None:
            target_alleles_set = read_alleles(af)
    else:
        target_alleles_set = read_alleles(options.allele)

    if not target_alleles_set:
        parser.print_help()
        sys.exit(1)

    if options.mhcclass == "I":
        ttn = EpitopePredictorFactory('netmhcpan', version='3.0')
        lowerBound = 8
        upperBound = 12
    elif options.mhcclass == "II":
        ttn = EpitopePredictorFactory('netmhcIIpan', version='3.1')
        lowerBound = 15
        upperBound = 25
    else:
        # fail with a clear message instead of a NameError further down
        parser.error("unsupported MHC class '%s', expected 'I' or 'II'" % options.mhcclass)

    pros = list()
    peps = list()
    f = oms.IdXMLFile()
    f.load(options.inf, pros, peps)

    pepstr = set()
    for pep in peps:
        for h in pep.getHits():
            # if "decoy" not in h.getMetaValue("target_decoy"):
            unmod = h.getSequence().toUnmodifiedString()
            if lowerBound <= len(unmod) <= upperBound and not any(res in unmod for res in "UBXZ"):
                pepstr.add(unmod)

    es = [Peptide(x) for x in pepstr]

    try:
        preds_n = ttn.predict(es, alleles=target_alleles_set)
    except Exception as e:
        print("something went wrong with the netMHC prediction %s what: %s" % (options.inf, str(e)))
        sys.exit(1)

    # only max
    preds = dict()
    for index, row in preds_n.iterrows():
        score = row.max()  # bigger_is_better
        allele = str(row.idxmax())
        categ = categorize(score)
        seq = row.name[0].tostring()
        if categ:
            preds[seq] = (allele, categ, score)

    for pep in peps:
        hits = pep.getHits()
        for h in hits:
            best = preds.get(h.getSequence().toUnmodifiedString())
            if best is not None:
                allele, categ, score = best
                h.setMetaValue('binder', allele)
                h.setMetaValue(str(categ), score)
        # every hit is kept, annotated or not
        pep.setHits(hits)

    f.store(options.out, pros, peps)
def setUp(self):
    """
    Create the fixture used by the assembly tests: two epitopes and a single
    allele with probability 0.5.
    """
    # split() without an argument separates on any run of whitespace, so the
    # two epitopes are parsed correctly whether the literal separates them by
    # a blank or a line break (splitting on "\n" only would turn a blank
    # separated literal into one bogus peptide); it also makes the extra
    # strip() per element unnecessary.
    self.epis = [Peptide(seq) for seq in "GHRMAWDMM VYEADDVIL".split()]
    self.alleles = [Allele("HLA-A*02:01", prob=0.5)]