Beispiel #1
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Runs the external prediction command once per allele and collects the parsed scores.

        The peptide sequences are written, one per line, to a temporary input file; the command
        built from ``self.command`` is executed through the shell for every converted allele name
        and its output file is handed to ``self.parse_external_result``. Both temporary files are
        removed before the method returns or raises.

        :param peptides: A single Peptide or an iterable of Peptide objects
        :param alleles: A single Allele or a list of Allele objects; if None, all entries of
                        ``self.supportedAlleles`` are used
        :param kwargs: optional parameter (not used)
        :return: The prediction results, indexed by (peptide, method name)
        :raises ValueError: if an input has the wrong type, if no peptide reaches the required
                            length, or if no prediction could be made
        :raises RuntimeError: if the external command exits with status 127
        """
        import os  # function-scope import: only needed for temp-file cleanup

        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            al = [Allele("HLA-" + a) for a in self.supportedAlleles]
            allales_string = dict(zip(self.convert_alleles(al), al))
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
            allales_string = dict(zip(self.convert_alleles(alleles), alleles))

        # Validate before any temporary file is created so nothing is left behind on failure.
        required_length = max(self.__supported_length)
        if not any(len(seq) >= required_length for seq in pep_seqs):
            raise ValueError("No epitopes with length >= %i" % required_length)

        # Freeze one ordering so the lines written to the tool and the Peptide objects
        # paired with the parsed scores are guaranteed to line up.
        # NOTE(review): all peptides are written, not only those passing the length check
        # above -- confirm whether shorter peptides should be filtered out.
        seqs = list(pep_seqs)
        pep_objects = [pep_seqs[s] for s in seqs]

        results = {}
        tmp_file = NamedTemporaryFile(delete=False)
        tmp_out = None
        try:
            tmp_file.write("\n".join(seqs))
            tmp_file.close()
            tmp_out = NamedTemporaryFile(delete=False)

            for a, allele in allales_string.items():
                r = subprocess.call(self.command % (tmp_file.name, a, tmp_out.name), shell=True)

                if r == 127:
                    raise RuntimeError("%s is not installed or globally executable." % self.name)
                elif r == -6:
                    warnings.warn("No model exists for allele %s." % str(allele))
                    continue
                elif r != 0:
                    warnings.warn("An unknown error occurred for method %s." % self.name)
                    continue

                results[allele] = dict(zip(pep_objects, self.parse_external_result(tmp_out)))
        finally:
            # delete=False means nobody else removes these files; clean up on every exit path.
            for handle in (tmp_file, tmp_out):
                if handle is None:
                    continue
                handle.close()
                try:
                    os.remove(handle.name)
                except OSError:
                    # Best-effort cleanup: a file that is already gone is not an error.
                    pass

        # An empty result dict (every allele failed) is as much a failure as an empty entry.
        if not results or any(not results[k] for k in results):
            raise ValueError("No predictions could be made for " + self.name + " given input. Check your "
                             "epitope length and HLA allele combination.")
        df_result = EpitopePredictionResult.from_dict(results)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
Beispiel #2
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns SVM predictions for the given peptides and alleles. If no alleles are given,
        predictions for all entries of ``self.supportedAlleles`` are made.

        Peptides are grouped by length; for every (allele, length) pair a model file is looked up
        in the ``Fred2.Data.svms.<name>`` package and skipped with a warning when it is missing.

        :param peptides: A single Peptide or an iterable of Peptide objects
        :param alleles: A single Allele or a list of Allele objects
        :param kwargs: optional parameter (not used)
        :return: The prediction results, indexed by (peptide, method name)
        :raises ValueError: if an input has the wrong type or no prediction could be made
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            al = [Allele("HLA-" + a) for a in self.supportedAlleles]
            allales_string = dict(zip(self.convert_alleles(al), al))
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
            allales_string = dict(zip(self.convert_alleles(alleles), alleles))

        result = {}
        # groupby only merges *adjacent* equal keys, so the sequences must be sorted by length
        # first; otherwise one length can show up as several separate groups.
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s" % (length, self.name))
                continue

            # Materialize the group: the groupby sub-iterator can only be consumed once.
            encoding = self.encode(list(peps))

            for a, allele in allales_string.items():
                model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name,
                                                             "%s_%i" % (a, length))
                if not os.path.exists(model_path):
                    warnings.warn("No model exists for peptides of length %i or allele %s." % (length,
                                                                                            allele.name))
                    continue

                model = svmlight.read_model(model_path)
                pred = svmlight.classify(model, encoding.values())

                # Accumulate across length groups instead of resetting the allele's scores.
                scores = result.setdefault(allele, {})
                for pep, score in zip(encoding.keys(), pred):
                    scores[pep_seqs[pep]] = score

        if not result:
            raise ValueError("No predictions could be made for given input. Check your "
                             "epitope length and HLA allele combination.")
        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
Beispiel #3
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for the given peptides and :class:`~Fred2.Core.Allele.Allele`. If no
        :class:`~Fred2.Core.Allele.Allele` are given, predictions for all available models are made.

        The scores produced by the parent class are transformed element-wise by raising 10 to
        the power of each value before they are returned.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter, forwarded unchanged to the parent implementation
        :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        """
        parent_scores = super(ComblibSidney2008, self).predict(peptides, alleles=alleles, **kwargs)
        # Element-wise 10 ** value over the whole result table.
        transformed = parent_scores.applymap(lambda exponent: math.pow(10, exponent))
        return EpitopePredictionResult(transformed)
Beispiel #4
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for the given peptides and :class:`~Fred2.Core.Allele.Allele`. If no
        :class:`~Fred2.Core.Allele.Allele` are given, predictions for all available models are made.

        For every (allele, length) pair a matrix module is imported from
        ``Fred2.Data.pssms.<name>.mat``. The raw matrix sum is divided by ``-length``, shifted by
        the matrix's ``intercept``, divided by its ``slope``, used as an exponent of 10 and
        finally clamped to the interval [0.0001, 1e6].

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter (not used yet)
        :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :raises ValueError: if an input has the wrong type or no prediction could be made
        """
        def _load_allele_model(allele, length):
            # The matrix object carries the same name as the module that holds it.
            allele_model = "%s_%i" % (allele, length)
            module = __import__("Fred2.Data.pssms." + self.name + ".mat." + allele_model,
                                fromlist=[allele_model])
            return getattr(module, allele_model)

        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            al = [Allele("HLA-" + a) for a in self.supportedAlleles]
            alleles_string = dict(zip(self.convert_alleles(al), al))
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
            alleles_string = dict(zip(self.convert_alleles(alleles), alleles))

        result = {}
        # groupby only merges *adjacent* equal keys, so sort by length before grouping.
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s" % (length, self.name))
                continue

            peps = list(peps)  # reused for every allele; the groupby sub-iterator is one-shot
            for a, allele in alleles_string.items():
                # dynamically import the prediction PSSM for this allele and length
                try:
                    pssm = _load_allele_model(a, length)
                except ImportError:
                    warnings.warn("No model found for %s with length %i" % (allele, length))
                    continue

                # Accumulate across length groups instead of resetting the allele's scores.
                scores = result.setdefault(allele, {})
                for p in peps:
                    score = sum(pssm[i].get(aa, 0.0) for i, aa in enumerate(p)) + pssm.get(-1, {}).get("con", 0)
                    score /= -length
                    score -= pssm[-1]["intercept"]
                    score /= pssm[-1]["slope"]
                    score = math.pow(10, score)
                    # clamp to the interval [0.0001, 1e6]
                    if score < 0.0001:
                        score = 0.0001
                    elif score > 1e6:
                        score = 1e6
                    scores[pep_seqs[p]] = score

        if not result:
            raise ValueError("No predictions could be made with " + self.name + " for given input. Check your "
                             "epitope length and HLA allele combination.")

        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
Beispiel #5
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for the given peptides and :class:`~Fred2.Core.Allele.Allele`. If no
        :class:`~Fred2.Core.Allele.Allele` are given, predictions for all available models are made.

        For every (allele, length) pair a matrix module is imported from
        ``Fred2.Data.pssms.<name>.mat``. The raw matrix sum is divided by ``-length``, shifted by
        the matrix's ``intercept``, divided by its ``slope``, used as an exponent of 10 and
        finally clamped to the interval [0.0001, 1e6].

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter (not used yet)
        :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :raises ValueError: if an input has the wrong type or no prediction could be made
        """
        def _load_allele_model(allele, length):
            # The matrix object carries the same name as the module that holds it.
            allele_model = "%s_%i" % (allele, length)
            module = __import__(
                "Fred2.Data.pssms." + self.name + ".mat." + allele_model,
                fromlist=[allele_model])
            return getattr(module, allele_model)

        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            al = [Allele("HLA-" + a) for a in self.supportedAlleles]
            alleles_string = dict(zip(self.convert_alleles(al), al))
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
            alleles_string = dict(zip(self.convert_alleles(alleles), alleles))

        result = {}
        # groupby only merges *adjacent* equal keys, so sort by length before
        # grouping; otherwise one length can appear as several groups.
        sorted_seqs = sorted(pep_seqs, key=len)
        for length, peps in itertools.groupby(sorted_seqs, key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s" %
                              (length, self.name))
                continue

            # Reused for every allele; the groupby sub-iterator is one-shot.
            peps = list(peps)
            for a, allele in alleles_string.items():
                # dynamically import the prediction PSSM for this allele
                try:
                    pssm = _load_allele_model(a, length)
                except ImportError:
                    warnings.warn("No model found for %s with length %i" %
                                  (allele, length))
                    continue

                # Accumulate across length groups instead of resetting the
                # allele's scores.
                scores = result.setdefault(allele, {})
                for p in peps:
                    score = sum(pssm[i].get(aa, 0.0)
                                for i, aa in enumerate(p)) + pssm.get(
                                    -1, {}).get("con", 0)
                    score /= -length
                    score -= pssm[-1]["intercept"]
                    score /= pssm[-1]["slope"]
                    score = math.pow(10, score)
                    # clamp to the interval [0.0001, 1e6]
                    if score < 0.0001:
                        score = 0.0001
                    elif score > 1e6:
                        score = 1e6
                    scores[pep_seqs[p]] = score

        if not result:
            raise ValueError("No predictions could be made with " + self.name +
                             " for given input. Check your "
                             "epitope length and HLA allele combination.")

        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples(
            [tuple((i, self.name)) for i in df_result.index],
            names=['Seq', 'Method'])
        return df_result
Beispiel #6
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for the given peptides and :class:`~Fred2.Core.Allele.Allele`. If no
        :class:`~Fred2.Core.Allele.Allele` are given, predictions for all available models are made.

        Each peptide is scored as the sum, over its positions, of the per-residue log-enrichment
        value times a per-position importance weight. Positions contained in the allele's loaded
        model are left out of the sum; if no model can be imported, all positions are used.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A single :class:`~Fred2.Core.Allele.Allele` or a list of them
        :param kwargs: optional parameter (not used yet)
        :return: The prediction results as built by ``EpitopePredictionResult.from_dict``,
                 indexed by (peptide, method name)
        :raises ValueError: if an input has the wrong type or no prediction could be made
        """
        def _load_allele_model(allele):
            # The model object carries the same name as the module that holds it.
            allele_model = "%s" % allele
            module = __import__(
                "Fred2.Data.pssms." + self.name + ".mat." + allele_model,
                fromlist=[allele_model])
            return getattr(module, allele_model)

        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            al = [Allele("HLA-" + a) for a in self.supportedAlleles]
            alleles_string = dict(zip(self.convert_alleles(al), al))
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
            alleles_string = dict(zip(self.convert_alleles(alleles), alleles))

        result = {}
        # groupby only merges adjacent equal keys, hence the sort by length.
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len),
                                              key=len):

            if self.supportedLength is not None and length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s" %
                              (length, self.name))
                continue

            peps = list(peps)

            # The weights depend only on the peptide length, so build them once
            # per length group. For peptides longer than 9, a weight of 0.30 is
            # inserted after the fifth position for every extra residue.
            if length <= 9:
                importance = self.__importance
            else:
                importance = self.__importance[:5] + ((length - 9) * [0.30]) + self.__importance[5:]

            for a, allele in alleles_string.items():
                scores = result.setdefault(allele, {})

                # load matrix; without one, no position is excluded
                try:
                    pssm = _load_allele_model(a)
                except ImportError:
                    pssm = []

                for p in peps:
                    scores[pep_seqs[p]] = sum(
                        self.__log_enrichment.get(aa, 0.0) * importance[i]
                        for i, aa in enumerate(p) if i not in pssm)

        if not result:
            raise ValueError("No predictions could be made with " + self.name +
                             " for given input. Check your "
                             "epitope length and HLA allele combination.")

        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples(
            [tuple((i, self.name)) for i in df_result.index],
            names=['Seq', 'Method'])
        return df_result
Beispiel #7
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for the given peptides and alleles. If no alleles are given, predictions
        for all available models are made.

        A single SVM model file named after the method is loaded from the
        ``Fred2.Data.svms.<name>`` package. Only peptides of length 9 are scored; other lengths
        are skipped with a warning, as are alleles whose name is not in ``self.supportedAlleles``.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter (not used yet)
        :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :raises ValueError: if an input has the wrong type or no prediction could be made
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            al = [Allele("HLA-" + a) for a in self.supportedAlleles]
            allales_string = dict(zip(self.convert_alleles(al), al))
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
            allales_string = dict(zip(self.convert_alleles(alleles), alleles))

        result = {}

        # one model for all alleles; the allele is passed to the encoding instead
        model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name, "%s" % self.name)
        model = svmlight.read_model(model_path)

        # groupby only merges *adjacent* equal keys, so sort by length before grouping;
        # otherwise the 9-mers can be split into several groups.
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length != 9:
                warnings.warn("Peptide length of %i is not supported by UniTope" % length)
                continue

            peps = list(peps)  # reused for every allele; the groupby sub-iterator is one-shot
            for a, allele in allales_string.items():
                if allele.name not in self.supportedAlleles:
                    continue
                encoding = self.encode(peps, a)
                pred = svmlight.classify(model, encoding.values())
                # Accumulate instead of resetting the allele's scores per group.
                scores = result.setdefault(allele, {})
                for pep, score in zip(encoding.keys(), pred):
                    scores[pep_seqs[pep]] = score

        if not result:
            # Implicit concatenation keeps the message free of source indentation.
            raise ValueError("No predictions could be made for given input. Check your "
                             "epitope length and HLA allele combination.")
        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
Beispiel #8
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for the given peptides and alleles. If no alleles are given, predictions
        for all available models are made.

        A single SVM model file named after the method is loaded from the
        ``Fred2.Data.svms.<name>`` package. Only peptides of length 9 are scored; other lengths
        are skipped with a warning, as are alleles whose name is not in ``self.supportedAlleles``.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
        :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
        :param kwargs: optional parameter (not used yet)
        :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
        :raises ValueError: if an input has the wrong type or no prediction could be made
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            al = [Allele("HLA-" + a) for a in self.supportedAlleles]
            allales_string = dict(zip(self.convert_alleles(al), al))
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
            allales_string = dict(zip(self.convert_alleles(alleles), alleles))

        result = {}

        # one model for all alleles; the allele is passed to the encoding instead
        model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name, "%s" % self.name)
        model = svmlight.read_model(model_path)

        # Sort by length first: groupby only merges adjacent equal keys, and unsorted
        # dict keys could split the 9-mers into several groups.
        seqs_by_length = sorted(pep_seqs, key=len)
        for length, group in itertools.groupby(seqs_by_length, key=len):
            if length != 9:
                warnings.warn("Peptide length of %i is not supported by UniTope" % length)
                continue

            nonamers = list(group)  # reused per allele; the groupby sub-iterator is one-shot
            for a, allele in allales_string.items():
                if allele.name not in self.supportedAlleles:
                    continue
                encoding = self.encode(nonamers, a)
                pred = svmlight.classify(model, encoding.values())
                # Accumulate instead of resetting the allele's scores per group.
                scores = result.setdefault(allele, {})
                for pep, score in zip(encoding.keys(), pred):
                    scores[pep_seqs[pep]] = score

        if not result:
            # Implicit concatenation keeps the message free of source indentation.
            raise ValueError("No predictions could be made for given input. Check your "
                             "epitope length and HLA allele combination.")
        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result
Beispiel #9
0
    def predict(self, peptides, alleles=None, **kwargs):
        """
        Returns predictions for the given peptides and alleles. If no alleles are given, predictions
        for all available models are made.

        For every (allele, length) pair the matrix named ``<name>_<allele>_<length>`` is looked up
        in ``Fred2.Data.EpitopePSSMMatrices``; a peptide's score is the sum of its per-position
        matrix entries plus the optional constant stored under key ``-1`` / ``"con"``.

        :param list(Peptide)/Peptide peptides: A single Peptide or an iterable of Peptides
        :param list(Alleles) alleles: A single Allele or a list of Alleles
        :param kwargs: optional parameter (not used yet)
        :return: The prediction results, indexed by (peptide, method name)
        :raises ValueError: if an input has the wrong type or no prediction could be made
        """
        def _load_allele_model(allele, length):
            # A missing matrix shows up as AttributeError on the shared matrices module.
            allele_model = "%s_%s_%i" % (self.name, allele, length)
            module = __import__("Fred2.Data.EpitopePSSMMatrices", fromlist=[allele_model])
            return getattr(module, allele_model)

        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            # Validate while building so one-shot iterables are only traversed once.
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        if alleles is None:
            al = [Allele("HLA-" + a) for a in self.supportedAlleles]
            allales_string = dict(zip(self.convert_alleles(al), al))
        else:
            if isinstance(alleles, Allele):
                alleles = [alleles]
            if any(not isinstance(p, Allele) for p in alleles):
                raise ValueError("Input is not of type Allele")
            allales_string = dict(zip(self.convert_alleles(alleles), alleles))

        result = {}
        # groupby only merges *adjacent* equal keys, so sort by length before grouping.
        for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s" % (length, self.name))
                continue

            peps = list(peps)  # reused for every allele; the groupby sub-iterator is one-shot
            for a, allele in allales_string.items():
                # dynamically import the prediction PSSM for this allele and length
                try:
                    pssm = _load_allele_model(a, length)
                except AttributeError:
                    warnings.warn("No model found for %s with length %i" % (allele, length))
                    continue

                # Accumulate across length groups instead of resetting the allele's scores.
                scores = result.setdefault(allele, {})
                for p in peps:
                    scores[pep_seqs[p]] = (sum(pssm[i].get(aa, 0.0) for i, aa in enumerate(p))
                                           + pssm.get(-1, {}).get("con", 0))

        if not result:
            raise ValueError("No predictions could be made with " + self.name + " for given input. Check your "
                             "epitope length and HLA allele combination.")

        df_result = EpitopePredictionResult.from_dict(result)
        df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                        names=['Seq', 'Method'])
        return df_result