Exemple #1
0
    def predict(self, peptides, **kwargs):
        """
        Returns TAP predictions for the given peptides using length-specific
        SVM models.

        Peptides are grouped by sequence length. For every length listed in
        ``self.supportedLength`` the matching svmlight model is read, the
        group is encoded with ``self.encode`` and classified. Unsupported
        lengths only trigger a warning.

        :param peptides: A single Peptide or an iterable of Peptide objects
        :param kwargs: Accepted for interface compatibility; not used here
        :return: The object built by ``TAPPredictionResult.from_dict`` from
                 ``{self.name: {peptide: score}}``
        :raises ValueError: If an input element is not a Peptide, or if no
                            peptide could be scored
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            # Validate and collect in a single pass so that one-shot
            # iterables (e.g. generators) are not exhausted by the check.
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        result = {self.name: {}}
        # groupby only merges *adjacent* items with equal keys, so the
        # sequences have to be ordered by length before grouping.
        sorted_seqs = sorted(pep_seqs, key=len)
        for length, peps in itertools.groupby(sorted_seqs, key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
                continue

            # Materialise the group: the groupby sub-iterator can only be
            # consumed once.
            encoding = self.encode(list(peps))

            # load svm model
            model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(self.name, length))
            model = svmlight.read_model(model_path)

            pred = svmlight.classify(model, list(encoding.values()))
            # Accumulate into the shared result; scores of previously
            # processed lengths must not be discarded.
            for pep, score in zip(encoding.keys(), pred):
                result[self.name][pep_seqs[pep]] = score

        if not result[self.name]:
            raise ValueError("No predictions could be made with "+self.name+" for given input.")
        df_result = TAPPredictionResult.from_dict(result)

        return df_result
Exemple #2
0
    def predict(self, peptides, **kwargs):
        """
        Returns TAP predictions for the given peptides using length-specific
        position-specific scoring matrices (PSSMs).

        The score of a peptide is the sum of the matrix entries of its
        residues (0.0 for residues missing from the matrix) plus the optional
        constant stored under ``pssm[-1]["con"]``.

        :param peptides: A single Peptide or an iterable of Peptide objects
        :param kwargs: Accepted for interface compatibility; not used here
        :return: The object built by ``TAPPredictionResult.from_dict`` from
                 ``{self.name: {peptide: score}}``
        :raises ValueError: If an input element is not a Peptide, or if no
                            peptide could be scored
        """
        def _load_model(length):
            # The matrix is looked up as the attribute "<name>_<length>" of
            # the Fred2.Data.TAPPSSMMatrices module.
            model = "%s_%i"%(self.name, length)
            return getattr(__import__("Fred2.Data.TAPPSSMMatrices", fromlist=[model]), model)

        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            # Validate and collect in a single pass so that one-shot
            # iterables (e.g. generators) are not exhausted by the check.
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        result = {self.name: {}}
        # groupby only merges *adjacent* items with equal keys, so the
        # sequences have to be ordered by length before grouping.
        sorted_seqs = sorted(pep_seqs, key=len)
        for length, peps in itertools.groupby(sorted_seqs, key=len):
            try:
                pssm = _load_model(length)
            except (ImportError, AttributeError):
                # A missing matrix shows up as AttributeError from getattr
                # when the module itself imports fine.
                warnings.warn("No model found for %s with length %i"%(self.name, length))
                continue

            # Optional constant term of the matrix, identical for the group.
            constant = pssm.get(-1, {}).get("con", 0)
            # Accumulate into the shared result; scores of previously
            # processed lengths must not be discarded.
            for p in peps:
                score = sum(pssm[i].get(aa, 0.0) for i, aa in enumerate(p)) + constant
                result[self.name][pep_seqs[p]] = score

        if not result[self.name]:
            raise ValueError("No predictions could be made for given input.")
        df_result = TAPPredictionResult.from_dict(result)

        return df_result
Exemple #3
0
    def predict(self, peptides, **kwargs):
        """
        Returns TAP predictions for given :class:`~Fred2.Core.Peptide.Peptide`.

        Only the 9-mer matrix is used. Peptides of length 9 are scored over
        all positions; longer peptides are scored on their first three
        residues plus their last residue (matched against matrix row 8).
        Peptides shorter than 9 are skipped with a warning.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :return: Returns a :class:`~Fred2.Core.Result.TAPPredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.TAPPredictionResult`
        :raises ValueError: If an input element is not a Peptide, or if no
                            peptide could be scored
        """
        def fetch_matrix(size):
            # Each matrix lives in its own module named "<name>_<size>".
            matrix_name = "%s_%i" % (self.name, size)
            module_path = "Fred2.Data.pssms." + self.name + ".mat." + matrix_name
            module = __import__(module_path, fromlist=[matrix_name])
            return getattr(module, matrix_name)

        # Map sequence string -> Peptide object (duplicates collapse).
        by_sequence = {}
        if isinstance(peptides, Peptide):
            by_sequence[str(peptides)] = peptides
        else:
            for candidate in peptides:
                if not isinstance(candidate, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                by_sequence[str(candidate)] = candidate

        scores = {}
        result = {self.name: scores}
        # groupby needs its input ordered by the grouping key.
        ordered = sorted(by_sequence.keys(), key=len)
        for length, group in itertools.groupby(ordered, key=len):
            if length < 9:
                warnings.warn("No model found for %s with length %i" %
                              (self.name, length))
                continue

            try:
                pssm = fetch_matrix(9)
            except ImportError:
                warnings.warn("No model found for %s with length %i" %
                              (self.name, length))
                continue

            for seq in group:
                if length > 9:
                    # Longer peptides: first three residues + last residue.
                    head = sum(pssm[pos].get(seq[pos], 0.0) for pos in range(3))
                    score = head + pssm[8].get(seq[-1], 0.0)
                else:
                    score = sum(pssm[pos].get(residue, 0.0)
                                for pos, residue in enumerate(seq))
                scores[by_sequence[seq]] = score

        if not scores:
            raise ValueError("No predictions could be made for given input.")

        return TAPPredictionResult.from_dict(result)
Exemple #4
0
    def predict(self, peptides, **kwargs):
        """
        Returns TAP predictions for given :class:`~Fred2.Core.Peptide.Peptide`.

        Peptides are grouped by length. For each length in
        ``self.supportedLength`` the svmlight model is read once, and the
        group is encoded and classified in chunks.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param kwargs: Optional ``chunks`` (int): number of peptides encoded
                       and classified per batch; defaults to the number of
                       distinct input sequences
        :return: Returns a :class:`~Fred2.Core.Result.TAPPredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.TAPPredictionResult`
        :raises ValueError: If an input element is not a Peptide, or if no
                            peptide could be scored
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        # Batch size for encoding/classification; by default one batch.
        chunksize = kwargs.get('chunks', len(pep_seqs))

        result = {self.name: {}}
        # group peptides by length (groupby requires length-sorted input)
        pep_groups = sorted(pep_seqs, key=len)
        for length, peps in itertools.groupby(pep_groups, key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s" %
                              (length, self.name))
                continue

            # load svm model once per length; it does not depend on the chunk
            model_path = pkg_resources.resource_filename(
                "Fred2.Data.svms.%s" % self.name,
                "%s_%i" % (self.name, length))
            model = svmlight.read_model(model_path)

            peps = list(peps)
            for i in range(0, len(peps), chunksize):
                encoding = self.encode(peps[i:i + chunksize])

                pred = svmlight.classify(model, list(encoding.values()))
                # keys() and values() of the same dict iterate in matching
                # order, so each score lines up with its sequence.
                for pep, score in zip(encoding.keys(), pred):
                    result[self.name][pep_seqs[pep]] = score

        if not result[self.name]:
            raise ValueError("No predictions could be made with " + self.name +
                             " for given input.")
        df_result = TAPPredictionResult.from_dict(result)

        return df_result
Exemple #5
0
    def predict(self, peptides, **kwargs):
        """
        Returns TAP predictions for given :class:`~Fred2.Core.Peptide.Peptide`.

        Only the 9-mer matrix is used. Peptides of length 9 are scored over
        all positions; longer peptides are scored on their first three
        residues plus their last residue (matched against matrix row 8).
        Peptides shorter than 9 are skipped with a warning.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :return: Returns a :class:`~Fred2.Core.Result.TAPPredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.TAPPredictionResult`
        :raises ValueError: If an input element is not a Peptide, or if no
                            peptide could be scored
        """
        def _load_model(length):
            # Each matrix lives in its own module named "<name>_<length>".
            model = "%s_%i"%(self.name, length)
            return getattr(__import__("Fred2.Data.pssms."+self.name+".mat."+model, fromlist=[model]), model)

        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides): peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        result = {self.name: {}}
        # sorted() works on Python 2 and 3 alike; dict.keys() has no .sort()
        # on Python 3. groupby needs the input ordered by length.
        pep_groups = sorted(pep_seqs, key=len)
        for length, peps in itertools.groupby(pep_groups, key=len):
            if length < 9:
                warnings.warn("No model found for %s with length %i"%(self.name, length))
                continue

            try:
                pssm = _load_model(9)
            except ImportError:
                warnings.warn("No model found for %s with length %i"%(self.name, length))
                continue

            for p in peps:
                if length <= 9:
                    score = sum(pssm[i].get(aa, 0.0) for i, aa in enumerate(p))
                else:
                    # Longer peptides: first three residues + last residue.
                    score = sum(pssm[i].get(p[i], 0.0) for i in range(3))+pssm[8].get(p[-1], 0.0)
                result[self.name][pep_seqs[p]] = score

        if not result[self.name]:
            raise ValueError("No predictions could be made for given input.")
        df_result = TAPPredictionResult.from_dict(result)

        return df_result
Exemple #6
0
    def predict(self, peptides, **kwargs):
        """
        Returns TAP predictions for given :class:`~Fred2.Core.Peptide.Peptide`.

        Peptides are grouped by length. For each length in
        ``self.supportedLength`` the svmlight model is read once, and the
        group is encoded and classified in chunks.

        :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
        :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
        :param kwargs: Optional ``chunks`` (int): number of peptides encoded
                       and classified per batch; defaults to the number of
                       distinct input sequences
        :return: Returns a :class:`~Fred2.Core.Result.TAPPredictionResult` object with the prediction results
        :rtype: :class:`~Fred2.Core.Result.TAPPredictionResult`
        :raises ValueError: If an input element is not a Peptide, or if no
                            peptide could be scored
        """
        if isinstance(peptides, Peptide):
            pep_seqs = {str(peptides):peptides}
        else:
            pep_seqs = {}
            for p in peptides:
                if not isinstance(p, Peptide):
                    raise ValueError("Input is not of type Protein or Peptide")
                pep_seqs[str(p)] = p

        # Batch size for encoding/classification; by default one batch.
        chunksize = kwargs.get('chunks', len(pep_seqs))

        result = {self.name: {}}
        # sorted() works on Python 2 and 3 alike; dict.keys() has no .sort()
        # on Python 3. groupby needs the input ordered by length.
        pep_groups = sorted(pep_seqs, key=len)
        for length, peps in itertools.groupby(pep_groups, key=len):
            if length not in self.supportedLength:
                warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
                continue

            # load svm model once per length; it does not depend on the chunk
            model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(self.name, length))
            model = svmlight.read_model(model_path)

            peps = list(peps)
            for i in range(0, len(peps), chunksize):
                encoding = self.encode(peps[i:i+chunksize])

                # list(...) keeps the argument a real list on Python 3,
                # where dict.values() is a view.
                pred = svmlight.classify(model, list(encoding.values()))
                for pep, score in zip(encoding.keys(), pred):
                    result[self.name][pep_seqs[pep]] = score

        if not result[self.name]:
            raise ValueError("No predictions could be made with "+self.name+" for given input.")
        df_result = TAPPredictionResult.from_dict(result)

        return df_result