def predict(self, peptides, alleles=None, **kwargs):
    """
    Returns SVM predictions for the given peptides and alleles. If no alleles are given,
    predictions are attempted for every entry of ``self.supportedAlleles``.

    :param peptides: A single Peptide or an iterable of Peptide objects
    :param alleles: A single Allele or an iterable of Allele objects (``None`` = all supported)
    :param kwargs: optional parameters (not used)
    :return: The result of ``EpitopePredictionResult.from_dict`` with its index replaced by a
             ``(Seq, Method)`` MultiIndex, where Method is ``self.name``
    :raises ValueError: if an input element has the wrong type or no prediction could be made
    """
    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        # Validate and collect in a single pass so one-shot iterables (generators) work too.
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    if alleles is None:
        alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
    else:
        alleles = [alleles] if isinstance(alleles, Allele) else list(alleles)
        if any(not isinstance(a, Allele) for a in alleles):
            raise ValueError("Input is not of type Allele")
    # Maps the converted (model file) allele name back to its Allele object.
    allales_string = dict(zip(self.convert_alleles(alleles), alleles))

    result = {}
    # groupby only merges *adjacent* equal keys, so the sequences must be sorted by length
    # first; otherwise the same length can show up as several separate groups.
    for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
        if length not in self.supportedLength:
            warnings.warn("Peptide length of %i is not supported by %s" % (length, self.name))
            continue

        encoding = self.encode(list(peps))
        encoded_peps = list(encoding.keys())
        features = list(encoding.values())

        for a, allele in allales_string.items():
            # One model file per (allele, length) combination.
            model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name,
                                                         "%s_%i" % (a, length))
            if not os.path.exists(model_path):
                warnings.warn("No model exists for peptides of length %i or allele %s."
                              % (length, allele.name))
                continue

            model = svmlight.read_model(model_path)
            pred = svmlight.classify(model, features)
            # Accumulate across lengths instead of replacing the scores collected for
            # this allele by a previous length group.
            allele_scores = result.setdefault(allele, {})
            for pep, score in zip(encoded_peps, pred):
                allele_scores[pep_seqs[pep]] = score

    if not result:
        raise ValueError("No predictions could be made for given input. Check your "
                         "epitope length and HLA allele combination.")

    df_result = EpitopePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples([(i, self.name) for i in df_result.index],
                                                    names=['Seq', 'Method'])
    return df_result
def predict(self, peptides, **kwargs):
    """
    Returns TAP predictions for the given peptides.

    :param peptides: A single Peptide or an iterable of Peptide objects
    :param kwargs: optional parameters (not used)
    :return: The result of ``TAPPredictionResult.from_dict`` for ``{self.name: {peptide: score}}``
    :raises ValueError: if an input element is not a Peptide or no prediction could be made
    """
    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        # Validate and collect in a single pass so one-shot iterables (generators) work too.
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    result = {self.name: {}}
    scores = result[self.name]
    # groupby only merges *adjacent* equal keys, so sort the sequences by length first.
    for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
        if length not in self.supportedLength:
            warnings.warn("Peptide length of %i is not supported by %s" % (length, self.name))
            continue

        encoding = self.encode(list(peps))
        # One model file per peptide length.
        model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name,
                                                     "%s_%i" % (self.name, length))
        model = svmlight.read_model(model_path)
        pred = svmlight.classify(model, list(encoding.values()))
        # Add to the shared score table; it must not be re-created per length,
        # otherwise only the last processed length would survive.
        for pep, score in zip(encoding.keys(), pred):
            scores[pep_seqs[pep]] = score

    if not result[self.name]:
        raise ValueError("No predictions could be made with " + self.name + " for given input.")
    df_result = TAPPredictionResult.from_dict(result)
    return df_result
def ball_only_classifier(circles, color_image, bonus_radius):
    """
    Pick the best-scoring circle among the first six candidates.

    Each candidate is turned into a feature vector and scored with the SVM model stored at
    ``./output/best_single_cup_model_for_ball``. Only scores strictly above 0.5 are accepted.

    :param circles: sequence of candidate circles; only ``circles[:6]`` is examined
    :param color_image: image passed through to ``find_pixels``
    :param bonus_radius: extra radius passed through to ``find_pixels``
    :return: ``(best_circle, best_classification, best_circle_pixels)``. When no candidate
             scores above 0.5 this is ``(None, 0.5, None)``; otherwise ``best_circle`` is a
             one-element list holding the winning circle and ``best_classification`` is the
             list returned by ``svmlight.classify`` for it.
    """
    model = svmlight.read_model("./output/best_single_cup_model_for_ball")
    ff = find_features()
    # TODO: fix
    label = 0

    # The running best is tracked as a plain number. Comparing the next score against the
    # previously stored classification *list* never replaced the first hit on Python 2 and
    # raises TypeError on Python 3.
    best_score = 0.5
    best_classification = 0.5
    best_circle = None
    best_circle_pixels = None

    for c in circles[:6]:
        pixels, _circle = find_pixels(c, color_image, bonus_radius)

        # create features for that circle
        features = ff.generate_features(pixels, label)
        features = parse_one_line(features)
        print(features)

        # run the classifier on that circle
        classification = svmlight.classify(model, [features])
        print(classification)

        if classification[0] > best_score:
            best_score = classification[0]
            best_classification = classification
            best_circle = [c]
            best_circle_pixels = pixels

    return best_circle, best_classification, best_circle_pixels
def test(test_data, fmodel_name):
    """
    Classify ``test_data`` with the SVM model stored in ``fmodel_name`` and print the scores.

    :param test_data: examples handed to ``svmlight.classify``
    :param fmodel_name: path of the model file read with ``svmlight.read_model``
    """
    print('[ test ] ===================')
    model = svmlight.read_model(fmodel_name)

    # classify the test data. this function returns a list of numbers, which represent
    # the classifications.
    predictions = svmlight.classify(model, test_data)
    for p in predictions:
        # Parenthesised single-argument form prints the same on Python 2 and Python 3.
        print('%.8f' % p)
def test(test_data, fmodel_name):
    """
    Load the SVM model at ``fmodel_name``, classify ``test_data`` and print one score per line.

    :param test_data: examples handed to ``svmlight.classify``
    :param fmodel_name: path of the model file read with ``svmlight.read_model``
    """
    print('[ test ] ===================')
    model = svmlight.read_model(fmodel_name)

    # classify the test data. this function returns a list of numbers, which represent
    # the classifications.
    predictions = svmlight.classify(model, test_data)
    for p in predictions:
        # Was a Python 2 print statement, which is a SyntaxError next to the print() call
        # above on Python 3; this form behaves the same on both.
        print('%.8f' % p)
def __init__(self, doc2vec_args=None, doc2vec_model=None, svm_model=None, doc2vec_train_docs=None):
    """
    Store the configuration and load any pre-trained models that were named.

    :param doc2vec_args: dict of Doc2Vec arguments; ``None`` (the default) yields a fresh
                         empty dict per instance
    :param doc2vec_model: file name appended to ``MODEL_DIR_PATH`` and loaded with
                          ``Doc2Vec.load``; falsy means no model is loaded
    :param svm_model: path passed to ``read_model``; falsy means no model is loaded
    :param doc2vec_train_docs: stored unchanged on the instance
    """
    self.doc2vec_train_docs = doc2vec_train_docs
    self.doc2vec_model = Doc2Vec.load(MODEL_DIR_PATH + doc2vec_model) if doc2vec_model else None
    self.svm_model = read_model(svm_model) if svm_model else None
    # A literal ``{}`` default would be one dict shared by every instance created
    # without this argument, so mutations would leak between instances.
    self.doc2vec_args = {} if doc2vec_args is None else doc2vec_args
def predict(self, peptides, **kwargs):
    """
    Returns TAP predictions for given :class:`~Fred2.Core.Peptide.Peptide`.

    :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of
                     :class:`~Fred2.Core.Peptide.Peptide`
    :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
    :param kwargs: ``chunks`` (optional) sets how many peptides are encoded and classified
                   per batch; by default all peptides form a single batch
    :return: Returns a :class:`~Fred2.Core.Result.TAPPredictionResult` object with the prediction results
    :rtype: :class:`~Fred2.Core.Result.TAPPredictionResult`
    :raises ValueError: if an input element is not a Peptide or no prediction could be made
    """
    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    chunksize = kwargs.get('chunks', len(pep_seqs))

    result = {self.name: {}}
    scores = result[self.name]

    # groupby needs its input sorted by the grouping key (peptide length).
    for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
        if length not in self.supportedLength:
            warnings.warn("Peptide length of %i is not supported by %s" % (length, self.name))
            continue

        # The model depends only on the length, so read it once per length
        # rather than once per chunk.
        model_path = pkg_resources.resource_filename(
            "Fred2.Data.svms.%s" % self.name, "%s_%i" % (self.name, length))
        model = svmlight.read_model(model_path)

        peps = list(peps)
        for i in range(0, len(peps), chunksize):
            encoding = self.encode(peps[i:i + chunksize])
            pred = svmlight.classify(model, list(encoding.values()))
            for pep, score in zip(encoding.keys(), pred):
                scores[pep_seqs[pep]] = score

    if not result[self.name]:
        raise ValueError("No predictions could be made with " + self.name + " for given input.")
    df_result = TAPPredictionResult.from_dict(result)
    return df_result
def predict(self, peptides, **kwargs):
    """
    Returns TAP predictions for given :class:`~Fred2.Core.Peptide.Peptide`.

    :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of
                     :class:`~Fred2.Core.Peptide.Peptide`
    :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
    :param kwargs: ``chunks`` (optional) sets how many peptides are encoded and classified
                   per batch; by default all peptides form a single batch
    :return: Returns a :class:`~Fred2.Core.Result.TAPPredictionResult` object with the prediction results
    :rtype: :class:`~Fred2.Core.Result.TAPPredictionResult`
    :raises ValueError: if an input element is not a Peptide or no prediction could be made
    """
    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    chunksize = len(pep_seqs)
    if 'chunks' in kwargs:
        chunksize = kwargs['chunks']

    result = {self.name: {}}

    # sorted() works on both the Python 2 key list and the Python 3 key view, whereas
    # ``keys().sort()`` exists only on Python 2. groupby needs the input sorted by length.
    pep_groups = sorted(pep_seqs.keys(), key=len)
    for length, peps in itertools.groupby(pep_groups, key=len):
        if length not in self.supportedLength:
            warnings.warn("Peptide length of %i is not supported by %s" % (length, self.name))
            continue

        # load svm model: it depends only on the length, so read it once per length
        # instead of once per chunk
        model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name,
                                                     "%s_%i" % (self.name, length))
        model = svmlight.read_model(model_path)

        peps = list(peps)
        for i in range(0, len(peps), chunksize):
            encoding = self.encode(peps[i:i + chunksize])
            pred = svmlight.classify(model, list(encoding.values()))
            for pep, score in zip(encoding.keys(), pred):
                result[self.name][pep_seqs[pep]] = score

    if not result[self.name]:
        raise ValueError("No predictions could be made with " + self.name + " for given input.")
    df_result = TAPPredictionResult.from_dict(result)
    return df_result
def rec_char(div_img):
    """
    Recognize a single character image obtained after segmentation.

    The image is passed through ``binary`` and ``chformat`` and then scored against the ten
    models ``model/0`` .. ``model/9``. The digit of the last model whose first prediction is
    positive is returned as a string; an empty string means no model responded positively.
    """
    sample = chformat(binary(div_img))
    recognized = ""
    for digit in range(10):
        digit_model = svmlight.read_model("model/" + str(digit))
        scores = svmlight.classify(digit_model, sample)
        # Later digits deliberately overwrite earlier ones: the last positive wins.
        if scores[0] > 0:
            recognized = str(digit)
    return recognized
def zrank(aids, topic, fmodel_name):
    """
    Re-rank ``aids`` for ``topic`` using the SVM model stored in ``fmodel_name``.

    The data built by ``init_rerank_data`` is classified, each entry's first element is
    paired with its prediction, and the pairs are ordered by descending prediction. The
    ordered pairs are pretty-printed to ``RERANK_RESULT + '_' + topic`` and ``ZC.dump_cache()``
    is called before returning.

    :return: the first elements of the rerank entries, best prediction first
    """
    rerank_data = init_rerank_data(aids, topic)

    print('[ zrank ] ===================')
    model = svmlight.read_model(fmodel_name)
    predictions = svmlight.classify(model, rerank_data)

    # sorted() accepts the iterator that zip() returns on Python 3; calling .sort() on the
    # zip result only works on Python 2. Both sorts are stable, so ties keep their order.
    aid_score = sorted(zip([x[0] for x in rerank_data], predictions),
                       key=lambda tup: tup[1], reverse=True)

    with open(RERANK_RESULT + '_' + topic, 'w') as f:
        pprint.pprint(aid_score, f)

    ZC.dump_cache()

    return [x[0] for x in aid_score]
def zrank(aids, topic, fmodel_name):
    """
    Re-rank ``aids`` for ``topic`` with the SVM model read from ``fmodel_name``.

    Steps: build the rerank data via ``init_rerank_data``, classify it, pair the first
    element of every entry with its prediction, order the pairs by descending prediction,
    pretty-print them to ``RERANK_RESULT + '_' + topic`` and call ``ZC.dump_cache()``.

    :return: the first elements of the rerank entries, highest prediction first
    """
    rerank_data = init_rerank_data(aids, topic)

    print('[ zrank ] ===================')
    model = svmlight.read_model(fmodel_name)
    predictions = svmlight.classify(model, rerank_data)

    # zip() yields an iterator on Python 3, which has no .sort(); sorted() handles both
    # the Python 2 list and the Python 3 iterator and keeps tie order (stable sort).
    ids = [x[0] for x in rerank_data]
    aid_score = sorted(zip(ids, predictions), key=lambda tup: tup[1], reverse=True)

    with open(RERANK_RESULT + '_' + topic, 'w') as f:
        pprint.pprint(aid_score, f)

    ZC.dump_cache()

    return [x[0] for x in aid_score]
def load_model(self):
    """
    Load the model file ``<_modelname>/<_pref>.model`` into ``self._model``.

    The loader is chosen by ``self._classtype``: ``"classifier"`` uses
    ``svmutil.svm_load_model``, ``"structured"`` uses ``svmlight.read_model`` and
    ``"percrank"`` unpickles the parameters of a ``KernelLBRankPerceptron``. Any other
    class type leaves ``self._model`` untouched.

    :return: ``False`` if the model file does not exist, ``True`` otherwise
    """
    model_file = self._modelname + "/" + self._pref + ".model"
    if not os.path.isfile(model_file):
        return False

    if self._classtype == "classifier":
        self._model = svmutil.svm_load_model(model_file)
    elif self._classtype == "structured":
        self._model = svmlight.read_model(model_file)
    elif self._classtype == "percrank":
        m = KernelLBRankPerceptron(kernel=polynomial_kernel)
        # NOTE(review): unpickling can execute arbitrary code; only load model files
        # that come from a trusted source.
        # The context manager closes the file even when unpickling fails.
        with open(model_file, 'rb') as mfile:
            m.sv_a, m.sv_1, m.sv_2, m.bias = cPickle.load(mfile)
        self._model = m
    return True
def simple_classifier(circles, color_image, bonus_radius):
    """
    Run the single-cup SVM on the first six candidate circles and print the results.

    For each candidate, ``find_pixels`` is called, features are generated from the first
    element of its result, and both the features and the classification are printed. The
    classification does not filter anything: every ``find_pixels`` result is returned.

    :param circles: sequence of candidate circles; only ``circles[:6]`` is examined
    :param color_image: image passed through to ``find_pixels``
    :param bonus_radius: extra radius passed through to ``find_pixels``
    :return: list of the ``find_pixels`` results, in candidate order
    """
    svm_model = svmlight.read_model("./output/best_single_cup_model")
    extractor = find_features()
    # TODO: fix
    label = 0

    kept = []
    for candidate in circles[:6]:
        found = find_pixels(candidate, color_image, bonus_radius)
        kept.append(found)

        # build the feature vector for this candidate
        feature_vector = parse_one_line(extractor.generate_features(found[0], label))
        print(feature_vector)

        # score the candidate; the score is only reported, not acted upon
        verdict = svmlight.classify(svm_model, [feature_vector])
        print(verdict)

    return kept
def predict(self, peptides, alleles=None, **kwargs):
    """
    Returns predictions for given peptides and alleles. If no alleles are given, predictions
    for all available models are made.

    :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of
                     :class:`~Fred2.Core.Peptide.Peptide`
    :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
    :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
    :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
    :param kwargs: optional parameter (not used yet)
    :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
    :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
    :raises ValueError: if an input element has the wrong type or no prediction could be made
    """
    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    if alleles is None:
        alleles = [Allele("HLA-" + a) for a in self.supportedAlleles]
    else:
        # Materialise once: the alleles are iterated several times below.
        alleles = [alleles] if isinstance(alleles, Allele) else list(alleles)
        if any(not isinstance(a, Allele) for a in alleles):
            raise ValueError("Input is not of type Allele")
    # Maps the converted allele name back to its Allele object.
    allales_string = dict(zip(self.convert_alleles(alleles), alleles))

    result = {}

    # A single model file serves every allele; load it once up front.
    model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name, "%s" % self.name)
    model = svmlight.read_model(model_path)

    # groupby only merges *adjacent* equal keys, so sort by length first; on unsorted input
    # several groups of the same length could appear and overwrite each other's scores.
    for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
        peps = list(peps)
        if length != 9:
            warnings.warn("Peptide length of %i is not supported by UniTope" % length)
            continue

        for a, allele in allales_string.items():
            if allele.name in self.supportedAlleles:
                encoding = self.encode(peps, a)
                pred = svmlight.classify(model, list(encoding.values()))
                # Accumulate instead of re-creating the per-allele score table.
                allele_scores = result.setdefault(allele, {})
                for pep, score in zip(encoding.keys(), pred):
                    allele_scores[pep_seqs[pep]] = score

    if not result:
        # Adjacent literals instead of a backslash continuation inside the string, which
        # embedded the next line's indentation in the message.
        raise ValueError("No predictions could be made for given input. Check your "
                         "epitope length and HLA allele combination.")

    df_result = EpitopePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples([(i, self.name) for i in df_result.index],
                                                    names=['Seq', 'Method'])
    return df_result
testing_data_keys = [] testing_data_tuples = [] for key, vector in vectors.iteritems(): vals = [] num = 1 for val in vector: vals.append((num, float(val))) num += 1 testing_data_tuples.append((0, vals)) testing_data_keys.append(key) return testing_data_keys, testing_data_tuples print "Loading Model" model = svmlight.read_model('svm-model.dat') testing_data = [] directory = "tests" filenames = [ f for f in listdir(directory + "/originals/") if isfile(join(directory + "/originals/", f)) and f[0] != "." ] start_time = time.time() counter = 0 for filename in filenames: print "\n-----------------------------------" print directory + "/originals/" + filename + "\n"
def writeBufferToFile(path, buffer):
    """Write ``buffer`` to the file at ``path``, replacing any existing content."""
    # The context manager closes the file even if write() raises.
    with open(path, 'w') as outFile:
        outFile.write(buffer)


def getFullTextAsSentencesFromDocModel(document):
    """
    Return all sentences of ``document`` as one flat list.

    Newlines inside each entry of ``document.paragraphs`` are replaced by spaces before the
    paragraph is split with ``sentence_breaker.tokenize``; paragraph order is preserved.
    """
    sentences = []
    for paragraph in document.paragraphs:
        cleanP = paragraph.replace('\n', ' ')
        sentences.extend(sentence_breaker.tokenize(cleanP))
    return sentences


# SVM model read once at module load; running counters updated by testDoc via ``global``.
model = svmlight.read_model('my_model.dat')
correct = 0
total = 0


def testDoc(document):
    # NOTE(review): only the beginning of this function is visible in this chunk; the
    # statements below are reproduced unchanged.
    global total
    global correct
    testVectors = []
    docIndex = 1
    sentences = getFullTextAsSentencesFromDocModel(document)
    if len(sentences) <= 1:
        # early return if no transitions.
        return
    goodDoc = DummyDocModel(sentences)
    goodGrid = TextrazorEntityGrid(goodDoc.cleanSentences())
testing_data_keys = [] testing_data_tuples = [] for key, vector in vectors.iteritems(): vals = [] num = 1 for val in vector: vals.append((num, float(val))) num += 1 testing_data_tuples.append((0,vals)) testing_data_keys.append(key) return testing_data_keys, testing_data_tuples print "Loading Model" model = svmlight.read_model('svm-model.dat') testing_data = [] directory = "tests" filenames = [ f for f in listdir(directory + "/originals/") if isfile(join(directory + "/originals/",f)) and f[0] != "." ] start_time = time.time() counter = 0 for filename in filenames: print "\n-----------------------------------" print directory + "/originals/" + filename + "\n" img = io.imread(directory + "/originals/" + filename, as_grey=True) output = io.imread(directory + "/originals/" + filename, as_grey=False)
def load_classifier(clf_i):
    """Read and return the SVM model stored in ``clf_directory`` under the name ``str(clf_i)``."""
    model_path = os.path.join(clf_directory, str(clf_i))
    return svmlight.read_model(model_path)
def read_model(self, rel_path):
    """Resolve ``rel_path`` to an absolute path and return the SVM model read from it."""
    return svmlight.read_model(os.path.abspath(rel_path))
#cachePath = "../cache/asasCache" goldCachePath = "../cache/asasGoldCache" summaryOutputPath = "../outputs" reorderedSummaryOutputPath = summaryOutputPath + "_reordered" evaluationOutputPath = "../results" modelSummaryCachePath = "../cache/modelSummaryCache" documentCachePath = "../cache/documentCache" idfCachePath = "../cache/idfCache" meadCacheDir = "../cache/meadCache" rougeCacheDir = "../cache/rougeCache" rougeDir = "../ROUGE" # rougeDir = "/opt/dropbox/14-15/573/code/ROUGE" rankModel = svmlight.read_model('../cache/svmlightCache/svmlightModel.dat') rouge = RougeEvaluator(rougeDir, args.modelSummaryDir, summaryOutputPath, modelSummaryCachePath, rougeCacheDir) totalClusters = 25 minimumAverageClusterRange = 30 maximumAverageClusterRange = 55 maxWords = 100 topics = [] topicTitles = {} for topic in extract.topicReader.Topic.factoryMultiple(args.topicXml): topics.append(topic)