def predict_proba(self, data):
    """Return the classifier output for every row of ``data``.

    The values are whatever ``svmlight.classify`` produces for
    ``self.model``; the original author describes them as the confidence of
    belonging to the positive class.
    """
    # The converter needs a label per row; zeros are used as placeholders.
    placeholder_labels = np.zeros(data.shape[0])
    examples = npToSVMLightFormat(data, placeholder_labels)
    return np.array(svmlight.classify(self.model, examples))
def ball_only_classifier(circles, color_image, bonus_radius): model = svmlight.read_model("./output/best_single_cup_model_for_ball") ff = find_features() # TODO: fix label = 0 best_classification = 0.5 best_circle = None best_circle_pixels = None for c in circles[:6]: pixels, circle = find_pixels(c, color_image, bonus_radius) # create features for that circle features = ff.generate_features(pixels, label) features = parse_one_line(features) print features # run the classifier on that circle classification = svmlight.classify(model, [features]) print classification if classification[0] > best_classification: best_classification = classification best_circle = [c] best_circle_pixels = pixels # make a decision about whether that circle is circly enough # cv2.imshow("Image processed", circle) # cv2.waitKey() # for the strict form of the classifier, I require that all of the detected circles # are in fact circles. other classifiers may be more lenient return best_circle, best_classification, best_circle_pixels
def main_svmlight(): # copied: import svmlight import pdb training_data = syntheticData(30, 1) test_data = syntheticData(30, 1) #training_data = __import__('data').train0 #test_data = __import__('data').test0 print 'HERE 0' print 'training_data is', training_data print 'test_data is', test_data # train a model based on the data #pdb.set_trace() print 'HERE 1' model = svmlight.learn(training_data, type='regression', kernelType=2, verbosity=3) print 'HERE 2' # model data can be stored in the same format SVM-Light uses, for interoperability # with the binaries. svmlight.write_model(model, 'my_model.dat') print 'HERE 3' # classify the test data. this function returns a list of numbers, which represent # the classifications. #predictions = svmlight.classify(model, test_data) pdb.set_trace() predictions = svmlight.classify(model, training_data) print 'HERE 4' for p,example in zip(predictions, test_data): print 'pred %.8f, actual %.8f' % (p, example[0])
def predict(self, peptides, **kwargs):
    """
    Returns TAP predictions for the given peptides.

    :param peptides: A single ``Peptide`` or an iterable of ``Peptide`` objects
    :param kwargs: accepted for interface compatibility; not used here
    :return: The object built by ``TAPPredictionResult.from_dict`` from a
             ``{self.name: {peptide: score}}`` dictionary
    :raises ValueError: if an input element is not a ``Peptide`` or if no
                        prediction could be made
    """
    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        # Single pass so that one-shot iterables (generators) are not
        # exhausted by the validation before the mapping is built.
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    result = {self.name: {}}
    scores = result[self.name]

    # groupby only merges *adjacent* equal keys, so sort by length first.
    sorted_seqs = sorted(pep_seqs.iterkeys(), key=len)
    for length, peps in itertools.groupby(sorted_seqs, key=len):
        if length not in self.supportedLength:
            warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
            continue

        encoding = self.encode(list(peps))

        # load svm model
        model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(self.name, length))
        model = svmlight.read_model(model_path)
        pred = svmlight.classify(model, encoding.values())

        # Accumulate into the shared dict; resetting it here used to throw
        # away the scores of every previously processed length.
        for pep, score in itertools.izip(encoding.keys(), pred):
            scores[pep_seqs[pep]] = score

    if not scores:
        raise ValueError("No predictions could be made with "+self.name+" for given input.")
    df_result = TAPPredictionResult.from_dict(result)
    return df_result
def score(self, feats):
    """Score ``feats`` with the wrapped model, dispatching on ``self._classtype``.

    * ``"classifier"``: ``feats`` is converted with ``dict(feats)`` and passed
      to libsvm; the predicted label is returned as an ``int``.
    * ``"structured"``: each entry of ``feats`` is classified on its own with
      svmlight and the index of the highest decision value is returned
      (``None`` when ``feats`` is empty; the first index wins ties).
    * ``"percrank"``: each entry is reduced to the set of its feature names,
      the model's ``project`` method is called, and the index of the largest
      projected value is returned.

    Any other class type falls through and returns ``None``.
    """
    m = self._model
    if self._classtype == "classifier":
        x, _ = svm.gen_svm_nodearray(dict(feats))
        return int(svm.libsvm.svm_predict(m, x))
    elif self._classtype == "structured":
        maxscore = float("-inf")
        maxidx = None
        for idx, candidate in enumerate(feats):
            # classify returns a list; compare the scalar decision value
            # rather than relying on list-vs-number ordering.
            dec_val = svmlight.classify(m, [(0, candidate)])[0]
            if dec_val > maxscore:
                maxscore = dec_val
                maxidx = idx
        return maxidx
    elif self._classtype == "percrank":
        X = [set(f for f, _ in candidate) for candidate in feats]
        Xisd = [0] * len(feats)
        Xisd[0] = 1
        dec_vals = m.project(X, Xisd)
        return dec_vals.index(max(dec_vals))
def predict_proba(self, X):
    """Return a two-column array ``[1 - p, p]`` for every row of ``X``.

    ``p`` is the logistic function applied to the svmlight output for the
    row.
    """
    # The converter needs labels; zeros are placeholders.
    placeholder = np.zeros(X.shape[0]).tolist()
    examples = self.toSvmlight(X, placeholder)
    margins = np.array(svmlight.classify(self.model, examples))

    positive = 1 / (1 + np.exp(-margins))
    positive.shape = (len(positive), 1)  # column vector for hstack
    return np.hstack([1 - positive, positive])
def run_svm(article_count, feature_functions, kernel='polynomial', split=0.9, model_path='svm.model'):
    """Train and evaluate an svmlight DEF/INDEF classifier on WSJ tokens.

    Tokens tagged ``DEF`` become positive examples and ``INDEF`` negative
    ones; every other token is ignored.  The data is split with
    ``bifurcate``, a model is trained on the first part, written to
    ``model_path``, and evaluated on the second part.

    Returns a dict with the article/token/train/test counts, the kernel
    name and the ``correct`` / ``wrong`` / ``total`` prediction counts.
    """
    # https://bitbucket.org/wcauchois/pysvmlight
    articles, total_token_count = preprocess_wsj(article_count, feature_functions)

    dictionary = Dictionary()
    dictionary.add_one('ZZZZZ')  # so that no features are labeled 0

    data = []
    for article in articles:
        for sentence in article:
            for tag, token_features in zip(sentence.def_tags, sentence.data):
                # only use def / indef tokens
                if tag not in ('DEF', 'INDEF'):
                    continue
                feature_ids = sorted(set(dictionary.add(token_features)))
                label = +1 if tag == 'DEF' else -1
                data.append((label, [(feature_id, 1) for feature_id in feature_ids]))

    train, test = bifurcate(data, split, shuffle=True)
    # for corpus, name in [(train, 'train'), (test, 'test')]:
    #     write_svm(corpus, 'wsj_svm-%s.data' % name)

    #####################
    # do svm in Python...
    model = svmlight.learn(train, type='classification', kernel=kernel)

    # svmlight.learn options
    # type: select between 'classification', 'regression', 'ranking' (preference ranking), and 'optimization'.
    # kernel: select between 'linear', 'polynomial', 'rbf', and 'sigmoid'.
    # verbosity: set the verbosity level (default 0).
    # C: trade-off between training error and margin.
    # poly_degree: parameter d in polynomial kernel.
    # rbf_gamma: parameter gamma in rbf kernel.
    # coef_lin
    # coef_const
    # costratio (corresponds to -j option to svm_learn)
    svmlight.write_model(model, model_path)

    gold_labels, test_feature_values = zip(*test)
    # total = len(gold_labels)

    # Gold labels are hidden from the classifier by replacing them with 0.
    test_pairs = [(0, feature_values) for feature_values in test_feature_values]
    predictions = svmlight.classify(model, test_pairs)

    gold_positive = [(gold > 0) for gold in gold_labels]
    predicted_positive = [(prediction > 0) for prediction in predictions]
    correct, wrong = matches(gold_positive, predicted_positive)

    return {
        'total_articles_count': len(articles),  # int
        'total_token_count': total_token_count,  # int
        'train_count': len(train),  # int
        'test_count': len(test),  # int
        'kernel': kernel,
        'correct': correct,
        'wrong': wrong,
        'total': correct + wrong,
    }
def test(test_data, fmodel_name):
    """Load the model stored at ``fmodel_name`` and print one score per example."""
    print('[ test ] ===================')
    model = svmlight.read_model(fmodel_name)

    # classify the test data. this function returns a list of numbers, which represent
    # the classifications.
    for score in svmlight.classify(model, test_data):
        print('%.8f' % score)
def runSVMLight(trainName,testName, kerneltype, c_param = 1.0, gamma_param = 1.0, verbosity = 0): """ converts data to python format only if not already in python format (files in python format are of type list, otherwise they are filenames) inputs: trainName, either the training data in svm-light format or the name of the training data file in LIBSVM/sparse format testName, either the test data in svm-light format or the name of the test data file in LIBSVM/sparse format kerneltype, (str)the type of kernel (linear, polynomial, sigmoid, rbf, custom) c_param, the C parameter (default 1) gamma_param, the gamma parameter (default 1) verbosity, 0, 1, or 2 for less or more information (default 0) outputs: (positiveAccuracy, negativeAccuracy, accuracy) """ if type(trainName) == list: trainingData = trainName else: trainingData = sparseToList(trainName) if type(testName) == list: testData = testName else: testData = sparseToList(testName) if verbosity == 2: print "Training svm......." # train a model based on the data model = svmlight.learn(trainingData, type='classification', verbosity=2, kernel=kerneltype, C = c_param, rbf_gamma = gamma_param ) # model data can be stored in the same format SVM-Light uses, for interoperability # with the binaries. # if type(trainName) == list: # svmlight.write_model(model, time.strftime('%Y-%m-%d-')+datetime.datetime.now().strftime('%H%M%S%f')+'_model.dat') # else: # svmlight.write_model(model, trainName[:-4]+'_model.dat') if verbosity == 2: print "Classifying........" # classify the test data. this function returns a list of numbers, which represent # the classifications. predictions = svmlight.classify(model, testData) # for p in predictions: # print '%.8f' % p correctLabels = correctLabelRemove(testData) # print 'Predictions:' # print predictions # print 'Correct Labels:' # print correctLabels return predictionCompare(predictions, correctLabels, verbosity)
def tsvm_test0(): # data processing data, target = load_svmlight_file('dataset/following.scale') data, target = shuffle(data, target) target = binarize(target)[:,0] cutoff = int(round(data.shape[0] * 0.8)) train_data = data[:cutoff] train_target = target[:cutoff] transductive_train_data = data transductive_target = target.copy() transductive_target[cutoff:] = 0 test_data = data[cutoff:] test_target = target[cutoff:] # convert the data into svmlight format svm_train_data = npToSVMLightFormat(train_data, train_target) svm_transductive_train_data = npToSVMLightFormat(transductive_train_data, transductive_target) svm_test_data = npToSVMLightFormat(test_data, test_target) print 'labels in the training data' print countLabels(svm_transductive_train_data).most_common() # svmlight routine model = svmlight.learn(svm_train_data, j=3.0, kernel='linear', type='classification', verbosity=0) trans_model = svmlight.learn(svm_transductive_train_data, j=3.0, kernel='linear', type='classification', verbosity=0) predictions = svmlight.classify(model, svm_test_data) trans_predictions = svmlight.classify(trans_model, svm_test_data) print 'inductive learning' print accuracy(predictions, test_target) print '(recall, precision)', recall_precision(predictions, test_target) print 'transductive learning' print accuracy(trans_predictions, test_target) print '(recall, precision)', recall_precision(trans_predictions, test_target)
def trainAndTest(training, test):
    """Train on ``training`` and classify ``test``.

    Every item must provide ``dataTuple()``; test items must also provide
    ``name`` and ``label``.  Returns ``zip(predictions, labels, names)``.
    """
    #trainingNames = [x[0] for x in training] # never used, but might be someday
    training_examples = [item.dataTuple() for item in training]

    names = [item.name for item in test]
    test_examples = [item.dataTuple() for item in test]
    labels = [item.label for item in test]

    trained = svmlight.learn(training_examples)
    scores = svmlight.classify(trained, test_examples)
    return zip(scores, labels, names)
def create_classifications(models, test_set):
    """Classify ``test_set`` with every supplied model using svm light.

    ``models`` maps a key to a trained model; the result maps the same keys
    to the list returned by ``svmlight.classify`` for that model.
    """
    return {
        key: svmlight.classify(models[key], test_set)
        for key in models.keys()
    }
def test_model(model,ind,n=3):
    """Classify n-gram features of selected files from ``pos/`` and ``neg/``.

    Files are picked by position in ``os.listdir`` using the indices from
    ``ind.get_pos_train_ind()`` (label 1) and ``ind.get_neg_test_ind()``
    (label -1).  Only n-grams known to ``fmap`` are kept as features.

    Returns the list produced by ``svmlight.classify``.
    """
    # NOTE(review): the positive side uses the *train* indices while the
    # negative side uses the *test* indices -- confirm this is intended.
    sources = (
        ("pos", 1, ind.get_pos_train_ind),
        ("neg", -1, ind.get_neg_test_ind),
    )

    test = []
    for directory, label, get_indices in sources:
        indices = list(get_indices())
        if not indices:
            continue
        # List the directory once instead of once per index.
        filenames = os.listdir(directory)
        for i in indices:
            # Close the file deterministically instead of leaking the handle.
            with open(directory + "/" + filenames[i]) as handle:
                text = handle.read()
            features = [(fmap.getID(gram), count)
                        for gram, count in ngrams.ngrams(n, text).items()
                        if fmap.hasFeature(gram)]
            test.append((label, features))

    predictions = svmlight.classify(model, test)
    return predictions
def five_fold_validation(training_sets, validation_sets, c_value):
    """Return the mean validation accuracy over all folds for ``C=c_value``.

    Fold ``i`` trains on ``training_sets[i]`` and is scored on
    ``validation_sets[i]``; the first element of ``find_accuracy``'s result
    is used as the fold accuracy.
    """
    accuracy_sum = 0.0
    for fold, training_set in enumerate(training_sets):
        validation_set = validation_sets[fold]
        model = svmlight.learn(training_set, type='classification', C=c_value)
        raw_scores = svmlight.classify(model, validation_set)
        binary = change_to_binary_predictions(raw_scores)
        accuracy_sum += find_accuracy(validation_set, binary)[0]
    return accuracy_sum / len(training_sets)
def trainAndTest(training, test):
    """Train on ``training`` and classify ``test``.

    Each item is indexed as ``(name, label, features)``: elements 1 and 2
    form the example handed to the SVM, element 0 is the name.
    Returns ``zip(predictions, labels, names)``.
    """
    #trainingNames = [x[0] for x in training] # never used, but might be someday
    def as_example(record):
        return (record[1], record[2])

    training_examples = [as_example(record) for record in training]
    names = [record[0] for record in test]
    test_examples = [as_example(record) for record in test]
    labels = [record[1] for record in test]

    trained = svm.learn(training_examples)
    scores = svm.classify(trained, test_examples)
    return zip(scores, labels, names)
def classify_rank_svm(self, features):
    """Run rank_svm to rank the specified essay features (numpy matrix/array).

    Returns a vector of scores of the specified essays.
    """
    assert self.model is not None

    # Convert data into svmlight format
    # [(label, [(feature, value), ...], query_id), ...]
    # Feature ids are 1-based; every row gets label 0 and query id 1.
    test_data = [
        (0, [(position + 1, value) for position, value in enumerate(row)], 1)
        for row in features
    ]
    return svmlight.classify(self.model, test_data)
def predict(self, x_test):
    """Return, per sample, the index of the model with the highest score.

    Raises ``Exception`` if ``self.trained`` is not ``True``.
    """
    if self.trained != True:
        raise Exception("first train a model")

    samples = self.svmlfeaturise(x_test)
    # One score vector per model; argmax across models picks the winner.
    per_model_scores = [
        np.array(svmlight.classify(self.models[j], samples))
        for j in xrange(len(self.models))
    ]
    return np.argmax(per_model_scores, axis=0)
def predict(self, X):
    """Return the predicted class for every row of ``X``.

    One score column is computed per model in ``self.model_``.  With a
    single model the sign of the score selects class index 1 or 0; otherwise
    the column with the highest score wins.
    """
    row_count = X.shape[0]
    score_table = np.zeros((row_count, self.num_classes_,), dtype=np.float32)

    for class_index in xrange(self.num_classes_):
        # Placeholder labels of zero are supplied to the converter.
        docs = self.__data2docs(X, np.zeros((row_count,), dtype=np.float32))
        score_table[:, class_index] = svm.classify(self.model_[class_index], docs)

    if self.num_classes_ == 1:
        chosen = (score_table.ravel() > 0).astype(np.int)
    else:
        chosen = score_table.argmax(axis=1)
    return self.classes_()[chosen]
def my_cross_val_score(data_fold, train, c_p):
    """Return one ``my_accus`` score per ``(train_ids, test_ids)`` pair in ``data_fold``.

    For each pair a linear ranking model is trained with ``C=c_p`` on the
    data collected for the first element and scored on the data collected
    for the second.
    """
    def score_fold(fit_ids, eval_ids):
        fit_data = collect_data_qid(fit_ids, train)
        eval_data = collect_data_qid(eval_ids, train)
        ranker = SVC.learn(fit_data, C=c_p, kernel='linear', type='ranking')
        return my_accus(eval_data, SVC.classify(ranker, eval_data))

    return [score_fold(x, y) for x, y in data_fold]
def predict(self, peptides, alleles=None, **kwargs):
    """
    Returns predictions for the given peptides and alleles. If no alleles
    are given, predictions for all supported alleles are attempted.

    :param peptides: A single ``Peptide`` or an iterable of ``Peptide`` objects
    :param alleles: A single ``Allele`` or an iterable of ``Allele`` objects
    :param kwargs: accepted for interface compatibility; not used here
    :return: The object built by ``EpitopePredictionResult.from_dict``,
             re-indexed by ``(Seq, Method)``
    :raises ValueError: on wrongly typed input or if no prediction could be made
    """
    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        # Single pass so one-shot iterables are not exhausted by validation.
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    if alleles is None:
        al = [Allele("HLA-"+a) for a in self.supportedAlleles]
        allales_string = {conv_a:a for conv_a, a in itertools.izip(self.convert_alleles(al), al)}
    else:
        if isinstance(alleles, Allele):
            alleles = [alleles]
        # Materialise once: the alleles are validated and then read twice.
        alleles = list(alleles)
        if any(not isinstance(p, Allele) for p in alleles):
            raise ValueError("Input is not of type Allele")
        allales_string = {conv_a:a for conv_a, a in itertools.izip(self.convert_alleles(alleles), alleles)}

    result = {}
    # groupby only merges *adjacent* equal keys, so sort by length first.
    sorted_seqs = sorted(pep_seqs.iterkeys(), key=len)
    for length, peps in itertools.groupby(sorted_seqs, key=len):
        if length not in self.supportedLength:
            warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
            continue

        encoding = self.encode(list(peps))

        for a in allales_string.keys():
            # load svm model
            model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(a,length))
            if not os.path.exists(model_path):
                warnings.warn("No model exists for peptides of length %i or allele %s."%(length, allales_string[a].name))
                continue

            model = svmlight.read_model(model_path)
            pred = svmlight.classify(model, encoding.values())

            # Reuse the allele's dict so scores from other peptide lengths
            # are kept instead of being overwritten.
            allele_scores = result.setdefault(allales_string[a], {})
            for pep, score in itertools.izip(encoding.keys(), pred):
                allele_scores[pep_seqs[pep]] = score

    if not result:
        raise ValueError("No predictions could be made for given input. Check your "
                         "epitope length and HLA allele combination.")
    df_result = EpitopePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                    names=['Seq', 'Method'])
    return df_result
def predict(self, dataset):
    """Return a ``(num_samples, 12)`` array with one score column per model.

    Column ``i`` holds the output of ``self.svm_list[i]`` on the formatted
    dataset; the loop variable name in the original suggests one model per
    month.
    """
    assert self.svm_list is not None
    self._format_test_data(dataset)

    sample_count = dataset.getNumSamples()
    feature_count = dataset.getNumFeatures()  # queried but not used below

    scores = np.zeros((sample_count, 12))
    for column in range(12):
        # import pdb;pdb.set_trace()
        month_model = self.svm_list[column]
        scores[:, column] = svmlight.classify(month_model, self.formatted_data)
    return scores
def __runSVMModels(self, img):
    """Score ``img`` with every model and return ``[(name, score), ...]``.

    The pixel values are divided by their maximum, the stored average values
    are subtracted, and the result is multiplied with the first
    ``self.__numFaces`` rows of the stored eigenfaces before classification.
    """
    pixels = asfarray(list(img.getdata()))
    normalised = pixels / max(pixels)
    centred = normalised - self.__imgdata.avgvals

    basis = self.__imgdata.eigenfaces[:self.__numFaces,:]
    weights = dot(basis, centred.transpose()).transpose()

    # A single example with a placeholder label of 0.
    example = [(0, self.__makeWeightTuplesList(weights))]
    return [(name, svmlight.classify(model, example)[0])
            for (name, model) in self.__models]
def _get_svm_classification(self, featureset): """ given a set of features, classify them with our trained model and return a signed float :param featureset: a dict of feature/value pairs in NLTK format, representing a single instance """ instance_to_classify = (0, map_features_to_svm(featureset, self._svmfeatureindex)) if self._verbose: print 'instance', instance_to_classify # svmlight.classify expects a list; this should be taken advantage of when writing SvmClassifier.batch_classify / .batch_prob_classify. # it returns a list of floats, too. [prediction] = svmlight.classify(self._model, [instance_to_classify]) return prediction
def predict(self, value, converted=False):
    """Evaluates a single value against the training data.

    Args:
        value: List-like object with same dimensionality used for training
            or the result of using convert_value if converted=True.
        converted: If True then the input is in the correct internal format

    Returns:
        A one-element list ``[(confidence, label)]`` where confidence is the
        absolute classifier output and label is its sign (-1, 0 or 1).
    """
    sample = value if converted else self.convert_value(value)
    conf = svmlight.classify(self._m, [(0, sample)])[0]
    confidence = math.fabs(conf)
    label = cmp(conf, 0)
    return [(confidence, label)]
def get_weather_tweets(self, tweets):
    """Return the tweets whose first classifier score is negative.

    A single non-list argument is treated as a one-element list.  Progress
    is printed after every 100th tweet.
    """
    if not isinstance(tweets, list):
        tweets = [tweets]

    selected = []
    for position, tweet in enumerate(tweets, 1):
        stemmed = self.parser.stem_sentence_porter(tweet)
        example = self.format_tweet_for_svmlight(stemmed)
        scores = svmlight.classify(self.is_weather_model, example)
        if position % 100 == 0:
            print(position)
        # A negative score marks the tweet as weather-related here.
        if scores[0] < 0:
            selected.append(tweet)
    return selected
def predict(self, peptides, **kwargs):
    """
    Returns TAP predictions for given :class:`~Fred2.Core.Peptide.Peptide`.

    :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
    :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
    :param kwargs: ``chunks`` optionally sets how many peptides are encoded
                   and classified at once (default: all peptides)
    :return: Returns a :class:`~Fred2.Core.Result.TAPPredictionResult` object with the prediction results
    :rtype: :class:`~Fred2.Core.Result.TAPPredictionResult`
    :raises ValueError: if an input element is not a Peptide or if no
                        prediction could be made
    """
    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    chunksize = kwargs.get('chunks', len(pep_seqs))

    result = {self.name: {}}
    scores = result[self.name]

    # group peptides by length (groupby needs its input sorted by the key)
    for length, peps in itertools.groupby(sorted(pep_seqs, key=len), key=len):
        if length not in self.supportedLength:
            warnings.warn("Peptide length of %i is not supported by %s"%(length,self.name))
            continue

        peps = list(peps)

        # load svm model once per length; it does not depend on the chunk
        model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s"%self.name, "%s_%i"%(self.name, length))
        model = svmlight.read_model(model_path)

        for i in xrange(0, len(peps), chunksize):
            encoding = self.encode(peps[i:i+chunksize])
            pred = svmlight.classify(model, encoding.values())
            for pep, score in itertools.izip(encoding.keys(), pred):
                scores[pep_seqs[pep]] = score

    if not scores:
        raise ValueError("No predictions could be made with "+self.name+" for given input.")
    df_result = TAPPredictionResult.from_dict(result)
    return df_result
def rec_char(div_img):
    """Recognise a single character image produced by segmentation.

    The image is binarised and formatted, then scored against the ten models
    ``model/0`` .. ``model/9``.  The result is the digit (as a string) of the
    last model whose first score is positive, or ``""`` if none is.
    """
    sample = chformat(binary(div_img))

    recognised = ""
    for digit in range(10):
        digit_model = svmlight.read_model("model/" + str(digit))
        scores = svmlight.classify(digit_model, sample)
        # print prediction
        if scores[0] > 0:
            recognised = str(digit)
            # print prediction[0]
    return recognised
def predict(self, x):
    """Return, per sample, the index of the model with the highest score.

    Raises ``Exception`` if the estimator is not fitted or has no models.
    """
    if not self.fitted:
        raise Exception('Not fitted yet')
    if len(self.models) < 1:
        raise Exception("len(self.models) < 1")

    # Every sample gets a placeholder label of 0.
    examples = [(0, feature_row) for feature_row in toSVMLightFeatures(x)]

    per_model = tuple(
        np.array(svmlight.classify(model, examples)) for model in self.models
    )
    # Stack to (n_models, n_samples) and pick the best model per sample.
    return np.argmax(np.vstack(per_model), axis=0)
def zrank(aids, topic, fmodel_name):
    """Re-rank ``aids`` for ``topic`` with the model stored at ``fmodel_name``.

    The (id, score) pairs, sorted by descending score, are pretty-printed to
    the file ``RERANK_RESULT + '_' + topic``.  The cache is then dumped and
    the ids are returned in ranked order.
    """
    rerank_data = init_rerank_data(aids, topic)
    print('[ zrank ] ===================')

    model = svmlight.read_model(fmodel_name)
    predictions = svmlight.classify(model, rerank_data)

    # Pair the first element of each example with its score.
    leading = [row[0] for row in rerank_data]
    ranked = sorted(zip(leading, predictions),
                    key=lambda pair: pair[1], reverse=True)

    with open(RERANK_RESULT + '_' + topic, 'w') as out:
        pprint.pprint(ranked, out)

    ZC.dump_cache()
    return [pair[0] for pair in ranked]
def predict(self, X):
    """Retrain the model and return 0/1 predictions for ``X``.

    When ``self.class_dist`` is set, the model is trained on the stored
    training data plus the rows of ``X`` labelled 0, passing
    ``self.class_dist[1]`` as the transduction positive ratio.  Otherwise it
    is trained on the stored training data only.  ``self.model`` is
    overwritten on every call.
    """
    zero_labels = np.zeros(X.shape[0]).tolist()
    test_data = self.toSvmlight(X, zero_labels)
    all_data = self.train_data + test_data

    if not self.class_dist:
        self.model = svmlight.learn(self.train_data)
    else:
        pos_ratio = self.class_dist[1]
        self.model = svmlight.learn(all_data, verbosity=1,
                                    transduction_posratio=pos_ratio)
        # self.model = svmlight.learn(all_data)

    predictions = np.array(svmlight.classify(self.model, test_data))
    # Threshold the outputs at zero, in place.
    predictions[predictions > 0] = 1
    predictions[predictions <= 0] = 0
    # from collections import Counter
    # print Counter(predictions)
    return predictions
def test_model(model, ind, n=3):
    """Classify n-gram features of selected files from ``pos/`` and ``neg/``.

    Files are picked by position in ``os.listdir`` using the indices from
    ``ind.get_pos_train_ind()`` (label 1) and ``ind.get_neg_test_ind()``
    (label -1).  Only n-grams known to ``fmap`` are kept as features.

    Returns the list produced by ``svmlight.classify``.
    """
    # NOTE(review): the positive side uses the *train* indices while the
    # negative side uses the *test* indices -- confirm this is intended.
    sources = (
        ("pos", 1, ind.get_pos_train_ind),
        ("neg", -1, ind.get_neg_test_ind),
    )

    test = []
    for directory, label, get_indices in sources:
        indices = list(get_indices())
        if not indices:
            continue
        # List the directory once instead of once per index.
        filenames = os.listdir(directory)
        for i in indices:
            # Close the file deterministically instead of leaking the handle.
            with open(directory + "/" + filenames[i]) as handle:
                text = handle.read()
            features = [
                (fmap.getID(gram), count)
                for gram, count in ngrams.ngrams(n, text).items()
                if fmap.hasFeature(gram)
            ]
            test.append((label, features))

    predictions = svmlight.classify(model, test)
    return predictions
def test_svmlight():
    """Train a classifier on a tiny fixed dataset, save it and print two scores."""
    positives = [
        (1, [(1, 2), (2, 5), (3, 6), (5, 1), (4, 2), (6, 1)]),
        (1, [(1, 2), (2, 1), (3, 4), (5, 3), (4, 1), (6, 1)]),
        (1, [(1, 2), (2, 2), (3, 4), (5, 1), (4, 1), (6, 1)]),
        (1, [(1, 2), (2, 1), (3, 3), (5, 1), (4, 1), (6, 1)]),
    ]
    negatives = [
        (-1, [(1, 2), (2, 1), (3, 1), (5, 3), (4, 2), (6, 1)]),
        (-1, [(1, 1), (2, 1), (3, 1), (5, 3), (4, 1), (6, 1)]),
        (-1, [(1, 1), (2, 2), (3, 1), (5, 3), (4, 1), (6, 1)]),
        (-1, [(1, 1), (2, 1), (3, 1), (5, 1), (4, 3), (6, 1)]),
        (-1, [(1, 2), (2, 1), (3, 1), (5, 2), (4, 1), (6, 5)]),
        (-1, [(7, 10)]),
    ]
    training_data = positives + negatives

    test_data = [
        (0, [(1, 2), (2, 6), (3, 4), (5, 1), (4, 1), (6, 1)]),
        (0, [(1, 2), (2, 6), (3, 4)]),
    ]

    model = svmlight.learn(training_data, type='classification', verbosity=0)
    svmlight.write_model(model, 'my_model.dat')

    for score in svmlight.classify(model, test_data):
        print('%.8f' % score)
def performDoc2VecJudgement(trainingData, testData):
    """Classify test documents with an SVM trained on inferred doc2vec vectors.

    Documents are indexed as ``(sentiment, fileName, words)``.  Training
    documents whose sentiment is ``'POS'`` get label 1, all others -1.
    Returns a list of ``(score, sentiment, fileName)`` in test order.
    """
    doc2vecModel = Doc2Vec.load("/Users/Matteo/Desktop/doc2vec_models/final_model")

    def to_svm_example(label, vector):
        # svmlight feature ids are 1-based
        return (label, [(index + 1, value) for index, value in enumerate(vector)])

    training_vectors = [(1 if doc[0] == 'POS' else -1, doc2vecModel.infer_vector(doc[2]))
                        for doc in trainingData]
    test_vectors = [(0, doc2vecModel.infer_vector(doc[2])) for doc in testData]

    training_examples = [to_svm_example(label, vector) for label, vector in training_vectors]
    test_examples = [to_svm_example(label, vector) for label, vector in test_vectors]

    svmModel = svmlight.learn(training_examples)
    judgements = svmlight.classify(svmModel, test_examples)

    return [(judgements[position], sentiment, fileName)
            for position, (sentiment, fileName, features) in enumerate(testData)]
def evaluate(self, docs):
    """Classify ``docs`` and return ``(per_doc_correct, accuracy)``.

    Documents are ``(classification, fname, wordlist)`` tuples.  Words that
    have no id yet (or id 0) are given the next free id, so
    ``self.word_ids`` and ``self.curr_id`` may be modified.  A prediction is
    correct when its sign matches the ``"POS"`` / ``"NEG"`` label.
    """
    examples = []
    for classification, fname, wordlist in docs:
        features = []
        for word, count in Counter(wordlist).iteritems():
            if self.word_ids.get(word, 0) == 0:
                self.word_ids[word] = self.curr_id
                self.curr_id += 1
            features.append((self.word_ids.get(word), count))
        features.sort(key=lambda pair: pair[0])  # sorted by feature id
        label = 1 if classification == "POS" else -1
        examples.append((label, features))

    scores = svmlight.classify(self.model, examples)

    corrects = [
        bool((score < 0 and docs[position][0] == "NEG")
             or (score > 0 and docs[position][0] == "POS"))
        for position, score in enumerate(scores)
    ]
    return (corrects, float(sum(corrects))/len(corrects))
def main_svmlight(): # copied: import svmlight import pdb training_data = syntheticData(30, 1) test_data = syntheticData(30, 1) #training_data = __import__('data').train0 #test_data = __import__('data').test0 print 'HERE 0' print 'training_data is', training_data print 'test_data is', test_data # train a model based on the data #pdb.set_trace() print 'HERE 1' model = svmlight.learn(training_data, type='regression', kernelType=2, verbosity=3) print 'HERE 2' # model data can be stored in the same format SVM-Light uses, for interoperability # with the binaries. svmlight.write_model(model, 'my_model.dat') print 'HERE 3' # classify the test data. this function returns a list of numbers, which represent # the classifications. #predictions = svmlight.classify(model, test_data) pdb.set_trace() predictions = svmlight.classify(model, training_data) print 'HERE 4' for p, example in zip(predictions, test_data): print 'pred %.8f, actual %.8f' % (p, example[0])
def score(self, websites):
    """Score every website with the trained classifier.

    Note: Use all unlabelled websites as negatives but no more than 20x the
    positives

    Returns a list of ``(website, score)`` pairs in input order.  If no
    classifier is set, an error message is printed and classification is
    still attempted.
    """
    # Disabled training code kept from the original:
    # neg = np.array([w.get_vsm(self.vectorizer, self.text_type) for w in websites])
    # neg = self._convert_to_svmlight(neg, -1)
    # if not self.model:
    #     max_neg = min(20*len(self.pos), len(neg))
    #     train = neg[:max_neg]
    #     print "Number of unlabelled examples: ", len(train)
    #     print "Training the classifier..."
    #     train.extend(self.pos)
    #     self.model = svmlight.learn(train, type='classification', verbosity=0, cost_ratio=self.cost_ratio, C=self.c)
    print("Scoring...")
    if not self.clf:
        print("Error. Classifier must be trained")

    vectors = np.array(
        [site.get_vsm(self.vectorizer, self.text_type) for site in websites])
    examples = self._convert_to_svmlight(vectors, -1)  # The label does not matter
    predicts = svmlight.classify(self.clf, examples)

    return [(websites[position], predicts[position])
            for position in xrange(len(websites))]
def svm(train_docs, train_labels, test_docs, params):
    """Train an svmlight classifier and return 0/1 labels for ``test_docs``.

    ``params`` is a ``(kernel, param)`` pair.  ``'rbf'`` uses ``param`` as
    gamma, ``'poly'`` / ``'polynomial'`` use it as the polynomial degree;
    any other kernel raises ``ValueError``.  Training labels equal to 1 are
    positive, everything else is negative.
    """
    kernel, param = params

    signed_labels = (1 if l == 1 else -1 for l in train_labels)
    train_docs_svm = to_svmlight_format(train_docs, signed_labels)
    test_docs_svm = to_svmlight_format(test_docs, np.zeros(test_docs.shape[0]))

    if kernel == 'rbf':
        kernel_options = {'kernel': 'rbf', 'rbf_gamma': param}
    elif kernel in ('poly', 'polynomial'):
        kernel_options = {'kernel': 'polynomial', 'poly_degree': param}
    else:
        raise ValueError('Unsupported svm parameters: ' + str(params))

    model = svmlight.learn(train_docs_svm, type='classification', **kernel_options)
    margins = svmlight.classify(model, test_docs_svm)
    return [1 if margin > 0 else 0 for margin in margins]
rady_list = [] iteration = 1 max_pred = -1 while len(img[0]) >= window_width and len(img) >= window_height: print iteration hog = feature.hog(img, orientations=orientations, pixels_per_cell=(cell, cell), cells_per_block=(block, block), normalise=True) testing_data_keys, testing_data_tuples = split_vector( hog.tolist(), len(img[0]), len(img)) predictions = svmlight.classify(model, testing_data_tuples) scale = (1.0 / scale_factor)**iteration for i in xrange(len(predictions)): prediction = float(predictions[i]) if prediction >= positive_threshold: max_pred = max(max_pred, prediction) coordinate = testing_data_keys[i].split("^") centerx = int(int(coordinate[0]) * scale) centery = int(int(coordinate[1]) * scale) radx = int((window_width / 2) * scale) rady = int((window_height / 2) * scale) centerx_list.append(centerx) centery_list.append(centery)
def predict(self, sample_):
    """Return the output of ``SVC.classify`` for ``sample_`` using ``self.model``."""
    return SVC.classify(self.model, sample_)
def run_svm(article_count, feature_functions, kernel='polynomial', split=0.9, model_path='svm.model'):
    """Train and evaluate an svmlight DEF/INDEF classifier on WSJ tokens.

    Tokens tagged ``DEF`` become positive examples and ``INDEF`` negative
    ones; every other token is ignored.  The data is split with
    ``bifurcate``, a model is trained on the first part, written to
    ``model_path``, and evaluated on the second part.

    Returns a dict with the article/token/train/test counts, the kernel
    name and the ``correct`` / ``wrong`` / ``total`` prediction counts.
    """
    # https://bitbucket.org/wcauchois/pysvmlight
    articles, total_token_count = preprocess_wsj(article_count, feature_functions)

    dictionary = Dictionary()
    dictionary.add_one('ZZZZZ')  # so that no features are labeled 0

    def labelled_examples():
        for article in articles:
            for sentence in article:
                for tag, token_features in zip(sentence.def_tags, sentence.data):
                    # only use def / indef tokens
                    if tag in ('DEF', 'INDEF'):
                        ids = sorted(set(dictionary.add(token_features)))
                        yield (+1 if tag == 'DEF' else -1, [(i, 1) for i in ids])

    data = list(labelled_examples())

    train, test = bifurcate(data, split, shuffle=True)
    # for corpus, name in [(train, 'train'), (test, 'test')]:
    #     write_svm(corpus, 'wsj_svm-%s.data' % name)

    #####################
    # do svm in Python...
    model = svmlight.learn(train, type='classification', kernel=kernel)

    # svmlight.learn options
    # type: select between 'classification', 'regression', 'ranking' (preference ranking), and 'optimization'.
    # kernel: select between 'linear', 'polynomial', 'rbf', and 'sigmoid'.
    # verbosity: set the verbosity level (default 0).
    # C: trade-off between training error and margin.
    # poly_degree: parameter d in polynomial kernel.
    # rbf_gamma: parameter gamma in rbf kernel.
    # coef_lin
    # coef_const
    # costratio (corresponds to -j option to svm_learn)
    svmlight.write_model(model, model_path)

    gold_labels, test_feature_values = zip(*test)
    # total = len(gold_labels)

    # The classifier sees a placeholder label of 0 instead of the gold label.
    unlabelled = [(0, feature_values) for feature_values in test_feature_values]
    predictions = svmlight.classify(model, unlabelled)

    correct, wrong = matches([(gold > 0) for gold in gold_labels],
                             [(prediction > 0) for prediction in predictions])

    summary = {}
    summary['total_articles_count'] = len(articles)  # int
    summary['total_token_count'] = total_token_count  # int
    summary['train_count'] = len(train)  # int
    summary['test_count'] = len(test)  # int
    summary['kernel'] = kernel
    summary['correct'] = correct
    summary['wrong'] = wrong
    summary['total'] = correct + wrong
    return summary
def predict(self, X):
    """Return the svmlight outputs for ``X`` as a numpy array."""
    examples = self.__createData(X)
    scores = svmlight.classify(self.model, examples)
    return numpy.array(scores)
print("Training it=", it, "cost-factor=", cost_factor + 1) model = svmlight.learn(list(train), type='classification', verbosity=0, costratio=cost_factor + 1) ## costratio = cost-factor if dump == "yes": svmlight.write_model( model, "models/model_" + dataset + "_" + features + "_it" + str(it) + "_cost_fact" + str(cost_factor + 1) + "_" + ts + ".dat") predictions = svmlight.classify(model, test) print("Predicting it=", it, "cost-factor=", cost_factor + 1) tp, tn, fp, fn = evaluate(predictions) accuracies.append( weighted_accuracy(cost_factor + 1, tn, tp, fn, fp) * 100) predictions = np.array(predictions) predictions[predictions < 0] = -1 predictions[predictions > 0] = 1 f1_micro.append( f1_score(val, predictions, average='micro') ) # micro: calculates metrics totally by counting the total true positives, false negatives and false positives cl = f1_score(val, predictions, average=None) # none: returns scores for each class f1_rel.append(cl[0]) f1_unrel.append(cl[1])
predictions_by_code = {} def load_classifier(clf_i): clf = svmlight.read_model(os.path.join(clf_directory,str(clf_i))) return clf ######################################## ## Make predictions for each of the ICD9 codes classifiers = {} for i in range(I): if os.path.exists(os.path.join(clf_directory,str(i))): print i, 'of', I clf = load_classifier(i) preds = np.array(svmlight.classify(clf, zip(np.zeros(len(test_text)),test_text))) predictions_by_code[i] = [j for j in range(len(preds)) if preds[j] > 0] # Invert predictions predictions = [[root] for x in range(TD)] for code, docs in predictions_by_code.iteritems(): for doc in docs: predictions[doc].append(code) # Prune the predictions to respect the conditional classification # constraint (all ancestors must be predicted true for a child to be # predicted true) for i in range(len(predictions)): doc_predictions = set(list(predictions[i])) filtered_predictions = []
def predict(self, peptides, alleles=None, **kwargs):
    """
    Returns predictions for given peptides and alleles. If no alleles are given, predictions for all available models
    are made.

    :param peptides: A single :class:`~Fred2.Core.Peptide.Peptide` or a list of :class:`~Fred2.Core.Peptide.Peptide`
    :type peptides: list(:class:`~Fred2.Core.Peptide.Peptide`) or :class:`~Fred2.Core.Peptide.Peptide`
    :param alleles: A list of :class:`~Fred2.Core.Allele.Allele`
    :type alleles: list(:class:`~Fred2.Core.Allele.Allele`) or :class:`~Fred2.Core.Allele.Allele`
    :param kwargs: optional parameter (not used yet)
    :return: Returns a :class:`~Fred2.Core.Result.EpitopePredictionResult` object with the prediction results
    :rtype: :class:`~Fred2.Core.Result.EpitopePredictionResult`
    :raises ValueError: on wrongly typed input or if no prediction could be made
    """
    if isinstance(peptides, Peptide):
        pep_seqs = {str(peptides): peptides}
    else:
        pep_seqs = {}
        for p in peptides:
            if not isinstance(p, Peptide):
                raise ValueError("Input is not of type Protein or Peptide")
            pep_seqs[str(p)] = p

    if alleles is None:
        al = [Allele("HLA-" + a) for a in self.supportedAlleles]
        allales_string = {conv_a: a for conv_a, a in itertools.izip(self.convert_alleles(al), al)}
    else:
        if isinstance(alleles, Allele):
            alleles = [alleles]
        # Materialise once: the alleles are validated and then read twice.
        alleles = list(alleles)
        if any(not isinstance(p, Allele) for p in alleles):
            raise ValueError("Input is not of type Allele")
        allales_string = {conv_a: a for conv_a, a in itertools.izip(self.convert_alleles(alleles), alleles)}

    result = {}

    # A single model is shared by all alleles.
    model_path = pkg_resources.resource_filename("Fred2.Data.svms.%s" % self.name, "%s" % self.name)
    # model_path = os.path.abspath("../Data/svms/%s/%s"%(self.name, self.name))
    model = svmlight.read_model(model_path)

    # group peptides by length; groupby only merges *adjacent* equal keys,
    # so the sequences are sorted by length first.
    sorted_seqs = sorted(pep_seqs.iterkeys(), key=len)
    for length, peps in itertools.groupby(sorted_seqs, key=len):
        peps = list(peps)
        if length != 9:
            warnings.warn("Peptide length of %i is not supported by UniTope" % length)
            continue

        for a in allales_string.keys():
            if allales_string[a].name in self.supportedAlleles:
                encoding = self.encode(peps, a)
                pred = svmlight.classify(model, encoding.values())

                # Reuse the allele's dict so scores from an earlier group
                # are not overwritten by a fresh, empty one.
                allele_scores = result.setdefault(allales_string[a], {})
                for pep, score in itertools.izip(encoding.keys(), pred):
                    allele_scores[pep_seqs[pep]] = score

    if not result:
        # Adjacent literals instead of a backslash continuation, which
        # embedded the source indentation in the message.
        raise ValueError("No predictions could be made for given input. Check your "
                         "epitope length and HLA allele combination.")
    df_result = EpitopePredictionResult.from_dict(result)
    df_result.index = pandas.MultiIndex.from_tuples([tuple((i, self.name)) for i in df_result.index],
                                                    names=['Seq', 'Method'])
    return df_result
def load_classifier(clf_i): clf = svmlight.read_model(os.path.join(clf_directory, str(clf_i))) return clf ######################################## ## Make predictions for each of the ICD9 codes classifiers = {} for i in range(I): if os.path.exists(os.path.join(clf_directory, str(i))): print i, 'of', I clf = load_classifier(i) preds = np.array( svmlight.classify(clf, zip(np.zeros(len(test_text)), test_text))) predictions_by_code[i] = [j for j in range(len(preds)) if preds[j] > 0] # Invert predictions predictions = [[root] for x in range(TD)] for code, docs in predictions_by_code.iteritems(): for doc in docs: predictions[doc].append(code) # Prune the predictions to respect the conditional classification # constraint (all ancestors must be predicted true for a child to be # predicted true) for i in range(len(predictions)): doc_predictions = set(list(predictions[i])) filtered_predictions = [] for prediction in doc_predictions: