def self_training2(X, y, X_unLabeled, param, th):
    """Self-train an SVM by repeatedly re-labelling the unlabeled samples.

    A first model is trained on the labeled data only.  Each round then
    predicts labels for ``X_unLabeled`` with the current model, retrains on
    the labeled and pseudo-labeled samples together, and stops as soon as
    the objective value changes by less than ``th`` between rounds.

    :param X: labeled samples (must provide ``tolist()``)
    :param y: labels for ``X`` (must provide ``tolist()``)
    :param X_unLabeled: unlabeled samples (must provide ``tolist()``)
    :param param: training parameters forwarded to ``svmutil.svm_train``
    :param th: convergence threshold on the objective value change
    :return: ``(model, y_unlabeled, obj_new, itr_num)`` where ``y_unlabeled``
        is a masked array of the final pseudo-labels with every entry masked
    """

    def _pseudo_labels(current_model):
        # svm_predict needs a label list of matching length; the values are
        # placeholders.  Only the predicted labels (element 0) are kept.
        prediction = svmutil.svm_predict(
            x=X_unLabeled.tolist(),
            y=[1] * len(X_unLabeled),
            m=current_model,
            options="-q",
        )
        return np.array(prediction[0])

    initial_problem = svmutil.svm_problem(x=X.tolist(), y=y.tolist())
    model = svmutil.svm_train(initial_problem, param)
    obj = model.get_objective_value()[0]

    itr_num = 0
    while True:
        predicted_labels = _pseudo_labels(model)
        combined_x = np.append(X, X_unLabeled, axis=0).tolist()
        combined_y = np.append(y, predicted_labels).tolist()
        model = svmutil.svm_train(
            svmutil.svm_problem(x=combined_x, y=combined_y), param
        )
        obj_new = model.get_objective_value()[0]
        itr_num += 1
        if abs(obj_new - obj) < th:
            break
        obj = obj_new

    y_unlabeled = ma.array(
        data=_pseudo_labels(model), mask=[True] * len(X_unLabeled)
    )
    return model, y_unlabeled, obj_new, itr_num
def predict(self, features, const_intercept=False):
    """
    Run prediction using svm_predict.

    The model is taken from ``self.modeldict['model']``.

    :param features: List of features, produced by svr_features_from_sequence;
                     each item is read through its ``'value'`` key
    :param const_intercept: if true, add a 1:1 term at the beginning of the
                            matrix. Must match model's term
    :return: triple of predictions, accuracy, and values from svm_predict.
    """
    # With a const intercept term there is one more feature.  The value is
    # only relevant to the model-size check, which is currently disabled.
    feature_size = len(features) + 1 if const_intercept else len(features)

    # svm_matrix maps 1-based indices to values; index 1 is reserved for the
    # intercept term when it is requested.
    if const_intercept:
        svm_matrix = {1: 1}
        first_index = 2
    else:
        svm_matrix = {}
        first_index = 1
    for position, feature in enumerate(features, first_index):
        svm_matrix[position] = feature['value']

    return svmutil.svm_predict(
        [1.0], [svm_matrix], self.modeldict['model'], '-q')
def test_libsvm_trains_correctly(heart_scale):
    """Train on the first 200 heart_scale samples and check the rest.

    The accuracy triple and the first six predicted labels are compared
    against fixed reference values.
    """
    labels, samples = heart_scale
    n_train = 200

    model = svm_train(labels[:n_train], samples[:n_train], '-c 4')
    predicted, metrics, _decision = svm_predict(
        labels[n_train:], samples[n_train:], model)

    expected_metrics = [84.28571428571429, 0.6285714285714286,
                        0.463744141163496]
    expected_head = [-1.0, 1.0, 1.0, -1.0, 1.0, -1.0]
    assert metrics == pytest.approx(expected_metrics)
    assert predicted[:6] == expected_head
def predict_emotion_paef():
    """Classify the feature lines in ``image_data.txt`` with the PAEF model.

    Each line is split on whitespace, the first token is dropped and the
    remaining ``index:value`` tokens are reduced to their float values.
    The predicted labels are printed for every line.

    :return: the emotion name mapped from the label of the last line read
    """
    model = svmutil.svm_load_model(
        "C:/Users/Admin/PycharmProjects/Emotion_Detection/trained_models/paef_models/artphoto_train.txt.model"
    )
    mapping = {
        0: "happy",
        2: "fear",
        3: "excitement",
        4: "disgust",
        6: "anger",
        7: "sad",
    }

    with open('image_data.txt', 'r') as handle:
        for raw_line in handle:
            tokens = raw_line.strip().split()[1:]
            values = [float(token.split(":")[-1]) for token in tokens]
            p_labs, p_acc, p_vals = svmutil.svm_predict([0], [values], model)
            print("p_labs")
            print(p_labs)
            lab = p_labs[0]

    # NOTE(review): the original nesting was lost; this assumes the return
    # follows the loop (for a one-line file both readings agree) - confirm.
    return mapping[int(lab)]
def sliding_window_search(img, motion_img, svm, method, feature, sbox_height,
                          sbox_width, slide=10, threshold=0.2):
    """Scan ``img`` with a sliding window and keep windows the SVM accepts.

    Window origins start at 1 and advance by ``slide`` in both directions.
    ``motion_img`` is accepted but not used: features are extracted from the
    image patch only.

    :return: list of ``[row_start, row_end, col_start, col_end, score]`` for
        every window whose first decision value exceeds ``threshold``
    """
    detections = []
    height, width = img.shape

    for top in range(1, height - sbox_height, slide):
        bottom = top + sbox_height - 1
        for left in range(1, width - sbox_width, slide):
            right = left + sbox_width - 1
            window = img[top:bottom, left:right]
            descriptor = extract(window, None, method, feature)
            # A single sample with a placeholder label of 0.
            _labels, _accuracy, decision = svm_predict([0], [descriptor], svm)
            score = decision[0][0]
            if score > threshold:
                detections.append([top, bottom, left, right, score])

    return detections
def accuracy(self):
    """Classify the training tweets and print the resulting accuracy.

    The tweets come from ``common.get_filtered_training_data``; each text is
    lower-cased and reduced to its ASCII words before feature extraction.
    Numeric predictions 0/1/2 are mapped to 'positive'/'negative'/'neutral'
    and compared with the stored label.

    Unlike the previous version this no longer assigns ``self.accuracy``:
    that replaced this method with a float on the instance, so a second call
    failed.  An empty data set now reports 0.0 instead of dividing by zero.

    :return: accuracy as a percentage (also printed)
    """
    tweets = common.get_filtered_training_data(self.training_datafile)
    test_tweets = [
        [word.lower() for word in text.split() if common.is_ascii(word)]
        for (text, _label) in tweets
    ]
    test_feature_vector = helper.get_SVM_feature_vector(
        self.feature_list, test_tweets)
    # The label list only has to match the sample count.
    p_labels, _p_accs, _p_vals = svm_predict(
        [0] * len(test_feature_vector), test_feature_vector, self.classifier)

    label_names = {0: 'positive', 1: 'negative', 2: 'neutral'}
    total, correct, wrong = 0, 0, 0
    for count, (_text, expected) in enumerate(tweets):
        predicted = p_labels[count]
        # Unknown numeric labels are compared as-is, as before.
        predicted = label_names.get(predicted, predicted)
        if predicted == expected:
            correct += 1
        else:
            wrong += 1
        total += 1

    accuracy = (float(correct) / total) * 100 if total else 0.0
    # Single-argument print call: valid on both Python 2 and Python 3.
    print('Total = {}, Correct = {}, Wrong = {}, Accuracy = {}'.format(
        total, correct, wrong, accuracy))
    return accuracy
def predict(self, X, options=""):
    """Run ``svmutil.svm_predict`` on ``X`` with ``self.model``.

    Objects exposing a ``shape`` attribute are converted with ``tolist()``
    first.  All labels passed to LIBSVM are placeholder zeros.

    :param X: samples to classify
    :param options: option string forwarded to ``svm_predict``
    :return: the result of ``svm_predict`` unchanged
    """
    samples = X.tolist() if hasattr(X, 'shape') else X
    placeholder_labels = [0 for _ in samples]
    return svmutil.svm_predict(
        placeholder_labels, list(samples), self.model, options=options)
def evaluate(self, input_data_path):
    """Evaluate ``self._model`` on ``Test.txt`` inside ``input_data_path``.

    The accuracy and mean squared error reported by ``svm_predict`` are
    logged at INFO level.

    :param input_data_path: directory that contains ``Test.txt``
    """
    import os

    # os.path.join instead of a hard-coded "\\" so the path also resolves on
    # non-Windows systems.
    test_file = os.path.join(input_data_path, "Test.txt")

    with commons.PhaseLogger("LIBSVM.evaluate.read_problem"):
        Y, X = svmutil.svm_read_problem(test_file)

    with commons.PhaseLogger("LIBSVM.evaluate.predict"):
        p_labels, p_acc, p_vals = svmutil.svm_predict(Y, X, self._model)

    # p_acc is (accuracy, mean squared error, third metric); the third is
    # not reported.
    acc, mse, _ = p_acc
    logging.info("[%s]: evaluate with Acc[%.4f] Mse[%.4f]",
                 self._get_class_name(), acc, mse)
def _label(self, x):
    """Return the predicted labels for ``x`` as an array.

    :param x: a list, tuple or ``numx.ndarray`` of sample vectors
    :return: ``numx.array`` of the labels predicted by ``self.model``
    :raises mdp.NodeException: if ``x`` is not one of the accepted types
    """
    if not isinstance(x, (list, tuple, numx.ndarray)):
        msg = "Data must be a sequence of vectors"
        raise mdp.NodeException(msg)

    # Lists and tuples pass the type check above but have no tolist();
    # route them through an array so every accepted type actually works.
    if isinstance(x, numx.ndarray):
        samples = x.tolist()
    else:
        samples = numx.array(x).tolist()

    # Placeholder labels: only the sample count matters for prediction.
    y = [0] * len(x)
    p_labs, p_acc, p_vals = libsvmutil.svm_predict(y, samples, self.model)
    return numx.array(p_labs)
def process(training_file, test_file, check, draw):
    """Train an RBF SVM on pickled 2D points and optionally test/plot it.

    Both files hold three consecutive pickles: the points of class 1, the
    points of class 2 and the labels.

    :param training_file: path of the pickled training data
    :param test_file: path of the pickled test data
    :param check: if true, run the model on the test data (libsvm prints
        the accuracy to stdout)
    :param draw: if true, plot the test points and the decision boundary
    """

    def load_points(path):
        # Pickle streams must be read in binary mode.
        # NOTE(review): pickle.load can execute arbitrary code; only use
        # files from a trusted source.
        with open(path, 'rb') as f:
            first = pickle.load(f)
            second = pickle.load(f)
            point_labels = pickle.load(f)
        # Real lists (not lazy map objects) so they can be concatenated and
        # handed to libsvm on Python 3 as well.
        return ([list(p) for p in first],
                [list(p) for p in second],
                list(point_labels))

    # Load training data, converted to lists for libsvm.
    class_1, class_2, labels = load_points(training_file)
    samples = class_1 + class_2
    problem = svmutil.svm_problem(labels, samples)
    # Don't print to stdout, use radial basis functions.
    param = svmutil.svm_parameter('-q -t 2')
    model = svmutil.svm_train(problem, param)

    # Load test data.
    class_1, class_2, labels = load_points(test_file)

    if check:
        # Sadly, this prints to stdout too :-/
        svmutil.svm_predict(labels, class_1 + class_2, model)  # Prints accuracy.

    if draw:
        def classify(x, y, model=model):
            points = [list(pair) for pair in zip(x, y)]
            return array(svmutil.svm_predict([0] * len(x), points, model)[0])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [array(class_1), array(class_2)], classify,
            [1, -1])
        show()
def calcFtrs(seq):
    """Compute folding-energy features of ``seq`` relative to SVM estimates.

    Three nucleotide-composition ratios are scaled with the ranges stored
    next to each model (``<model>.rng``) and fed to three models, whose
    predictions serve as the expected mfe average, mfe standard deviation
    and efe average.

    :param seq: sequence passed to ``FOLDER.fold`` and ``kContent``
    :return: ``numpy.array`` of ``[mfe - mfe_avg, (mfe - mfe_avg) / mfe_std,
        efe - efe_avg]``
    """
    stc, mfe, efe, cstc, cmfe, cdst, frq, div, bpp = FOLDER.fold(seq)
    kmer = kContent(seq, 1)

    at_total = kmer['a'] + kmer['t']
    cg_total = kmer['c'] + kmer['g']
    all_total = kmer['a'] + kmer['t'] + kmer['c'] + kmer['g']
    # Every ratio falls back to 0 on an empty denominator; the overall
    # (a+t) fraction previously lacked this guard and raised instead.
    atcg = at_total / all_total if all_total != 0 else 0
    at = kmer['a'] / at_total if at_total != 0 else 0
    cg = kmer['c'] / cg_total if cg_total != 0 else 0
    svmftrs = numpy.array((atcg, at, cg))

    def predict_scaled(model_path):
        # Load the model and its scaling range, standardise the features
        # and return the first predicted value.
        model = svmutil.svm_load_model(model_path)
        avg, std = loadRange(model_path + '.rng')
        scaled = ((svmftrs - avg) / std).tolist()
        return svmutil.svm_predict(
            [0], [scaled], model, options='-b 1')[0][0]

    mfe_avg = predict_scaled(mfeavg200)
    mfe_std = predict_scaled(mfestd200)
    efe_avg = predict_scaled(efeavg200)
    # The efe standard-deviation model (efestd200) is not used, so no
    # normalised efe feature is produced.

    ftrs = [
        mfe - mfe_avg,
        (mfe - mfe_avg) / mfe_std,
        efe - efe_avg,
    ]
    return numpy.array(ftrs)
def process_file(source, target):
    """Predict a chord name for every spectrum row of a CSV file.

    Rows are read from ``source``, passed through the SDA layers and
    classified with the module-level ``model``; the chord names are written
    to ``target`` as one comma-separated line.

    :param source: path of the input CSV file
    :param target: path of the output file
    """
    with open(source, 'rb') as csv_file:
        rows = csv.reader(csv_file)
        (before, chords) = list_spectrum_data(
            rows, components=60, allow_no_chord=True)
        encoded = through_sda_layers(sda, before)
        samples = [vector.tolist() for vector in encoded]
        # Placeholder labels; only the predicted labels are used.
        (labels, acc, vals) = svmutil.svm_predict(
            [0] * len(samples), samples, model)

    # Predicted labels index into the chord list.
    result = [chord_list[int(label)] for label in labels]
    with open(target, 'wb') as out_file:
        out_file.write(','.join(result))
def run(self, mode):
    """Train on the plot data and visualise support vectors and labels.

    ``mode`` 0-2 is used directly as the libsvm kernel type (``-t``) and the
    model is additionally run on the test set.  ``mode`` 3 trains with a
    precomputed kernel built by ``UserDefineKernel``.  Any other value does
    nothing.  (A grid-search variant for mode 3 existed but is disabled.)
    """
    if mode <= 2:
        model = self.Training(
            '-t {} -b 1'.format(mode), self.Plot_Y, self.Plot_X)
        plot_inputs = self.Plot_X
        score_test_set = True
    elif mode == 3:
        plot_inputs = self.UserDefineKernel(self.Plot_X, self.Plot_X, 0.1)
        model = self.Training(
            '-t 4 -c {} -g {} -b 1'.format(0.01, 0.1),
            self.Plot_Y, plot_inputs, True)
        score_test_set = False
    else:
        return

    sv = model.get_sv_indices()
    (label, _, _) = svmutil.svm_predict(self.Plot_Y, plot_inputs, model)
    if score_test_set:
        svmutil.svm_predict(self.Y_test, self.X_test, model)
    # Support-vector indices are shifted down by one before plotting.
    self.Visualization([i - 1 for i in sv], label)
def process(training_file, test_file, check, draw):
    """Train an RBF SVM on pickled 2D points and optionally test/plot it.

    Both files hold three consecutive pickles: the points of class 1, the
    points of class 2 and the labels.

    :param training_file: path of the pickled training data
    :param test_file: path of the pickled test data
    :param check: if true, run the model on the test data (libsvm prints
        the accuracy to stdout)
    :param draw: if true, plot the test points and the decision boundary
    """

    def load_points(path):
        # Pickle streams must be read in binary mode.
        # NOTE(review): pickle.load can execute arbitrary code; only use
        # files from a trusted source.
        with open(path, 'rb') as f:
            first = pickle.load(f)
            second = pickle.load(f)
            point_labels = pickle.load(f)
        # Real lists (not lazy map objects) so they can be concatenated and
        # handed to libsvm on Python 3 as well.
        return ([list(p) for p in first],
                [list(p) for p in second],
                list(point_labels))

    # Load training data, converted to lists for libsvm.
    class_1, class_2, labels = load_points(training_file)
    samples = class_1 + class_2
    problem = svmutil.svm_problem(labels, samples)
    # Don't print to stdout, use radial basis functions.
    param = svmutil.svm_parameter('-q -t 2')
    model = svmutil.svm_train(problem, param)

    # Load test data.
    class_1, class_2, labels = load_points(test_file)

    if check:
        # Sadly, this prints to stdout too :-/
        svmutil.svm_predict(labels, class_1 + class_2, model)  # Prints accuracy.

    if draw:
        def classify(x, y, model=model):
            points = [list(pair) for pair in zip(x, y)]
            return array(svmutil.svm_predict([0] * len(x), points, model)[0])

        imtools.plot_2d_boundary(
            [-6, 6, -6, 6], [array(class_1), array(class_2)], classify,
            [1, -1])
        show()
def train_and_predict(x, y, kernel, option=''):
    """Train an SVM with the named kernel and run it on the test split.

    :param x: pair ``(x_train, x_test)``
    :param y: pair ``(y_train, y_test)``
    :param kernel: one of 'linear', 'polynomial', 'rbf', 'sigmoid',
        'self-defined'
    :param option: extra libsvm options, appended verbatim (no separator is
        inserted) after the kernel type

    The prediction results are computed but not returned in the visible
    code.
    """
    kernel_codes = {
        'linear': '0',
        'polynomial': '1',
        'rbf': '2',
        'sigmoid': '3',
        'self-defined': '4',
    }
    x_train, x_test = x
    y_train, y_test = y

    # -q: suppress the output in libsvm
    param = ''.join(['-q -t ', kernel_codes[kernel], option])
    m = svm_train(y_train, x_train, param)
    pred, pred_acc, pred_val = svm_predict(y_test, x_test, m)
def _calculate_score(self, scaled_feature):
    """Calculate score from scaled brisque feature.

    The feature is converted to a libsvm node array and passed, together
    with ``self._model`` and a one-element ``c_double`` buffer, to
    ``svmutil.svm_predict``; that call's result is returned unchanged.

    :param scaled_feature: Scaled brisque feature.
    :type scaled_feature: np.ndarray
    """
    # idx (second element returned by gen_svm_nodearray) is not used below.
    # NOTE(review): kernel_type is compared with the string 'PRECOMPUTED';
    # confirm the attribute really holds a string and not a numeric code.
    x, idx = gen_svm_nodearray(
        scaled_feature.tolist(),
        isKernel=(self._model.param.kernel_type == 'PRECOMPUTED')
    )
    # Output buffer with room for a single value.
    nr_classifier = 1
    prob_estimates = (c_double * nr_classifier)()
    # NOTE(review): the argument order (model, nodes, buffer) differs from
    # the (y, x, model) order used by other svm_predict callers in this
    # file - verify this is the intended function.
    return svmutil.svm_predict(self._model, x, prob_estimates)
def train(self, input_data_path, params="-t 0 -c 4 -b 1", is_eval=True):
    """Train a LIBSVM model on ``Train.txt`` inside ``input_data_path``.

    The parsed parameters are stored in ``self._params``, the trained model
    in ``self._model`` and ``self._init`` is set to True.

    :param input_data_path: directory that contains ``Train.txt``
    :param params: libsvm training option string
    :param is_eval: only when exactly ``True``, predict on the training data
        and log accuracy and mean squared error
    """
    with commons.PhaseLogger("LIBSVM.train.read_problem"):
        Y, X = svmutil.svm_read_problem(input_data_path + "/Train.txt")
        prob = svmutil.svm_problem(Y, X)

    self._params = svmutil.svm_parameter(params)

    with commons.PhaseLogger("LIBSVM.train.svm_train"):
        self._model = svmutil.svm_train(prob, self._params)
    self._init = True

    if is_eval is not True:
        return

    _labels, metrics, _values = svmutil.svm_predict(Y, X, self._model)
    acc, mse, _ = metrics
    logging.info("[%s]: train with Acc[%.4f] Mse[%.4f]" %
                 (self._get_class_name(), acc, mse))
def classify_set(self, featuresets):
    """
    Classify every featureset and return the predicted labels.

    ``featuresets`` must have the format [ [feature] ].  The result holds
    one original label per item, in input order, i.e. it has the format
    [label].
    """
    adapted = []
    for featureset in featuresets:
        adapted.append(self.__adapt_featureset(featureset))

    # The library requires a label list; its values are placeholders.
    placeholder_labels = [0] * len(adapted)
    prediction = svm_predict(placeholder_labels, adapted, self.__model, "-q")

    # Map the predicted label ids back to the original labels.
    original_labels = []
    for label_id in prediction[0]:
        original_labels.append(self.__labels[int(label_id)])
    return original_labels
def classify_set(self, featuresets):
    """
    Classify every featureset and return the predicted labels.

    ``featuresets`` must have the format [ [feature] ].  The result holds
    one original label per item, in input order, i.e. it has the format
    [label].
    """
    adapted = []
    for featureset in featuresets:
        adapted.append(self.__adapt_featureset(featureset))

    # The library requires a label list; its values are placeholders.
    placeholder_labels = [0] * len(adapted)
    prediction = svm_predict(placeholder_labels, adapted, self.__model, "-q")

    # Map the predicted label ids back to the original labels.
    original_labels = []
    for label_id in prediction[0]:
        original_labels.append(self.__labels[int(label_id)])
    return original_labels
def sliding_window_search(img, sbox_height, sbox_width, threshold):
    """Scan ``img`` with a sliding window and keep windows the SVM accepts.

    The step size ``slide`` and the model ``svm`` are read from the
    enclosing (module) scope.  Each patch is turned into features, then into
    a visual-word histogram, which is classified.

    :return: list of ``[row_start, row_end, col_start, col_end, score]`` for
        every window whose first decision value exceeds ``threshold``
    """
    detections = []
    height, width = img.shape

    for top in range(1, height - sbox_height, slide):
        bottom = top + sbox_height - 1
        for left in range(1, width - sbox_width, slide):
            right = left + sbox_width - 1
            descriptor = extract(img[top:bottom, left:right])
            histogram = calculate_visual_word(descriptor)
            # A single sample with a placeholder label of 0.
            _labels, _accuracy, decision = svm_predict([0], [histogram], svm)
            score = decision[0][0]
            if score > threshold:
                detections.append([top, bottom, left, right, score])

    return detections
def eval(dat):
    """Run ``dat`` through every model but the last and pick a candidate.

    A model is a candidate when its predicted label is positive and its
    first probability value exceeds 0.9 (0.6 for model index 2).

    :param dat: one sample, passed to ``svm_predict`` as a one-item batch
    :return: ``(model_index, probability)`` of the first candidate after
        sorting by probability, or ``(-1, 0)`` when there is none
    """
    candidates = []
    for idx in range(len(config.MODEL_LABEL) - 1):
        outcome = svmutil.svm_predict(
            [], [dat], models[idx], options="-q -b 1")
        prob = outcome[2][0][0]
        if not outcome[0][0] > 0:
            continue
        # Model 2 is accepted with a lower probability than the others.
        required = 0.6 if idx == 2 else 0.9
        if prob > required:
            candidates.append([idx, prob, outcome])

    # NOTE(review): the sort is ascending, so the candidate returned below
    # is the one with the lowest probability - confirm this is intended.
    candidates.sort(key=lambda entry: entry[1])
    if not candidates:
        return (-1, 0)
    print(candidates)
    best = candidates[0]
    return (best[0], best[1])
def classify(self, data):
    """Label every processed tweet as positive, negative or neutral.

    :param data: input collection; de-duplicated with ``get_uniq_data`` and
        pre-processed with ``get_processed_tweets``
    :return: dict mapping each key of the processed tweets to a dict of
        ``{position: {'text', 'tweet', 'label'}}``; predictions other than
        0/1/2 keep their numeric label
    """
    len_tweets = len(data)
    # Per-key tallies of each sentiment (kept locally, not returned).
    tallies = {
        0: ('positive', [0] * len_tweets),
        1: ('negative', [0] * len_tweets),
        2: ('neutral', [0] * len_tweets),
    }

    orig_tweets = self.get_uniq_data(data)
    tweets = self.get_processed_tweets(orig_tweets)

    results = {}
    for key in tweets:
        tweet = tweets[key]
        test_tweets = [
            [word.lower() for word in text.split() if common.is_ascii(word)]
            for text in tweet
        ]
        test_feature_vector = helper.get_SVM_feature_vector(
            self.feature_list, test_tweets)
        # Placeholder labels; only the predictions are used.
        p_labels, p_accs, p_vals = svm_predict(
            [0] * len(test_feature_vector), test_feature_vector,
            self.classifier)

        res = {}
        for position, text in enumerate(tweet):
            label = p_labels[position]
            if label in tallies:
                label, tally = tallies[label]
                tally[key] += 1
            res[position] = {
                'text': text,
                'tweet': orig_tweets[key][position],
                'label': label,
            }
        results[key] = res
    return results
def predict(tr_data_arr, tr_label_arr, pred_data_arr, pred_label_arr):
    """Train an SVM and count misclassified prediction samples.

    Samples are predicted one at a time, each with its own expected label.

    :param tr_data_arr: training samples (converted with ``data_format``)
    :param tr_label_arr: training labels
    :param pred_data_arr: samples to predict (converted with ``data_format``)
    :param pred_label_arr: expected labels of the prediction samples
    :return: ``(sample_count, wrong_count, accuracy_percent)``
    """
    training_problem = svm_problem(tr_label_arr, data_format(tr_data_arr))
    # The c and g parameters below were obtained by cross-validation.
    training_param = svm_parameter('-c 2048.0 -g 0.001953125')
    svm_model = svm_train(training_problem, training_param)

    samples = data_format(pred_data_arr)
    sample_count = len(pred_label_arr)

    def misclassified(position, sample):
        expected = pred_label_arr[position]
        p_label, p_acc, p_val = svm_predict([expected], [sample], svm_model)
        return int(p_label[0]) != int(expected)

    wrong = sum(
        1 for position, sample in enumerate(samples)
        if misclassified(position, sample)
    )
    accuracy = (sample_count - wrong) * 100.0 / sample_count
    return sample_count, wrong, accuracy
def _predict(self, line):
    """Predict the label of one sample given in libsvm text format.

    :param line: ``"<label> <index>:<value> ..."``; the feature part may be
        missing for an instance whose features are all zero
    :return: the predicted label, or ``None`` when no model is loaded
    """
    if not self.model:
        logging.info("model is None")
        return None

    parts = line.split(None, 1)
    # In case an instance with all zero features
    if len(parts) == 1:
        parts += ['']
    label, features = parts

    sparse_vector = {}
    for pair in features.split():
        feature_index, feature_value = pair.split(":")
        sparse_vector[int(feature_index)] = float(feature_value)

    pred_labels, (ACC, MSE, SCC), pred_values = svm_predict(
        [float(label)], [sparse_vector], self.model)
    return pred_labels[0]
def svr_prediction(X_features, Y_classes, models=None, predict_options='-b 1'):
    """
    Predict the class members probability

    Every model is run on the same samples; the labels passed to LIBSVM are
    placeholder zeros (only their count is taken from ``Y_classes``).

    :param X_features: samples, converted with ``_convert_arrays2lists``
    :param Y_classes: class values matching ``X_features``
    :param models: iterable of trained models; ``None`` means no models
    :param predict_options: option string forwarded to ``svm_predict``
    :return: list with the predicted labels of each model, in model order
    """
    # None instead of a shared mutable [] default.
    if models is None:
        models = []

    Y_list, X_list = _convert_arrays2lists(Y_classes, X_features)
    Y_list = [0] * len(Y_list)

    # Only the predicted labels (first result element) are kept.
    return [
        svm_predict(Y_list, X_list, i_model, predict_options)[0]
        for i_model in models
    ]
def computeLayer(self, layer):
    """
    Run the base filter, then either collect a training vector or classify.

    While ``self.isLearning`` is set, vectors are collected (all with class
    0); once at least 100 are stored and no model exists yet, the SVM is
    trained on them instead.  When not learning and a model exists, the
    vector is classified and the result is printed.

    @param layer: the output HmaxLayer to store results in.
    """
    # The base implementation is called on the class, so self must be
    # passed explicitly.
    LevelFilter.computeLayer(self, layer)

    #need to enable learning mode from UI (after S2 trained)
    #during training, need to pass in class labels
    vec = layer.array[:, 0, 0].tolist()

    if self.isLearning:
        if len(self.learned) >= 100 and self.svmModel is None:
            #model = svm_train(y, x [, 'training_options'])
            self.svmModel = svmutil.svm_train(self.classes, self.learned)
            return
        self.classes.append(0)
        self.learned.append(vec)
    elif self.svmModel is not None:
        #p_labs, p_acc, p_vals = svm_predict(y, x, model [,'predicting_options'])
        # svm_predict expects a list of samples, so the single vector is
        # wrapped to match the one placeholder label.
        pLabs, pAcc, pVals = svmutil.svm_predict([0], [vec], self.svmModel)
        # Single-argument print call: valid on both Python 2 and Python 3.
        print("SVM Result:  %s %s %s" % (pLabs, pAcc, pVals))
def predict_emotion_paef():
    """Classify the feature lines in ``image_data.txt`` with the PAEF model.

    Each line is split on whitespace, the first token is dropped and the
    remaining ``index:value`` tokens are reduced to their float values.
    The predicted labels are printed for every line.

    :return: the emotion name mapped from the label of the last line read
    """
    model = svmutil.svm_load_model("C:/Users/Admin/PycharmProjects/Emotion_Detection/trained_models/paef_models/artphoto_train.txt.model")
    mapping = {
        0: "happy",
        2: "fear",
        3: "excitement",
        4: "disgust",
        6: "anger",
        7: "sad",
    }

    with open('image_data.txt', 'r') as handle:
        for raw_line in handle:
            tokens = raw_line.strip().split()[1:]
            values = [float(token.split(":")[-1]) for token in tokens]
            p_labs, p_acc, p_vals = svmutil.svm_predict([0], [values], model)
            print("p_labs")
            print(p_labs)
            lab = p_labs[0]

    # NOTE(review): the original nesting was lost; this assumes the return
    # follows the loop (for a one-line file both readings agree) - confirm.
    return mapping[int(lab)]
def computeLayer(self, layer):
    """
    Override the computeLayer from LevelFilter in order to continue
    with processing the learning or inferring using our SVM model.
    If the SVM is trained and we are inferring, then the SVM inference
    results are stored in the layer (which is assumed to be a LayerC2).
    The layers are able to render themselves onto a wx canvas for inspection.

    @param layer: the output HmaxLayer to store results in.
    """
    LevelFilter.computeLayer(self, layer)

    #need to enable learning mode from UI (after S2 trained)
    #during training, need to pass in class labels
    vec = layer.array[:, 0, 0].tolist()  #contains vector of C2 maxes

    if self.isLearning and self.__svmModel is None:
        #add to count for how many of this class have been learned
        count = self.__classCounts.get(self.learningClass, 0)
        self.__classCounts[self.learningClass] = count + 1

        #copy base input image to use as example when showing SVM result
        if count == 0:
            layer.saveExampleImage(self.learningClass)

        self.classes.append(self.learningClass)
        self.learned.append(vec)
    elif self.__svmModel is not None:
        #p_labs, p_acc, p_vals = svm_predict(y, x, model [,'predicting_options'])
        pLabs, pAcc, pVals = svmutil.svm_predict(
            [0], [vec], self.__svmModel, "-b 1")
        pVals = pVals[0]

        #sort ids in case SVM classIDs not consecutive
        ids = sorted(self.__classCounts.keys())
        layer.setAccuracyResult(sorted(zip(pVals, ids), reverse=True))

        if HMAX.DEBUG:
            # Single-argument print call: valid on both Python 2 and 3.
            print("SVM Result:  %s %s %s" % (pLabs, pAcc, pVals))
def computeLayer(self, layer):
    """
    Override the computeLayer from LevelFilter in order to continue
    with processing the learning or inferring using our SVM model.
    If the SVM is trained and we are inferring, then the SVM inference
    results are stored in the layer (which is assumed to be a LayerC2).
    The layers are able to render themselves onto a wx canvas for inspection.

    @param layer: the output HmaxLayer to store results in.
    """
    LevelFilter.computeLayer(self, layer)

    #need to enable learning mode from UI (after S2 trained)
    #during training, need to pass in class labels
    vec = layer.array[:, 0, 0].tolist()  #contains vector of C2 maxes

    if self.isLearning and self.__svmModel is None:
        #add to count for how many of this class have been learned
        count = self.__classCounts.get(self.learningClass, 0)
        self.__classCounts[self.learningClass] = count + 1

        #copy base input image to use as example when showing SVM result
        if count == 0:
            layer.saveExampleImage(self.learningClass)

        self.classes.append(self.learningClass)
        self.learned.append(vec)
    elif self.__svmModel is not None:
        #p_labs, p_acc, p_vals = svm_predict(y, x, model [,'predicting_options'])
        pLabs, pAcc, pVals = svmutil.svm_predict(
            [0], [vec], self.__svmModel, "-b 1")
        pVals = pVals[0]

        #sort ids in case SVM classIDs not consecutive
        ids = sorted(self.__classCounts.keys())
        layer.setAccuracyResult(sorted(zip(pVals, ids), reverse=True))

        if HMAX.DEBUG:
            # Single-argument print call: valid on both Python 2 and 3.
            print("SVM Result:  %s %s %s" % (pLabs, pAcc, pVals))
def main(args):
    """Predict and print a quality score for a distorted video.

    A pseudo reference at the distorted frame rate is first written to
    ``pseudo_reference.yuv`` with ffmpeg.  The features returned by
    ``greed_feat`` are then min-max scaled with the stored ``low``/``high``
    parameters and passed to the SVM model selected by ``args.temp_filt``.

    :param args: namespace providing ``ref_path``, ``height``, ``width``,
        ``ref_fps``, ``bit_depth``, ``dist_fps`` and ``temp_filt``
    """
    import subprocess

    ref_path = args.ref_path
    height = args.height
    width = args.width
    ref_fps = args.ref_fps
    bit_depth = args.bit_depth
    pix_format = 'yuv420p' if bit_depth == 8 else 'yuv420p10le'
    fps = args.dist_fps  #frame rate of distorted sequence

    #Obtain pseudo reference video by frame dropping using ffmpeg
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters from being reinterpreted.  As before, the exit status
    # is not checked.
    cmd = [
        'ffmpeg',
        '-r', str(ref_fps),
        '-pix_fmt', pix_format,
        '-s', str(width) + 'x' + str(height),
        '-i', ref_path,
        '-filter:v', 'fps=fps=' + str(fps),
        'pseudo_reference.yuv',
    ]
    subprocess.call(cmd)

    GREED_feat = greed_feat(args)

    #load svm model
    model = svm_load_model('model_params/' + args.temp_filt + '.model')

    #load parameter of trained features
    feat_param = scipy.io.loadmat(
        'model_params/' + args.temp_filt + '_params.mat')
    low = feat_param['low'][0, :]
    high = feat_param['high'][0, :]
    GREED_feat = (GREED_feat - low) / (high - low)

    #Predict score
    # The feature vector is given a leading axis so it forms one sample.
    score, _, _ = svm_predict([0.0], GREED_feat[None, :], model, '-q')
    print(score)
def run_kfold(param_dict, rows, numfold, kmers=None):
    """
    Run k KFold

    Args:
        param_dict: dictionary mapping param string to its value; each
            entry is appended to the libsvm options as "-<key> <value>"
        rows: input rows
        numfold: k for cross validation
        kmers: list of kmers, default [1,2,3]
    Return:
        list with one dict per fold: {"test": <test rows>,
        "svmpred": <result of svm_predict on the fold's test rows>}
    """
    # A fresh list per call instead of a shared mutable default.
    if kmers is None:
        kmers = [1, 2, 3]

    kf = KFold(numfold, shuffle=True)
    splitted = kf.split(rows)

    param_str = "-s 3 -b 1 -q "  # epsilon-SVR, prob estimate true, quiet mode
    param_str += " ".join(
        ["-{} {}".format(k, v) for k, v in param_dict.items()])
    params = svmutil.svm_parameter(param_str)

    fold_results = []
    for train_idx, test_idx in splitted:
        train_list = [rows[i] for i in train_idx]
        test_list = [rows[i] for i in test_idx]

        y_train, x_train = libsvm_generate_matrix(train_list, kmers)
        y_test, x_test = libsvm_generate_matrix(test_list, kmers)

        train_prob = svmutil.svm_problem(y_train, x_train)
        model = svmutil.svm_train(train_prob, params)

        # y is only needed when we need the model performance
        svmpred = svmutil.svm_predict(y_test, x_test, model, options="-q")
        fold_results.append({"test": test_list, "svmpred": svmpred})
    return fold_results
def run(self, mode): ''' 0-Linear Kernel 1-Polynomial Kernel 2-RBF Kernel 3-Best parameter RBF kernel 4-User define kernel ''' if mode <= 2: model = self.Training('-t {} -b 1'.format(mode), self.Y_train, self.X_train) svmutil.svm_predict(self.Y_test, self.X_test, model) elif mode == 3: (C, gamma) = self.GridSearch() model = self.Training('-t 2 -c {} -g {}'.format(C, gamma), self.Y_train, self.X_train) svmutil.svm_predict(self.Y_test, self.X_test, model) elif mode == 4: data = self.UserDefineKernel(self.X_train, self.X_train, 0.01) print('{} {}'.format(len(data), len(self.Y_train))) model = self.Training('-t 4 -c {} -g {} -b 1'.format(1000, 0.01), self.Y_train, data, True) test = self.UserDefineKernel(self.X_test, self.X_train, 0.01) svmutil.svm_predict(self.Y_test, test, model) '''
f.close()

# Training set: the true samples followed by the fake ones, each converted
# to a {position: value} dict for libsvm.
train_data = train_true[::]
train_data.extend(train_fake)
train_data = [dict(enumerate(li)) for li in train_data]
# Labels: +1 for every true sample, -1 for every fake sample.
train_label = [1] * len(train_true) + [-1] * len(train_fake)

# Evaluation set, built the same way.
eva_data = eva_true[::]
eva_data.extend(eva_fake)
eva_data = [dict(enumerate(li)) for li in eva_data]
eva_label = [1] * len(eva_true) + [-1] * len(eva_fake)

# The label lists built above are used directly instead of being rebuilt
# inline at each call.
model = svmutil.svm_train(train_label, train_data, '-c 0.03125 -g 0.25')
# Single-argument print calls: valid on both Python 2 and Python 3.
print(type(model))
p_label, p_acc, p_val = svmutil.svm_predict(eva_label, eva_data, model)
print(p_acc)
def main():
    """Add SVM-predicted relations to the entities of a NER result file.

    ``sys.argv[1]`` is the working directory and ``sys.argv[2]`` the file
    name (everything from the last ".txt" on is dropped).  The NER file
    ``<dir>/<name>_NER.xml`` is read; for every child of the root, pair
    features are written to ``Tempfile/relation_scale``, classified, and the
    non-'None' relations are stored in each entity's ``relation`` attribute
    as ``<other_index>:<relation>`` entries joined by ``|``.  The result is
    written to ``<dir>/<name>_Parsed.xml``.
    """
    m = svmutil.svm_load_model('trained_models/svm.model')
    relation_tag = {0: 'None', 1: 'has_value', 2: 'has_temp', 3: 'modified_by'}

    match = re.search(r'^(.*)\.txt', sys.argv[2])
    filename = match.group(1) if match else sys.argv[2]
    input_dir = sys.argv[1] + '/' + filename + '_NER.xml'
    output_dir = sys.argv[1] + '/' + filename + '_Parsed.xml'
    # Single-argument print calls are valid on both Python 2 and Python 3.
    print("Reading NER results from  %s" % input_dir)

    tree = ET.ElementTree(file=input_dir)
    root = tree.getroot()
    relations = {}
    index = []
    for child in root:
        # Close the feature file before it is read back below, so buffered
        # content is on disk.
        with codecs.open('Tempfile/relation_scale', 'w') as syn_features:
            temp_pairs = relation_features.generate_pairs(child, syn_features)

        if temp_pairs:
            try:
                y, x = svmutil.svm_read_problem('Tempfile/relation_scale')
                p_label, p_acc, p_val = svmutil.svm_predict(y, x, m)
            except ValueError:
                # Show the offending text and skip this child.
                for child2 in child.findall('text'):
                    print(child2.text)
                continue
        else:
            p_label = []
            temp_pairs = []

        # Pair keys look like "<index>_<index>"; remember every index that
        # takes part in a pair.
        for j in range(0, len(p_label)):
            relations[temp_pairs[j]] = p_label[j]
            indexes = temp_pairs[j].split("_")
            index.append(indexes[0])
            index.append(indexes[1])

        for child2 in child.findall('entity'):
            node_index = child2.attrib['index']
            child2.attrib['relation'] = 'None'
            if node_index not in index:
                continue
            # NOTE(review): right_pattern has no trailing '$' and
            # left_pattern no leading '^', so an index can match as a
            # prefix/suffix of a longer one - confirm this is acceptable.
            right_pattern = r'^(\w+)_' + node_index
            left_pattern = node_index + r'_(\w+)$'
            for relation in relations:
                match1 = re.search(left_pattern, relation)
                match2 = re.search(right_pattern, relation)
                if match1:
                    other_index = match1.group(1)
                elif match2:
                    other_index = match2.group(1)
                else:
                    continue
                relation_type = relation_tag[relations[relation]]
                if relation_type == 'None':
                    continue
                # Compare by value: identity checks on strings only work by
                # accident of interning.
                if child2.attrib['relation'] == 'None':
                    child2.attrib['relation'] = \
                        other_index + ":" + relation_type
                else:
                    child2.attrib['relation'] = child2.attrib['relation'] + \
                        "|" + other_index + ":" + relation_type

    # Remove temporary files.
    relation_excuted = os.path.exists("in.parse")
    if relation_excuted:
        os.system('rm in.parse')
    os.system('rm Tempfile/relation_scale')

    print("Writing Relation xml to  %s" % output_dir)
    # Context manager so the output is flushed and closed.
    with codecs.open(output_dir, 'w') as new_tree:
        tree.write(new_tree)
    print("Finished!")
def get_dataset(fname, k):
    """Read a FASTA file and build one feature entry per sequence.

    :param fname: FASTA file, parsed with ``seq2feature.parse_fasta``
    :param k: second argument of ``k_spec`` for every sequence
    :return: ``(labels, features)`` where labels are the protein ids and
        features the matching ``k_spec`` results, in file order
    """
    labels = []
    features = []
    for prot_id, seq in seq2feature.parse_fasta(fname):
        labels.append(prot_id)
        features.append(k_spec(seq, k))
    return labels, features


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Predict X binding proteins.')
    parser.add_argument('-model', action="store", dest="model")
    parser.add_argument('-thr', action="store", dest="thr", type=float)
    parser.add_argument('-fname', action="store", dest="fname")
    # Parse the command line once and read all options from the result.
    args = parser.parse_args()
    model = args.model
    fname = args.fname
    thr = args.thr

    labels, features = get_dataset(fname, 2)
    model = svmutil.svm_load_model(model)
    # Placeholder labels; only the decision values are used below.
    plbl, pacc, pvals = svmutil.svm_predict(
        [0] * len(features), features, model, "")

    # Print every sequence whose first decision value reaches the threshold.
    for cnt, (prot_id, seq) in enumerate(seq2feature.parse_fasta(fname)):
        pval = pvals[cnt][0]
        if pval >= thr:
            # Single-argument print calls: valid on Python 2 and Python 3.
            print("> %s:%f" % (prot_id, pval))
            print(seq)
sum1 = sum1/len(headlines[h]) # print(sum1) sum1 = np.concatenate((sum1,cat_vec[combine_f["Category"][h]],cat_vec[combine_f["Post_Type"][h]], arrays[h])) head_vec.append(sum1) head_vec = np.array(head_vec) feat_len = int(0.8*len(head_vec)) train = head_vec[:feat_len] res_train = combine_f["class"][:feat_len].tolist() test = head_vec[feat_len:] res_test = combine_f["class"][feat_len:].tolist() print(len(test), len(res_test)) model_svm = svmutil.svm_train(res_train, train,'-t 2 -c 2') y = svmutil.svm_predict(res_test, test, model_svm) confusion_matrix(res_test, y[0], labels=[0,1]) tn, fp, fn, tp = confusion_matrix(res_test, y[0], labels=[0,1]).ravel() recall = tp/(tp+fn) recall precision = tp/(tp+fp) precision f1 = precision*recall*2/(precision+recall) f1 fpr, tpr, thresholds = roc_curve(res_test, y[0])
'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } prob = svm_problem(y_train,X_train_pca) param = svm_parameter("-q") param.kernel='rbf' #param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5], # 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], } param.C=32 param.gamma=0.0001 print("Pass") #parameters = GridSearchCV( param_grid) model= svm_train(prob,param) #clf = clf.fit(X_train_pca, y_train) #print("Best estimator found by grid search:") #print(m.best_estimator_) y_pred, pred_acc, pred_val = svm_predict(y_test,X_test_pca,model) ################################################################################ ## Quantitative evaluation of the model quality on the test set print(classification_report(y_test, y_pred, target_names=target_names)) print(confusion_matrix(y_test, y_pred, labels=range(n_classes))) print("Predicting people's names on the test set") ################################################################################ ## Qualitative evaluation of the predictions using matplotlib # def plot_gallery(images, titles, h, w, n_row=4, n_col=8): """Helper function to plot a gallery of portraits"""
def classify(x, y, model=model):
    """Return the predicted label of every point ``(x[i], y[i])``.

    :param x: first coordinates of the points
    :param y: second coordinates of the points
    :param model: trained model; bound to the outer ``model`` at definition
    :return: ``array`` of the labels predicted by ``svm_predict``
    """
    # A real list of [x, y] samples: a lazy map object (Python 3) is not
    # accepted by svm_predict.
    points = [list(pair) for pair in zip(x, y)]
    # Placeholder labels; only the predicted labels (element 0) are used.
    return array(svmutil.svm_predict([0] * len(x), points, model)[0])
def classify(x, y, model=model):
    """Return the predicted label of every point ``(x[i], y[i])``.

    :param x: first coordinates of the points
    :param y: second coordinates of the points
    :param model: trained model; bound to the outer ``model`` at definition
    :return: ``array`` of the labels predicted by ``svm_predict``
    """
    # A real list of [x, y] samples: a lazy map object (Python 3) is not
    # accepted by svm_predict.
    points = [list(pair) for pair in zip(x, y)]
    # Placeholder labels; only the predicted labels (element 0) are used.
    return array(svmutil.svm_predict([0] * len(x), points, model)[0])
def detectcontact(photolist, n, savesize=20, delsize=15,
                  thresholds=(9, 0.75, 6), historysize=10, blocksize=10,
                  Npatches=20):
    """
    Look for candidate retroreflector dots in photo ``n`` of ``photolist``.

    Photos from index ``n - historysize`` to ``n`` are grouped into sets by
    trigger time, each set is split into flash / no-flash photos, and the
    flash-minus-no-flash difference of the newest set is compared (via
    ``detect``) with the differences of the older sets.  The ``Npatches``
    largest maxima of the resulting search image are returned as candidates.

    photolist = list of photoitems (these are in the files saved by the
                tracking system). Each usable item is a dict with an 'img'
                float array and a 'record' dict holding 'triggertime'. A
                'mean' entry is added to items that lack one, and a
                'nodilationdiff' cache entry may be added to no-flash items.
    n = index from this list to compute the locations for.
    savesize = controls the size of the patch that is saved into the
               'contact' object.
    delsize = controls the size of the patch that is deleted from the search
              image around a maximum.
    thresholds = thresholds for 'non-ML' decision boundary for if a maximum
                 is a reflector: (searchmax lower bound, mean upper bound,
                 centremax lower bound). Only indexed, never modified.
    historysize = how far back through the list to go.
    blocksize = how much to dilate the current no-flash image compared to the
                current flash image.
    Npatches = number of patches to consider (each patch is centred on a
               maximum).

    TODO Fix Bug: The code relies on the savesize = 20, as that places the
    peak at 20,20 in the patch.

    Uses the module-level ``model`` (an SVM for ``svm_predict``); if it is
    None no SVM prediction is made.

    Returns (contact, found, searchimg):
     contact = a list of dictionaries, each associated with a candidate peak
               in the search image, with these fields:
                 x and y - position of this maximum [ESSENTIAL]
                 patch - the current flash photo minus the current no-flash
                         photo
                 searchpatch - the difference between current pairs and
                         previous pairs of photos (variously dilated) which is
                         searched for its maximum values.
                 mean, searchmax, centremax - various features.
                 confident - a boolean measure of whether the system thinks
                         this is the dot
                 prediction - a real value reporting confidence in being a
                         true retroreflector (NEGATIVE=More likely). the
                         current system works well with a threshold of zero.
                         None when no SVM model is available. [ESSENTIAL]
     found = whether a confident dot has been found.
     searchimg = more for debugging, the searchimg used for finding maximums.
    ``(None, False, None)`` is returned when fewer than two usable photo sets
    exist or when no difference image could be computed.
    """
    # --- Group recent photos into sets taken at (roughly) the same time. ---
    unsortedsets = []
    for i in range(max(0, n - historysize), n + 1):
        photoitem = photolist[i]
        if photoitem is None or photoitem['img'] is None:
            continue
        assert not isinstance(
            photoitem['img'][0, 0],
            numbers.Integral), "Need image array to be float not integers."
        if 'mean' not in photoitem:
            # Subsampled mean is enough to tell flash from no-flash photos.
            photoitem['mean'] = np.mean(photoitem['img'][::5, ::5])
        tt = photoitem['record']['triggertime']
        chosenset = None
        for s in unsortedsets:
            set_time = np.mean([photoi['record']['triggertime'] for photoi in s])
            if np.abs(tt - set_time) < 0.5:
                chosenset = s  # no break: the last matching set wins
        if chosenset is None:
            unsortedsets.append([photoitem])
        else:
            chosenset.append(photoitem)

    # --- Split each set into flash / no-flash photos by brightness. ---
    sets = []
    for s in unsortedsets:
        if len(s) < 2:  # if the set only has one photo in, skip.
            continue
        newset = {'flash': [], 'noflash': []}
        setmean = np.mean([
            photoitem['mean'] for photoitem in s
            if photoitem['img'] is not None
        ])
        for photoitem in s:
            if photoitem['img'] is None:
                continue
            if photoitem['mean'] > setmean + 0.1:
                newset['flash'].append(photoitem)
            else:
                newset['noflash'].append(photoitem)
        if not newset['flash']:
            continue  # no point including sets without a flash
        sets.append(newset)

    if len(sets) < 2:
        print("Fewer than two photo sets available")
        return None, False, None  # we can't do this if we only have one photo set

    # --- Build the difference images for the current and the past sets. ---
    last_diff = None
    this_diff = None
    for i, s in enumerate(sets):
        # whether the set is the one that we're looking for the bee in.
        this_set = i == len(sets) - 1
        for s_nf in s['noflash']:
            if this_set:
                # for the current search image we dilate
                diff = detect(s['flash'][0]['img'], s_nf['img'],
                              blocksize=blocksize)
                if diff is None:
                    # A missing diff cannot be combined with np.minimum.
                    continue
                if this_diff is None:
                    this_diff = diff
                else:
                    this_diff = np.minimum(diff, this_diff)
            else:
                if 'nodilationdiff' in s_nf:
                    diff = s_nf['nodilationdiff']  # cached from an earlier call
                else:
                    # for the past ones we don't dilate
                    diff = detect(s['flash'][0]['img'], s_nf['img'],
                                  dilate=None)
                if diff is None:
                    # Nothing to cache or accumulate for this photo.
                    continue
                s_nf['nodilationdiff'] = diff
                if last_diff is None:
                    last_diff = diff
                else:
                    last_diff = np.maximum(diff, last_diff)

    # TODO: Need to align to other sets
    if (last_diff is None) or (this_diff is None):
        print("Insufficient data")
        return None, False, None

    # if there are large changes in the image the chances are the camera's
    # moved... remove those sets before then.
    # NOTE(review): only sets[-1] is read after this point, so this trimming
    # does not currently influence the returned values.
    keepafter = 0
    for i in range(len(sets) - 1):
        change = np.mean(
            np.abs(sets[i]['noflash'][0]['img'][::5, ::5] -
                   sets[-1]['noflash'][0]['img'][::5, ::5]))
        if change > 3:
            keepafter = i
    sets = sets[keepafter:]

    imgcorrection = 20
    res = detect(this_diff, last_diff, blocksize=10, offset=3,
                 searchbox=imgcorrection)

    # get simple image difference to save as patch.
    img = sets[-1]['flash'][0]['img'] - sets[-1]['noflash'][0]['img']

    searchimg = res.copy()
    contact = []
    found = False
    for _ in range(Npatches):
        y, x = np.unravel_index(searchimg.argmax(), searchimg.shape)
        searchmax = searchimg[y, x]
        # NOTE(review): maxima close to the image border are not filtered out
        # (an earlier bounds check is disabled), so these slices may come back
        # smaller than 2*savesize - confirm this cannot happen in practice.
        patch = img[y - savesize + imgcorrection:y + savesize + imgcorrection,
                    x - savesize + imgcorrection:x + savesize + imgcorrection
                    ].astype(np.float32)
        searchpatch = searchimg[y - savesize:y + savesize,
                                x - savesize:x + savesize].astype(np.float32)
        # Blank this maximum so the next iteration finds the next-largest one.
        searchimg[max(0, y - delsize):min(searchimg.shape[0], y + delsize),
                  max(0, x - delsize):min(searchimg.shape[1], x + delsize)] = 0

        patimg = patch.copy()
        centreimg = patimg[17:24, 17:24].copy()
        # NOTE(review): this zeroes rows/cols 37+ rather than the 17:24 centre
        # window read above - possibly a typo; left unchanged, please confirm.
        patimg[37:44, 37:44] = 0
        centremax = np.max(centreimg.flatten())
        mean = np.mean(patimg.flatten())

        # Possible contact
        confident = bool((searchmax > thresholds[0])
                         & (mean < thresholds[1])
                         & (centremax > thresholds[2]))
        if confident:
            found = True

        if model is not None:
            outersurround = max(patch[16, 20], patch[20, 16], patch[24, 20],
                                patch[20, 24], patch[16, 16], patch[16, 24],
                                patch[24, 16], patch[24, 24])
            innersurround = max(patch[18, 20], patch[20, 18], patch[22, 20],
                                patch[20, 22], patch[18, 18], patch[18, 22],
                                patch[22, 18], patch[22, 22])
            centre = np.sum([
                patch[20, 20], patch[20, 21], patch[20, 19], patch[19, 20],
                patch[21, 20]
            ])
            features = np.array([[
                searchmax, centremax, mean, outersurround, innersurround,
                centre
            ]])
            _, _, pred = svm_predict([], features, model, '-q')
            prediction = pred[0][0]
        else:
            # Without a model there is no SVM output to index into.
            prediction = None

        contact.append({
            'x': x + imgcorrection,
            'y': y + imgcorrection,
            'patch': patch,
            'searchpatch': searchpatch,
            'mean': mean,
            'searchmax': searchmax,
            'centremax': centremax,
            'confident': confident,
            'prediction': prediction
        })
    return contact, found, searchimg
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print('Bayes Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
# FIXME: Bayes accuracy gets very bad if the input dimensions aren't reduced
#        enough. Probably some float underflow due to things not using log
#        probabilities?

# Test SVM.
# Real lists (not map objects): the feature rows are handed to libsvm, which
# needs list/tuple input, and a Python 3 map() iterator can be read only once.
features = [list(row) for row in features]
test_features = [list(row) for row in test_features]

# libSVM needs int labels.  The map is bidirectional: name -> int, int -> name.
str_int_map = {}
for i, c in enumerate(classnames):
    str_int_map[c], str_int_map[i] = i, c


def convert_labels(labels, str_int_map):
    """Translate every entry of ``labels`` through ``str_int_map``.

    Works in both directions because the map holds name -> int and
    int -> name entries; always returns a plain list.
    """
    return [str_int_map[l] for l in labels]


problem = svmutil.svm_problem(convert_labels(labels, str_int_map), features)
# Use a linear kernel, radial basis functions have horrible results (~20% acc)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
res = svmutil.svm_predict(convert_labels(test_labels, str_int_map),
                          test_features, model)[0]
res = convert_labels(res, str_int_map)
# ``res`` is a plain list here; compare as arrays so the equality is
# elementwise even when ``test_labels`` is a list too (list == list would
# collapse to a single bool and give a wrong accuracy).
hits = numpy.asarray(res) == numpy.asarray(test_labels)
acc = numpy.sum(1.0 * hits) / len(test_labels)
print('SVM Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
def predict_label(self, feature):
    """Return the label predicted for one feature vector.

    The vector is wrapped as a single-sample batch and passed to
    ``svm_predict`` together with ``self._model`` and the options from
    ``self.get_predict_options()``.

    :param feature: one sample, in whatever form ``svm_predict`` accepts.
    :return: the first (and only) predicted label.
    """
    predict_options = self.get_predict_options()
    # svm_predict wants one label per sample; -1 is only a placeholder.
    placeholder_labels = [-1]
    labels, _accuracy, _values = svm_predict(
        placeholder_labels, [feature], self._model, predict_options)
    return labels[0]
vername = os.path.join(SUDOKU_PATH, 'sudoku18.sud')

im = numpy.array(Image.open(imname).convert('L'))
x = sudoku.find_sudoku_edges(im, axis=0)
y = sudoku.find_sudoku_edges(im, axis=1)
tic.k('found edges')

# Extract cells, run OCR.
OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'
features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
# Build real lists rather than map objects: libsvm needs list/tuple samples,
# and map() is a lazy iterator on Python 3.
problem = svmutil.svm_problem(labels, [list(row) for row in features])
param = svmutil.svm_parameter('-q -t 0')  # quiet, linear kernel
model = svmutil.svm_train(problem, param)
tic.k('built OCR model')

# Cut the image into 9x9 cells along the detected edges.  ``y`` indexes image
# rows and ``x`` image columns, so the cells are collected row by row.
crops = []
for col in range(9):
    for row in range(9):
        crop = im[y[col]:y[col + 1], x[row]:x[row + 1]]
        crops.append(ocr.compute_feature(crop))
tic.k('extracted cells')

# The reference board loaded from ``vername`` is passed as the "true" labels.
res = svmutil.svm_predict(numpy.loadtxt(vername),
                          [list(cell) for cell in crops], model)[0]
tic.k('recognized cells')
res = numpy.array(res).reshape(9, 9)

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print('Recognized board:')
print(res)
res = bc.classify(test_features)[0]
acc = numpy.sum(1.0 * (res == test_labels)) / len(test_labels)
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print('Bayes Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
# FIXME: Bayes accuracy gets very bad if the input dimensions aren't reduced
#        enough. Probably some float underflow due to things not using log
#        probabilities?

# Test SVM.
# Real lists (not map objects): the feature rows are handed to libsvm, which
# needs list/tuple input, and a Python 3 map() iterator can be read only once.
features = [list(row) for row in features]
test_features = [list(row) for row in test_features]

# libSVM needs int labels.  The map is bidirectional: name -> int, int -> name.
str_int_map = {}
for i, c in enumerate(classnames):
    str_int_map[c], str_int_map[i] = i, c


def convert_labels(labels, str_int_map):
    """Translate every entry of ``labels`` through ``str_int_map``.

    Works in both directions because the map holds name -> int and
    int -> name entries; always returns a plain list.
    """
    return [str_int_map[l] for l in labels]


problem = svmutil.svm_problem(convert_labels(labels, str_int_map), features)
# Use a linear kernel, radial basis functions have horrible results (~20% acc)
param = svmutil.svm_parameter('-q -t 0')
model = svmutil.svm_train(problem, param)
res = svmutil.svm_predict(
    convert_labels(test_labels, str_int_map), test_features, model)[0]
res = convert_labels(res, str_int_map)
# ``res`` is a plain list here; compare as arrays so the equality is
# elementwise even when ``test_labels`` is a list too (list == list would
# collapse to a single bool and give a wrong accuracy).
hits = numpy.asarray(res) == numpy.asarray(test_labels)
acc = numpy.sum(1.0 * hits) / len(test_labels)
print('SVM Accuracy: %s' % (acc,))
print_confusion(res, test_labels, classnames)
def main():
    """Predict entity relations with a trained SVM and write them to XML.

    Command line: ``sys.argv[1]`` is the working directory and
    ``sys.argv[2]`` the input name (a trailing ``.txt`` is stripped).  Reads
    ``<dir>/<name>_NER.xml``, and for every child of the root:

    * lets ``relation_features.generate_pairs`` write libsvm features for the
      candidate entity pairs to ``Tempfile/relation_scale``;
    * classifies the pairs with the model in ``trained_models/svm.model``;
    * stores, on each ``entity`` element, a ``relation`` attribute of the
      form ``other_index:type|other_index:type...`` (or ``'None'``).

    The result is written to ``<dir>/<name>_Parsed.xml``.  The temporary
    files ``in.parse`` and ``Tempfile/relation_scale`` are removed at the end.
    """
    m = svmutil.svm_load_model('trained_models/svm.model')
    relation_tag = {0: 'None', 1: 'has_value', 2: 'has_temp', 3: 'modified_by'}

    filename = sys.argv[2]
    match = re.search(r'^(.*)\.txt', filename)
    if match:
        filename = match.group(1)
    input_dir = sys.argv[1] + '/' + filename + '_NER.xml'
    output_dir = sys.argv[1] + '/' + filename + '_Parsed.xml'
    # The doubled space reproduces the original two-argument print output.
    print("Reading NER results from  %s" % input_dir)

    tree = ET.ElementTree(file=input_dir)
    root = tree.getroot()
    relations = {}   # "left_right" pair key -> predicted label
    index = set()    # every entity index that takes part in some pair

    for child in root:
        # Close the feature file before libsvm re-opens it by name, so that
        # everything written to it is flushed to disk.
        with codecs.open('Tempfile/relation_scale', 'w') as syn_features:
            temp_pairs = relation_features.generate_pairs(child, syn_features)

        if temp_pairs:
            try:
                y, x = svmutil.svm_read_problem('Tempfile/relation_scale')
                p_label, p_acc, p_val = svmutil.svm_predict(y, x, m)
            except ValueError:
                # Show the offending text and skip this child.
                for child2 in child.findall('text'):
                    print(child2.text)
                continue
        else:
            p_label = []
            temp_pairs = []

        for j, label in enumerate(p_label):
            pair = temp_pairs[j]
            relations[pair] = label
            indexes = pair.split("_")
            index.add(indexes[0])
            index.add(indexes[1])

        for child2 in child.findall('entity'):
            node_index = child2.attrib['index']
            child2.attrib['relation'] = 'None'
            if node_index not in index:
                continue
            # Anchor and escape the index so that e.g. "T1" cannot match a
            # pair key that merely contains it, such as "T3_T10".
            escaped = re.escape(node_index)
            left_pattern = '^' + escaped + r'_(\w+)$'
            right_pattern = r'^(\w+)_' + escaped + '$'
            for relation in relations:
                match1 = re.search(left_pattern, relation)
                match2 = re.search(right_pattern, relation)
                if match1:
                    other_index = match1.group(1)
                elif match2:
                    other_index = match2.group(1)
                else:
                    continue
                relation_type = relation_tag[relations[relation]]
                if relation_type == 'None':
                    continue
                # Compare by value; ``is 'None'`` only worked through string
                # interning, which is an implementation detail.
                if child2.attrib['relation'] == 'None':
                    child2.attrib['relation'] = other_index + ":" + relation_type
                else:
                    child2.attrib['relation'] = (child2.attrib['relation'] +
                                                 "|" + other_index + ":" +
                                                 relation_type)

    # Remove the temporary files without shelling out to ``rm``.
    for temp_path in ("in.parse", 'Tempfile/relation_scale'):
        if os.path.exists(temp_path):
            os.remove(temp_path)

    print("Writing Relation xml to  %s" % output_dir)
    with codecs.open(output_dir, 'w') as new_tree:
        tree.write(new_tree)
    print("Finished!")
def _prob(self, x):
    """Return one ``{label: value}`` dict per row of ``x``.

    ``x`` is converted with ``x.tolist()`` and run through
    ``libsvmutil.svm_predict`` with the ``"-b 1"`` option; the values
    reported for each row are keyed by ``self.model.get_labels()``.

    :param x: samples; must support ``len`` and ``tolist``.
    :return: list of dicts, in the same order as the rows of ``x``.
    """
    # The true labels are unknown here, so pass a placeholder 0 per row.
    placeholder_labels = [0] * len(x)
    _labels, _accuracy, row_values = libsvmutil.svm_predict(
        placeholder_labels, x.tolist(), self.model, "-b 1")
    class_labels = self.model.get_labels()

    per_row = []
    for values in row_values:
        per_row.append(dict(zip(class_labels, values)))
    return per_row
import os

from libsvm import svmutil

import ocr

# Train a linear SVM on the OCR training set and report how well it fits the
# training and the testing data (svm_predict prints the accuracy itself).
OCR_PATH = '/Users/thakis/Downloads/data/sudoku_images/ocr_data/'
features, labels = ocr.load_ocr_data(os.path.join(OCR_PATH, 'training'))
test_features, test_labels = \
    ocr.load_ocr_data(os.path.join(OCR_PATH, 'testing'))

# Materialise the rows as real lists.  ``features`` is consumed twice below
# (svm_problem and svm_predict); a Python 3 map() iterator would be empty the
# second time and is not a list/tuple as libsvm requires.
features = [list(row) for row in features]
test_features = [list(row) for row in test_features]

problem = svmutil.svm_problem(labels, features)
param = svmutil.svm_parameter('-q -t 0')  # quiet, linear kernel
model = svmutil.svm_train(problem, param)

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print('Training data fit:')
svmutil.svm_predict(labels, features, model)
print('Testing data fit:')
svmutil.svm_predict(test_labels, test_features, model)