class SVMModel(object):
    """One-vs-one SVC wrapper with a small train/predict/persist interface.

    Attributes:
        clf: the wrapped ``OneVsOneClassifier(SVC())`` (replaced by ``load``).
        name: display name of the model, always ``'SVM'``.
    """

    def __init__(self):
        self.name = 'SVM'
        self.clf = OneVsOneClassifier(SVC())

    def get_params(self):
        """Return whatever the wrapped classifier's ``get_params()`` reports."""
        return self.clf.get_params()

    def train(self, dframe):
        """Fit the classifier on the features and labels taken from *dframe*.

        Features come from ``get_featues`` (spelled as in this module) and
        labels from ``get_label``; both helpers are defined outside this class.
        """
        self.clf.fit(get_featues(dframe), get_label(dframe))

    def predict(self, X):
        """Return the wrapped classifier's predictions for *X*."""
        return self.clf.predict(X)

    def save(self, fname):
        """Pickle the wrapped classifier to the file *fname*."""
        with open(fname, 'wb') as sink:
            pickle.dump(self.clf, sink, protocol=pickle.HIGHEST_PROTOCOL)

    def load(self, fname):
        """Replace the wrapped classifier with the object pickled in *fname*.

        NOTE: unpickling executes code embedded in the file, so only load
        files that come from a trusted source.
        """
        with open(fname, 'rb') as source:
            self.clf = pickle.load(source)
def RVMTraining(XEstimate, XValidate, Parameters, class_labels):
    """Train a one-vs-one, grid-searched RBF RVC and label the validation set.

    Args:
        XEstimate: training samples passed to ``fit``.
        XValidate: samples to predict after training.
        Parameters: parameter grid handed to ``GridSearchCV``.
        class_labels: training targets matching ``XEstimate``.

    Returns:
        dict with keys ``"Yvalidate"`` (predictions for ``XValidate``),
        ``"EstParameters"`` (the result of ``get_params()`` on the fitted
        one-vs-one wrapper) and ``"clf"`` (the fitted wrapper itself).
    """
    # Each pairwise problem gets its own grid search over `Parameters`.
    tuned_rvc = GridSearchCV(RVC(kernel='rbf', n_iter=1), Parameters)
    ovo_model = OneVsOneClassifier(tuned_rvc)
    ovo_model.fit(XEstimate, class_labels)

    outcome = {"Yvalidate": ovo_model.predict(XValidate)}
    outcome["EstParameters"] = ovo_model.get_params()
    outcome["clf"] = ovo_model
    return outcome
def RVMTraining(XEstimate, XValidate, Parameters, class_labels):
    """Train a one-vs-one, grid-searched RBF RVC and label the validation set.

    Args:
        XEstimate: training samples passed to ``fit``.
        XValidate: samples to predict after training.
        Parameters: parameter grid handed to ``GridSearchCV``.
        class_labels: training targets matching ``XEstimate``.

    Returns:
        dict with keys ``"Yvalidate"`` (predictions for ``XValidate``),
        ``"EstParameters"`` (the result of ``get_params()`` on the fitted
        one-vs-one wrapper) and ``"clf"`` (the fitted wrapper itself).
    """
    # One-vs-one strategy: every class pair gets its own grid-searched RVC.
    searched_rvc = GridSearchCV(RVC(kernel='rbf', n_iter=2), Parameters)
    pairwise_clf = OneVsOneClassifier(searched_rvc)

    # Train on the estimation data, then label the validation data.
    pairwise_clf.fit(XEstimate, class_labels)
    validation_labels = pairwise_clf.predict(XValidate)
    wrapper_params = pairwise_clf.get_params()

    return dict(
        Yvalidate=validation_labels,
        EstParameters=wrapper_params,
        clf=pairwise_clf,
    )
def SVMTraining(XEstimate, XValidate, Parameters, class_labels):
    """Train a one-vs-one, grid-searched RBF SVC and label the validation set.

    Args:
        XEstimate: training samples passed to ``fit``.
        XValidate: samples to predict after training.
        Parameters: parameter grid handed to ``GridSearchCV``.
        class_labels: training targets matching ``XEstimate``.

    Returns:
        dict with keys ``"Yvalidate"`` (predictions for ``XValidate``),
        ``"EstParameters"`` (the result of ``get_params()`` on the fitted
        one-vs-one wrapper) and ``"clf"`` (the fitted wrapper itself).
    """
    # One-vs-one strategy around a grid search over a probabilistic RBF SVC.
    pairwise_clf = OneVsOneClassifier(
        GridSearchCV(SVC(kernel='rbf', probability=True), Parameters))

    # Train on the estimation data, then label the validation data.
    pairwise_clf.fit(XEstimate, class_labels)
    validation_labels = pairwise_clf.predict(XValidate)
    wrapper_params = pairwise_clf.get_params()

    return dict(
        Yvalidate=validation_labels,
        EstParameters=wrapper_params,
        clf=pairwise_clf,
    )
def SVMTraining(XEstimate, XValidate, Parameters, class_labels):
    """Train a one-vs-one, grid-searched RBF SVC and label the validation set.

    Args:
        XEstimate: training samples passed to ``fit``.
        XValidate: samples to predict after training.
        Parameters: parameter grid handed to ``GridSearchCV``.
        class_labels: training targets matching ``XEstimate``.

    Returns:
        dict with keys ``"Yvalidate"`` (predictions for ``XValidate``),
        ``"EstParameters"`` (the result of ``get_params()`` on the fitted
        one-vs-one wrapper) and ``"clf"`` (the fitted wrapper itself).
    """
    base_svc = SVC(kernel='rbf', probability=True)
    ovo_model = OneVsOneClassifier(GridSearchCV(base_svc, Parameters))
    ovo_model.fit(XEstimate, class_labels)

    outcome = {"Yvalidate": ovo_model.predict(XValidate)}
    outcome["EstParameters"] = ovo_model.get_params()
    outcome["clf"] = ovo_model
    return outcome
# Train a one-vs-one LinearSVC on svmlight-format features and report its
# accuracy on the held-out test file.
modelPath = featuresDirPath + 'svmModelsOneVsRest/svmTrainModel.pkl'

# Start of the run, both as printable text and as a float timestamp.
startTime = time.ctime()
start = time.time()

X_train, y_train = load_svmlight_file(train_feature_path)
X_test, y_test = load_svmlight_file(test_feature_path)
# Disabled toy data for a quick manual check:
#X = np.array([[1,1], [2,2], [-1,2], [-2,3], [-1,-1], [-2,-3], [2,-4], [3,-5]])
#y = np.array([0, 0, 1, 1, 2, 2, 3, 3])

print('start at %s' % startTime)
print('start training...')
# Disabled alternative multiclass strategy:
#clf = OneVsRestClassifier(LinearSVC(random_state = 0))
clf = OneVsOneClassifier(LinearSVC(random_state=0)).fit(X_train, y_train)
print(clf.get_params())
# Disabled: persist the trained model to modelPath.
#joblib.dump(clf, modelPath)
# Disabled toy test points and their labels:
#lists =[[5, -1], [-2, -6], [2,1], [-2, 5]]
#test = np.array(lists)
#test_label = np.array([3, 2, 0, 1])

print("start predicting...")
# Disabled: reload the persisted model from modelPath.
#clf = joblib.load(modelPath)
score = clf.score(X_test, y_test)
print('accuracy is {0}'.format(score))
#==============================================================================
# count = 0
# predictions = clf.predict(X_test)
# lens = len(predictions)
# for i in xrange(lens):
class Classifier:
    """One-vs-one SVM classifier persisted with joblib under ``model/<name>/``.

    Files used inside that directory:
        ``model.joblib``   the fitted ``OneVsOneClassifier``
        ``X.joblib``       feature matrix the model was fitted on
        ``Y.joblib``       labels matching ``X.joblib``
        ``<name>.csv``     initial dataset (label in the last column)
        ``dataset1.csv``   dataset export written by ``save_model``

    ``train`` refits in memory only; ``save_model`` persists the result and
    ``rollback`` discards it.
    """

    def __init__(self, name, load_model=True, c=1, gamma="scale",
                 kernel="linear"):
        """Load the persisted model, or fit a new one from ``<name>.csv``.

        Args:
            name: sub-directory of ``model/`` and stem of the initial CSV.
            load_model: when true, reuse ``model.joblib`` if it exists.
            c: ``C`` passed to ``svm.SVC`` when a new model is built.
            gamma: ``gamma`` passed to ``svm.SVC`` when a new model is built.
            kernel: ``kernel`` passed to ``svm.SVC`` when a new model is built.
        """
        self.model = None
        self.model_file = "model.joblib"
        self.path = "model/" + name + "/"
        self.model_char_size = 15
        if load_model:
            if os.path.isfile(self.path + self.model_file):
                self.model = load(self.path + self.model_file)
                # Feature count is taken from the stored training matrix.
                self.model_char_size = load(self.path + "X.joblib").shape[1]
        # Explicit None check: the truthiness of an estimator object is not a
        # reliable "was it loaded" signal.
        if self.model is None:
            #self.model = OneVsOneClassifier(svm.SVC(C=c,kernel=kernel,gamma=gamma,verbose=True))
            self.model = OneVsOneClassifier(
                svm.SVC(C=c, kernel=kernel, gamma=gamma))
            # Marks the model as "not fitted yet" for train()'s bookkeeping.
            self.model.classes_ = None
            X = []
            Y = []
            with open(self.path + name + ".csv", 'r') as cs:
                reader = csv.reader(cs)
                for row in reader:
                    X.append(row[:-1])
                    Y.append(row[-1])
            X = np.array(X).astype('int')
            # Pixel value 255 is stored as 1; other values are kept.
            X = np.where(X == 255, 1, X)
            Y = np.array(Y)
            # NOTE(review): len_x / len_y are only set on this branch, and
            # model_char_size keeps its default here -- confirm callers cope.
            self.len_x = len(X)
            self.len_y = len(np.unique(Y))
            dump(X, self.path + "X.joblib")
            dump(Y, self.path + "Y.joblib")
            self.model.fit(X, Y)
            dump(self.model, self.path + self.model_file)
        self.kernel = self.model.get_params()['estimator__kernel']
        self.prev_model = None
        self.X_temp = None
        self.Y_temp = None
        np.set_printoptions(precision=3)

    def prediction_test(self, data, label):
        """Return True when the first prediction for *data* equals *label*.

        Trailing whitespace of *label* is ignored.
        """
        return str(self.model.predict(data)[0]) == label.rstrip()

    def train(self, data, label):
        """Refit the model on the stored dataset extended by *data*/*label*.

        The extended dataset is kept in ``X_temp``/``Y_temp`` and a shallow
        copy of the previous model in ``prev_model``; nothing is written to
        disk until ``save_model`` is called.

        Returns:
            dict with ``classes_after``, ``classes_len`` and ``count_model``,
            plus the ``*_before`` counterparts when the model was already
            fitted.
        """
        info = {}
        # Previously persisted dataset, if any.
        if os.path.isfile(self.path + 'X.joblib'):
            arr = load(self.path + "X.joblib")
        else:
            arr = np.array([])
        if os.path.isfile(self.path + 'Y.joblib'):
            arr_y = load(self.path + "Y.joblib")
        else:
            arr_y = np.array([])
        if arr.size == 0:
            arr = data
        else:
            arr = np.concatenate((arr, data), axis=0)
        arr_y = np.append(arr_y, label)
        self.X_temp = arr
        self.Y_temp = arr_y
        if self.model.classes_ is not None:
            info["classes_before"] = self.model.classes_
            info["classes_len_before"] = len(self.model.classes_)
            info["count_model_before"] = len(self.model.estimators_)
        self.prev_model = copy.copy(self.model)
        self.model.fit(arr, arr_y)
        info["classes_after"] = self.model.classes_
        info["classes_len"] = len(self.model.classes_)
        info["count_model"] = len(self.model.estimators_)
        return info

    def save_model(self):
        """Persist the dataset and model produced by the last ``train`` call.

        Does nothing when there is no pending dataset.
        """
        if self.X_temp is not None and self.Y_temp is not None:
            dtset = np.column_stack((self.X_temp, self.Y_temp))
            # newline="" is what the csv module requires for writer targets;
            # without it, platforms that translate "\n" emit blank rows.
            with open(self.path + "dataset1.csv", "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerows(dtset)
            dump(self.X_temp, self.path + "X.joblib")
            dump(self.Y_temp, self.path + "Y.joblib")
            dump(self.model, self.path + self.model_file)
            self.X_temp = None
            self.Y_temp = None

    def rollback(self):
        """Discard the last ``train`` call and restore the previous model."""
        # Without a snapshot (train() never ran) keep the current model
        # instead of replacing it with None.
        if self.prev_model is not None:
            self.model = self.prev_model
        self.X_temp = None
        self.Y_temp = None

    def prediction(self, data, verbose=False):
        """Predict the class of one sample by replaying the one-vs-one vote.

        Args:
            data: one feature vector; it is wrapped into a single-row int
                array.
            verbose: when true, add the vote table and the internals of one
                binary estimator to the result.

        Returns:
            dict with ``"prediction"`` and, if *verbose*, ``"votes"``,
            ``"n_model"`` and ``"model_ex"``.
        """
        X = np.array([data]).astype("int")
        # pairwise_indices_ no longer exists in recent scikit-learn releases;
        # a missing attribute is treated like the former None value.
        indices = getattr(self.model, "pairwise_indices_", None)
        pjg = len(self.model.estimators_)
        if indices is None:
            Xs = [X] * pjg
        else:
            Xs = [X[:, idx] for idx in indices]
        predictions = np.vstack(
            [est.predict(Xi) for est, Xi in zip(self.model.estimators_, Xs)]).T
        confidences = np.vstack([
            self.predict_binary(est, Xi)
            for est, Xi in zip(self.model.estimators_, Xs)
        ]).T
        Y = self.votes_count(predictions, confidences)
        best = Y["sum_conf"].argmax()
        info = {}
        info["prediction"] = self.model.classes_[best]
        if verbose:
            info["votes"] = Y['votes']
            info["n_model"] = pjg
            info["model_ex"] = {}
            info["model_ex"]["negative_class"] = self.model.classes_[0]
            # NOTE(review): `best` is a class index while estimators_ holds
            # one entry per class pair, and best - 1 is -1 for class 0.
            # Kept as in the original; confirm the intended estimator.
            est = self.model.estimators_[best - 1]
            info["model_ex"]["no"] = best - 1
            info["model_ex"]["bias"] = est.intercept_
            info["model_ex"]["df"] = est.decision_function(X)
            info["model_ex"]["n_support"] = est.n_support_
            info["model_ex"]["kernel_type"] = est.get_params()['kernel']
            if info["model_ex"]["kernel_type"] == "linear":
                info["model_ex"]["kernel"] = linear_kernel(
                    est.support_vectors_, X)
                info["model_ex"]["w"] = np.around(est.coef_, 2)
            else:
                info["model_ex"]["gamma"] = est._gamma
                info["model_ex"]["kernel"] = rbf_kernel(est.support_vectors_,
                                                        X,
                                                        gamma=est._gamma)
                info["model_ex"]["w"] = est.dual_coef_
        return info

    def prediction_bulk(self, X):
        """Predict every row of *X* and join the labels into one string."""
        return "".join(self.model.predict(X))

    def predict_binary(self, estimator, X):
        """Make predictions using a single binary estimator."""
        # Relies on a private scikit-learn helper.
        return sklearn.multiclass._predict_binary(estimator, X)

    def votes_count(self, pred, conf):
        """Tally one-vs-one votes and confidences per class.

        Args:
            pred: array (n_samples, n_pairs) of 0/1 predictions, one column
                per class pair (i, j) with i < j, in nested-loop order.
            conf: array of the same shape holding the matching confidences.

        Returns:
            dict with ``"votes"`` (vote counts per sample and class) and
            ``"sum_conf"`` (votes plus a confidence term bounded by 1/3 in
            magnitude, which therefore only breaks ties between vote counts).
        """
        n_samples = pred.shape[0]
        info = {}
        n_classes = len(self.model.classes_)
        votes = np.zeros((n_samples, n_classes))
        sum_of_confidences = np.zeros((n_samples, n_classes))
        k = 0
        for i in range(n_classes):
            for j in range(i + 1, n_classes):
                sum_of_confidences[:, i] -= conf[:, k]
                sum_of_confidences[:, j] += conf[:, k]
                # Prediction 0 votes for class i, prediction 1 for class j.
                votes[pred[:, k] == 0, i] += 1
                votes[pred[:, k] == 1, j] += 1
                k += 1
        transformed_confidences = (sum_of_confidences /
                                   (3 * (np.abs(sum_of_confidences) + 1)))
        info["votes"] = votes
        info["sum_conf"] = votes + transformed_confidences
        return info

    def accuracy(self, filename, answ=None):
        """Return case-sensitive and case-insensitive accuracy in percent.

        With *answ* given, *filename* is the predicted text and *answ* the
        expected text; newlines and spaces are removed from both and the
        characters are compared position by position, scored against the
        length of *answ*.

        Without *answ*, *filename* names a file below
        ``static/image/character_test/`` whose lines are comma-separated
        pixel values followed by the label; each line is classified by the
        model and scored against the number of lines.

        Returns:
            ``{"sensitive": pct, "insensitive": pct}``; both are ``0.0`` when
            there is nothing to score.
        """
        score = 0
        score1 = 0
        if answ:
            filename = filename.replace("\n", "").replace(" ", "")
            answ = answ.replace("\n", "").replace(" ", "")
            #f = list(filename)
            #an = list(answ)
            #print(confusion_matrix(f, an, labels=self.model.classes_))
            # zip stops at the shorter text, like the original index loop.
            for got, want in zip(filename, answ):
                if got.lower() == want.lower():
                    score1 += 1
                if got == want:
                    score += 1
            count = len(answ)
        else:
            # Context manager so the file handle is always released.
            with open("static/image/character_test/" + filename, 'r') as test:
                lines = test.readlines()
            count = len(lines)
            ch = int(math.sqrt(self.model_char_size))
            rs = self.model_char_size
            for i in lines:
                a = i.split(",")
                data = np.array(a[0:-1]).astype('uint8')
                label = a[-1].rstrip()
                im = [data]
                ori_size = int(math.sqrt(len(data)))
                if len(data) != rs:
                    # Resize the square image to the model's input size.
                    im = np.array(
                        Image.fromarray(data.reshape(
                            (ori_size, ori_size))).resize(
                                (ch, ch))).reshape(rs).reshape(1, -1)
                asd = self.model.predict(im)
                if str(asd[0]).lower() == label.lower():
                    score1 += 1
                if str(asd[0]) == label:
                    score += 1
        if count == 0:
            # Nothing to compare against: report zero instead of dividing
            # by zero.
            return {"sensitive": 0.0, "insensitive": 0.0}
        return {
            "sensitive": score / count * 100,
            "insensitive": score1 / count * 100
        }
# Train a one-vs-one LinearSVC on svmlight-format features and report its
# accuracy on the held-out test file.

# Start of the run, both as printable text and as a float timestamp.
startTime = time.ctime()
start = time.time()

X_train, y_train = load_svmlight_file(train_feature_path)
X_test, y_test = load_svmlight_file(test_feature_path)
# Disabled toy data for a quick manual check:
#X = np.array([[1,1], [2,2], [-1,2], [-2,3], [-1,-1], [-2,-3], [2,-4], [3,-5]])
#y = np.array([0, 0, 1, 1, 2, 2, 3, 3])

print('start at %s' % startTime)
print('start training...')
# Disabled alternative multiclass strategy:
#clf = OneVsRestClassifier(LinearSVC(random_state = 0))
clf = OneVsOneClassifier(LinearSVC(random_state=0)).fit(X_train, y_train)
print(clf.get_params())
# Disabled: persist the trained model.
#joblib.dump(clf, modelPath)
# Disabled toy test points and their labels:
#lists =[[5, -1], [-2, -6], [2,1], [-2, 5]]
#test = np.array(lists)
#test_label = np.array([3, 2, 0, 1])

print("start predicting...")
# Disabled: reload the persisted model.
#clf = joblib.load(modelPath)
score = clf.score(X_test, y_test)
print('accuracy is {0}'.format(score))
#==============================================================================
# count = 0
# predictions = clf.predict(X_test)
# lens = len(predictions)
# for i in xrange(lens):
# Fit a one-vs-one SVC, report train/test accuracy and the confusion matrix,
# then prepare the figure used for plotting.
# NOTE: despite the variable name the kernel is RBF; scikit-learn documents
# `degree` as used by the 'poly' kernel only.
svm_model_linear = OneVsOneClassifier(
    SVC(max_iter=-1, C=10, gamma=100, kernel='rbf', degree=15))
svm_model_linear.fit(X_train, y_train)

# Predictions on the training data first, then on the held-out data.
svm_predictions_train = svm_model_linear.predict(X_train)
svm_predictions = svm_model_linear.predict(X_test)
print(svm_predictions_train)
print(X_train)
print(y_train)
# Disabled debug output:
#print(svm_predictions)
#print(y_test)

# Mean accuracy on both splits.
accuracy_train = svm_model_linear.score(X_train, y_train)
accuracy_test = svm_model_linear.score(X_test, y_test)
print("Training accuracy:", accuracy_train)
print("Test accuracy:", accuracy_test)
print("Params:", svm_model_linear.get_params())

# A good classifier shows up as a strong diagonal in the confusion matrix.
cm = confusion_matrix(y_test, svm_predictions)
print("Confusion matrix:\n", cm)

# The -q flag skips the graphing below.
if args.quiet:
    sys.exit(0)

# Drawing settings: one entry per feature index from 2 upwards.
flattened_feature_ids = range(2, len(colnamesX))
widths = dict.fromkeys(flattened_feature_ids, 9.9)
values = dict.fromkeys(flattened_feature_ids, 10)
figs, ax = plt.subplots(2, 1, figsize=(6, 8))
print("Features shown in graph:", colnamesX[:2])
print("Features flattened in graph:", colnamesX[2:])
# Disabled alternative layout:
#figs, ax = plt.subplots(len(colnamesX) + 1, len(colnamesX), figsize=(3, 4))