def _evaluate(self, Gs, Gs_kwargs, num_gpus):
    """Measure linear separability of attributes in latent space.

    Samples latents, classifies the generated images with pre-trained
    attribute classifiers, fits a linear SVM per attribute in both the input
    latent space (Z) and the mapped space (W), and accumulates the
    conditional entropy H(SVM prediction | classifier label) per space.

    Args:
        Gs: generator network; cloned onto each GPU.
        Gs_kwargs: extra keyword arguments forwarded to the generator graphs.
        num_gpus: number of GPUs to build the sampling graph on.
    """
    minibatch_size = num_gpus * self.minibatch_per_gpu

    # Construct TensorFlow graph for each GPU.
    result_expr = []
    for gpu_idx in range(num_gpus):
        with tf.device('/gpu:%d' % gpu_idx):
            Gs_clone = Gs.clone()

            # Generate images.
            latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
            labels = self._get_random_labels_tf(self.minibatch_per_gpu)
            dlatents = Gs_clone.components.mapping.get_output_for(latents, labels, **Gs_kwargs)
            images = Gs_clone.get_output_for(latents, None, **Gs_kwargs)

            # Downsample to 256x256. The attribute classifiers were built for 256x256.
            if images.shape[2] > 256:
                factor = images.shape[2] // 256
                images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
                images = tf.reduce_mean(images, axis=[3, 5])  # box-filter downsample

            # Run classifier for each attribute.
            # dlatents[:, -1] keeps the last layer's w vector per sample.
            result_dict = dict(latents=latents, dlatents=dlatents[:, -1])
            for attrib_idx in self.attrib_indices:
                classifier = misc.load_pkl(classifier_urls[attrib_idx])
                logits = classifier.get_output_for(images, None)
                # Two-way softmax over [logit, -logit] => P(attribute present/absent).
                predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1))
                result_dict[attrib_idx] = predictions
            result_expr.append(result_dict)

    # Sampling loop.
    results = []
    for begin in range(0, self.num_samples, minibatch_size):
        self._report_progress(begin, self.num_samples)
        results += tflib.run(result_expr)
    results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()}

    # Calculate conditional entropy for each attribute.
    conditional_entropies = defaultdict(list)
    for attrib_idx in self.attrib_indices:
        # Prune the least confident samples (keep top self.num_keep).
        pruned_indices = list(range(self.num_samples))
        pruned_indices = sorted(pruned_indices, key=lambda i: -np.max(results[attrib_idx][i]))
        pruned_indices = pruned_indices[:self.num_keep]

        # Fit SVM to the remaining samples.
        svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
        for space in ['latents', 'dlatents']:
            svm_inputs = results[space][pruned_indices]
            try:
                svm = sklearn.svm.LinearSVC()
                svm.fit(svm_inputs, svm_targets)
                svm.score(svm_inputs, svm_targets)
                svm_outputs = svm.predict(svm_inputs)
            # BUG FIX: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit; keep the best-effort fallback
            # but only for ordinary exceptions (e.g. solver failure).
            except Exception:
                svm_outputs = svm_targets  # assume perfect prediction

            # Calculate conditional entropy from the 2x2 joint distribution
            # of (SVM output, classifier label).
            p = [[np.mean([case == (row, col) for case in zip(svm_outputs, svm_targets)])
                  for col in (0, 1)] for row in (0, 1)]
            conditional_entropies[space].append(conditional_entropy(p))
def leave_one_out_cv(gram_matrix, labels, alg='SVM'):
    """Leave-one-out cross-validation on a precomputed kernel (Gram) matrix.

    For each held-out sample, trains on the remaining N-1 samples using the
    precomputed-kernel sub-matrix and scores the single held-out sample.

    Args:
        gram_matrix: (N, N) precomputed kernel matrix.
        labels: (N,) array of class labels.
        alg: 'SVM' (precomputed-kernel SVC) or 'kNN'.

    Returns:
        (preds, scores): flat list of per-fold predictions and per-fold
        accuracies (each fold has a single test sample, so each score is 0/1).

    Raises:
        ValueError: if `alg` is not 'SVM' or 'kNN'.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # use the model_selection API instead.
    from sklearn.model_selection import LeaveOneOut

    scores = []
    preds = []
    loo = LeaveOneOut()
    for train_index, test_index in loo.split(gram_matrix):
        # Kernel rows for the test samples are indexed against the
        # training columns only.
        X_train, X_test = gram_matrix[train_index][:, train_index], gram_matrix[test_index][:, train_index]
        y_train, y_test = labels[train_index], labels[test_index]
        if alg == 'SVM':
            svm = sklearn.svm.SVC(kernel='precomputed')
            svm.fit(X_train, y_train)
            preds += svm.predict(X_test).tolist()
            score = svm.score(X_test, y_test)
        elif alg == 'kNN':
            knn = sklearn.neighbors.KNeighborsClassifier()
            knn.fit(X_train, y_train)
            preds += knn.predict(X_test).tolist()
            score = knn.score(X_test, y_test)
        else:
            # Previously an unknown `alg` silently reused the last fold's
            # score (or raised NameError on the first fold).
            raise ValueError("alg must be 'SVM' or 'kNN', got %r" % (alg,))
        scores.append(score)
    # Py3 print functions (were Py2 print statements).
    print("Mean accuracy: %f" % np.mean(scores))
    print("Stdv: %f" % np.std(scores))
    return preds, scores
def train(df_train, df_test):
    """Train and score LogisticRegression, RandomForest and SVC models.

    Args:
        df_train: training DataFrame containing an 'income' label column.
        df_test: held-out test DataFrame with the same schema.

    Prints each model's test accuracy and per-step timing via `cost_times`.
    """
    X_train = df_train.drop('income', axis=1)
    y_train = df_train['income']
    # BUG FIX: the test split was previously built from df_train, so every
    # model was scored on its own training data. Use df_test.
    X_test = df_test.drop('income', axis=1)
    y_test = df_test['income']

    start = time.time()

    lr = LogisticRegression(penalty='l1', tol=0.01)
    lr.fit(X_train, y_train)
    start = cost_times(start, 'lr.fit')
    score = lr.score(X_test, y_test)
    start = cost_times(start, 'lr.score')
    print("LR : ", score)

    from sklearn.ensemble import RandomForestClassifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    start = cost_times(start, 'rfc.fit')
    score = rfc.score(X_test, y_test)
    start = cost_times(start, 'rfc.score')
    print("RF : ", score)

    from sklearn import svm
    svm = svm.SVC()
    svm.fit(X_train, y_train)
    start = cost_times(start, 'svc.fit')
    score = svm.score(X_test, y_test)
    start = cost_times(start, 'svc.score')
    print("SVM : ", score)
def k_fold_cv(gram_matrix, labels, folds=10, alg='SVM', shuffle=True):
    """K-fold cross-validation on a precomputed kernel (Gram) matrix.

    Args:
        gram_matrix: (N, N) precomputed kernel matrix.
        labels: (N,) array of class labels.
        folds: number of CV folds.
        alg: 'SVM' (precomputed-kernel SVC) or 'kNN'.
        shuffle: whether to shuffle sample order before splitting.

    Returns:
        (preds, scores): per-fold predictions (flattened) and accuracies.

    Raises:
        ValueError: if `alg` is not 'SVM' or 'kNN'.
    """
    # BUG FIX: removed a leftover `pdb.set_trace()` debugger breakpoint.
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # use the model_selection API instead.
    from sklearn.model_selection import KFold

    scores = []
    preds = []
    kf = KFold(n_splits=folds, shuffle=shuffle, random_state=random.randint(0, 100))
    for train_index, test_index in kf.split(labels):
        # Kernel rows for the test samples are indexed against the
        # training columns only.
        X_train, X_test = gram_matrix[train_index][:, train_index], gram_matrix[test_index][:, train_index]
        y_train, y_test = labels[train_index], labels[test_index]
        if alg == 'SVM':
            svm = sklearn.svm.SVC(kernel='precomputed')
            svm.fit(X_train, y_train)
            preds += svm.predict(X_test).tolist()
            score = svm.score(X_test, y_test)
        elif alg == 'kNN':
            knn = sklearn.neighbors.KNeighborsClassifier()
            knn.fit(X_train, y_train)
            preds += knn.predict(X_test).tolist()
            score = knn.score(X_test, y_test)
        else:
            raise ValueError("alg must be 'SVM' or 'kNN', got %r" % (alg,))
        scores.append(score)
    # Py3 print functions (were Py2 print statements).
    print("Mean accuracy: %f" % np.mean(scores))
    print("Stdv: %f" % np.std(scores))
    return preds, scores
def SVM(X_train, X_test, y_train, y_test):
    """Grid over (C, kernel) one-vs-rest SVMs with on-disk model caching.

    Each (C, kernel) pair maps to a pickle file; cached models are loaded
    instead of retrained, so training_time only reflects actual fits.

    Returns:
        (training_time, testing_time, max_accuracy): the maximum single-model
        fit time, maximum single-model scoring time, and best test accuracy
        (percent) across the grid.
    """
    filenames = [
        'SVM1.sav', 'SVM2.sav', 'SVM3.sav', 'SVM4.sav', 'SVM5.sav',
        'SVM7.sav', 'SVM6.sav', 'SVM8.sav', 'SVM9.sav'
    ]
    c = [1, 1000, 1000000]
    ker = ['linear', 'poly', 'rbf']
    training_time, testing_time, max_accuracy = 0, 0, 0
    for i in range(len(c)):
        for j in range(len(ker)):
            # BUG FIX: row stride must be the inner-loop length len(ker),
            # not len(c) (they happen to be equal here, but the intent is
            # row-major indexing over the (C, kernel) grid).
            k = i * len(ker) + j
            filename = filenames[k]
            if os.path.exists(filename):
                svm = pickle.load(open(filename, 'rb'))
            else:
                t1 = time.time()
                svm = OneVsRestClassifier(SVC(kernel=ker[j], C=c[i])).fit(X_train, y_train)
                t2 = time.time()
                training_time = max(training_time, t2 - t1)
                pickle.dump(svm, open(filename, 'wb'))
            t1 = time.time()
            accuracy = svm.score(X_test, y_test) * 100
            t2 = time.time()
            testing_time = max(testing_time, t2 - t1)
            max_accuracy = max(accuracy, max_accuracy)
            print('One VS Rest SVM accuracy with kernel={} and c={} is : {}%'.
                  format(ker[j], c[i], accuracy))
    # BUG FIX: previously returned training_time twice, dropping the
    # measured testing_time.
    return training_time, testing_time, max_accuracy
def train_svm(train, labels_train, test, labels_test, dims):
    """Train an SVC, optionally on a PCA-denoised reconstruction of train.

    When ``dims > 0`` the training data is projected onto `dims` principal
    components and mapped back to the original feature space
    (inverse_transform), so the SVM trains on a denoised reconstruction
    while the raw test set (same dimensionality) is used for scoring.
    When ``dims == 0`` the data is used as-is.

    Args:
        train, labels_train: training features and labels.
        test, labels_test: test features and labels.
        dims: number of PCA components, or 0 for no reduction.

    Returns:
        Test-set accuracy of the fitted SVC.
    """
    if dims == 0:
        # Train as is.
        fit_data = train
    else:
        pca = PCA(n_components=dims)
        pca.fit(train)
        pca_train = pca.transform(train)
        # Project to `dims` components and back: the SVM trains on this
        # reconstruction, which lives in the same space as `test`.
        fit_data = pca.inverse_transform(pca_train)
        # NOTE: the previous version also computed TruncatedSVD transforms of
        # train/test here, but never used them; that dead work is removed.

    svm = SVC(decision_function_shape='ovo')
    # BUG FIX: `inv_pca_train` was referenced unconditionally, raising
    # UnboundLocalError whenever dims == 0.
    svm.fit(fit_data, labels_train)
    ac = svm.score(test, labels_test)
    print("Scoring ", ac)
    return ac
def train_model(self, X_train_cv, y_train, X_test_cv, y_test):
    """Train a linear SVM and return it with its F1 score and accuracy.

    :param X_train_cv: vectorized training features
    :param y_train: training labels
    :param X_test_cv: vectorized test features
    :param y_test: test labels
    :return: (svm, f1, accuracy) — fitted model, weighted F1 on the test
        set, and accuracy as an integer percentage.
    """
    svm = sklearn.svm.LinearSVC(C=0.1)
    svm.fit(X_train_cv, y_train)
    pred = svm.predict(X_test_cv)
    # BUG FIX: f1_score expects (y_true, y_pred); the arguments were swapped.
    # With average='weighted' the per-class weights come from y_true's class
    # support, so the swap changed the reported score.
    f1 = sklearn.metrics.f1_score(y_test, pred, average='weighted')
    accuracy = int(round(svm.score(X_test_cv, y_test) * 100))
    return svm, f1, accuracy
def SVR_rbf(dates, prices, test_date, sl_df, forcastingDays):
    """Fit RBF support-vector regressors over stock data and produce forecasts.

    NOTE(review): a single SVR instance is re-fitted three times below; every
    prediction after the final ``svm.fit(sl_trainX_close, ...)`` call uses the
    close-price model — including svm_decision_boundary and svm_y_pred, which
    were derived from the open-price splits. Confirm this is intentional.

    :param dates, prices, test_date: accepted but unused in this body —
        presumably kept for interface compatibility with sibling functions.
    :param sl_df: source DataFrame; must contain a 'close' column.
    :param forcastingDays: horizon (in rows) for the shift-based forecast.
    :return: (svm_decision_boundary, svm_prediction, svm_test_score,
        prediction_of_svm_close, forecast_prediction)
    """
    svm = SVR(kernel='rbf', C=1e3, gamma=0.1)
    # Project-specific preprocessing; returns open- and close-price splits.
    sl_trainX, sl_trainY, sl_testX, sl_testY, sl_trainX_close, sl_trainY_close, sl_testX_close, sl_testY_close = create_sl_stock_preprocessed_Dataset(
        sl_df)
    X_train, X_test, y_train, y_test = train_test_split(sl_trainX, sl_trainY,
                                                        test_size=0.33,
                                                        random_state=42)
    X_train_close, X_test_close, y_train_close, y_test_close = train_test_split(
        sl_trainX_close, sl_trainY_close, test_size=0.33, random_state=42)

    # Build a shifted-target forecasting frame from the close prices.
    df = sl_df[['close']]
    forecast_out = int(
        forcastingDays)  # predicting forcastingDays days into future
    df['Prediction'] = df[['close']].shift(-forecast_out)
    X = np.array(df.drop(['Prediction'], 1))
    X = preprocessing.scale(X)
    X_forecast = X[-forecast_out:]  # set X_forecast equal to last 30
    X = X[:-forecast_out]  # remove last forcastingDays from X
    y = np.array(df['Prediction'])
    y = y[:-forecast_out]  # drop rows whose shifted target is NaN
    X_train_f, X_test_f, y_train_f, y_test_f = train_test_split(X, y,
                                                                test_size=0.2)

    # Fit #1: forecast model on the shifted-target frame.
    svm.fit(X_train_f, y_train_f)
    # NOTE(review): `confidence` is computed but never returned or used.
    confidence = svm.score(X_test_f, y_test_f)
    forecast_prediction = svm.predict(X_forecast)

    # Fit #2 and #3: the same estimator is refit on open then close prices;
    # only the close-price fit survives for the predictions below.
    svm.fit(sl_trainX, sl_trainY)
    svm.fit(sl_trainX_close, sl_trainY_close)
    svm_decision_boundary = svm.predict(sl_trainX)
    predict_decision_boundary_close = svm.predict(sl_trainX_close)
    svm_y_pred = svm.predict(X_test)
    svm_reg_y_pred_close = svm.predict(X_test_close)
    svm_test_score = mean_squared_error(y_test, svm_y_pred)
    mean_squared_error_test_score_close = mean_squared_error(
        y_test_close, svm_reg_y_pred_close)
    svm_prediction = svm.predict(sl_testX)[0]
    prediction_of_svm_close = svm.predict(sl_testX_close)[0]
    return svm_decision_boundary, svm_prediction, svm_test_score, prediction_of_svm_close, forecast_prediction
def svm_inference(self, data, confidence, svm, norm=True, in_test=False):
    """Run context-augmented SVM inference over a sequence of words.

    Each word's feature rows are concatenated with an extended context
    built from the running confidence matrix, scored by the given SVM,
    and the (optionally sigmoid-normalised) decision values are written
    back into a fresh confidence matrix.

    Returns a tuple of (frame-weighted accuracy, per-word mean accuracy,
    updated confidence matrix).
    """
    print('\tPerforming SVM inference')
    n_words = len(data)
    print(n_words)
    weighted_acc_sum = 0
    word_acc_sum = 0
    n_frames = 0
    n_scored_words = 0
    updated_conf = np.zeros(confidence.shape)
    offset = 0
    ctx_width = self.n_classes * self.window_size * 2
    for word in data:
        n_rows = word.shape[0]
        targets = word[:, -1]  # last column holds the labels
        if in_test:
            self.test_labels[offset:offset + n_rows] = targets
        # TODO: implemented iterative context inference
        # Augmented features: [raw features | extended context].
        feats = np.zeros((n_rows, self.dtr + ctx_width))
        feats[:, :self.dtr] = word[:, :self.dtr]
        feats[:, self.dtr:] = self.extend_context(
            confidence[offset:(offset + n_rows), :])
        raw_conf = svm.decision_function(feats)
        if norm:
            # Sigmoid squashes decision values into (0, 1).
            raw_conf = (1 + np.exp(-1 * raw_conf)) ** -1
        updated_conf[offset:offset + n_rows, :] = raw_conf
        offset += n_rows
        # Accumulate accuracy both per-frame and per-word.
        n_frames += n_rows
        n_scored_words += 1
        word_score = svm.score(feats, targets)
        word_acc_sum += word_score
        weighted_acc_sum += word_score * n_rows
    return weighted_acc_sum / n_frames, word_acc_sum / n_scored_words, updated_conf
def run(attrib_idx):
    """Fit a linear SVM separating one binary attribute in W space and
    save its hyperplane normal as a principal direction.

    :param attrib_idx: index of the attribute; selects both the input
        .npy file and the key of the prediction array inside it.
    """
    # BUG FIX: the .npy file stores a pickled dict, which NumPy >= 1.16.3
    # refuses to load unless allow_pickle=True is passed explicitly.
    results = np.load("principal_directions/wspace_att_%d.npy" % attrib_idx,
                      allow_pickle=True).item()
    # Confidence-based pruning is intentionally disabled: keep all samples.
    pruned_indices = list(range(results['latents'].shape[0]))

    # Fit SVM to the remaining samples.
    # Targets are the argmax over the attribute classifier's 2-way softmax.
    svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
    space = 'dlatents'
    svm_inputs = results[space][pruned_indices]
    svm = sklearn.svm.LinearSVC(C=1.0, dual=False, max_iter=10000)
    svm.fit(svm_inputs, svm_targets)
    svm.score(svm_inputs, svm_targets)
    svm_outputs = svm.predict(svm_inputs)

    # The separating hyperplane's normal is the attribute direction in W.
    w = svm.coef_[0]
    np.save("principal_directions/direction_%d" % attrib_idx, w)
def svm_inference(self, data, confidence, svm, norm=True, in_test=False):
    """Score each word with the SVM using context-extended features.

    For every word (a 2-D array whose last column is the label), the
    feature block is concatenated with an extended-context block derived
    from `confidence`, the SVM's decision values are (optionally)
    sigmoid-normalised and stored, and accuracies are accumulated.

    Returns (frame-weighted accuracy, mean per-word accuracy,
    new confidence matrix).
    """
    print('\tPerforming SVM inference')
    Nt = len(data)
    print(Nt)
    acc_frames = 0
    acc_words = 0
    total_frames = 0
    total_words = 0
    new_confidence = np.zeros(confidence.shape)
    row = 0
    ctx_cols = self.n_classes * self.window_size * 2
    for idx in range(Nt):
        word = data[idx]
        n = word.shape[0]
        labels = word[:, -1]
        if in_test:
            self.test_labels[row:row + n] = labels
        # TODO: implemented iterative context inference
        # X_aug : [X | extended context]
        X_aug = np.zeros((n, self.dtr + ctx_cols))
        X_aug[:, :self.dtr] = word[:, :self.dtr]
        X_aug[:, self.dtr:] = self.extend_context(confidence[row:(row + n), :])
        scores = svm.decision_function(X_aug)  # confidence of predictions
        if norm:
            scores = (1 + np.exp(-1 * scores)) ** -1  # sigmoid normalisation
        new_confidence[row:row + n, :] = scores
        row += n
        # Bookkeeping for the two accuracy flavours.
        total_frames += n
        total_words += 1
        acc = svm.score(X_aug, labels)
        acc_words += acc
        acc_frames += acc * n
    return acc_frames / total_frames, acc_words / total_words, new_confidence
def train_svm(cls, train_x, train_y):
    """Train a binary linear SVM for one class.

    :param cls: the class this SVM discriminates; also the key into the
        module-level regularisation table ``C``.
    :param train_x: descriptor matrix.
    :param train_y: corresponding binary labels.
    :return: (w, b) — the SVM's weight vector(s) and bias term(s).
    """
    model = sklearn.svm.LinearSVC(max_iter=1000)
    # Per-class regularisation strength from the global table.
    model.C = C[cls]
    model.fit(train_x, train_y)
    train_error = 1 - model.score(train_x, train_y)
    print('Training Error for "{}" : {:.4f}'.format(cls, train_error))
    return model.coef_, model.intercept_
def SVM(X_train, X_test, y_train, y_test, withpca, pca):
    """Grid over (C, kernel) one-vs-rest SVMs with on-disk caching and
    report the best model's confusion matrix.

    :param withpca: 1 if the inputs are PCA-reduced (affects cache names).
    :param pca: number of PCA components, used only in the cache filenames.
    :return: (training_time, testing_time, max_accuracy) — max single fit
        time, max single scoring time, and best accuracy (percent).
    """
    if withpca == 1:
        filenames = [
            'SVM1pca' + str(pca) + '.sav', 'SVM2pca' + str(pca) + '.sav',
            'SVM3pca' + str(pca) + '.sav', 'SVM4pca' + str(pca) + '.sav',
            'SVM5pca' + str(pca) + '.sav', 'SVM7pca' + str(pca) + '.sav',
            'SVM6pca' + str(pca) + '.sav', 'SVM8pca' + str(pca) + '.sav',
            'SVM9pca' + str(pca) + '.sav'
        ]
    else:
        filenames = [
            'SVM1.sav', 'SVM2.sav', 'SVM3.sav', 'SVM4.sav', 'SVM5.sav',
            'SVM7.sav', 'SVM6.sav', 'SVM8.sav', 'SVM9.sav'
        ]
    c = [0.1, 1, 1000]
    ker = ['linear', 'poly', 'rbf']
    training_time, testing_time, max_accuracy = 0, 0, 0
    max_model = None
    for i in range(len(c)):
        for j in range(len(ker)):
            # FIX: row stride should be the inner-loop length len(ker),
            # not len(c) (equal here, but row-major indexing is the intent).
            k = i * len(ker) + j
            filename = filenames[k]
            if os.path.exists(filename):
                svm = pickle.load(open(filename, 'rb'))
            else:
                t1 = time.time()
                svm = OneVsRestClassifier(SVC(kernel=ker[j],
                                              C=c[i])).fit(X_train, y_train)
                t2 = time.time()
                training_time = max(training_time, t2 - t1)
                pickle.dump(svm, open(filename, 'wb'))
            t1 = time.time()
            accuracy = svm.score(X_test, y_test) * 100
            t2 = time.time()
            testing_time = max(testing_time, t2 - t1)
            # Track the best model; the `if` alone keeps max_accuracy in
            # sync (the old redundant `max_accuracy = max(...)` is removed).
            if accuracy > max_accuracy:
                max_accuracy = accuracy
                max_model = svm
            # print('One VS Rest SVM accuracy with kernel={} and c={} is : {}%'.format(ker[j], c[i], accuracy))
    y_pred = max_model.predict(X_test)
    conf_mat = confusion_matrix(y_test, y_pred)
    print("conf_mat of SVM ")
    print(conf_mat)
    return training_time, testing_time, max_accuracy
def SVM(request):
    """Django view: predict the logged-in student's department with an SVC
    and persist the prediction and accuracy on the Student record.

    NOTE(review): the model trains on module-level `X` and `y`, which are not
    defined in this function — confirm they are the full training matrices
    prepared elsewhere in the module.
    """
    pk = request.user.id
    student = Student.objects.get(user=pk)
    # Build a one-row feature frame for this student, dropping identifiers
    # and previously stored prediction/accuracy columns.
    df = pd.DataFrame(list(Student.objects.all().values()))
    df = df.loc[df['user_id'] == pk]
    df = df.drop(["user_id", "id", "Department_DS", "Department_SVM",
                  "Department_KNN", "DS_acc", "SVM_acc", "KNN_acc"], axis=1)
    # Train/evaluate on the module-level dataset, then score this student.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                        shuffle=True)
    svm = SVC(kernel="linear", C=0.025, random_state=101)
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    Dep_pred = svm.predict(df)  # prediction for the single current student
    accuracy = svm.score(X_test, y_test)
    student.Department_SVM = list(Dep_pred)[0]
    student.SVM_acc = format(accuracy * 100, ".2f")
    # NOTE(review): this first form is immediately overwritten below —
    # apparently dead code; verify before removing.
    form = StudentObj(instance=student)
    form = StudentObj(request.POST, instance=student)
    if form.is_valid():
        form.save()
    # NOTE(review): returns the request object rather than an HttpResponse —
    # confirm the caller expects this.
    return request
def svm_classifiation_score(self, X, y, C=1.0, kernel='rbf', gamma='auto', degree=3):
    """Return the classification score of an SVC trained on the learnt
    encoding of X.

    The data X is encoded, split 90/10 into train/test sets, an SVC with
    the given hyper-parameters is fitted on the training portion, and its
    mean accuracy on the held-out portion is returned.

    :param X: raw input samples to encode.
    :param y: class labels.
    :param C, kernel, gamma, degree: forwarded to sklearn's SVC.
    :return: mean accuracy on the 10% held-out split.
    """
    H = self.encode(X)
    H_train, H_test, Y_train, Y_test = train_test_split(H, y, test_size=0.1)
    # BUG FIX: assigning to the name `svm` made it local to this function,
    # so `svm.SVC` raised UnboundLocalError before the module could be
    # referenced. Use a distinct local name for the classifier.
    clf = svm.SVC(C=C, kernel=kernel, gamma=gamma, degree=degree,
                  cache_size=600)
    clf.fit(H_train, Y_train)
    return clf.score(H_test, Y_test)
def chapter_13():
    """Tutorial walkthrough on the breast-cancer dataset.

    A sequence of independently togglable `if 0:` / `if 1:` sections:
    cross-validation, manual and GridSearchCV hyper-parameter search,
    SVC evaluation metrics, logistic-regression threshold analysis, a ROC
    curve, jackknife/bootstrap standard-error estimates, bagging, boosting,
    and (the only enabled section) a random forest with feature importances.
    """
    cancer = load_breast_cancer()
    X_train, X_test, y_train, y_test = \
        train_test_split(cancer.data, cancer.target,
                         stratify=cancer.target, random_state=66)

    if 0:
        # Cross validation
        tree = DecisionTreeClassifier(criterion='entropy', max_depth=3,
                                      random_state=0)
        scores = cross_val_score(tree, cancer.data, cancer.target, cv=5)
        print("Cross validation scores :{}".format(scores))
        print("Cross validation scores(mean):{:.5}".format(scores.mean()))

    if 0:
        # Grid search ex — manual double loop over gamma and C.
        best_score = 0
        best_param = {}
        for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
            for C in [0.001, 0.01, 0.1, 1, 10, 100]:
                svm = SVC(gamma=gamma, C=C)
                svm.fit(X_train, y_train)
                score = svm.score(X_test, y_test)
                if score > best_score:
                    best_score = score
                    best_param = {'C': C, 'gamma': gamma}
        print("Best score:{:.2f}".format(best_score))
        print("Parameters in best score:{}".format(best_param))

    if 0:
        # Same search via GridSearchCV with 5-fold CV.
        param_grid = {
            'C': [10**i for i in range(-3, 2)],
            'gamma': [10**i for i in range(-3, 2)]
        }
        grid_search = GridSearchCV(SVC(), param_grid, cv=5)
        grid_search.fit(X_train, y_train)
        print("Test set score:{:.2f}".format(grid_search.score(X_test, y_test)))
        print("Best parameters:{}".format(grid_search.best_params_))
        print("Best cross-validation score:{:.2f}".format(
            grid_search.best_score_))

    if 0:
        # SVC with fixed hyper-parameters plus precision/recall/F1 metrics.
        model = SVC(gamma=0.001, C=1)
        model.fit(X_train, y_train)
        # NOTE(review): `clf` is not defined in this branch — enabling this
        # section as-is would raise NameError; `model` was likely intended.
        print("train:", clf.__class__.__name__, model.score(X_train, y_train))
        print("test :", clf.__class__.__name__, model.score(X_test, y_test))
        pred_svc = model.predict(X_test)
        confusion_m = confusion_matrix(y_test, pred_svc)
        print("Confution matrix:\n{}".format(confusion_m))
        print("Precision score: {:.3f}".format(
            precision_score(y_true=y_test, y_pred=pred_svc)))
        print("Recall score : {:.3f}".format(
            recall_score(y_true=y_test, y_pred=pred_svc)))
        print("F1 score : {:.3f}".format(
            f1_score(y_true=y_test, y_pred=pred_svc)))

    if 0:
        # Logistic regression: sweep decision thresholds and plot the
        # resulting (fpr, tpr) points.
        model = LogisticRegression()
        lrmodelfit = model.fit(X_train, y_train)
        prob_result = pd.DataFrame(lrmodelfit.predict_proba(X_test))
        prob_result.columns = ["malignant", "benign"]
        for threhold, flg in zip([0.4, 0.3, 0.15, 0.05],
                                 ["flg_04", "flg_03", "flg_015", "flg_005"]):
            # Flag = 1 when P(benign) exceeds the current threshold.
            prob_result[flg] = prob_result["benign"].map(
                lambda x: 1 if x > threhold else 0)
        print(prob_result.head(15))
        fig, ax = plt.subplots()
        for flg in ["flg_04", "flg_03", "flg_015", "flg_005"]:
            confusion_m = confusion_matrix(y_test, prob_result[flg])
            print("◆ThreholdFlg:", flg)
            print("Confution matrix:\n{}".format(confusion_m))
            # False/true positive rates from the confusion matrix rows.
            fpr = (confusion_m[0, 1]) / (confusion_m[0, 0] + confusion_m[0, 1])
            tpr = (confusion_m[1, 1]) / (confusion_m[1, 0] + confusion_m[1, 1])
            plt.scatter(fpr, tpr)
            plt.xlabel("fpr")
            plt.ylabel("tpr")
            ax.annotate(flg, (fpr, tpr))
        # plt.show()

    if 0:
        # ROC curve for a linear one-vs-rest SVC.
        X = cancer.data
        y = cancer.target
        y = label_binarize(y, classes=[0, 1])
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=.5,
                                                            random_state=0)
        classifier = OneVsRestClassifier(
            SVC(kernel='linear', probability=True, random_state=0))
        y_score = classifier.fit(X_train, y_train).decision_function(X_test)
        # Compute fpr (false positive rate) and tpr (true positive rate).
        fpr, tpr, _ = roc_curve(y_test, y_score)
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, color='red', lw=2,
                 label='ROC curve (area = %0.2f)' % roc_auc)
        plt.plot([0, 1], [0, 1], color='black', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic')
        plt.legend(loc="best")
        plt.show()

    if 0:
        # jack-knife
        # NOTE(review): `random.seed` / `random.randn` / `random.choice`
        # suggest numpy.random imported as `random` — confirm at file top.
        random.seed(0)
        norm_random_sample_data = random.randn(1000)
        mean_array = np.array([])
        # Leave-one-out means for the jackknife standard error.
        for i in range(0, len(norm_random_sample_data)):
            ind = np.ones(1000, dtype=bool)
            extract_num = [i]
            ind[i] = False
            mean_array = np.append(mean_array,
                                   norm_random_sample_data[ind].mean())
        x = (mean_array - mean_array.mean())**2
        print(np.sqrt(x.sum() * 999 / 1000))
        # Bootstrap means (resamples of size 500 with replacement).
        mean_array_boot = np.array([])
        for i in range(0, len(norm_random_sample_data)):
            mean_array_boot = \
                np.append(mean_array_boot,
                          random.choice(norm_random_sample_data, 500,
                                        replace=True).mean())
        x = (mean_array_boot - mean_array_boot.mean())**2
        print(np.sqrt(x.sum() / 1000))

    if 0:
        # Bagging
        bagging = BaggingClassifier(KNeighborsClassifier(),
                                    max_samples=0.5, max_features=0.5)
        clf = bagging
        clf.fit(X_train, y_train)
        print("train:", clf.__class__.__name__, clf.score(X_train, y_train))
        print("test:", clf.__class__.__name__, clf.score(X_test, y_test))

    if 0:
        # Boosting
        clf = AdaBoostClassifier(learning_rate=1.0)
        clf.fit(X_train, y_train)
        print("train:", clf.__class__.__name__, clf.score(X_train, y_train))
        print("test:", clf.__class__.__name__, clf.score(X_test, y_test))
        # Slightly overfitting here; sweep the learning rate to see the
        # effect on test accuracy.
        score_list = []
        for r in np.arange(0.00001, 2, 0.01):
            clf = AdaBoostClassifier(learning_rate=r)
            clf.fit(X_train, y_train)
            score_list.append([r, clf.score(X_test, y_test)])
        score_list_df = pd.DataFrame(score_list)
        score_list_df.columns = ["r", "score"]
        plt.plot(score_list_df.r, score_list_df.score)
        plt.xlabel("learning rate")
        plt.ylabel("test score")
        plt.grid(True)
        plt.show()

    if 1:
        # Random forest — the only enabled section; also plots sorted
        # feature importances.
        f_model = RandomForestClassifier(random_state=0)
        clf = f_model.fit(X_train, y_train)
        print("train:", clf.__class__.__name__, clf.score(X_train, y_train))
        print("test:", clf.__class__.__name__, clf.score(X_test, y_test))
        importances = f_model.feature_importances_
        indi = np.argsort(importances)[::-1]  # descending importance order
        label = cancer.feature_names
        plt.bar(range(X_train.shape[1]), importances[indi])
        plt.xticks(range(X_train.shape[1]), label[indi], rotation=90)
        plt.grid(True)
        plt.tight_layout()
        plt.show()
# Clean the Size column: strip the trailing 'M' suffix so it parses as float.
df['Size'] = df['Size'].apply(lambda x: x.strip('M'))
# NOTE(review): this zeroes ENTIRE rows where Size == 'Varies with device'
# (boolean-mask assignment on the frame), not just the Size cell — confirm
# that is intended.
df[df['Size'] == 'Varies with device'] = 0
df['Size'] = df['Size'].astype(float)

# Take specific attributes as features; target is the integer rating.
features = ['Size', 'Type', 'Price', 'Content Rating', 'Genres']
X = df[features]
y = df['Rating'].astype(int)

# Split into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=10)
# NOTE(review): rebinds the imported `svm` module name to an SVR instance.
svm = svm.SVR()
svm.fit(X_train, y_train)
# R^2 of the regressor on the held-out split (printed as a percentage).
accuracy = svm.score(X_test, y_test)
print('Accuracy: ' + str(np.round(accuracy * 100, 2)) + '%')

# Build the tkinter window for interactive predictions.
window = tk.Tk()
window.title('Predict Rate App')
window.geometry('800x350')
window.configure(background='gray')


def calculate_number():
    # Overwrites dataframe columns from the GUI entry widgets; presumably
    # feeds a prediction further down (continuation not visible here).
    df['Size'] = size_entry.get()
    df['Type'] = type_entry.get()
X_test = [(x, np.empty((0, 2), dtype=np.int)) for x in x_test] print len(x_test) for i in range(len(test_labels)): test_labels = test_labels.astype(int) """ print len(test_labels) pbl = GraphCRF(inference_method='ad3') svm = NSlackSSVM(pbl, C=1,n_jobs = 1,verbose = 1) start = time() print len(X_valid) print len(valid_Y) svm.fit(X_valid, valid_Y) print "fit finished" time_svm = time() - start print X_test[i][0].shape print svm.score(X_valid,valid_Y) print svm.score(X_test,test_Y) y_pred = np.vstack(svm.predict(np.array(X_valid))) print("Score with pystruct crf svm: %f (took %f seconds)" % (np.mean(y_pred == valid_Y), time_svm)) y_predt = np.vstack(svm.predict(np.array(X_test))) print("Score with pystruct crf svm: %f (took %f seconds)" % (np.mean(y_predt == test_Y), time_svm)) #we throw away void superpixels and flatten everything #y_pred, y_true = np.hstack(y_pred), np.hstack(valid_Y) #y_pred = y_pred[y_true != 255] #y_true = y_true[y_true != 255] #print("Score on test set: %f" % np.mean(y_true == y_pred))
# k-nearest-neighbours baseline (k = 100).
knn = neighbors.KNeighborsClassifier(n_neighbors=100)
knn.fit(x_train, y_train)
knn_pred = knn.predict(x_test)
print("knn score:", knn.score(x_test, y_test))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, knn_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, knn_pred))
print('Root Mean Squared Error:',
      np.sqrt(metrics.mean_squared_error(y_test, knn_pred)))

#%% svm
from sklearn import svm

# Support-vector classifier (rebinds the module name `svm` to the model).
svm = svm.SVC(random_state=1)
svm.fit(x_train, y_train)
svc_pred = svm.predict(x_test)
print("svm accuary: ", svm.score(x_test, y_test))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, svc_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, svc_pred))
print('Root Mean Squared Error:',
      np.sqrt(metrics.mean_squared_error(y_test, svc_pred)))

#%% rf classification
from sklearn import ensemble

# Random forest with 10 trees.
rf = ensemble.RandomForestClassifier(n_estimators=10, random_state=1)
rf.fit(x_train, y_train)
rf_pred = rf.predict(x_test)
print("rf accuracy: ", rf.score(x_test, y_test))
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, rf_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, rf_pred))
print('Root Mean Squared Error:',
      np.sqrt(metrics.mean_squared_error(y_test, rf_pred)))
# Report test-set accuracy and confusion matrix for the preceding
# logistic-regression run (predicted_label/test_label set above this chunk).
print('\n Testing set Accuracy:' + str(100 * np.mean(
    (predicted_label == test_label).astype(float))) + '%')
print(confusion_matrix(test_label, predicted_label,
                       labels=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
# NOTE(review): start_time_LR is set before this chunk — confirm upstream.
stop_time_LR = time.time() - start_time_LR
print("Time taken for Logistic Regression {}.seconds\n".format(
    str(stop_time_LR)))

# Code for SVM
print("Learning SVM Using Linear Kernel")
svm = SVC(kernel='linear')
#train_label = train_label.flatten()
# Train on a random 10k subsample of the 50k training set to keep the
# linear-kernel fit tractable.
indexes = np.random.randint(50000, size=10000)
sample_data = train_data[indexes, :]
sample_label = train_label[indexes, :]
svm.fit(sample_data, sample_label.flatten())
# Score against the FULL train/validation/test sets.
traning_accuracy = svm.score(train_data, train_label)
traning_accuracy = str(100 * traning_accuracy)
print("Traning data Accuracy for Linear Kernel: {}%\n".format(
    traning_accuracy))
validation_accuracy = svm.score(validation_data, validation_label)
validation_accuracy = str(100 * validation_accuracy)
print("Validation data Accuracy for Linear Kernel: {}%\n".format(
    validation_accuracy))
test_accuracy = svm.score(test_data, test_label)
test_accuracy = str(100 * test_accuracy)
print("Test data Accuracy for Linear Kernel: {}%\n".format(test_accuracy))
# NOTE(review): start_time_linear_kernel is also set outside this chunk.
time_linear_kernel = time.time() - start_time_linear_kernel
print("Time taken for SVM using Linear Kernel {}.seconds\n\n\n".format(
    str(time_linear_kernel)))

print("SVM with radial basis function with value of gamma setting to 1 ")
start_time_rbf = time.time()
X = scaler.transform(X) #split dataset into train and test data X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1, stratify=y) # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y) # Create SVM classifier svm.fit(X_train, y_train) score_vals = dict() score_vals['bins'] = biin score_vals['score'] = svm.score(X_test, y_test) scores.append(score_vals) print(score_vals) # calculate best run parameters print(scores) scores_list = [record['score'] for record in scores] # best_run = [record for record in scores if record['score'] == np.max(scores)] best_run = [ record for record in scores if record['score'] == np.max(scores_list) ] print(best_run) # save best trained model biin = best_run[0]['bins'] print('Running to save the best model for bin = {}'.format(biin))
# Sample 3.3: Bagging
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression

# Load the wifi dataset: every column but the last is a feature,
# the last column is the room label.
data = pd.read_csv('wifi.txt', header=None)
X = data.values[:, :-1]
y = data.values[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Three classifiers trained on the same split
# (note: the SVC rebinds the imported module name `svm`).
bagging = BaggingClassifier()
svm = svm.SVC()
logreg = LogisticRegression()
for model in (bagging, svm, logreg):
    model.fit(X_train, y_train)

# Held-out accuracy for each model, as a percentage.
print("Bagging Accuracy: %.2f%s" % (bagging.score(X_test, y_test) * 100, '%'))
print("SVM Accuracy: %.2f%s" % (svm.score(X_test, y_test) * 100, '%'))
print("LogReg Accuracy: %.2f%s" % (logreg.score(X_test, y_test) * 100, '%'))
def score(test_data, test_labels):
    """Return the mean accuracy of the module-level ``svm`` classifier
    on the given data and labels."""
    global svm
    model = svm
    return model.score(test_data, test_labels)
def main():
    """Benchmark SVM kernels and tolerances on a 5000-sample MNIST subset.

    Trains a linear SVC, sweeps kernels and the C parameter while timing
    fit/predict, plots the sweeps, then cross-validates the best
    configuration and shows its confusion matrix.
    """
    mnist = fetch_openml(name='mnist_784')
    echantillon = np.random.randint(70000, size=5000)
    data = mnist.data[echantillon]
    target = mnist.target[echantillon]
    xtrain, xtest, ytrain, ytest = train_test_split(data, target,
                                                    train_size=0.7)

    # Baseline: linear kernel.
    classifier = svm.SVC(kernel='linear')
    classifier.fit(xtrain, ytrain)
    error = 1 - classifier.score(xtest, ytest)
    print(f"Score SVM linéaire : {error}")

    # Kernel sweep: (name, fit time, predict time, 0-1 loss) per kernel.
    kernels = []
    print("Modification du kernel : ")
    for kernel in ['linear', 'poly', 'rbf', 'sigmoid']:
        classifier = svm.SVC(kernel=kernel)
        start_training = time.time()
        classifier.fit(xtrain, ytrain)
        final_training = time.time() - start_training
        start_prediction = time.time()
        ypred = classifier.predict(xtest)
        final_prediction = time.time() - start_prediction
        error = metrics.zero_one_loss(ytest, ypred)
        kernels.append((kernel, final_training, final_prediction, error))
        print(f"\t {kernels[-1]}")
    kernels_liste = list(zip(*kernels))
    plot_fig(kernels_liste)

    # C (tolerance) sweep.
    tol = []
    print("Evolution de la tolérance : ")
    for tolerance in np.linspace(0.1, 1.0, num=5):
        # BUG FIX: this previously assigned to the name `svm`, which made
        # `svm` local to main() and caused an UnboundLocalError at the very
        # first `svm.SVC(...)` call above. Use a distinct local name.
        model = svm.SVC(C=tolerance)
        start_training = time.time()
        model.fit(xtrain, ytrain)
        final_training = time.time() - start_training
        start_prediction = time.time()
        ypred = model.predict(xtest)
        final_prediction = time.time() - start_prediction
        error = metrics.zero_one_loss(ytest, ypred)
        error_training = model.score(xtrain, ytrain)
        tol.append((tolerance, final_training, final_prediction, error,
                    error_training))
        print(f"\t {tol[-1]}")
    tol_list = list(zip(*tol))
    plot_fig(tol_list)

    # Test error (blue) vs training score (orange) across C values.
    plt.figure(figsize=(19, 9))
    plt.plot(tol_list[0], tol_list[3], 'x-', color='blue')  # erreur de test
    plt.plot(tol_list[0], tol_list[-1], 'x-', color='orange')  # erreur d'entrainement
    plt.grid(True)
    plt.show()

    # Re-fit the best configuration and cross-validate it.
    best_kernel = 'rbf'
    best_tolerance = 1.0
    best_svm = svm.SVC(kernel=best_kernel, C=best_tolerance)
    start_training = time.time()
    best_svm.fit(xtrain, ytrain)
    best_final_entrainement = time.time() - start_training
    start_prediction = time.time()
    ypred = best_svm.predict(xtest)
    best_final_prediction = time.time() - start_prediction
    cross_val = model_selection.cross_val_score(best_svm, data, target, cv=10)
    meilleure_erreur = 1 - np.mean(cross_val)
    print(f"Durée de l'entraînement : {best_final_entrainement}")
    print(f"Durée de la prédiction : {best_final_prediction}")
    print(f"Erreur : {meilleure_erreur}")

    # Confusion matrix of the best model as a labelled heatmap.
    cm = confusion_matrix(ytest, ypred)
    df_cm = pd.DataFrame(cm, columns=np.unique(ytest), index=np.unique(ytest))
    df_cm.index.name = 'Valeur réelle'
    df_cm.columns.name = 'Valeur prédite'
    plt.figure(figsize=(16, 9))
    sn.heatmap(df_cm, cmap="Blues", annot=True)
    plt.show()
# Report the already-fitted Gaussian naive Bayes baseline.
print('Accuracy of GNB classifier on training set: {:.2f}'.format(
    gnb.score(X_train, Y_train)))
print('Accuracy of GNB classifier on test set: {:.2f}'.format(
    gnb.score(X_test, Y_test)))


def svc_param_selection(X, y, nfolds):
    """Grid-search C and kernel for an SVC with nfolds-fold CV.

    :param X: training features.
    :param y: training labels.
    :param nfolds: number of cross-validation folds.
    :return: the best parameter dict found by GridSearchCV.
    """
    from sklearn import svm
    import numpy as np
    GridSearchCV = sklearn.model_selection.GridSearchCV
    Cs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    # gammas = [0.001, 0.01, 0.1, 1]
    kernels = ['linear', 'rbf']
    param_grid = {'C': Cs, 'kernel': kernels}
    grid_search = GridSearchCV(svm.SVC(), param_grid, cv=nfolds)
    grid_search.fit(X, y)
    return grid_search.best_params_


# Learning
params = svc_param_selection(X_train, Y_train, 3)
# FIX: `print params` was Python-2-only syntax (a SyntaxError on Py3) in a
# snippet that otherwise uses print() calls exclusively.
print(params)
svm = SVC(**params)
# svm = SVC()
svm.fit(X_train, Y_train)
print('Accuracy of SVM classifier on training set: {:.2f}'.format(
    svm.score(X_train, Y_train)))
print('Accuracy of SVM classifier on test set: {:.2f}'.format(
    svm.score(X_test, Y_test)))
#svm, X1, Y1, cv=bs)#, score_func=metrics.f1_score) #print 'score: %f +- %f' % (scores.mean(), scores.std()) #pred_Y = svm.predict(test_X) #print metrics.precision_score(test_Y, pred_Y) #print metrics.recall_score(test_Y, pred_Y) #print metrics.f1_score(test_Y, pred_Y) #pred_Y = svm.predict(X1) #print metrics.precision_score(Y1, pred_Y) #print metrics.recall_score(Y1, pred_Y) #print metrics.f1_score(Y1, pred_Y) alpha_arr = [] for label in np.unique(labels): n = np.sum(labels == label) alpha_arr.append(n / float(labels.shape[0])) alpha_arr = np.array(alpha_arr) alpha = np.max(alpha_arr) print alpha bs = cross_validation.Bootstrap(data.shape[0], 3) for train_indices, test_indices in bs: svm.fit(data[train_indices], labels[train_indices]) score = svm.score(data[test_indices], labels[test_indices]) print score, (score - alpha) / (1 - alpha) #pred = svm.predict(data[test_indices])
# Splitting data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=44, shuffle=True
)

# ----------------------------------------------------
# Fit a support-vector classifier
# (note: this rebinds the imported `svm` module name to the model).
svm = svm.SVC()
svm.fit(X_train, y_train)

# Report train/test accuracy and the learned classes.
print("svm Train Score is : ", svm.score(X_train, y_train))
print("svm Test Score is : ", svm.score(X_test, y_test))
print("svm Classes are : ", svm.classes_)
print("----------------------------------------------------")

# Predictions on the held-out set (first ten shown).
y_pred = svm.predict(X_test)
print("Predicted Value for svm is : ", y_pred[:10])

# ----------------------------------------------------
# Confusion matrix, printed and drawn as a heatmap.
CM = confusion_matrix(y_test, y_pred)
print("Confusion Matrix is : \n", CM)
sns.heatmap(CM, center=True)
#One fight at a time
# Encode categorical columns with a SHARED vocabulary. The original called
# le.fit_transform separately on train and test, which assigns inconsistent
# integer codes to the same category in the two frames.
le = LabelEncoder()
cat = ['genre', 'certificate', 'distributor']
for col in cat:
    le.fit(pd.concat([train[col], test[col]], axis=0))
    train[col] = le.transform(train[col])
    test[col] = le.transform(test[col])

#no shirts, no shoes
train_X = train.drop(['year', 'oscar', 'movie_name', 'actor_name', 'href'], axis=1)
test_X = test.drop(['year', 'oscar', 'movie_name', 'actor_name', 'href'], axis=1)
train_Y = train['oscar']

#Fights will go on as long as they want to
# probability=True is required: SVC.predict_proba raises AttributeError
# when the model was not fit with probability estimates enabled.
svm = svm.SVC(kernel='rbf', C=1, probability=True).fit(train_X, train_Y)
svm.score(train_X, train_Y)

#If this is your first night at Fight Club, you have to fight.
# Probability of the positive class, indexed by movie name.
pred_svm = svm.predict_proba(test_X)[:, 1]
svm_prediction = pd.DataFrame(pred_svm, test['movie_name'])
# Last column is the label; everything before it is the feature matrix.
X = data.values[:, :-1]
y = data.values[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# Three base classifiers (these assignments shadow the svm/tree modules).
svm = svm.SVC(probability=True)
logreg = LogisticRegression()
tree = tree.DecisionTreeClassifier()
for clf in (svm, logreg, tree):
    clf.fit(X_train, y_train)

print("SVM Accuracy: %.2f%s" % (svm.score(X_test, y_test) * 100, '%'))
print("LogReg Accuracy: %.2f%s" % (logreg.score(X_test, y_test) * 100, '%'))
print("Tree Accuracy: %.2f%s" % (tree.score(X_test, y_test) * 100, '%'))

# Equal-weight hard-voting ensemble of the three models.
w = [1, 1, 1]
ensemble = VotingClassifier(
    estimators=[('svm', svm), ('logreg', logreg), ('tree', tree)],
    voting='hard', weights=w)
ensemble.fit(X_train, y_train)
print("Ensemble Accuracy: %.2f%s" % (ensemble.score(X_test, y_test) * 100, '%'))
def _evaluate(self, Gs_kwargs_unused=None, *_):  # placeholder removed below
    pass
#%%
# Coarse grid search over (C, gamma) for an RBF SVR, scored on the test set.
svm = sklearn.svm.SVR()
Cs = numpy.logspace(0, 2, 8)
gammas = numpy.logspace(-6, -4, 10)
accuracies = numpy.zeros((8, 10))
for i, c in enumerate(Cs):
    for j, gamma in enumerate(gammas):
        svm = sklearn.svm.SVR(C=c, gamma=gamma)
        # Train on a 1000-sample subset to keep the grid search fast.
        svm.fit(trainingx[:1000], trainingy[:1000])
        accuracies[i, j] = svm.score(testingx, testingy)
        print(i, j)  # was Python-2 `print i, j` — SyntaxError under Python 3

# Heat map of R^2 scores over the hyper-parameter grid.
plt.imshow(accuracies, interpolation='NONE')
plt.xticks(range(10), ['{:2.2e}'.format(v) for v in gammas])
plt.yticks(range(8), ['{:2.1e}'.format(v) for v in Cs])
plt.gcf().set_size_inches((10, 10))
plt.colorbar()
plt.show()

#%%
# Refit with the hyper-parameters chosen from the grid above.
svm = sklearn.svm.SVR(C=5.0, gamma=1e-5)
svm.fit(trainingx[:1000], trainingy[:1000])
print(svm.score(testingx, testingy))  # was a Python-2 print statement
# Drop date / post-mortem / volumetric columns that should not feed the models.
df.drop(['scan_date', 'dod', 'Measure.volume', 'Braak_Lewy', 'Braak_NFT',
         'Braak_AB'], axis=1, inplace=True)

# 'Group' is the target; everything else is a feature. Fixed seed for repeatability.
features = df.drop('Group', axis=1)
target = df['Group']
X_train, X_test, y_train, y_test = train_test_split(features, target,
                                                    test_size=0.30,
                                                    random_state=101)

#logistic regression model
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
print("LR Training:", lr.score(X_train, y_train))
print("LR Test:", lr.score(X_test, y_test))
print(classification_report(y_test, y_pred))

#SVM model (note: this assignment shadows the imported svm module)
svm = svm.SVC(kernel='linear')
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
print("SVM Training:", svm.score(X_train, y_train))
print("SVM Test:", svm.score(X_test, y_test))
print(classification_report(y_test, y_pred))

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier

# Define the classifiers
classifiers = [LogisticRegression(), LinearSVC(), SVC(), KNeighborsClassifier()]

# Fit the classifiers
for c in classifiers:
    c.fit(X_train, y_train)

# Plot the classifiers
from sklearn import svm

# Don't forget to change this value AND also the filename on the second to last line!
experiment_number = 10

print("Loading datasets...")  # prints converted from Python-2 statements


def _load_pickle(path):
    """Load one pickle file, closing the handle (the original leaked open files)."""
    with open(path, 'rb') as f:
        return pickle.load(f)


#train_samples = _load_pickle("Models/SC/trainset%d.pkl" % experiment_number)
#train_samples = _load_pickle("Models/AutoE/trainset%d.pkl" % experiment_number)
train_samples = _load_pickle("Models/RBM/trainset%d.pkl" % experiment_number)
train_outputs = _load_pickle("Models/train_outputs.pkl")[:1000]
#valid_samples = _load_pickle("Models/SC/validset%d.pkl" % experiment_number)
#valid_samples = _load_pickle("Models/AutoE/validset%d.pkl" % experiment_number)
valid_samples = _load_pickle("Models/RBM/validset%d.pkl" % experiment_number)
valid_outputs = _load_pickle("Models/valid_outputs.pkl")[:1000]

print("Training the svm")
svm = svm.SVC()  # Default uses RBF
svm.fit(train_samples, train_outputs)

print("Predicting...")
train_score = svm.score(train_samples, train_outputs)
valid_score = svm.score(valid_samples, valid_outputs)
# The original message contained a stray cedilla ("accuracy¸:"); fixed here.
print("Training accuracy: %.3f, validation accuracy: %.3f" % (train_score, valid_score))

# Save the output to file.
with open("Outputs/RBM_param_tests.txt", "a") as myfile:
    myfile.write("Experiment %d, training accuracy: %.3f, validation accuracy: %.3f \n"
                 % (experiment_number, train_score, valid_score))
"""#> **SVM**""" from sklearn import svm svm = svm.SVC(C=1000) svm.get_params svm.fit(X_train,y_train) y_svm = svm.predict(X_test) svm.score(X_train,y_train) svm.score(X_test,y_test) generate_model_report(y_test,y_svm) from yellowbrick.classifier.rocauc import roc_auc roc_auc(svm, X_train, y_train, X_test=X_test, y_test=y_test, classes=["Low_damage","Medium_damage","High_damage"]) # from sklearn.model_selection import GridSearchCV # parameters={"C":[1,10,100,500,1000],'gamma':['auto','scale']} # sv = GridSearchCV(svm, parameters,scoring= 'f1', # cv=5) # sv.fit(X_train,y_train) # params = sv.best_params_ # svm2 = svm.SVC()
# Pad the plotting window half a unit beyond the data range in each axis.
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# One subplot per fitted model: decision surface plus test points.
for svm, title, ax in zip(models, titles, sub.flatten()):
    # Z is computed but unused (its reshape/pcolormesh use is commented out);
    # plot_contours does the surface drawing.
    Z = svm.predict(np.c_[xx.ravel(), yy.ravel()])
    plot_contours(ax, svm, xx, yy, cmap=plt.cm.brg, alpha=0.8)
    #Z = Z.reshape(xx.shape)
    #plt.figure(1, figsize=(4, 3))
    #plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)
    #ax.scatter(X_train[:, 0], X_train[:, 1], c=Y_train, cmap=plt.cm.brg, s=20, edgecolors='k')
    ax.scatter(X_test[:, 0], X_test[:, 1], c=Y_test, cmap=plt.cm.brg,
               s=20, edgecolors='k', marker='D')
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xlabel('Sepal length')
    ax.set_ylabel('Sepal width')
    ax.set_xticks(())
    ax.set_yticks(())
    # Error rates as percentages, rounded to two decimals, for the title.
    train_err = str(round(100 * (1 - svm.score(X_train, Y_train)), 2))
    test_err = str(round(100 * (1 - svm.score(X_test, Y_test)), 2))
    ax.set_title(title + ' \n traning error: ' + train_err + '%'
                 ' \n test error: ' + test_err + '%')

plt.show()
# Fit the SVM and predict on the held-out set.
svm.fit(X_train, y_train)
predictions = svm.predict(X_test)

# KNN
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
# NOTE(review): this rebinding discards the SVM predictions computed above —
# only the KNN predictions are ever plotted.
predictions = knn.predict(X_test)

plt.title("KNN Predictions vs Actual")
plt.scatter(y_test, predictions)
plt.xlabel("Actual Values")
plt.ylabel("Predictions")
plt.show()

# Adapted from https://towardsdatascience.com/solving-a-simple-classification-problem-with-python-fruits-lovers-edition-d20ab6b071d2
# Side-by-side train/test accuracies for the three fitted classifiers
# (lm is assumed to be a LogisticRegression fitted earlier — not visible here).
print('Accuracy of Logistic regression classifier on training set: {:.2f}'
      .format(lm.score(X_train, y_train)))
print('Accuracy of Logistic regression classifier on test set: {:.2f}'
      .format(lm.score(X_test, y_test)))
print('Accuracy of SVM classifier on training set: {:.2f}'
      .format(svm.score(X_train, y_train)))
print('Accuracy of SVM classifier on test set: {:.2f}'
      .format(svm.score(X_test, y_test)))
print('Accuracy of K-NN classifier on training set: {:.2f}'
      .format(knn.score(X_train, y_train)))
print('Accuracy of K-NN classifier on test set: {:.2f}'
      .format(knn.score(X_test, y_test)))
# Bag-of-words features; this token pattern also keeps single-character tokens.
cv = CountVectorizer(token_pattern=r"(?u)\b\w+\b")
X = cv.fit_transform(split_corpus).toarray()

# Build the label vector: spam label is 0, normal (ham) label is 1.
# (Comments translated from Chinese.)
y = [0] * 5000 + [1] * 5000

# Split the feature set into training and test sets (40% held out).
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.4, random_state=0)

# Train a classification model with an SVM.
svm = svm.SVC(kernel='rbf', gamma=0.7, C=1.0)
svm.fit(X_train, y_train)

# SVM classification performance.
y_pred_svm = svm.predict(X_test)
print("SVM accuracy:\n", svm.score(X_test, y_test))
print("SVM report:\n", metrics.classification_report(y_test, y_pred_svm))
print("SVM matrix:\n", metrics.confusion_matrix(y_test, y_pred_svm))

# Train a classification model with naive Bayes and report its performance.
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Naive Bayes model classification results.
print("naive_bayes accuracy:\n", gnb.score(X_test, y_test))
print("naive_bayes report:\n", metrics.classification_report(y_test, y_pred))
print("naive_bayes matrix:\n", metrics.confusion_matrix(y_test, y_pred))