def get_CV_acc(X, Y, clf):
    """Return the mean accuracy of `clf` over 10-fold cross-validation.

    For each fold, the classifier is fit on the training split and scored
    (via ACC) on the held-out split; the per-fold accuracies are averaged.
    """
    fold_scores = []
    for train_idx, test_idx in KFold(n_splits=10).split(X):
        clf.fit(X[train_idx], Y[train_idx])
        fold_scores.append(ACC(clf.predict(X[test_idx]), Y[test_idx]))
    return np.mean(fold_scores)
def plot_confusion_matrix(test_label, pred):
    """Plot a row-normalized confusion matrix of true vs. predicted labels.

    Each cell is annotated with "rate(count)" where rate is the L1
    row-normalized value and count the raw confusion-matrix entry. Axis
    ticks show human-readable sensor-type names for the classes present
    in either `test_label` or `pred`; the title carries overall accuracy.
    """
    # class id -> human-readable sensor-type name
    mapping = {
        1: 'co2', 2: 'humidity', 3: 'pressure', 4: 'rmt', 5: 'status',
        6: 'stpt', 7: 'flow', 8: 'HW sup', 9: 'HW ret', 10: 'CW sup',
        11: 'CW ret', 12: 'SAT', 13: 'RAT', 17: 'MAT', 18: 'C enter',
        19: 'C leave', 21: 'occu', 30: 'pos', 31: 'power', 32: 'ctrl',
        33: 'fan spd', 34: 'timer'
    }
    cm_ = CM(test_label, pred)
    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented drop-in replacement.
    cm = normalize(cm_.astype(float), axis=1, norm='l1')
    fig = pl.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm, cmap=Color.YlOrBr)
    fig.colorbar(cax)
    # annotate every cell with "rate(count)"
    for x in range(len(cm)):
        for y in range(len(cm)):
            ax.annotate(str("%.3f(%d)" % (cm[x][y], cm_[x][y])),
                        xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center',
                        fontsize=9)
    # tick labels only for the classes that actually occur
    cm_cls = np.unique(np.hstack((test_label, pred)))
    cls = []
    for c in cm_cls:
        cls.append(mapping[c])
    pl.yticks(range(len(cls)), cls)
    pl.ylabel('True label')
    pl.xticks(range(len(cls)), cls)
    pl.xlabel('Predicted label')
    pl.title('Confusion Matrix (%.3f)' % (ACC(pred, test_label)))
    pl.show()
def metrics(label_list, pred_list, pos_prob_list):
    """Compute the metrics named in config['metric'].

    Supported names: 'fbs' (F-beta with beta=1), 'acc' (accuracy),
    'auc' (ROC AUC from positive-class probabilities).

    Returns a dict mapping each metric name to its value.
    Raises NotImplementedError for an unrecognized metric name.
    """
    metric_dict = dict()
    for m in config['metric']:
        if m == 'fbs':
            metric_dict[m] = FBS(label_list, pred_list, 1)
        elif m == 'acc':
            metric_dict[m] = ACC(label_list, pred_list)
        elif m == 'auc':
            metric_dict[m] = AUC(label_list, pos_prob_list)
        else:
            # BUG FIX: a bare `raise` with no active exception raises
            # "RuntimeError: No active exception to re-raise"; raise an
            # explicit, descriptive error instead of print-then-raise.
            raise NotImplementedError(
                'Error : No such metric: %r. Implement it.' % m)
    return metric_dict
#print(type(train[0])) vect = TfidfVectorizer() trainfeat = vect.fit_transform(train[0]) testfeat = vect.transform(test[0]) #print(type(testfeat)) #print(trainfeat.shape) #print(len(train[1])) nb = MultinomialNB() nb.fit(trainfeat, train[1]) predict = nb.predict(testfeat) #print(predict) #print(type(predict)) print("Accuracy:-{ACC}%".format(ACC=100 * ACC(test[1], predict))) while True: review = [] line = input("Enter a sentence('Quit' to quit):\n") if line != 'Quit': #print("Words Type:"+type(review).__name__) review.append(line) #print(review) a = words(review) #print(a) #print("Words Type:"+type(a).__name__) data = vect.transform(words(review)) #print(data) #print(type(data)) predict = nb.predict(data)
def run_auto(self):
    '''
    test direct data feature based transfer accuracy on the new building
    '''
    # Baseline: random forest trained on source-building data features,
    # evaluated directly on the target building.
    rf = RFC(n_estimators=100, criterion='entropy')
    rf.fit(self.train_fd, self.train_label)
    pred = rf.predict(self.test_fd)
    print('direct data feature-based transfer acc on tgt_bldg:',
          ACC(pred, self.test_label))
    #plot_confusion_matrix(self.test_label, pred)
    '''
    step1: train base models from bldg1
    '''
    self.get_base_learners()
    '''
    step2: TL with name feature on bldg2
    '''
    label = self.test_label
    class_ = np.unique(self.train_label)
    for b in self.bl:
        print(b.score(self.test_fd, label))
    # Cluster the target building's name features; NOTE(review): the
    # cluster count 32 is hard-coded here — confirm it matches the number
    # of sensor classes intended for this dataset.
    n_class = 32
    c = KMeans(init='k-means++', n_clusters=n_class, n_init=10)
    c.fit(self.test_fn)
    dist = np.sort(c.transform(self.test_fn))
    ex_id = DD(list)  #example id for each C
    for i, j, k in zip(c.labels_, range(len(self.test_fn)), dist):
        ex_id[i].append(int(j))
    #getting neighors for each ex
    nb_c = DD()  #nb from clustering results
    for exx in ex_id.values():
        exx = np.asarray(exx)
        for e in exx:
            nb_c[e] = exx[exx != e]  # same-cluster neighbors, excluding self
    nb_f = [DD(), DD(), DD()]  #nb from classification results
    for b, n in zip(self.bl, nb_f):
        preds = b.predict(self.test_fd)
        ex_ = DD(list)
        for i, j in zip(preds, range(len(self.test_fd))):
            ex_[i].append(int(j))
        for exx in ex_.values():
            exx = np.asarray(exx)
            for e in exx:
                n[e] = exx[exx != e]  # same-predicted-class neighbors, excluding self
    #use base learners' predicitons
    acc_ = []
    cov_ = []
    #for delta in np.linspace(0.1, 0.5, 5):
    for delta in np.linspace(self.agreement_threshold,
                             self.agreement_threshold, 1):
        print('running TL with agreement threshold =', delta)
        labeled_id = []
        confidence = []
        output = DD()
        # 999 is a sentinel meaning "no confident prediction for this example"
        preds = np.array([999 for i in range(len(self.test_fd))])
        for i in range(len(self.test_fn)):
            #get the weight for each bl: by computing sim btw cluster and clf
            w = []
            v_c = set(nb_c[i])
            for n in nb_f:
                v_f = set(n[i])
                cns = len(v_c & v_f) / float(
                    len(v_c | v_f))  #original count based weight
                #print (len(v_c & v_f) , len(v_c | v_f))
                inter = v_c & v_f
                union = v_c | v_f
                d_i = 0
                d_u = 0
                # distance-weighted refinement of the Jaccard-style weight
                for it in inter:
                    d_i += np.linalg.norm(self.test_fn[i] - self.test_fn[it])
                    #print (np.linalg.norm(self.test_fn[i]-self.test_fn[it]))
                    #input('...')
                for u in union:
                    d_u += np.linalg.norm(self.test_fn[i] - self.test_fn[u])
                if len(inter) != 0:
                    sim = 1 - (d_i / d_u) / cns
                    #sim = (d_i/d_u)/cns
                    if i in output:
                        output[i].extend(
                            ['%s/%s' % (len(inter), len(union)), 1 - sim])
                    else:
                        output[i] = [
                            '%s/%s' % (len(inter), len(union)), 1 - sim
                        ]
                # NOTE(review): if `inter` is empty, `sim` is unbound here
                # (or stale from a previous iteration) — confirm intended
                # behavior against the original file's indentation.
                w.append(sim)
            output[i].append(np.mean(w))
            if np.mean(w) >= delta:
                confidence.append(np.mean(w))
                w[:] = [float(j) / sum(w) for j in w]  # normalize weights in place
                pred_pr = np.zeros(len(class_))
                # weighted soft-vote over the base learners
                for wi, b in zip(w, self.bl):
                    pr = b.predict_proba(self.test_fd[i].reshape(1, -1))
                    pred_pr = pred_pr + wi * pr
                preds[i] = class_[np.argmax(pred_pr)]
                labeled_id.append(i)
        # accuracy and coverage over the confidently-labeled subset only
        acc_.append(ACC(preds[preds != 999], label[preds != 999]))
        cov_.append(1.0 * len(preds[preds != 999]) / len(label))
    print('acc =', acc_, ';')
    print('cov =', cov_, ';')
    return preds[preds != 999], labeled_id, confidence
# scale continuous data scaler = StandardScaler() scaler.fit(x_train_cont) x_train_cont = scaler.transform(x_train_cont) x_test_cont = scaler.transform(x_test_cont) # fill scaled data with pd.option_context('mode.chained_assignment', None): for l, f in enumerate(features_to_extract): x_train.loc[:, f] = x_train_cont[:, l] x_test.loc[:, f] = x_test_cont[:, l] model.fit(x_train, y_train) y_pred = model.predict(x_test) accuracy_kfold[k] = ACC(y_test, y_pred) acc_mean[i, j] = accuracy_kfold.mean() acc_std[i, j] = accuracy_kfold.std() plt.errorbar(penalties, acc_mean[i, :], yerr=acc_std[i, :], label=kernel, fmt="o", capsize=5, markersize=7) best_penalties[i] = penalties[np.argmax(acc_mean[i, :])] print("Best penalties:", best_penalties) plt.legend() plt.xscale("log")
# Train a random forest on the data features of all buildings except the
# target, evaluate direct transfer on the target, then dump the transferred
# labels to a tab-separated file.
rf = RFC(n_estimators=100, criterion='entropy')
bldg = ['rice', 'sdh', 'soda']
# for i in range(len(X_fd)):
i = 0  # index of the target (test) building in X_fd
source = [X_fd[j] for j in range(len(X_fd)) if j != i]
train = np.vstack(source)
train_fd = train[:, :-1]  # last column of each X_fd row holds the label
train_label = train[:, -1]
test_fd, test_label = X_fd[i][:, :-1], X_fd[i][:, -1]
#print (train_fd.shape, train_label.shape, test_fd.shape, test_label.shape)
rf.fit(train_fd, train_label)
preds = rf.predict(test_fd)
print(ACC(preds, test_label))
# sanity check: data-feature and name-feature sets must be aligned
assert (len(test_label) == len(X_fn[i]))
sourceName = bldg[1]
targetName = bldg[0]
dataDir = "../../dataset/sensorType/sdh_soda_rice"
transferLabelFileName = "transferLabel_" + sourceName + "--" + targetName + ".txt"
transferLabelFileName = os.path.join(dataDir, transferLabelFileName)
# NOTE(review): the file is opened here but no close() is visible in this
# chunk — presumably closed after the loop below; confirm, or use `with`.
f = open(transferLabelFileName, "w")
totalInstanceNum = len(test_label)
f.write("auditorLabel" + "\t" + "transferLabel" + "\t" + "trueLabel\n")
for instanceIndex in range(totalInstanceNum):
    transferLabel = preds[instanceIndex]
    trueLabel = test_label[instanceIndex]
    # chunk ends here; the per-instance f.write presumably follows
# NOTE(review): this chunk begins mid-statement — the call receiving these
# keyword arguments (presumably a neural-network constructor) starts above
# the visible region, so this fragment is left as-is.
    penalty, activation="tanh", regression=False)
NN.set_learning_params(a1, a2)
# train with SGD, tracking the cost on the held-out test set each epoch
NN.fit(X_train,
       Z_train,
       n_minibatches,
       n_epochs,
       std_W=std_W,
       const_b=const_b,
       track_cost=[X_test, Z_test])
Z_pred = NN.classify(X_test)
print(f"Neural Network with penalty lambda = {penalty}")
print(" Accuracy score =", ACC(Z_test, Z_pred))
# NOTE(review): "\l" in the label below is an invalid escape sequence
# (DeprecationWarning in recent Pythons); a raw f-string rf"$\lambda=...$"
# would be cleaner — left untouched since it is a runtime string.
plt.plot(np.arange(1, n_epochs + 1),
         NN.cost,
         label=f"$\lambda={penalty:.2f}$")
plt.xlabel("Number of epochs", fontsize=12)
plt.ylabel("Cost function", fontsize=12)
plt.title("Evolution of cost function", fontsize=15)
plt.legend()
plt.savefig("Figures/NNcla_sgd_cost_function.png", dpi=300)
plt.show()

# grid search learning parameters
n_hidden_layers = 5
def score(self, X, Y):
    """Return the accuracy of this model's predictions on X against labels Y.

    Both the predictions and the targets are moved to CPU before the
    accuracy is computed.
    """
    predictions = self.predict(X)
    return ACC(predictions.cpu(), Y.cpu())
# NOTE(review): this chunk is the tail of a validation loop — `batch`,
# `data`, `mask`, `preds`, `labels`, and `valid_loss` are bound above the
# visible region; indentation relative to that loop cannot be recovered here.
label = torch.tensor(list(batch[:, 1])).to(DEVICE)
data, mask = data.to(DEVICE), mask.to(DEVICE)
output = model(data, mask)
logit, loss = classifier(output, label)
# hard class predictions from the softmax distribution
pred = torch.argmax(torch.softmax(logit, dim=1), dim=1).data.cpu().numpy()
label = label.data.cpu().numpy()
# prepend this batch's results to the running arrays
preds = np.concatenate((pred, preds))
labels = np.concatenate((label, labels))
loss = loss.mean()
valid_loss += loss.item()
# positive-class counts: predicted vs. ground truth
print(len(preds[preds == 1]), len(labels[labels == 1]))
acc = ACC(preds, labels)
pre = P(preds, labels)
rec = R(preds, labels)
f1 = F1(preds, labels)
print(
    'acc:{:.4f}, precision:{:.4f}, recall:{:.4f}, f1:{:.4f}, train_loss:{:.4f}, valid_loss:{:.4f}'
    .format(acc, pre, rec, f1, total_loss / len(train_dataloader),
            valid_loss / len(valid_dataloader)))
# switch to inference mode for the test set
model.eval()
classifier.eval()
with torch.no_grad():
    preds, ids = [], []
    for i, batch in enumerate(test_dataloader):
        data, mask = tensorized(batch[:, 0], vocab)
        # NOTE(review): `id` shadows the builtin; column 1 of the test batch
        # presumably holds example ids (not labels as in validation) — confirm.
        id = np.array(list(batch[:, 1]))