def onTrainEpoch(epoch, loss):
    """Report a finished training epoch, evaluate on the test set, checkpoint.

    Prints overall and per-class accuracy computed from the global
    ``train_matrix``, runs ``test`` over ``test_loader`` (with
    ``onTestBatch`` as the per-batch callback), prints the same figures for
    the global ``test_matrix``, resets both matrices to zeros, and saves
    ``model`` to ``args.model`` when the test accuracy exceeds the best value
    stored in the global ``acc``.

    Args:
        epoch: Epoch number; only used in the printed messages.
        loss: Training loss of the epoch; only used in the printed message.
    """
    global acc
    global train_matrix
    global test_matrix

    def report(loss_label, loss_value, matrix):
        # Print the summary line and one line per class for `matrix`;
        # return the overall accuracy so the caller can compare it.
        overall = accuracy(matrix)
        print("Epoch #" + str(epoch) + loss_label + str(loss_value) + "\tAccuracy: " + str(overall * 100))
        per_class = class_accuracy(matrix)
        for idx in range(len(classes)):
            print(classes[idx][0] + ": \t" + str(per_class[idx] * 100))
        return overall

    report(" - Train Loss: ", loss, train_matrix)

    # Evaluate first, then read test_matrix: the global is only looked up
    # after test() returns, so any rebinding done during evaluation is seen.
    test_loss = test(model, test_loader, criterion, args.device, onBatch=onTestBatch)
    test_accuracy = report(" - Test Loss: ", test_loss, test_matrix)

    # Start the next epoch with empty confusion matrices.
    n_classes = len(classes)
    train_matrix = np.zeros((n_classes, n_classes))
    test_matrix = np.zeros((n_classes, n_classes))

    if test_accuracy <= acc:
        return
    print("Saving model...")
    torch.save(model, args.model)
    print("Model saved!")
    acc = test_accuracy
def train(self, X, y, epochs=0, batch_size=1, lr=0.1, verbose=True):
    """Fit the model with shuffled mini-batches.

    Each epoch draws a fresh random permutation of the samples, walks over it
    in slices of ``batch_size`` and, per slice, runs ``self.forward``,
    ``self.criterion`` and ``self.update``.

    Args:
        X: Samples, indexable by an integer index array along the first axis.
        y: Targets aligned with ``X`` along the first axis.
        epochs: Number of passes over the data (the default 0 does nothing).
        batch_size: Number of samples per slice.
        lr: Learning rate forwarded to ``self.update``.
        verbose: When true, print mean accuracy and loss after every epoch.
    """
    num_samples = len(X)
    for epoch_no in range(epochs):
        order = np.random.permutation(num_samples)
        X_shuffled = X[order]
        y_shuffled = y[order]
        batch_accs = []
        batch_losses = []
        for start in range(0, num_samples, batch_size):
            stop = start + batch_size
            # NOTE: the final slice may hold fewer than batch_size samples.
            # Batches are transposed so the sample index comes last:
            #   X_batch: (num_features, num_samples)
            #   y_batch: (num_classes, num_samples), one-hot encoded
            X_batch = X_shuffled[start:stop].T
            y_batch = y_shuffled[start:stop].T
            y_pred, cache = self.forward(X_batch)
            batch_loss = self.criterion(y_pred, y_batch)
            # update() receives the full dataset size, not the slice size.
            self.update(y_pred, y_batch, cache, num_samples, lr)
            batch_accs.append(accuracy(y_pred, y_batch))
            batch_losses.append(batch_loss)
        if not verbose:
            continue
        print('[epoch {}] acc: {:.3f}, loss: {:.3f}'.format(
            epoch_no, np.mean(batch_accs), np.mean(batch_losses)))
def val_iter(epoch, net, criterion, loader, checkpoint_dict, use_cuda, cfg, writer=None):
    """Run one validation pass and checkpoint the model when top-1 improves.

    Args:
        epoch: Current epoch index; logged as ``epoch + 1`` to ``writer`` and
            stored in the checkpoint.
        net: Model to evaluate.
        criterion: Loss callable applied to ``(outputs, targets)``.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        checkpoint_dict: Mutable dict. Reads ``'acc'``, ``'dataset'`` and
            ``'file_name'``; on improvement writes ``'net'``, ``'acc'`` and
            ``'epoch'`` and is saved with ``torch.save``.
        use_cuda: Move each batch to the GPU when true.
        cfg: Unused here; kept so existing callers keep working.
        writer: Optional object with ``add_scalar``; receives the mean loss
            under the tag ``'Loss/Val'``.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    # eval() already switches every submodule out of training mode, so no
    # separate `net.training = False` is needed.
    net.eval()
    val_loss, output_list, target_list = 0, [], []
    num_batches = 0
    with torch.no_grad():
        for inputs, targets in loader:
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            # Tensors are passed directly; wrapping them in Variable is a
            # no-op on any PyTorch release that provides torch.no_grad().
            outputs = net(inputs)
            val_loss += criterion(outputs, targets).item()
            output_list.append(outputs.data)
            target_list.append(targets.data)
            num_batches += 1

    if num_batches == 0:
        # Fail with a clear message instead of an obscure torch.cat/NameError.
        raise ValueError('val_iter: loader produced no batches')

    # One denominator for both the logged and the printed mean loss.
    mean_loss = val_loss / num_batches
    if writer is not None:
        writer.add_scalar('Loss/Val', mean_loss, epoch + 1)

    output_cat = torch.cat(output_list, dim=0)
    target_cat = torch.cat(target_list, dim=0)
    acc1, acc5 = accuracy(output_cat, target_cat, topk=(1, 5))

    print("\n| Validation Epoch #%d\t\t\tLoss: %.4f Acc@1: %.2f%%" % (epoch, mean_loss, acc1))

    # Save checkpoint when best model
    if acc1 > checkpoint_dict['acc']:
        print('| Saving Best model...\t\t\tTop1 = %.2f%%\tTop5 = %.2f%%' % (acc1, acc5))
        # Unwrap only when a wrapper is actually present: a CUDA model that
        # was never wrapped has no `.module` attribute.
        model = net.module if use_cuda and hasattr(net, 'module') else net
        checkpoint_dict['net'] = model.state_dict()
        checkpoint_dict['acc'] = acc1
        checkpoint_dict['epoch'] = epoch
        save_point = './checkpoint/{}/'.format(checkpoint_dict['dataset'])
        # Creates ./checkpoint and the dataset subdirectory in one race-free call.
        os.makedirs(save_point, exist_ok=True)
        torch.save(
            checkpoint_dict,
            '{}/{}.t7'.format(save_point, checkpoint_dict['file_name']))
def test(net, testloader, use_cuda, cfg):
    """Evaluate ``net`` on ``testloader`` and print top-1 / top-5 accuracy.

    Asserts that a ``checkpoint`` directory exists in the working directory
    before evaluating. Nothing is returned; the result is only printed.

    Args:
        net: Model to evaluate.
        testloader: Iterable yielding ``(inputs, targets)`` batches.
        use_cuda: Move each batch to the GPU when true.
        cfg: Not used by this function.
    """
    print('\n[Test Phase] : Model setup')
    assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'

    # Put the model into inference mode before running any batch.
    net.eval()
    net.training = False

    collected_outputs = []
    collected_targets = []
    with torch.no_grad():
        for inputs, targets in testloader:
            if use_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()
            inputs = Variable(inputs)
            targets = Variable(targets)
            outputs = net(inputs)
            collected_outputs.append(outputs.data)
            collected_targets.append(targets.data)

    # Score all batches at once rather than averaging per-batch accuracies.
    all_outputs = torch.cat(collected_outputs, dim=0)
    all_targets = torch.cat(collected_targets, dim=0)
    top1, top5 = accuracy(all_outputs, all_targets, topk=(1, 5))
    print("| Test Result\tAcc@1: %.2f%%\tAcc@5: %.2f%%" % (top1, top5))
f, minibatch_size=200, epoch=20, learning_rate=0.01) best_net = mf.plot_history(e_loss, e_accuracy, e_validate, e_loss_val) mb = mf.batchdata(x_test, 1000) pred = [] for j in range(len(mb)): pred.append(e_nnet[best_net[0]].predict(mb[j])) pv = np.concatenate(pred, axis=0) # y_pred = e_nnet[best_net[0]].predict(x_test) print('Test Set Accuracy with best model parameters: {}'.format( mf.accuracy(y_test, pv))) f.write('\n\n') f.write('Test Set Accuracy with best model parameters: {}\n'.format( mf.accuracy(y_test, pv))) # Print classification report print("Classification report \n=======================") print(classification_report(y_true=y_test, y_pred=pv)) print("Confusion matrix \n=======================") print(confusion_matrix(y_true=y_test, y_pred=pv)) f.write("Classification report \n=======================\n") f.write(classification_report(y_true=y_test, y_pred=pv) + '\n') # Compute confusion matrix cnf_matrix = confusion_matrix(y_true=y_test, y_pred=pv)
test_matrix = np.zeros((len(classes), len(classes))) def onTestBatch(batch_id, features, labels, output, loss): global test_matrix output = torch.argmax(output, dim=1) mat = confusion_matrix(labels, output, len(classes)) test_matrix = np.add(test_matrix, mat) acc = 0 print("Testing model...") loss = test(model, test_loader, criterion, args.device, onBatch=onTestBatch) _acc = accuracy(test_matrix) if loaded: acc = _acc _test_ca = class_accuracy(test_matrix) for i in range(len(classes)): print(classes[i][0] + ": \t" + str(_test_ca[i] * 100)) print("Before Training - Loss: " + str(loss) + "\tAccuracy: " + str(_acc * 100)) test_matrix = np.zeros((len(classes), len(classes))) print("Training model...") def onTrainEpoch(epoch, loss): global acc global train_matrix global test_matrix
# Script fragment: time the MLP's predictions, then print and log its
# accuracy and per-class precision/recall/F-score figures.
# `start_test` for the first timing is set before this fragment.
y_pred_mlp = mlp.predict(X_test_ml)
end_test = time.time()
print("MLP test time: ", end_test - start_test)
file.write("MLP test time: " + str(end_test - start_test) + "\n")
file.write("MLP Final loss: " + str(mlp.loss_) + "\n")
print(mlp.loss_)
# Time a second prediction pass, this time over the training data.
start_test = time.time()
y_train_pred_mlp = mlp.predict(X_train_ml)
end_test = time.time()
print("Train_accuracy test time: ", end_test - start_test)
# NOTE(review): predictions are passed first and true labels second. If
# confusion_matrix is scikit-learn's (y_true, y_pred), these matrices are
# transposed relative to its convention - confirm what fu.accuracy expects.
cm = confusion_matrix(y_pred_mlp, y_test_ml)
train_cm = confusion_matrix(y_train_pred_mlp, y_train_ml)
print("Test Accuracy of MLPClassifier : ", fu.accuracy(cm))
print("Train Accuracy of MLPClassifier : ", fu.accuracy(train_cm))
# fu.accuracy is recomputed for the log file rather than reused from above.
file.write("MLP Classifier Test Accuracy: " + str(fu.accuracy(cm)) + "\n")
file.write("MLP Classifier Train Accuracy: " + str(fu.accuracy(train_cm)) + "\n")
stat_res = precision_recall_fscore_support(y_test_ml, y_pred_mlp, labels=labels_nums)
print(stat_res)
# Disabled: fu.print_confusion_matrix(cm,labels_nums)
file.write("MLP Precision, Recall, F1 score \n\n")
# Write each metric name followed by the matching entry of stat_res;
# metrics_list is assumed to be ordered like stat_res - TODO confirm.
for i, met in enumerate(metrics_list):
    file.write(met + "\n")
    file.write(str(stat_res[i]) + "\n")
# Script fragment: build the MNIST test loader, the model, optimizer and
# losses, then train for 10 epochs. `train_loader` is defined before this
# fragment.
# NOTE(review): the source lost its indentation; the nesting below (status
# line printed once per batch) is reconstructed - confirm against the file.
test_loader = DataLoader(datasets.MNIST('data', train=False, transform=transforms.Compose([
    transforms.ToTensor()])), batch_size=1)
model = CapsNet()
optimizer = optim.Adam(model.parameters())
margin_loss = DigitMarginLoss()
# NOTE(review): `size_average` is deprecated in newer PyTorch in favour of
# `reduction='sum'` - confirm the targeted version before changing.
reconstruction_loss = torch.nn.MSELoss(size_average=False)
model.train()
for epoch in range(1, 11):
    # Running totals, reset at the start of every epoch.
    epoch_tot_loss = 0
    epoch_tot_acc = 0
    # Batches are numbered from 1 so `batch` can be used as a divisor below.
    for batch, (data, target) in enumerate(train_loader, 1):
        data = Variable(data)
        target = Variable(target)
        digit_caps, reconstruction = model(data, target)
        # Total loss: margin loss plus the reconstruction loss scaled by 0.0005.
        loss = margin_loss(digit_caps, target) + 0.0005 * reconstruction_loss(reconstruction, data.view(-1))
        # NOTE(review): the loss object itself is accumulated (`.data` is
        # read from the running total below), not a plain Python number;
        # presumably that is why backward() needs retain_graph=True - confirm.
        epoch_tot_loss += loss
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
        acc = accuracy(digit_caps, target)
        epoch_tot_acc += acc
        # Shows the current batch loss, the running mean loss in parentheses
        # and the running mean accuracy as a percentage.
        # NOTE(review): `.data[0]` on a scalar loss is rejected by newer
        # PyTorch releases (use `.item()` there) - confirm the version.
        template = '[Epoch {}] Loss: {:.4f} ({:.4f}), Acc: {:.2f}%'
        print(template.format(epoch, loss.data[0], (epoch_tot_loss / batch).data[0], 100 * (epoch_tot_acc / batch)))
# sys.exit() ### lamda = 0.005 #Learning rate iteration = 2500 ### W1, b1, W2, b2, W3, b3 = functions.initialize_parameters(h1=128,h2=64) for i in range(0, iteration): A1, cache_1 = functions.linear_propagate(train_x, W1, b1, 'relu') A2, cache_2 = functions.linear_propagate(A1, W2, b2, 'relu') Y_hat, cache_3 = functions.linear_propagate(A2, W3, b3, 'softmax') cost = functions.compute_cost(Y_hat, train_y) pb = functions.accuracy(Y_hat, train_y) print(cost) # dA3 = -1 * (np.divide(train_y, Y_hat) - np.divide(1 - train_y, 1 - Y_hat)) dA3 = Y_hat - train_y dA2, dW3, db3 = functions.linear_backpropagate(dA3, cache_3, 'softmax') dA1, dW2, db2 = functions.linear_backpropagate(dA2, cache_2, 'relu') dA0, dW1, db1 = functions.linear_backpropagate(dA1, cache_1, 'relu') #Update W W1 = W1 - (lamda * dW1) W2 = W2 - (lamda * dW2)
def main():
    """Train the gradient-descent classifier, track accuracy and plot it.

    Reads the run options via ``functions.read_argv``, loads the four
    gzipped IDX files found under ``path``, preprocesses them, then fits the
    model for at most 200 epochs. Training stops early once the loss changes
    by less than 0.0005 between consecutive epochs. Train and test accuracy
    per epoch are plotted at the end.
    """
    # Run options and fixed hyper-parameters.
    regularization, feature_type, path = functions.read_argv()
    epochs = 200
    mini_batch_size = 10000
    stop_criteria = 0.0005

    # Raw data, read in the order train images, train labels, test images,
    # test labels.
    train_data = functions.read_gz_idx(path + 'train-images-idx3-ubyte.gz')
    train_label = functions.read_gz_idx(path + 'train-labels-idx1-ubyte.gz')
    test_data = functions.read_gz_idx(path + 't10k-images-idx3-ubyte.gz')
    test_label = functions.read_gz_idx(path + 't10k-labels-idx1-ubyte.gz')

    train_data, train_label, test_data, test_label = data_preprocess(
        train_data, train_label, test_data, test_label, feature_type)

    # The model is sized from the length of the first training sample.
    model = StochasticGradientDescent(
        len(train_data[0]), regularization, mini_batch_size)

    accuracy_train = []
    accuracy_test = []
    prev_loss = 0
    epochs_run = epochs  # lowered below if training stops early

    for e in range(epochs):
        # Shuffle only when one mini-batch covers the whole training set.
        if mini_batch_size == len(train_data):
            train_data, train_label = functions.unison_shuffle(
                train_data, train_label)

        loss = model.fit(train_data, train_label, 0)

        # functions.accuracy results are divided by 100 before being stored.
        acc_train = functions.accuracy(
            model.classify(train_data), train_label) / 100
        acc_test = functions.accuracy(
            model.classify(test_data), test_label) / 100
        accuracy_train.append(acc_train)
        accuracy_test.append(acc_test)

        print("epoch {0:3d}:\t Train loss: {1:8.4f},\t Train acc: {2:8.4f}, \tTest acc: {3:8.4f}".format(
            e + 1, loss, acc_train, acc_test))

        # Early stop: the loss barely moved since the previous epoch.
        if np.absolute(prev_loss - loss) < stop_criteria:
            epochs_run = e + 1
            break
        prev_loss = loss

    print('End of Train & Test')
    print('Plotting ... ')

    title = ('GD Regularize ' if regularization else 'GD ') + feature_type
    functions.plot(
        title, list(range(1, epochs_run + 1)), accuracy_train, accuracy_test)
    print("End Of The Program")
# Script fragment: score the second and third models, tabulate performance
# measures for all three, then build per-cohort subsets.
# `y_hat1` is computed before this fragment.
p2 = fn.h(X_test_llenado, w2)
# Predictions are obtained with 0.5 passed as the second argument.
y_hat2 = fn.predict(p2, 0.5)
p3 = fn.h(X_test_sllenado, w3)
y_hat3 = fn.predict(p3, 0.5)
# ---------------------------- Performance ---------------------------------------------------------
# Targets are flattened to 1-D before being compared with the predictions.
V1 = fn.get_values(y_test_org.reshape(-1), y_hat1)
V2 = fn.get_values(y_test_llenado.reshape(-1), y_hat2)
V3 = fn.get_values(y_test_sllenado.reshape(-1), y_hat3)
# One row per entry of parametros.index, one column per measure.
medidas_d = pd.DataFrame(
    columns=['Precisión', 'Sensibilidad', 'Especificidad'],
    index=parametros.index)
# NOTE(review): the 'Precisión' column is filled by fn.accuracy - confirm
# that accuracy (not precision in the metric sense) is what is intended.
medidas_d['Precisión'] = [fn.accuracy(i) for i in [V1, V2, V3]]
medidas_d['Sensibilidad'] = [fn.sensitivity(i) for i in [V1, V2, V3]]
medidas_d['Especificidad'] = [fn.specificity(i) for i in [V1, V2, V3]]
print(medidas_d)
# --------------------------- Performance by cohort ------------------------------------------------
# For each cohort: filter the rows, keep column 'p131' as the target, then
# drop that column from the cohort frame.
# NOTE(review): drop(..., inplace=True) runs on a frame obtained by boolean
# filtering; pandas may emit a SettingWithCopyWarning here - confirm.
cohorte_p13 = data_techo_llenado[data_techo_llenado['p13'] == 1]
y_cp13 = cohorte_p13['p131']
cohorte_p13.drop('p131', axis=1, inplace=True)
cohorte_p17 = data_techo_llenado[data_techo_llenado['p17'].isin([0, 4])]
y_cp17 = cohorte_p17['p131']
cohorte_p17.drop('p131', axis=1, inplace=True)
cohorte_p44 = data_techo_llenado[data_techo_llenado['p44'].isin([1, 2, 3])]
y_cp44 = cohorte_p44['p131']
cohorte_p44.drop('p131', axis=1, inplace=True)
tag_scores = torch.sigmoid(tag_scores) # print("after:",tag_scores) k = 0 for i in tag_scores.squeeze(0): if (i > x): for m in target_dict: if (target_dict[m] == k): # print(m + " ",end="") save_loc.write(m + " ") k += 1 save_loc.write("\n") t += 1 save_loc.close() # print(x,functions.accuracy()[0]) if (functions.accuracy()[0] > max_accuracy): max_accuracy = functions.accuracy() max_accuracy_index = x # exit() save_loc = open(settings.shengcheng_result, "w+", encoding="utf-8") break # 阈值已确定时用break跳出 print("最大准确率:", max_accuracy[0]) print("此时阈值:", max_accuracy_index) # 根据阈值进行标签分类 save_loc.close() save_loc = open(settings.shengcheng_result, "w+", encoding="utf-8") for amount in range(n): inputs = test_comment[amount]
eta = 0.01 gamma = 0.1 # learning rate? # Calculating the beta values based og the training set betas_train = func.steepest(X_train, y_train, gamma) #betas_train = func.SGD_beta(X_train, y_train, eta, gamma) # Calculating ytilde and the model of logistic regression z = X_test @ betas_train # choosing best beta here? model = func.logistic_function(z) model = func.IndicatorFunc(model) acc_scikit, TPR_scikit, precision_scikit, f1_score_scikit, AUC_scikit, predict_proba_scikit = func.scikit( X_train, X_test, y_train, y_test, model) Acc = func.accuracy(model, y_test) Acc_sklearn = acc_scikit F1 = func.F1_score(y_test, model) F1_sklearn = f1_score_scikit Rec = func.recall(y_test, model) Rec_sklearn = TPR_scikit #precision = func.precision(y_test, model) #------------------------------------------------------------------------------ # We can test Accuracy score against scikit learn: #------------------------------------------------------------------------------ def test_Accuracy(): assert Acc == Acc_sklearn, \ print("Our Accuracy score is not equal to the scikit learn Accuracy score.\
# Script fragment: time the MLP classifier's predictions, then print and
# log its accuracy and per-class precision/recall/F-score figures.
# `start_test` for the first timing is set before this fragment.
y_pred = mlp_clf.predict(X_test)
end_test = time.time()
print("MLP test time: ", end_test - start_test)
file.write("MLP test time: " + str(end_test - start_test) + "\n")
file.write("MLP Final loss: " + str(mlp_clf.loss_) + "\n")
print(mlp_clf.loss_)
# Time a second prediction pass, this time over the training data.
start_test = time.time()
y_train_pred = mlp_clf.predict(X_train)
end_test = time.time()
print("Train_accuracy test time: ", end_test - start_test)
# NOTE(review): predictions are passed first and true labels second. If
# confusion_matrix is scikit-learn's (y_true, y_pred), these matrices are
# transposed relative to its convention - confirm what fu.accuracy expects.
cm = confusion_matrix(y_pred, y_test)
train_cm = confusion_matrix(y_train_pred, y_train)
print("MLP Classifier Test Accuracy: ", fu.accuracy(cm))
print("MLP Classifier Train Accuracy: ", fu.accuracy(train_cm))
# fu.accuracy is recomputed for the log file rather than reused from above.
file.write("MLP Classifier Test Accuracy: " + str(fu.accuracy(cm)) + "\n")
file.write("MLP Classifier Train Accuracy: " + str(fu.accuracy(train_cm)) + "\n")
stat_res = precision_recall_fscore_support(y_test, y_pred, labels=unique_labels)
print(stat_res)
file.write("MLP Precision, Recall, F1 score \n\n")
# Write each metric name followed by the matching entry of stat_res;
# metrics_list is assumed to be ordered like stat_res - TODO confirm.
for i, met in enumerate(metrics_list):
    file.write(met + "\n")
    file.write(str(stat_res[i]) + "\n")
# Disabled: fu.print_confusion_matrix(cm,np.unique(selected_crops_array))
# Plot treshold plot: #threshold_plot = func.threshold_plot(X_train, X_test, y_train, y_test, gamma, thresholds) # Calculating ytilde and the model of logistic regression z = X_test @ betas_train # choosing best beta here? model = func.logistic_function(z) model = func.IndicatorFunc(model, threshold=0.44) # Get AUC score and predict_proba_scikit. Used for plots and terminal print acc_scikit, TPR_scikit, precision_scikit, f1_score_scikit, AUC_scikit, predict_proba_scikit \ = func.scikit(X_train, X_test, y_train, y_test, model) # Calculating the different metrics: print('\n-------------------------------------------') print('The accuracy is : %.3f' % func.accuracy(model, y_test)) print('The F1 score is : %.3f' % func.F1_score(y_test, model)) print('The precision is : %.3f' % func.precision(y_test, model)) print('The recall is : %.3f' % func.recall(y_test, model)) print('The AUC is : %.3f' % AUC_scikit) print('-------------------------------------------') # Make Cumulative gain and ROC plot P.Cumulative_gain_plot(y_test, model) P.ROC_plot(y_test, predict_proba_scikit) # Creating a Confusion matrix using pandas and pandas dataframe P.Confusion_matrix(y_test, model) elif arg == "NN":
# Gradient descent method with specified # epochs for e in range(args.numepoch): # Get list of sum (Z) values for training and validation data sums = F.sum_function(weights, train_data, bias) valid_sums = F.sum_function(weights, valid_data, bias) # Run summations (Z) through activation function to get array Y train_act = F.activation(args.actfunction, sums) valid_act = F.activation(args.actfunction, valid_sums) # Record loss and accuracy at end of each epoch on training and validation data train_loss[e] = F.loss(train_act, train_label) valid_loss[e] = F.loss(valid_act, valid_label) train_acc[e] = F.accuracy(train_label, train_act) valid_acc[e] = F.accuracy(valid_label, valid_act) # Calculate weights gradient for each of 9 weights weight_grad = [] for w in range(0, 9, 1): weight_grad += F.weights_gradient(args.actfunction, train_act, train_data, train_label, w) # Calculate bias gradient for current bias bias_grad = F.bias_gradient(args.actfunction, train_act, train_label) # Adjust weights for w in range(0, 9, 1): weights[w] = weights[w] - (weight_grad[w] * args.learningrate)
# Script fragment from inside an outer iteration loop (its header and the
# variable `i` are defined before this fragment): one pass of mini-batch
# updates followed by evaluation on the training and validation data.
# NOTE(review): the source lost its indentation; the nesting below is
# reconstructed - confirm against the file.
# Only full batches are visited: leftover samples that do not fill a batch
# of size BATCH are skipped.
for j in range(int(train_x.shape[0] / BATCH)):
    x_batch = train_x[j * BATCH:(j + 1) * BATCH]
    y_batch = train_y[j * BATCH:(j + 1) * BATCH]
    diff = y_batch - f(w, x_batch)
    # Gradient plus an ALPHA-weighted penalty on w; the zero stacked on top
    # leaves the first entry of w out of the penalty.
    dw = -np.dot(x_batch.T, diff) + ALPHA * np.vstack(
        (np.array([[0.]]), w[1:]))
    # NOTE(review): sys.argv[3] is a string, so any non-empty value
    # (including "0") takes the first branch - confirm the intent.
    if sys.argv[3]:
        dw2 += dw**2  # accumulate dw^2 for estimation of second derivative
    else:
        dw2 += 1
    # Step size is LR divided by the square root of the accumulator.
    w = w - LR / np.sqrt(dw2) * (dw)
# Evaluate on the full training set after the pass over all batches.
pred_y = f(w, train_x)
loss = np.around(BCE(pred_y, train_y) / train_x.shape[0], decimals=4)
acc = np.around(accuracy(np.around(pred_y), train_y), decimals=4)
# NOTE(review): `is not None` is the idiomatic comparison here.
if val_data != None:
    pred_y = f(w, val_data[0])
    val_loss = np.around(BCE(pred_y, val_data[1]) / val_data[0].shape[0],
                         decimals=4)
    # NOTE(review): validation predictions use `>= 0.5` while training
    # predictions above use np.around; the two can differ at exactly 0.5.
    val_acc = np.around(accuracy(np.where(pred_y >= 0.5, 1, 0), val_data[1]),
                        decimals=4)
    print(
        "iteration = {}, loss = {}, acc = {}, val_loss = {}, val_acc = {}"
        .format(i, loss, acc, val_loss, val_acc))
    # Remember the weights with the best validation accuracy so far; ties
    # go to the later iteration.
    if val_acc >= best_w[-1]:
        best_w = (w, i, acc, val_acc)
else:
    print("iteration = {}, loss = {}, acc = {}".format(i, loss, acc))
# Script fragment: the per-batch body of an evaluation loop whose header
# (and `inputs`, `labels`, `n_samples`) is defined before this fragment.
if use_cuda:
    inputs = inputs.cuda()
    labels = labels.cuda()
# Get predictions: index [1] of torch.max over dim=1 is the argmax per row.
output = net(inputs)
preds = torch.max(input=output, dim=1)[1]
# Convert predictions to a NumPy array (via the CPU when running on CUDA).
if use_cuda:
    preds = preds.data.cpu().numpy()
else:
    preds = preds.data.numpy()
# Accumulate loss and accuracy weighted by n_samples, so dividing by
# test_lengths below gives per-sample averages.
test_losses += criterion(output, labels).item() * n_samples
test_accs += f.accuracy(y_true=labels, y_pred=output) * n_samples
test_lengths += n_samples
# Save predictions and labels
test_preds += preds.tolist()
test_targs += labels.tolist()
# Save example inputs.
# NOTE(review): while fewer than n_examples are stored, this appends
# n_examples items from the current batch at once, and indexes positions
# 0..n_examples-1, which fails for a batch smaller than n_examples - confirm.
if len(examples) < n_examples:
    for n in range(n_examples):
        examples.append([inputs[n], labels[n].item(), preds[n].item()])
# Update the progress bar with running mean loss, its exponential
# (reported as `perp`) and running mean accuracy.
pbar_test.set_postfix(loss=test_losses / test_lengths,
                      perp=np.exp(test_losses / test_lengths),
                      acc=test_accs / test_lengths)
if __name__ == "__main__":
    # Load the training data with no validation split; only the features
    # and labels are used from the returned tuple.
    _, train_x, train_y, _ = train_data(sys.argv[1], sys.argv[2], sys.argv[3], val_split=0)

    # Per-class statistics for the two classes (labels 0 and 1): sample
    # count, feature mean and scatter matrix.
    n_features = train_x.shape[-1]
    num = np.zeros(2)
    mean = np.zeros((2, n_features))
    cov = np.zeros((2, n_features, n_features))
    for label in (0, 1):
        rows = np.where(train_y == label)[0]
        members = train_x[rows]
        num[label] = rows.shape[0]
        mean[label] = np.sum(members, axis=0) / num[label]
        centered = members - mean[label]
        cov[label] = np.dot(centered.T, centered)

    # A single covariance shared by both classes: the summed scatter
    # matrices divided by the total number of samples.
    cov = np.sum(cov, axis=0) / train_x.shape[0]

    # The covariance may be singular, so use the pseudo-inverse rather than
    # np.linalg.inv(), which could give a large numerical error.
    cov_inv = np.linalg.pinv(cov)

    # Weight vector and bias computed from the class means, the shared
    # covariance and the class counts.
    mean_gap = mean[1] - mean[0]
    w = np.dot(cov_inv.T, mean_gap)
    quad_1 = np.dot(np.dot(mean[1].T, cov_inv), mean[1])
    quad_0 = np.dot(np.dot(mean[0].T, cov_inv), mean[0])
    b = -0.5 * quad_1 + 0.5 * quad_0 + np.log(num[1] / num[0])

    # Store the bias as the first row of the weight column vector.
    bias_row = b.reshape((1, 1))
    weight_col = w.reshape(w.shape[0], 1)
    w = np.vstack((bias_row, weight_col))
    # NOTE: np.save appends ".npy" when the file name lacks that extension.
    np.save("model.lda", w)

    # Evaluate on the training set: prepend a column of ones to match the
    # bias row, make the labels a column, and round the predictions.
    ones_col = np.ones((train_x.shape[0], 1)).astype(float)
    train_x = np.concatenate((ones_col, train_x), axis=1)
    train_y = train_y.reshape(train_y.shape[0], 1)
    pred_y = f(w, train_x)
    acc = np.around(accuracy(np.around(pred_y), train_y), decimals=4)
    print(acc)