def evaluate(config):
    """Evaluate a trained model on one MNIST test batch and show training curves.

    Args:
        config: experiment configuration; ``config.model.name`` selects the
            architecture ('mlp' or 'cnn').
    """
    # Define the model architecture requested by the config.
    if config.model.name == 'mlp':
        model = MLP(config)
    elif config.model.name == 'cnn':
        model = CNN(config)
    else:
        # FIX: previously fell through silently and crashed later with
        # UnboundLocalError on `model`; fail fast with a clear message instead.
        raise ValueError('Unknown model name: {}'.format(config.model.name))

    # Load trained weights & training statistics.
    model = load_model(config, model)
    loss, accuracy = load_statistics(config)

    # Print performance graphs.
    display_model_performance(loss, accuracy)

    # Load the MNIST dataset; only the test loader is used here.
    train_loader, test_loader = load_mnist(config)
    test_iter = iter(test_loader)
    # FIX: `iterator.next()` is Python 2 only (removed in Python 3);
    # use the builtin next() on the iterator.
    images, labels = next(test_iter)

    # Evaluate accuracy and loss on a single test batch.
    model.eval()  # disable dropout / batch-norm updates during evaluation
    with torch.no_grad():
        logits = model(images)
        test_loss = nn.CrossEntropyLoss()(logits, labels).detach().numpy()
    test_acc = calculate_accuracy(logits.detach().numpy(), labels)
    print("test loss: ", test_loss)
    print("test accuracy: ", test_acc)
def train(config):
    """Train an MLP/CNN classifier on MNIST and persist the model + statistics.

    Args:
        config: experiment configuration (model name, SGD hyper-parameters,
            batch size, epoch count).
    """
    # Load the MNIST dataset.
    train_loader, test_loader = load_mnist(config)

    # Define the model.
    if config.model.name == 'mlp':
        model = MLP(config)
    elif config.model.name == 'cnn':
        model = CNN(config)
    else:
        # FIX: previously fell through silently and crashed later with
        # UnboundLocalError on `model`; fail fast with a clear message instead.
        raise ValueError('Unknown model name: {}'.format(config.model.name))

    # Define loss criterion & optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=config.optimizer.params.lr,
                          momentum=config.optimizer.params.momentum,
                          weight_decay=config.optimizer.params.regularization)

    BATCH_SIZE = config.data.mnist.batch_size
    MAX_EPOCH = config.optimizer.epochs
    BATCHES_PER_EPOCH = len(train_loader)

    loss_batch = []   # per-epoch mean loss
    acc_batch = []    # per-epoch mean accuracy
    running_loss, running_acc = 0, 0
    for epoch in range(MAX_EPOCH):
        batch_loss, batch_acc = 0, 0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # Zero the parameter gradients.
            optimizer.zero_grad()
            # Forward pass.
            logits = model(inputs)
            # Softmax cross-entropy loss.
            loss = criterion(logits, labels)
            # Backward pass & gradient descent.
            loss.backward()
            optimizer.step()

            # Fetch statistics for the current batch.
            acc = calculate_accuracy(logits.detach().numpy(), labels)
            running_acc += acc
            running_loss += loss.item()
            batch_loss += loss.item()
            batch_acc += acc

            # Print running statistics every 50 batches.
            if i % 50 == 49:
                print(
                    'Epoch [{}]/[{}]\t Batch [{}]/[{}]\t loss: [{:.3f}]\t accuracy: [{:.4f}]'
                    .format(epoch + 1, MAX_EPOCH, i + 1, BATCHES_PER_EPOCH,
                            running_loss / 50, running_acc / 50))
                running_loss, running_acc = 0, 0

        # Save per-epoch statistics.
        # FIX: batch_loss/batch_acc accumulate ONE value per batch, so the
        # epoch mean divides by the number of batches, not by the batch size.
        loss_batch.append(batch_loss / BATCHES_PER_EPOCH)
        acc_batch.append(batch_acc / BATCHES_PER_EPOCH)

    # Save model and loss & accuracy curves.
    save_model(model, config)
    save_statistics(loss_batch, acc_batch, config)
def train(lr=args.lr,
          n_hidden=args.n_hidden,
          batch_size=args.batch_size,
          dropout=args.dropout,
          valid_freq=3000,
          disp_freq=1000,
          save_freq=100000,
          max_epochs=args.n_epoch,
          patience=15,
          save_name=args.save_name,
          save_dir=args.save_dir,
          device=args.device):
    """Train the MLP baseline with periodic validation, LR annealing and early stop.

    Defaults are bound from the module-level ``args`` at definition time.

    Args:
        lr: initial learning rate; divided by ``args.lr_anneal`` whenever
            validation accuracy fails to improve.
        n_hidden, batch_size, dropout: model/data hyper-parameters.
        valid_freq: run validation every this many updates.
        disp_freq: print loss/gradient every this many updates.
        save_freq: checkpoint every this many updates (if ``save_name`` set).
        max_epochs: maximum number of passes over the training set.
        patience: consecutive non-improving validations tolerated before stopping.
        save_name, save_dir: checkpoint naming/location.
        device: 'gpu' moves labels/network to CUDA.

    Side effects: reads pickles from ``args.*_path``, writes ``*_best.net`` /
    ``*_final.net`` (and periodic checkpoints) under ``save_dir``, prints
    progress, and evaluates on validation + English/French test sets.
    """
    # Load train/valid set. Each pickle file holds two sequential objects:
    # labels first, then features.
    print('loading train')
    with open(args.train_path, 'rb') as f:
        train_val_y = pickle.load(f)
        train_val_x = pickle.load(f)
    print('loading english test')
    with open(args.en_test_path, 'rb') as f:
        en_test_y = pickle.load(f)
        en_test_x = pickle.load(f)
    print('loading french test')
    with open(args.fr_test_path, 'rb') as f:
        fr_test_y = pickle.load(f)
        fr_test_x = pickle.load(f)

    # Stratified 80/20 train/validation split (n_splits=1, so this loop
    # executes exactly once).
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=1125)
    for train_index, test_index in sss.split(train_val_x, train_val_y):
        train_y = train_val_y[train_index]
        train_x = train_val_x[train_index]
        valid_y = train_val_y[test_index]
        valid_x = train_val_x[test_index]

    print('Number of training sample: %d' % train_x.shape[0])
    print('Number of validation sample: %d' % valid_x.shape[0])
    print('Number of english testing sample: %d' % en_test_x.shape[0])
    print('Number of french testing sample: %d' % fr_test_x.shape[0])
    print('-' * 100)

    kf_valid = get_minibatches_idx(len(valid_y), batch_size)
    kf_en_test = get_minibatches_idx(len(en_test_y), batch_size)
    kf_fr_test = get_minibatches_idx(len(fr_test_y), batch_size)

    # NOTE(review): this comparison is always True as written
    # (device defaults to args.device) and the flag is never used below.
    pin_memory = (device == args.device)

    # Test set
    n_emb = train_x.shape[1]          # input embedding dimensionality
    n_class = len(set(train_y))       # number of target classes

    best_valid_acc = None
    bad_counter = 0                   # consecutive non-improving validations
    uidx = 0                          # the number of updates done
    estop = False                     # early stop switch

    net = MLP(n_mlp_layer=args.n_mlp_layers,
              n_hidden=args.n_hidden,
              dropout=args.dropout,
              n_class=n_class,
              n_emb=n_emb,
              device=args.device)
    if args.load_net != '':
        assert os.path.exists(
            args.load_net), 'Path to pretrained net does not exist'
        net.load_state_dict(torch.load(args.load_net))
        print('Load exists model stored at: ', args.load_net)
    if args.device == 'gpu':
        net = net.cuda()

    # Begin Training
    net.train()
    print('-' * 100)
    print('Model structure: ')
    print('MLP baseline')
    print(net.main)
    print('-' * 100)
    print('Parameters for tuning: ')
    print(net.state_dict().keys())
    print('-' * 100)

    # Define optimizer; only parameters with requires_grad are optimized.
    assert args.optimizer in [
        'SGD', 'Adam', "RMSprop", "LBFGS", "Rprop", "ASGD", "Adadelta",
        "Adagrad", "Adamax"
    ], 'Please choose either SGD or Adam'
    if args.optimizer == 'SGD':
        optimizer = optim.SGD(lr=lr,
                              params=filter(lambda p: p.requires_grad,
                                            net.parameters()),
                              momentum=0.9)
    else:
        optimizer = getattr(optim, args.optimizer)(params=filter(
            lambda p: p.requires_grad, net.parameters()), lr=lr)

    # Exponential decay: lr(epoch) = lr * 0.98**epoch
    lambda2 = lambda epoch: 0.98**epoch
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda2])

    try:
        for eidx in range(max_epochs):
            # NOTE(review): stepping the scheduler at the top of the epoch is
            # the pre-PyTorch-1.1 ordering; kept to preserve the existing
            # learning-rate schedule.
            scheduler.step()
            start_time = time.time()
            n_samples = 0
            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train_y), batch_size, shuffle=True)
            for _, train_index in kf:
                # Remove gradient from previous batch.
                optimizer.zero_grad()
                uidx += 1
                y_batch = torch.autograd.Variable(
                    torch.from_numpy(train_y[train_index]).long())
                x_batch = torch.autograd.Variable(
                    torch.from_numpy(train_x[train_index]).float())
                if net.device == 'gpu':
                    y_batch = y_batch.cuda()
                scores = net.forward(x_batch)
                loss = net.loss(scores, y_batch)
                loss.backward()
                optimizer.step()
                n_samples += len(x_batch)

                # L2 norm of the full gradient vector, for logging only.
                gradient = 0
                for name, w in net.named_parameters():
                    if w.grad is not None:
                        w_grad = torch.norm(w.grad.data, 2)**2
                        gradient += w_grad
                gradient = gradient**0.5

                if np.mod(uidx, disp_freq) == 0:
                    # FIX: `loss.data[0]` was removed in modern PyTorch
                    # (0-dim tensors can no longer be indexed); use .item().
                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ',
                          loss.item(), 'Gradient ', gradient)

                if save_name and np.mod(uidx, save_freq) == 0:
                    print('Saving...')
                    torch.save(
                        net.state_dict(), '%s/%s_epoch%d_update%d.net' %
                        (save_dir, save_name, eidx, uidx))

                if np.mod(uidx, valid_freq) == 0:
                    print("=" * 50)
                    print('Evaluation on validation set: ')
                    kf_valid = get_minibatches_idx(len(valid_y), batch_size)
                    top_1_acc, top_n_acc = eval.net_evaluation(
                        net, kf_valid, valid_x, valid_y)
                    # Save best performance state_dict for testing.
                    if best_valid_acc is None:
                        best_valid_acc = top_1_acc
                        best_state_dict = net.state_dict()
                        torch.save(best_state_dict,
                                   '%s/%s_best.net' % (save_dir, save_name))
                    else:
                        if top_1_acc > best_valid_acc:
                            print(
                                'Best validation performance so far, saving model parameters'
                            )
                            print("*" * 50)
                            bad_counter = 0  # reset counter
                            best_valid_acc = top_1_acc
                            best_state_dict = net.state_dict()
                            torch.save(
                                best_state_dict,
                                '%s/%s_best.net' % (save_dir, save_name))
                        else:
                            bad_counter += 1
                            print('Validation accuracy: ', 100 * top_1_acc)
                            print('Getting worse, patience left: ',
                                  patience - bad_counter)
                            print('Best validation accuracy now: ',
                                  100 * best_valid_acc)
                            # Learning rate annealing: rebuild the optimizer
                            # with the reduced rate (momentum/state reset).
                            lr /= args.lr_anneal
                            print('Learning rate annealed to: ', lr)
                            print('*' * 100)
                            if args.optimizer == 'SGD':
                                optimizer = optim.SGD(
                                    lr=lr,
                                    params=filter(lambda p: p.requires_grad,
                                                  net.parameters()),
                                    momentum=0.9)
                            else:
                                optimizer = getattr(optim, args.optimizer)(
                                    params=filter(lambda p: p.requires_grad,
                                                  net.parameters()), lr=lr)
                    if bad_counter > patience:
                        print('-' * 100)
                        print('Early Stop!')
                        estop = True
                        break

            epoch_time = time.time() - start_time
            print('Epoch processing time: %.2f s' % epoch_time)
            print('Seen %d samples' % n_samples)
            if estop:
                break

        print('-' * 100)
        print('Training finish')
        # Keep the last weights as *_final.net, then reload the best weights
        # for final evaluation.
        best_state_dict = torch.load('%s/%s_best.net' % (save_dir, save_name))
        torch.save(net.state_dict(), '%s/%s_final.net' % (save_dir, save_name))
        net.load_state_dict(best_state_dict)
        print('Evaluation on validation set: ')
        kf_valid = get_minibatches_idx(len(valid_y), batch_size)
        eval.net_evaluation(net, kf_valid, valid_x, valid_y)
        # Evaluate model on test set.
        print('Evaluation on test set: ')
        print('Evaluation on English testset: ')
        eval.net_evaluation(net, kf_en_test, en_test_x, en_test_y)
        print('Evaluation on French testset: ')
        eval.net_evaluation(net, kf_fr_test, fr_test_x, fr_test_y)
    except KeyboardInterrupt:
        # Manual interrupt: persist final weights, then evaluate the best ones.
        print('-' * 100)
        print("Training interrupted, saving final model...")
        best_state_dict = torch.load('%s/%s_best.net' % (save_dir, save_name))
        torch.save(net.state_dict(), '%s/%s_final.net' % (save_dir, save_name))
        net.load_state_dict(best_state_dict)
        print('Evaluation on validation set: ')
        kf_valid = get_minibatches_idx(len(valid_y), batch_size)
        eval.net_evaluation(net, kf_valid, valid_x, valid_y)
        # Evaluate model on test set.
        print('Evaluation on English testset: ')
        eval.net_evaluation(net, kf_en_test, en_test_x, en_test_y)
        print('Evaluation on French testset: ')
        eval.net_evaluation(net, kf_fr_test, fr_test_x, fr_test_y)
def test(n, run_number):
    """Replay one test run, letting the MLP pick a (cores, LLC) config per step.

    Args:
        n: input-mode flag; 1 means the model input includes an 8-way one-hot
            encoding of the previous prediction (15 inputs vs 7).
        run_number: index of the ``./test_{run_number}`` directory to read.

    Side effects: writes ``parameters.txt`` with the model weights, plots the
    predicted Cores/LLC traces, and prints cycle-count comparisons against the
    complete (8,100) configuration and the per-row best configuration.
    """
    # Per-configuration traces, keyed 0..7 in (cores, llc) order.
    # FIX: the eight copy-pasted read_csv calls are collapsed into one loop
    # over the configuration grid.
    configs = [(4, 40), (4, 60), (4, 80), (4, 100),
               (8, 40), (8, 60), (8, 80), (8, 100)]
    df_keys = {
        idx: pd.read_csv('./test_{}/merged_config_test_{}_{}.csv'.format(
            run_number, n_cores, n_llc))
        for idx, (n_cores, n_llc) in enumerate(configs)
    }
    # Full-resource configuration (8 cores, 100% LLC): baseline trace.
    df_8_100 = df_keys[7]
    best_config = pd.read_csv('./test_{}/best_config_file.csv'.format(run_number))

    min_rows = get_min_rows(df_keys, 0)

    # 15 inputs when the previous decision is fed back one-hot, else 7.
    if n == 1:
        model = MLP(15, 16, 8)
    else:
        model = MLP(7, 16, 8)
    model.load_state_dict(
        torch.load('checkpoint/MLP_model_19_train.pwf', map_location='cpu'))
    model.eval()

    # First input row: feature columns 1,2,3,5,6,7,8 of the baseline trace.
    data_point = list(df_8_100.iloc[0, [1, 2, 3, 5, 6, 7, 8]].values)
    if n == 1:
        one_hot_y = [0, 0, 0, 0, 0, 0, 0, 0]  # no previous prediction yet
        data_point = torch.Tensor(data_point + one_hot_y)
    else:
        data_point = torch.Tensor(data_point)

    # Dump learned parameters for inspection; layers alternate weight, bias.
    with open("parameters.txt", "w") as f:
        f.write("Parameters \n")
        for i, param in enumerate(list(model.parameters())):
            if i % 2 == 0:
                # FIX: use an integer layer index (i // 2 + 1); the old
                # i / 2 + 1 wrote float labels like "1.0 layer" while the
                # bias branch wrote integers.
                weight = "weight for {} layer: ".format(i // 2 + 1) + str(param) + "\n"
                f.write(weight)
            else:
                bias = "bias for {} layer: ".format(int(i / 2) + 1) + str(param) + "\n"
                f.write(bias)

    # Column 4 holds the cycle count for the row.
    cycles = df_8_100.iloc[0, 4]            # cycles along the predicted path
    cycles_complete = df_8_100.iloc[0, 4]   # cycles if always using (8,100)
    best_cycles = df_keys[best_config.iloc[0, -1]].iloc[0, 4]  # oracle path

    predicted = model(data_point.reshape(1, -1))
    predicted = np.argmax(predicted.detach().cpu().numpy(), axis=-1)

    cycles_array = [int(cycles)]
    cores = [8]
    llc = [100]
    x_pos = [0]
    for i in range(1, min_rows):
        # Next input comes from the trace of the configuration just predicted.
        data_point = list(df_keys[predicted[0]].iloc[i, [1, 2, 3, 5, 6, 7, 8]].values)
        if n == 1:
            one_hot_y = oneHotEncoding(predicted)[0]
            data_point = torch.Tensor(data_point + one_hot_y)
        else:
            data_point = torch.Tensor(data_point)
        x_pos.append(cycles)
        cycles_array.append(int(df_keys[predicted[0]].iloc[i, 4]))
        cores.append(cores_llc_dict[predicted[0]]['cores'])
        llc.append(cores_llc_dict[predicted[0]]['llc'])
        cycles = cycles + df_keys[predicted[0]].iloc[i, 4]
        predicted = model(data_point.reshape(1, -1))
        predicted = np.argmax(predicted.detach().cpu().numpy(), axis=-1)
        cycles_complete = cycles_complete + df_8_100.iloc[i, 4]
        best_cycles = best_cycles + df_keys[best_config.iloc[i, -1]].iloc[i, 4]

    print('About to plot the graphs for run_number: {}'.format(run_number))
    font = {'family': 'serif',
            'color': 'darkred',
            'weight': 'normal',
            'size': 32,
            }
    # Bar widths / x positions scaled to units of 1e8 cycles for plotting.
    widths = [cycle * 10**-8*0.8 for cycle in cycles_array]
    x_pos_reduced = [x * 10**-8 for x in x_pos]
    plot_test_results(cores, font, run_number, widths, x_pos_reduced, 'Cores')
    plot_test_results(llc, font, run_number, widths, x_pos_reduced, 'LLC')
    print('run number:', run_number)
    print('cycles calculated:', cycles)
    print('cycles for complete configuration:', cycles_complete)
    print('best configuration cycles:', best_cycles)
    print('complete cycle percentage', cycles/cycles_complete * 100)
    print('best cycle percentage', cycles/best_cycles*100)
    print('\n')