Example #1
def evaluate(config):

    # define model
    if config.model.name == 'mlp':
        model = MLP(config)
    elif config.model.name == 'cnn':
        model = CNN(config)

    # load model & statistics
    model = load_model(config, model)
    loss, accuracy = load_statistics(config)

    # print performance graphs
    display_model_performance(loss, accuracy)

    # load mnist dataset
    train_loader, test_loader = load_mnist(config)
    test_iter = iter(test_loader)
    images, labels = next(test_iter)

    # evaluate accuracy and loss on the test data (eval mode, no gradients needed)
    model.eval()
    with torch.no_grad():
        logits = model(images)
        test_loss = nn.CrossEntropyLoss()(logits, labels).item()
        test_acc = calculate_accuracy(logits.numpy(), labels)

    print("test loss:      ", test_loss)
    print("test accuracy:  ", test_acc)
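# Note: calculate_accuracy is used here (and again in Example #2) but not defined
# in these snippets. A minimal sketch of what it might look like, assuming it
# receives numpy logits of shape (batch, n_classes) and integer class labels
# (tensor or array):
import numpy as np

def calculate_accuracy(logits, labels):
    predictions = np.argmax(logits, axis=-1)
    return float(np.mean(predictions == np.asarray(labels)))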
Example #2
def train(config):

    # load mnist dataset
    train_loader, test_loader = load_mnist(config)

    # define model
    if config.model.name == 'mlp':
        model = MLP(config)
    elif config.model.name == 'cnn':
        model = CNN(config)

    # define loss criteria & optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=config.optimizer.params.lr,
                          momentum=config.optimizer.params.momentum,
                          weight_decay=config.optimizer.params.regularization)

    BATCH_SIZE = config.data.mnist.batch_size
    MAX_EPOCH = config.optimizer.epochs
    BATCHES_PER_EPOCH = len(train_loader)

    loss_batch = []
    acc_batch = []
    running_loss, running_acc = 0, 0

    for epoch in range(MAX_EPOCH):
        batch_loss, batch_acc = 0, 0

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward pass
            logits = model(inputs)

            # calculate softmax cross-entropy loss
            loss = criterion(logits, labels)

            # backward pass & gradient descent
            loss.backward()
            optimizer.step()

            # fetch statistics for the current batch
            acc = calculate_accuracy(logits.detach().numpy(), labels)
            running_acc += acc
            running_loss += loss.item()
            batch_loss += loss.item()
            batch_acc += acc

            # print running statistics every 50 batches
            if i % 50 == 49:
                print(
                    'Epoch [{}]/[{}]\t Batch [{}]/[{}]\t loss: [{:.3f}]\t accuracy: [{:.4f}]'
                    .format(epoch + 1, MAX_EPOCH, i + 1, BATCHES_PER_EPOCH,
                            running_loss / 50, running_acc / 50))
                running_loss, running_acc = 0, 0

        # save per-epoch statistics (mean loss & accuracy over this epoch's batches)
        loss_batch.append(batch_loss / BATCHES_PER_EPOCH)
        acc_batch.append(batch_acc / BATCHES_PER_EPOCH)

    # save model and loss & accuracy curves
    save_model(model, config)
    save_statistics(loss_batch, acc_batch, config)
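# Note: the nested `config` object used by train() and evaluate() is not shown.
# A minimal sketch of the attributes these examples actually read, built here with
# types.SimpleNamespace purely for illustration (the real project may load it from
# a YAML/JSON config file instead):
from types import SimpleNamespace

config = SimpleNamespace(
    model=SimpleNamespace(name='mlp'),
    data=SimpleNamespace(mnist=SimpleNamespace(batch_size=64)),
    optimizer=SimpleNamespace(
        epochs=10,
        params=SimpleNamespace(lr=0.01, momentum=0.9, regularization=1e-4),
    ),
)
# train(config) would then run the loop above, and evaluate(config) would reload
# the saved model and statistics for testing.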
Example #3
def train(lr=args.lr,
          n_hidden=args.n_hidden,
          batch_size=args.batch_size,
          dropout=args.dropout,
          valid_freq=3000,
          disp_freq=1000,
          save_freq=100000,
          max_epochs=args.n_epoch,
          patience=15,
          save_name=args.save_name,
          save_dir=args.save_dir,
          device=args.device):
    # Load train and valid dataset
    print('loading train')
    with open(args.train_path, 'rb') as f:
        train_val_y = pickle.load(f)
        train_val_x = pickle.load(f)

    print('loading english test')
    with open(args.en_test_path, 'rb') as f:
        en_test_y = pickle.load(f)
        en_test_x = pickle.load(f)

    print('loading french test')
    with open(args.fr_test_path, 'rb') as f:
        fr_test_y = pickle.load(f)
        fr_test_x = pickle.load(f)

    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=1125)
    for train_index, test_index in sss.split(train_val_x, train_val_y):
        train_y = train_val_y[train_index]
        train_x = train_val_x[train_index]
        valid_y = train_val_y[test_index]
        valid_x = train_val_x[test_index]

    print('Number of training samples: %d' % train_x.shape[0])
    print('Number of validation samples: %d' % valid_x.shape[0])
    print('Number of English test samples: %d' % en_test_x.shape[0])
    print('Number of French test samples: %d' % fr_test_x.shape[0])
    print('-' * 100)

    kf_valid = get_minibatches_idx(len(valid_y), batch_size)
    kf_en_test = get_minibatches_idx(len(en_test_y), batch_size)
    kf_fr_test = get_minibatches_idx(len(fr_test_y), batch_size)

    # Loader parameter: use CUDA pinned memory for faster data loading
    pin_memory = (device == 'gpu')

    n_emb = train_x.shape[1]
    n_class = len(set(train_y))
    best_valid_acc = None
    bad_counter = 0

    uidx = 0  # number of parameter updates so far
    estop = False  # early-stop flag
    net = MLP(n_mlp_layer=args.n_mlp_layers,
              n_hidden=args.n_hidden,
              dropout=args.dropout,
              n_class=n_class,
              n_emb=n_emb,
              device=args.device)

    if args.load_net != '':
        assert os.path.exists(
            args.load_net), 'Path to pretrained net does not exist'
        net.load_state_dict(torch.load(args.load_net))
        print('Loaded existing model stored at: ', args.load_net)

    if args.device == 'gpu':
        net = net.cuda()

    # Begin Training
    net.train()
    print('-' * 100)
    print('Model structure: ')
    print('MLP baseline')
    print(net.main)
    print('-' * 100)
    print('Parameters for tuning: ')
    print(net.state_dict().keys())
    print('-' * 100)

    # Define optimizer
    assert args.optimizer in [
        'SGD', 'Adam', 'RMSprop', 'LBFGS', 'Rprop', 'ASGD', 'Adadelta',
        'Adagrad', 'Adamax'
    ], 'Please choose one of the supported optimizers'
    if args.optimizer == 'SGD':
        optimizer = optim.SGD(lr=lr,
                              params=filter(lambda p: p.requires_grad,
                                            net.parameters()),
                              momentum=0.9)
    else:
        optimizer = getattr(optim, args.optimizer)(params=filter(
            lambda p: p.requires_grad, net.parameters()),
                                                   lr=lr)

    #lambda1 = lambda epoch: epoch // 30
    lambda2 = lambda epoch: 0.98**epoch
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda2])
    #scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max_epochs)
    #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max')
    try:
        for eidx in range(max_epochs):
            # print('Training mode on: ' ,net.training)
            start_time = time.time()
            n_samples = 0
            # Get new shuffled index for the training set
            kf = get_minibatches_idx(len(train_y), batch_size, shuffle=True)

            for _, train_index in kf:
                # Remove gradient from previous batch
                #net.zero_grad()
                optimizer.zero_grad()
                uidx += 1
                y_batch = torch.from_numpy(train_y[train_index]).long()
                x_batch = torch.from_numpy(train_x[train_index]).float()
                if net.device == 'gpu':
                    y_batch = y_batch.cuda()
                scores = net(x_batch)
                loss = net.loss(scores, y_batch)

                loss.backward()
                optimizer.step()
                n_samples += len(x_batch)
                gradient = 0

                # For logging gradient information
                for name, w in net.named_parameters():
                    if w.grad is not None:
                        w_grad = torch.norm(w.grad.data, 2)**2
                        gradient += w_grad
                gradient = gradient**0.5
                if np.mod(uidx, disp_freq) == 0:
                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ',
                          loss.item(), 'Gradient ', gradient)

                if save_name and np.mod(uidx, save_freq) == 0:
                    print('Saving...')
                    torch.save(
                        net.state_dict(), '%s/%s_epoch%d_update%d.net' %
                        (save_dir, save_name, eidx, uidx))

                if np.mod(uidx, valid_freq) == 0:
                    print("=" * 50)
                    print('Evaluation on validation set: ')
                    kf_valid = get_minibatches_idx(len(valid_y), batch_size)
                    top_1_acc, top_n_acc = eval.net_evaluation(
                        net, kf_valid, valid_x, valid_y)
                    #scheduler.step(top_1_acc)

                    # Save best performance state_dict for testing
                    if best_valid_acc is None:
                        best_valid_acc = top_1_acc
                        best_state_dict = net.state_dict()
                        torch.save(best_state_dict,
                                   '%s/%s_best.net' % (save_dir, save_name))
                    else:
                        if top_1_acc > best_valid_acc:
                            print(
                                'Best validation performance so far, saving model parameters'
                            )
                            print("*" * 50)
                            bad_counter = 0  # reset counter
                            best_valid_acc = top_1_acc
                            best_state_dict = net.state_dict()
                            torch.save(
                                best_state_dict,
                                '%s/%s_best.net' % (save_dir, save_name))
                        else:
                            bad_counter += 1
                            print('Validation accuracy: ', 100 * top_1_acc)
                            print('Getting worse, patience left: ',
                                  patience - bad_counter)
                            print('Best validation accuracy now: ',
                                  100 * best_valid_acc)
                            # Learning rate annealing
                            lr /= args.lr_anneal
                            print('Learning rate annealed to: ', lr)
                            print('*' * 100)
                            if args.optimizer == 'SGD':
                                optimizer = optim.SGD(
                                    lr=lr,
                                    params=filter(lambda p: p.requires_grad,
                                                  net.parameters()),
                                    momentum=0.9)
                            else:
                                optimizer = getattr(optim, args.optimizer)(
                                    params=filter(lambda p: p.requires_grad,
                                                  net.parameters()),
                                    lr=lr)
                            if bad_counter > patience:
                                print('-' * 100)
                                print('Early Stop!')
                                estop = True
                                break

            # step the LR scheduler once per epoch, after the optimizer updates
            scheduler.step()
            epoch_time = time.time() - start_time
            print('Epoch processing time: %.2f s' % epoch_time)
            print('Seen %d samples' % n_samples)
            if estop:
                break
        print('-' * 100)
        print('Training finished')
        best_state_dict = torch.load('%s/%s_best.net' % (save_dir, save_name))
        torch.save(net.state_dict(), '%s/%s_final.net' % (save_dir, save_name))
        net.load_state_dict(best_state_dict)

        # Re-evaluate the best model on the validation set
        print('Evaluation on validation set: ')
        kf_valid = get_minibatches_idx(len(valid_y), batch_size)
        eval.net_evaluation(net, kf_valid, valid_x, valid_y)

        # Evaluate model on test set
        print('Evaluation on test set: ')
        print('Evaluation on English test set: ')
        eval.net_evaluation(net, kf_en_test, en_test_x, en_test_y)
        print('Evaluation on French test set: ')
        eval.net_evaluation(net, kf_fr_test, fr_test_x, fr_test_y)
    except KeyboardInterrupt:
        print('-' * 100)
        print("Training interrupted, saving final model...")
        best_state_dict = torch.load('%s/%s_best.net' % (save_dir, save_name))
        torch.save(net.state_dict(), '%s/%s_final.net' % (save_dir, save_name))
        net.load_state_dict(best_state_dict)
        print('Evaluation on validation set: ')
        kf_valid = get_minibatches_idx(len(valid_y), batch_size)
        eval.net_evaluation(net, kf_valid, valid_x, valid_y)

        # Evaluate model on test set
        print('Evaluation on English test set: ')
        eval.net_evaluation(net, kf_en_test, en_test_x, en_test_y)
        print('Evaluation on French test set: ')
        eval.net_evaluation(net, kf_fr_test, fr_test_x, fr_test_y)
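# Note: get_minibatches_idx is not defined in this example. Judging from how it is
# used above (it yields (minibatch_number, index_array) pairs), a minimal sketch
# could look like this:
import numpy as np

def get_minibatches_idx(n, minibatch_size, shuffle=False):
    idx_list = np.arange(n, dtype='int64')
    if shuffle:
        np.random.shuffle(idx_list)
    minibatches = [idx_list[start:start + minibatch_size]
                   for start in range(0, n, minibatch_size)]
    return list(enumerate(minibatches))
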
def test(n, run_number):
    df_4_40 = pd.read_csv('./test_{}/merged_config_test_4_40.csv'.format(run_number))
    df_4_60 = pd.read_csv('./test_{}/merged_config_test_4_60.csv'.format(run_number))
    df_4_80 = pd.read_csv('./test_{}/merged_config_test_4_80.csv'.format(run_number))
    df_4_100 = pd.read_csv('./test_{}/merged_config_test_4_100.csv'.format(run_number))
    df_8_40 = pd.read_csv('./test_{}/merged_config_test_8_40.csv'.format(run_number))
    df_8_60 = pd.read_csv('./test_{}/merged_config_test_8_60.csv'.format(run_number))
    df_8_80 = pd.read_csv('./test_{}/merged_config_test_8_80.csv'.format(run_number))
    df_8_100 = pd.read_csv('./test_{}/merged_config_test_8_100.csv'.format(run_number))
    best_config = pd.read_csv('./test_{}/best_config_file.csv'.format(run_number))
    df_keys = {0: df_4_40, 1: df_4_60, 2: df_4_80, 3: df_4_100,
               4: df_8_40, 5: df_8_60, 6: df_8_80, 7: df_8_100}

    min_rows = 0
    min_rows = get_min_rows(df_keys, min_rows)

    if n == 1:
        model = MLP(15, 16, 8)
    else:
        model = MLP(7, 16, 8)
    model.load_state_dict(torch.load('checkpoint/MLP_model_19_train.pwf', map_location='cpu'))
    model.eval()

    data_point = list(df_8_100.iloc[0, [1, 2, 3, 5, 6, 7, 8]].values)

    if n == 1:
        one_hot_y = [0, 0, 0, 0, 0, 0, 0, 0]
        data_point = torch.Tensor(data_point + one_hot_y)
    else:
        data_point = torch.Tensor(data_point)

    with open("parameters.txt", "w") as f:
        f.write("Parameters \n")
        for i, param in enumerate(model.parameters()):
            if i % 2 == 0:
                weight = "weight for layer {}: ".format(i // 2 + 1) + str(param) + "\n"
                f.write(weight)
            else:
                bias = "bias for layer {}: ".format(i // 2 + 1) + str(param) + "\n"
                f.write(bias)

    cycles = df_8_100.iloc[0, 4]
    cycles_complete = df_8_100.iloc[0, 4]
    best_cycles = df_keys[best_config.iloc[0, -1]].iloc[0, 4]
    predicted = model(data_point.reshape(1, -1))
    predicted = np.argmax(predicted.detach().cpu().numpy(), axis=-1)
    cycles_array = [int(cycles)]
    cores = [8]
    llc = [100]
    x_pos = [0]
    for i in range(1, min_rows):
        data_point = list(df_keys[predicted[0]].iloc[i, [1, 2, 3, 5, 6, 7, 8]].values)
        if n == 1:
            one_hot_y = oneHotEncoding(predicted)[0]
            data_point = torch.Tensor(data_point + one_hot_y)
        else:
            data_point = torch.Tensor(data_point)
        x_pos.append(cycles)
        cycles_array.append(int(df_keys[predicted[0]].iloc[i, 4]))
        cores.append(cores_llc_dict[predicted[0]]['cores'])
        llc.append(cores_llc_dict[predicted[0]]['llc'])
        cycles = cycles + df_keys[predicted[0]].iloc[i, 4]
        predicted = model(data_point.reshape(1, -1))
        predicted = np.argmax(predicted.detach().cpu().numpy(), axis=-1)
        cycles_complete = cycles_complete + df_8_100.iloc[i, 4]
        best_cycles = best_cycles + df_keys[best_config.iloc[i, -1]].iloc[i, 4]

    print('About to plot the graphs for run_number: {}'.format(run_number))
    font = {'family': 'serif',
            'color': 'darkred',
            'weight': 'normal',
            'size': 32,
            }

    widths = [cycle * 10**-8 * 0.8 for cycle in cycles_array]
    x_pos_reduced = [x * 10**-8 for x in x_pos]
    plot_test_results(cores, font, run_number, widths, x_pos_reduced, 'Cores')
    plot_test_results(llc, font, run_number, widths, x_pos_reduced, 'LLC')

    print('run number:', run_number)
    print('cycles calculated:', cycles)
    print('cycles for complete configuration:', cycles_complete)
    print('best configuration cycles:', best_cycles)
    print('complete cycle percentage:', cycles / cycles_complete * 100)
    print('best cycle percentage:', cycles / best_cycles * 100)
    print('\n')
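# Note: cores_llc_dict and oneHotEncoding are used in test() but not defined here.
# Based on the eight configurations loaded above (4 or 8 cores x LLC 40/60/80/100),
# plausible sketches, purely for illustration, might be:
cores_llc_dict = {
    0: {'cores': 4, 'llc': 40}, 1: {'cores': 4, 'llc': 60},
    2: {'cores': 4, 'llc': 80}, 3: {'cores': 4, 'llc': 100},
    4: {'cores': 8, 'llc': 40}, 5: {'cores': 8, 'llc': 60},
    6: {'cores': 8, 'llc': 80}, 7: {'cores': 8, 'llc': 100},
}

def oneHotEncoding(class_indices, n_classes=8):
    # one list of length n_classes per index, with a 1 at the predicted position
    return [[1 if j == int(c) else 0 for j in range(n_classes)]
            for c in class_indices]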