def calculateWithK(test_set_by_user=None, k=-1, trainingSet=None):
    if k == -1:
        k = int(input("Please input K: "))

    global neighbors, accuracy

    # data preparation: fall back to the default sets when none are given
    if test_set_by_user is None:
        testSet = get_test_set()
    else:
        testSet = test_set_by_user

    if trainingSet is None:
        trainingSet = get_training_set()
    print('training set --> ' + repr(trainingSet))
    print('testing set --> ' + repr(testSet))
    # predictions
    predictions = []
    for x in range(len(testSet)):
        neighbors = get_neighbors(trainingSet, testSet[x], k)
        responses_result = get_response(neighbors)
        predictions.append(responses_result)
        print('> predicted = ' + repr(responses_result) + ', actual = ' + repr(testSet[x][-1]))

    # accuracy:
    accuracy = get_accuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')
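
The get_accuracy helper these kNN examples rely on is not shown on this page. A minimal sketch consistent with the calls above, assuming each test row carries its true label in the last position and that the helper returns a percentage:

def get_accuracy(test_set, predictions):
    # assumed behavior: compare each prediction with the row's trailing label
    correct = 0
    for row, predicted in zip(test_set, predictions):
        if row[-1] == predicted:
            correct += 1
    return correct / float(len(test_set)) * 100.0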
Example #2
def test():
    # TODO : Test Later
    print('==> Testing network..')
    # Make predictions on full X_test mels
    y_predicted = accuracy.predict_class_all(create_segmented_mels(X_test), a_net)

    # Print statistics
    print(np.sum(accuracy.confusion_matrix(y_predicted, y_test), axis=1))
    print(accuracy.confusion_matrix(y_predicted, y_test))
    print(accuracy.get_accuracy(y_predicted, y_test))
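
The accuracy module used by this snippet (confusion_matrix, get_accuracy) is external to the page. A sketch of the two scoring helpers under the assumption that labels are hashable class ids; the real module may differ:

import numpy as np

def confusion_matrix(y_predicted, y_true):
    # assumed layout: rows are true classes, columns are predicted classes
    labels = sorted(set(y_true) | set(y_predicted))
    index = {label: i for i, label in enumerate(labels)}
    matrix = np.zeros((len(labels), len(labels)), dtype=int)
    for predicted, true in zip(y_predicted, y_true):
        matrix[index[true], index[predicted]] += 1
    return matrix

def get_accuracy(y_predicted, y_true):
    matches = sum(p == t for p, t in zip(y_predicted, y_true))
    return matches / float(len(y_true))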
def calculateWithKFofGraph(test_set_by_user=None, trainingSet=None, k=1):
    global neighbors, accuracy

    # avoid mutable default arguments; both sets are expected from the caller
    testSet = test_set_by_user if test_set_by_user is not None else []
    trainingSet = trainingSet if trainingSet is not None else []
    # predictions
    predictions = []
    for x in range(len(testSet)):
        neighbors = get_neighbors(trainingSet, testSet[x], k)
        responses_result = get_response(neighbors)
        predictions.append(responses_result)
        print('> predicted = ' + repr(responses_result) + ', actual = ' + repr(testSet[x][-1]))

    # accuracy:
    accuracy = get_accuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')
    return accuracy
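
get_neighbors and get_response complete the kNN picture for these examples. A sketch assuming Euclidean distance over the feature columns (everything except the trailing label) and a majority vote over neighbor labels:

import math
from collections import Counter

def get_neighbors(training_set, test_row, k):
    # rank training rows by Euclidean distance to the test row
    distances = []
    for train_row in training_set:
        dist = math.sqrt(sum((a - b) ** 2
                             for a, b in zip(train_row[:-1], test_row[:-1])))
        distances.append((train_row, dist))
    distances.sort(key=lambda pair: pair[1])
    return [row for row, _ in distances[:k]]

def get_response(neighbors):
    # majority vote over the neighbors' labels
    votes = Counter(row[-1] for row in neighbors)
    return votes.most_common(1)[0][0]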
Example #4
def main():
    """main function"""

    args = get_args()

    print("\n#################")
    print("### Arguments ###")
    print("#################")
    for arg in vars(args):
        print(f"{arg} : {getattr(args, arg)}")
    print("#################\n")

    # get the features

    output_features_filename = get_prefix_file() + ".features"

    if not os.path.exists(output_features_filename):
        results_features = get_features(args.arch, 300, pooling=args.pool)
        with open(output_features_filename, 'wb') as output_file:
            pickle.dump(results_features, output_file)
    else:
        # reuse the cached features instead of bailing out
        print("## features already generated")
        with open(output_features_filename, 'rb') as input_file:
            results_features = pickle.load(input_file)

    # generate the similarity measures for patch-pair combinations
    results_comparison = compare(results_features, args.distance)

    output_comparison_filename = get_prefix_file() + ".comparison"
    with open(output_comparison_filename, 'wb') as output_file:
        pickle.dump(results_comparison, output_file)

    # generate the top-k accuracy (for k = 1 and 5)
    top1accuracy, top5accuracy = get_accuracy(
        results_comparison, args.distance)
    print("Top-1 and 5 accuracy for size {} : {} and {}\n\n".format(args.size,
                                                                    top1accuracy, top5accuracy))
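
Here get_accuracy computes top-1 and top-5 retrieval accuracy from the pairwise comparisons. A sketch under the assumption that results_comparison maps each query patch id to scored candidates and that lower scores mean closer matches; the distance argument is kept only for signature parity:

def get_accuracy(results_comparison, distance):
    # assumed layout: {query_id: [(candidate_id, score), ...]}
    top1_hits = top5_hits = 0
    for query_id, scored in results_comparison.items():
        ranked = [cand for cand, _ in sorted(scored, key=lambda pair: pair[1])]
        if ranked and ranked[0] == query_id:
            top1_hits += 1
        if query_id in ranked[:5]:
            top5_hits += 1
    total = len(results_comparison)
    return top1_hits / total, top5_hits / total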
Example #5
    X_test = pool.map(get_wav, X_test)

    # Convert to MFCC
    if DEBUG:
        print('converting to mfcc')
    X_train = pool.map(to_mfcc, X_train)
    X_test = pool.map(to_mfcc, X_test)

    # Create segments from MFCCs
    X_train, y_train = make_segments(X_train, y_train)
    X_validation, y_validation = make_segments(X_test, y_test)

    # Randomize training segments (test_size=0 keeps every segment, shuffled)
    X_train, _, y_train, _ = train_test_split(X_train, y_train, test_size=0)

    # Train model
    model = train_model(np.array(X_train), np.array(y_train),
                        np.array(X_validation), np.array(y_validation))

    # Make predictions on full X_test MFCCs
    y_predicted = accuracy.predict_class_all(create_segmented_mfccs(X_test), model)

    # Print statistics
    print(train_count)
    print(test_count)
    print(acc_to_beat)
    print(np.sum(accuracy.confusion_matrix(y_predicted, y_test), axis=1))
    print(accuracy.confusion_matrix(y_predicted, y_test))
    print(accuracy.get_accuracy(y_predicted, y_test))

    # Save model
    save_model(model, model_filename)
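
accuracy.predict_class_all is the sample-level prediction step: the comments in these examples suggest per-segment prediction followed by a majority vote per test sample. A sketch under that assumption, for a Keras-style model exposing predict:

import numpy as np

def predict_class_all(segmented_samples, model):
    # assumed input: one list of segments per test sample
    predictions = []
    for segments in segmented_samples:
        segment_probs = model.predict(np.array(segments))
        segment_classes = np.argmax(segment_probs, axis=1)
        # the sample's class is the most frequent segment-level class
        predictions.append(int(np.bincount(segment_classes).argmax()))
    return predictions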
Example #6
    if DEBUG:
        print('Converting to MFCC....')
    X_train = pool.map(to_mfcc, X_train)
    X_test = pool.map(to_mfcc, X_test)

    # Create segments from MFCCs
    X_train, y_train = make_segments(X_train, y_train)
    X_validation, y_validation = make_segments(X_test, y_test)

    # Randomize training segments
    X_train, _, y_train, _ = train_test_split(X_train, y_train, test_size=0.2)

    # Train model
    model = train_model(np.array(X_train), np.array(y_train),
                        np.array(X_validation), np.array(y_validation))

    # Make predictions on full X_test MFCCs
    y_predicted = accuracy.predict_class_all(create_segmented_mfccs(X_test), model)

    # Save model
    save_model(model, model_filename)

    # Print statistics
    print('Training samples:', train_count)
    print('Testing samples:', test_count)
    print('Accuracy to beat:', acc_to_beat)
    print('Confusion matrix of total samples:\n', np.sum(accuracy.confusion_matrix(y_predicted, y_test), axis=1))
    print('Confusion matrix:\n', accuracy.confusion_matrix(y_predicted, y_test))
    print('Accuracy:', accuracy.get_accuracy(y_predicted, y_test))


Example #7
# main entry point: split the data, train the model, then score the test set
from data_process import load_csv, spilt_data
from feature_extract import summary_by_class
from forecast import get_predict
from accuracy import get_accuracy

if __name__ == '__main__':
    filename = 'pima-indians-diabetes.data.csv'
    splitRatio = 0.67
    dataset = load_csv(filename)
    trainingSet, testSet = spilt_data(dataset, splitRatio)
    # prepare model
    summaries = summary_by_class(trainingSet)
    # test model
    predictions = get_predict(summaries, testSet)
    accuracy = get_accuracy(testSet, predictions)
    print(accuracy)
    print(predictions)

import numpy as np
import utils as ut
import accuracy as ac

data_set_train, data_sets_test = ut.get_data()

columns = [30, 53]
data_set_train_selected, data_sets_test_selected = ut.select_data(data_set_train, data_sets_test, columns)

# shift column 0 by 1 before taking the log so zero entries stay finite
data_set_train_selected[:, 0] += 1
data_set_train_selected[:, 0] = np.log(data_set_train_selected[:, 0])

for i in range(len(data_sets_test_selected)):
    data_sets_test_selected[i][:, 0] += 1
    data_sets_test_selected[i][:, 0] = np.log(data_sets_test_selected[i][:, 0])

ac.get_accuracy(data_set_train_selected, data_sets_test_selected)
Example #9
def train_model(model, trainds, testds, config, device, writer=None):
    batch_size = config['data']['batch_size']
    status = config['training']['status']
    epochs = config['training']['epochs']
    balanced_loss = config['loss']['balanced']
    # nval = config['nval']
    nval_tests = config['nval_tests']
    nsave = config['nsave']
    model_save = config['model_save']
    rank = config['rank']
    nranks = config['nranks']
    hvd = config['hvd']
    num_classes = config['data']['num_classes']

    ## create samplers for these datasets
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        trainds, nranks, rank, shuffle=True, drop_last=True)
    test_sampler = torch.utils.data.distributed.DistributedSampler(
        testds, nranks, rank, shuffle=True, drop_last=True)

    ## create data loaders
    train_loader = torch.utils.data.DataLoader(
        trainds,
        shuffle=False,
        sampler=train_sampler,
        num_workers=config['data']['num_parallel_readers'],
        batch_size=batch_size,
        persistent_workers=True)
    test_loader = torch.utils.data.DataLoader(
        testds,
        shuffle=False,
        sampler=test_sampler,
        num_workers=config['data']['num_parallel_readers'],
        batch_size=batch_size,
        persistent_workers=True)

    loss_func = loss.get_loss(config)
    ave_loss = CalcMean.CalcMean()
    acc_func = accuracy.get_accuracy(config)
    ave_acc = CalcMean.CalcMean()

    opt_func = optimizer.get_optimizer(config)
    opt = opt_func(model.parameters(), **config['optimizer']['args'])

    lrsched_func = optimizer.get_learning_rate_scheduler(config)
    lrsched = lrsched_func(opt, **config['lr_schedule']['args'])

    # Add Horovod Distributed Optimizer
    if hvd:
        opt = hvd.DistributedOptimizer(
            opt, named_parameters=model.named_parameters())

        # Broadcast parameters from rank 0 to all other processes.
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    model.to(device)

    for epoch in range(epochs):
        logger.info(' epoch %s of %s', epoch, epochs)

        train_sampler.set_epoch(epoch)
        test_sampler.set_epoch(epoch)
        model.to(device)
        for batch_counter, (inputs, targets, class_weights,
                            nonzero_mask) in enumerate(train_loader):

            # move data to device
            inputs = inputs.to(device)
            targets = targets.to(device)
            class_weights = class_weights.to(device)
            nonzero_mask = nonzero_mask.to(device)

            # zero grads
            opt.zero_grad()
            outputs, endpoints = model(inputs)

            # set the weights
            if balanced_loss:
                weights = class_weights
                nonzero_to_class_scaler = torch.sum(
                    nonzero_mask.type(torch.float32)) / torch.sum(
                        class_weights.type(torch.float32))
            else:
                weights = nonzero_mask
                nonzero_to_class_scaler = torch.ones(1, device=device)

            loss_value = loss_func(outputs, targets.long())
            loss_value = torch.mean(
                loss_value * weights) * nonzero_to_class_scaler

            # backward calc grads
            loss_value.backward()

            # apply grads
            opt.step()

            ave_loss.add_value(float(loss_value.to('cpu')))

            # calc acc
            ave_acc.add_value(
                float(acc_func(outputs, targets, weights).to('cpu')))

            # print statistics
            if batch_counter % status == 0:

                logger.info(
                    '<[%3d of %3d, %5d of %5d]> train loss: %6.4f acc: %6.4f',
                    epoch + 1, epochs, batch_counter,
                    len(trainds) / nranks / batch_size, ave_loss.mean(),
                    ave_acc.mean())

                if writer and rank == 0:
                    global_batch = epoch * len(
                        trainds) / nranks / batch_size + batch_counter
                    writer.add_scalars('loss', {'train': ave_loss.mean()},
                                       global_batch)
                    writer.add_scalars('accuracy', {'train': ave_acc.mean()},
                                       global_batch)
                    #writer.add_histogram('input_trans',endpoints['input_trans'].view(-1),global_batch)

                ave_loss = CalcMean.CalcMean()
                ave_acc = CalcMean.CalcMean()

            # release tensors for memory
            del inputs, targets, weights, endpoints, loss_value

            if config['batch_limiter'] and batch_counter > config[
                    'batch_limiter']:
                logger.info('batch limiter enabled, stop training early')
                break

        # save at end of epoch
        torch.save(model.state_dict(),
                   model_save + '_%05d.torch_model_state_dict' % epoch)

        if nval_tests == -1:
            nval_tests = len(testds) / nranks / batch_size
        logger.info('epoch %s complete, running validation on %s batches',
                    epoch, nval_tests)

        model.to(device)
        # every epoch, evaluate validation data set
        with torch.no_grad():

            vloss = CalcMean.CalcMean()
            vacc = CalcMean.CalcMean()

            vious = [CalcMean.CalcMean() for i in range(num_classes)]

            for valid_batch_counter, (inputs, targets, class_weights,
                                      nonzero_mask) in enumerate(test_loader):

                inputs = inputs.to(device)
                targets = targets.to(device)
                class_weights = class_weights.to(device)
                nonzero_mask = nonzero_mask.to(device)

                # set the weights
                if balanced_loss:
                    weights = class_weights
                    nonzero_to_class_scaler = torch.sum(
                        nonzero_mask.type(torch.float32)) / torch.sum(
                            class_weights.type(torch.float32))
                else:
                    weights = nonzero_mask
                    nonzero_to_class_scaler = torch.ones(1, device=device)

                outputs, endpoints = model(inputs)

                loss_value = loss_func(outputs, targets.long())
                loss_value = torch.mean(
                    loss_value * weights) * nonzero_to_class_scaler
                vloss.add_value(float(loss_value.to('cpu')))

                # calc acc
                vacc.add_value(
                    float(acc_func(outputs, targets, weights).to('cpu')))

                # calc ious
                ious = get_ious(outputs, targets, weights, num_classes)
                for i in range(num_classes):
                    vious[i].add_value(float(ious[i]))

                if valid_batch_counter > nval_tests:
                    break

            mean_acc = vacc.mean()
            mean_loss = vloss.mean()
            # if config['hvd'] is not None:
            #    mean_acc  = config['hvd'].allreduce(torch.tensor([mean_acc]))
            #    mean_loss = config['hvd'].allreduce(torch.tensor([mean_loss]))
            mious = float(
                torch.sum(torch.FloatTensor([x.mean()
                                             for x in vious]))) / num_classes
            ious_out = {
                'jet': vious[0].mean(),
                'electron': vious[1].mean(),
                'bkgd': vious[2].mean(),
                'all': mious
            }
            # add validation to tensorboard
            if writer and rank == 0:
                global_batch = epoch * len(
                    trainds) / nranks / batch_size + batch_counter
                writer.add_scalars('loss', {'valid': mean_loss}, global_batch)
                writer.add_scalars('accuracy', {'valid': mean_acc},
                                   global_batch)
                writer.add_scalars('IoU', ious_out, global_batch)

            logger.warning(
                '>[%3d of %3d, %5d of %5d]<<< ave valid loss: %6.4f ave valid acc: %6.4f on %s batches >>>',
                epoch + 1, epochs, batch_counter,
                len(trainds) / nranks / batch_size, mean_loss, mean_acc,
                valid_batch_counter + 1)
            logger.warning('      >> ious: %s', ious_out)

        # update learning rate
        lrsched.step()
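
In this trainer, accuracy.get_accuracy(config) is a factory that returns the callable used as acc_func(outputs, targets, weights) above. A minimal sketch of a weight-masked accuracy consistent with those calls, assuming class scores sit on dimension 1 of outputs; the project's own implementation may differ:

import torch

def get_accuracy(config):
    def acc_func(outputs, targets, weights):
        # predicted class per point, compared against integer targets;
        # weights zero out padding (or rebalance classes) as in the loss
        predicted = torch.argmax(outputs, dim=1)
        correct = (predicted == targets.long()).float() * weights
        return correct.sum() / weights.sum().clamp(min=1e-8)
    return acc_func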
Example #10
    #########################################################
    ## TESTING
    #########################################################
    # Calculating accuracy by testing on the entire dataset

    # torch.autograd.Variable is a legacy no-op wrapper; the tensor suffices
    x_test = torch.from_numpy(X_test)

    y_real = y_test
    output = classifier(x_test.float())
    y_pred = []
    for y in output:
        index_max = np.argmax(y.detach().numpy())
        y_pred.append(index_max)

    accuracy = get_accuracy(y_pred, y_real)
    print("Accuracy = {}%".format(accuracy))

    print("Confusion matrix : \n", confusion_matrix(y_real, y_pred))
    target_names = ['A', 'B', 'C', 'D', 'E']
    print(
        "Classification Report : \n",
        classification_report(y_real,
                              y_pred,
                              target_names=target_names,
                              zero_division=0))

    # f = open("res.txt", "a")
    # f.write("{}%\n".format(accuracy))
    # f.close()
Example #11
def train(config, train_data, encoder, decoder):
    # loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    # deep-copy the initial weights (requires `import copy`) so later training
    # steps cannot mutate the saved "best" model through shared references
    best_decoder = copy.deepcopy(decoder)
    best_encoder = copy.deepcopy(encoder)
    loss_function_1 = nn.NLLLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    dec_optim = optim.Adam(decoder.parameters(), lr=config.learning_rate)
    train_loss_over_epochs = []
    val_accuracy_over_epochs_slot = []
    val_accuracy_over_epochs_intent = []
    best_val = 0
    for epoch in range(config.epochs):
        losses = []
        losses_overepoch = []
        count = 0
        for i, batch in enumerate(getBatch(config.batch_size, train_data)):
            count = count + 1
            x, y_1, y_2 = zip(*batch)
            x = torch.cat(x)
            tag_target = torch.cat(y_1).view(-1)
            intent_target = torch.cat(y_2)
            x_mask = torch.cat([Variable(torch.BoolTensor(tuple(map(lambda s: s == 0, t.data)))) for t in x])\
                .view(config.batch_size, -1)

            encoder.zero_grad()
            decoder.zero_grad()

            output, hidden_c = encoder(x, x_mask)
            start_decode = Variable(torch.LongTensor([[0] * config.batch_size
                                                      ])).transpose(1, 0)
            # pdb.set_trace()
            tag_score, intent_score = decoder(start_decode, hidden_c, output,
                                              x_mask)

            loss_1 = loss_function_1(tag_score, tag_target)
            loss_2 = loss_function_2(intent_score, intent_target)

            #loss = 0.4*loss_1+0.6*loss_2
            loss = loss_1 + loss_2
            losses.append(loss.item())
            losses_overepoch.append(loss.item())
            loss.backward()

            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)

            enc_optim.step()
            dec_optim.step()

            if i % 10 == 0:
                print("Epoch", epoch, " batch", i, " : ", np.mean(losses))
                losses = []
        val_accuracy_slot, val_accuracy_intent = get_accuracy(encoder, decoder)
        print(val_accuracy_slot)
        print(val_accuracy_intent)

        # best_val starts at 0, so the first epoch sets the baseline; the old
        # `if epoch == 1` reset could silently discard a better earlier score
        if val_accuracy_slot > best_val:
            best_val = val_accuracy_slot
            best_decoder = copy.deepcopy(decoder)
            best_encoder = copy.deepcopy(encoder)
            #best_net = net.parameters
        train_loss_over_epochs.append(np.mean(losses_overepoch))
        val_accuracy_over_epochs_slot.append(val_accuracy_slot)
        val_accuracy_over_epochs_intent.append(val_accuracy_intent)
    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)

    # pdb.set_trace()
    plt.figure(0)
    plt.subplot(2, 1, 1)
    plt.ylabel('Train loss')
    plt.plot(np.arange(config.epochs), train_loss_over_epochs, 'k-')
    plt.title('train loss and slot filling accuracy on validation set')
    plt.xticks(np.arange(config.epochs, dtype=int))
    plt.grid(True)

    plt.subplot(2, 1, 2)
    plt.plot(np.arange(config.epochs), val_accuracy_over_epochs_slot, 'b-')
    plt.ylabel('Slot filling accuracy')
    plt.xlabel('Epochs')
    plt.xticks(np.arange(config.epochs, dtype=int))
    plt.grid(True)
    plt.savefig("plot1.png")

    plt.figure(1)
    plt.subplot(2, 1, 1)
    plt.ylabel('Train loss')
    plt.plot(np.arange(config.epochs), train_loss_over_epochs, 'k-')
    plt.title(
        'train loss and intent classification accuracy on validation set')
    plt.xticks(np.arange(config.epochs, dtype=int))
    plt.grid(True)

    plt.subplot(2, 1, 2)
    plt.plot(np.arange(config.epochs), val_accuracy_over_epochs_intent, 'b-')
    plt.ylabel('Intent classification accuracy')
    plt.xlabel('Epochs')
    plt.xticks(np.arange(config.epochs, dtype=int))
    plt.grid(True)
    plt.savefig("plot2.png")

    print('Finished Training')
    torch.save(best_decoder.state_dict(),
               os.path.join(config.model_dir, 'jointnlu-decoder.pkl'))
    torch.save(best_encoder.state_dict(),
               os.path.join(config.model_dir, 'jointnlu-encoder.pkl'))
    print("Train Complete!")
Example #12
import utils as ut
import accuracy as ac

data_set_train, data_sets_test = ut.get_data()
ac.get_accuracy(data_set_train, data_sets_test)
Example #13
        # print (trainer)

        # Train model
        model = train_model(np.array(X_train), np.array(y_train),
                            np.array(X_validation), np.array(y_validation))
        # model = load_model("model.h5")
        # predicted = model.predict (k.create_segmented_mfccs(X_test))
        # for i in predicted:
        #     print (i)
        # Make predictions on full X_test MFCCs
        y_predicted = accuracy.predict_class_all(
            k.create_segmented_mfccs(X_test), model)
        # print (y_predicted)
        # for i in y_predicted:
        #     print (i)
        # Print statistics
        print('Training samples:', train_count)
        print('Testing samples:', test_count)
        print('Accuracy to beat:', acc_to_beat)
        print('Confusion matrix of total samples:\n',
              np.sum(accuracy.confusion_matrix(y_predicted, y_test), axis=1))
        print('Confusion matrix:\n',
              accuracy.confusion_matrix(y_predicted, y_test))
        print('Accuracy:', accuracy.get_accuracy(y_predicted, y_test))

        results.append(accuracy.confusion_matrix(y_predicted, y_test))
        acc.append(accuracy.get_accuracy(y_predicted, y_test))
        # Save model
    print(results)
    print(acc)
    save_model(model, model_filename)