Example 1
import argparse

import chainer
import chainer.links as L
from chainer import serializers
from chainer.dataset import convert

import network  # local module that defines the MLP used below


def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    model = L.Classifier(network.MLP(args.unit, 10))
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
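        # One manual training step: draw a mini-batch, wrap it in Variables,
        # and let optimizer.update() run the Classifier forward pass, the
        # backward pass, and the parameter update in a single call.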
        batch = train_iter.next()
        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            for batch in test_iter:
                x_array, t_array = convert.concat_examples(batch, args.gpu)
                x = chainer.Variable(x_array)
                t = chainer.Variable(t_array)
                loss = model(x, t)
                sum_loss += float(loss.data) * len(t.data)
                sum_accuracy += float(model.accuracy.data) * len(t.data)

            test_iter.reset()
            print('test mean  loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
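
Assuming the script above is saved as train_mnist.py (the filename is not shown in the source), a typical run would be python train_mnist.py -g 0 -e 20: train on GPU 0 for 20 epochs, then write mlp.model and mlp.state to the working directory.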
Example 2
def main():
    '''
    Main block

    Steps:
    1. Pull data (MNIST)
    2. Initialise network
    3. Train network
    4. Save weights
    '''
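    # Assumed module-level context (not shown in this snippet): numpy as np,
    # sklearn and sklearn.svm, an argparse `args` namespace providing
    # args.question, and the local helpers download_mnist, network, helper,
    # run_stats, noise_addition, and preprocess.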

    DATA = download_mnist.load_mnist()
    validation = []
    for key in ['fold-{f}'.format(f=f) for f in range(4)]:
        validation += DATA[key]
    validation = np.array(validation)

    epochs = 8
    initial_lr = 8e-3
    final_lr = 8e-6

    if args.question in ["1", "2", "5"]:
        model = network.MLP([784, 1000, 500, 250, 10])

        (train_losses, val_losses, test_losses,
         train_accuracies, val_accuracies, test_accuracies) = model.fit(
             np.array(DATA['train']),
             validation,
             np.array(DATA['fold-4']),
             epochs=epochs,
             initial_lr=initial_lr,
             final_lr=final_lr)
        print(val_losses, test_losses)

        helper.plot_loss([train_losses, val_losses, test_losses],
                         epochs=epochs,
                         name="sigmoid_loss")
        helper.plot_accuracy(
            [train_accuracies, val_accuracies, test_accuracies],
            epochs=epochs,
            name="sigmoid_accuracy")
        run_stats(model, DATA, tag="sigmoid")

    elif args.question == "3":
        epochs = 4
        initial_lr = 8e-1
        final_lr = 8e-6
        variance = 0.00001

        model = network.MLP([784, 1000, 500, 250, 10],
                            activation="relu",
                            variance=variance)

        (train_losses, val_losses, test_losses,
         train_accuracies, val_accuracies, test_accuracies) = model.fit(
             np.array(DATA['train']),
             validation,
             np.array(DATA['fold-4']),
             epochs=epochs,
             initial_lr=initial_lr,
             final_lr=final_lr)
        print(val_losses, test_losses)

        helper.plot_loss([train_losses, val_losses, test_losses],
                         epochs=epochs,
                         name="relu_loss")
        helper.plot_accuracy(
            [train_accuracies, val_accuracies, test_accuracies],
            epochs=epochs,
            name="sigmoid_accuracy")
        run_stats(model, DATA, tag="relu")

    elif args.question == "4":
        train_data = noise_addition(DATA['train'], sigma=1e-3)

        model = network.MLP([784, 1000, 500, 250, 10])

        (train_losses, val_losses, test_losses,
         train_accuracies, val_accuracies, test_accuracies) = model.fit(
             np.array(train_data),
             validation,
             np.array(DATA['fold-4']),
             l2=0.1,
             l1=0.01,
             epochs=epochs,
             initial_lr=initial_lr,
             final_lr=final_lr)

        helper.plot_loss([train_losses, val_losses, test_losses],
                         epochs=epochs,
                         name="sigmoid_regularised_loss")
        helper.plot_accuracy(
            [train_accuracies, val_accuracies, test_accuracies],
            epochs=epochs,
            name="sigmoid_regularised_accuracy")
        run_stats(model, DATA, tag="sigmoid_regularised")

    elif args.question == "6":
        epochs = 10
        initial_lr = 8e-4
        final_lr = 8e-6
        variance = 0.001

        model = network.MLP([64, 32, 10])
        train_data = preprocess(DATA['train'])
        val_data = np.array(preprocess(validation))
        test_data = np.array(preprocess(DATA['fold-4']))
        print(val_data.shape)

        (train_losses, val_losses, test_losses,
         train_accuracies, val_accuracies, test_accuracies) = model.fit(
             train_data,
             val_data,
             test_data,
             epochs=epochs,
             l2=0.1,
             l1=0.01,
             initial_lr=initial_lr,
             final_lr=final_lr)
        print(val_losses, test_losses)

        DATA_HOG_fold = {
            'fold-{f}'.format(f=f): preprocess(DATA['fold-{f}'.format(f=f)])
            for f in range(4)
        }

        helper.plot_loss([train_losses, val_losses, test_losses],
                         epochs=epochs,
                         name="sigmoid_HOG_loss")
        helper.plot_accuracy(
            [train_accuracies, val_accuracies, test_accuracies],
            epochs=epochs,
            name="sigmoid_HOG_accuracy")
        run_stats(model, DATA_HOG_fold, tag="sigmoid")

    elif args.question == "7":
        train_data = np.array(preprocess(DATA['train']))
        val_data = np.array(preprocess(validation))
        test_data = np.array(preprocess(DATA['fold-4']))

        svc = svm.SVC(kernel='linear')
        # Decode the one-hot labels back to integer class indices
        labels = np.array([
            np.where(train_data[:, 1][x] == 1)[0][0]
            for x in range(len(train_data[:, 1]))
        ])

        # Stack per-sample feature vectors into an (n_samples, n_features)
        # matrix; scikit-learn expects a 1-D label array
        train_data = np.vstack(train_data[:, 0])
        svc.fit(train_data, labels)

        y_true = np.array([
            np.where(test_data[:, 1][x] == 1)[0][0]
            for x in range(len(test_data[:, 1]))
        ])
        test_data = np.vstack(test_data[:, 0])
        y_pred = svc.predict(test_data)

        print(sklearn.metrics.accuracy_score(y_true, y_pred))
        print(sklearn.metrics.classification_report(y_true, y_pred))

    else:
        print("Invalid question {}".format(args.question))
Example 3
# Assumed context from earlier in the script (not shown here): itertools,
# matplotlib.pyplot as plt, numpy as np, the local `network` module, plus
# `test_data`, `Training_Data`, and the downsampling rate `d_rate`.
test_image_data, test_label_data = test_data['image'], test_data['label']

print("Testing Images: ", test_image_data.shape)
print("Testing Labels: ", test_label_data.shape)

Testing_Data = []
for i in range(len(test_image_data)):
    Testing_Data.append([test_image_data[i][::d_rate,::d_rate], test_label_data[i]])
np.random.shuffle(Testing_Data)

plt.imshow(Testing_Data[2][0], cmap = 'gray')
plt.title("Class "+ str(Testing_Data[2][1]))
plt.show()
plt.clf()

# Normalize pixels to [0, 1] and flatten each image into a 1-D feature list
for test in Testing_Data:
    test[0] = test[0] / 255.0
    chain = itertools.chain(*test[0])
    test[0] = list(chain)
    
    # One-hot encode the integer label
    one_hot = [0] * 10
    one_hot[int(test[1])] = 1
    test[1] = one_hot

# Print a sample to make sure the data loaded correctly
print("Label: ", Testing_Data[2][1])

# Arguments: (layer sizes, n_outputs, RandomInit, LearningRate, Epochs)
hidden = 196
nn = network.MLP([int(784 / d_rate / d_rate), hidden, hidden // 2, hidden // 2, 2],
                 n_outputs=10, RandomInit=True, LearningRate=0.01, Epochs=20)
nn.Trainer(Training_Data, Testing_Data, Visualize=True)
Example 4
    "training_percentage": 0.85,
    "n_input": 22,
    "n_classes": 1,
    "learning_rate": 0.01,
    "batch_size": 128,
    "save_step": 60000,
    "training_iterations": 1000000,
    "training": True,
    "dropout": 0.75
}

x = tf.placeholder("float", [None, config["n_input"]])
y = tf.placeholder("float", [None])
keep_prob = tf.placeholder("float")

pred = network.MLP(x, keep_prob, config)
n_samples = tf.cast(tf.shape(x)[0], tf.float32)
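# Cost: mean squared error (halved) over the batch; the MLP is used as a regressor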
cost = tf.reduce_sum(tf.pow(pred-y, 2))/(2*n_samples)
optimizer = tf.train.AdamOptimizer(learning_rate=config["learning_rate"]).minimize(cost)
accuracy = tf.reduce_mean(tf.abs(tf.subtract(pred, y)))  # mean absolute error, reported as "accuracy"

d = data.Data(config)

# Launch the graph
with tf.Session() as sess:
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    sess.run(init)
    # saver = tf.train.Saver()
    # saver.restore(sess, "models/23/n-hidden-16/660000.ckpt")
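
    # A minimal training loop could follow here. This is a sketch, not the
    # source's code; next_batch is a hypothetical accessor, since the real
    # data.Data API is not shown in this snippet.
    for step in range(config["training_iterations"]):
        batch_x, batch_y = d.next_batch(config["batch_size"])  # hypothetical helper
        _, loss_val = sess.run([optimizer, cost],
                               feed_dict={x: batch_x,
                                          y: batch_y,
                                          keep_prob: config["dropout"]})
        if step > 0 and step % config["save_step"] == 0:
            saver.save(sess, "models/step-{}.ckpt".format(step))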
Example 5
# Assumed module-level imports (this snippet omits them): argparse, random,
# numpy as np, chainer, from functools import partial, from chainer import
# training, from chainer.training import extensions, plus the local modules
# network, dataset, separate, h_net, updater, accuracy and the local helpers
# load_data, load_npz, random_assignment, Dataset, Updater.
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=256,
                        help='Number of images in each mini-batch')
    parser.add_argument('--batchsize2',
                        '-b2',
                        type=int,
                        default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--data_type', '-d', type=str, default='LSHTC1')
    parser.add_argument('--model_type', '-m', type=str, default='DocModel')
    parser.add_argument('--model_path',
                        '-mp',
                        type=str,
                        default='./models/ResNet50_model_500.npz')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--cluster', '-c', type=int, default=100)
    parser.add_argument('--weight_decay', '-w', type=float, default=0.0000)
    parser.add_argument('--unit', '-u', type=int, default=300)
    parser.add_argument('--alpha', '-a', type=float, default=0.005)
    parser.add_argument('--epoch', '-e', type=int, default=10)
    parser.add_argument('--epoch2', '-e2', type=int, default=10)
    parser.add_argument('--mu', '-mu', type=float, default=30.0)
    parser.add_argument('--out', '-o', type=str, default='results')

    parser.add_argument('--train_file',
                        '-train_f',
                        type=str,
                        default='dataset/LSHTC1/LSHTC1_selected03.train')
    parser.add_argument('--test_file',
                        '-test_f',
                        type=str,
                        default='dataset/LSHTC1/LSHTC1_selected03.test')

    parser.add_argument('--train_instance',
                        '-train_i',
                        type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--train_label',
                        '-train_l',
                        type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--test_instance',
                        '-test_i',
                        type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--test_label',
                        '-test_l',
                        type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')

    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='resume the training from snapshot')
    parser.add_argument('--resume2',
                        '-r2',
                        default='',
                        help='resume the training from snapshot')
    parser.add_argument('--optimizer', '-op', type=str, default='Adam')
    parser.add_argument('--optimizer2', '-op2', type=str, default='Adam')
    parser.add_argument('--initial_lr', type=float, default=0.05)
    parser.add_argument('--lr_decay_rate', type=float, default=0.5)
    parser.add_argument('--lr_decay_epoch', type=float, default=25)
    parser.add_argument('--random',
                        action='store_true',
                        default=False,
                        help='Use random assignment or not')
    parser.add_argument('--valid',
                        '--v',
                        action='store_true',
                        help='Use the validation split or not')
    args = parser.parse_args()

    random.seed(args.seed)
    np.random.seed(args.seed)

    gpu = args.gpu
    data_type = args.data_type
    model_type = args.model_type
    num_clusters = args.cluster
    initial_lr = args.initial_lr
    lr_decay_rate = args.lr_decay_rate
    lr_decay_epoch = args.lr_decay_epoch
    opt1 = args.optimizer
    opt2 = args.optimizer2
    model_path = args.model_path
    rand_assign = args.random
    train_file = args.train_file
    test_file = args.test_file

    unit = args.unit
    alpha = args.alpha
    sparse = False

    ndim = 1
    n_in = None
    train_transform = None
    test_transform = None
    if data_type == 'toy':
        model = network.LinearModel(2, 2)
        num_classes = 4
    elif data_type == 'mnist':
        num_classes = 10
        if model_type == 'linear':
            model = network.LinearModel(784, num_clusters)
        elif model_type == 'DNN':
            model = network.MLP(1000, num_clusters)
        elif model_type == 'CNN':
            ndim = 3
            model = network.CNN(num_clusters)
        else:
            raise ValueError
    elif data_type == 'cifar100':
        num_classes = 100
        train_transform = partial(dataset.transform,
                                  mean=0.0,
                                  std=1.0,
                                  train=True)
        test_transform = partial(dataset.transform,
                                 mean=0.0,
                                 std=1.0,
                                 train=False)
        if model_type == 'Resnet50':
            model = network.ResNet50(num_clusters)
            n_in = 2048
            load_npz(model_path, model, not_load_list=['fc7'])
        elif model_type == 'VGG':
            model = network.VGG(num_clusters)
            n_in = 1024
            load_npz(model_path, model, not_load_list=['fc6'])
        else:
            raise ValueError
    elif data_type == 'LSHTC1':
        sparse = True
        num_classes = None
        if model_type == 'DocModel':
            model = network.DocModel(n_in=1024, n_mid=unit, n_out=num_clusters)
        elif model_type == 'DocModel2':
            model = network.DocModel2(n_in=1024,
                                      n_mid=unit,
                                      n_out=num_clusters)
        elif model_type == 'linear':
            model = network.LinearModel(n_in=92586, n_out=num_clusters)
        else:
            raise ValueError
    elif data_type == 'Dmoz':
        sparse = True
        num_classes = None
        if model_type == 'DocModel':
            model = network.DocModel(n_in=561127,
                                     n_mid=unit,
                                     n_out=num_clusters)
        elif model_type == 'linear':
            model = network.LinearModel(n_in=1024, n_out=num_clusters)
        else:
            raise ValueError
    else:
        num_classes = 10
        if model_type == 'Resnet50':
            model = network.ResNet50(num_clusters)
        elif model_type == 'Resnet101':
            model = network.ResNet101(num_clusters)
        elif model_type == 'VGG':
            model = network.VGG(num_clusters)
        elif model_type == 'CNN':
            model = network.CNN(num_clusters)
        else:
            raise ValueError

    if gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    (train_instances, train_labels), (test_instances, test_labels), num_classes \
        = load_data(data_type, ndim, train_file, test_file)

    # Stage 1: obtain a class-to-cluster assignment, either at random or by
    # first training a clustering model.
    if rand_assign:
        assignment, count_classes = random_assignment(num_clusters,
                                                      num_classes)
    else:
        if opt1 == 'Adam':
            optimizer = chainer.optimizers.Adam(alpha=alpha)
        else:
            optimizer = chainer.optimizers.SGD(lr=alpha)
        optimizer.setup(model)

        train = Dataset(train_instances, train_labels, sparse)
        test = Dataset(test_instances, test_labels, sparse)

        train_iter = chainer.iterators.SerialIterator(
            train, batch_size=args.batchsize)

        train_updater = Updater(model,
                                train,
                                train_iter,
                                optimizer,
                                num_clusters=num_clusters,
                                device=gpu,
                                mu=args.mu)

        trainer = training.Trainer(train_updater, (args.epoch, 'epoch'),
                                   out=args.out)

        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'iteration', 'main/loss', 'main/loss_cc',
                'main/loss_mut_info', 'main/H_Y', 'main/H_YX', 'elapsed_time'
            ]))
        trainer.extend(extensions.snapshot(), trigger=(5, 'epoch'))

        if args.resume:
            chainer.serializers.load_npz(args.resume, trainer)

        trainer.run()
        """
        end clustering
        """
        """
        res, ss = check_cluster(model, train, num_classes, num_clusters, device=gpu)
        res_sum = tuple(0 for _ in range(num_clusters))
        for i in range(num_classes):
            res_sum = tuple(res_sum[j] + res[i][j] for j in range(num_clusters))
        print(res, res_sum, ss)
        """
        """
        res, ss = check_cluster(model, test, num_classes, num_clusters, device=gpu)
        res_sum = tuple(0 for _ in range(num_clusters))
        for i in range(num_classes):
            res_sum = tuple(res_sum[j] + res[i][j] for j in range(num_clusters))
        """
        cluster_label = separate.det_cluster(model,
                                             train,
                                             num_classes,
                                             batchsize=128,
                                             device=gpu,
                                             sparse=sparse)

        assignment, count_classes = separate.assign(cluster_label, num_classes,
                                                    num_clusters)

        del optimizer
        del train_iter
        del train_updater
        del trainer
        del train
        del test

    print(count_classes)
    """
    start classification
    """
    model = h_net.HierarchicalNetwork(model,
                                      num_clusters,
                                      count_classes,
                                      n_in=n_in)
    if opt2 == 'Adam':
        optimizer2 = chainer.optimizers.Adam(alpha=initial_lr)
    elif opt2 == 'SGD':
        optimizer2 = chainer.optimizers.SGD(lr=initial_lr)
    else:
        optimizer2 = chainer.optimizers.MomentumSGD(lr=initial_lr)
    optimizer2.setup(model)
    if args.weight_decay > 0:
        optimizer2.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    if gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    train = dataset.Dataset(train_instances,
                            train_labels,
                            assignment,
                            _transform=train_transform,
                            sparse=sparse)
    test = dataset.Dataset(test_instances,
                           test_labels,
                           assignment,
                           _transform=test_transform,
                           sparse=sparse)

    train_iter = chainer.iterators.SerialIterator(train,
                                                  batch_size=args.batchsize2)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 batch_size=1,
                                                 repeat=False)

    train_updater = updater.Updater(model,
                                    train,
                                    train_iter,
                                    optimizer2,
                                    num_clusters,
                                    device=gpu)

    trainer = training.Trainer(train_updater, (args.epoch2, 'epoch'), args.out)

    acc = accuracy.Accuracy(model, assignment, num_clusters)
    trainer.extend(extensions.Evaluator(test_iter, acc, device=gpu))
    """
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=(20, 'epoch'))
    """
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/loss_cluster', 'main/loss_class',
            'validation/main/accuracy', 'validation/main/cluster_accuracy',
            'validation/main/loss', 'elapsed_time'
        ]))

    if opt2 != 'Adam':
        trainer.extend(extensions.ExponentialShift('lr', lr_decay_rate),
                       trigger=(lr_decay_epoch, 'epoch'))

    if args.resume2:
        chainer.serializers.load_npz(args.resume2, trainer)

    trainer.run()
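
As with the first example, this main() presumably pairs with an if __name__ == '__main__' guard further down the file; an invocation like python train.py -d LSHTC1 -m DocModel -g 0 (script name assumed) runs the clustering stage first and then trains the hierarchical classifier.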