def main():
    """Train an MLP classifier on MNIST using a manual Chainer training loop.

    Parses command-line options, builds the model (optionally on GPU), runs
    the requested number of epochs with a per-epoch evaluation pass over the
    test set, and finally saves the model and optimizer state as NPZ files.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    model = L.Classifier(network.MLP(args.unit, 10))
    if args.gpu >= 0:
        # Make a specified GPU current (typo fixed; use the modern
        # chainer.backends.cuda path, consistent with the rest of the file)
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()
    train_count = len(train)
    test_count = len(test)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        # Accumulate loss/accuracy weighted by the actual mini-batch size so
        # the epoch mean is exact even for a ragged final batch.
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            for batch in test_iter:
                x_array, t_array = convert.concat_examples(batch, args.gpu)
                x = chainer.Variable(x_array)
                t = chainer.Variable(t_array)
                loss = model(x, t)
                sum_loss += float(loss.data) * len(t.data)
                sum_accuracy += float(model.accuracy.data) * len(t.data)
            test_iter.reset()
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
def main():
    '''
    Main block

    Steps:
    1. Pull data (MNIST)
    2. Initialise network
    3. Train network
    4. Save weights

    Dispatches on the module-level ``args.question`` (presumably parsed by an
    argparse block elsewhere in this file -- confirm) to run one experiment
    variant: sigmoid MLP (1/2/5), ReLU MLP (3), regularised + noisy input (4),
    HOG-preprocessed MLP (6), or a linear SVM baseline (7).
    '''
    DATA = download_mnist.load_mnist()
    # Folds 0-3 form the validation set; fold-4 is held out as the test set.
    validation = []
    for key in ['fold-{f}'.format(f=f) for f in range(4)]:
        validation += DATA[key]
    validation = np.array(validation)

    # Default training schedule (overridden by some questions below).
    epochs = 8
    initial_lr = 8e-3
    final_lr = 8e-6

    if args.question in ["1", "2", "5"]:
        model = network.MLP([784, 1000, 500, 250, 10])
        train_losses, val_losses, test_losses, \
            train_accuracies, val_accuracies, test_accuracies \
            = model.fit(np.array(DATA['train']), validation, np.array(DATA['fold-4']),
                        epochs=epochs,
                        initial_lr=initial_lr,
                        final_lr=final_lr)
        print(val_losses, test_losses)
        helper.plot_loss([train_losses, val_losses, test_losses],
                         epochs=epochs, name="sigmoid_loss")
        helper.plot_accuracy(
            [train_accuracies, val_accuracies, test_accuracies],
            epochs=epochs, name="sigmoid_accuracy")
        run_stats(model, DATA, tag="sigmoid")
    elif args.question == "3":
        # ReLU needs a different schedule and a small init variance.
        epochs = 4
        initial_lr = 8e-1
        final_lr = 8e-6
        variance = 0.00001
        model = network.MLP([784, 1000, 500, 250, 10], activation="relu",
                            variance=variance)
        train_losses, val_losses, test_losses, \
            train_accuracies, val_accuracies, test_accuracies \
            = model.fit(np.array(DATA['train']), validation, np.array(DATA['fold-4']),
                        epochs=epochs,
                        initial_lr=initial_lr,
                        final_lr=final_lr)
        print(val_losses, test_losses)
        helper.plot_loss([train_losses, val_losses, test_losses],
                         epochs=epochs, name="relu_loss")
        # BUG FIX: the accuracy plot was saved as "sigmoid_accuracy" even
        # though this is the ReLU run; renamed to match "relu_loss"/tag.
        helper.plot_accuracy(
            [train_accuracies, val_accuracies, test_accuracies],
            epochs=epochs, name="relu_accuracy")
        run_stats(model, DATA, tag="relu")
    elif args.question == "4":
        # Regularised run: Gaussian input noise + L1/L2 penalties.
        train_data = noise_addition(DATA['train'], sigma=1e-3)
        model = network.MLP([784, 1000, 500, 250, 10])
        train_losses, val_losses, test_losses, \
            train_accuracies, val_accuracies, test_accuracies \
            = model.fit(np.array(train_data), validation, np.array(DATA['fold-4']),
                        l2=0.1,
                        l1=0.01,
                        epochs=epochs,
                        initial_lr=initial_lr,
                        final_lr=final_lr)
        helper.plot_loss([train_losses, val_losses, test_losses],
                         epochs=epochs, name="sigmoid_regularised_loss")
        helper.plot_accuracy(
            [train_accuracies, val_accuracies, test_accuracies],
            epochs=epochs, name="sigmoid_regularised_accuracy")
        run_stats(model, DATA, tag="sigmoid_regularised")
    elif args.question == "6":
        # HOG-preprocessed features -> much smaller network input.
        epochs = 10
        initial_lr = 8e-4
        final_lr = 8e-6
        variance = 0.001  # NOTE(review): assigned but not passed to MLP -- confirm intent
        model = network.MLP([64, 32, 10])
        train_data = preprocess(DATA['train'])
        val_data = np.array(preprocess(validation))
        test_data = np.array(preprocess(DATA['fold-4']))
        print(val_data.shape)
        train_losses, val_losses, test_losses, \
            train_accuracies, val_accuracies, test_accuracies \
            = model.fit(train_data, val_data, test_data,
                        epochs=epochs,
                        l2=0.1,
                        l1=0.01,
                        initial_lr=initial_lr,
                        final_lr=final_lr)
        print(val_losses, test_losses)
        # Preprocessed folds for the stats pass (only fold-0..3 present;
        # presumably run_stats only reads those keys -- verify).
        DATA_HOG_fold = {
            'fold-{f}'.format(f=f): preprocess(DATA['fold-{f}'.format(f=f)])
            for f in range(4)
        }
        helper.plot_loss([train_losses, val_losses, test_losses],
                         epochs=epochs, name="sigmoid_HOG_loss")
        helper.plot_accuracy(
            [train_accuracies, val_accuracies, test_accuracies],
            epochs=epochs, name="sigmoid_HOG_accuracy")
        run_stats(model, DATA_HOG_fold, tag="sigmoid")
    elif args.question == "7":
        # Linear SVM baseline on the preprocessed features.
        train_data = np.array(preprocess(DATA['train']))
        val_data = np.array(preprocess(validation))
        test_data = np.array(preprocess(DATA['fold-4']))
        svc = svm.SVC(kernel='linear')
        # Convert one-hot label vectors back to integer class indices.
        labels = np.array([
            np.where(train_data[:, 1][x] == 1)[0][0]
            for x in range(len(train_data[:, 1]))
        ])
        labels = np.array(labels).reshape((len(labels), 1))
        # NOTE(review): train features are joined with concatenate(axis=1)
        # but test features with vstack -- confirm both yield (n_samples,
        # n_features) for the stored feature orientation.
        train_data = np.concatenate(train_data[:, 0], axis=1)
        svc.fit(train_data, labels)
        y_true = np.array([
            np.where(test_data[:, 1][x] == 1)[0][0]
            for x in range(len(test_data[:, 1]))
        ])
        test_data = np.vstack(test_data[:, 0])
        y_pred = svc.predict(test_data)
        print(sklearn.metrics.accuracy_score(y_true, y_pred))
        print(sklearn.metrics.classification_report(y_true, y_pred))
    else:
        print("Invalid question {}".format(args.question))
# --- Test-set preparation (script fragment) ---
# Relies on `test_data`, `d_rate`, `Training_Data`, `network`, `np`, `plt`
# and `itertools` being defined/imported earlier in the file.
test_image_data, test_label_data = test_data['image'], test_data['label']
print("Testing Images: ", test_image_data.shape)
print("Testing Labels: ", test_label_data.shape)

# Pair each (downsampled) image with its label; `[::d_rate, ::d_rate]`
# keeps every d_rate-th pixel along both axes.
Testing_Data = []
for i in range(len(test_image_data)):
    Testing_Data.append([test_image_data[i][::d_rate, ::d_rate],
                         test_label_data[i]])
np.random.shuffle(Testing_Data)

# Show one sample to eyeball that images and labels line up.
plt.imshow(Testing_Data[2][0], cmap='gray')
plt.title("Class " + str(Testing_Data[2][1]))
plt.show()
plt.clf()

for test in Testing_Data:
    # NOTE(review): divides by 2550, not 255.0 -- pixel values end up in
    # [0, 0.1] rather than [0, 1]. Looks like a typo; confirm this matches
    # whatever scaling the training data received.
    test[0] /= 2550
    # Flatten the 2-D image into a flat feature list.
    chain = itertools.chain(*test[0])
    test[0] = list(chain)
    # One-hot encode the integer label into a 10-way vector.
    foo = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    foo[int(test[1])] = 1
    test[1] = foo

# print a sample to make sure data is loaded correctly
print("Label: ", Testing_Data[2][1])

# arguments: (shape, outputlayer, randominit, learningrate, epochs)
whatever = 196  # hidden-layer width base
nn = network.MLP([int(784 / d_rate / d_rate), int(whatever),
                  int(whatever * 0.5), int(whatever * 0.5), 2],
                 n_outputs=10, RandomInit=True, LearningRate=0.01, Epochs=20)
nn.Trainer(Training_Data, Testing_Data, Visualize=True)
# Fragment: the opening `config = {` of this hyper-parameter dict lies in an
# earlier chunk not shown here.
    "training_percentage": 0.85,   # train/test split ratio
    "n_input": 22,                 # feature dimensionality
    "n_classes": 1,                # single scalar output (regression)
    "learning_rate": 0.01,
    "batch_size": 128,
    "save_step": 60000,            # checkpoint interval (iterations)
    "training_iterations": 1000000,
    "training": True,
    "dropout": 0.75                # dropout keep probability
}

# TF1-style static graph: placeholders fed at session run time.
x = tf.placeholder("float", [None, config["n_input"]])
y = tf.placeholder("float", [None])
keep_prob = tf.placeholder("float")

pred = network.MLP(x, keep_prob, config)
n_samples = tf.cast(tf.shape(x)[0], tf.float32)
# Halved mean squared error, as in classic least-squares formulations.
cost = tf.reduce_sum(tf.pow(pred - y, 2)) / (2 * n_samples)
optimizer = tf.train.AdamOptimizer(learning_rate=config["learning_rate"]).minimize(cost)
# "accuracy" here is actually the mean absolute error of the prediction.
# NOTE(review): tf.sub was renamed tf.subtract in TF 1.0 -- this fragment
# targets a pre-1.0 TensorFlow; confirm the pinned version.
accuracy = tf.reduce_mean(tf.abs(tf.sub(pred, y)))

d = data.Data(config)

# Launch the graph
with tf.Session() as sess:
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    sess.run(init)
    # saver = tf.train.Saver()
    # saver.restore(sess, "models/23/n-hidden-16/660000.ckpt")
def main():
    """Two-stage hierarchical classification experiment (Chainer).

    Stage 1 ("clustering"): train a model that maps the (possibly very many)
    classes to ``--cluster`` clusters, either by optimising a
    mutual-information objective or by random assignment (``--random``).

    Stage 2 ("classification"): wrap the stage-1 model in a hierarchical
    network and train it on the resulting class-to-cluster assignment,
    evaluating accuracy per epoch.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=256,
                        help='Number of images in each mini-batch')
    parser.add_argument('--batchsize2', '-b2', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--data_type', '-d', type=str, default='LSHTC1')
    parser.add_argument('--model_type', '-m', type=str, default='DocModel')
    parser.add_argument('--model_path', '-mp', type=str,
                        default='./models/ResNet50_model_500.npz')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--cluster', '-c', type=int, default=100)
    parser.add_argument('--weight_decay', '-w', type=float, default=0.0000)
    parser.add_argument('--unit', '-u', type=int, default=300)
    parser.add_argument('--alpha', '-a', type=float, default=0.005)
    parser.add_argument('--epoch', '-e', type=int, default=10)
    parser.add_argument('--epoch2', '-e2', type=int, default=10)
    parser.add_argument('--mu', '-mu', type=float, default=30.0)
    parser.add_argument('--out', '-o', type=str, default='results')
    parser.add_argument('--train_file', '-train_f', type=str,
                        default='dataset/LSHTC1/LSHTC1_selected03.train')
    parser.add_argument('--test_file', '-test_f', type=str,
                        default='dataset/LSHTC1/LSHTC1_selected03.test')
    parser.add_argument('--train_instance', '-train_i', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--train_label', '-train_l', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--test_instance', '-test_i', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--test_label', '-test_l', type=str,
                        default='PDSparse/examples/LSHTC1/LSHTC1.train')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--resume', '-r', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--resume2', '-r2', default='',
                        help='resume the training from snapshot')
    parser.add_argument('--optimizer', '-op', type=str, default='Adam')
    parser.add_argument('--optimizer2', '-op2', type=str, default='Adam')
    parser.add_argument('--initial_lr', type=float, default=0.05)
    parser.add_argument('--lr_decay_rate', type=float, default=0.5)
    parser.add_argument('--lr_decay_epoch', type=float, default=25)
    parser.add_argument('--random', action='store_true', default=False,
                        help='Use random assignment or not')
    # BUG FIX: help string was copy-pasted from --random.
    parser.add_argument('--valid', '--v', action='store_true',
                        help='Use the validation split or not')
    args = parser.parse_args()

    # Make runs reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)

    gpu = args.gpu
    data_type = args.data_type
    model_type = args.model_type
    num_clusters = args.cluster
    initial_lr = args.initial_lr
    lr_decay_rate = args.lr_decay_rate
    lr_decay_epoch = args.lr_decay_epoch
    opt1 = args.optimizer
    opt2 = args.optimizer2
    model_path = args.model_path
    rand_assign = args.random
    train_file = args.train_file
    test_file = args.test_file
    unit = args.unit
    alpha = args.alpha
    sparse = False
    ndim = 1
    n_in = None
    train_transform = None
    test_transform = None

    # --- Select the stage-1 (clustering) model per dataset / model type ---
    if data_type == 'toy':
        model = network.LinearModel(2, 2)
        num_classes = 4
    elif data_type == 'mnist':
        num_classes = 10
        if model_type == 'linear':
            model = network.LinearModel(784, num_clusters)
        elif model_type == 'DNN':
            model = network.MLP(1000, num_clusters)
        elif model_type == 'CNN':
            ndim = 3
            model = network.CNN(num_clusters)
        else:
            raise ValueError
    elif data_type == 'cifar100':
        num_classes = 100
        train_transform = partial(dataset.transform, mean=0.0, std=1.0, train=True)
        test_transform = partial(dataset.transform, mean=0.0, std=1.0, train=False)
        if model_type == 'Resnet50':
            model = network.ResNet50(num_clusters)
            n_in = 2048
            # Load pretrained weights, skipping the final layer being replaced.
            load_npz(model_path, model, not_load_list=['fc7'])
        elif model_type == 'VGG':
            model = network.VGG(num_clusters)
            n_in = 1024
            load_npz(model_path, model, not_load_list=['fc6'])
        else:
            raise ValueError
    elif data_type == 'LSHTC1':
        sparse = True
        num_classes = None  # determined later by load_data
        if model_type == 'DocModel':
            model = network.DocModel(n_in=1024, n_mid=unit, n_out=num_clusters)
        elif model_type == 'DocModel2':
            model = network.DocModel2(n_in=1024, n_mid=unit, n_out=num_clusters)
        elif model_type == 'linear':
            model = network.LinearModel(n_in=92586, n_out=num_clusters)
        else:
            raise ValueError
    elif data_type == 'Dmoz':
        sparse = True
        num_classes = None
        if model_type == 'DocModel':
            model = network.DocModel(n_in=561127, n_mid=unit, n_out=num_clusters)
        elif model_type == 'linear':
            model = network.LinearModel(n_in=1024, n_out=num_clusters)
        else:
            raise ValueError
    else:
        num_classes = 10
        if model_type == 'Resnet50':
            model = network.ResNet50(num_clusters)
        elif model_type == 'Resnet101':
            model = network.ResNet101(num_clusters)
        elif model_type == 'VGG':
            model = network.VGG(num_clusters)
        elif model_type == 'CNN':
            model = network.CNN(num_clusters)
        else:
            raise ValueError

    if gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    (train_instances, train_labels), (test_instances, test_labels), num_classes \
        = load_data(data_type, ndim, train_file, test_file)

    if rand_assign:
        # Skip stage-1 training: assign classes to clusters at random.
        assignment, count_classes = random_assignment(num_clusters, num_classes)
    else:
        # --- Stage 1: train the clustering model ---
        if opt1 == 'Adam':
            optimizer = chainer.optimizers.Adam(alpha=alpha)
        else:
            optimizer = chainer.optimizers.SGD(lr=alpha)
        optimizer.setup(model)

        train = Dataset(*(train_instances, train_labels), sparse)
        test = Dataset(*(test_instances, test_labels), sparse)
        train_iter = chainer.iterators.SerialIterator(
            train, batch_size=args.batchsize)
        train_updater = Updater(model, train, train_iter, optimizer,
                                num_clusters=num_clusters, device=gpu,
                                mu=args.mu)
        trainer = training.Trainer(train_updater, (args.epoch, 'epoch'),
                                   out=args.out)
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'iteration', 'main/loss', 'main/loss_cc',
                'main/loss_mut_info', 'main/H_Y', 'main/H_YX', 'elapsed_time'
            ]))
        trainer.extend(extensions.snapshot(), trigger=(5, 'epoch'))
        if args.resume:
            chainer.serializers.load_npz(args.resume, trainer)
        trainer.run()
        """ end clustering """
        """
        res, ss = check_cluster(model, train, num_classes,
                                num_clusters, device=gpu)
        res_sum = tuple(0 for _ in range(num_clusters))
        for i in range(num_classes):
            res_sum = tuple(res_sum[j] + res[i][j]
                            for j in range(num_clusters))
        print(res, res_sum, ss)
        """
        """
        res, ss = check_cluster(model, test, num_classes,
                                num_clusters, device=gpu)
        res_sum = tuple(0 for _ in range(num_clusters))
        for i in range(num_classes):
            res_sum = tuple(res_sum[j] + res[i][j]
                            for j in range(num_clusters))
        """
        # Derive the class-to-cluster assignment from the trained model.
        cluster_label = separate.det_cluster(model, train, num_classes,
                                             batchsize=128, device=gpu,
                                             sparse=sparse)
        assignment, count_classes = separate.assign(cluster_label,
                                                    num_classes, num_clusters)
        # Free stage-1 objects before building the (large) stage-2 model.
        del optimizer
        del train_iter
        del train_updater
        del trainer
        del train
        del test

    print(count_classes)
    """ start classification """
    # --- Stage 2: hierarchical classifier on top of the stage-1 model ---
    model = h_net.HierarchicalNetwork(model, num_clusters, count_classes,
                                      n_in=n_in)
    if opt2 == 'Adam':
        optimizer2 = chainer.optimizers.Adam(alpha=initial_lr)
    elif opt2 == 'SGD':
        optimizer2 = chainer.optimizers.SGD(lr=initial_lr)
    else:
        optimizer2 = chainer.optimizers.MomentumSGD(lr=initial_lr)
    optimizer2.setup(model)
    if args.weight_decay > 0:
        optimizer2.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))

    if gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    train = dataset.Dataset(train_instances, train_labels, assignment,
                            _transform=train_transform, sparse=sparse)
    test = dataset.Dataset(test_instances, test_labels, assignment,
                           _transform=test_transform, sparse=sparse)
    train_iter = chainer.iterators.SerialIterator(
        train, batch_size=args.batchsize2)
    test_iter = chainer.iterators.SerialIterator(
        test, batch_size=1, repeat=False)
    train_updater = updater.Updater(model, train, train_iter, optimizer2,
                                    num_clusters, device=gpu)
    trainer = training.Trainer(train_updater, (args.epoch2, 'epoch'), args.out)
    acc = accuracy.Accuracy(model, assignment, num_clusters)
    trainer.extend(extensions.Evaluator(test_iter, acc, device=gpu))
    """
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=(20, 'epoch'))
    """
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch')))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/loss_cluster', 'main/loss_class',
            'validation/main/accuracy', 'validation/main/cluster_accuracy',
            'validation/main/loss', 'elapsed_time'
        ]))
    if opt2 != 'Adam':
        # Step-decay the learning rate for SGD-family optimizers
        # (Adam manages its own effective step size).
        trainer.extend(extensions.ExponentialShift('lr', lr_decay_rate),
                       trigger=(lr_decay_epoch, 'epoch'))
    if args.resume2:
        chainer.serializers.load_npz(args.resume2, trainer)
    trainer.run()