def over_fit_small_data():
    data = get_CIFAR10_data()
    np.random.seed(231)
    num_train = 100
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    model = ThreeLayerConvNet(weight_scale=1e-2)

    solver = Solver(model,
                    small_data,
                    num_epochs=15,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=1)
    solver.train()
    plot_loss_acc_history(solver)
Example #2
def overfit_small_data(model=None, epochs=10, num_train=20, verbose=True):

    data = get_CIFAR10_data(dir='datasets/cifar-10-batches-py')
    small_data = {
        'X_train': data['X_train'][:num_train] / 127.0,
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'][:num_train] / 127.0,  # batch size must be constant
        'y_val': data['y_val'][:num_train],
    }

    if model is None:
        input_dim = small_data['X_train'].shape[1:]
        print(input_dim)
        # spatial size halves per stage: 32 -> 16 -> 8 -> 4 -> 2
        model = FlexNet(input_dim=input_dim,
                        num_filters=(8, 8, 16, 16),
                        hidden_dim=(100, ))
        model.print_params()

    print('\n--- Training a few epochs ---')

    solver = Solver(model,
                    small_data,
                    num_epochs=epochs,
                    batch_size=np.minimum(50, num_train),
                    update_rule='sgd',
                    optim_config={
                        'learning_rate': 1e-4,
                    },
                    verbose=verbose,
                    print_every=1)
    solver.train()
    print('Train acc:', solver.train_acc_history[-1])
    return model
Example #3
def test_mnist(num_epochs=60, batch_size=60, learning_rate=3e-3):
    X_train, y_train = get_mnist_data('mnist_train.csv', 50000)
    X_val, y_val = get_mnist_data('mnist_test.csv', 10000)
    hidden_dims = [100, 100, 100]
    # num_train = 48000
    test_data = {
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val
    }
    weight_scale = 2e-2
    bn_model = FullyConnectedNet(hidden_dims,
                                 input_dim=1 * 784,
                                 weight_scale=weight_scale,
                                 use_batchnorm=True)
    bn_solver = Solver(bn_model,
                       test_data,
                       num_epochs=num_epochs,
                       batch_size=batch_size,
                       update_rule='sgd',
                       optim_config={
                           'learning_rate': learning_rate,
                       },
                       verbose=True,
                       print_every=400)
    step, train_accuracies, val_accuracies, loss = bn_solver.train()
    return bn_model, step, train_accuracies, val_accuracies, loss
def RunTwoLayerNet():
    model = TwoLayerNet()
    solver = Solver(model,
                    data,
                    optim_config={'learning_rate': 1e-3,},
                    lr_decay=0.95,
                    print_every=100)
    solver.train()
def RunFullyConnectedNet():
    model = FullyConnectedNet([100, 50], dropout=0.5, use_batchnorm=True)
    #model = FullyConnectedNet([100, 50])
    solver = Solver(model,
                    data,
                    optim_config={'learning_rate': 1e-3,},
                    lr_decay=0.95,
                    print_every=100)
    solver.train()
Example #6
def RunCnnNet():
    model = ThreeLayerConvNet(reg=1e-2)
    solver = Solver(model,
                    data,
                    optim_config={
                        'learning_rate': 1e-5,
                    },
                    lr_decay=0.95,
                    print_every=100)
    solver.train()
Example #7
def RunFullyConnectedNet():
    model = FullyConnectedNet([100, 50], dropout=0.5, use_batchnorm=True)
    solver = Solver(model,
                    data,
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    print_every=100)
    solver.train()
Example #8
def RunTwoLayerNet():
    model = TwoLayerNet()
    solver = Solver(model,
                    data,
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    lr_decay=0.95,
                    print_every=100)
    solver.train()
def RunCnnNet():
    model = ThreeLayerConvNet(weight_scale=0.001, reg=0.001)

    solver = Solver(model,
                    data,
                    num_epochs=1,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=20)
    solver.train()
Example #10
def RunCnnNet():
    model = ThreeLayerConvNet(weight_scale=0.001, reg=0.001)

    solver = Solver(model,
                    data,
                    num_epochs=1,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=20)
    solver.train()
def TwoLayerNetDemo(reg=0.0):
    data = get_CIFAR10_data(9000, 1000)
    model = TwoLayerNet(reg=reg)
    solver = Solver(model, data, update_rule='sgd',
                    optim_config={'learning_rate': 1e-3, },
                    lr_decay=0.95, num_epochs=10,
                    batch_size=100, print_every=100)

    solver.train()

    X_test = data['X_test']
    y_test = data['y_test']
    num_samples = y_test.shape[0]

    acc = solver.predict(X_test, y_test, num_samples)
    print ["Accuracy", acc]
def train_net():
    data = get_CIFAR10_data()
    model = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=500, reg=0.001)

    solver = Solver(model,
                    data,
                    num_epochs=1,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=20)
    solver.train()
    visualize_filters(model)
def ThreeLayerConvNetDemo(batch_size=32, num_filters=9, use_batchnorm=False,
                          weight_scale=1e-2, reg=0.0, update_rule='sgd'):
    data = get_CIFAR10_data(1000, 100)
    hidden_dims = [100, 50]
    model = ThreeLayerConvNet(num_filters=num_filters)

    solver = Solver(model, data, update_rule=update_rule,
                    optim_config={'learning_rate': 1e-3, },
                    lr_decay=0.95, num_epochs=10,
                    batch_size=batch_size, print_every=100)

    solver.train()

    X_test = data['X_test'][1:100]
    y_test = data['y_test'][1:100]
    num_samples = y_test.shape[0]

    acc = solver.predict(X_test, y_test, num_samples)
    print ["Accuracy", acc]
def run_batchsize_experiments(normalization_mode):
    np.random.seed(231)
    # Try training a very deep net with batchnorm
    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    n_epochs = 10
    weight_scale = 2e-2
    batch_sizes = [5, 10, 50]
    lr = 10**(-3.5)
    solver_bsize = batch_sizes[0]

    print('No normalization: batch size = ', solver_bsize)
    model = FullyConnectedNet(hidden_dims,
                              weight_scale=weight_scale,
                              normalization=None)
    solver = Solver(model,
                    small_data,
                    num_epochs=n_epochs,
                    batch_size=solver_bsize,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': lr,
                    },
                    verbose=False)
    solver.train()

    bn_solvers = []
    for b_size in batch_sizes:
        print('Normalization: batch size = ', b_size)
        bn_model = FullyConnectedNet(hidden_dims,
                                     weight_scale=weight_scale,
                                     normalization=normalization_mode)
        bn_solver = Solver(bn_model,
                           small_data,
                           num_epochs=n_epochs,
                           batch_size=b_size,
                           update_rule='adam',
                           optim_config={
                               'learning_rate': lr,
                           },
                           verbose=False)
        bn_solver.train()
        bn_solvers.append(bn_solver)

    return bn_solvers, solver, batch_sizes
def SMALL_CNN():
    num_train = 100
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    model = ThreeLayerConvNet(weight_scale=1e-3)

    solver = Solver(model,
                    small_data,
                    num_epochs=10,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-4,
                    },
                    verbose=True,
                    print_every=20)

    solver.train()
def FullyConnectedNetDemo(dropout=0.5, use_batchnorm=True, HeReLU=False,
                          weight_scale=1e-2, reg=0.0, update_rule='adam',
                          num_epochs=10):
    data = get_CIFAR10_data(19000, 1000)
    hidden_dims = [100, 50]
    model = FullyConnectedNet(hidden_dims=hidden_dims,
                              weight_scale=weight_scale,
                              use_batchnorm=use_batchnorm,
                              HeReLU=HeReLU, reg=reg)

    solver = Solver(model, data, update_rule=update_rule,
                    optim_config={'learning_rate': 1e-3, },
                    lr_decay=0.95, num_epochs=num_epochs,
                    batch_size=100, print_every=100)

    solver.train()

    X_test = data['X_test']
    y_test = data['y_test']
    num_samples = y_test.shape[0]

    acc = solver.predict(X_test, y_test, num_samples)
    print ["Accuracy", acc]
################################################################################

X_val = data['X_val']
y_val = data['y_val']
X_test = data['X_test']
y_test = data['y_test']
learning_rate = 3.1e-4
weight_scale = 2.5e-2
model = FullyConnectedNet([600, 500, 400, 300, 200, 100],
                          weight_scale=weight_scale,
                          dtype=np.float64,
                          reg=0.02)
solver = Solver(model,
                data,
                print_every=500,
                num_epochs=30,
                batch_size=100,
                update_rule='adam',
                optim_config={
                    'learning_rate': learning_rate,
                },
                lr_decay=0.9)

solver.train()

best_model = model
y_test_pred = np.argmax(best_model.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(best_model.loss(data['X_val']), axis=1)
print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())
# Batch together parameters for update rule
# Note that additional hyperparameters may need to be added
# if you change to a different update rule
opcon = {'learning_rate': lr, 'momentum': mm}
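# If you switch to a different update rule, the optim_config keys change too;
# for example, the cs231n 'adam' rule accepts these (illustrative, not used below):
# opcon = {'learning_rate': lr, 'beta1': 0.9, 'beta2': 0.999, 'epsilon': 1e-8}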

###############################################################
""" NO USER INPUTS PAST THIS LINE """
###############################################################


# Initialize model and solver
model = FancyNet(num_filters=nfilter, filter_sizes=sfilter, maxpools=mp,
                 use_spatial_batchnorm=sbn, hidden_dims=hd, use_batchnorm=bn,
                 reg=rg, weight_scale=ws)

solver = Solver(model, data, num_epochs=ne, batch_size=bs, update_rule=uprule,
                optim_config=opcon, lr_decay=lrd, verbose=vb, print_every=pe)

# Optimize the model (this is the part that takes a while)
solver.train()


# Check if this beats the previous best accuracy:
train_hist = solver.train_acc_history
val_hist = solver.val_acc_history
train_best = np.max(train_hist)
val_best = np.max(val_hist)

print('Max training accuracy:', train_best)
print('Max validation accuracy:', val_best)
if val_best > Best_val_acc:
    Best_model = model
Example #19
acc = 0.0
for i in range(1):
    learning_rate = 10**np.random.uniform(low=-9, high=-3, size=5)
    reg = np.random.uniform(low=0.45, high=0.75, size=5)
    dropout = [0.7]  # np.random.uniform(low=0.5, high=0.8, size=2)
    print("\niteration from main:%d\n" % i)
    for dp in dropout:
        for lr in learning_rate:
            for rg in reg:
                model = FullyConnectedNet([hidden_size, hidden_size],
                                          input_dim=1 * 5,
                                          weight_scale=weight_scale, reg=rg,
                                          dtype=np.float64,
                                          normalization="batchnorm",
                                          dropout=dp)
                solver = Solver(model, data,
                                print_every=10, num_epochs=num_epochs,
                                batch_size=batch_size,
                                update_rule="rmsprop", verbose=True,
                                optim_config={
                                    'learning_rate': lr,
                                })
                solver.train()
                result = solver.check_accuracy_for_saccarde(
                    X, y2, num_samples=None, batch_size=546)
                if result > acc:
                    acc = result
                    marker = "cs231n/data_record/3layer_itteration_%d_hidden_%d_lr_%f_end_rg_%f_end_dp_%f_acc_%f" % (i, hidden_size, lr, rg, dp, acc)
                    solver.checkpoint_name = marker
                    solver._save_checkpoint()
                    print("\niteration = %d\n" % i)
                    print("\nlearning rate = %f\n" % lr)
                    print("\nreg = %f\n" % rg)
                    print("\n dropout = %f\n" % dp)
                    print("\ntest accuracy = %f\n" % acc)
Example #20
# #model = ThreeLayerConvNet(num_filters=2, input_dim=input_dim, filter_size=5, hidden_dim=10)
#
# loss, grads = model.loss(X, y)
# print('Initial loss (no regularization): ', loss)
#
# for name in sorted(grads):
#   f = lambda _: model.loss(X, y)[0]
#   grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
#   print('%s relative error: %.2e' % (name, rel_error(grad_num, grads[name])))

data = get_CIFAR10_data()
# for k, v in data.items():
#     print('%s: ' % k, v.shape)

model = ConvNet(weight_scale=0.001, hidden_dim=500, reg=0)

print()

solver = Solver(model, data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 5e-3,
                },
                verbose=True, print_every=100)

describe_solver(solver)

print()

solver.train()
    'y_train': data['y_train'][:num_train],
    'X_val': data['X_val'],
    'y_val': data['y_val'],
}

solvers = {}
dropout_choices = [0, 0.9]
for dropout in dropout_choices:
    model = FullyConnectedNet([500], dropout=dropout)
    print(dropout)

    solver = Solver(model,
                    small_data,
                    num_epochs=25,
                    batch_size=100,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 5e-4,
                    },
                    verbose=True,
                    print_every=100)
    solver.train()
    solvers[dropout] = solver

#%%
# Plot train and validation accuracies of the two models

train_accs = []
val_accs = []
for dropout in dropout_choices:
    solver = solvers[dropout]
    train_accs.append(solver.train_acc_history[-1])
Example #22
weight_scale = [5e-2]
best_acc = -1.0

for ws in weight_scale:
    for lr in learning_rate:
        model = FullyConnectedNet([100, 100, 100, 100],
                                  weight_scale=ws,
                                  dtype=np.float64,
                                  use_batchnorm=False,
                                  reg=1e-2)
        solver = Solver(
            model,
            data,
            print_every=100,
            num_epochs=10,
            batch_size=25,
            update_rule='adam',
            optim_config={
                'learning_rate': lr,
            },
            lr_decay=0.95,  # a parameter worth tuning
            verbose=True)

        solver.train()

        if np.mean(solver.train_acc_history[-10:]) >= best_acc:
            best_model = model
            best_acc = np.mean(solver.train_acc_history[-10:])
            print('ws: ', ws, 'lr: ', lr)
# Continue training on the full dataset
solver = Solver(
    model,
Example #23
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver

np.random.seed(231)
data = get_CIFAR10_data()

num_train = 100
small_data = {
    'X_train': data['X_train'][:num_train],
    'y_train': data['y_train'][:num_train],
    'X_val': data['X_val'],
    'y_val': data['y_val'],
}

model = ThreeLayerConvNet(weight_scale=1e-2)
# print(model.params['W1'].shape)

solver = Solver(model,
                small_data,
                num_epochs=15,
                batch_size=50,
                update_rule='sgd',
                optim_config={
                    'learning_rate': 1e-3,
                },
                verbose=True,
                print_every=1)
solver.train()
Example #24
def batch_normalization_and_initialization():
    """
    We will now run a small experiment to study the interaction of batch normalization
    and weight initialization.

    The first cell will train 8-layer networks both with and without batch normalization
    using different scales for weight initialization. The second cell will plot training
    accuracy, validation set accuracy, and training loss as a function of the weight
    initialization scale.
    """
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()
    np.random.seed(231)
    # Try training a very deep net with batchnorm
    hidden_dims = [50, 50, 50, 50, 50, 50, 50]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    bn_solvers_ws = {}
    solvers_ws = {}
    weight_scales = np.logspace(-4, 0, num=20)
    for i, weight_scale in enumerate(weight_scales):
        print('Running weight scale %d / %d' % (i + 1, len(weight_scales)))
        bn_model = FullyConnectedNet(hidden_dims,
                                     weight_scale=weight_scale,
                                     normalization='batchnorm')
        model = FullyConnectedNet(hidden_dims,
                                  weight_scale=weight_scale,
                                  normalization=None)
        bn_solver = Solver(bn_model,
                           small_data,
                           num_epochs=10,
                           batch_size=50,
                           update_rule='adam',
                           optim_config={'learning_rate': 1e-3},
                           verbose=False,
                           print_every=200)
        bn_solver.train()
        bn_solvers_ws[weight_scale] = bn_solver
        solver = Solver(model,
                        small_data,
                        num_epochs=10,
                        batch_size=50,
                        update_rule='adam',
                        optim_config={'learning_rate': 1e-3},
                        verbose=False,
                        print_every=200)
        solver.train()
        solvers_ws[weight_scale] = solver

    # Plot results of weight scale experiment
    best_train_accs, bn_best_train_accs = [], []
    best_val_accs, bn_best_val_accs = [], []
    final_train_loss, bn_final_train_loss = [], []

    for ws in weight_scales:
        best_train_accs.append(max(solvers_ws[ws].train_acc_history))
        bn_best_train_accs.append(max(bn_solvers_ws[ws].train_acc_history))

        best_val_accs.append(max(solvers_ws[ws].val_acc_history))
        bn_best_val_accs.append(max(bn_solvers_ws[ws].val_acc_history))

        final_train_loss.append(np.mean(solvers_ws[ws].loss_history[-100:]))
        bn_final_train_loss.append(
            np.mean(bn_solvers_ws[ws].loss_history[-100:]))
    """
    semilogx半对数坐标函数:只有一个坐标轴是对数坐标另一个是普通算术坐标。 在下列情况下建议用半对数坐标:
    (1)变量之一在所研究的范围内发生了几个数量级的变化。 
    (2)在自变量由零开始逐渐增大的初始阶段,当自变量的少许变化引起因变量极大变化时,
    此时采用半对数坐标纸,曲线最大变化范围可伸长,使图形轮廓清楚。
    (3)需要将某种函数变换为直线函数关系。
    """
    plt.subplot(3, 1, 1)
    plt.title('Best val accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best val accuracy')
    plt.semilogx(weight_scales, best_val_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_val_accs, '-o', label='batchnorm')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    plt.title('Best train accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best training accuracy')
    plt.semilogx(weight_scales, best_train_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_train_accs, '-o', label='batchnorm')
    plt.legend(ncol=1, loc='upper right')

    plt.subplot(3, 1, 3)
    plt.title('Final training loss vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Final training loss')
    plt.semilogx(weight_scales, final_train_loss, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_final_train_loss, '-o', label='batchnorm')
    plt.legend(ncol=1, loc='lower left')
    plt.gca().set_ylim(1.0, 3.5)

    plt.gcf().set_size_inches(15, 15)
    plt.show()
Example #25
    weight_scale = np.random.choice(weight_scales, 1)[0]
    dropout = np.random.choice(dropouts, 1)[0]
    
    print(lr, weight_scale, dropout)

    model = FullyConnectedNet([100, 100, 100, 100],
                              input_dim=input_dim,
                              num_classes=num_classes,
                              dropout=dropout,
                              use_batchnorm=use_bn,
                              reg=reg,
                              weight_scale=weight_scale, 
                              dtype=np.float64)
    solver = Solver(model, data,
                    num_epochs=25, batch_size=100,
                    update_rule='sgd',
                    optim_config={
                      'learning_rate': lr,
                    },
                    lr_decay=0.95,
                    verbose=False, print_every=100
             )
    solver.train()

    if solver.best_val_acc > best_val_acc:
        best_val_acc = solver.best_val_acc
        print('best_val_acc: ', best_val_acc)

################################################################################
#                              END OF YOUR CODE                                #
################################################################################
# In[15]:

num_train = 100
small_data = {
  'X_train': data['X_train'][:num_train],
  'y_train': data['y_train'][:num_train],
  'X_val': data['X_val'],
  'y_val': data['y_val'],
}

model = ThreeLayerConvNet(weight_scale=1e-2)

solver = Solver(model, small_data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 1e-3,
                },
                verbose=True, print_every=1)
solver.train()


# Plotting the loss, training accuracy, and validation accuracy should show clear overfitting:

# In[16]:

plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')
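
# The accuracy curves promised above can be drawn the same way; a minimal
# sketch following the plotting pattern used elsewhere in these examples:
plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o', label='train')
plt.plot(solver.val_acc_history, '-o', label='val')
plt.legend(loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()
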
input_dim = [3, 32, 32]

model = FirstConvNet(num_filters=[16, 32, 64, 128],
                     filter_size=3,
                     input_dim=input_dim,
                     hidden_dims=[256, 256],
                     dtype=np.float64,
                     use_batchnorm=True,
                     reg=0.05,
                     weight_scale=0.05)

solver = Solver(model,
                data,
                num_epochs=20,
                batch_size=50,
                update_rule='adam',
                optim_config={
                    'learning_rate': 1e-3,
                },
                verbose=True,
                print_every=20)
solver.train()

y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
print('Validation set accuracy: ', (y_val_pred == data['y_val']).mean())
print('Test set accuracy: ', (y_test_pred == data['y_test']).mean())

model2 = FirstConvNet(num_filters=[16, 32, 64, 128],
                      filter_size=3,
                      input_dim=input_dim,
                      hidden_dims=[500, 500],
Exemple #28
0
    'X_train': data['X_train'][:num_train],
    'y_train': data['y_train'][:num_train],
    'X_val': data['X_val'],
    'y_val': data['y_val'],
}

weight_scale = 1e-2  # Experiment with this!
learning_rate = 1e-4  # Experiment with this!
model = FullyConnectedNet([100, 100],
                          weight_scale=weight_scale,
                          dtype=np.float64)
solver = Solver(model,
                small_data,
                print_every=10,
                num_epochs=20,
                batch_size=25,
                update_rule='sgd',
                optim_config={
                    'learning_rate': learning_rate,
                })
solver.train()

plt.plot(solver.loss_history, 'o')
plt.title('Training loss history')
plt.xlabel('Iteration')
plt.ylabel('Training loss')
plt.show()

# %% [markdown]
# Now try to use a five-layer network with 100 units on each layer to overfit 50 training examples. Again, you will have to adjust the learning rate and weight initialization scale, but you should be able to achieve 100% training accuracy within 20 epochs.
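
A minimal sketch of that experiment, assuming the data dictionary and imports from the snippet above; the learning rate and weight scale here are illustrative starting points rather than tuned values:

small_data_50 = {
    'X_train': data['X_train'][:50],
    'y_train': data['y_train'][:50],
    'X_val': data['X_val'],
    'y_val': data['y_val'],
}
# Four hidden layers of 100 units -> a five-layer network.
model = FullyConnectedNet([100, 100, 100, 100],
                          weight_scale=1e-1,
                          dtype=np.float64)
solver = Solver(model,
                small_data_50,
                print_every=10,
                num_epochs=20,
                batch_size=25,
                update_rule='sgd',
                optim_config={'learning_rate': 1e-2})
solver.train()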
Example #29
import matplotlib.pyplot as plt
from cs231n.classifiers.cnn import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver

data = get_CIFAR10_data()
    
from cs231n.classifiers.convnet import *

log = open("log.txt", "w")

for i in range(10):
    lr = np.random.uniform(1e-5, 1e-2)
    model = ConvNet(weight_scale=0.001, hidden_dim=500, reg=0.001, filter_size=3, num_filters=(16, 16, 16, 16))

    print("lr: %e" % (lr))
    solver = Solver(model, data,
                    num_epochs=2, batch_size=50,
                    update_rule="adam",
                    optim_config={"learning_rate":lr},
                    verbose=True, print_every=50)

    solver.train()
    acc = solver.check_accuracy(solver.X_val, solver.y_val)
    log.write("lr: %e, acc: %f\n" % (lr, acc))

log.close()
def main():
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_raw_data()
    for k, v in data.items():
        print('%s: ' % k, v.shape)

    # Get small data for finetuning
    small_data = get_small_data(data, 5000)

    # Network Architecture
    # {conv- [batch norm] - relu - pool}
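    # Assumed tuple layouts (not stated in the source): a conv layer is
    # (num_filters, filter_size, stride, pad) and a pool layer is
    # (pool_height, pool_width, stride).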
    cnn_layer_1 = (64, 3, 1, 1)
    pool_layer_1 = (2, 2, 2)
    layer_1 = (cnn_layer_1, pool_layer_1)
    cnn_layer_2 = (128, 3, 1, 1)
    pool_layer_2 = (2, 2, 2)
    layer_2 = (cnn_layer_2, pool_layer_2)
    cnn_layer_3 = (256, 3, 1, 1)
    pool_layer_3 = (2, 2, 2)
    layer_3 = (cnn_layer_3, pool_layer_3)
    hidden_dims_CNN = (layer_1, layer_2, layer_3)

    # {affine - [batch norm] - relu - [dropout]}
    fc_layer_1 = 256
    drop_layer_1 = 1
    layer_1 = (fc_layer_1, drop_layer_1)
    fc_layer_2 = 128
    drop_layer_2 = 1
    layer_2 = (fc_layer_2, drop_layer_2)
    hidden_dims_FC = (layer_1, layer_2)

    num_classes = 10

    model = ConvNet(input_dim=(3, 32, 32),
                    hidden_dims_CNN=hidden_dims_CNN,
                    hidden_dims_FC=hidden_dims_FC,
                    num_classes=num_classes,
                    weight_scale=1e-2,
                    reg=0.001,
                    dtype=np.float32)

    select_num_train_data = 0
    test_weight_scale = 0
    test_lr = 1

    # Test how much training data is enough
    if select_num_train_data == 1:
        num_train = (500, 1000, 5000, 10000)
        epoch = (20, 10, 2, 1)
        for i in range(0, len(num_train)):
            print('num_train_data : %d' % (num_train[i]))

            small_data = get_small_data(data, num_train[i])
            solver = Solver(model,
                            small_data,
                            num_epochs=epoch[i],
                            batch_size=100,
                            update_rule='sgd_momentum',
                            optim_config={
                                'learning_rate': 1e-3,
                            },
                            verbose=False,
                            print_every=20)

            solver.train()
            print('num_train : %d, train_acc : %f, val_acc : %f' % (
                num_train[i], solver.train_acc_history[-1],
                solver.val_acc_history[-1]))

    # Test settings of weight initialization
    if test_weight_scale == 1:
        weight_scale = (1e-2, 1e-3, -1)
        for i in range(0, len(weight_scale)):
            print('weight_scale : %f' % (weight_scale[i]))
            model = ConvNet(input_dim=(3, 32, 32),
                            hidden_dims_CNN=hidden_dims_CNN,
                            hidden_dims_FC=hidden_dims_FC,
                            num_classes=num_classes,
                            weight_scale=weight_scale[i],
                            reg=0.001,
                            dtype=np.float32)

            solver = Solver(model,
                            small_data,
                            num_epochs=2,
                            batch_size=100,
                            update_rule='sgd_momentum',
                            optim_config={
                                'learning_rate': 1e-3,
                            },
                            verbose=True,
                            print_every=20)
            solver.train()

            print('weight_scale : %f, train_acc : %f, val_acc : %f' % (
                weight_scale[i], solver.train_acc_history[-1],
                solver.val_acc_history[-1]))
    if test_lr == 1:
        lr = (1e-2, 1e-3, 1e-4)
        for i in range(0, len(lr)):
            print('lr : %f' % (lr[i]))
            model = ConvNet(input_dim=(3, 32, 32),
                            hidden_dims_CNN=hidden_dims_CNN,
                            hidden_dims_FC=hidden_dims_FC,
                            num_classes=num_classes,
                            weight_scale=-1,
                            reg=0.001,
                            dtype=np.float32)

            solver = Solver(model,
                            small_data,
                            num_epochs=10,
                            batch_size=100,
                            update_rule='sgd_momentum',
                            optim_config={
                                'learning_rate': lr[i],
                            },
                            verbose=True,
                            print_every=10)
            solver.train()

            print('lr : %f, train_acc : %f, val_acc : %f' % (
                lr[i], solver.train_acc_history[-1],
                solver.val_acc_history[-1]))
    'y_val': data['y_val'],
}

weight_scale = 2e-2
bn_model = FullyConnectedNet(hidden_dims,
                             weight_scale=weight_scale,
                             normalization='batchnorm')
model = FullyConnectedNet(hidden_dims,
                          weight_scale=weight_scale,
                          normalization=None)

bn_solver = Solver(bn_model,
                   small_data,
                   num_epochs=1,
                   batch_size=50,
                   update_rule='adam',
                   optim_config={
                       'learning_rate': 1e-3,
                   },
                   verbose=True,
                   print_every=20)
bn_solver.train()

solver = Solver(model,
                small_data,
                num_epochs=1,
                batch_size=50,
                update_rule='adam',
                optim_config={
                    'learning_rate': 1e-3,
                },
                verbose=True,
Example #32
N_val, _, _, _ = X_val.shape

data = {'X_train': X_train[:num_train, :, :, :], 'y_train': y_train[:num_train],
        'X_val': X_val[:num_val, :, :, :], 'y_val': y_val[:num_val]}

print "Done loading data"

model = ThreeLayerConvNet(input_dim=(1, H, W), num_classes=7,
                          num_filters=num_filters, filter_size=filter_size,
                          weight_scale=weight, hidden_dim=hidden_dim, reg=reg)

solver = Solver(model,
                data,
                num_epochs=num_epochs,
                batch_size=batch_size,
                update_rule='adam',
                optim_config={
                    'learning_rate': learning_rate,
                },
                lr_decay=lr_decay,
                verbose=True,
                print_every=10)

solver.train()


def getConfusionMatrix(y_pred, y_true, numClasses=7, asFraction=True):
    """
    Returns confusion matrix.
    Row: True Value
    Column: Prediction
    Entries: Counts
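    """
    # A minimal body consistent with the docstring above (rows = true labels,
    # columns = predictions); treat this as a sketch, not the original code:
    cm = np.zeros((numClasses, numClasses))
    for t, p in zip(y_true, y_pred):
        cm[int(t), int(p)] += 1
    if asFraction:
        cm = cm / np.maximum(cm.sum(axis=1, keepdims=True), 1)
    return cm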
Example #33
data = get_CIFAR10_data()
for k,v in list(data.items()):
    print(('%s: ' % k, v.shape))

learning_rate = [5e-4]
weight_scale = [2e-2]
best_model = None
best_acc = 0
for wei in weight_scale:
    for lr in learning_rate:
        model = FullyConnectedNet([100,100,100,100], weight_scale=wei)

        solver = Solver(model, data, update_rule='rmsprop', 
                optim_config={'learning_rate':lr,},
                lr_decay=0.8, 
                num_epochs=12, 
                batch_size=100, 
                print_every=100)

        solver.train()
        scores = model.loss(data['X_test'])
        y_pred = np.argmax(scores, axis=1)
        acc = np.mean(y_pred == data['y_test'])
        print('test acc: %f' % acc)
        if acc > best_acc:
            best_model = solver

plt.subplot(2,1,1)
plt.title('Training loss')
plt.plot(best_model.loss_history, 'o')
plt.xlabel('Iteration')
Example #34
def regularization_experiment():
    data = get_CIFAR10_data()

    # Train several otherwise-identical nets with different dropout rates
    np.random.seed(231)
    num_train = 500
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    dropout_choices = [1, 0.9, 0.75, 0.5, 0.25]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], dropout=dropout)
        print(dropout)

        solver = Solver(model,
                        small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule='adam',
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Plot train and validation accuracies of the models
    train_accs = []
    val_accs = []
    for dropout in dropout_choices:
        solver = solvers[dropout]
        train_accs.append(solver.train_acc_history[-1])
        val_accs.append(solver.val_acc_history[-1])

    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history,
                 '-o',
                 label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history,
                 '-o',
                 label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 20)
    plt.show()
Example #35
                                           verbose=False)
        print('%s relative error: %.2e' %
              (name, rel_error(grad_num, grads[name])))

model = TwoLayerNet(reg=1e-1)
solver = None

##############################################################################
# TODO: Use a Solver instance to train a TwoLayerNet that achieves at least  #
# 50% accuracy on the validation set.                                        #
##############################################################################
solver = Solver(model,
                data,
                update_rule='sgd',
                optim_config={
                    'learning_rate': 1e-3,
                },
                lr_decay=0.8,
                num_epochs=10,
                batch_size=100,
                print_every=100)
solver.train()
scores = model.loss(data['X_test'])
y_pred = np.argmax(scores, axis=1)
acc = np.mean(y_pred == data['y_test'])
print('test acc: %f' % acc)
##############################################################################
#                             END OF YOUR CODE                               #
##############################################################################

# Run this cell to visualize training loss and train / val accuracy
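
# A minimal visualization sketch, following the plotting pattern used in the
# other examples here:
plt.subplot(2, 1, 1)
plt.title('Training loss')
plt.plot(solver.loss_history, 'o')
plt.xlabel('Iteration')

plt.subplot(2, 1, 2)
plt.title('Accuracy')
plt.plot(solver.train_acc_history, '-o', label='train')
plt.plot(solver.val_acc_history, '-o', label='val')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.gcf().set_size_inches(15, 12)
plt.show()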
Example #36
        grads['W4'] += self.reg * W4
        loss += 0.5 * self.reg * sum([np.sum(W**2) for W in [W1, W2, W3, W4]])

        return loss, grads


if __name__ == '__main__':
    data = get_CIFAR10_data()
    for k, v in data.items():
        print('%s: ' % k, v.shape)

    model = SixLayerConvNet(weight_scale=1e-2, filter_size=3, reg=1e-3)
    solver = Solver(model,
                    data,
                    num_epochs=10,
                    batch_size=1000,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True,
                    print_every=1)
    solver.train()

    plt.subplot(2, 1, 1)
    plt.plot(solver.loss_history, 'o')
    plt.xlabel('iteration')
    plt.ylabel('loss')

    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, '-o')
    plt.plot(solver.val_acc_history, '-o')
    plt.legend(['train', 'val'], loc='upper left')
    plt.xlabel('epoch')
Example #37
  'y_val': Yte[:],
}

learning_rate = 0.01
reg = 0.0005

model = deeperUNet(input_dim=Xtr[0].shape, h5_file='croped', reg=reg, dtype=np.float32)
# model = UNet(input_dim=(1, Xsz, Xsz), h5_file=None, reg=reg, dtype=np.float32)

solver = Solver(model, data,
                num_epochs=1, batch_size=8,
                update_rule='adam',
                lr_decay=1,
                max_jitter=0,
                h5_file = 'croped2',
                flipOrNot=True,
                optim_config={
                    'learning_rate': learning_rate,  # e.g. 1e-4
                    'beta2': 0.999,
                },
                verbose=True, print_every=1000)

solver.train()
plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')

plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o')
def RunCnnNet():
    model = ThreeLayerConvNet(reg=1e-2)
    solver = Solver(model, data,
                    optim_config={'learning_rate': 1e-5},
                    lr_decay=0.95, print_every=100)
    solver.train()