Code example #1
File: test.py  Project: liviust/cs231n_perso_2
import numpy as np
# FlexNet, Solver, and get_CIFAR10_data are provided elsewhere in this project.


def overfit_small_data(model=None, epochs=10, num_train=20, verbose=True):

    data = get_CIFAR10_data(dir='datasets/cifar-10-batches-py')
    small_data = {
        'X_train': data['X_train'][:num_train] / 127.0,
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'][:num_train] / 127.0,  # batch size must be constant
        'y_val': data['y_val'][:num_train],
    }

    if model is None:
        input_dim = small_data['X_train'].shape[1:]
        print(input_dim)
        # spatial size halves per stage: 32 -> 16 -> 8 -> 4 -> 2
        model = FlexNet(input_dim=input_dim,
                        num_filters=(8, 8, 16, 16),
                        hidden_dim=(100, ))
        model.print_params()

    print('\n--- Training a few epochs ---')

    solver = Solver(model,
                    small_data,
                    num_epochs=epochs,
                    batch_size=np.minimum(50, num_train),
                    update_rule='sgd',
                    optim_config={
                        'learning_rate': 1e-4,
                    },
                    verbose=verbose,
                    print_every=1)
    solver.train()
    print('Train acc:', solver.train_acc_history[-1])
    return model
Code example #2
# Imports assumed from the standard cs231n assignment layout.
import numpy as np
from cs231n.classifiers.cnn import ThreeLayerConvNet
from cs231n.data_utils import get_CIFAR10_data
from cs231n.solver import Solver


def over_fit_small_data():
    data = get_CIFAR10_data()
    np.random.seed(231)
    num_train = 100
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }

    model = ThreeLayerConvNet(weight_scale=1e-2)

    solver = Solver(model,
                    small_data,
                    num_epochs=15,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=1)
    solver.train()
    plot_loss_acc_history(solver)
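
plot_loss_acc_history is not defined in this snippet. A minimal sketch of what it presumably does, assuming only the standard cs231n Solver history attributes (the helper's name and layout here are guesses, not the original implementation):

import matplotlib.pyplot as plt

def plot_loss_acc_history(solver):
    # Hypothetical helper: plot the loss curve and the train/val accuracy curves.
    plt.subplot(2, 1, 1)
    plt.plot(solver.loss_history, 'o')
    plt.title('Training loss')
    plt.xlabel('Iteration')

    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, '-o', label='train')
    plt.plot(solver.val_acc_history, '-o', label='val')
    plt.title('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(loc='lower right')

    plt.gcf().set_size_inches(10, 8)
    plt.show()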
Code example #3
File: BatchNormalization.py  Project: huiyiygy/cs231n
# Imports assumed from the standard cs231n assignment layout.
import numpy as np
from cs231n.classifiers.fc_net import FullyConnectedNet
from cs231n.data_utils import get_CIFAR10_data
from cs231n.solver import Solver


def run_batchsize_experiments(normalization_mode):
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()
    np.random.seed(231)
    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    n_epochs = 10
    weight_scale = 2e-2
    batch_sizes = [5, 10, 50]
    learning_rate = 10**(-3.5)
    solver_bsize = batch_sizes[0]

    print('No normalization: batch size = ', solver_bsize)
    model = FullyConnectedNet(hidden_dims,
                              weight_scale=weight_scale,
                              normalization=None)
    solver = Solver(model,
                    small_data,
                    num_epochs=n_epochs,
                    batch_size=solver_bsize,
                    update_rule='adam',
                    optim_config={'learning_rate': learning_rate},
                    verbose=False)
    solver.train()

    bn_solvers = []
    for b_size in batch_sizes:
        print('Normalization: batch size = ', b_size)
        bn_model = FullyConnectedNet(hidden_dims,
                                     weight_scale=weight_scale,
                                     normalization=normalization_mode)
        bn_solver = Solver(bn_model,
                           small_data,
                           num_epochs=n_epochs,
                           batch_size=b_size,
                           update_rule='adam',
                           optim_config={'learning_rate': learning_rate},
                           verbose=False)
        bn_solver.train()
        bn_solvers.append(bn_solver)

    return bn_solvers, solver, batch_sizes
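
A possible way to call this function and inspect the result (this usage is an assumption, not part of the original snippet):

bn_solvers, baseline_solver, b_sizes = run_batchsize_experiments('batchnorm')
print('baseline best val acc:', max(baseline_solver.val_acc_history))
for b_size, bn_solver in zip(b_sizes, bn_solvers):
    print('batchnorm, batch size %d: best val acc %.3f'
          % (b_size, max(bn_solver.val_acc_history)))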
Code example #4
def train_net():
    data = get_CIFAR10_data()
    model = ThreeLayerConvNet(weight_scale=0.001, hidden_dim=500, reg=0.001)

    solver = Solver(model,
                    data,
                    num_epochs=1,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={
                        'learning_rate': 1e-3,
                    },
                    verbose=True,
                    print_every=20)
    solver.train()
    visualize_filters(model)
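
visualize_filters is not shown in this snippet. A minimal sketch, assuming the stock cs231n visualize_grid helper and the (F, C, HH, WW) layout of ThreeLayerConvNet's first-layer weights W1:

import matplotlib.pyplot as plt
from cs231n.vis_utils import visualize_grid

def visualize_filters(model):
    # Arrange the first-layer conv filters as (F, HH, WW, C) and show them as a grid.
    grid = visualize_grid(model.params['W1'].transpose(0, 2, 3, 1))
    plt.imshow(grid.astype('uint8'))
    plt.axis('off')
    plt.gcf().set_size_inches(5, 5)
    plt.show()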
def TwoLayerNetDemo(reg=0.0):
    data = get_CIFAR10_data(9000, 1000)
    model = TwoLayerNet(reg=reg)
    solver = Solver(model, data, update_rule='sgd',
                    optim_config={'learning_rate': 1e-3, },
                    lr_decay=0.95, num_epochs=10,
                    batch_size=100, print_every=100)

    solver.train()

    X_test = data['X_test']
    y_test = data['y_test']
    num_samples = y_test.shape[0]

    acc = solver.predict(X_test, y_test, num_samples)
    print ["Accuracy", acc]
def ThreeLayerConvNetDemo(batch_size=32, num_filters=9, use_batchnorm=False,
                          weight_scale=1e-2, reg=0.0, update_rule='sgd'):
    data = get_CIFAR10_data(1000, 100)
    hidden_dims = [100, 50]  # defined but unused in this demo
    # Forward the demo's weight_scale and reg; use_batchnorm is accepted but unused here.
    model = ThreeLayerConvNet(num_filters=num_filters,
                              weight_scale=weight_scale, reg=reg)

    solver = Solver(model, data, update_rule=update_rule,
                    optim_config={'learning_rate': 1e-3, },
                    lr_decay=0.95, num_epochs=10,
                    batch_size=batch_size, print_every=100)

    solver.train()

    X_test = data['X_test'][:100]
    y_test = data['y_test'][:100]
    num_samples = y_test.shape[0]

    acc = solver.predict(X_test, y_test, num_samples)
    print ["Accuracy", acc]
def FullyConnectedNetDemo(dropout=0.5, use_batchnorm=True, HeReLU=False,
                          weight_scale=1e-2, reg=0.0, update_rule='adam',
                          num_epochs=10):
    data = get_CIFAR10_data(19000, 1000)
    hidden_dims = [100, 50]
    model = FullyConnectedNet(hidden_dims=hidden_dims,
                              weight_scale=weight_scale,
                              use_batchnorm=use_batchnorm,
                              HeReLU=HeReLU, reg=reg)  # note: the dropout argument is not forwarded

    solver = Solver(model, data, update_rule=update_rule,
                    optim_config={'learning_rate': 1e-3, },
                    lr_decay=0.95, num_epochs=num_epochs,
                    batch_size=100, print_every=100)

    solver.train()

    X_test = data['X_test']
    y_test = data['y_test']
    num_samples = y_test.shape[0]

    acc = solver.predict(X_test, y_test, num_samples)
    print ["Accuracy", acc]
Code example #8
# Imports assumed from the standard cs231n assignment layout.
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.fc_net import FullyConnectedNet
from cs231n.data_utils import get_CIFAR10_data
from cs231n.solver import Solver


def regularization_experiment():
    data = get_CIFAR10_data()

    # Train identical nets with a range of dropout probabilities
    np.random.seed(231)
    num_train = 500
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    solvers = {}
    dropout_choices = [1, 0.9, 0.75, 0.5, 0.25]
    for dropout in dropout_choices:
        model = FullyConnectedNet([500], dropout=dropout)
        print('dropout =', dropout)

        solver = Solver(model,
                        small_data,
                        num_epochs=25,
                        batch_size=100,
                        update_rule='adam',
                        optim_config={
                            'learning_rate': 5e-4,
                        },
                        verbose=True,
                        print_every=100)
        solver.train()
        solvers[dropout] = solver

    # Collect the final train and validation accuracies for each dropout setting
    # (kept for reference; the plots below use the full accuracy histories)
    train_accs = []
    val_accs = []
    for dropout in dropout_choices:
        solver = solvers[dropout]
        train_accs.append(solver.train_acc_history[-1])
        val_accs.append(solver.val_acc_history[-1])

    plt.subplot(3, 1, 1)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].train_acc_history,
                 '-o',
                 label='%.2f dropout' % dropout)
    plt.title('Train accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    for dropout in dropout_choices:
        plt.plot(solvers[dropout].val_acc_history,
                 '-o',
                 label='%.2f dropout' % dropout)
    plt.title('Val accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(ncol=2, loc='lower right')

    plt.gcf().set_size_inches(15, 20)
    plt.show()
Code example #9
predictions_train = predict(train_x, train_y, parameters)
predictions_test = predict(test_x, test_y, parameters)

#%%
# Clean up variables to prevent loading the data multiple times (which may cause memory issues)
try:
    del X_train, y_train
    del X_test, y_test
    print('Cleared previously loaded data.')
except NameError:
    pass

# X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data(num_training=9000, num_validation=1000, num_test=1000)
train_x_orig = X_train
train_y = y_train
test_x_orig = X_test
test_y = y_test


#%%
# Shuffle data, and use one small part
m_train_take = 25000  # only 9000 training examples were loaded above, so this slice caps at the array length
m_test_take = 10000   # likewise, only 1000 test examples were loaded
permutation = list(np.random.permutation(train_x_orig.shape[0]))
train_x_orig = train_x_orig[permutation, :]
train_y = train_y[permutation]

train_x_orig = train_x_orig[0:m_train_take, :]
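
The snippet breaks off here. Presumably the labels and the test split are trimmed the same way; the lines below are a guess at that continuation, not part of the original:

train_y = train_y[0:m_train_take]        # keep labels aligned with the shuffled, sliced inputs
test_x_orig = test_x_orig[0:m_test_take, :]
test_y = test_y[0:m_test_take]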
Code example #10
File: BatchNormalization.py  Project: huiyiygy/cs231n
def batch_normalization_and_initialization():
    """
    We will now run a small experiment to study the interaction of batch normalization
    and weight initialization.

    The first cell will train 8-layer networks both with and without batch normalization
    using different scales for weight initialization. The second cell will plot training
    accuracy, validation set accuracy, and training loss as a function of the weight
    initialization scale.
    """
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()
    np.random.seed(231)
    # Try training a very deep net with batchnorm
    hidden_dims = [50, 50, 50, 50, 50, 50, 50]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'],
        'y_val': data['y_val'],
    }
    bn_solvers_ws = {}
    solvers_ws = {}
    weight_scales = np.logspace(-4, 0, num=20)
    for i, weight_scale in enumerate(weight_scales):
        print('Running weight scale %d / %d' % (i + 1, len(weight_scales)))
        bn_model = FullyConnectedNet(hidden_dims,
                                     weight_scale=weight_scale,
                                     normalization='batchnorm')
        model = FullyConnectedNet(hidden_dims,
                                  weight_scale=weight_scale,
                                  normalization=None)
        bn_solver = Solver(bn_model,
                           small_data,
                           num_epochs=10,
                           batch_size=50,
                           update_rule='adam',
                           optim_config={'learning_rate': 1e-3},
                           verbose=False,
                           print_every=200)
        bn_solver.train()
        bn_solvers_ws[weight_scale] = bn_solver
        solver = Solver(model,
                        small_data,
                        num_epochs=10,
                        batch_size=50,
                        update_rule='adam',
                        optim_config={'learning_rate': 1e-3},
                        verbose=False,
                        print_every=200)
        solver.train()
        solvers_ws[weight_scale] = solver

    # Plot results of weight scale experiment
    best_train_accs, bn_best_train_accs = [], []
    best_val_accs, bn_best_val_accs = [], []
    final_train_loss, bn_final_train_loss = [], []

    for ws in weight_scales:
        best_train_accs.append(max(solvers_ws[ws].train_acc_history))
        bn_best_train_accs.append(max(bn_solvers_ws[ws].train_acc_history))

        best_val_accs.append(max(solvers_ws[ws].val_acc_history))
        bn_best_val_accs.append(max(bn_solvers_ws[ws].val_acc_history))

        final_train_loss.append(np.mean(solvers_ws[ws].loss_history[-100:]))
        bn_final_train_loss.append(
            np.mean(bn_solvers_ws[ws].loss_history[-100:]))
    """
    semilogx半对数坐标函数:只有一个坐标轴是对数坐标另一个是普通算术坐标。 在下列情况下建议用半对数坐标:
    (1)变量之一在所研究的范围内发生了几个数量级的变化。 
    (2)在自变量由零开始逐渐增大的初始阶段,当自变量的少许变化引起因变量极大变化时,
    此时采用半对数坐标纸,曲线最大变化范围可伸长,使图形轮廓清楚。
    (3)需要将某种函数变换为直线函数关系。
    """
    plt.subplot(3, 1, 1)
    plt.title('Best val accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best val accuracy')
    plt.semilogx(weight_scales, best_val_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_val_accs, '-o', label='batchnorm')
    plt.legend(ncol=2, loc='lower right')

    plt.subplot(3, 1, 2)
    plt.title('Best train accuracy vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Best training accuracy')
    plt.semilogx(weight_scales, best_train_accs, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_best_train_accs, '-o', label='batchnorm')
    plt.legend(ncol=1, loc='upper right')

    plt.subplot(3, 1, 3)
    plt.title('Final training loss vs weight initialization scale')
    plt.xlabel('Weight initialization scale')
    plt.ylabel('Final training loss')
    plt.semilogx(weight_scales, final_train_loss, '-o', label='baseline')
    plt.semilogx(weight_scales, bn_final_train_loss, '-o', label='batchnorm')
    plt.legend(ncol=1, loc='lower left')
    plt.gca().set_ylim(1.0, 3.5)

    plt.gcf().set_size_inches(15, 15)
    plt.show()
Code example #11
File: BatchNormalization.py  Project: huiyiygy/cs231n
def check_for_deep_network():
    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data()
    for k, v in data.items():
        print('%s: ' % k, v.shape)
    np.random.seed(231)
    # Try training a very deep net with batchnorm
    hidden_dims = [100, 100, 100, 100, 100]
    num_train = 1000
    small_data = {
        'X_train': data['X_train'][:num_train],
        'y_train': data['y_train'][:num_train],
        'X_val': data['X_val'][:num_train],
        'y_val': data['y_val'][:num_train]
    }
    weight_scale = 2e-2
    reg = 0.01
    bn_model = FullyConnectedNet(hidden_dims,
                                 reg=reg,
                                 weight_scale=weight_scale,
                                 normalization='batchnorm')
    model = FullyConnectedNet(hidden_dims,
                              reg=reg,
                              weight_scale=weight_scale,
                              normalization=None)

    bn_solver = Solver(bn_model,
                       small_data,
                       num_epochs=10,
                       batch_size=50,
                       update_rule='adam',
                       optim_config={'learning_rate': 1e-3},
                       verbose=True,
                       print_every=20)
    bn_solver.train()

    solver = Solver(model,
                    small_data,
                    num_epochs=10,
                    batch_size=50,
                    update_rule='adam',
                    optim_config={'learning_rate': 1e-3},
                    verbose=True,
                    print_every=20)
    solver.train()

    plt.subplot(3, 1, 1)
    plot_training_history('Training loss',
                          'Iteration',
                          solver, [bn_solver],
                          lambda x: x.loss_history,
                          bl_marker='o',
                          bn_marker='o')
    plt.subplot(3, 1, 2)
    plot_training_history('Training accuracy',
                          'Epoch',
                          solver, [bn_solver],
                          lambda x: x.train_acc_history,
                          bl_marker='-o',
                          bn_marker='-o')
    plt.subplot(3, 1, 3)
    plot_training_history('Validation accuracy',
                          'Epoch',
                          solver, [bn_solver],
                          lambda x: x.val_acc_history,
                          bl_marker='-o',
                          bn_marker='-o')
    plt.show()
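
plot_training_history is not included in this excerpt. A minimal sketch consistent with the call sites above, assuming the cs231n notebook's usual signature (title, x-label, baseline solver, list of batch-norm solvers, accessor function, marker styles):

import matplotlib.pyplot as plt

def plot_training_history(title, xlabel, baseline, bn_solvers, plot_fn,
                          bl_marker='.', bn_marker='.', labels=None):
    # Hypothetical helper: one curve for the baseline solver, one per batch-norm solver.
    plt.title(title)
    plt.xlabel(xlabel)
    plt.plot(plot_fn(baseline), bl_marker, label='baseline')
    for i, bn_solver in enumerate(bn_solvers):
        label = labels[i] if labels is not None else 'batchnorm'
        plt.plot(plot_fn(bn_solver), bn_marker, label=label)
    plt.legend(loc='lower right', ncol=2)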
Code example #12
File: cnn_train.py  Project: AKAGB/AI-Study
from cs231n.data_utils import get_CIFAR10_data
from cs231n.classifiers.mycnn import CNN
from cs231n.solver import Solver

dataset = get_CIFAR10_data()

train_data = {
    'X_train': dataset['X_train'],
    'y_train': dataset['y_train'],
    'X_val': dataset['X_val'],
    'y_val': dataset['y_val'],
}

model = CNN()

solver = Solver(model,
                train_data,
                update_rule='adam',
                optim_config={
                    'learning_rate': 0.001,
                },
                lr_decay=0.95,
                num_epochs=50,
                batch_size=100,
                print_every=100)

solver.train()

test_acc = solver.check_accuracy(dataset['X_test'], dataset['y_test'])
print('Test accuracy:', test_acc)
Code example #13
    init_checkpoint = {'model': '',
                       'epoch': 0,
                       'best_val_acc': 0,  # duplicate key removed
                       'best_params': '',
                       'loss_history': [],
                       'train_acc_history': [],
                       'val_acc_history': []}
    name = 'check_0'
    os.mkdir(os.path.join(folder, 'checkpoints', name))
    joblib.dump(init_checkpoint, os.path.join(
        folder, 'checkpoints', name, name + '.pkl'))
    path = folder

    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data(DIR_CS231n)
    for k, v in data.items():
        print('%s: ' % k, v.shape)

    print('The parameters are: ')
    for key, value in conf.items():
        print(key + ': ', value, '\n')

    # Initialize the model instance
    model = ThreeLayerConvNet(input_dim=input_dim,
                              num_filters=num_filters,
                              filter_size=filter_size,
                              hidden_dim=hidden_dim,
                              num_classes=num_classes,
                              weight_scale=weight_scale,
                              reg=reg,
Code example #14
File: FirstConvNet.py  Project: wEEang763162/CS231
    init_checkpoint = {'model': '',
                       'epoch': 0,
                       'best_val_acc': 0,  # duplicate key removed
                       'best_params': '',
                       'loss_history': [],
                       'train_acc_history': [],
                       'val_acc_history': []}
    name = 'check_0'
    os.mkdir(os.path.join(folder, 'checkpoints', name))
    joblib.dump(init_checkpoint, os.path.join(
        folder, 'checkpoints', name, name + '.pkl'))
    path = folder

    # Load the (preprocessed) CIFAR10 data.
    data = get_CIFAR10_data(DIR_CS231n)
    for k, v in data.items():
        print('%s: ' % k, v.shape)

    print('The parameters are: ')
    for key, value in conf.items():
        print(key + ': ', value, '\n')

    # Initialize the model instance
    model = FirstConvNet(input_dim=input_dim,
                         num_filters=num_filters,
                         filter_size=filter_size,
                         hidden_dims=hidden_dims,
                         num_classes=num_classes,
                         weight_scale=weight_scale,
                         reg=reg,
Code example #15
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)
# (49000, 3073) (1000, 3073) (1000, 3073) (500, 3073)

# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)
# =>
# Train data shape:  (49000, 3073)
# Train labels shape:  (49000,)
# Validation data shape:  (1000, 3073)
# Validation labels shape:  (1000,)
# Test data shape:  (1000, 3073)
# Test labels shape:  (1000,)
# dev data shape:  (500, 3073)
# dev labels shape:  (500,)
Code example #16
File: runner.py  Project: kireet/cs231n.github.io
#
# input_dim = (X.shape[1], X.shape[2], X.shape[3])
#
# model = ConvNet(num_filters=2, input_dim=input_dim, filter_size=5, hidden_dim=10, use_batchnorm=True, gradcheck=True)
#
# #model = ThreeLayerConvNet(num_filters=2, input_dim=input_dim, filter_size=5, hidden_dim=10)
#
# loss, grads = model.loss(X, y)
# print 'Initial loss (no regularization): ', loss
#
# for name in sorted(grads):
#   f = lambda _: model.loss(X, y)[0]
#   grad_num = eval_numerical_gradient(f, model.params[name], verbose=False, h=1e-5)
#   print '%s relative error: %.2e' % (name, rel_error(grad_num, grads[name]))

data = get_CIFAR10_data()
#for k, v in data.iteritems():
#  print '%s: ' % k, v.shape

model = ConvNet(weight_scale=0.001, hidden_dim=500, reg=0)

print()

solver = Solver(model, data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 5e-3,
                },
                verbose=True, print_every=100)
Code example #17
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
#%load_ext autoreload
#%autoreload 2


def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


# Load the (preprocessed) CIFAR10 data.

data = get_CIFAR10_data()
for k, v in data.items():
    print('%s: ' % k, v.shape)

# Test the affine_forward function

num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

input_size = num_inputs * np.prod(input_shape)
weight_size = output_dim * np.prod(input_shape)

x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape),
                                                    output_dim)
Code example #18
  X, y = make_data()
  # Build network
  model = Sequential(batch_shape=X.shape)
  model.add(Dense(num_neurons=10))
  model.build(loss=Softmax())
  # Forward + Backward
  loss, grads = model.loss(X, y)
  print('--- Loss sanity check ---')
  print(loss)


# loss_sanity_check()
# test.overfit_small_data(model, num_train=num_train, epochs=20)

total_examples = 3
data = get_CIFAR10_data(dir='datasets/cifar-10-batches-py')
X = data['X_train'][:total_examples, :, :8, :8] / 127.0
y = data['y_train'][:total_examples]

model = Sequential(batch_shape=X.shape, weight_scale=1e-3, reg=0.0, dtype=np.float64)
#model.add(ConvBnRelu(2))
#model.add(Pool(pool_factor=8))
model.add(Dense(num_neurons=10))
model.add(Dense(num_neurons=10))
model.build(loss=Softmax())
model.print_params()

print('--- Train a few epochs ---')
solver = Solver(model, {'X_train': X, 'y_train': y, 'X_val': X, 'y_val': y},
                num_epochs=20, batch_size=3,
                update_rule='sgd',