Example 1
def cnn_run_dropout_maxout(data_path, num_rows, num_cols, num_channels,
                           input_path, pred_path):
    t = time.time()
    sub_window = gen_center_sub_window(76, num_cols)
    # 'ds' was undefined in the original snippet; assumed to be loaded from
    # data_path with the load_data helper used in Example 6.
    ds = load_data(data_path, num_rows, num_cols, num_channels)
    trn = SarDataset(ds[0][0], ds[0][1], sub_window)
    vld = SarDataset(ds[1][0], ds[1][1], sub_window)
    tst = SarDataset(ds[2][0], ds[2][1], sub_window)
    print 'Take {}s to read data'.format(time.time() - t)
    t = time.time()
    batch_size = 100
    # Defined but never added to the network below; layer_name changed from
    # 'h2' (which collides with hidden_layer) to 'h1'.
    h1 = maxout.Maxout(layer_name='h1', num_units=1, num_pieces=100, irange=.1)
    hidden_layer = mlp.ConvRectifiedLinear(layer_name='h2',
                                           output_channels=8,
                                           irange=0.05,
                                           kernel_shape=[5, 5],
                                           pool_shape=[2, 2],
                                           pool_stride=[2, 2],
                                           max_kernel_norm=1.9365)
    hidden_layer2 = mlp.ConvRectifiedLinear(layer_name='h3',
                                            output_channels=8,
                                            irange=0.05,
                                            kernel_shape=[5, 5],
                                            pool_shape=[2, 2],
                                            pool_stride=[2, 2],
                                            max_kernel_norm=1.9365)
    #output_layer = mlp.Softplus(dim=1,layer_name='output',irange=0.1)
    output_layer = mlp.Linear(dim=1, layer_name='output', irange=0.05)
    trainer = sgd.SGD(learning_rate=0.001,
                      batch_size=100,
                      termination_criterion=EpochCounter(2000),
                      cost=dropout.Dropout(),
                      train_iteration_mode='even_shuffled_sequential',
                      monitor_iteration_mode='even_shuffled_sequential',
                      monitoring_dataset={
                          'test': tst,
                          'valid': vld,
                          'train': trn
                      })
    layers = [hidden_layer, hidden_layer2, output_layer]
    input_space = space.Conv2DSpace(shape=[num_rows, num_cols],
                                    num_channels=num_channels)

    ann = mlp.MLP(layers, input_space=input_space, batch_size=batch_size)
    watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective',
                                               save_path='sar_cnn_mlp.pkl')
    experiment = Train(dataset=trn,
                       model=ann,
                       algorithm=trainer,
                       extensions=[watcher])
    print 'Take {}s to compile code'.format(time.time() - t)
    t = time.time()
    experiment.main_loop()
    print 'Training time: {}s'.format(time.time() - t)
    serial.save('cnn_hhv_{0}_{1}.pkl'.format(num_rows, num_cols),
                ann,
                on_overwrite='backup')

    # read HH and HV polarizations into a 3D numpy array
    image = read_hhv(input_path)
    return ann, sar_predict(ann, image, pred_path)
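
A minimal sketch of reloading the best model saved by MonitorBasedSaveBest above and compiling a prediction function, following the fprop pattern from Example 4; the pickle name matches the save_path above, and the batch layout must match the Conv2DSpace (default axes ('b', 0, 1, 'c')).

import theano
from pylearn2.utils import serial

model = serial.load('sar_cnn_mlp.pkl')
X = model.get_input_space().make_theano_batch()
predict = theano.function([X], model.fprop(X))
# predict(batch) now returns the network output for a correctly-shaped batch.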
Example 2
def train(d):
    print 'Creating dataset'
    # load mnist here
    # X = d.train_X
    # y = d.train_Y
    # test_X = d.test_X
    # test_Y = d.test_Y
    # nb_classes = len(np.unique(y))
    # train_y = convert_one_hot(y)
    # train_set = DenseDesignMatrix(X=X, y=y)
    train = DenseDesignMatrix(X=d.train_X, y=convert_one_hot(d.train_Y))
    valid = DenseDesignMatrix(X=d.valid_X, y=convert_one_hot(d.valid_Y))
    test = DenseDesignMatrix(X=d.test_X, y=convert_one_hot(d.test_Y))

    print 'Setting up'
    batch_size = 1000
    # Note: 'conv' and 'sigmoid' below are defined but not used in the final net.
    conv = mlp.ConvRectifiedLinear(
        layer_name='c0',
        output_channels=20,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[2, 2],
        # W_lr_scale=0.25,
        max_kernel_norm=1.9365)
    # MaxoutConvC01B requires the cuda-convnet GPU wrappers and c01b axes.
    mout = MaxoutConvC01B(layer_name='m0',
                          num_pieces=4,
                          num_channels=96,
                          irange=.05,
                          kernel_shape=[5, 5],
                          pool_shape=[4, 4],
                          pool_stride=[2, 2],
                          W_lr_scale=0.25,
                          max_kernel_norm=1.9365)
    mout2 = MaxoutConvC01B(layer_name='m1',
                           num_pieces=4,
                           num_channels=96,
                           irange=.05,
                           kernel_shape=[5, 5],
                           pool_shape=[4, 4],
                           pool_stride=[2, 2],
                           W_lr_scale=0.25,
                           max_kernel_norm=1.9365)
    sigmoid = mlp.Sigmoid(
        layer_name='Sigmoid',
        dim=500,
        sparse_init=15,
    )
    smax = mlp.Softmax(layer_name='y', n_classes=10, irange=0.)
    in_space = Conv2DSpace(shape=[28, 28],
                           num_channels=1,
                           axes=['c', 0, 1, 'b'])
    net = mlp.MLP(
        layers=[mout, mout2, smax],
        input_space=in_space,
        # nvis=784,
    )
    # This BGD trainer is defined but immediately overridden by the SGD trainer below.
    trainer = bgd.BGD(batch_size=batch_size,
                      line_search_mode='exhaustive',
                      conjugate=1,
                      updates_per_batch=10,
                      monitoring_dataset={
                          'train': train,
                          'valid': valid,
                          'test': test
                      },
                      termination_criterion=termination_criteria.MonitorBased(
                          channel_name='valid_y_misclass'))
    trainer = sgd.SGD(learning_rate=0.15,
                      cost=dropout.Dropout(),
                      batch_size=batch_size,
                      monitoring_dataset={
                          'train': train,
                          'valid': valid,
                          'test': test
                      },
                      termination_criterion=termination_criteria.MonitorBased(
                          channel_name='valid_y_misclass'))
    trainer.setup(net, train)
    epoch = 0
    # Use the trainer's termination criterion instead of looping forever
    # (same pattern as Example 4).
    while trainer.continue_learning(net):
        print 'Training...', epoch
        trainer.train(dataset=train)
        net.monitor()
        epoch += 1
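
The convert_one_hot helper is called above but never shown; a plausible implementation (an assumption), assuming integer class labels starting at 0, mirrors the manual one-hot loop in Example 3.

import numpy as np

def convert_one_hot(labels):
    # Map integer class labels to one-hot rows (assumed implementation).
    labels = np.asarray(labels, dtype='int64')
    one_hot = np.zeros((labels.shape[0], labels.max() + 1))
    one_hot[np.arange(labels.shape[0]), labels] = 1.0
    return one_hot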
Example 3
        # Tail of PlanktonData.__init__ (a DenseDesignMatrix subclass, per the
        # super() call below); the class and method headers were cut off in this snippet.
        X = train_d[0]
        Y = train_d[1]
        Y_oneHot = np.zeros((np.shape(X)[0], numClasses))
        for i, val in enumerate(Y):
            Y_oneHot[i, val] = 1.0
        super(PlanktonData, self).__init__(X=X, y=Y_oneHot)


ds = PlanktonData()

# 2. Create convolutional layers
print 'Creating network layers'
layerh2 = mlp.ConvRectifiedLinear(layer_name='h2',
                                  output_channels=64,
                                  irange=.05,
                                  kernel_shape=[5, 5],
                                  pool_shape=[4, 4],
                                  pool_stride=[2, 2],
                                  max_kernel_norm=1.9365)

layerh3 = mlp.ConvRectifiedLinear(layer_name='h3',
                                  output_channels=64,
                                  irange=.05,
                                  kernel_shape=[5, 5],
                                  pool_shape=[4, 4],
                                  pool_stride=[2, 2],
                                  max_kernel_norm=1.9365)
# Note: changed the number of classes
layery = mlp.Softmax(max_col_norm=1.9365,
                     layer_name='y',
                     n_classes=121,
                     irange=0.)  # irange assumed; the original snippet was truncated here
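
The snippet stops after the layer definitions; below is a sketch of how they would be assembled and trained, following the pattern of the other examples. The image size, learning rate, batch size, and epoch count are assumptions, and the dataset is assumed to provide a topological view matching the Conv2DSpace.

from pylearn2.space import Conv2DSpace
from pylearn2.train import Train
from pylearn2.training_algorithms import sgd
from pylearn2.termination_criteria import EpochCounter

net = mlp.MLP(layers=[layerh2, layerh3, layery],
              input_space=Conv2DSpace(shape=[28, 28], num_channels=1))  # shape assumed
trainer = sgd.SGD(learning_rate=0.01, batch_size=100,
                  termination_criterion=EpochCounter(100),
                  monitoring_dataset={'train': ds})
Train(dataset=ds, model=net, algorithm=trainer).main_loop()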
Example 4
def train(d=None):
    train_X = np.array(d.train_X)
    train_y = np.array(d.train_Y)
    valid_X = np.array(d.valid_X)
    valid_y = np.array(d.valid_Y)
    test_X = np.array(d.test_X)
    test_y = np.array(d.test_Y)
    nb_classes = len(np.unique(train_y))
    train_y = convert_one_hot(train_y)
    valid_y = convert_one_hot(valid_y)
    # train_set = RotationalDDM(X=train_X, y=train_y)
    train_set = DenseDesignMatrix(X=train_X, y=train_y)
    valid_set = DenseDesignMatrix(X=valid_X, y=valid_y)
    print 'Setting up'
    batch_size = 100
    c0 = mlp.ConvRectifiedLinear(
        layer_name='c0',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[2, 2],
        # W_lr_scale=0.25,
        max_kernel_norm=1.9365)
    c1 = mlp.ConvRectifiedLinear(
        layer_name='c1',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[2, 2],
        # W_lr_scale=0.25,
        max_kernel_norm=1.9365)
    c2 = mlp.ConvRectifiedLinear(
        layer_name='c2',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[5, 4],
        W_lr_scale=0.25,
        # max_kernel_norm=1.9365
    )
    sp0 = mlp.SoftmaxPool(
        detector_layer_dim=16,
        layer_name='sp0',
        pool_size=4,
        sparse_init=512,
    )
    sp1 = mlp.SoftmaxPool(
        detector_layer_dim=16,
        layer_name='sp1',
        pool_size=4,
        sparse_init=512,
    )
    r0 = mlp.RectifiedLinear(
        layer_name='r0',
        dim=512,
        sparse_init=512,
    )
    r1 = mlp.RectifiedLinear(
        layer_name='r1',
        dim=512,
        sparse_init=512,
    )
    s0 = mlp.Sigmoid(
        layer_name='s0',
        dim=500,
        # max_col_norm=1.9365,
        sparse_init=15,
    )
    out = mlp.Softmax(
        n_classes=nb_classes,
        layer_name='output',
        irange=.0,
        # max_col_norm=1.9365,
        # sparse_init=nb_classes,
    )
    epochs = EpochCounter(100)
    # Only the sigmoid and softmax layers are used; the conv, softmax-pool and
    # rectifier layers defined above are left over from experimentation.
    layers = [s0, out]
    decay_coeffs = [.00005, .00005, .00005]
    in_space = Conv2DSpace(
        shape=[d.size, d.size],
        num_channels=1,
    )
    vec_space = VectorSpace(d.size**2)
    nn = mlp.MLP(
        layers=layers,
        # input_space=in_space,
        nvis=d.size**2,
        # batch_size=batch_size,
    )
    # This SGD trainer is defined but immediately overridden by the BGD trainer below.
    trainer = sgd.SGD(
        learning_rate=0.01,
        # cost=SumOfCosts(costs=[
        # dropout.Dropout(),
        #     MethodCost(method='cost_from_X'),
        # WeightDecay(decay_coeffs),
        # ]),
        # cost=MethodCost(method='cost_from_X'),
        batch_size=batch_size,
        # train_iteration_mode='even_shuffled_sequential',
        termination_criterion=epochs,
        # learning_rule=learning_rule.Momentum(init_momentum=0.5),
    )
    trainer = bgd.BGD(
        batch_size=10000,
        line_search_mode='exhaustive',
        conjugate=1,
        updates_per_batch=10,
        termination_criterion=epochs,
    )
    lr_adjustor = LinearDecayOverEpoch(
        start=1,
        saturate=10,
        decay_factor=.1,
    )
    momentum_adjustor = learning_rule.MomentumAdjustor(
        final_momentum=.99,
        start=1,
        saturate=10,
    )
    trainer.setup(nn, train_set)
    print 'Learning'
    test_X = vec_space.np_format_as(test_X, nn.get_input_space())
    train_X = vec_space.np_format_as(train_X, nn.get_input_space())
    i = 0
    X = nn.get_input_space().make_theano_batch()
    Y = nn.fprop(X)
    predict = theano.function([X], Y)
    best = -40
    best_iter = -1
    while trainer.continue_learning(nn):
        print '--------------'
        print 'Training Epoch ' + str(i)
        trainer.train(dataset=train_set)
        nn.monitor()
        print 'Evaluating...'
        predictions = convert_categorical(predict(train_X[:2000]))
        score = accuracy_score(convert_categorical(train_y[:2000]),
                               predictions)
        print 'Score on train: ' + str(score)
        predictions = convert_categorical(predict(test_X))
        score = accuracy_score(test_y, predictions)
        print 'Score on test: ' + str(score)
        best, best_iter = (best, best_iter) if best > score else (score, i)
        print 'Current best: ' + str(best) + ' at iter ' + str(best_iter)
        print classification_report(test_y, predictions)
        print 'Adjusting parameters...'
        # momentum_adjustor.on_monitor(nn, valid_set, trainer)
        # lr_adjustor.on_monitor(nn, valid_set, trainer)
        i += 1
        print ' '
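
convert_categorical, used above to turn network outputs back into integer labels, is not shown; a plausible implementation (an assumption) is the argmax inverse of convert_one_hot.

import numpy as np

def convert_categorical(outputs):
    # Take softmax outputs or one-hot rows to integer class labels.
    return np.argmax(np.asarray(outputs), axis=1)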
Example 5
def cnn_train_transformer(train_path,
                         test_path,
                         valid_path,
                         save_path,
                         predict_path,
                         num_rows=28,
                         num_cols=28,
                         num_channels=2,
                         batch_size=128,
                         output_channels=[64, 64],
                         kernel_shape=[[12, 12], [5, 5]],
                         pool_shape=[[4, 4], [2, 2]],
                         pool_stride=[[2, 2], [2, 2]],
                         irange=[0.05, 0.05, 0.05],
                         max_kernel_norm=[1.9365, 1.9365],
                         learning_rate=0.001,
                         init_momentum=0.9,
                         weight_decay=[0.0002, 0.0002, 0.0002],
                         n_epoch=1000,
                         image_path=''):

    ds = load_data_transformed(train_path, num_cols, batch_size)
    ds = (np.transpose(ds[0], axes=[0, 3, 1, 2]), ds[1])
    trn = SarDataset(np.array(ds[0]), ds[1])
    ds = load_data_transformed(valid_path, num_cols, batch_size)
    ds = (np.transpose(ds[0], axes=[0, 3, 1, 2]), ds[1])
    vld = SarDataset(np.array(ds[0]), ds[1])
    ds = load_data_transformed(test_path, num_cols, batch_size)
    ds = (np.transpose(ds[0], axes=[0, 3, 1, 2]), ds[1])
    tst = SarDataset(np.array(ds[0]), ds[1])
    #setup the network
    #X = np.random.random([400000,2,41,41])
    #y = np.random.random([400000,1])
    #trn = SarDataset(X,y)
    #X = np.random.random([60000,2,41,41])
    #y = np.random.random([60000,1])
    #tst = SarDataset(X,y)
    #X = np.random.random([60000,2,41,41])
    #y = np.random.random([60000,1])
    #vld = SarDataset(X,y)
    t = time.time()
    layers = []
    for i in range(len(output_channels)):
        layer_name = 'h{}'.format(i + 1)
        convlayer = mlp.ConvRectifiedLinear(layer_name=layer_name,
                                            output_channels=output_channels[i],
                                            irange=irange[i],
                                            kernel_shape=kernel_shape[i],
                                            pool_shape=pool_shape[i],
                                            pool_stride=pool_stride[i],
                                            max_kernel_norm=max_kernel_norm[i])
        layers.append(convlayer)

    output_mlp = mlp.Linear(dim=1, layer_name='output', irange=irange[-1])
    #output_mlp = mlp.linear_mlp_bayesian_cost(dim=1,layer_name='output',irange=irange[-1])
    layers.append(output_mlp)

    trainer = sgd.SGD(
        learning_rate=learning_rate,
        batch_size=batch_size,
        termination_criterion=EpochCounter(n_epoch),
        #termination_criterion = termination_criteria.And([termination_criteria.MonitorBased(channel_name = 'train_objective', prop_decrease=0.01,N=10),EpochCounter(n_epoch)]),
        #cost = dropout.Dropout(),
        cost=cost.SumOfCosts(
            [cost.MethodCost('cost_from_X'),
             WeightDecay(weight_decay)]),
        init_momentum=init_momentum,
        train_iteration_mode='even_shuffled_sequential',
        monitor_iteration_mode='even_shuffled_sequential',
        monitoring_dataset={
            'test': tst,
            'valid': vld,
            'train': trn
        })

    input_space = space.Conv2DSpace(shape=[num_rows, num_cols],
                                    num_channels=num_channels)
    # 'ann' was never constructed in the original snippet (this line was
    # commented out), leaving it undefined below; the MLP is built here.
    ann = mlp.MLP(layers, input_space=input_space, batch_size=batch_size)
    watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective',
                                               save_path=predict_path +
                                               save_path)
    #flip = window_flip.WindowAndFlip((num_rows,num_cols),randomize=[tst,vld,trn])
    experiment = Train(dataset=trn,
                       model=ann,
                       algorithm=trainer,
                       extensions=[watcher])
    print 'Take {}s to compile code'.format(time.time() - t)

    #train the network
    t = time.time()
    experiment.main_loop()
    print 'Training time: {}h'.format((time.time() - t) / 3600)
    utils.sms_notice('Training time: {}h'.format((time.time() - t) / 3600))

    return ann
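
WeightDecay takes one coefficient per layer, in order, which is why weight_decay has three entries for the two default conv layers plus the linear output; written out explicitly:

from pylearn2.costs.mlp import WeightDecay

# One coefficient per layer: h1, h2, output.
wd = WeightDecay(coeffs=[0.0002, 0.0002, 0.0002])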
Example 6
def cnn_train(
    train_path,
    test_path,
    valid_path,
    save_path,
    predict_path,
    image_path,
    num_rows=28,
    num_cols=28,
    num_channels=2,
    batch_size=128,
    output_channels=[64, 64],
    kernel_shape=[[12, 12], [5, 5]],
    pool_shape=[[4, 4], [2, 2]],
    pool_stride=[[2, 2], [2, 2]],
    irange=[0.05, 0.05, 0.05],
    max_kernel_norm=[1.9365, 1.9365],
    learning_rate=0.001,
    init_momentum=0.9,
    weight_decay=[0.0002, 0.0002, 0.0002],
    n_epoch=1000,
):
    #load data
    #t = time.time()
    ds = load_data(valid_path, num_rows, num_cols, num_channels)
    vld = SarDataset(np.array(ds[0]), ds[1])
    ds = load_data(train_path, num_rows, num_cols, num_channels)
    trn = SarDataset(np.array(ds[0]), ds[1])
    ds = load_data(test_path, num_rows, num_cols, num_channels)
    tst = SarDataset(np.array(ds[0]), ds[1])
    #load balanced data
    #ds = load_data_balance_under_sample(train_path, num_rows,num_cols, num_channels)
    #trn = SarDataset(np.array(ds[0]),ds[1])
    #ds = load_data_balance(valid_path, num_rows,num_cols, num_channels)
    #vld = SarDataset(np.array(ds[0]),ds[1])
    #ds = load_data_balance(test_path, num_rows,num_cols, num_channels)
    #tst = SarDataset(np.array(ds[0]),ds[1])
    #print 'Take {}s to read data'.format( time.time()-t)
    #use gaussian convolution on the original image to see if it can concentrate in the center
    #trn,tst,vld = load_data_lidar()

    #mytransformer = transformer.TransformationPipeline(input_space=space.Conv2DSpace(shape=[num_rows,num_cols],num_channels=num_channels),transformations=[transformer.Rotation(),transformer.Flipping()])
    #trn = contestTransformerDataset.TransformerDataset(trn,mytransformer,space_preserving=True)
    #tst = contestTransformerDataset.TransformerDataset(tst,mytransformer,space_preserving=True)
    #vld = contestTransformerDataset.TransformerDataset(vld,mytransformer,space_preserving=True)

    #trn = transformer_dataset.TransformerDataset(trn,mytransformer,space_preserving=True)
    #tst = transformer_dataset.TransformerDataset(tst,mytransformer,space_preserving=True)
    #vld = transformer_dataset.TransformerDataset(vld,mytransformer,space_preserving=True)

    #setup the network
    t = time.time()
    layers = []
    for i in range(len(output_channels)):
        layer_name = 'h{}'.format(i + 1)
        convlayer = mlp.ConvRectifiedLinear(layer_name=layer_name,
                                            output_channels=output_channels[i],
                                            irange=irange[i],
                                            kernel_shape=kernel_shape[i],
                                            pool_shape=pool_shape[i],
                                            pool_stride=pool_stride[i],
                                            max_kernel_norm=max_kernel_norm[i])
        layers.append(convlayer)

    output_mlp = mlp.Linear(dim=1,
                            layer_name='output',
                            irange=irange[-1],
                            use_abs_loss=True)  # absolute-value (L1) loss instead of the default squared error
    #output_mlp = mlp.linear_mlp_ace(dim=1,layer_name='output',irange=irange[-1])
    layers.append(output_mlp)

    #ann = cPickle.load(open('../output/train_with_2010_2l_40_64/original_500/f/f0.pkl'))
    #layers = []
    #for layer in ann.layers:
    #    layer.set_mlp_force(None)
    #    layers.append(layer)

    trainer = sgd.SGD(
        learning_rate=learning_rate,
        batch_size=batch_size,
        termination_criterion=EpochCounter(n_epoch),
        #termination_criterion = termination_criteria.And([termination_criteria.MonitorBased(channel_name = 'train_objective', prop_decrease=0.01,N=10),EpochCounter(n_epoch)]),
        #cost = dropout.Dropout(),
        cost=cost.SumOfCosts(
            [cost.MethodCost('cost_from_X'),
             WeightDecay(weight_decay)]),
        init_momentum=init_momentum,
        train_iteration_mode='even_shuffled_sequential',
        monitor_iteration_mode='even_shuffled_sequential',
        monitoring_dataset={
            'test': tst,
            'valid': vld,
            'train': trn
        })

    input_space = space.Conv2DSpace(shape=[num_rows, num_cols],
                                    num_channels=num_channels)
    #ann = mlp.MLP(layers,input_space=input_space,batch_size=batch_size)
    # Fine-tuning: instead of building a fresh MLP from 'layers', reload a
    # previously trained model and give it a fresh monitor.
    ann = serial.load(
        '../output/train_with_2010_2l_40_64/original_500/f/f0.pkl')
    ann = monitor.push_monitor(ann, 'stage_0')
    watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective',
                                               save_path=predict_path +
                                               save_path)
    flip = window_flip.WindowAndFlip((num_rows, num_cols),
                                     randomize=[tst, vld, trn])
    experiment = Train(dataset=trn,
                       model=ann,
                       algorithm=trainer,
                       extensions=[watcher, flip])
    print 'Take {}s to compile code'.format(time.time() - t)

    #train the network
    t = time.time()
    experiment.main_loop()
    print 'Training time: {}h'.format((time.time() - t) / 3600)
    utils.sms_notice('Training time:{}'.format((time.time() - t) / 3600))

    return ann
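
A minimal sketch of the reload-and-fine-tune pattern used above: push_monitor stashes the loaded model's monitor under a new name so a new training run can attach a fresh one. The pickle path here is a placeholder.

from pylearn2.utils import serial
from pylearn2.monitor import push_monitor

model = serial.load('pretrained.pkl')   # placeholder path
model = push_monitor(model, 'stage_0')  # old channels preserved under 'stage_0'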
Example 7
def supervisedLayerwisePRL(trainset, testset):
    '''
    The supervised layerwise training as used in the PRL paper.

    Input
    -----
    trainset : path to an HDF5 file created through h5py.
    testset  : path to an HDF5 file created through h5py.
    '''
    batch_size = 100

    # Both train and test h5py files are expected to have 'topo_view' and 'y'
    # datasets inside them, with 'topo_view' in the 'b01c' format used by
    # pylearn2 and 'y' holding the one-hot encoded labels.
    trn = HDF5Dataset(filename=trainset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    tst = HDF5Dataset(filename=testset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    # The 1st convolution and pooling layers are added below.
    h1 = mlp.ConvRectifiedLinear(layer_name='h1',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)

    layers = [h1, fc, output]

    mdl = mlp.MLP(layers,
                  input_space=Conv2DSpace(shape=(70, 70), num_channels=1))

    trainer = sgd.SGD(
        learning_rate=0.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(
            costs=[Default(),
                   WeightDecay(coeffs=[0.0005, 0.0005, 0.0005])]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            # 'vld' was undefined in the original snippet; the test set stands in
            'valid': tst
        })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best1.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()

    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best1.pkl')
    mdl = push_monitor(mdl, 'k')
    # The 2nd convolution and pooling layers are added below.
    h2 = mlp.ConvRectifiedLinear(layer_name='h2',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)

    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h2, fc, output])

    trainer = sgd.SGD(learning_rate=0.002,
                      batch_size=batch_size,
                      learning_rule=learning_rule.RMSProp(),
                      cost=SumOfCosts(costs=[
                          Default(),
                          WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005])
                      ]),
                      train_iteration_mode='shuffled_sequential',
                      monitor_iteration_mode='sequential',
                      termination_criterion=EpochCounter(max_epochs=15),
                      monitoring_dataset={
                          'test': tst,
                          # 'vld' was undefined in the original snippet; the test set stands in
                          'valid': tst
                      })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best2.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()

    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best2.pkl')
    mdl = push_monitor(mdl, 'l')
    # The 3rd convolution and pooling layers are added below.
    h3 = mlp.ConvRectifiedLinear(layer_name='h3',  # was 'h2', which collides with the layer added in stage 2
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='h3', dim=1500, irange=0.05)  # defined but never added to the model below
    output = mlp.Softmax(layer_name='y',
                         n_classes=10,  # note: 171 classes in the earlier stages
                         irange=.005,
                         max_col_norm=1.9365)

    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h3, output])

    trainer = sgd.SGD(
        learning_rate=.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(costs=[
            Default(),
            WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005, 0.0005])
        ]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            # 'vld' was undefined in the original snippet; the test set stands in
            'valid': tst
        })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best3.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()
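
A hypothetical invocation; both paths are placeholders for HDF5 files containing the 'topo_view' and 'y' datasets described in the docstring.

supervisedLayerwisePRL('data/train.h5', 'data/test.h5')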
Example 8
print "Trying to load dataset"

trn = FrameDataSet(TRAIN_DATA, TRAIN_LABELS)

tst = FrameDataSet(TEST_DATA, TEST_LABELS)

print "Finished loading dataset"

#Define the network here
in_space = Conv2DSpace(shape=(32, 32), num_channels=3, axes=('c', 0, 1, 'b'))

h2 = mlp.ConvRectifiedLinear(layer_name='h2',
                             output_channels=64,
                             irange=.05,
                             kernel_shape=(5, 5),
                             pool_shape=(4, 4),
                             pool_stride=(2, 2),
                             max_kernel_norm=1.9365)

h3 = mlp.ConvRectifiedLinear(layer_name='h3',
                             output_channels=64,
                             irange=.05,
                             kernel_shape=(5, 5),
                             pool_shape=(4, 4),
                             pool_stride=(2, 2),
                             max_kernel_norm=1.9365)

output = mlp.Softmax(layer_name='y',
                     n_classes=2,
                     irange=.005)  # the original snippet was truncated here; closing parenthesis added
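
The snippet ends here; a sketch of how it presumably continues, following the assembly-and-train pattern of the earlier examples. The optimizer settings are assumptions.

from pylearn2.train import Train
from pylearn2.training_algorithms import sgd
from pylearn2.termination_criteria import EpochCounter

net = mlp.MLP(layers=[h2, h3, output], input_space=in_space)
trainer = sgd.SGD(learning_rate=0.01, batch_size=100,
                  termination_criterion=EpochCounter(100),
                  monitoring_dataset={'train': trn, 'test': tst})
Train(dataset=trn, model=net, algorithm=trainer).main_loop()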