Example #1
    def test_get_all_params(self):
        from lasagne.layers import (InputLayer, DenseLayer, count_params)
        l1 = InputLayer((10, 20))
        l2 = DenseLayer(l1, 30)
        l3 = DenseLayer(l2, 40)

        num_weights = 20 * 30 + 30 * 40
        num_biases = 30 + 40

        assert count_params(l3, regularizable=True) == num_weights
        assert count_params(l3, regularizable=False) == num_biases
        assert count_params(l3) == num_weights + num_biases
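For reference, count_params simply sums the element counts of the parameters selected by the given tags. A minimal sketch of the same bookkeeping done by hand, assuming only the standard Lasagne API (get_all_params and shared-variable get_value):

# Minimal sketch: reproduce count_params by hand via get_all_params.
# Only the W matrices carry the `regularizable` tag; biases do not.
import numpy as np
from lasagne.layers import InputLayer, DenseLayer, get_all_params

l_in = InputLayer((10, 20))
l_hid = DenseLayer(l_in, 30)
l_out = DenseLayer(l_hid, 40)

weights = get_all_params(l_out, regularizable=True)
manual_count = sum(int(np.prod(w.get_value().shape)) for w in weights)
assert manual_count == 20 * 30 + 30 * 40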
Example #2
def buildModel():

    #this is our input layer with the inputs (None, dimensions, width, height)
    l_input = layers.InputLayer((None, 3, 64, 64))

    #first convolutional layer, has l_input layer as incoming and is followed by a pooling layer
    l_conv1 = layers.Conv2DLayer(l_input, num_filters=32, filter_size=3, pad='same', nonlinearity=tanh)
    l_pool1 = layers.MaxPool2DLayer(l_conv1, pool_size=2)

    #second convolution (l_pool1 is incoming), let's increase the number of filters
    l_conv2 = layers.Conv2DLayer(l_pool1, num_filters=64, filter_size=3, pad='same', nonlinearity=tanh)
    l_pool2 = layers.MaxPool2DLayer(l_conv2, pool_size=2)

    #third convolution (l_pool2 is incoming), even more filters
    l_conv3 = layers.Conv2DLayer(l_pool2, num_filters=128, filter_size=3, pad='same', nonlinearity=tanh)
    l_pool3 = layers.MaxPool2DLayer(l_conv3, pool_size=2)

    #fourth and final convolution
    l_conv4 = layers.Conv2DLayer(l_pool3, num_filters=256, filter_size=3, pad='same', nonlinearity=tanh)
    l_pool4 = layers.MaxPool2DLayer(l_conv4, pool_size=2)

    #our cnn contains 3 dense layers, one of them is our output layer
    l_dense1 = layers.DenseLayer(l_pool4, num_units=128, nonlinearity=tanh)
    l_dense2 = layers.DenseLayer(l_dense1, num_units=128, nonlinearity=tanh)

    #the output layer has 6 units which is exactly the count of our class labels
    #it has a softmax activation function, its values represent class probabilities
    l_output = layers.DenseLayer(l_dense2, num_units=6, nonlinearity=softmax)

    #let's see how many params our net has
    print ("MODEL HAS"+ str(layers.count_params(l_output))+" PARAMS")

    #we return the layer stack as our network by returning the last layer
    return l_output
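A hedged usage sketch (not part of the original project): compiling a prediction function from the layer stack returned by buildModel(), assuming the usual Theano/Lasagne imports.

import theano
import theano.tensor as T
import lasagne

net = buildModel()
x = T.tensor4('x')  # matches the (None, 3, 64, 64) input layer
# Substitute x for the InputLayer and build the class-probability expression
probs = lasagne.layers.get_output(net, x, deterministic=True)
predict_fn = theano.function([x], probs)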
Example #3
def buildModel(mtype=1):

    print "BUILDING MODEL TYPE", mtype, "..."

    #default settings (Model 1)
    filters = 64
    first_stride = 2
    last_filter_multiplier = 16

    #specific model type settings (see working notes for details)
    if mtype == 2:
        first_stride = 1
    elif mtype == 3:
        filters = 32
        last_filter_multiplier = 8

    #input layer
    net = l.InputLayer((None, IM_DIM, IM_SIZE[1], IM_SIZE[0]))

    #conv layers
    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=7, pad='same', stride=first_stride, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    if mtype == 2:
        net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
        net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 2, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 4, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 8, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * last_filter_multiplier, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    print "\tFINAL POOL OUT SHAPE:", l.get_output_shape(net) 

    #dense layers
    net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))

    #Classification Layer
    if MULTI_LABEL:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.sigmoid, W=init.HeNormal(gain=1))
    else:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.softmax, W=init.HeNormal(gain=1))

    print "...DONE!"

    #model stats
    print "MODEL HAS", (sum(hasattr(layer, 'W') for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"
    print "MODEL HAS", l.count_params(net), "PARAMS"

    return net
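buildModel() relies on module-level settings (IM_DIM, IM_SIZE, INIT_GAIN, NONLINEARITY, NUM_CLASSES, MULTI_LABEL) and aliases (l, init, nonlinearities) that are defined elsewhere in the project. A hypothetical minimal setup with placeholder values, only to make the snippet runnable in isolation:

# Hypothetical placeholder config; the real project defines these in its own settings module.
import lasagne.layers as l
import lasagne.init as init
import lasagne.nonlinearities as nonlinearities

IM_SIZE = (512, 256)                  # (width, height) placeholder
IM_DIM = 1                            # input channels placeholder
INIT_GAIN = 1.0
NONLINEARITY = nonlinearities.rectify
NUM_CLASSES = 10
MULTI_LABEL = False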
Example #4
def build_pi_model():

    log.i('BUILDING RASPBERRY PI MODEL...')

    # Random Seed
    lasagne_random.set_rng(cfg.getRandomState())

    # Input layer for images
    net = l.InputLayer((None, cfg.IM_DIM, cfg.IM_SIZE[1], cfg.IM_SIZE[0]))

    # Convolutional layer groups
    for i in range(len(cfg.FILTERS)):

        # 3x3 Convolution + Stride
        net = batch_norm(
            l.Conv2DLayer(net,
                          num_filters=cfg.FILTERS[i],
                          filter_size=cfg.KERNEL_SIZES[i],
                          num_groups=cfg.NUM_OF_GROUPS[i],
                          pad='same',
                          stride=2,
                          W=initialization(cfg.NONLINEARITY),
                          nonlinearity=nonlinearity(cfg.NONLINEARITY)))

        log.i(('\tGROUP', i + 1, 'OUT SHAPE:', l.get_output_shape(net)))

    # Fully connected layers + dropout layers
    net = l.DenseLayer(net,
                       cfg.DENSE_UNITS,
                       nonlinearity=nonlinearity(cfg.NONLINEARITY),
                       W=initialization(cfg.NONLINEARITY))
    net = l.DropoutLayer(net, p=cfg.DROPOUT)

    net = l.DenseLayer(net,
                       cfg.DENSE_UNITS,
                       nonlinearity=nonlinearity(cfg.NONLINEARITY),
                       W=initialization(cfg.NONLINEARITY))
    net = l.DropoutLayer(net, p=cfg.DROPOUT)

    # Classification Layer (Softmax)
    net = l.DenseLayer(net,
                       len(cfg.CLASSES),
                       nonlinearity=nonlinearity('softmax'),
                       W=initialization('softmax'))

    log.i(("\tFINAL NET OUT SHAPE:", l.get_output_shape(net)))
    log.i("...DONE!")

    # Model stats
    log.i(("MODEL HAS",
           (sum(hasattr(layer, 'W')
                for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"))
    log.i(("MODEL HAS", l.count_params(net), "PARAMS"))

    return net
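build_pi_model() reads everything from a cfg module and from helper factories (initialization, nonlinearity, log, lasagne_random) that are not shown here. A hypothetical cfg with placeholder values, only to illustrate the fields the function expects:

# Hypothetical cfg values for illustration; the real configuration lives in the project's cfg module.
IM_DIM = 1
IM_SIZE = (512, 256)              # (width, height)
FILTERS = [32, 64, 128, 256]      # one entry per conv group
KERNEL_SIZES = [5, 3, 3, 3]
NUM_OF_GROUPS = [1, 1, 1, 1]      # grouped convolutions per conv layer
NONLINEARITY = 'relu'
DENSE_UNITS = 512
DROPOUT = 0.5
CLASSES = ['class_a', 'class_b']  # len(CLASSES) output units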
Example #5
def buildModel():

    print "BUILDING MODEL TYPE..."

    #default settings
    filters = 32
    first_stride = 2
    last_filter_multiplier = 4

    #input layer
    net = l.InputLayer((None, IM_DIM, IM_SIZE[1], IM_SIZE[0]))

    #conv layers
    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=7, pad='same', stride=first_stride, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 2, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 4, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 8, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 16, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.MaxPool2DLayer(net, pool_size=2)

    #net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 32 , filter_size=7, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    #net = l.MaxPool2DLayer(net, pool_size=2)


    #print "\tFINAL POOL OUT SHAPE:", l.get_output_shape(net)

    #dense layers
    net = l.batch_norm(l.DenseLayer(net, 256, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.DropoutLayer(net, DROPOUT)
    net = l.batch_norm(l.DenseLayer(net, 256, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY))
    net = l.DropoutLayer(net, DROPOUT)

    #Classification Layer
    if MULTI_LABEL:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.sigmoid, W=init.HeNormal(gain=1))
    else:
        net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.softmax, W=init.HeNormal(gain=1))

    print "...DONE!"

    #model stats
    print "MODEL HAS", (sum(hasattr(layer, 'W') for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"
    print "MODEL HAS", l.count_params(net), "PARAMS"

    return net
Example #6
def print_layers(l_out):
    all_layers = layers.get_all_layers(l_out)
    print('this network has %d learnable parameters' % (
        (layers.count_params(l_out))))
    for layer in all_layers:
        if hasattr(layer, 'W') and hasattr(layer, 'b'):
            num_params = np.prod(
                layer.W.get_value().shape) + np.prod(layer.b.get_value().shape)
            print('layer %s has output shape %r with %d parameters' % (
                (layer.name, layer.output_shape, num_params)))
        else:
            print('layer %s has output shape %r' % (
                (layer.name, layer.output_shape)))
Example #7
def print_layers(l_out):
    all_layers = layers.get_all_layers(l_out)
    print('this network has %d learnable parameters' %
          ((layers.count_params(l_out))))
    for layer in all_layers:
        if hasattr(layer, 'W') and hasattr(layer, 'b'):
            num_params = np.prod(layer.W.get_value().shape) + np.prod(
                layer.b.get_value().shape)
            print('layer %s has output shape %r with %d parameters' %
                  ((layer.name, layer.output_shape, num_params)))
        else:
            print('layer %s has output shape %r' %
                  ((layer.name, layer.output_shape)))
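A hedged usage sketch for print_layers (both variants above behave identically); it assumes the module-level aliases layers and np used inside the function and gives each layer a name so the output is readable:

import numpy as np
from lasagne import layers

l_in = layers.InputLayer((None, 1, 28, 28), name='input')
l_hidden = layers.DenseLayer(l_in, 256, name='hidden')
l_out = layers.DenseLayer(l_hidden, 10, name='output')
print_layers(l_out)  # total parameter count, then one line per layer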
Example #8
def train_setup():

    x = T.tensor3('input')
    y = T.matrix('output')

    encoding, decoding = cnn( x, config.input_length, config.output_length, \
                                 config.encoding_length )

    print 'Number of Parameters {0}'.format(count_params(decoding))

    if config.init_model is not None:

        with np.load(config.init_model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]

        set_all_param_values(decoding, param_values)

    # training tasks in sequence

    prediction = get_output(decoding)

    error = squared_error(y, prediction)
    error = error.mean()

    l1_norm = config.l1_weight * regularize_network_params(decoding, l1)
    l2_norm = config.l2_weight * regularize_network_params(decoding, l2)

    total_error = error + l1_norm + l2_norm

    params = get_all_params(decoding, trainable=True)

    updates = adadelta( total_error, params, config.learning_rate, \
                                             config.rho, \
                                             config.eps )

    train_fn = function( [x, y], [error, l1_norm, l2_norm], \
                              updates = updates, \
                              allow_input_downcast = True )

    val_prediction = get_output(decoding, deterministic=True)
    val_error = squared_error(y, val_prediction)
    val_error = val_error.mean()

    val_fn = function([x, y], val_error, allow_input_downcast=True)

    return encoding, decoding, train_fn, val_fn
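The config.init_model file loaded above is a NumPy .npz archive with keys arr_0 ... arr_N, which is what np.savez produces when the parameter values are passed positionally. A hedged sketch of the matching save step (the file name is a placeholder):

import numpy as np
from lasagne.layers import get_all_param_values

# Positional arguments are stored as arr_0, arr_1, ..., matching the loading loop in train_setup.
np.savez('model_checkpoint.npz', *get_all_param_values(decoding))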
Example #9
def train_setup():

    x = T.tensor3('input')
    y = T.lvector('output')

    network = cnn(x, config.input_length, config.output_length)

    print 'Number of Parameters {0}'.format(count_params(network))

    if config.init_model is not None:

        with np.load(config.init_model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]

        set_all_param_values(network, param_values)

    # training tasks in sequence

    prediction = get_output(network)

    ent = categorical_crossentropy(prediction, y)
    ent = ent.mean()

    l1_norm = config.l1_weight * regularize_network_params(network, l1)
    l2_norm = config.l2_weight * regularize_network_params(network, l2)

    total_error = ent + l1_norm + l2_norm

    params = get_all_params(network, trainable=True)

    updates = adadelta( total_error, params, config.learning_rate, \
                                             config.rho, \
                                             config.eps )

    train_fn = function( [x, y], [ent, l1_norm, l2_norm, prediction], \
                              updates = updates, \
                              allow_input_downcast = True )

    val_prediction = get_output(network, deterministic=True)
    val_ent = categorical_crossentropy(val_prediction, y)
    val_ent = val_ent.mean()

    val_fn = function([x, y], [val_ent, val_prediction],
                      allow_input_downcast=True)

    return network, train_fn, val_fn
Example #10
def main(config=None, init_path='', out_path='', batchsize=64, dataset='C10',
         n=31, growth=40, bottleneck=True, neck_size=None, compression=1,
         dropout=0):
    # network
    assert dataset in ('C10', 'C100', 'SVHN')
    classes = 100 if dataset == 'C100' else 10
    model = DenseNet.cifar_model(
        n=n, growth=growth, bottleneck=bottleneck, neck_size=neck_size,
        compression=compression, dropout=dropout, classes=classes
    )
    # trainer
    if dataset == 'SVHN':
        trainer_cls = SVHN_DenseNetTrainer
    else:
        trainer_cls = CIFAR_DenseNetTrainer
    if init_path:
        trainer = trainer_cls.load_state(model, init_path, batchsize=batchsize)
    else:
        trainer = trainer_cls(model, batchsize=batchsize)
    # dataset
    if not trainer.dataset:
        if dataset == 'C10':
            trainer.dataset = CIFAR10(testsplit=0.1)
        elif dataset == 'C100':
            trainer.dataset = CIFAR100(testsplit=0.1)
        else:
            raise NotImplementedError(
                'The SVHN dataset is not yet implemented.')

    # training the network
    print('Training model ({} parameters) ...'.format(
        count_params(model, trainable=True)))
    trainer.train(config)

    # save the network, the updates and the journal
    if not out_path:
        _, acc = trainer.validate()
        date = datetime.now().strftime('%Y-%m-%d_%H:%M')
        bn_str = 'bottleneck' if bottleneck else 'no_bottleneck'
        tmpl = 'densenet-{}_-_n_{}_-_k_{}_-_{}_-_t_{:.2f}_-_acc_{:.2f}_{}'
        out_path = tmpl.format(dataset, n, growth, bn_str, compression,
                               acc * 100, date)
    trainer.save_state(out_path, resume=True)
Example #11
def test_setup():

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    print( " with input dimension {0},{1},{2}".format( config.image_height, \
                                                       config.image_width, \
                                                       config.image_channel ) )
    network = cnn_archi( input_var,  \
                         config.image_channel, \
                         config.image_height, config.image_width,\
                         config.output_length )

    print('Number of parameters : {0}'.format(count_params(network)))

    with np.load(config.model_file) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]

    set_all_param_values(network, param_values)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_classes = T.argmax(test_prediction, axis=1)
    test_loss = categorical_crossentropy(test_prediction, target_var)

    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.eq(test_classes, target_var)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var,  target_var], \
                             [test_loss, test_prediction, test_acc], \
                             allow_input_downcast=True )

    return val_fn
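A hedged usage sketch for the compiled val_fn: test_acc is a per-sample 0/1 vector, so the caller averages it over the batch (X_batch and y_batch are placeholders for one minibatch of images and integer labels):

loss, probs, acc = val_fn(X_batch, y_batch)
print('batch accuracy: {0:.3f}'.format(float(acc.mean())))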
Example #12
def main(options):
    print 'Build and compile network'
    input_data = T.ftensor3('input_data')
    input_mask = T.fmatrix('input_mask')
    target_data = T.imatrix('target_data')
    target_mask = T.fmatrix('target_mask')

    network_outputs = build_network(
        input_data=input_data,
        input_mask=input_mask,
        num_inputs=options['num_inputs'],
        num_outputs=options['num_outputs'],
        num_inner_units_list=options['num_inner_units_list'],
        num_outer_units_list=options['num_outer_units_list'],
        use_peepholes=options['use_peepholes'],
        use_layer_norm=options['use_layer_norm'],
        learn_init=options['learn_init'],
        grad_clipping=options['grad_clip'])

    network = network_outputs[-1]
    inner_loop_layers = network_outputs[:-1]

    network_params = get_all_params(network, trainable=True)

    print("number of parameters in model: %d" %
          count_params(network, trainable=True))

    if options['reload_model']:
        print('Loading Parameters...')
        [
            pretrain_network_params_val, pretrain_update_params_val,
            pretrain_total_batch_cnt
        ] = pickle.load(open(options['reload_model'], 'rb'))

        print('Applying Parameters...')
        set_model_param_value(network_params, pretrain_network_params_val)
    else:
        pretrain_update_params_val = None
        pretrain_total_batch_cnt = 0

    print 'Build network trainer'
    train_lr = theano.shared(convert_to_floatX(options['lr']))
    training_fn, trainer_params = set_network_trainer(
        input_data=input_data,
        input_mask=input_mask,
        target_data=target_data,
        target_mask=target_mask,
        num_outputs=options['num_outputs'],
        network=network,
        inner_loop_layers=inner_loop_layers,
        updater=options['updater'],
        learning_rate=train_lr,
        grad_max_norm=options['grad_norm'],
        l2_lambda=options['l2_lambda'],
        load_updater_params=pretrain_update_params_val)

    print 'Build network predictor'
    predict_fn = set_network_predictor(input_data=input_data,
                                       input_mask=input_mask,
                                       target_data=target_data,
                                       target_mask=target_mask,
                                       num_outputs=options['num_outputs'],
                                       network=network)

    # evaluation
    if options['reload_model']:
        train_eval_datastream = get_datastream(
            path=options['data_path'],
            norm_path=options['norm_data_path'],
            which_set='train_si84',
            batch_size=options['eval_batch_size'])
        valid_eval_datastream = get_datastream(
            path=options['data_path'],
            norm_path=options['norm_data_path'],
            which_set='test_dev93',
            batch_size=options['eval_batch_size'])
        train_nll, train_bpc, train_fer = network_evaluation(
            predict_fn, train_eval_datastream)
        valid_nll, valid_bpc, valid_fer = network_evaluation(
            predict_fn, valid_eval_datastream)
        print '======================================================='
        print 'Train NLL: ', str(train_nll), ', FER: ', str(train_fer)
        print 'Valid NLL: ', str(valid_nll), ', FER: ', str(valid_fer)
        print '======================================================='

    print 'Load data stream'
    train_datastream = get_datastream(path=options['data_path'],
                                      norm_path=options['norm_data_path'],
                                      which_set='train_si84',
                                      batch_size=options['batch_size'])

    print 'Start training'
    if os.path.exists(options['save_path'] + '_eval_history.npz'):
        evaluation_history = numpy.load(
            options['save_path'] +
            '_eval_history.npz')['eval_history'].tolist()
    else:
        evaluation_history = [[[100.0, 100.0, 1.0], [100.0, 100.0, 1.0]]]

    total_batch_cnt = 0
    start_time = time.time()
    try:
        # for each epoch
        for e_idx in range(options['num_epochs']):
            # for each batch
            for b_idx, data in enumerate(
                    train_datastream.get_epoch_iterator()):
                total_batch_cnt += 1

                if pretrain_total_batch_cnt >= total_batch_cnt:
                    continue

                # get input, target data
                input_data = data[0].astype(floatX)
                input_mask = data[1].astype(floatX)

                # get target data
                target_data = data[2]
                target_mask = data[3].astype(floatX)

                # get output
                train_output = training_fn(input_data, input_mask, target_data,
                                           target_mask)
                train_predict_cost = train_output[0]
                network_grads_norm = train_output[1]
                train_sf_cost0 = train_output[2]
                train_sf_cost1 = train_output[3]
                train_sf_cost2 = train_output[4]

                print('=====================================================')
                print(total_batch_cnt, train_predict_cost, network_grads_norm)
                print(train_sf_cost0, train_sf_cost1, train_sf_cost2)

                if numpy.isnan(train_predict_cost) or numpy.isnan(
                        network_grads_norm):
                    print('update cnt: ', total_batch_cnt)
                    print('NaN detected: ', train_predict_cost,
                          network_grads_norm)
                    raw_input()

                # show intermediate result
                if total_batch_cnt % options[
                        'train_disp_freq'] == 0 and total_batch_cnt != 0:
                    best_idx = numpy.asarray(evaluation_history)[:, 1,
                                                                 2].argmin()
                    print '============================================================================================'
                    print 'Model Name: ', options['save_path'].split('/')[-1]
                    print '============================================================================================'
                    print 'Epoch: ', str(e_idx), ', Update: ', str(
                        total_batch_cnt), ', Time: ', str(time.time() -
                                                          start_time)
                    print '--------------------------------------------------------------------------------------------'
                    print 'Prediction Cost: ', str(train_predict_cost)
                    print 'Gradient Norm: ', str(network_grads_norm)
                    print '--------------------------------------------------------------------------------------------'
                    print 'Learn Rate: ', str(train_lr.get_value())
                    print '--------------------------------------------------------------------------------------------'
                    print 'Train NLL: ', str(
                        evaluation_history[-1][0][0]), ', BPC: ', str(
                            evaluation_history[-1][0][1]), ', FER: ', str(
                                evaluation_history[-1][0][2])
                    print 'Valid NLL: ', str(
                        evaluation_history[-1][1][0]), ', BPC: ', str(
                            evaluation_history[-1][1][1]), ', FER: ', str(
                                evaluation_history[-1][1][2])
                    print '--------------------------------------------------------------------------------------------'
                    print 'Best NLL: ', str(
                        evaluation_history[best_idx][1][0]), ', BPC: ', str(
                            evaluation_history[best_idx][1]
                            [1]), ', FER: ', str(
                                evaluation_history[best_idx][1][2])
                    start_time = time.time()

                # # evaluation
                # if total_batch_cnt%options['train_eval_freq'] == 0 and total_batch_cnt!=0:
                #     train_eval_datastream = get_datastream(path=options['data_path'],
                #                                            norm_path=options['norm_data_path'],
                #                                            which_set='train_si84',
                #                                            batch_size=options['eval_batch_size'])
                #     valid_eval_datastream = get_datastream(path=options['data_path'],
                #                                            norm_path=options['norm_data_path'],
                #                                            which_set='test_dev93',
                #                                            batch_size=options['eval_batch_size'])
                #     train_nll, train_bpc, train_fer = network_evaluation(predict_fn,
                #                                                          train_eval_datastream)
                #     valid_nll, valid_bpc, valid_fer = network_evaluation(predict_fn,
                #                                                          valid_eval_datastream)
                #
                #     # check over-fitting
                #     if valid_fer<numpy.asarray(evaluation_history)[:, 1, 2].min():
                #         best_network_params_vals = get_model_param_values(network_params)
                #         pickle.dump(best_network_params_vals,
                #                     open(options['save_path'] + '_best_model.pkl', 'wb'))
                #
                #     # save results
                #     evaluation_history.append([[train_nll, train_bpc, train_fer],
                #                                [valid_nll, valid_bpc, valid_fer]])
                #     numpy.savez(options['save_path'] + '_eval_history',
                #                 eval_history=evaluation_history)

                # save network
                if total_batch_cnt % options[
                        'train_save_freq'] == 0 and total_batch_cnt != 0:
                    cur_network_params_val = get_model_param_values(
                        network_params)
                    cur_trainer_params_val = get_update_params_values(
                        trainer_params)
                    cur_total_batch_cnt = total_batch_cnt
                    pickle.dump([
                        cur_network_params_val, cur_trainer_params_val,
                        cur_total_batch_cnt
                    ],
                                open(
                                    options['save_path'] +
                                    str(total_batch_cnt).zfill(10) +
                                    '_model.pkl', 'wb'))

    except KeyboardInterrupt:
        print 'Training Interrupted'
        cur_network_params_val = get_model_param_values(network_params)
        cur_trainer_params_val = get_update_params_values(trainer_params)
        cur_total_batch_cnt = total_batch_cnt
        pickle.dump([
            cur_network_params_val, cur_trainer_params_val, cur_total_batch_cnt
        ], open(options['save_path'] + '_last_model.pkl', 'wb'))
Example #13
def main():
    # BN parameters
    batch_size = 100
    logger_lip.info("batch_size = %s", batch_size)
    # alpha is the exponential moving average factor
    alpha = .1
    logger_lip.info("alpha = %s", alpha)
    epsilon = 1e-4
    logger_lip.info("epsilon = %s", epsilon)

    # BinaryOut
    activation = binary_net.binary_tanh_unit
    print("activation = binary_tanh_unit")
    stochastic = True
    print("stochastic = " + str(stochastic))
    # (-H,+H) are the two binary values
    #H = "Glorot"
    H = 1.
    print("H = " + str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = " + str(W_LR_scale))

    # Training parameters
    num_epochs = 50
    logger_lip.info("num_epochs = %s", num_epochs)

    # Decaying LR
    LR_start = 0.1
    logger_lip.info("LR_start = %s", LR_start)
    LR_fin = 0.0000003
    logger_lip.info("LR_fin = %s", LR_fin)
    # LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)
    LR_decay = 0.5  # sqrt(0.5)
    logger_lip.info("LR_decay = %s", LR_decay)
    # BTW, LR decay might good for the BN moving average...

    shuffle_parts = 1
    logger_lip.info("shuffle_parts = %s", shuffle_parts)
    if binary: oneHot = True
    else: oneHot = False

    ##############################################
    network_type = "google"
    viseme = False  # selects nbClasses and the store path (param counts: viseme 6,498,828 / phoneme 7,176,231)

    if viseme:
        nbClasses = 12
    else:
        nbClasses = 39

    # get the database
    # If it's small (lipspeakers) -> generate X_train, y_train etc here
    # otherwise we need to load and generate each speaker separately in the training loop
    dataset = "TCDTIMIT"
    root_dir = os.path.join(
        os.path.expanduser('~/TCDTIMIT/lipreading/' + dataset))
    results_dir = root_dir + "/results/CNN_binaryNet"
    if not os.path.exists(results_dir): os.makedirs(results_dir)
    if viseme:
        database_binaryDir = root_dir + '/binaryViseme'
    else:
        database_binaryDir = root_dir + '/binary'
    datasetType = "lipspeakers"  # "lipspeakers" #"volunteers" #"volunteers" #    lipspeakers or volunteers"
    ##############################################

    if datasetType == "lipspeakers":
        loadPerSpeaker = False  # only lipspeakers small enough to fit in CPU RAM, generate X_train etc here
        storeProcessed = True
        processedDir = database_binaryDir + "_allLipspeakersProcessed"

        # TODO: prepLip_all can be used to generate pkl containing all the lipspeaker data. Not sure if this stil works, so use with care!
        if not oneHot: pkl_path = processedDir + os.sep + datasetType + ".pkl"
        else:
            pkl_path = processedDir + os.sep + datasetType + "_oneHot" + ".pkl"
        if not os.path.exists(pkl_path):
            logger_lip.info("dataset not yet processed. Processing...")
            code.lipreading.preprocessLipreading.prepLip_all(
                data_path=database_binaryDir,
                store_path=pkl_path,
                trainFraction=0.7,
                validFraction=0.1,
                testFraction=0.2,
                nbClasses=nbClasses,
                onehot=oneHot,
                type=datasetType,
                verbose=True)
        datasetFiles = code.lipreading.general_tools.unpickle(pkl_path)
        X_train, y_train, X_val, y_val, X_test, y_test = datasetFiles
        dtypeX = 'float32'
        dtypeY = 'float32'
        X_train = X_train.astype(dtypeX)
        y_train = y_train.astype(dtypeY)
        X_val = X_val.astype(dtypeX)
        y_val = y_val.astype(dtypeY)
        X_test = X_test.astype(dtypeX)
        y_test = y_test.astype(dtypeY)
        datasetFiles = [X_train, y_train, X_val, y_val, X_test, y_test]

        # These files have been generated with datasetToPkl_fromCombined, so that the train/val/test set are the same as for combinedSR.
        # X_train, y_train = unpickle(os.path.expanduser("~/TCDTIMIT/lipreading/TCDTIMIT/binary/allLipspeakersTrain.pkl"))
        # X_val, y_val = unpickle(os.path.expanduser("~/TCDTIMIT/lipreading/TCDTIMIT/binary/allLipspeakersVal.pkl"))
        # X_test, y_test = unpickle(os.path.expanduser("~/TCDTIMIT/lipreading/TCDTIMIT/binary/allLipspeakersTest.pkl"))
        # datasetFiles = [X_train, y_train, X_val, y_val, X_test, y_test]

    else:  # we need to load and preprocess each speaker before we evaluate, because dataset is too large and doesn't fit in CPU RAM
        loadPerSpeaker = True
        storeProcessed = True  # if you have about 10GB hdd space, you can increase the speed by not reprocessing it each iteration
        processedDir = database_binaryDir + "_finalProcessed"
        # you can just run this program and it will generate the files the first time it encounters them, or generate them manually with datasetToPkl.py

        # just get the names
        testVolunteerNumbers = [
            "13F", "15F", "21M", "23M", "24M", "25M", "28M", "29M", "30F",
            "31F", "34M", "36F", "37F", "43F", "47M", "51F", "54M"
        ]
        testVolunteers = [
            str(testNumber) + ".pkl" for testNumber in testVolunteerNumbers
        ]
        lipspeakers = ["Lipspkr1.pkl", "Lipspkr2.pkl", "Lipspkr3.pkl"]
        allSpeakers = [
            f for f in os.listdir(database_binaryDir)
            if os.path.isfile(os.path.join(database_binaryDir, f))
            and os.path.splitext(f)[1] == ".pkl"
        ]
        trainVolunteers = [
            f for f in allSpeakers
            if not (f in testVolunteers or f in lipspeakers)
        ]
        trainVolunteers = [vol for vol in trainVolunteers if vol is not None]

        if datasetType == "combined":
            trainingSpeakerFiles = trainVolunteers + lipspeakers
            testSpeakerFiles = testVolunteers
        elif datasetType == "volunteers":
            trainingSpeakerFiles = trainVolunteers
            testSpeakerFiles = testVolunteers
        else:
            raise Exception("invalid dataset entered")
        datasetFiles = [trainingSpeakerFiles, testSpeakerFiles]

    model_name = datasetType + "_" + network_type + "_" + ("viseme" if viseme else "phoneme") + str(nbClasses) \
        + ("_binary" if binary else "")
    model_save_name = os.path.join(results_dir, model_name)

    # log file
    logFile = results_dir + os.sep + model_name + '.log'
    # if os.path.exists(logFile):
    #     fh = logging.FileHandler(logFileT)  # append to existing log
    # else:
    fh = logging.FileHandler(logFile, 'w')  # create new logFile
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger_lip.addHandler(fh)

    logger_lip.info('Building the CNN...')

    # Prepare Theano variables for inputs and targets
    inputs = T.tensor4('inputs')
    if oneHot:
        targets = T.matrix('targets')
    else:
        targets = T.ivector('targets')

    LR = T.scalar('LR', dtype=theano.config.floatX)

    # get the network structure
    l_out = code.lipreading.buildNetworks.build_network_google_binary(
        activation, alpha, epsilon, inputs, binary, stochastic, H,
        W_LR_scale)  # 7176231 params
    for layer in L.get_all_layers(l_out):
        print(layer)

    # print the number of network parameters
    logger_lip.info("Using the %s network", network_type)
    logger_lip.info("The number of parameters of this network: %s",
                    L.count_params(l_out))

    logger_lip.info("loading %s", model_save_name + '.npz')
    load_model(model_save_name + '.npz', l_out)

    logger_lip.info("* COMPILING FUNCTIONS...")
    train_output = lasagne.layers.get_output(l_out, deterministic=False)

    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0., 1. - targets * train_output)))

    # W updates
    W = lasagne.layers.get_all_params(l_out, binary=True)
    W_grads = binary_net.compute_grads(loss, l_out)
    updates = lasagne.updates.adam(loss_or_grads=W_grads,
                                   params=W,
                                   learning_rate=LR)
    updates = binary_net.clipping_scaling(updates, l_out)

    # other parameters updates
    params = lasagne.layers.get_all_params(l_out, trainable=True, binary=False)
    updates = OrderedDict(updates.items() + lasagne.updates.adam(
        loss_or_grads=loss, params=params, learning_rate=LR).items())

    test_output = lasagne.layers.get_output(l_out, deterministic=True)
    out_fn = theano.function([inputs], test_output)

    test_loss = T.mean(T.sqr(T.maximum(0., 1. - targets * test_output)))
    test_acc = T.mean(T.eq(T.argmax(test_output, axis=1),
                           T.argmax(targets, axis=1)),
                      dtype=theano.config.floatX)
    k = 3
    test_top3_acc = T.zeros((1, ))
    topk_acc_fn = theano.function([], test_top3_acc)
    val_fn = theano.function([inputs, targets],
                             [test_loss, test_acc, test_top3_acc])

    if debug:
        nb = 3
        debugX = X_train[0:nb]
        debugY = y_train[0:nb]
        out = out_fn(debugX)
        val = val_fn(debugX, debugY)
        import pdb
        pdb.set_trace()

    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([inputs, targets, LR], loss, updates=updates)

    logger_lip.info('Training...')
    import code.lipreading.train_lipreading
    code.lipreading.train_lipreading.train(
        train_fn=train_fn,
        val_fn=val_fn,
        out_fn=out_fn,
        topk_acc_fn=topk_acc_fn,
        k=k,
        network_output_layer=l_out,
        batch_size=batch_size,
        LR_start=LR_start,
        LR_decay=LR_decay,
        num_epochs=num_epochs,
        dataset=datasetFiles,
        database_binaryDir=database_binaryDir,
        storeProcessed=storeProcessed,
        processedDir=processedDir,
        loadPerSpeaker=loadPerSpeaker,
        justTest=justTest,
        save_name=model_save_name,
        shuffleEnabled=True)
Example #14
    # print 'extract input var \n'
    # X = get_all_layers(network)[0].input_var
    #####################################################
#    #  VGG run
#    net = build_model_vanila_CNN(X=X, channel= n_ch, stride=1  )
#    network = net['prob']
    #####################################################
    # FULLCCN run
    network = build_CNN_nopool(in_shape = (None, n_ch,96,96),
                               num_filter = [64,64,128,128,128,128],
                               fil_size   = [ 3,  1, 3,  3,  3, 12],
                               strides =    [ 1,  1, 2,  2,  2,  1],
                               num_out = 30,
                               nlin_func=rectify,
                               in_var=X)
    print "num_params", count_params(network)
    #####################################################


    train_fn, val_fn = build_update_functions(train_set_x=train_set_x, train_set_y=train_set_y,
                                              valid_set_x=valid_set_x,valid_set_y= valid_set_y,
                                              y= y,X= X,network=network,
                                              val_MASK=val_MASK, train_MASK=train_MASK,
                                              learning_rate=learn_rate,batch_size=batch_size,l2_reg=l2)
    print 'compile done successfully \n'

    # call early_stop_train function
    early_stop_train(train_set_x, train_set_y,
                     valid_set_x, valid_set_y,
                     network, train_fn, val_fn,
                     batch_size=batch_size)
Example #15
def main():

    for batch_size, network_type in zip(batch_sizes, networks):
        print(batch_size, network_type)
        # BN parameters
        # batch_size = 100
        logger_lip.info("batch_size = %s", batch_size)
        # alpha is the exponential moving average factor
        alpha = .1
        logger_lip.info("alpha = %s", alpha)
        epsilon = 1e-4
        logger_lip.info("epsilon = %s", epsilon)

        # activation
        activation = T.nnet.relu
        logger_lip.info("activation = T.nnet.relu")

        # Training parameters
        num_epochs = 20
        logger_lip.info("num_epochs = %s", num_epochs)

        # Decaying LR
        LR_start = 0.001
        logger_lip.info("LR_start = %s", LR_start)
        LR_fin = 0.0000003
        logger_lip.info("LR_fin = %s", LR_fin)
        #LR_decay = (LR_fin / LR_start) ** (1. / num_epochs)
        LR_decay = 0.5  # sqrt(0.5)
        logger_lip.info("LR_decay = %s", LR_decay)
        # BTW, LR decay might good for the BN moving average...

        shuffle_parts = 1
        logger_lip.info("shuffle_parts = %s", shuffle_parts)
        oneHot = False

        ##############################################

        if viseme: nbClasses = 12
        else: nbClasses = 39

        # get the database
        # If it's small (lipspeakers) -> generate X_train, y_train etc here
        # otherwise we need to load and generate each speaker separately in the training loop
        dataset = "TCDTIMIT"
        root_dir = os.path.join(
            os.path.expanduser('~/TCDTIMIT/lipreading/' + dataset))
        results_dir = root_dir + "/results/CNN"
        if not os.path.exists(results_dir): os.makedirs(results_dir)
        if viseme: database_binaryDir = root_dir + '/binaryViseme'
        else: database_binaryDir = root_dir + '/binary'
        datasetType = "lipspeakers"  #"volunteers" #    lipspeakers or volunteers"
        ##############################################

        if datasetType == "lipspeakers":
            loadPerSpeaker = False  # only lipspeakers small enough to fit in CPU RAM, generate X_train etc here
            storeProcessed = True
            processedDir = database_binaryDir + "_allLipspeakersProcessed"
            # pkl_path =  processedDir + os.sep + datasetType + ".pkl"
            # if not os.path.exists(pkl_path):
            #     logger_lip.info("dataset not yet processed. Processing...")
            #     preprocessLipreading.prepLip_all(data_path=database_binaryDir, store_path=pkl_path, trainFraction=0.7, validFraction=0.1,
            #                 testFraction=0.2,
            #                 nbClasses=nbClasses, onehot=oneHot, type=datasetType, verbose=True)
            #datasetFiles = general_tools.unpickle(pkl_path)

            # if this doesn't succeed, you probably have to generate the files with datasetToPkl_fromCombined.py
            X_train, y_train = unpickle(
                os.path.expanduser(
                    "~/TCDTIMIT/lipreading/TCDTIMIT/binary/allLipspeakersTrain.pkl"
                ))
            X_val, y_val = unpickle(
                os.path.expanduser(
                    "~/TCDTIMIT/lipreading/TCDTIMIT/binary/allLipspeakersVal.pkl"
                ))
            X_test, y_test = unpickle(
                os.path.expanduser(
                    "~/TCDTIMIT/lipreading/TCDTIMIT/binary/allLipspeakersTest.pkl"
                ))
            datasetFiles = [X_train, y_train, X_val, y_val, X_test, y_test]

        else:  # we need to load and preprocess each speaker before we evaluate, because dataset is too large and doesn't fit in CPU RAM
            loadPerSpeaker = True
            storeProcessed = True  #if you have about 10GB hdd space, you can increase the speed by not reprocessing it each iteration
            processedDir = database_binaryDir + "_finalProcessed"
            # you can just run this program and it will generate the files the first time it encounters them, or generate them manually with datasetToPkl.py

            # just get the names
            testVolunteerNumbers = [
                "13F", "15F", "21M", "23M", "24M", "25M", "28M", "29M", "30F",
                "31F", "34M", "36F", "37F", "43F", "47M", "51F", "54M"
            ]
            testVolunteers = [
                str(testNumber) + ".pkl" for testNumber in testVolunteerNumbers
            ]
            lipspeakers = ["Lipspkr1.pkl", "Lipspkr2.pkl", "Lipspkr3.pkl"]
            allSpeakers = [
                f for f in os.listdir(database_binaryDir)
                if os.path.isfile(os.path.join(database_binaryDir, f))
                and os.path.splitext(f)[1] == ".pkl"
            ]
            trainVolunteers = [
                f for f in allSpeakers
                if not (f in testVolunteers or f in lipspeakers)
            ]
            trainVolunteers = [
                vol for vol in trainVolunteers if vol is not None
            ]

            if datasetType == "combined":
                trainingSpeakerFiles = trainVolunteers + lipspeakers
                testSpeakerFiles = testVolunteers
            elif datasetType == "volunteers":
                trainingSpeakerFiles = trainVolunteers
                testSpeakerFiles = testVolunteers
            else:
                raise Exception("invalid dataset entered")
            datasetFiles = [trainingSpeakerFiles, testSpeakerFiles]

        model_name = datasetType + "_" + network_type + "_" + (
            "viseme" if viseme else "phoneme") + str(nbClasses)
        model_save_name = os.path.join(results_dir, model_name)

        # log file
        logFile = results_dir + os.sep + model_name + '.log'
        # if os.path.exists(logFile):
        #     fh = logging.FileHandler(logFileT)  # append to existing log
        # else:
        fh = logging.FileHandler(logFile, 'w')  # create new logFile
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger_lip.addHandler(fh)

        logger_lip.info('Building the CNN...')

        # Prepare Theano variables for inputs and targets
        inputs = T.tensor4('inputs')
        if oneHot: targets = T.matrix('targets')
        else: targets = T.ivector('targets')

        LR = T.scalar('LR', dtype=theano.config.floatX)

        # get the network structure
        if network_type == "google":
            cnnDict, l_out = buildNetworks.build_network_google(
                activation, alpha, epsilon, inputs,
                nbClasses)  # 7.176.231 params
        elif network_type == "cifar10":
            cnn, l_out = buildNetworks.build_network_cifar10(
                input=inputs,
                nbClasses=nbClasses,
                activation=activation,
                alpha=alpha,
                epsilon=epsilon)
        elif network_type == "cifar10_v2":
            cnn, l_out = buildNetworks.build_network_cifar10_v2(
                input=inputs, nbClasses=nbClasses)
        elif network_type == "resnet50":
            cnn, l_out = buildNetworks.build_network_resnet50(
                inputs, nbClasses)

        # print the number of network parameters
        logger_lip.info("Using the %s network", network_type)
        logger_lip.info("The number of parameters of this network: %s",
                        L.count_params(l_out))

        logger_lip.info("loading %s", model_save_name + '.npz')
        load_model(model_save_name + '.npz', l_out)

        # a = '/home/matthijs/TCDTIMIT/lipreading/TCDTIMIT/results/thirty.npz'
        # logger_lip.info("loading %s", a)
        # load_model(a, l_out)

        logger_lip.info("* COMPILING FUNCTIONS...")
        # for validation: disable dropout etc layers -> deterministic
        test_network_output = L.get_output(l_out, deterministic=True)
        test_acc = T.mean(T.eq(T.argmax(test_network_output, axis=1), targets),
                          dtype=theano.config.floatX)  # T.zeros((1,))
        test_loss = LO.categorical_crossentropy(test_network_output, targets)
        test_loss = test_loss.mean()

        # Top k accuracy
        k = 3
        # topk_acc = T.mean( T.any(T.eq(T.argsort(test_network_output, axis=1)[:, -k:], targets.dimshuffle(0, 'x')), axis=1),
        #     dtype=theano.config.floatX)
        topk_acc = T.mean(
            lasagne.objectives.categorical_accuracy(test_network_output,
                                                    targets.flatten(),
                                                    top_k=k))

        topk_acc_fn = theano.function([inputs, targets], topk_acc)

        val_fn = theano.function([inputs, targets],
                                 [test_loss, test_acc, topk_acc])

        # For training, use nondeterministic output
        network_output = L.get_output(l_out, deterministic=False)
        out_fn = theano.function([inputs], network_output)
        # cross-entropy loss
        loss = LO.categorical_crossentropy(network_output, targets)
        loss = loss.mean()
        # # Also add weight decay to the cost function
        weight_decay = 1e-5
        weightsl2 = lasagne.regularization.regularize_network_params(
            l_out, lasagne.regularization.l2)
        loss += weight_decay * weightsl2

        # acc
        err = T.mean(T.eq(T.argmax(network_output, axis=1), targets),
                     dtype=theano.config.floatX)

        # set all params to trainable
        params = L.get_all_params(l_out, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss,
                                       params=params,
                                       learning_rate=LR)
        # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
        # and returning the corresponding training loss:
        train_fn = theano.function([inputs, targets, LR],
                                   loss,
                                   updates=updates)

        logger_lip.info('Training...')

        train_lipreading.train(train_fn=train_fn,
                               val_fn=val_fn,
                               out_fn=out_fn,
                               topk_acc_fn=topk_acc_fn,
                               k=k,
                               network_output_layer=l_out,
                               batch_size=batch_size,
                               LR_start=LR_start,
                               LR_decay=LR_decay,
                               num_epochs=num_epochs,
                               dataset=datasetFiles,
                               database_binaryDir=database_binaryDir,
                               storeProcessed=storeProcessed,
                               processedDir=processedDir,
                               loadPerSpeaker=loadPerSpeaker,
                               justTest=justTest,
                               save_name=model_save_name,
                               shuffleEnabled=True)
Example #16
def print_layers(network):
    for layer in get_all_layers(network):
        print str(type(layer)).split(".")[-1][:-2] + ': ' + str(
            layer.output_shape)
    print count_params(network)
Example #17
def train_model(window_size, max_epochs, patience):
    root_dir = join('data', 'nets')
    # the file from which to load pre-trained weights
    #init_file = join(root_dir,
    #                 'subj%d_weights_deep_nocsp_wide.pickle' % (
    #                     4))
    #init_file = join(root_dir,
    #                 'weights_super_deeper.pickle')
    init_file = None
    # the file to which the learned weights will be written
    weights_file = join(root_dir, 'weights.pickle')
    temp_weights_file = join(root_dir, 'epoch_%d.pickle')
    train_data, train_events = [], []
    valid_data, valid_events = [], []
    for subj_id in range(1, 13):
        print('loading time series for subject %d...' % (subj_id))
        subj_data_list, subj_events_list = utils.load_subject_train(subj_id)
        print('  creating train and validation sets...')
        subj_train_data, subj_train_events, subj_valid_data, subj_valid_events = \
            utils.split_train_test_data(subj_data_list, subj_events_list,
                                        val_size=2, rand=False)
        train_data += subj_train_data
        train_events += subj_train_events
        valid_data += subj_valid_data
        valid_events += subj_valid_events

    print('using %d time series for training' % (len(train_data)))
    print('using %d time series for validation' % (len(valid_data)))

    print('creating fixed-size time-windows of size %d' % (window_size))
    # the training windows should be in random order
    train_slices = batching.get_permuted_windows(train_data,
                                                 window_size,
                                                 rand=True)
    valid_slices = batching.get_permuted_windows(valid_data,
                                                 window_size,
                                                 rand=True)
    print('there are %d windows for training' % (len(train_slices)))
    print('there are %d windows for validation' % (len(valid_slices)))

    #batch_size = 64
    batch_size = 512
    num_channels = 32
    num_actions = 6
    train_data, valid_data = \
        utils.preprocess(train_data, valid_data)

    print('building model %s...' %
          (sys.modules[build_model.__module__].__name__))
    l_out = build_model(None, num_channels, window_size, num_actions)

    all_layers = layers.get_all_layers(l_out)
    print('this network has %d learnable parameters' %
          (layers.count_params(l_out)))
    for layer in all_layers:
        print('Layer %s has output shape %r' %
              (layer.name, layer.output_shape))

    if init_file is not None:
        print('loading model weights from %s' % (init_file))
        with open(init_file, 'rb') as ifile:
            src_layers = pickle.load(ifile)
        dst_layers = layers.get_all_params(l_out)
        for i, (src_weights,
                dst_layer) in enumerate(zip(src_layers, dst_layers)):
            print('loading pretrained weights for %s' % (dst_layer.name))
            dst_layer.set_value(src_weights)
    else:
        print('all layers will be trained from random initialization')

    #lr = theano.shared(np.cast['float32'](0.001))
    lr = theano.shared(np.cast['float32'](0.01))
    mntm = 0.9
    print('compiling theano functions...')
    train_iter = iter_funcs.create_iter_funcs_train(lr, mntm, l_out)
    valid_iter = iter_funcs.create_iter_funcs_valid(l_out)

    best_weights = None
    best_valid_loss = np.inf
    best_epoch = 0
    print('starting training for all subjects at %s' %
          (utils.get_current_time()))
    try:
        for epoch in range(max_epochs):
            print('epoch: %d' % (epoch))
            train_losses, training_outputs, training_inputs = [], [], []
            num_batches = (len(train_slices) + batch_size - 1) / batch_size
            t_train_start = time()
            for i, (Xb, yb) in enumerate(
                    batching.batch_iterator(batch_size,
                                            train_slices,
                                            train_data,
                                            train_events,
                                            window_norm=False)):
                t_batch_start = time()
                # hack for faster debugging
                #if i < 70000:
                #    continue
                train_loss, train_output = \
                    train_iter(Xb, yb)
                if np.isnan(train_loss):
                    print('nan loss encountered in minibatch %d' % (i))
                    continue

                train_losses.append(train_loss)
                assert len(yb) == len(train_output)
                for input, output in zip(yb, train_output):
                    training_inputs.append(input)
                    training_outputs.append(output)

                batch_duration = time() - t_batch_start
                if i % 10 == 0:
                    eta = batch_duration * (num_batches - i)
                    m, s = divmod(eta, 60)
                    h, m = divmod(m, 60)
                    print('  training...  (ETA = %d:%02d:%02d)\r' % (h, m, s)),
                    sys.stdout.flush()

            avg_train_loss = np.mean(train_losses)

            training_inputs = np.vstack(training_inputs)
            training_outputs = np.vstack(training_outputs)
            train_roc = roc_auc_score(training_inputs, training_outputs)

            train_duration = time() - t_train_start
            print('')
            print('    train loss: %.6f' % (avg_train_loss))
            print('    train roc:  %.6f' % (train_roc))
            print('    duration:   %.2f s' % (train_duration))

            valid_losses, valid_outputs, valid_inputs = [], [], []
            num_batches = (len(valid_slices) + batch_size - 1) / batch_size
            t_valid_start = time()
            for i, (Xb, yb) in enumerate(
                    batching.batch_iterator(batch_size,
                                            valid_slices,
                                            valid_data,
                                            valid_events,
                                            window_norm=False)):
                t_batch_start = time()
                valid_loss, valid_output = \
                    valid_iter(Xb, yb)
                if np.isnan(valid_loss):
                    print('nan loss encountered in minibatch %d' % (i))
                    continue
                valid_losses.append(valid_loss)
                assert len(yb) == len(valid_output)
                for input, output in zip(yb, valid_output):
                    valid_inputs.append(input)
                    valid_outputs.append(output)

                batch_duration = time() - t_batch_start
                if i % 10 == 0:
                    eta = batch_duration * (num_batches - i)
                    m, s = divmod(eta, 60)
                    h, m = divmod(m, 60)
                    print('  validation...  (ETA = %d:%02d:%02d)\r' %
                          (h, m, s)),
                    sys.stdout.flush()

            # allow training without validation
            if valid_losses:
                avg_valid_loss = np.mean(valid_losses)
                valid_inputs = np.vstack(valid_inputs)
                valid_outputs = np.vstack(valid_outputs)
                valid_roc = roc_auc_score(valid_inputs, valid_outputs)
                valid_duration = time() - t_valid_start
                print('')
                print('    valid loss: %.6f' % (avg_valid_loss))
                print('    valid roc:  %.6f' % (valid_roc))
                print('    duration:   %.2f s' % (valid_duration))
            else:
                print('    no validation...')

            # if we are not doing validation we always want the latest weights
            if not valid_losses:
                best_epoch = epoch
                model_train_loss = avg_train_loss
                model_train_roc = train_roc
                model_valid_roc = -1.
                best_valid_loss = -1.
                best_weights = layers.get_all_param_values(l_out)
            elif avg_valid_loss < best_valid_loss:
                best_epoch = epoch
                model_train_roc = train_roc
                model_valid_roc = valid_roc
                model_train_loss = avg_train_loss
                best_valid_loss = avg_valid_loss
                best_weights = layers.get_all_param_values(l_out)

                temp_file = temp_weights_file % (epoch)
                print('saving temporary best weights to %s' % (temp_file))
                with open(temp_file, 'wb') as ofile:
                    pickle.dump(best_weights,
                                ofile,
                                protocol=pickle.HIGHEST_PROTOCOL)

            # early stopping: halt once validation has not improved
            # within the last `patience` epochs
            if epoch > best_epoch + patience:
                break

    except KeyboardInterrupt:
        print('caught Ctrl-C, stopping training...')

    with open(weights_file, 'wb') as ofile:
        print('saving best weights to %s' % (weights_file))
        pickle.dump(best_weights, ofile, protocol=pickle.HIGHEST_PROTOCOL)
    print('finished training for all subjects at %s' %
          (utils.get_current_time()))

    return model_train_loss, best_valid_loss, model_train_roc, model_valid_roc
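
The early-stopping logic above keeps the best parameters in memory as the list of NumPy arrays returned by `layers.get_all_param_values`. For reference, a minimal sketch of the same save/restore round trip (the helper functions are illustrative, not part of the script above):

import pickle

from lasagne import layers


def save_weights(l_out, path):
    # get_all_param_values returns one NumPy array per parameter, in order
    values = layers.get_all_param_values(l_out)
    with open(path, 'wb') as ofile:
        pickle.dump(values, ofile, protocol=pickle.HIGHEST_PROTOCOL)


def load_weights(l_out, path):
    # l_out must come from a network built with the same architecture
    with open(path, 'rb') as ifile:
        values = pickle.load(ifile)
    layers.set_all_param_values(l_out, values)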
Ejemplo n.º 19
0
    def __init__(self, config, use_noise=False):

        self.width = config['width']
        self.height = config['height']
        self.channels = config['channels']
        self.actions = config['actions']
        self.history = config['history']

        gru_units = config['gru_units']
        att_units = config['att_units']

        l_action = L.InputLayer((None, ))

        l_input, l_cnn = build_cnn(config, use_noise)

        l_gru = GRULayer([l_action, l_cnn],
                         num_steps=self.history,
                         num_units=gru_units,
                         att_units=att_units)

        l_attention = L.InputLayer((None, l_gru.num_pixel))
        l_hidden1 = L.InputLayer((None, gru_units))
        l_hidden2 = L.InputLayer((None, gru_units))

        l_gru_step = l_gru.get_step_layer(
            [l_action, l_cnn, l_attention, l_hidden1, l_hidden2])

        l_out_step = L.DenseLayer(l_gru_step,
                                  num_units=self.actions,
                                  nonlinearity=lasagne.nonlinearities.softmax)

        l_out_batch = L.DenseLayer(l_gru,
                                   num_units=l_out_step.num_units,
                                   nonlinearity=l_out_step.nonlinearity,
                                   W=l_out_step.W,
                                   b=l_out_step.b)

        self.l_attention = l_attention
        self.l_action = l_action
        self.l_input = l_input

        self.num_params_all = L.count_params(l_out_batch, trainable=True)
        self.params_all = L.get_all_params(l_out_batch, trainable=True)
        #shapes_all = [p.get_value(borrow=True).shape for p in params_all]

        self.num_params_cnn = L.count_params(l_cnn, trainable=True)
        self.params_cnn = L.get_all_params(l_cnn, trainable=True)
        #shapes_cnn = [p.get_value(borrow=True).shape for p in params_cnn]

        self.num_params_gru = self.num_params_all - self.num_params_cnn
        self.params_gru = [
            p for p in self.params_all if p not in self.params_cnn
        ]
        #shapes_gru = [p.get_value(borrow=True).shape for p in params_gru]

        print 'Number of policy parameters: {} > {}({}) = {}({}) + {}({})'.format(
            L.count_params(l_out_step), self.num_params_all,
            len(self.params_all), self.num_params_cnn, len(self.params_cnn),
            self.num_params_gru, len(self.params_gru))

        self.cnn = l_cnn
        self.gru = l_gru
        self.gru_step = l_out_step
        self.gru_batch = l_out_batch

        self.batch_history_shape = (-1, self.history, self.channels,
                                    self.height, self.width)
        self.batch_flatten_shape = (-1, self.channels, self.height, self.width)

        self.t_action = T.ivector('action')
        self.t_state = T.tensor4('state')
        self.t_attention = T.matrix('attention')
        self.t_hidden1 = T.matrix('hidden1')
        self.t_hidden2 = T.matrix('hidden2')

        step_hidden2, step_output = L.get_output(
            [l_gru_step, l_out_step], {
                l_action: self.t_action,
                l_input: self.t_state,
                l_attention: self.t_attention,
                l_hidden1: self.t_hidden1,
                l_hidden2: self.t_hidden2
            },
            deterministic=True)
        step_hidden1 = l_gru.hidden1
        step_attention = l_gru.attention

        print 'Compile policy one step output'
        self._output_step = theano.function([
            self.t_action, self.t_state, self.t_attention, self.t_hidden1,
            self.t_hidden2
        ], [step_attention, step_hidden1, step_hidden2, step_output])
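
The commented-out shape inspections above hint at what the printed totals decompose into: `count_params(..., trainable=True)` is simply the summed element count of the shared variables returned by `get_all_params`. A small sanity-check sketch, assuming any Lasagne output layer `l_out`:

import numpy as np
from lasagne import layers as L


def manual_param_count(l_out):
    params = L.get_all_params(l_out, trainable=True)
    return sum(int(np.prod(p.get_value(borrow=True).shape)) for p in params)

# manual_param_count(l_out) == L.count_params(l_out, trainable=True)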
Ejemplo n.º 20
0
def main(config=None,
         init_path='',
         out_path='',
         batchsize=128,
         dataset='C10',
         pre=False,
         stoch_depth=False,
         n=5,
         stype='A',
         bottleneck=False,
         dim_inc_meth='1x1'):
    """Train a resnet on the CIFAR-10 data set.

    Parameters
    ----------
    config : list of dictionaries or ``None`` (``None``)
        The configuration for the training.
    init_path : string (``''``)
        The path (prefix) to the initial model parameters, updates and
        journal files. This is for continuing a previous training.
    out_path : string (``''``)
        The path (prefix) for the output file. This function will save
        the trained model, a journal with training statistics, the
        updates for the optimizer and the created data set (the latter
        is needed to continue the training).
    batchsize : integer (``128``)
        The batch size for training.
    dataset : ``'C10'``, ``'C100'`` or ``'SVHN'`` (``'C10'``)
        The data set to use for training. The options are: CIFAR-10,
        CIFAR-100 and the "Street View House Number" data sets.
    pre : boolean (``False``)
        If ``True`` use the pre-activation order.
    stoch_depth : boolean (``False``)
        If ``True`` use the stochastic depth approach, with a linear
        decay and $p_L = 0.5$.
    n : integer (``5``)
        The parameter 'n' from the paper.
    stype : ``'A'``, ``'B'`` or ``'C'`` (``'A'``)
        The type of shortcut.
    bottleneck : boolean (``False``)
        Use bottleneck approach with 3 layers per stack.
    dim_inc_meth : ``'1x1'``, ``'2x2'``, ``'max'``, ``'sum'`` or ``'avg'``
        The method used to deal with the increase in dimensions. '1x1'
        will perform a strided 1x1 convolution and ignore 3/4 of the
        input. '2x2' will perform a 2x2 convolution; this adds some
        parameters, but won't ignore any inputs. 'max', 'sum' and 'avg'
        will perform the corresponding pooling operation followed by a
        1x1 convolution; this won't ignore any inputs nor add any
        parameters compared to the '1x1' option.
        NOTE: This argument is ignored if the shortcut type is 'A'.
    """
    # network
    assert dataset in ('C10', 'C100', 'SVHN')
    classes = 100 if dataset == 'C100' else 10
    bases = (PreResNet, ResNet) if pre else (ResNet, )
    if stoch_depth:
        bases = (StochasticDepth, ) + bases
    model_cls = type('ModelClass', bases, {})
    model = model_cls.cifar_model(n=n,
                                  type=stype,
                                  bottleneck=bottleneck,
                                  dim_inc_meth=dim_inc_meth,
                                  classes=classes)
    # trainer
    if dataset == 'SVHN':
        trainer_cls = SVHN_SDTrainer
    else:
        trainer_cls = CIFAR_SDTrainer if stoch_depth else CIFAR_ResNetTrainer
    if init_path:
        trainer = trainer_cls.load_state(model, init_path, batchsize=batchsize)
    else:
        trainer = trainer_cls(model, batchsize=batchsize)
    # dataset
    if not trainer.dataset:
        if dataset == 'SVHN':
            raise NotImplementedError(
                'The SVHN dataset is not yet implemented.')
        elif dataset == 'C10':
            trainer.dataset = CIFAR10(testsplit=0.1)
        elif dataset == 'C100':
            trainer.dataset = CIFAR100(testsplit=0.1)

    # training the network
    print('Training model ({} parameters) ...'.format(
        count_params(model, trainable=True)))
    trainer.train(config)

    # save the network, the updates and the journal
    if not out_path:
        _, acc = trainer.validate()
        date = datetime.now().strftime('%Y-%m-%d_%H:%M')
        bn_str = 'bottleneck' if bottleneck else 'no_bottleneck'
        _type = 'A' if stype == 'A' else '{}_{}'.format(stype, dim_inc_meth)
        mdl_str = 'pre-resnet' if pre else 'resnet'
        if stoch_depth:
            mdl_str += '-sd'
        tmpl = '{}-{}__-__n_{}_-_{}_-_{}__-__acc_{:.2f}_{}'
        out_path = tmpl.format(mdl_str, dataset, n, _type, bn_str, acc * 100,
                               date)
    trainer.save_state(out_path, resume=True)
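
The `dim_inc_meth` options described in the docstring are implemented elsewhere in the repository and are not shown here. A rough sketch of the '1x1' and pooling-based shortcuts in plain Lasagne (the helper names are made up, and 'sum' is approximated by average pooling):

from lasagne.layers import Conv2DLayer, Pool2DLayer


def shortcut_1x1(incoming, num_filters):
    # '1x1': strided 1x1 convolution; cheap, but 3 out of 4 inputs are skipped
    return Conv2DLayer(incoming, num_filters, 1, stride=2,
                       nonlinearity=None, b=None)


def shortcut_pool(incoming, num_filters, mode='max'):
    # 'max'/'avg': pool first so no input is ignored, then a 1x1 convolution
    # (Lasagne calls average pooling 'average_inc_pad')
    pool_mode = 'max' if mode == 'max' else 'average_inc_pad'
    pooled = Pool2DLayer(incoming, pool_size=2, mode=pool_mode)
    return Conv2DLayer(pooled, num_filters, 1, stride=1,
                       nonlinearity=None, b=None)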
def train_setup():

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    print( " with input dimension {0},{1},{2}".format( config.image_height, \
                                                       config.image_width, \
                                                       config.image_channel ) )
    network = cnn_archi( input_var,  \
                         config.image_channel,\
                         config.image_height, config.image_width,\
                         config.output_length )

    print('Number of parameters : {0}'.format(count_params(network)))

    if (config.init_model is not None):
        with np.load(config.init_model) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]

        set_all_param_values(network, param_values)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    ent_loss = categorical_crossentropy(prediction, target_var)
    ent_loss = ent_loss.mean()

    l1_regu = config.l1_regu * regularize_network_params(network, l1)
    l2_regu = config.l2_regu * regularize_network_params(network, l2)

    loss = ent_loss + l1_regu + l2_regu
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = get_all_params(network, trainable=True)

    #grads = T.grad( loss, params )
    #scaled_grads = norm_constraint( grads, 5. )
    updates = nesterov_momentum(loss, params, \
                                learning_rate=config.learning_rate, \
                                momentum=config.momentum )
    #updates = rmsprop( loss , params, learning_rate = config.learning_rate )

    for param in get_all_params(network, regularizable=True):
        norm_axis = None
        if param.ndim == 1:
            norm_axis = [0]
        updates[param] = norm_constraint( updates[param], \
                                 5. * compute_norms( param.get_value() ).mean(),
                                 norm_axes = norm_axis  )

    #Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = get_output(network, deterministic=True)
    test_classes = T.argmax(test_prediction, axis=1)
    test_loss = categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.eq(test_classes, target_var)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var,target_var], \
                               ent_loss,\
                               updates=updates, \
                               allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], \
                             [test_loss, test_prediction, test_acc], \
                             allow_input_downcast=True )

    return network, train_fn, val_fn
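
The loop over `regularizable` parameters above caps each parameter update at five times the mean norm of its current weight vectors. A condensed sketch of that pattern (the helper name and hyper-parameters are illustrative; 1-D parameters, which the code above handles via norm_axes=[0], are simply skipped here):

import lasagne
from lasagne.updates import nesterov_momentum, norm_constraint
from lasagne.utils import compute_norms


def constrained_updates(loss, network, learning_rate=0.01, momentum=0.9):
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate, momentum)
    for param in lasagne.layers.get_all_params(network, regularizable=True):
        if param.ndim < 2:
            continue
        # cap the updated weights at 5x the mean norm of the current ones
        max_norm = 5. * compute_norms(param.get_value()).mean()
        updates[param] = norm_constraint(updates[param], max_norm)
    return updates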
Ejemplo n.º 22
0
 def _print_network(network):
     kwargs['logger'].info("\n")
     for layer in get_all_layers(network):
         kwargs['logger'].info(str(layer) +' : ' + str(layer.output_shape))
     kwargs['logger'].info("Total Parameters: " + str(count_params(layer)))
     kwargs['logger'].info("\n")
Ejemplo n.º 23
0
def main():

    # Where we'll save sample data to
    fname = sys.argv[0].split('.py')[0]
    curr_time = datetime.now().strftime('%d%H%M')
    save_dir = '../output/segmentation/images-' + fname + curr_time

    image_path = '../data/seg-tomato-leaf-stem-images.npz'
    label_path = '../data/seg-tomato-leaf-stem-labels.npz'
    #image_path = '../data/seg-tomato-images.npz'
    #label_path = '../data/seg-tomato-labels.npz'
    pretrained = '../data/vgg16.pkl'
    class_dict_path = '../data/segmentation-label-dict.json'

    num_epochs = 300
    lrate = 1e-3
    batch_size = 1
    seed = 1234
    crop_size = 700

    # theano symbolic tensors
    input_var = T.tensor4('x')
    target_var = T.itensor3('y')
    input_shape = (None, 3, None, None)

    # Load data.
    # x_train and x_valid contains the images;
    # y_train and y_test contains the labels for the corresponding images
    # Number of validation/test samples is hard coded to 10 samples.
    # TO DO: change this to a percentage of the total size of the data
    X_train, y_train, X_valid, y_valid = load_data(image_path, label_path,
                                                   seed, 10)
    unique_classes = np.unique(y_train)
    print "class: ", unique_classes

    # Slightly increase importance of segmenting the tomato border class. Note
    # that we can do this for all other classes as well by changing the index.
    with open(class_dict_path) as data_file:
        class_labels = json.load(data_file)
    print "class labels:", class_labels

    class_weights = np.ones(len(class_labels))
    class_weights[class_labels['tomato border']] *= 1.05
    class_weights[class_labels['leaf border']] *= 1.05
    class_weights[class_labels['stem border']] *= 1.05

    # Compute class weights to balance dataset. We first find the value to get
    # an equal contribution from each class, then transform weights to [0, 1]
    # First get the frequency of each class in the training set
    print "bgd count: ", [np.sum(y_train == 5)]
    counts = [np.sum(y_train == class_) for class_ in unique_classes]
    print "counts: ", counts
    for class_name in class_labels.keys():
        print "class: ", class_labels[
            class_name], "class name: ", class_name, " freq: ", counts[
                class_labels[class_name]]
    counts = np.asarray(counts).astype(theano.config.floatX)
    # The factor used to adjust the weights for each freq in counts is (minimum_freq in counts)/freq
    counts = np.min(counts) * (1. / counts)
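    # e.g. counts = [600., 300., 100.]  ->  np.min(counts) * (1. / counts)
    #      = [100/600., 100/300., 100/100.] = [0.167, 0.333, 1.0],
    # so the rarest class keeps weight 1.0 and frequent classes are scaled down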

    # Since 'stem' is the least frequent class in the training set
    # class_weights[class_labels['stem']] *= 1.0
    counts = counts * class_weights
    counts = T.as_tensor_variable(counts)

    print 'Building model'
    softmax, network, network_crf, vgg_layers = \
        build_network(input_var, len(unique_classes))
    print 'Number of parameters: ', nn.count_params(softmax)

    # If training, initialize weights with ImageNet pretrained weights.
    # otherwise, we can load full network weights from file and set all
    # by commenting this and uncommenting subsequent lines of code
    param_values = pickle.load(open(pretrained, 'rb'))['param values']
    nn.set_all_param_values(vgg_layers, param_values[:13 * 2])

    #with np.load('../data/trained-weights.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    #lasagne.layers.set_all_param_values(softmax, param_values)

    # When building the loss, we weight each class by the (inverse-frequency)
    # class weights computed above, so that rare classes are not drowned out
    output = nn.get_output(softmax, deterministic=False)
    loss = categorical_crossentropy(output, target_var.flatten())
    loss = loss * counts[target_var.flatten()]
    loss = T.mean(loss)

    # When training, we only want to update the newly added layers
    pretrained_layers = nn.get_all_layers(vgg_layers)
    layers = nn.get_all_layers(softmax, treat_as_input=pretrained_layers)
    params = [l.get_params(trainable=True)
              for l in layers]  #[[W,b], [W,b] ... ]
    trainable_params = [p for a in params for p in a]

    updates = lasagne.updates.adamax(loss, trainable_params, lrate)
    #updates = lasagne.updates.rmsprop(loss, trainable_params, lrate)

    # Take the most likely class and compare it to the provided labels
    train_acc = T.mean(T.eq(T.argmax(output, axis=1), target_var.flatten()))

    train_fn = theano.function([input_var, target_var], [loss, train_acc],
                               updates=updates,
                               allow_input_downcast=True)

    # Validation function
    output, preds = nn.get_output([softmax, network_crf], deterministic=True)
    loss = categorical_crossentropy(output, target_var.flatten())
    loss = loss * counts[target_var.flatten()]
    loss = T.mean(loss)

    test_acc = T.mean(T.eq(T.argmax(output, axis=1), target_var.flatten()))

    valid_fn = theano.function([input_var, target_var],
                               [loss, test_acc, preds],
                               allow_input_downcast=True)

    # Early stopping
    best_params = None
    count, best_err = 0, np.inf

    # Train the network: iterate over epochs:
    for epoch in range(num_epochs):

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_acc = 0
        train_batches = 0

        start_time = time.time()
        for inputs, targets in iterate_minibatches(X_train,
                                                   y_train,
                                                   batch_size,
                                                   shuffle=True):

            inputs, targets = random_crop(inputs, targets, X_train.shape[2],
                                          X_train.shape[3])

            err, acc = train_fn(inputs, targets)
            train_err += err
            train_acc += acc
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        valid_iou = np.zeros((len(unique_classes), ))
        valid_found = np.zeros((len(unique_classes), ))
        val_preds, val_inputs, val_targets = [], [], []
        for inputs, targets in iterate_minibatches(X_valid,
                                                   y_valid,
                                                   batch_size,
                                                   shuffle=False):

            err, acc, preds = valid_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

            val_preds.append(preds)
            val_inputs.append(inputs)
            val_targets.append(targets)

            iou, found = meanIOU(preds, targets, len(unique_classes))
            valid_iou += iou
            valid_found += found

        if epoch % 3 == 0:
            val_preds = np.vstack(val_preds)
            val_inputs = np.vstack(val_inputs) / 255.0
            val_targets = np.vstack(val_targets)
            plot_segmentations(val_inputs, val_preds, val_targets, epoch,
                               save_dir)
            '''
            output_dir = 'output'
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            np.savez(os.path.join(output_dir, 'predictions.npz'), val_preds)
            np.savez(os.path.join(output_dir, 'targets.npz'), val_targets)
            np.savez(os.path.join(output_dir, 'rgb.npz'), val_inputs)
            '''

            confusion(val_preds, val_targets)

        # Then we print the results for this epoch:
        print "Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time)
        print "  training loss:\t\t{:.6f}".format(train_err / train_batches)
        print "  validation loss:\t\t{:.6f}".format(val_err / val_batches)
        print "  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100)
        print "  validation IOU:\t\t{}".format(valid_iou / valid_found)

        # Early stopping
        val_err = val_err / float(val_batches)
        if val_err > best_err * 0.99:
            count += 1
        else:
            count = 0
            best_err = val_err
            best_params = nn.get_all_param_values(softmax)
        if count >= 6:
            nn.set_all_param_values(softmax, best_params)
            break

    # And a full pass over the validation data:
    val_preds, val_inputs, val_targets = [], [], []
    for batch in iterate_minibatches(X_valid,
                                     y_valid,
                                     batch_size,
                                     shuffle=False):
        inputs, targets = batch

        err, acc, preds = valid_fn(inputs, targets)
        val_preds.append(preds)
        val_inputs.append(inputs)
        val_targets.append(targets)

    val_preds = np.vstack(val_preds)
    val_inputs = np.vstack(val_inputs) / 255.0
    val_targets = np.vstack(val_targets)

    print 'Final confusion matrix: '
    confusion(val_preds, val_targets)

    plot_segmentations(val_inputs, val_preds, val_targets, 'final', save_dir)

    np.savez('../data/trained-weights.npz',
             *lasagne.layers.get_all_param_values(softmax))
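
`meanIOU` and `confusion` are imported from elsewhere in the project and are not shown above. Judging from how `iou` and `found` are accumulated and divided, the helper plausibly looks something like the following; this is a hypothetical reconstruction, not the project's actual code:

import numpy as np


def meanIOU(preds, targets, num_classes):
    # returns (iou, found): one IoU value per class plus a 0/1 flag marking
    # the classes that actually occur in this batch, so the caller can
    # average only over classes that were present
    iou = np.zeros(num_classes)
    found = np.zeros(num_classes)
    for c in range(num_classes):
        pred_c = (preds == c)
        true_c = (targets == c)
        union = np.logical_or(pred_c, true_c).sum()
        if union > 0:
            iou[c] = np.logical_and(pred_c, true_c).sum() / float(union)
            found[c] = 1
    return iou, found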
Ejemplo n.º 24
0
def main():

    # Where we'll save data to
    fname = sys.argv[0].split('.py')[0]
    curr_time = datetime.now().strftime('%d%H%M')
    save_dir = 'sample-' + fname + curr_time

    lrate = 5e-4
    batch_size = 1
    num_epochs = 100
    crop_size = 360
    input_var = T.tensor4('x')
    target_var = T.itensor4('y')

    images = np.load('images.npz')['arr_0'].astype(
        theano.config.floatX) / 255.0
    labels = np.load('labels.npz')['arr_0'].astype(np.int32)

    num_classes = labels.shape[1]

    idx = np.arange(num_classes)
    idx = idx.reshape(1, num_classes, 1, 1)
    labels = labels / 255
    labels = labels.astype(np.int32) * idx
    labels = np.sum(labels, axis=1, keepdims=True)
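    # e.g. a pixel whose one-hot mask across channels is [0, 255, 0] becomes
    #   [0, 255, 0] / 255 = [0, 1, 0]  ->  * idx [0, 1, 2] = [0, 1, 0]
    #   -> summed over the channel axis = 1, i.e. the integer class id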

    np.random.seed(1234)
    idx = np.arange(images.shape[0])
    np.random.shuffle(idx)
    X_train = images[idx[:-10]]
    y_train = labels[idx[:-10]]
    X_valid = images[idx[-10:]]
    y_valid = labels[idx[-10:]]

    # Compute class weights to balance dataset
    counts = []
    for cl in xrange(num_classes):
        class_counts = 0
        for img in y_train:
            class_counts += np.sum(img == cl)
        counts.append(class_counts)
    counts = np.array(counts).astype(theano.config.floatX)

    # We can either upscale the loss (i.e. multiply by a factor > 1), or
    # downscale the loss (multiply by a factor < 1). Here we do the latter
    counts = np.max(counts) / counts
    counts = counts / np.max(counts)
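    # e.g. counts = [600., 300., 100.] -> np.max(counts) / counts = [1., 2., 6.]
    #   -> / np.max(...) = [0.167, 0.333, 1.0]: the rarest class gets weight 1.0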
    counts[0] = counts[0] * 1.1  # stem
    counts[1] = counts[1] * 1.1  # tomato
    counts = T.as_tensor_variable(counts)

    # Build DenseNetwork
    input_shape = (None, 3, crop_size, crop_size)
    softmax, network = build_network(input_var, input_shape, num_classes)

    print 'Number of parameters: ', nn.count_params(network)

    preds = nn.get_output(softmax, deterministic=False)
    loss = lasagne.objectives.categorical_crossentropy(preds,
                                                       target_var.flatten())
    loss = loss * counts[target_var.flatten()]
    loss = T.mean(loss) + regularize_network_params(softmax, l2) * 0.0001

    acc = T.mean(T.eq(T.argmax(preds, axis=1), target_var.flatten()))

    params = nn.get_all_params(softmax, trainable=True)
    updates = lasagne.updates.adam(loss, params, lrate)
    train_fn = theano.function([input_var, target_var], [loss, acc],
                               updates=updates,
                               allow_input_downcast=True)

    probs, preds = nn.get_output([softmax, network], deterministic=True)
    loss = lasagne.objectives.categorical_crossentropy(probs,
                                                       target_var.flatten())
    loss = loss * counts[target_var.flatten()]
    loss = T.mean(loss) + regularize_network_params(softmax, l2) * 0.0001

    acc = T.mean(T.eq(T.argmax(probs, axis=1), target_var.flatten()))

    valid_fn = theano.function([input_var, target_var], [loss, acc, preds],
                               allow_input_downcast=True)

    # We iterate over epochs:
    for epoch in range(num_epochs):

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_acc = 0
        train_batches = 0

        start_time = time.time()
        for batch in iterate_minibatches(X_train,
                                         y_train,
                                         batch_size,
                                         shuffle=True):
            inputs, targets = batch

            inputs, targets = random_crop(inputs, targets, crop_size,
                                          crop_size)

            err, acc = train_fn(inputs, targets)
            train_err += err
            train_acc += acc
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        valid_iou = np.zeros((num_classes, ))
        val_preds, val_inputs, val_targets = [], [], []
        for batch in iterate_minibatches(X_valid,
                                         y_valid,
                                         batch_size,
                                         shuffle=False):
            inputs, targets = batch

            input_crop, target_crop = random_crop(inputs, targets, crop_size,
                                                  crop_size)

            err, acc, preds = valid_fn(input_crop, target_crop)
            val_err += err
            val_acc += acc
            val_batches += 1

            val_preds.append(preds)
            val_inputs.append(input_crop)
            val_targets.append(target_crop)

            valid_iou += meanIOU(preds, target_crop, num_classes)

        if epoch % 2 == 0:
            val_preds = np.vstack(val_preds)
            val_inputs = np.vstack(val_inputs)
            val_targets = np.vstack(val_targets)
            plot_predictions(val_inputs, val_preds, val_targets, epoch,
                             save_dir)

        # Then we print the results for this epoch:
        print "Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time)
        print "  training loss:\t\t{:.6f}".format(train_err / train_batches)
        print "  validation loss:\t\t{:.6f}".format(val_err / val_batches)
        print "  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100)
        print "  validation IOU:\t\t{}".format(valid_iou / val_batches)
Ejemplo n.º 25
0
def main(options):
    print 'Build and compile network'
    input_data = T.ftensor3('input_data')
    input_mask = T.fmatrix('input_mask')
    target_data = T.imatrix('target_data')
    target_mask = T.fmatrix('target_mask')

    skip_scale = theano.shared(convert_to_floatX(options['skip_scale']))

    network, rand_layer_list = build_network(
        input_data=input_data,
        input_mask=input_mask,
        num_inputs=options['num_inputs'],
        num_units_list=options['num_units_list'],
        num_outputs=options['num_outputs'],
        skip_scale=skip_scale,
        dropout_ratio=options['dropout_ratio'],
        weight_noise=options['weight_noise'],
        use_layer_norm=options['use_layer_norm'],
        peepholes=options['peepholes'],
        learn_init=options['learn_init'],
        grad_clipping=options['grad_clipping'],
        gradient_steps=options['gradient_steps'],
        use_projection=options['use_projection'])

    network_params = get_all_params(network, trainable=True)

    print("number of parameters in model: %d" %
          count_params(network, trainable=True))

    if options['reload_model']:
        print('Loading Parameters...')
        pretrain_network_params_val, pretrain_update_params_val, pretrain_total_batch_cnt = pickle.load(
            open(options['reload_model'], 'rb'))

        print('Applying Parameters...')
        set_model_param_value(network_params, pretrain_network_params_val)
    else:
        pretrain_update_params_val = None
        pretrain_total_batch_cnt = 0

    print 'Build network trainer'
    training_fn, trainer_params = set_network_trainer(
        input_data=input_data,
        input_mask=input_mask,
        target_data=target_data,
        target_mask=target_mask,
        num_outputs=options['num_outputs'],
        network=network,
        rand_layer_list=rand_layer_list,
        updater=options['updater'],
        learning_rate=options['lr'],
        grad_max_norm=options['grad_norm'],
        l2_lambda=options['l2_lambda'],
        load_updater_params=pretrain_update_params_val)

    print 'Build network predictor'
    predict_fn = set_network_predictor(input_data=input_data,
                                       input_mask=input_mask,
                                       target_data=target_data,
                                       target_mask=target_mask,
                                       num_outputs=options['num_outputs'],
                                       network=network)

    print 'Load data stream'
    train_datastream = get_datastream(path=options['data_path'],
                                      norm_path=options['norm_data_path'],
                                      which_set='train_si84',
                                      batch_size=options['batch_size'])

    print 'Start training'
    if os.path.exists(options['save_path'] + '_eval_history.npz'):
        evaluation_history = numpy.load(
            options['save_path'] +
            '_eval_history.npz')['eval_history'].tolist()
    else:
        evaluation_history = [[[10.0, 10.0, 1.0], [10.0, 10.0, 1.0]]]
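    # each evaluation_history entry has the form
    #   [[train_nll, train_bpc, train_fer], [valid_nll, valid_bpc, valid_fer]],
    # so the [:, 1, 2] indexing below picks out the validation FER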
    early_stop_flag = False
    early_stop_cnt = 0
    total_batch_cnt = 0

    try:
        # for each epoch
        for e_idx in range(options['num_epochs']):
            # for each batch
            for b_idx, data in enumerate(
                    train_datastream.get_epoch_iterator()):
                total_batch_cnt += 1
                if pretrain_total_batch_cnt >= total_batch_cnt:
                    continue

                # get input, target data
                input_data = data[0].astype(floatX)
                input_mask = data[1].astype(floatX)

                # get target data
                target_data = data[2]
                target_mask = data[3].astype(floatX)

                # get output
                train_output = training_fn(input_data, input_mask, target_data,
                                           target_mask)
                train_predict_cost = train_output[0]
                network_grads_norm = train_output[1]
                skip_means = train_output[2:]

                # show intermediate result
                if total_batch_cnt % options[
                        'train_disp_freq'] == 0 and total_batch_cnt != 0:
                    # pdb.set_trace()
                    best_idx = numpy.asarray(evaluation_history)[:, 1,
                                                                 2].argmin()
                    print '============================================================================================'
                    print 'Model Name: ', options['save_path'].split('/')[-1]
                    print '============================================================================================'
                    print 'Epoch: ', str(e_idx), ', Update: ', str(
                        total_batch_cnt)
                    print '--------------------------------------------------------------------------------------------'
                    print 'Prediction Cost: ', str(train_predict_cost)
                    print 'Gradient Norm: ', str(network_grads_norm)
                    print '--------------------------------------------------------------------------------------------'
                    print 'Skip Ratio: ', skip_means
                    print 'Skip Scale: ', str(skip_scale.get_value())
                    print '--------------------------------------------------------------------------------------------'
                    print 'Train NLL: ', str(
                        evaluation_history[-1][0][0]), ', BPC: ', str(
                            evaluation_history[-1][0][1]), ', FER: ', str(
                                evaluation_history[-1][0][2])
                    print 'Valid NLL: ', str(
                        evaluation_history[-1][1][0]), ', BPC: ', str(
                            evaluation_history[-1][1][1]), ', FER: ', str(
                                evaluation_history[-1][1][2])
                    print '--------------------------------------------------------------------------------------------'
                    print 'Best NLL: ', str(
                        evaluation_history[best_idx][1][0]), ', BPC: ', str(
                            evaluation_history[best_idx][1]
                            [1]), ', FER: ', str(
                                evaluation_history[best_idx][1][2])

                # evaluation
                if total_batch_cnt % options[
                        'train_eval_freq'] == 0 and total_batch_cnt != 0:
                    train_eval_datastream = get_datastream(
                        path=options['data_path'],
                        norm_path=options['norm_data_path'],
                        which_set='train_si84',
                        batch_size=options['eval_batch_size'])
                    valid_eval_datastream = get_datastream(
                        path=options['data_path'],
                        norm_path=options['norm_data_path'],
                        which_set='test_dev93',
                        batch_size=options['eval_batch_size'])
                    train_nll, train_bpc, train_fer = network_evaluation(
                        predict_fn, train_eval_datastream)
                    valid_nll, valid_bpc, valid_fer = network_evaluation(
                        predict_fn, valid_eval_datastream)

                    # check over-fitting
                    if valid_fer > numpy.asarray(evaluation_history)[:, 1,
                                                                     2].min():
                        early_stop_cnt += 1.
                    else:
                        early_stop_cnt = 0.
                        best_network_params_vals = get_model_param_values(
                            network_params)
                        pickle.dump(
                            best_network_params_vals,
                            open(options['save_path'] + '_best_model.pkl',
                                 'wb'))

                    if early_stop_cnt > 10:
                        early_stop_flag = True
                        break

                    # save results
                    evaluation_history.append(
                        [[train_nll, train_bpc, train_fer],
                         [valid_nll, valid_bpc, valid_fer]])
                    numpy.savez(options['save_path'] + '_eval_history',
                                eval_history=evaluation_history)

                # save network
                if total_batch_cnt % options[
                        'train_save_freq'] == 0 and total_batch_cnt != 0:
                    cur_network_params_val = get_model_param_values(
                        network_params)
                    cur_trainer_params_val = get_update_params_values(
                        trainer_params)
                    cur_total_batch_cnt = total_batch_cnt
                    pickle.dump([
                        cur_network_params_val, cur_trainer_params_val,
                        cur_total_batch_cnt
                    ], open(options['save_path'] + '_last_model.pkl', 'wb'))

                if total_batch_cnt % 1000 == 0 and total_batch_cnt != 0:
                    skip_scale.set_value(
                        convert_to_floatX(skip_scale.get_value() * 1.01))

            if early_stop_flag:
                break

    except KeyboardInterrupt:
        print 'Training Interrupted'
        cur_network_params_val = get_model_param_values(network_params)
        cur_trainer_params_val = get_update_params_values(trainer_params)
        cur_total_batch_cnt = total_batch_cnt
        pickle.dump([
            cur_network_params_val, cur_trainer_params_val, cur_total_batch_cnt
        ], open(options['save_path'] + '_last_model.pkl', 'wb'))
Ejemplo n.º 26
0
def build_resnet_model():

    log.i('BUILDING RESNET MODEL...')

    # Random Seed
    lasagne_random.set_rng(cfg.getRandomState())

    # Input layer for images
    net = l.InputLayer((None, cfg.IM_DIM, cfg.IM_SIZE[1], cfg.IM_SIZE[0]))

    # First Convolution
    net = l.Conv2DLayer(net,
                        num_filters=cfg.FILTERS[0],
                        filter_size=cfg.KERNEL_SIZES[0],
                        pad='same',
                        W=initialization(cfg.NONLINEARITY),
                        nonlinearity=None)

    log.i(("\tFIRST CONV OUT SHAPE:", l.get_output_shape(net), "LAYER:",
           len(l.get_all_layers(net)) - 1))

    # Residual Stacks
    for i in range(0, len(cfg.FILTERS)):
        net = resblock(net,
                       filters=cfg.FILTERS[i] * cfg.RESNET_K,
                       kernel_size=cfg.KERNEL_SIZES[i],
                       stride=2,
                       num_groups=cfg.NUM_OF_GROUPS[i])
        for _ in range(1, cfg.RESNET_N):
            net = resblock(net,
                           filters=cfg.FILTERS[i] * cfg.RESNET_K,
                           kernel_size=cfg.KERNEL_SIZES[i],
                           num_groups=cfg.NUM_OF_GROUPS[i],
                           preactivated=False)
        log.i(("\tRES STACK", i + 1, "OUT SHAPE:", l.get_output_shape(net),
               "LAYER:", len(l.get_all_layers(net)) - 1))

    # Post Activation
    net = batch_norm(net)
    net = l.NonlinearityLayer(net, nonlinearity=nonlinearity(cfg.NONLINEARITY))

    # Pooling
    net = l.GlobalPoolLayer(net)
    log.i(("\tFINAL POOLING SHAPE:", l.get_output_shape(net), "LAYER:",
           len(l.get_all_layers(net)) - 1))

    # Classification Layer
    net = l.DenseLayer(net,
                       len(cfg.CLASSES),
                       nonlinearity=nonlinearity('identity'),
                       W=initialization('identity'))
    net = l.NonlinearityLayer(net, nonlinearity=nonlinearity('softmax'))

    log.i(("\tFINAL NET OUT SHAPE:", l.get_output_shape(net), "LAYER:",
           len(l.get_all_layers(net))))
    log.i("...DONE!")

    # Model stats
    log.i(("MODEL HAS",
           (sum(hasattr(layer, 'W')
                for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"))
    log.i(("MODEL HAS", l.count_params(net), "PARAMS"))

    return net
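
`resblock`, `initialization` and `nonlinearity` come from the surrounding project and are not shown. A residual block consistent with the calls above might look roughly like this; the interpretation of `preactivated` and the shortcut handling are assumptions, not the repository's implementation:

from lasagne import layers as l
from lasagne.init import HeNormal
from lasagne.nonlinearities import rectify


def resblock(net, filters, kernel_size, stride=1, num_groups=1,
             preactivated=True):
    # shortcut branch: a strided 1x1 convolution matches the residual
    # branch's shape whenever the block downsamples or changes width
    shortcut = net
    if stride != 1 or shortcut.output_shape[1] != filters:
        shortcut = l.Conv2DLayer(shortcut, num_filters=filters, filter_size=1,
                                 stride=stride, nonlinearity=None, b=None)

    # residual branch: (BN -> ReLU ->) conv -> BN -> ReLU -> conv;
    # `preactivated` is read here as "the input is already normalised and
    # activated", so the leading BN -> ReLU is skipped in that case
    if not preactivated:
        net = l.NonlinearityLayer(l.BatchNormLayer(net), rectify)
    net = l.Conv2DLayer(net, num_filters=filters, filter_size=kernel_size,
                        stride=stride, pad='same', num_groups=num_groups,
                        W=HeNormal(gain='relu'), nonlinearity=None)
    net = l.NonlinearityLayer(l.BatchNormLayer(net), rectify)
    net = l.Conv2DLayer(net, num_filters=filters, filter_size=kernel_size,
                        pad='same', num_groups=num_groups,
                        W=HeNormal(gain='relu'), nonlinearity=None)

    return l.ElemwiseSumLayer([net, shortcut])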
Ejemplo n.º 27
0
def build_vgg16(input_var=None, preload_vgg=False):
    # VGG-16, 16-layer model from the paper:
    # "Very Deep Convolutional Networks for Large-Scale Image Recognition"

    net = {}
    net['input'] = InputLayer((None, 3, 224, 224), input_var=input_var)
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'],
                               64,
                               3,
                               pad=1,
                               flip_filters=False)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'],
                               128,
                               3,
                               pad=1,
                               flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'],
                               256,
                               3,
                               pad=1,
                               flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'],
                               256,
                               3,
                               pad=1,
                               flip_filters=False)
    net['pool3'] = PoolLayer(net['conv3_3'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'],
                               512,
                               3,
                               pad=1,
                               flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'],
                               512,
                               3,
                               pad=1,
                               flip_filters=False)
    net['pool4'] = PoolLayer(net['conv4_3'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'],
                               512,
                               3,
                               pad=1,
                               flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'],
                               512,
                               3,
                               pad=1,
                               flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5_3'], 2)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096)
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5)
    net['fc8'] = DenseLayer(net['fc7_dropout'],
                            num_units=101,
                            nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc8'], softmax)

    if preload_vgg is True:
        # preload VGG-16 weights
        with open('vgg16.pkl', 'rb') as f:
            params = pickle.load(f)

        set_all_param_values(net['fc7_dropout'], params['param values'][:-2])
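        # params['param values'] holds a [W, b] pair per weighted layer;
        # slicing off the last two entries skips the original 1000-way fc8,
        # which would not match the 101-unit fc8 defined above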

    print("VGG-16 has {} parameters".format(count_params(net['prob'])))
    return net
Ejemplo n.º 28
0
def build_baseline_model():

    log.i('BUILDING BASELINE MODEL...')

    # Random Seed
    lasagne_random.set_rng(cfg.getRandomState())

    # Input layer for images
    net = l.InputLayer((None, cfg.IM_DIM, cfg.IM_SIZE[1], cfg.IM_SIZE[0]))

    # Stride size (as an alternative to max pooling)
    if cfg.MAX_POOLING:
        s = 1
    else:
        s = 2

    # Convolutional layer groups
    for i in range(len(cfg.FILTERS)):

        # 3x3 Convolution + Stride
        net = batch_norm(
            l.Conv2DLayer(net,
                          num_filters=cfg.FILTERS[i],
                          filter_size=cfg.KERNEL_SIZES[i],
                          num_groups=cfg.NUM_OF_GROUPS[i],
                          pad='same',
                          stride=s,
                          W=initialization(cfg.NONLINEARITY),
                          nonlinearity=nonlinearity(cfg.NONLINEARITY)))

        # Pooling layer
        if cfg.MAX_POOLING:
            net = l.MaxPool2DLayer(net, pool_size=2)

        # Dropout Layer (we support different types of dropout)
        if cfg.DROPOUT_TYPE == 'channels' and cfg.DROPOUT > 0.0:
            net = l.dropout_channels(net, p=cfg.DROPOUT)
        elif cfg.DROPOUT_TYPE == 'location' and cfg.DROPOUT > 0.0:
            net = l.dropout_location(net, p=cfg.DROPOUT)
        elif cfg.DROPOUT > 0.0:
            net = l.DropoutLayer(net, p=cfg.DROPOUT)

        log.i(('\tGROUP', i + 1, 'OUT SHAPE:', l.get_output_shape(net)))

    # Final 1x1 Convolution
    net = batch_norm(
        l.Conv2DLayer(net,
                      num_filters=cfg.FILTERS[i] * 2,
                      filter_size=1,
                      W=initialization('identity'),
                      nonlinearity=nonlinearity('identity')))

    log.i(('\tFINAL CONV OUT SHAPE:', l.get_output_shape(net)))

    # Global Pooling layer (default mode = average)
    net = l.GlobalPoolLayer(net)
    log.i(("\tFINAL POOLING SHAPE:", l.get_output_shape(net)))

    # Classification Layer (Softmax)
    net = l.DenseLayer(net,
                       len(cfg.CLASSES),
                       nonlinearity=nonlinearity('softmax'),
                       W=initialization('softmax'))

    log.i(("\tFINAL NET OUT SHAPE:", l.get_output_shape(net)))
    log.i("...DONE!")

    # Model stats
    log.i(("MODEL HAS",
           (sum(hasattr(layer, 'W')
                for layer in l.get_all_layers(net))), "WEIGHTED LAYERS"))
    log.i(("MODEL HAS", l.count_params(net), "PARAMS"))

    return net
Ejemplo n.º 29
0
def build_WideResNet(input_var, n=3, k=2):
    '''
    Adapted from https://github.com/Lasagne/Recipes/tree/master/papers/deep_residual_learning.
    Tweaked to be consistent with 'Identity Mappings in Deep Residual Networks', Kaiming He et al. 2016 (https://arxiv.org/abs/1603.05027)
    And 'Wide Residual Networks', Sergey Zagoruyko, Nikos Komodakis 2016 (http://arxiv.org/pdf/1605.07146v1.pdf)
    '''

    n_filters = {0: 16, 1: 16 * k, 2: 32 * k, 3: 64 * k}

    # create a residual learning building block with two stacked 3x3 convlayers and dropout
    def residual_block(l, increase_dim=False, first=False, filters=16):
        if increase_dim:
            first_stride = (2, 2)
        else:
            first_stride = (1, 1)

        if first:
            # hacky solution to keep layers correct
            bn_pre_relu = l
        else:
            # contains the BN -> ReLU portion, steps 1 to 2
            bn_pre_conv = BatchNormLayer(l)
            bn_pre_relu = NonlinearityLayer(bn_pre_conv, rectify)

        # contains the weight -> BN -> ReLU portion, steps 3 to 5
        conv_1 = batch_norm(
            ConvLayer(bn_pre_relu,
                      num_filters=filters,
                      filter_size=(3, 3),
                      stride=first_stride,
                      nonlinearity=rectify,
                      pad='same',
                      W=HeNormal(gain='relu')))

        dropout = DropoutLayer(conv_1, p=0.3)

        # contains the last weight portion, step 6
        conv_2 = ConvLayer(dropout,
                           num_filters=filters,
                           filter_size=(3, 3),
                           stride=(1, 1),
                           nonlinearity=None,
                           pad='same',
                           W=HeNormal(gain='relu'))

        # add shortcut connections
        if increase_dim:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l,
                                   num_filters=filters,
                                   filter_size=(1, 1),
                                   stride=(2, 2),
                                   nonlinearity=None,
                                   pad='same',
                                   b=None)
            block = ElemwiseSumLayer([conv_2, projection])
        elif first:
            # projection shortcut, as option B in paper
            projection = ConvLayer(l,
                                   num_filters=filters,
                                   filter_size=(1, 1),
                                   stride=(1, 1),
                                   nonlinearity=None,
                                   pad='same',
                                   b=None)
            block = ElemwiseSumLayer([conv_2, projection])
        else:
            block = ElemwiseSumLayer([conv_2, l])

        return block

    # Building the network
    l_in = InputLayer(shape=(None, 3, 64, 64), input_var=input_var)

    # first layer
    l = batch_norm(
        ConvLayer(l_in,
                  num_filters=n_filters[0],
                  filter_size=(3, 3),
                  stride=(1, 1),
                  nonlinearity=rectify,
                  pad='same',
                  W=HeNormal(gain='relu')))

    # first stack of residual blocks
    l = residual_block(l, first=True, filters=n_filters[1])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[1])

    # second stack of residual blocks
    l = residual_block(l, increase_dim=True, filters=n_filters[2])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[2])

    # third stack of residual blocks
    l = residual_block(l, increase_dim=True, filters=n_filters[3])
    for _ in range(1, n):
        l = residual_block(l, filters=n_filters[3])

    bn_post_conv = BatchNormLayer(l)
    bn_post_relu = NonlinearityLayer(bn_post_conv, rectify)

    # average pooling
    avg_pool = GlobalPoolLayer(bn_post_relu)

    # fully connected layer
    network = DenseLayer(avg_pool,
                         num_units=101,
                         W=HeNormal(),
                         nonlinearity=softmax)

    print("WideResNet has {} parameters".format(count_params(network)))
    return network
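
A minimal usage sketch for the builder above (variable names are illustrative):

import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
network = build_WideResNet(input_var, n=3, k=2)

# deterministic=True disables the dropout inside the residual blocks
test_prediction = lasagne.layers.get_output(network, deterministic=True)
predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1))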
Ejemplo n.º 30
0
    input_mask = T.fmatrix('input_mask')
    target_data = T.imatrix('target_data')
    target_mask = T.fmatrix('target_mask')
    network_output = deep_projection_ln_lstm_model_fix(
        input_var=input_data,
        mask_var=input_mask,
        num_inputs=input_dim,
        num_outputs=output_dim,
        num_layers=args.num_layers,
        num_units=args.num_units,
        grad_clipping=args.grad_clipping,
        dropout=args.dropout)

    network = network_output
    network_params = get_all_params(network, trainable=True)
    param_count = count_params(network, trainable=True)
    print('Number of parameters of the network: {:.2f}M'.format(
        float(param_count) / 1000000))

    ######################
    # reload model param #
    ######################
    if args.reload_model:
        print('Loading model: {}'.format(args.reload_model))
        with open(args.reload_model, 'rb') as f:
            [
                pretrain_network_params_val, pretrain_update_params_val,
                pretrain_total_batch_cnt
            ] = pickle.load(f)
        set_model_param_value(network_params, pretrain_network_params_val)
    else:
Ejemplo n.º 31
0
                         logging=logging)

    all_g_layers = ll.get_all_layers(gan_model.g_layers.values())
    all_d_layers = ll.get_all_layers(gan_model.d_layers.values())

    glayer2name = defaultdict(str)
    dlayer2name = defaultdict(str)
    logging.info('~~~~~~~~~~~~~ G model ~~~~~~~~~~~~~~~~~~~~~~')
    glayer2name.update({v: k for k, v in gan_model.g_layers.iteritems()})
    logging.info(get_network_str(all_g_layers, get_network=False,
                                 incomings=True, outgoings=True,
                                 layer2name=glayer2name))

    dlayer2name.update(({v: k for k, v in gan_model.d_layers.iteritems()}))
    logging.info('G total trainable params: %g' %
                 (ll.count_params(gan_model.l_out_g, trainable=True)))
    logging.info('~~~~~~~~~~~~~ D model ~~~~~~~~~~~~~~~~~~~~~~')
    logging.info(get_network_str(all_d_layers, get_network=False,
                                 incomings=True, outgoings=True,
                                 layer2name=dlayer2name))

    logging.info('D total trainable params: %g' %
                 (ll.count_params(gan_model.l_out_d, trainable=True)))

    ##############################################################

    gan_model.build_funcs()
    data_itr = batch(all_data, batch_size)

    tic = None
    hist = defaultdict(list)