Beispiel #1
0
def train(
        dim_out=500,  # hidden layer dim for outputs
        ctx_dim=1024,  # context vector dimensionality
        dim=1024,  # the number of LSTM units
        n_actions=3101,  # number of actions to predict
        n_layers_att=1,
        n_layers_out=1,
        n_layers_init=1,
        ctx2out=False,
        patience=50,
        max_epochs=5000,
        dispFreq=100,
        decay_c=0.,
        alpha_c=0.,
        temperature_inverse=1.0,
        lrate=0.001,
        selector=False,
        maxlen=5,  # maximum length of the video
        optimizer='sgd',
        batch_size=16,
        valid_batch_size=16,
        saveto='model.npz',
        validFreq=1000,
        saveFreq=1000,  # save the parameters after every saveFreq updates
        dataset='flickr8k',  # dummy dataset, replace with video ones
        dictionary=None,  # word dictionary
        use_dropout=False,
        reload_=False,
        training_stride=1,
        testing_stride=8,
        last_n=16,
        fps=30):

    # Model options
    model_options = locals().copy()
    #model_options = validate_options(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print "Reloading options"
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    print '-----'
    print 'Booting up all data handlers'
    data_pb = TrainProto(batch_size, maxlen, training_stride, dataset, fps)
    dh = DataHandler(data_pb)
    dataset_size = dh.GetDatasetSize()
    num_train_batches = dataset_size / batch_size
    if dataset_size % batch_size != 0:
        num_train_batches += 1

    valid = True  # not None
    test = True  # not None

    data_test_train_pb = TestTrainProto(valid_batch_size, maxlen,
                                        testing_stride, dataset, fps)
    dh_test_train = DataHandler(data_test_train_pb)
    test_train_dataset_size = dh_test_train.GetDatasetSize()
    num_test_train_batches = test_train_dataset_size / valid_batch_size
    if test_train_dataset_size % valid_batch_size != 0:
        num_test_train_batches += 1

    data_test_valid_pb = TestValidProto(valid_batch_size, maxlen,
                                        testing_stride, dataset, fps)
    dh_test_valid = DataHandler(data_test_valid_pb)
    test_valid_dataset_size = dh_test_valid.GetDatasetSize()
    num_test_valid_batches = test_valid_dataset_size / valid_batch_size
    if test_valid_dataset_size % valid_batch_size != 0:
        num_test_valid_batches += 1

    data_test_test_pb = TestTestProto(valid_batch_size, maxlen, testing_stride,
                                      dataset, fps)
    dh_test_test = DataHandler(data_test_test_pb)
    test_test_dataset_size = dh_test_test.GetDatasetSize()
    num_test_test_batches = test_test_dataset_size / valid_batch_size
    if test_test_dataset_size % valid_batch_size != 0:
        num_test_test_batches += 1
    print 'Data handlers ready'
    print '-----'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        print "Reloading model"
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, use_noise, \
          inps,\
          cost, \
          opts_out, preds, i_gate = \
          build_model(tparams, model_options)
    '''
    get_i_gate = theano.function(inps[0:2], i_gate, profile=False, on_unused_input='ignore')
    print 'build get_i_gate felished'

    x, vid, n_ex = dh_test_train.GetBatch(data_test_train_pb)
    mask = numpy.ones((maxlen, batch_size)).astype('float32')
    if n_ex != batch_size:
        mask[:,n_ex:] = numpy.zeros((maxlen, batch_size-n_ex)).astype('float32')

    i_gate_np = get_i_gate(x,mask)
    print len(i_gate_np)
    print len(i_gate_np[0])
    print len(i_gate_np[0][0])
    print i_gate_np[0][0][0].shape
    weig = numpy.zeros((7,7,30,batch_size))
    for i in xrange(7):
        for j in xrange(7):
            for k in xrange(30):
                weig[i,j,k,:] = numpy.mean(i_gate_np[k][j][i],axis=1)
    dic = {'weig':weig, 'vid':vid}
    sio.savemat('weig.mat', {'dic':dic})

    train_err = 0
    valid_err = 0
    test_err = 0

    '''

    # before any regularizer
    f_log_probs = theano.function(inps, -cost, profile=False)
    f_preds = theano.function(inps,
                              preds,
                              profile=False,
                              on_unused_input='ignore')

    cost = cost.mean()
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    cost += 0.0001 * i_gate.sum()

    #if alpha_c > 0.:
    #    alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
    #    alpha_reg = alpha_c * ((1.-alphas.sum(0))**2).sum(0).mean()
    #    cost += alpha_reg

    # gradient computation
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    history_errs = []
    # reload history
    if reload_ and os.path.exists(saveto):
        history_errs = numpy.load(saveto)['history_errs'].tolist()
    best_p = None
    bad_count = 0

    uidx = 0

    try:

        for epochidx in xrange(max_epochs):
            # If the input sequences are of variable length get mask from the data loader instead of setting them all to one
            mask = numpy.ones((maxlen, batch_size)).astype('float32')
            print 'Epoch ', epochidx
            n_examples_seen = 0
            estop = False
            if epochidx > 0:
                dh.Reset()

            for tbidx in xrange(num_train_batches):
                n_examples_seen += batch_size
                uidx += 1
                use_noise.set_value(1.)

                pd_start = time.time()
                x, y, n_ex = dh.GetBatch(data_pb)
                if n_ex != batch_size:
                    mask[:, n_ex:] = numpy.zeros(
                        (maxlen, batch_size - n_ex)).astype('float32')
                pd_duration = time.time() - pd_start

                if x == None:
                    print 'Minibatch with zero sample under length ', maxlen
                    continue
                ud_start = time.time()

                cost = f_grad_shared(x, mask, y)
                if uidx == 1:
                    print 'Original Cost ', cost / x.shape[3]
                f_update(lrate)
                ud_duration = time.time() - ud_start

                if n_ex != batch_size:
                    mask[:, n_ex:] = numpy.ones(
                        (maxlen, batch_size - n_ex)).astype('float32')

                if numpy.isnan(cost):
                    print 'NaN detected in cost'
                    return 1., 1., 1.
                if numpy.isinf(cost):
                    print 'INF detected in cost'
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch ', epochidx, 'Update ', uidx, 'Cost ', cost / x.shape[
                        3], 'PD ', pd_duration, 'UD ', ud_duration

                if numpy.mod(uidx, saveFreq) == 0:
                    print 'Saving...',

                    if best_p != None:
                        params = copy.copy(best_p)
                    else:
                        params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_errs, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                    print 'Done'

                if numpy.mod(uidx, validFreq) == 0:

                    use_noise.set_value(0.)
                    train_err = 0
                    valid_err = 0
                    test_err = 0
                    print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'
                    #train_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False)
                    if valid is not None:
                        valid_err = pred_acc(saveto,
                                             valid_batch_size,
                                             f_preds,
                                             maxlen,
                                             data_test_valid_pb,
                                             dh_test_valid,
                                             test_valid_dataset_size,
                                             num_test_valid_batches,
                                             last_n,
                                             test=True)
                    #if test is not None:
                    #    test_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True)

                    history_errs.append([valid_err, test_err])
                    if epochidx == 0 or valid_err >= numpy.array(
                            history_errs)[:, 0].max():
                        best_p = unzip(
                            tparams)  # p for min valid err / max valid acc

                    print 'Accuracy: Train', train_err, 'Valid', valid_err, 'Test', test_err

            if n_ex == batch_size:
                print 'Seen %d training examples' % (n_examples_seen)
            else:
                print 'Seen %d training examples' % (n_examples_seen -
                                                     batch_size + n_ex)
            use_noise.set_value(0.)
            train_err = 0
            valid_err = 0
            test_err = 0
            print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'
            #train_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False)
            if valid is not None:
                valid_err = pred_acc(saveto,
                                     valid_batch_size,
                                     f_preds,
                                     maxlen,
                                     data_test_valid_pb,
                                     dh_test_valid,
                                     test_valid_dataset_size,
                                     num_test_valid_batches,
                                     last_n,
                                     test=True)
            if test is not None:
                test_err = pred_acc(saveto,
                                    valid_batch_size,
                                    f_preds,
                                    maxlen,
                                    data_test_test_pb,
                                    dh_test_test,
                                    test_test_dataset_size,
                                    num_test_test_batches,
                                    last_n,
                                    test=True)

            history_errs.append([valid_err, test_err])

            if epochidx == 0 or valid_err >= numpy.array(
                    history_errs)[:, 0].max():
                best_p = unzip(tparams)  # p for min valid err / max valid acc

            print 'Accuracy: Train', train_err, 'Valid', valid_err, 'Test', test_err
    finally:  #except KeyboardInterrupt:

        if best_p is not None:
            zipp(best_p, tparams)

        use_noise.set_value(0.)
        train_err = 0
        valid_err = 0
        test_err = 0
        print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'
        #train_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False)
        if valid is not None:
            valid_err = pred_acc(saveto,
                                 valid_batch_size,
                                 f_preds,
                                 maxlen,
                                 data_test_valid_pb,
                                 dh_test_valid,
                                 test_valid_dataset_size,
                                 num_test_valid_batches,
                                 last_n,
                                 test=True)
        if test is not None:
            test_err = pred_acc(saveto,
                                valid_batch_size,
                                f_preds,
                                maxlen,
                                data_test_test_pb,
                                dh_test_test,
                                test_test_dataset_size,
                                num_test_test_batches,
                                last_n,
                                test=True)

        print 'Accuracy: Train', train_err, 'Valid', valid_err, 'Test', test_err
        params = copy.copy(best_p)
        numpy.savez(saveto,
                    zipped_params=best_p,
                    train_err=train_err,
                    valid_err=valid_err,
                    test_err=test_err,
                    history_errs=history_errs,
                    **params)

        print model_options

    return train_err, valid_err, test_err
Beispiel #2
0
def train(
        dim_out=100,  # hidden layer dim for outputs
        ctx_dim=512,  # context vector dimensionality
        dim=1000,  # the number of LSTM units
        n_actions=3,  # number of actions to predict
        n_layers_att=1,
        n_layers_out=1,
        n_layers_init=1,
        ctx2out=False,
        patience=10,
        max_epochs=5000,
        dispFreq=100,
        decay_c=0.,
        alpha_c=0.,
        temperature_inverse=1.0,
        lrate=0.01,
        selector=False,
        maxlen=30,  # maximum length of the video
        optimizer='adam',
        batch_size=16,
        valid_batch_size=16,
        saveto='model.npz',
        validFreq=1000,
        saveFreq=1000,  # save the parameters after every saveFreq updates
        dataset='flickr8k',  # dummy dataset, replace with video ones
        dictionary=None,  # word dictionary
        use_dropout=False,
        reload_=False,
        training_stride=1,
        testing_stride=8,
        last_n=16,
        fps=100,
        data_dir='/home/pmorerio/datasets/IIT_IFM/'):

    # Model options
    model_options = locals().copy()
    #model_options = validate_options(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print "Reloading options"
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    print '-----'
    print 'Booting up all data handlers'
    print 'Training set for actual training (randomized)'
    data_pb = TrainProto(batch_size, maxlen, training_stride, dataset,
                         data_dir, fps)
    dh = DataHandler(data_pb)
    dataset_size = dh.GetDatasetSize()
    num_train_batches = dataset_size / batch_size
    if dataset_size % batch_size != 0:
        num_train_batches += 1
    print num_train_batches, ' batches'

    valid = None  # not None
    test = True  # not None

    print 'Training set for training accuracy'  # the training set is loaded twice: for actual training and for computing training error
    data_test_train_pb = TestTrainProto(valid_batch_size, maxlen,
                                        testing_stride, dataset, data_dir, fps)
    dh_test_train = DataHandler(data_test_train_pb)
    test_train_dataset_size = dh_test_train.GetDatasetSize()
    num_test_train_batches = test_train_dataset_size / valid_batch_size
    if test_train_dataset_size % valid_batch_size != 0:
        num_test_train_batches += 1
    print num_test_train_batches, ' batches'

    if valid == True:
        print 'Validation set for validation accuracy'
        data_test_valid_pb = TestValidProto(valid_batch_size, maxlen,
                                            testing_stride, dataset, data_dir,
                                            fps)
        dh_test_valid = DataHandler(data_test_valid_pb)
        test_valid_dataset_size = dh_test_valid.GetDatasetSize()
        num_test_valid_batches = test_valid_dataset_size / valid_batch_size
        if test_valid_dataset_size % valid_batch_size != 0:
            num_test_valid_batches += 1
        print num_test_valid_batches, ' batches'

    print 'Test set for test accuracy'
    data_test_test_pb = TestTestProto(valid_batch_size, maxlen, testing_stride,
                                      dataset, data_dir, fps)
    dh_test_test = DataHandler(data_test_test_pb)
    test_test_dataset_size = dh_test_test.GetDatasetSize()
    num_test_test_batches = test_test_dataset_size / valid_batch_size
    if test_test_dataset_size % valid_batch_size != 0:
        num_test_test_batches += 1
    print num_test_test_batches, ' batches'

    print 'Data handlers ready'
    print '-----'

    print 'Building model'
    params = init_params(model_options)  # actual parameter initialization
    # reload parameters
    if reload_ and os.path.exists(saveto):
        print "Reloading model"
        params = load_params(saveto, params)

    # simply initializes Theano shared variable according to param
    # numpy arrays -> theano shared variables
    tparams = init_tparams(params)

    # In order, we get:
    #   1) trng - theano random number generator
    #   2) use_noise - flag that turns on dropout
    #   3) inps - inputs for f_grad_shared
    #	4) alphas - the attention weigths
    #   4) cost - log likelihood for each sentence
    #   5) opts_out - optional outputs (e.g selector)
    #	6) preds - the computed labels

    trng, use_noise, \
          inps, alphas, \
          cost, \
          opts_out, preds = \
          build_model(tparams, model_options)   # builds the whole computation graph

    # before any regularizer
    f_log_probs = theano.function(inps, -cost, profile=False)
    f_preds = theano.function(inps,
                              preds,
                              profile=False,
                              on_unused_input='ignore')

    cost = cost.mean()

    # add L2 regularization costs
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # add attention penalty to the cost
    #if alpha_c > 0.:
    #alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
    #alpha_reg = alpha_c * ((1.-alphas.sum(0))**2).sum(0).mean()
    #cost += alpha_reg

    # add ATTENTION FOCUS to the cost
    if alpha_c > 0.:
        alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
        alpha_reg = -alpha_c * (
            alphas * tensor.log(alphas + 1e-8)).sum(0).sum(0).mean()
        cost += alpha_reg

    # Backpropagation
    # gradient computation
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    # f_grad_shared computes the cost and updates adaptive learning rate variables
    # f_update updates the weights of the model
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    history_acc = []
    # reload history
    if reload_ and os.path.exists(saveto):
        history_acc = numpy.load(saveto)['history_acc'].tolist()
    best_p = None
    bad_count = 0

    uidx = 0

    train_acc = 0
    valid_acc = 0
    test_acc = 0

    for epochidx in xrange(max_epochs):
        # If the input sequences are of variable length get mask from the data loader instead of setting them all to one
        mask = numpy.ones((maxlen, batch_size)).astype('float32')
        print 'Epoch ', epochidx
        n_examples_seen = 0
        estop = False  # not used
        #if epochidx > 0:
        dh.Reset()  #  training data is shuffled at each epoch in Reset()

        udtime = 0
        pdtime = 0
        for tbidx in xrange(num_train_batches):
            n_examples_seen += batch_size
            uidx += 1
            use_noise.set_value(1.)

            pd_start = time.time()
            x, y, n_ex = dh.GetBatch(
                data_pb
            )  # looks really slow. this is maybe why also predictions are slow (must get batches for all train/test/valid)
            if n_ex != batch_size:
                mask[:, n_ex:] = numpy.zeros(
                    (maxlen, batch_size - n_ex)).astype('float32')
            pdtime += time.time() - pd_start  # pd stands for prepare data?

            #if x == None: # this gives a Warning. Replaced with -> if x is None:
            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                continue
            ud_start = time.time()

            cost = f_grad_shared(x, mask, y)
            f_update(lrate)
            udtime += time.time() - ud_start  # ud stands for use data?

            if n_ex != batch_size:
                mask[:, n_ex:] = numpy.ones(
                    (maxlen, batch_size - n_ex)).astype('float32')

            if numpy.isnan(cost):
                print 'NaN detected in cost'
                return 1., 1., 1.
            if numpy.isinf(cost):
                print 'INF detected in cost'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', epochidx, ' Update', uidx, ' Cost', cost, ' PD', pdtime / float(
                    dispFreq), ' UD', udtime / float(dispFreq)
                pdtime = 0
                udtime = 0

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                if best_p != None:
                    params = copy.copy(best_p)
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_acc=history_acc, **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                train_acc = 0
                valid_acc = 0
                test_acc = 0
                print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'
                train_acc = pred_acc(saveto,
                                     valid_batch_size,
                                     f_preds,
                                     maxlen,
                                     data_test_train_pb,
                                     dh_test_train,
                                     test_train_dataset_size,
                                     num_test_train_batches,
                                     last_n,
                                     test=False)
                if valid is not None:
                    valid_acc = pred_acc(saveto,
                                         valid_batch_size,
                                         f_preds,
                                         maxlen,
                                         data_test_valid_pb,
                                         dh_test_valid,
                                         test_valid_dataset_size,
                                         num_test_valid_batches,
                                         last_n,
                                         test=True)
                if test is not None:
                    test_acc = pred_acc(saveto,
                                        valid_batch_size,
                                        f_preds,
                                        maxlen,
                                        data_test_test_pb,
                                        dh_test_test,
                                        test_test_dataset_size,
                                        num_test_test_batches,
                                        last_n,
                                        test=True)

                history_acc.append([train_acc, valid_acc, test_acc])

                if uidx == 0 or valid_acc >= numpy.array(history_acc)[:,
                                                                      1].max():
                    best_p = unzip(
                        tparams)  # p for min valid err / max valid acc

                print 'Accuracy: Train', train_acc, 'Valid', valid_acc, 'Test', test_acc
            #here ends the  cycle over the batches
        if n_ex == batch_size:
            print 'Seen %d training examples' % (n_examples_seen)
        else:
            print 'Seen %d training examples' % (n_examples_seen - batch_size +
                                                 n_ex)
        use_noise.set_value(0.)
        train_acc = 0
        valid_acc = 0
        test_acc = 0
        print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'
        train_acc = pred_acc(saveto,
                             valid_batch_size,
                             f_preds,
                             maxlen,
                             data_test_train_pb,
                             dh_test_train,
                             test_train_dataset_size,
                             num_test_train_batches,
                             last_n,
                             test=False)
        if valid is not None:
            valid_acc = pred_acc(saveto,
                                 valid_batch_size,
                                 f_preds,
                                 maxlen,
                                 data_test_valid_pb,
                                 dh_test_valid,
                                 test_valid_dataset_size,
                                 num_test_valid_batches,
                                 last_n,
                                 test=True)
        if test is not None:
            test_acc = pred_acc(saveto,
                                valid_batch_size,
                                f_preds,
                                maxlen,
                                data_test_test_pb,
                                dh_test_test,
                                test_test_dataset_size,
                                num_test_test_batches,
                                last_n,
                                test=True)

        history_acc.append([train_acc, valid_acc, test_acc])

        if epochidx == 0 or valid_acc >= numpy.array(history_acc)[:, 1].max():
            best_p = unzip(tparams)  # p for min valid err / max valid acc

        print 'Accuracy: Train', train_acc, 'Valid', valid_acc, 'Test', test_acc
        # here ends the cycle over the epochs

    # use the best  parameters for final checkpoint (if they exist)
    if best_p is not None:
        zipp(best_p, tparams)

    # if best param were found with validation, calculate accuracy with them
    if valid is not None:
        use_noise.set_value(0.)
        train_acc = 0
        valid_acc = 0
        test_acc = 0
        print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)'
        train_acc = pred_acc(saveto,
                             valid_batch_size,
                             f_preds,
                             maxlen,
                             data_test_train_pb,
                             dh_test_train,
                             test_train_dataset_size,
                             num_test_train_batches,
                             last_n,
                             test=False)
        valid_acc = pred_acc(saveto,
                             valid_batch_size,
                             f_preds,
                             maxlen,
                             data_test_valid_pb,
                             dh_test_valid,
                             test_valid_dataset_size,
                             num_test_valid_batches,
                             last_n,
                             test=True)
        if test is not None:
            test_acc = pred_acc(saveto,
                                valid_batch_size,
                                f_preds,
                                maxlen,
                                data_test_test_pb,
                                dh_test_test,
                                test_test_dataset_size,
                                num_test_test_batches,
                                last_n,
                                test=True)

        print 'Accuracy: Train', train_acc, 'Valid', valid_acc, 'Test', test_acc

    params = copy.copy(best_p)
    numpy.savez(saveto,
                zipped_params=best_p,
                train_acc=train_acc,
                valid_acc=valid_acc,
                test_acc=test_acc,
                history_acc=history_acc,
                **params)

    print model_options

    return train_acc, valid_acc, test_acc