Code example #1
def main(params):
    # see documentation in capgen.py for more details on hyperparams
    _, validerr, _ = train(saveto=params["model"],
                           attn_type=params["attn-type"],
                           reload_=params["reload"],
                           dim_word=params["dim-word"],
                           ctx_dim=params["ctx-dim"],
                           dim=params["dim"],
                           n_layers_att=params["n-layers-att"],
                           n_layers_out=params["n-layers-out"],
                           n_layers_lstm=params["n-layers-lstm"],
                           n_layers_init=params["n-layers-init"],
                           n_words=params["n-words"],
                           lstm_encoder=params["lstm-encoder"],
                           decay_c=params["decay-c"],
                           alpha_c=params["alpha-c"],
                           prev2out=params["prev2out"],
                           ctx2out=params["ctx2out"],
                           lrate=params["learning-rate"],
                           optimizer=params["optimizer"],
                           selector=params["selector"],
                           max_epochs=params["max-epochs"],
                           patience=10,
                           maxlen=100,
                           batch_size=64,
                           valid_batch_size=64,
                           validFreq=params["valid-freq"],  # 2000
                           dispFreq=1,
                           saveFreq=1000,
                           sampleFreq=250,
                           dataset=params["dataset"],
                           use_dropout=params["use-dropout"],
                           use_dropout_lstm=params["use-dropout-lstm"],
                           save_per_epoch=params["save-per-epoch"])
    print "Final cost: {:.2f}".format(validerr.mean())
Code example #2

The same call as example #1, except that max_epochs is omitted and validFreq (2000) and dataset ("coco") are hard-coded rather than read from params.
def main(params):
    # see documentation in capgen.py for more details on hyperparams
    _, validerr, _ = train(saveto=params["model"],
                           attn_type=params["attn-type"],
                           reload_=params["reload"],
                           dim_word=params["dim-word"],
                           ctx_dim=params["ctx-dim"],
                           dim=params["dim"],
                           n_layers_att=params["n-layers-att"],
                           n_layers_out=params["n-layers-out"],
                           n_layers_lstm=params["n-layers-lstm"],
                           n_layers_init=params["n-layers-init"],
                           n_words=params["n-words"],
                           lstm_encoder=params["lstm-encoder"],
                           decay_c=params["decay-c"],
                           alpha_c=params["alpha-c"],
                           prev2out=params["prev2out"],
                           ctx2out=params["ctx2out"],
                           lrate=params["learning-rate"],
                           optimizer=params["optimizer"],
                           selector=params["selector"],
                           patience=10,
                           maxlen=100,
                           batch_size=64,
                           valid_batch_size=64,
                           validFreq=2000,
                           dispFreq=1,
                           saveFreq=1000,
                           sampleFreq=250,
                           dataset="coco",
                           use_dropout=params["use-dropout"],
                           use_dropout_lstm=params["use-dropout-lstm"],
                           save_per_epoch=params["save-per-epoch"])
    print "Final cost: {:.2f}".format(validerr.mean())
Code example #3

Here train() is called with explicit values instead of a params dict. Note that the hard-attention parameters (alpha_entropy_c, RL_sumCost, semi_sampling_p, temperature) are passed even though attn_type="deterministic" selects soft attention.
def main():
    m_score = train(
        dim_word=512,  # word vector dimensionality
        ctx_dim=512,  # context vector dimensionality
        dim=1000,  # the number of LSTM units
        attn_type="deterministic",  # Soft atten
        n_layers_att=2,  # number of layers used to compute the attention weights
        n_layers_out=1,  # number of layers used to compute logit
        n_layers_lstm=1,  # number of lstm layers
        n_layers_init=1,  # number of layers to initialize LSTM at time 0
        lstm_encoder=False,  # if True, run bidirectional LSTM on input units
        prev2out=True,  # Feed previous word into logit
        ctx2out=True,  # Feed attention weighted ctx into logit
        alpha_entropy_c=0.002,  # hard attn param
        RL_sumCost=True,  # hard attn param
        semi_sampling_p=0.5,  # hard attn param
        temperature=1.,  # hard attn param
        patience=10,
        max_epochs=5000,
        dispFreq=1,
        decay_c=0.,  # weight decay coeff
        alpha_c=0.,  # doubly stochastic coeff
        lrate=0.01,  # used only for SGD
        selector=True,  # selector (see paper)
        n_words=7632,  # vocab size
        maxlen=100,  # maximum length of the description
        optimizer='adam',
        batch_size=64,
        valid_batch_size=64,
        saveto='model2.npz',  # relative path of saved model file
        validFreq=1000,
        saveFreq=100,  # save the parameters after every saveFreq updates
        sampleFreq=500,  # generate some samples after every sampleFreq updates
        dataset='flickr8k',
        dictionary=None,  # word dictionary
        use_dropout=False,  # setting this true turns on dropout at various points
        use_dropout_lstm=False,  # dropout on lstm gates
        reload_=False,
        save_per_epoch=False,  # if True, this saves down the model every epoch
        dev_references='./Flickr8k/cap_features/ref/dev',
        test_references='./Flickr8k/cap_features/ref/test',
        use_metrics=True,
        metric='Bleu_4')
    print "Bleu_4: %.4f" % m_score
Code example #4
File: train.py Project: flipvrijn/arctic-captions
# Assumed imports, shown for completeness; the excerpt uses both names below.
import cPickle as pkl        # options files are (c)pickled
from monitor import Monitor  # import path assumed; project-local status tracker

def main(args):
    monitor = Monitor('{}/{}_status.json'.format(args['out_dir'].rstrip('/'), args["model"]))

    try:
        if args['type'] == 'normal':
            from capgen import train

            _, validerr, _ = train(out_dir=args['out_dir'].rstrip('/'),
                                   data_dir=args['data_dir'].rstrip('/'),
                                   saveto=args["model"],
                                   attn_type='deterministic',
                                   reload_=args['reload'],
                                   dim_word=512,
                                   ctx_dim=512,
                                   dim=1800,
                                   n_layers_att=2,
                                   n_layers_out=1,
                                   n_layers_lstm=1,
                                   n_layers_init=2,
                                   n_words=10000,
                                   lstm_encoder=False,
                                   decay_c=0.,
                                   alpha_c=1.,
                                   prev2out=True,
                                   ctx2out=True,
                                   lrate=0.01,
                                   optimizer='adam',
                                   selector=True,
                                   patience=10,
                                   maxlen=100,
                                   batch_size=64,
                                   valid_batch_size=64,
                                   validFreq=2000,
                                   dispFreq=1,
                                   saveFreq=1000,
                                   sampleFreq=250,
                                   dataset="coco",
                                   use_dropout=True,
                                   use_dropout_lstm=False,
                                   save_per_epoch=False,
                                   monitor=monitor)
            print "Final cost: {:.2f}".format(validerr.mean())
        elif args['type'] == 't_attn':
            from capgen_text import train

            out_dir = args['out_dir'].rstrip('/')
            saveto  = args['model']
            _, validerr, _ = train(out_dir=out_dir,
                                   data_dir=args['data_dir'].rstrip('/'),
                                   saveto=saveto,
                                   attn_type='deterministic',
                                   reload_=args['reload'],
                                   dim_word=512,
                                   ctx_dim=512,
                                   tex_dim=args['tex_dim'],
                                   dim=1800,
                                   n_layers_att=2,
                                   n_layers_out=1,
                                   n_layers_lstm=1,
                                   n_layers_init=2,
                                   n_words=10000,
                                   lstm_encoder=False,
                                   lstm_encoder_context=args['lenc'],
                                   decay_c=0.,
                                   alpha_c=1.,
                                   prev2out=True,
                                   ctx2out=True,
                                   tex2out=True,
                                   lrate=0.01,
                                   optimizer='adam',
                                   selector=True,
                                   patience=10,
                                   maxlen=100,
                                   batch_size=32,
                                   valid_batch_size=32,
                                   validFreq=2000,
                                   dispFreq=1,
                                   saveFreq=1000,
                                   sampleFreq=250,
                                   dataset="coco",
                                   use_dropout=True,
                                   use_dropout_lstm=False,
                                   save_per_epoch=False,
                                   monitor=monitor)
            print "Final cost: {:.2f}".format(validerr.mean())

            # Store data preprocessing type in the options file:
            # load the pickled options, update them, then rewrite the file
            # (dumping into a handle opened read-only would fail).
            options_path = '{}/{}.pkl'.format(out_dir, saveto)
            with open(options_path, 'rb') as f_opts:
                opts = pkl.load(f_opts)
            opts['preproc_type'] = args['preproc_type']
            preproc_params = {}
            for param in args['preproc_params'].split(','):
                if param:
                    key, value = param.split('=')
                    if value.isdigit():
                        value = int(value)
                    preproc_params[key] = value
            opts['preproc_params'] = preproc_params
            with open(options_path, 'wb') as f_opts:
                pkl.dump(opts, f_opts)
    except (KeyboardInterrupt, SystemExit):
        print 'Interrupted!'
        monitor.error_message = 'Interrupted!'
        monitor.status = 12
    except Exception as e:
        print 'Unexpected error!'
        monitor.error_message = str(e)
        monitor.status = 12
        raise  # bare raise preserves the original traceback
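
For reference, a hedged sketch of how example #4's main() might be invoked. The keys are exactly the ones the function reads above; every value is a placeholder, and in the project itself they presumably come from a command-line parser.

# Illustrative only: the args dict example #4's main() reads.
# 'type' selects the branch; tex_dim, lenc and the preproc_* keys
# are consumed only on the 't_attn' path.
if __name__ == '__main__':
    main({
        'out_dir': './out',
        'data_dir': './data',
        'model': 'caption_model',
        'type': 't_attn',           # 'normal' or 't_attn'
        'reload': False,
        'tex_dim': 512,             # placeholder textual-context dimensionality
        'lenc': False,              # passed as lstm_encoder_context
        'preproc_type': 'default',  # placeholder; stored in the options pkl
        'preproc_params': 'k=10',   # comma-separated key=value pairs
    })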
Code example #5

A minimal invocation: only dataset is specified and everything else falls back to the defaults in capgen.train.
import sys
sys.path.insert(0, '../../')  # make the top-level capgen module importable

from capgen import train

# train on Flickr30k with every other hyperparameter left at its capgen default
train(dataset='flickr30k')