Example #1
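# Imports this snippet relies on; data_utils and tprnn_model are
# project-local modules, and init_params, load_params, init_tparams,
# unzip, and evaluate are helpers assumed to be defined elsewhere in the
# same module.
import pickle
import pprint
import timeit

import downhill
import networkx as nx
import numpy as np
import theano

import data_utils
import tprnn_model
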
def train(data_dir='data/memes/',
          dim_proj=512,
          maxlen=30,
          batch_size=256,
          keep_ratio=1.,
          shuffle_data=True,
          learning_rate=0.001,
          global_steps=50000,
          disp_freq=100,
          save_freq=1000,
          test_freq=1000,
          saveto_file='params.npz',
          weight_decay=0.0005,
          reload_model=False,
          train=True):
    """
    Topo-LSTM model training.
    """
    options = locals().copy()
    saveto = data_dir + saveto_file

    # loads graph
    G, node_index = data_utils.load_graph(data_dir)
    print nx.info(G)
    options['n_words'] = len(node_index)

    print options

    # creates and initializes shared variables.
    print 'Initializing variables...'
    params = init_params(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(saveto, params)
    tparams = init_tparams(params)
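    # init_tparams presumably wraps each numpy parameter array in a Theano
    # shared variable so the compiled update function can modify it in place.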

    # builds Topo-LSTM model
    print 'Building model...'
    model = tprnn_model.build_model(tparams, options)

    print 'Loading test data...'
    test_examples = data_utils.load_examples(data_dir,
                                             dataset='test',
                                             node_index=node_index,
                                             maxlen=maxlen,
                                             G=G)
    test_loader = data_utils.Loader(test_examples, options=options)
    print 'Loaded %d test examples.' % len(test_examples)

    if train:
        # prepares training data.
        print 'Loading train data...'
        train_examples = data_utils.load_examples(
            data_dir,
            dataset='train',
            keep_ratio=options['keep_ratio'],
            node_index=node_index,
            maxlen=maxlen,
            G=G)
        train_loader = data_utils.Loader(train_examples, options=options)
        print 'Loaded %d training examples.' % len(train_examples)

        # compiles updates.
        optimizer = downhill.build(algo='adam',
                                   loss=model['cost'],
                                   params=tparams.values(),
                                   inputs=model['data'])

        updates = optimizer.get_updates(max_gradient_elem=5.,
                                        learning_rate=learning_rate)
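        # get_updates() yields (shared variable, update expression) pairs for
        # Adam; max_gradient_elem clips each gradient element to [-5, 5]
        # before the step is applied.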

        f_update = theano.function(model['data'],
                                   model['cost'],
                                   updates=list(updates))
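        # Each call to f_update consumes one mini-batch, returns the batch
        # cost, and applies the parameter updates as a side effect.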

        # training loop.
        start_time = timeit.default_timer()

        # An alternative would be to let downhill.minimize() drive the whole
        # loop; the manual loop below is kept for fine-grained control over
        # checkpointing and periodic evaluation.

        n_examples = len(train_examples)
        batches_per_epoch = n_examples // options['batch_size'] + 1
        n_epochs = global_steps // batches_per_epoch + 1
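        # Both divisions truncate, so the +1 terms round up: the nested loops
        # below execute at least global_steps batches in total.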

        global_step = 0
        cost_history = []
        for _ in range(n_epochs):
            for _ in range(batches_per_epoch):
                cost = f_update(*train_loader())
                cost_history += [cost]

                if global_step % disp_freq == 0:
                    print 'global step %d, cost: %f' % (global_step, cost)

                # dump model parameters: arrays go to the .npz checkpoint,
                # with the options dict pickled alongside for reload.
                if global_step % save_freq == 0:
                    params = unzip(tparams)
                    np.savez(saveto, **params)
                    pickle.dump(options, open('%s.pkl' % saveto, 'wb'), -1)

                # evaluate on test data.
                if global_step % test_freq == 0:
                    scores = evaluate(model['f_prob'], test_loader)
                    print 'eval scores: ', scores
                    end_time = timeit.default_timer()
                    print 'time used: %d seconds.' % (end_time - start_time)

                global_step += 1

    scores = evaluate(model['f_prob'], test_loader)
    pprint.pprint(scores)
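
# A minimal, hypothetical invocation; it assumes the graph and train/test
# cascade files expected by data_utils already exist under data_dir.
if __name__ == '__main__':
    train(data_dir='data/memes/', batch_size=256, global_steps=50000)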
Example #2
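# Imports this snippet relies on; data_utils and tpgru_model are
# project-local modules, and init_params, init_timeparams, load_params,
# init_tparams, unzip, and evaluate are helpers assumed to be defined
# elsewhere in the same module.
import pickle
import pprint
import timeit

import downhill
import numpy as np
import theano

import data_utils
import tpgru_model
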
def train(data_dir='data/memes/',
          dim_proj=256,
          dim_att=128,
          maxlen=30,
          batch_size=256,
          keep_ratio=1.,
          shuffle_data=True,
          learning_rate=0.001,
          global_steps=50000,
          disp_freq=100,
          save_freq=100,
          test_freq=100,
          saveto_file='params.npz',
          tmsaveto_file='timeparams.npz',
          weight_decay=0.0005,
          sigmasqr=1,
          tdim=1.,
          reload_model=False,
          train=True):
    """
    Topo-LSTM model training.
    tdim: scale time down by how many times
    """
    options = locals().copy()
    saveto = data_dir + saveto_file
    tmsaveto = data_dir + tmsaveto_file

    # loads graph
    Gp, node_index = data_utils.load_graph(data_dir)
    options['n_events'] = len(node_index)

    print options

    # creates and initializes shared variables.
    print 'Initializing variables...'
    params = init_params(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(saveto, params)
    tparams = init_tparams(params)

    timeparams = init_timeparams(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(tmsaveto, timeparams)
    timetparams = init_tparams(timeparams)
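    # Two disjoint parameter sets are maintained: tparams for the diffusion
    # model and timetparams for the time model, each reloaded from and
    # checkpointed to its own file (saveto vs. tmsaveto).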

    # builds Topo-LSTM model
    print 'Building model...'
    model = tpgru_model.build_model(tparams, timetparams, options)

    print 'Loading test data...'
    test_examples = data_utils.load_examples(data_dir,
                                             dataset='test',
                                             node_index=node_index,
                                             maxlen=maxlen,
                                             Gp=Gp)
    test_loader = data_utils.Loader(test_examples, options=options)
    print 'Loaded %d test examples.' % len(test_examples)

    if train:
        # prepares training data.
        print 'Loading train data...'
        train_examples = data_utils.load_examples(
            data_dir,
            dataset='train',
            keep_ratio=options['keep_ratio'],
            node_index=node_index,
            maxlen=maxlen,
            Gp=Gp)
        train_loader = data_utils.Loader(train_examples, options=options)
        print 'Loaded %d training examples.' % len(train_examples)

        # compiles updates.
        optimizer = downhill.build(algo='adam',
                                   loss=model['cost'],
                                   params=tparams.values(),
                                   inputs=model['data'])

        updates = optimizer.get_updates(max_gradient_elem=5.,
                                        learning_rate=learning_rate)

        f_update = theano.function(model['data'],
                                   model['cost'],
                                   updates=list(updates))

        toptimizer = downhill.build(algo='adam',
                                    loss=model['timecost'],
                                    params=timetparams.values(),
                                    inputs=model['timedata'])

        tupdates = toptimizer.get_updates(max_gradient_elem=5.,
                                          learning_rate=0.005)

        f_t_update = theano.function(model['timedata'],
                                     model['timecost'],
                                     updates=list(tupdates))
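        # The diffusion model and the time model are trained by two
        # independent Adam optimizers; the time model uses its own hard-coded
        # learning rate of 0.005.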

        # training loop.
        start_time = timeit.default_timer()

        n_examples = len(train_examples)
        batches_per_epoch = n_examples // options['batch_size'] + 1
        n_epochs = global_steps // batches_per_epoch + 1

        global_step = 0
        for _ in range(n_epochs):
            for _ in range(batches_per_epoch):
                batch_data = train_loader()
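                # The loader returns one tuple serving both objectives:
                # f_update takes everything but the last three entries plus
                # batch_data[-2] (presumably the labels), while f_t_update
                # takes everything but the last two plus batch_data[-1]
                # (presumably the time targets).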
                cost = f_update(*(batch_data[:-3] + (batch_data[-2], )))
                timecost = f_t_update(*(batch_data[:-2] + (batch_data[-1], )))

                if global_step % disp_freq == 0:
                    print 'global step %d, cost: %f' % (global_step, cost)
                    print 'timecost: %f' % (timecost)

                # dump model parameters.
                if global_step % save_freq == 0:
                    params = unzip(tparams)
                    np.savez(saveto, **params)
                    pickle.dump(options, open('%s.pkl' % saveto, 'wb'), -1)
                    timeparams = unzip(timetparams)
                    np.savez(tmsaveto, **timeparams)

                # evaluate on test data.
                if global_step % test_freq == 0:
                    scores = evaluate(model['f_prob'], test_loader,
                                      model['f_tprob'], options['tdim'])
                    print 'eval scores: ', scores
                    end_time = timeit.default_timer()
                    print 'time used: %d seconds.' % (end_time - start_time)

                global_step += 1

    scores = evaluate(model['f_prob'], test_loader, model['f_tprob'],
                      options['tdim'])
    pprint.pprint(scores)