# Topo-LSTM training script (Python 2 / Theano). Helper functions referenced
# below (init_params, init_tparams, evaluate, and the load_params/unzip pair)
# are defined elsewhere in this module.
import pickle
import pprint
import timeit

import networkx as nx
import numpy as np
import theano
import downhill

import data_utils
import tprnn_model


def train(data_dir='data/memes/',
          dim_proj=512,
          maxlen=30,
          batch_size=256,
          keep_ratio=1.,
          shuffle_data=True,
          learning_rate=0.001,
          global_steps=50000,
          disp_freq=100,
          save_freq=1000,
          test_freq=1000,
          saveto_file='params.npz',
          weight_decay=0.0005,
          reload_model=False,
          train=True):
    """
    Topo-LSTM model training.
    """
    options = locals().copy()
    saveto = data_dir + saveto_file

    # loads graph.
    G, node_index = data_utils.load_graph(data_dir)
    print nx.info(G)
    options['n_words'] = len(node_index)
    print options

    # creates and initializes shared variables.
    print 'Initializing variables...'
    params = init_params(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(saveto, params)
    tparams = init_tparams(params)

    # builds Topo-LSTM model.
    print 'Building model...'
    model = tprnn_model.build_model(tparams, options)

    print 'Loading test data...'
    test_examples = data_utils.load_examples(data_dir,
                                             dataset='test',
                                             node_index=node_index,
                                             maxlen=maxlen,
                                             G=G)
    test_loader = data_utils.Loader(test_examples, options=options)
    print 'Loaded %d test examples' % len(test_examples)

    if train:
        # prepares training data.
        print 'Loading train data...'
        train_examples = data_utils.load_examples(
            data_dir,
            dataset='train',
            keep_ratio=options['keep_ratio'],
            node_index=node_index,
            maxlen=maxlen,
            G=G)
        train_loader = data_utils.Loader(train_examples, options=options)
        print 'Loaded %d training examples.' % len(train_examples)

        # compiles updates: Adam with element-wise gradient clipping at 5.
        optimizer = downhill.build(algo='adam',
                                   loss=model['cost'],
                                   params=tparams.values(),
                                   inputs=model['data'])
        updates = optimizer.get_updates(max_gradient_elem=5.,
                                        learning_rate=learning_rate)
        f_update = theano.function(model['data'],
                                   model['cost'],
                                   updates=list(updates))

        # training loop.
        start_time = timeit.default_timer()

        # alternative one-call training path, left disabled:
        # downhill.minimize(
        #     loss=cost,
        #     algo='adam',
        #     train=train_loader,
        #     # inputs=input_list + [labels],
        #     # params=tparams.values(),
        #     # patience=0,
        #     max_gradient_clip=1,
        #     # max_gradient_norm=1,
        #     learning_rate=learning_rate,
        #     monitors=[('cost', cost)],
        #     monitor_gradients=False)

        n_examples = len(train_examples)
        batches_per_epoch = n_examples // options['batch_size'] + 1
        n_epochs = global_steps // batches_per_epoch + 1

        global_step = 0
        cost_history = []
        for _ in range(n_epochs):
            for _ in range(batches_per_epoch):
                cost = f_update(*train_loader())
                cost_history += [cost]

                if global_step % disp_freq == 0:
                    print 'global step %d, cost: %f' % (global_step, cost)

                # dump model parameters.
                if global_step % save_freq == 0:
                    params = unzip(tparams)
                    np.savez(saveto, **params)
                    pickle.dump(options, open('%s.pkl' % saveto, 'wb'), -1)

                # evaluate on test data.
                if global_step % test_freq == 0:
                    scores = evaluate(model['f_prob'], test_loader)
                    print 'eval scores: ', scores
                    end_time = timeit.default_timer()
                    print 'time used: %d seconds.' % (end_time - start_time)

                global_step += 1

    # final evaluation on test data.
    scores = evaluate(model['f_prob'], test_loader)
    pprint.pprint(scores)
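# The checkpointing calls above lean on a load_params/unzip pair that is not
# shown in this file. The sketch below is an assumption about their behavior,
# following the common Theano LSTM-tutorial pattern (plain-numpy dict on disk,
# OrderedDict of shared variables in memory); it is not this repo's actual
# definition.
from collections import OrderedDict

import numpy as np


def load_params(path, params):
    # Overwrite entries of the plain-numpy `params` dict with the arrays
    # stored in an .npz archive written by np.savez.
    saved = np.load(path)
    for name in params:
        if name not in saved:
            raise KeyError('%s is missing from %s' % (name, path))
        params[name] = saved[name]
    return params


def unzip(tparams):
    # Pull the current values out of the Theano shared variables so that
    # np.savez can serialize a checkpoint.
    return OrderedDict((name, var.get_value())
                       for name, var in tparams.items())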
# Time-aware variant: a Topo-GRU model with a separate time-prediction
# component, trained by its own optimizer and saved to its own checkpoint
# file. The same module-level helpers are assumed, plus init_timeparams.
import pickle
import pprint
import timeit

import numpy as np
import theano
import downhill

import data_utils
import tpgru_model


def train(data_dir='data/memes/',
          dim_proj=256,
          dim_att=128,
          maxlen=30,
          batch_size=256,
          keep_ratio=1.,
          shuffle_data=True,
          learning_rate=0.001,
          global_steps=50000,
          disp_freq=100,
          save_freq=100,
          test_freq=100,
          saveto_file='params.npz',
          tmsaveto_file='timeparams.npz',
          weight_decay=0.0005,
          sigmasqr=1,
          tdim=1.,
          reload_model=False,
          train=True):
    """
    Topo-LSTM model training.

    tdim: scale time down by how many times.
    """
    options = locals().copy()
    # savedstep = '0'
    saveto = data_dir + saveto_file
    tmsaveto = data_dir + tmsaveto_file

    # loads graph.
    Gp, node_index = data_utils.load_graph(data_dir)
    # print nx.info(G)
    options['n_events'] = len(node_index)
    print options

    # creates and initializes shared variables.
    print 'Initializing variables...'
    params = init_params(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(saveto, params)
    tparams = init_tparams(params)

    timeparams = init_timeparams(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(tmsaveto, timeparams)
    timetparams = init_tparams(timeparams)

    # builds Topo-LSTM model.
    print 'Building model...'
    model = tpgru_model.build_model(tparams, timetparams, options)

    print 'Loading test data...'
    test_examples = data_utils.load_examples(data_dir,
                                             dataset='test',
                                             node_index=node_index,
                                             maxlen=maxlen,
                                             Gp=Gp)
    test_loader = data_utils.Loader(test_examples, options=options)
    print 'Loaded %d test examples' % len(test_examples)

    if train:
        # prepares training data.
        print 'Loading train data...'
        train_examples = data_utils.load_examples(
            data_dir,
            dataset='train',
            keep_ratio=options['keep_ratio'],
            node_index=node_index,
            maxlen=maxlen,
            Gp=Gp)
        train_loader = data_utils.Loader(train_examples, options=options)
        print 'Loaded %d training examples.' % len(train_examples)

        # compiles updates for the topology model.
        optimizer = downhill.build(algo='adam',
                                   loss=model['cost'],
                                   params=tparams.values(),
                                   inputs=model['data'])
        updates = optimizer.get_updates(max_gradient_elem=5.,
                                        learning_rate=learning_rate)
        f_update = theano.function(model['data'],
                                   model['cost'],
                                   updates=list(updates))

        # compiles updates for the time model (its own Adam optimizer with
        # a fixed 0.005 learning rate).
        toptimizer = downhill.build(algo='adam',
                                    loss=model['timecost'],
                                    params=timetparams.values(),
                                    inputs=model['timedata'])
        tupdates = toptimizer.get_updates(max_gradient_elem=5.,
                                          learning_rate=0.005)
        f_t_update = theano.function(model['timedata'],
                                     model['timecost'],
                                     updates=list(tupdates))

        # training loop.
        start_time = timeit.default_timer()

        n_examples = len(train_examples)
        batches_per_epoch = n_examples // options['batch_size'] + 1
        n_epochs = global_steps // batches_per_epoch + 1

        global_step = 0
        # cost_history = []
        for _ in range(n_epochs):
            for _ in range(batches_per_epoch):
                batch_data = train_loader()
                # the loader output is sliced so each compiled function gets
                # the inputs it was built with; the last two entries are the
                # respective targets.
                cost = f_update(*(batch_data[:-3] + (batch_data[-2], )))
                # cost_history += [cost]
                timecost = f_t_update(*(batch_data[:-2] + (batch_data[-1], )))

                if global_step % disp_freq == 0:
                    print 'global step %d, cost: %f' % (global_step, cost)
                    print 'timecost: %f' % timecost

                # dump model parameters.
                if global_step % save_freq == 0:
                    params = unzip(tparams)
                    np.savez(data_dir + saveto_file, **params)
                    pickle.dump(
                        options,
                        open('%s.pkl' % (data_dir + saveto_file), 'wb'), -1)
                    timeparams = unzip(timetparams)
                    np.savez(data_dir + tmsaveto_file, **timeparams)

                # evaluate on test data.
                if global_step % test_freq == 0:
                    scores = evaluate(model['f_prob'], test_loader,
                                      model['f_tprob'], options['tdim'])
                    print 'eval scores: ', scores
                    end_time = timeit.default_timer()
                    print 'time used: %d seconds.' % (end_time - start_time)

                global_step += 1

    # final evaluation on test data.
    scores = evaluate(model['f_prob'], test_loader,
                      model['f_tprob'], options['tdim'])
    pprint.pprint(scores)
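# A minimal sketch of restoring a saved checkpoint for evaluation only, shown
# for the first (tprnn) variant. It assumes the .pkl written during training
# holds the options dict and that init_params/init_tparams/load_params behave
# as in the training code above; load_for_eval is a hypothetical helper name,
# not part of this repo.
import pickle


def load_for_eval(data_dir='data/memes/', saveto_file='params.npz'):
    # rebuild the model from the options pickle and parameter archive
    # written by the save_freq branch of the training loop.
    saveto = data_dir + saveto_file
    options = pickle.load(open('%s.pkl' % saveto, 'rb'))
    params = init_params(options)
    load_params(saveto, params)
    tparams = init_tparams(params)
    model = tprnn_model.build_model(tparams, options)
    return model, options


# Usage (hypothetical): score the held-out set without training.
# model, options = load_for_eval()
# scores = evaluate(model['f_prob'], test_loader)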