def main(): args = dict() args['corpus_path'] = '../vector_traing_corpus/clusterTrajectory_100m_5minutes_50eps_5minPts_uniq_2windows.txt' args['train_data_path'] = '../vector_traing_corpus/clusterTrajectory_100m_5minutes_50eps_5minPts_uniq_2windows.txt' args['dbscanner_path'] = '../utils/dbscaner_100m_5minutes_50eps_5minPts.pkl' args['batch_size'] = 1 args['neg_sample_size'] = 5 args['alpha'] = 0.75 # smooth out unigram frequencies args['table_size'] = 1000 # table size from which to sample neg samples args['min_frequency'] = 1 # threshold for vocab frequency args['lr'] = 0.05 args['min_lr'] = 0.005 args['embed_size'] = 128 args['sampling'] = False args['epoches'] = 70 args['save_every_n'] = 200 args['save_dir'] = './save_windowns{}'.format(args['neg_sample_size']) dataloader = Dataloader(args) args['vocab_size'] = dataloader.vocab_size pickle.dump(dataloader, open('./variable/dataloader.pkl', 'w')) pickle.dump(args, open('./variable/args.pkl', 'w')) model = Model(args) saver = tf.train.Saver(max_to_keep=10) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) summary_writer = tf.summary.FileWriter('./log', sess.graph) count = 0 for e in range(args['epoches']): dataloader.reset_pointer() for i in range(dataloader.batch_num): count += 1 start = time.time() x, y = dataloader.next_batch() # labels = dataloader.labels args['lr'] = args['lr'] + dataloader.decay feed = {model.x: x, model.y: y, model.lr: args['lr']} summary, train, loss = sess.run([model.merged, model.train, model.loss], feed_dict=feed) summary_writer.add_summary(summary, count) end = time.time() if count % 100 == 0: print('round_num: {}/{}... '.format(e + 1, args['epoches']), 'Training steps: {}... '.format(count), 'Training error: {:.4f}... '.format(loss), 'Learning rate: {:.4f}... '.format(args['lr']), '{:.4f} sec/batch'.format((end - start))) if (count % args['save_every_n'] == 0): saver.save(sess, "{path}/i{counter}.ckpt".format(path = args['save_dir'], counter=count)) saver.save(sess, "{path}/i{counter}.ckpt".format(path=args['save_dir'], counter=count)) summary_writer.close()
def main(): args = dict() args[ 'train_data_path'] = '../predict_training_corpus/clusterTrajectory_100m_5minutes_50eps_5minPts_uniq_3numseqs_train.txt' args[ 'test_data_path'] = '../predict_training_corpus/clusterTrajectory_100m_5minutes_50eps_5minPts_uniq_3numseqs_test.txt' args[ 'dbscanner_path'] = '../utils/dbscaner_100m_5minutes_50eps_5minPts.pkl' args['save_dir'] = './save_3numsteps' args['embedding_path'] = '../word2vec/variable/embedding.pkl' args['batch_size'] = 5 args['lstm_size'] = 128 args['lstm_layer'] = 1 args['weight_decay'] = 0.00001 args['is_training'] = True args['keep_prob'] = 0.5 args['lr'] = 0.001 args['epochs'] = 70 args['save_every_n'] = 200 dataloader = Dataloader(args) args['num_steps'] = dataloader.num_steps args['feature_dim'] = dataloader.feature_dim args['classes'] = dataloader.classes pickle.dump(args, open('./utils/args.pkl', 'wb')) model = Model(args) saver = tf.train.Saver(max_to_keep=10) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) count = 0 for e in range(args['epochs']): dataloader.reset() # new_state = sess.run(model.initial_state) for i in range(dataloader.batch_num): count += 1 x, y = dataloader.next_batch() start = time.time() feed = {model.input: x, model.target: y} batch_loss, new_state, _ = sess.run( [model.loss, model.final_state, model.optimizer], feed_dict=feed) end = time.time() if count % 100 == 0: print('round_num: {}/{}... '.format(e + 1, args['epochs']), 'Training steps: {}... '.format(count), 'Training error: {:.4f}... '.format(batch_loss), '{:.4f} sec/batch'.format((end - start))) if (count % args['save_every_n'] == 0): # aaa = 1 saver.save( sess, "{path}/i{counter}_l{lstm_size}.ckpt".format( path=args['save_dir'], counter=count, lstm_size=args['lstm_size'])) saver.save( sess, "{path}/i{counter}_l{lstm_size}.ckpt".format( path=args['save_dir'], counter=count, lstm_size=args['lstm_size']))