def train():
    global logfile_path
    global train1file
    global train0file
    global test1file
    global idf_file_path
    batch_size = 256
    filter_sizes = [1, 2, 3]
    num_filters = 1000
    words_num_dim = 50
    # normal embedding size
    embedding_size = 300
    # new embedding size with idf
    #embedding_size = 301
    learning_rate = 0.001
    n_epochs = 20000
    validation_freq = 50
    keep_prob_value = 0.7
    margin_size = 0.05
    logfile_path = os.path.join(logfile_path,
                                'CNN-' + GetNowTime() + '-' + '-log.txt')
    log("New start ...", logfile_path)
    log(str(time.asctime(time.localtime(time.time()))), logfile_path)
    log("batch_size = " + str(batch_size), logfile_path)
    log("filter_sizes = " + str(filter_sizes), logfile_path)
    log("num_filters = " + str(num_filters), logfile_path)
    log("embedding_size = " + str(embedding_size), logfile_path)
    log("learning_rate = " + str(learning_rate), logfile_path)
    log("n_epochs = " + str(n_epochs), logfile_path)
    log("margin_size = " + str(margin_size), logfile_path)
    log("words_num_dim = " + str(words_num_dim), logfile_path)
    log("validation_freq = " + str(validation_freq), logfile_path)
    log("keep_prob_value = " + str(keep_prob_value), logfile_path)
    log("train_1_file = " + str(train1file.split('/')[-1]), logfile_path)
    log("train_0_file = " + str(train0file.split('/')[-1]), logfile_path)
    log("test_file = " + str(test1file.split('/')[-1]), logfile_path)
    log("vector_file = " + str(vectorsfile.split('/')[-1]), logfile_path)
    log("idf_file_path = " + str(idf_file_path.split('/')[-1]), logfile_path)
    log("lda_train_file_path = " + str(lda_train_file_path.split('/')[-1]), logfile_path)
    log("lda_test_file_path = " + str(lda_test_file_path.split('/')[-1]), logfile_path)

    vocab = build_vocab()
    # word_embeddings is a list, shape = numOfWords * embedding_size
    # normal embeddings:
    word_embeddings = load_word_embeddings(vocab, embedding_size)
    # new embeddings with idf features:
    #word_embeddings = load_word_embeddings_with_idf(vocab, embedding_size, idf_file_path)
    trainList = load_train1_list()
    testList = load_test_list()
    train0Dict = load_train0_dict()

    # train_x1.shape = batch_size * words_num_dim
    #train_x1, train_x2, train_x3 = load_train_data(trainList, vocab, batch_size, words_num_dim)
    train_x1, train_x2, train_x3 = load_train_data_from_2files(
        train0Dict, trainList, vocab, batch_size, words_num_dim)

    x1, x2, x3 = T.matrix('x1'), T.matrix('x2'), T.matrix('x3')
    keep_prob = T.fscalar('keep_prob')
    model = QACnn(
        input1=x1, input2=x2, input3=x3,
        keep_prob=keep_prob,
        word_embeddings=word_embeddings,
        batch_size=batch_size,
        sequence_len=train_x1.shape[1],
        embedding_size=embedding_size,
        filter_sizes=filter_sizes,
        num_filters=num_filters,
        margin_size=margin_size)
    dbg_x1 = model.dbg_x1
    dbg_outputs_1 = model.dbg_outputs_1
    cost, cos12, cos13 = model.cost, model.cos12, model.cos13
    params, accuracy = model.params, model.accuracy
    grads = T.grad(cost, params)
    # plain SGD updates
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    p1, p2, p3 = T.matrix('p1'), T.matrix('p2'), T.matrix('p3')
    prob = T.fscalar('prob')
    train_model = theano.function(
        [p1, p2, p3, prob],
        [cost, accuracy, dbg_x1, dbg_outputs_1],
        updates=updates,
        givens={x1: p1, x2: p2, x3: p3, keep_prob: prob})

    v1, v2, v3 = T.matrix('v1'), T.matrix('v2'), T.matrix('v3')
    validate_model = theano.function(
        inputs=[v1, v2, v3, prob],
        outputs=[cos12, cos13],
        #updates=updates,
        givens={x1: v1, x2: v2, x3: v3, keep_prob: prob})

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        # resample a fresh batch of (question, pos, neg) triplets each epoch
        #train_x1, train_x2, train_x3 = load_train_data(trainList, vocab, batch_size)
        train_x1, train_x2, train_x3 = load_train_data_from_2files(
            train0Dict, trainList, vocab, batch_size, words_num_dim)
        #print train_x3.shape
        cost_ij, acc, dbg_x1, dbg_outputs_1 = train_model(
            train_x1, train_x2, train_x3, keep_prob_value)
        log('load data done ...... epoch:' + str(epoch)
            + ' cost:' + str(cost_ij) + ', acc:' + str(acc), logfile_path)
        if epoch % validation_freq == 0:
            log('Evaluation ......', logfile_path)
            validation(validate_model, testList, vocab, batch_size, words_num_dim)
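# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original script): QACnn exposes
# cost, cos12, cos13, and accuracy, which is consistent with a pairwise
# max-margin ranking loss over cosine similarities -- cos12 for
# (question, positive answer) and cos13 for (question, negative answer).
# Assuming that form, a minimal Theano version could look like this;
# pairwise_margin_loss is a hypothetical helper, not a function defined
# by the original model, and it reuses this file's `theano.tensor as T`.
def pairwise_margin_loss(cos12, cos13, margin_size):
    # Hinge on the similarity gap: zero loss once cos12 exceeds cos13
    # by at least margin_size.
    losses = T.maximum(0.0, margin_size - cos12 + cos13)
    cost = T.mean(losses)
    # Accuracy = fraction of triplets already ranked correctly.
    accuracy = T.mean(T.cast(cos12 > cos13, 'float32'))
    return cost, accuracy
# ----------------------------------------------------------------------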
# LSTM variant of train(): same overall flow as the CNN version above,
# but with mask inputs for variable-length sequences and different
# hyperparameters.
def train():
    global logfile_path
    global trainfile
    global train0file
    global test1file
    batch_size = 256
    embedding_size = 300
    learning_rate = 0.005
    n_epochs = 20000
    words_num_dim = 1200
    validation_freq = 10
    filter_sizes = [1, 2, 3, 5]
    num_filters = 500
    margin_size = 0.05
    logfile_path = os.path.join(logfile_path,
                                'LSTM-' + GetNowTime() + '-'
                                + 'batch_size-' + str(batch_size) + '-'
                                + 'num_filters-' + str(num_filters) + '-'
                                + 'embedding_size-' + str(embedding_size) + '-'
                                + 'n_epochs-' + str(n_epochs) + '-'
                                + 'freq-' + str(validation_freq) + '-'
                                + '-log.txt')
    log("New start ...", logfile_path)
    log(str(time.asctime(time.localtime(time.time()))), logfile_path)
    log("batch_size = " + str(batch_size), logfile_path)
    log("filter_sizes = " + str(filter_sizes), logfile_path)
    log("num_filters = " + str(num_filters), logfile_path)
    log("embedding_size = " + str(embedding_size), logfile_path)
    log("learning_rate = " + str(learning_rate), logfile_path)
    log("words_num_dim = " + str(words_num_dim), logfile_path)
    log("n_epochs = " + str(n_epochs), logfile_path)
    log("margin_size = " + str(margin_size), logfile_path)
    log("validation_freq = " + str(validation_freq), logfile_path)
    log("train_1_file = " + str(trainfile.split('/')[-1]), logfile_path)
    log("train_0_file = " + str(train0file.split('/')[-1]), logfile_path)
    log("test_file = " + str(test1file.split('/')[-1]), logfile_path)
    log("vector_file = " + str(vectorsfile.split('/')[-1]), logfile_path)

    vocab = build_vocab()
    word_embeddings = load_word_embeddings(vocab, embedding_size)
    trainList = load_train_list()
    testList = load_test_list()
    train0Dict = load_train0_dict()

    train_x1, train_x2, train_x3, mask1, mask2, mask3 = load_train_data_from_2files(
        train0Dict, trainList, vocab, batch_size, words_num_dim)

    x1, x2, x3 = T.fmatrix('x1'), T.fmatrix('x2'), T.fmatrix('x3')
    m1, m2, m3 = T.fmatrix('m1'), T.fmatrix('m2'), T.fmatrix('m3')
    model = LSTM(
        input1=x1, input2=x2, input3=x3,
        mask1=m1, mask2=m2, mask3=m3,
        word_embeddings=word_embeddings,
        batch_size=batch_size,
        sequence_len=train_x1.shape[0],  # rows are the sequence dimension
        embedding_size=embedding_size,
        filter_sizes=filter_sizes,
        num_filters=num_filters,
        margin_size=margin_size)
    cost, cos12, cos13 = model.cost, model.cos12, model.cos13
    params, accuracy = model.params, model.accuracy
    grads = T.grad(cost, params)
    # plain SGD updates
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    p1, p2, p3 = T.fmatrix('p1'), T.fmatrix('p2'), T.fmatrix('p3')
    q1, q2, q3 = T.fmatrix('q1'), T.fmatrix('q2'), T.fmatrix('q3')
    train_model = theano.function(
        [p1, p2, p3, q1, q2, q3],
        [cost, accuracy],
        updates=updates,
        givens={x1: p1, x2: p2, x3: p3,
                m1: q1, m2: q2, m3: q3})

    # fmatrix here too, so the dtypes match the fmatrix targets in givens
    v1, v2, v3 = T.fmatrix('v1'), T.fmatrix('v2'), T.fmatrix('v3')
    u1, u2, u3 = T.fmatrix('u1'), T.fmatrix('u2'), T.fmatrix('u3')
    validate_model = theano.function(
        inputs=[v1, v2, v3, u1, u2, u3],
        outputs=[cos12, cos13],
        #updates=updates,
        givens={x1: v1, x2: v2, x3: v3,
                m1: u1, m2: u2, m3: u3})

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        train_x1, train_x2, train_x3, mask1, mask2, mask3 = load_train_data_from_2files(
            train0Dict, trainList, vocab, batch_size, words_num_dim)
        #print('train_x1, train_x2, train_x3')
        #print(train_x1.shape, train_x2.shape, train_x3.shape)
        cost_ij, acc = train_model(train_x1, train_x2, train_x3, mask1, mask2, mask3)
        log('load data done ...... epoch:' + str(epoch)
            + ' cost:' + str(cost_ij) + ', acc:' + str(acc), logfile_path)
        if epoch % validation_freq == 0:
            log('Evaluation ......', logfile_path)
            validation(validate_model, testList, vocab, batch_size, words_num_dim)
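# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original script): the LSTM variant
# feeds mask matrices (mask1..mask3) alongside the token matrices, the
# usual Theano idiom for padded, variable-length batches. Assuming the
# common convention -- 1.0 for real tokens, 0.0 for padding, with shape
# (sequence_len, batch_size) to match sequence_len=train_x1.shape[0] --
# a batch mask could be built as below. build_mask is a hypothetical
# helper, not part of load_train_data_from_2files.
import numpy as np

def build_mask(token_lists, sequence_len):
    # One column per example; rows beyond each example's length stay 0.
    mask = np.zeros((sequence_len, len(token_lists)), dtype='float32')
    for j, tokens in enumerate(token_lists):
        mask[:min(len(tokens), sequence_len), j] = 1.0
    return mask
# ----------------------------------------------------------------------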