def main():
    #p = processing(prediction_fname='etr.csv')
    id = int(sys.argv[1])
    train_fpath = config.train_folder + "ACT%d_competition_training.csv" % id
    test_fpath = config.test_folder + "ACT%d_competition_test.csv" % id
    train_data, train_descriptors, train_molecules, train_targets = utils.read_train(train_fpath)
    test_data, test_descriptors, test_molecules = utils.read_test(test_fpath)

    # Combine data and targets
    train_all = np.column_stack((train_data, train_targets))
    #np.random.shuffle(train_all)

    # Hold out the last 20% of rows for evaluation (cast to int so it can be used as an index)
    num = int(train_all.shape[0] * 0.8)
    train = train_all[0:num, ]
    test = train_all[num:, ]

    train_x = train[:, 0:(train.shape[1] - 1)]
    train_y = train[:, -1]
    test_x = test[:, 0:(test.shape[1] - 1)]
    test_y = test[:, -1]

    for i in range(1000, 1500, 100):
        #fh = open(config.result_folder + "etr_%d.csv" % i, "w")
        #fh.write("MOLECULE,Prediction\n")
        model = extratree(train_x, train_y, i)
        prediction = model.predict(test_x)
        print "%d: %f" % (i, r_squared(prediction, test_y))
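# A minimal sketch, assuming scikit-learn, of what the extratree() and r_squared()
# helpers called above might look like; the project's actual implementations may differ.
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import r2_score


def extratree(train_x, train_y, n_estimators):
    # Hypothetical stand-in: fit an extremely randomized trees regressor.
    model = ExtraTreesRegressor(n_estimators=n_estimators, n_jobs=-1)
    model.fit(train_x, train_y)
    return model


def r_squared(prediction, target):
    # Hypothetical stand-in: coefficient of determination of the predictions.
    return r2_score(target, prediction)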
def load_data(if_norm=True):
    print('Loading data set ...')
    load_time = time.time()

    test_data = read_test('./data/test.csv')
    test_data = np.array(test_data)
    train_data = read_train('./data/train.csv')
    train_data = np.array(train_data)
    loaded_time = time.time() - load_time
    print('test_data shape: {0}, train_data shape: {1}'.format(
        test_data.shape, train_data.shape))

    train_features, train_labels = train_data[:, 1:-1], train_data[:, -1].astype(int)
    test_features = test_data[:, 1:]

    if if_norm is True:
        # Min-max scale each split using its own global min/max
        test_min, test_max = test_features.min(), test_features.max()
        test_features_norm = (test_features - test_min) / (test_max - test_min)
        train_min, train_max = train_features.min(), train_features.max()
        train_features_norm = (train_features - train_min) / (train_max - train_min)
        print('Data set loaded successfully in {0:.4f} seconds.'.format(loaded_time))
        return test_features_norm, train_features_norm, train_labels
    else:
        print('Data set loaded successfully in {0:.4f} seconds.'.format(loaded_time))
        return test_features, train_features, train_labels
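# load_data() above scales each split with its own global (whole-matrix) min and max.
# A minimal alternative sketch, assuming 2-D NumPy feature arrays, that scales per
# column using statistics from the training split only:
import numpy as np


def minmax_scale_with_train_stats(train_features, test_features):
    # Per-column min/max computed on the training split, applied to both splits.
    train_min = train_features.min(axis=0)
    train_max = train_features.max(axis=0)
    span = np.where(train_max > train_min, train_max - train_min, 1.0)  # guard constant columns
    return (train_features - train_min) / span, (test_features - train_min) / span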
def main(ID):
    train_file = 'train_data.txt'
    test_file = 'test_data.txt'
    variables = [
        'ID', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10',
        'f11', 'class'
    ]

    print("========= Reading train dataset =========")
    # TO-DO: use the read_data function you created to read the train data
    train_dict = read_data(train_file)
    # print(train_dict[1:3])
    print("======== Done reading =========.\n")

    print("========= Reading test data =========")
    # TO-DO: read the test data
    test_dict = read_test(test_file)
    print("========= Done reading =========.\n")

    print("==== Training classifier =====")
    # TO-DO: initialize the classifier you built in model.py and return the necessary values
    model = RuleBasedModel()
    model.get_data(train_dict)
    model.get_test(test_dict)
    print("======== Done training classifier ===========.\n")

    print("========= Classifying test samples =======")
    # TO-DO: use your classifier to do predictions on all the test samples
    list_predict = model.predict(test_dict, test=True)
    print("========== Done classifying =======")

    # TO-DO: evaluate your classifier with the accuracy function you implemented
    accuracy, numCorrect, total_samples = model.calculate_accuracy(test_dict, test=True)
    print(
        f"Model's Accuracy {round(accuracy)} %, model correctly predicted {numCorrect} out of {total_samples}"
    )

    if ID is not None:
        print(ID)
        prediction_for_id = model.predict_with_ID(ID, test=True)
        print(
            f"The prediction of row with id:{ID} in the test set is class:{prediction_for_id}"
        )

    print('================================================================')
    print("finished.\n")
def main():
    id = int(sys.argv[1])
    train_fpath = config.train_folder + "ACT%d_competition_training.csv" % id
    test_fpath = config.test_folder + "ACT%d_competition_test.csv" % id
    train_data, train_descriptors, train_molecules, train_targets = utils.read_train(train_fpath)
    test_data, test_descriptors, test_molecules = utils.read_test(test_fpath)

    model = randomforest(train_data, train_targets, 1000)
    #for ind, fi in enumerate(model.feature_importances_):
    prediction = model.predict(test_data)
    write_pred(id, test_molecules, prediction)
def main():
    #path="/Users/benjaminbenteke/Desktop/AMMI_bootCampProject/AMMI2021_Bootcamp_project/"  # change the path if needed
    train_file = "train_data.txt"
    test_file = "test_data.txt"
    variables = [
        'ID', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10',
        'f11', 'class'
    ]

    print("========= Reading train dataset =========")
    # TO-DO: use the read_data function you created to read the train data
    train = read_data(variables, train_file)
    print("======== Done reading =========.\n")

    print("========= Reading test data =========")
    # TO-DO: read the test data
    test = read_test(variables, test_file)
    print("========= Done reading =========.\n")

    print("==== Training classifier =====")
    # TO-DO: initialize the classifier you built in model.py and return the necessary values
    dt = RuleBasedModel(train, test)
    dt.classify()
    print("======== Done training classifier ===========.\n")

    print("========= Classifying test samples =======")
    # TO-DO: use your classifier to do predictions on all the test samples
    print("========== Done classifying =======")

    # TO-DO: evaluate your classifier with the accuracy function you implemented
    accu = dt.accuracy()
    numCorrect = (accu * len(test)) / 100
    total_samples = len(test)
    print("Model's Accuracy {}%, model correctly predicted {} out of {}".format(
        accu, numCorrect, total_samples))
    #print(f"Model's Accuracy {round(accu)} %, model correctly predicted {numCorrect} out of {total_samples}")
    print('================================================================')
    print("finished.\n")
def read_test_nb(filename, transformers):
    lines = []
    ids = []
    for parts in utils.read_test(filename, True):
        # desc = cleantext.clean(parts[4], False)
        desc = parts[4]
        lines.append(desc)
        ids.append(parts[0])

    features = lines
    for transformer in transformers:
        features = transformer.transform(features)

    print 'features: ', features.shape[0]
    print 'ids: ', len(ids)
    return features, ids
def main():
    for i in range(7, 8):
        fh = open(config.result_folder + "lasso.csv", "w")
        fh.write("MOLECULE,Prediction\n")
        train_fpath = config.train_folder + "ACT%d_competition_training.csv" % i
        test_fpath = config.test_folder + "ACT%d_competition_test.csv" % i
        train_data, train_descriptors, train_molecules, train_targets = utils.read_train(train_fpath)
        test_data, test_descriptors, test_molecules = utils.read_test(test_fpath)

        model = lasso(train_data, train_targets)
        results = model.predict(test_data)
        for ind, result in enumerate(results):
            fh.write("%s,%f\n" % (test_molecules[ind], result))
        fh.close()
def check_solutions(solutions_: List[Solution], tasks: List[str]) -> List[List[int]]:
    def check(task, func, input_, iterations):
        max_score = 0
        for _ in range(iterations):
            max_score = max(max_score, test(task, func, input_))
        return max_score

    results = []
    for solution in solutions_:
        curr_result = []
        total = 0
        for task in tasks:
            input_ = read_test(task)
            sc = check(task, solution.func, input_, solution.iterations)
            total += sc
            curr_result.append(sc)
        curr_result.append(total)
        results.append(curr_result)
    return results
def main():
    #global n_words
    # Prepare training and testing data
    opt = COptions(args)
    opt_t = COptions(args)
    # opt_t.n_hid = opt.n_z
    loadpath = (opt.data_dir + "/" + opt.data_name)  #if opt.not_philly else '/hdfs/msrlabs/xiag/pt-data/cons/data_cleaned/twitter_small.p'
    print "loadpath:" + loadpath
    x = cPickle.load(open(loadpath, "rb"))
    train, val, test = x[0], x[1], x[2]
    wordtoix, ixtoword = x[3], x[4]

    if opt.test:
        test_file = opt.data_dir + opt.test_file
        test = read_test(test_file, wordtoix)
        # test = [x for x in test if all([2 < len(x[t]) < opt.maxlen - 4 for t in range(opt.num_turn)])]

    # train_filtered = [x for x in train if all([2 < len(x[t]) < opt.maxlen - 4 for t in range(opt.num_turn)])]
    # val_filtered = [x for x in val if all([2 < len(x[t]) < opt.maxlen - 4 for t in range(opt.num_turn)])]
    # print("Train: %d => %d" % (len(train), len(train_filtered)))
    # print("Val: %d => %d" % (len(val), len(val_filtered)))
    # train, val = train_filtered, val_filtered
    # del train_filtered, val_filtered

    opt.n_words = len(ixtoword)
    opt_t.n_words = len(ixtoword)
    opt_t.maxlen = opt_t.maxlen - opt_t.filter_shape + 1
    opt_t.update_params(args)
    print datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y")
    print dict(opt)
    print('Total words: %d' % opt.n_words)
    # if opt.model == 'cnn_rnn':
    #     opt_t.maxlen = opt_t.maxlen - opt_t.filter_shape + 1
    #     opt_t.update_params(args)
    #     print dict(opt_t)

    # Build the graph: context placeholders, target placeholder, and the latent z fed to the decoder.
    #for d in ['/gpu:0', '/gpu:1', '/gpu:2', '/gpu:3']:
    for d in ['/gpu:0']:
        with tf.device(d):
            src_ = [tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len]) for _ in range(opt.n_context)]
            tgt_ = tf.placeholder(tf.int32, shape=[opt_t.batch_size, opt_t.sent_len])
            z_ = tf.placeholder(tf.float32, shape=[opt_t.batch_size, opt.n_z * (2 if opt.local_feature else 1)])
            is_train_ = tf.placeholder(tf.bool, name='is_train')
            res_1_ = get_features(src_, tgt_, is_train_, opt, opt_t)
            res_2_ = generate_resp(src_, tgt_, z_, is_train_, opt, opt_t)
            merged = tf.summary.merge_all()

    #tensorboard --logdir=run1:/tmp/tensorflow/ --port 6006
    #writer = tf.train.SummaryWriter(opt.log_path, graph=tf.get_default_graph())
    uidx = 0
    graph_options = tf.GraphOptions(build_cost_model=1)
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True, graph_options=graph_options)
    # config.gpu_options.per_process_gpu_memory_fraction = 0.70
    #config = tf.ConfigProto(device_count={'GPU': 0})
    #config.gpu_options.allow_growth = True
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()
    run_metadata = tf.RunMetadata()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                #pdb.set_trace()
                t_vars = tf.trainable_variables()
                #t_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
                # if opt.load_from_pretrain:
                #     d_vars = [var for var in t_vars if var.name.startswith('d_')]
                #     g_vars = [var for var in t_vars if var.name.startswith('g_')]
                #     l_vars = [var for var in t_vars if var.name.startswith('l_')]
                #     #restore_from_save(g_vars, sess, opt, prefix='g_', load_path=opt.restore_dir + "/save/generator2")
                #     restore_from_save(d_vars, sess, opt, load_path=opt.restore_dir + "/save/" + opt.global_d)
                #     if opt.local_feature:
                #         restore_from_save(l_vars, sess, opt, load_path=opt.restore_dir + "/save/" + opt.local_d)
                # else:
                loader = restore_from_save(t_vars, sess, opt, load_path=opt.save_path)
            except Exception as e:
                print 'Error: ' + str(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        loss_d, loss_g = 0, 0

        if opt.test:
            iter_num = np.int(np.floor(len(test) / opt.batch_size)) + 1
            res_all = []
            val_tgt_all = []
            for i in range(iter_num):
                test_index = range(i * opt.batch_size, (i + 1) * opt.batch_size)
                sents = [test[t % len(test)] for t in test_index]
                for idx in range(opt.n_context, opt.num_turn):
                    src = [[sents[i][idx - turn] for i in range(opt.batch_size)] for turn in range(opt.n_context, 0, -1)]
                    tgt = [sents[i][idx] for i in range(opt.batch_size)]
                    val_tgt_all.extend(tgt)
                    if opt.feed_generated and idx != opt.n_context:
                        src[-1] = [[x for x in p if x != 0] for p in res_all[-opt.batch_size:]]
                    x_batch = [prepare_data_for_cnn(src_i, opt) for src_i in src]  # Batch L
                    y_batch = prepare_data_for_rnn(tgt, opt_t, is_add_GO=False)
                    feed = merge_two_dicts({i: d for i, d in zip(src_, x_batch)}, {tgt_: y_batch, is_train_: 0})  # do not use False
                    res_1 = sess.run(res_1_, feed_dict=feed)
                    z_all = np.array(res_1['z'])
                    feed = merge_two_dicts({i: d for i, d in zip(src_, x_batch)}, {tgt_: y_batch, z_: z_all, is_train_: 0})  # do not use False
                    res_2 = sess.run(res_2_, feed_dict=feed)
                    res_all.extend(res_2['syn_sent'])
            # bp()

            val_tgt_all = reshaping(val_tgt_all, opt)
            res_all = reshaping(res_all, opt)

            save_path = opt.log_path + '.resp.txt'
            if os.path.exists(save_path):
                os.remove(save_path)
            for idx in range(len(test) * (opt.num_turn - opt.n_context)):
                with open(save_path, "a") as resp_f:
                    resp_f.write(u' '.join([ixtoword[x] for x in res_all[idx] if x != 0 and x != 2]).encode('utf-8').strip() + ('\n' if idx % (opt.num_turn - opt.n_context) == opt.num_turn - opt.n_context - 1 else '\t'))
            print("save to:" + save_path)

            if opt.verbose:
                save_path = opt.log_path + '.tgt.txt'
                if os.path.exists(save_path):
                    os.remove(save_path)
                for idx in range(len(test) * (opt.num_turn - opt.n_context)):
                    with open(save_path, "a") as tgt_f:
                        tgt_f.write(u' '.join([ixtoword[x] for x in val_tgt_all[idx] if x != 0 and x != 2]).encode('utf-8').strip() + ('\n' if idx % (opt.num_turn - opt.n_context) == opt.num_turn - opt.n_context - 1 else '\t'))
                print("save to:" + save_path)

            val_set = [prepare_for_bleu(s) for s in val_tgt_all]
            gen = [prepare_for_bleu(s) for s in res_all]
            [bleu1s, bleu2s, bleu3s, bleu4s] = cal_BLEU_4(gen, {0: val_set}, is_corpus=opt.is_corpus)
            etp_score, dist_score = cal_entropy(gen)

            print 'Val BLEU: ' + ' '.join([str(round(it, 3)) for it in (bleu1s, bleu2s, bleu3s, bleu4s)])
            # print 'Val Rouge: ' + ' '.join([str(round(it, 3)) for it in (rouge1, rouge2, rouge3, rouge4)])
            print 'Val Entropy: ' + ' '.join([str(round(it, 3)) for it in (etp_score[0], etp_score[1], etp_score[2], etp_score[3])])
            print 'Val Diversity: ' + ' '.join([str(round(it, 3)) for it in (dist_score[0], dist_score[1], dist_score[2], dist_score[3])])
            # print 'Val Relevance(G,A,E): ' + ' '.join([str(round(it, 3)) for it in (rel_score[0], rel_score[1], rel_score[2])])
            print 'Val Avg. length: ' + str(round(np.mean([len([y for y in x if y != 0]) for x in res_all]), 3))

            if opt.embedding_score:
                with open("../../ssd0/consistent_dialog/data/GoogleNews-vectors-negative300.bin.p", 'rb') as pfile:
                    embedding = cPickle.load(pfile)
                rel_score = cal_relevance(gen, val_set, embedding)
                print 'Val Relevance(G,A,E): ' + ' '.join([str(round(it, 3)) for it in (rel_score[0], rel_score[1], rel_score[2])])

            if not opt.global_feature or opt.bit is None:
                exit(0)

        # Interpolation over one latent dimension: sweep opt.bit across opt.int_num evenly spaced values.
        if opt.test:
            iter_num = np.int(np.floor(len(test) / opt.batch_size)) + 1
            for int_idx in range(opt.int_num):
                res_all = []
                z1, z2, z3 = [], [], []
                val_tgt_all = []
                for i in range(iter_num):
                    test_index = range(i * opt.batch_size, (i + 1) * opt.batch_size)
                    sents = [test[t % len(test)] for t in test_index]
                    for idx in range(opt.n_context, opt.num_turn):
                        src = [[sents[i][idx - turn] for i in range(opt.batch_size)] for turn in range(opt.n_context, 0, -1)]
                        tgt = [sents[i][idx] for i in range(opt.batch_size)]
                        val_tgt_all.extend(tgt)
                        if opt.feed_generated and idx != opt.n_context:
                            src[-1] = [[x for x in p if x != 0] for p in res_all[-opt.batch_size:]]
                        x_batch = [prepare_data_for_cnn(src_i, opt) for src_i in src]  # Batch L
                        y_batch = prepare_data_for_rnn(tgt, opt_t, is_add_GO=False)
                        feed = merge_two_dicts({i: d for i, d in zip(src_, x_batch)}, {tgt_: y_batch, is_train_: 0})  # do not use False
                        res_1 = sess.run(res_1_, feed_dict=feed)
                        z_all = np.array(res_1['z'])
                        z_all[:, opt.bit] = np.array([1.0 / np.float(opt.int_num - 1) * int_idx for _ in range(opt.batch_size)])
                        feed = merge_two_dicts({i: d for i, d in zip(src_, x_batch)}, {tgt_: y_batch, z_: z_all, is_train_: 0})  # do not use False
                        res_2 = sess.run(res_2_, feed_dict=feed)
                        res_all.extend(res_2['syn_sent'])
                        z1.extend(res_1['z'])
                        z2.extend(z_all)
                        z3.extend(res_2['z_hat'])
                # bp()

                val_tgt_all = reshaping(val_tgt_all, opt)
                res_all = reshaping(res_all, opt)
                z1 = reshaping(z1, opt)
                z2 = reshaping(z2, opt)
                z3 = reshaping(z3, opt)

                save_path = opt.log_path + 'bit' + str(opt.bit) + '.' + str(1.0 / np.float(opt.int_num - 1) * int_idx) + '.int.txt'
                if os.path.exists(save_path):
                    os.remove(save_path)
                for idx in range(len(test) * (opt.num_turn - opt.n_context)):
                    with open(save_path, "a") as resp_f:
                        resp_f.write(u' '.join([ixtoword[x] for x in res_all[idx] if x != 0 and x != 2]).encode('utf-8').strip() + ('\n' if idx % (opt.num_turn - opt.n_context) == opt.num_turn - opt.n_context - 1 else '\t'))
                print("save to:" + save_path)

                save_path_z = opt.log_path + 'bit' + str(opt.bit) + '.' + str(1.0 / np.float(opt.int_num - 1) * int_idx) + '.z.txt'
                if os.path.exists(save_path_z):
                    os.remove(save_path_z)
                for idx in range(len(test) * (opt.num_turn - opt.n_context)):
                    with open(save_path_z, "a") as myfile:
                        #ary = np.array([z1[idx][opt.bit], z2[idx][opt.bit], z3[idx][opt.bit]])
                        #myfile.write(np.array2string(ary, formatter={'float_kind': lambda x: "%.2f" % x}) + ('\n' if idx % (opt.num_turn - opt.n_context) == opt.num_turn - opt.n_context - 1 else '\t'))
                        myfile.write(str(z3[idx][opt.bit]) + ('\n' if idx % (opt.num_turn - opt.n_context) == opt.num_turn - opt.n_context - 1 else '\t'))

                val_set = [prepare_for_bleu(s) for s in val_tgt_all]
                gen = [prepare_for_bleu(s) for s in res_all]
                [bleu1s, bleu2s, bleu3s, bleu4s] = cal_BLEU_4(gen, {0: val_set}, is_corpus=opt.is_corpus)
                etp_score, dist_score = cal_entropy(gen)

                print save_path
                print 'Val BLEU: ' + ' '.join([str(round(it, 3)) for it in (bleu1s, bleu2s, bleu3s, bleu4s)])
                # print 'Val Rouge: ' + ' '.join([str(round(it, 3)) for it in (rouge1, rouge2, rouge3, rouge4)])
                print 'Val Entropy: ' + ' '.join([str(round(it, 3)) for it in (etp_score[0], etp_score[1], etp_score[2], etp_score[3])])
                print 'Val Diversity: ' + ' '.join([str(round(it, 3)) for it in (dist_score[0], dist_score[1], dist_score[2], dist_score[3])])
                # print 'Val Relevance(G,A,E): ' + ' '.join([str(round(it, 3)) for it in (rel_score[0], rel_score[1], rel_score[2])])
                print 'Val Avg. length: ' + str(round(np.mean([len([y for y in x if y != 0]) for x in res_all]), 3))
def main():
    #global n_words
    # Prepare training and testing data
    opt = COptions(args)
    opt_t = COptions(args)
    loadpath = (opt.data_dir + "/" + opt.data_name)
    print "loadpath:" + loadpath
    x = cPickle.load(open(loadpath, "rb"))
    train, val, test = x[0], x[1], x[2]
    wordtoix, ixtoword = x[3], x[4]

    if opt.test:
        test_file = opt.data_dir + "/newdata2/test.txt"
        test = read_test(test_file, wordtoix)
        test = [x for x in test if all([2 < len(x[t]) < opt.maxlen - 4 for t in range(opt.num_turn)])]

    train_filtered = [x for x in train if all([2 < len(x[t]) < opt.maxlen - 4 for t in range(opt.num_turn)])]
    val_filtered = [x for x in val if all([2 < len(x[t]) < opt.maxlen - 4 for t in range(opt.num_turn)])]
    print("Train: %d => %d" % (len(train), len(train_filtered)))
    print("Val: %d => %d" % (len(val), len(val_filtered)))
    train, val = train_filtered, val_filtered
    del train_filtered, val_filtered

    opt.n_words = len(ixtoword)
    opt_t.n_words = len(ixtoword)
    opt_t.maxlen = opt_t.maxlen - opt_t.filter_shape + 1
    opt_t.update_params(args)
    print datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y")
    print dict(opt)
    print('Total words: %d' % opt.n_words)

    for d in ['/gpu:0']:
        with tf.device(d):
            src_ = [tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len]) for _ in range(opt.n_context)]
            tgt_ = tf.placeholder(tf.int32, shape=[opt_t.batch_size, opt_t.sent_len])
            is_train_ = tf.placeholder(tf.bool, name='is_train')
            res_, gan_cost_g_, train_op_g = conditional_s2s(src_, tgt_, is_train_, opt, opt_t)
            merged = tf.summary.merge_all()

    uidx = 0
    graph_options = tf.GraphOptions(build_cost_model=1)
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True, graph_options=graph_options)
    config.gpu_options.per_process_gpu_memory_fraction = 0.90
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()
    run_metadata = tf.RunMetadata()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                t_vars = tf.trainable_variables()
                if opt.load_from_pretrain:
                    d_vars = [var for var in t_vars if var.name.startswith('d_')]
                    g_vars = [var for var in t_vars if var.name.startswith('g_')]
                    l_vars = [var for var in t_vars if var.name.startswith('l_')]
                    restore_from_save(d_vars, sess, opt, load_path=opt.restore_dir + "/save/" + opt.global_d)
                    if opt.local_feature:
                        restore_from_save(l_vars, sess, opt, load_path=opt.restore_dir + "/save/" + opt.local_d)
                else:
                    loader = restore_from_save(t_vars, sess, opt, load_path=opt.save_path)
            except Exception as e:
                print 'Error: ' + str(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        loss_d, loss_g = 0, 0

        if opt.test:
            iter_num = np.int(np.floor(len(test) / opt.batch_size)) + 1
            res_all = []
            for i in range(iter_num):
                test_index = range(i * opt.batch_size, (i + 1) * opt.batch_size)
                sents = [val[t] for t in test_index]
                for idx in range(opt.n_context, opt.num_turn):
                    src = [[sents[i][idx - turn] for i in range(opt.batch_size)] for turn in range(opt.n_context, 0, -1)]
                    tgt = [sents[i][idx] for i in range(opt.batch_size)]
                    x_batch = [prepare_data_for_cnn(src_i, opt) for src_i in src]  # Batch L
                    y_batch = prepare_data_for_rnn(tgt, opt_t, is_add_GO=False)
                    feed = merge_two_dicts({i: d for i, d in zip(src_, x_batch)}, {tgt_: y_batch, is_train_: 0})  # do not use False
                    res = sess.run(res_, feed_dict=feed)
                    res_all.extend(res['syn_sent'])
            # bp()

            res_all = reshaping(res_all, opt)
            for idx in range(len(test) * (opt.num_turn - opt.n_context)):
                with open(opt.log_path + '.resp.txt', "a") as resp_f:
                    resp_f.write(u' '.join([ixtoword[x] for x in res_all[idx] if x != 0 and x != 2]).encode('utf-8').strip() + ('\n' if idx % (opt.num_turn - opt.n_context) == 0 else '\t'))
            print("save to:" + opt.log_path + '.resp.txt')
            exit(0)

        for epoch in range(opt.max_epochs):
            print("Starting epoch %d" % epoch)
            kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                sents = [train[t] for t in train_index]
                for idx in range(opt.n_context, opt.num_turn):
                    src = [[sents[i][idx - turn] for i in range(opt.batch_size)] for turn in range(opt.n_context, 0, -1)]
                    tgt = [sents[i][idx] for i in range(opt.batch_size)]
                    x_batch = [prepare_data_for_cnn(src_i, opt) for src_i in src]  # Batch L
                    y_batch = prepare_data_for_rnn(tgt, opt_t, is_add_GO=False)
                    feed = merge_two_dicts({i: d for i, d in zip(src_, x_batch)}, {tgt_: y_batch, is_train_: 1})
                    _, loss_g = sess.run([train_op_g, gan_cost_g_], feed_dict=feed)

                if uidx % opt.print_freq == 0:
                    print("Iteration %d: loss G %f" % (uidx, loss_g))
                    res = sess.run(res_, feed_dict=feed)
                    if opt.global_feature:
                        print "z loss: " + str(res['z_loss'])
                        if "nn" in opt.agg_model:
                            print "z pred_loss: " + str(res['z_loss_pred'])
                    print "Source:" + u' '.join([ixtoword[x] for s in x_batch for x in s[0] if x != 0]).encode('utf-8').strip()
                    print "Target:" + u' '.join([ixtoword[x] for x in y_batch[0] if x != 0]).encode('utf-8').strip()
                    print "Generated:" + u' '.join([ixtoword[x] for x in res['syn_sent'][0] if x != 0]).encode('utf-8').strip()
                    print ""
                    sys.stdout.flush()
                    summary = sess.run(merged, feed_dict=feed)
                    train_writer.add_summary(summary, uidx)

                if uidx % opt.valid_freq == 1:
                    VALID_SIZE = 4096
                    valid_multiplier = np.int(np.floor(VALID_SIZE / opt.batch_size))
                    res_all, val_tgt_all, loss_val_g_all = [], [], []
                    if opt.global_feature:
                        z_loss_all = []
                    for val_step in range(valid_multiplier):
                        valid_index = np.random.choice(len(val), opt.batch_size)
                        sents = [val[t] for t in valid_index]
                        for idx in range(opt.n_context, opt.num_turn):
                            src = [[sents[i][idx - turn] for i in range(opt.batch_size)] for turn in range(opt.n_context, 0, -1)]
                            tgt = [sents[i][idx] for i in range(opt.batch_size)]
                            val_tgt_all.extend(tgt)
                            x_batch = [prepare_data_for_cnn(src_i, opt) for src_i in src]  # Batch L
                            y_batch = prepare_data_for_rnn(tgt, opt_t, is_add_GO=False)
                            feed = merge_two_dicts({i: d for i, d in zip(src_, x_batch)}, {tgt_: y_batch, is_train_: 0})  # do not use False
                            loss_val_g = sess.run([gan_cost_g_], feed_dict=feed)
                            loss_val_g_all.append(loss_val_g)
                            res = sess.run(res_, feed_dict=feed)
                            res_all.extend(res['syn_sent'])
                            if opt.global_feature:
                                z_loss_all.append(res['z_loss'])

                    print("Validation: loss G %f " % (np.mean(loss_val_g_all)))
                    if opt.global_feature:
                        print "z loss: " + str(np.mean(z_loss_all))
                    print "Val Source:" + u' '.join([ixtoword[x] for s in x_batch for x in s[0] if x != 0]).encode('utf-8').strip()
                    print "Val Target:" + u' '.join([ixtoword[x] for x in y_batch[0] if x != 0]).encode('utf-8').strip()
                    print "Val Generated:" + u' '.join([ixtoword[x] for x in res['syn_sent'][0] if x != 0]).encode('utf-8').strip()
                    print ""

                    if opt.global_feature:
                        with open(opt.log_path + '.z.txt', "a") as myfile:
                            myfile.write("Iteration" + str(uidx) + "\n")
                            myfile.write("z_loss %f" % (np.mean(z_loss_all)) + "\n")
                            myfile.write("Val Source:" + u' '.join([ixtoword[x] for s in x_batch for x in s[0] if x != 0]).encode('utf-8').strip() + "\n")
                            myfile.write("Val Target:" + u' '.join([ixtoword[x] for x in y_batch[0] if x != 0]).encode('utf-8').strip() + "\n")
                            myfile.write("Val Generated:" + u' '.join([ixtoword[x] for x in res['syn_sent'][0] if x != 0]).encode('utf-8').strip() + "\n")
                            myfile.write("Z_input, Z_recon, Z_tgt")
                            myfile.write(np.array2string(res['z'][0], formatter={'float_kind': lambda x: "%.2f" % x}) + "\n")
                            myfile.write(np.array2string(res['z_hat'][0], formatter={'float_kind': lambda x: "%.2f" % x}) + "\n\n")
                            myfile.write(np.array2string(res['z_tgt'][0], formatter={'float_kind': lambda x: "%.2f" % x}) + "\n\n")

                    val_set = [prepare_for_bleu(s) for s in val_tgt_all]
                    gen = [prepare_for_bleu(s) for s in res_all]
                    [bleu1s, bleu2s, bleu3s, bleu4s] = cal_BLEU_4(gen, {0: val_set}, is_corpus=opt.is_corpus)
                    etp_score, dist_score = cal_entropy(gen)
                    print 'Val BLEU: ' + ' '.join([str(round(it, 3)) for it in (bleu1s, bleu2s, bleu3s, bleu4s)])
                    print 'Val Entropy: ' + ' '.join([str(round(it, 3)) for it in (etp_score[0], etp_score[1], etp_score[2], etp_score[3])])
                    print 'Val Diversity: ' + ' '.join([str(round(it, 3)) for it in (dist_score[0], dist_score[1], dist_score[2], dist_score[3])])
                    print 'Val Avg. length: ' + str(round(np.mean([len([y for y in x if y != 0]) for x in res_all]), 3))
                    print ""

                    summary = sess.run(merged, feed_dict=feed)
                    summary2 = tf.Summary(value=[tf.Summary.Value(tag="bleu-2", simple_value=bleu2s), tf.Summary.Value(tag="etp-4", simple_value=etp_score[3])])
                    test_writer.add_summary(summary, uidx)
                    test_writer.add_summary(summary2, uidx)

                if uidx % opt.save_freq == 0:
                    saver.save(sess, opt.save_path)
import logging
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

import argparse
import datasets
import requests
import time

from requests.exceptions import ConnectionError
from tabulate import tabulate
from tqdm import tqdm

from utils import nli_label2int, read_dev, read_test

_option2dataset = {
    'dev': read_dev(),
    'test': read_test(),
}


def main(dataset_option: str, endpoint: str, batch_size=32):
    try:
        languages, premises, hypotheses, labels = _option2dataset[dataset_option]
    except KeyError as e:
        logging.error(f'{e} is not a valid choice as a dataset_option')
        logging.error(f'This should have failed through argparse. Contact the teaching assistants.')
        exit(1)
    except Exception as e:
def main():
    opt = COptions(args)
    opt_t = COptions(args)
    loadpath = (opt.data_dir + "/" + opt.data_name)
    print "loadpath:" + loadpath
    x = cPickle.load(open(loadpath, "rb"))
    train, val, test = x[0], x[1], x[2]
    wordtoix, ixtoword = x[3], x[4]

    if opt.test:
        test_file = opt.data_dir + opt.test_file
        test = read_test(test_file, wordtoix)

    opt.n_words = len(ixtoword)
    opt_t.n_words = len(ixtoword)
    opt_t.maxlen = opt_t.maxlen - opt_t.filter_shape + 1
    opt_t.update_params(args)
    print datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y")
    print dict(opt)
    print('Total words: %d' % opt.n_words)

    for d in ['/gpu:0']:
        with tf.device(d):
            src_ = [tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len]) for _ in range(opt.n_context)]
            tgt_ = tf.placeholder(tf.int32, shape=[opt_t.batch_size, opt_t.sent_len])
            is_train_ = tf.placeholder(tf.bool, name='is_train')
            res_1_ = get_features(src_, tgt_, is_train_, opt, opt_t)
            merged = tf.summary.merge_all()

    uidx = 0
    graph_options = tf.GraphOptions(build_cost_model=1)
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True, graph_options=graph_options)
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()
    run_metadata = tf.RunMetadata()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                t_vars = tf.trainable_variables()
                if opt.load_from_pretrain:
                    d_vars = [var for var in t_vars if var.name.startswith('d_')]
                    l_vars = [var for var in t_vars if var.name.startswith('l_')]
                    restore_from_save(d_vars, sess, opt, load_path=opt.restore_dir + "/save/" + opt.global_d)
                    if opt.local_feature:
                        restore_from_save(l_vars, sess, opt, load_path=opt.restore_dir + "/save/" + opt.local_d)
                else:
                    loader = restore_from_save(t_vars, sess, opt, load_path=opt.save_path)
            except Exception as e:
                print 'Error: ' + str(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        loss_d, loss_g = 0, 0

        if opt.test:
            iter_num = np.int(np.floor(len(test) / opt.batch_size)) + 1
            z_all, z_all_l = [], []
            for i in range(iter_num):
                test_index = range(i * opt.batch_size, (i + 1) * opt.batch_size)
                sents = [test[t % len(test)] for t in test_index]
                src = [[sents[i][0] for i in range(opt.batch_size)]]
                tgt = [sents[i][0] for i in range(opt.batch_size)]
                x_batch = [prepare_data_for_cnn(src_i, opt) for src_i in src]
                print "Source:" + u' '.join([ixtoword[x] for s in x_batch for x in s[0] if x != 0]).encode('utf-8').strip()
                y_batch = prepare_data_for_rnn(tgt, opt_t, is_add_GO=False)
                feed = merge_two_dicts({i: d for i, d in zip(src_, x_batch)}, {tgt_: y_batch, is_train_: 0})
                res_1 = sess.run(res_1_, feed_dict=feed)
                z_all.extend(res_1['z'])
                z_all_l.extend(res_1['z_l'])

            save_path_z = opt.log_path + '.global.z.txt'
            print save_path_z
            if os.path.exists(save_path_z):
                os.remove(save_path_z)
            with open(save_path_z, "a") as myfile:
                for line in z_all[:len(test)]:
                    for z_it in line:
                        myfile.write(str(z_it) + '\t')
                    myfile.write('\n')

            save_path_z = opt.log_path + '.local.z.txt'
            print save_path_z
            if os.path.exists(save_path_z):
                os.remove(save_path_z)
            with open(save_path_z, "a") as myfile:
                for line in z_all_l[:len(test)]:
                    for z_it in line:
                        myfile.write(str(z_it) + '\t')
                    myfile.write('\n')