def __init__(self):
    self.glove_path = os.path.join(config.data_dir,
                                   "glove.6B.{}d.txt".format(config.embedding_size))
    self.emb_matrix, self.word2id, self.id2word = get_glove(self.glove_path,
                                                            config.embedding_size)
    self.train_context_path = os.path.join(config.data_dir, "train.context")
    self.train_qn_path = os.path.join(config.data_dir, "train.question")
    self.train_ans_path = os.path.join(config.data_dir, "train.span")
    self.dev_context_path = os.path.join(config.data_dir, "dev.context")
    self.dev_qn_path = os.path.join(config.data_dir, "dev.question")
    self.dev_ans_path = os.path.join(config.data_dir, "dev.span")
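# NOTE: the get_glove() helper used throughout these scripts is not shown here. The
# sketch below is an assumed, minimal version of what such a loader typically does with
# the standard glove.6B.{d}d.txt format (one token per line followed by its vector).
# The function name matches the calls above, but the special-token layout (PAD=0, UNK=1)
# is an assumption, not the project's actual code.
import numpy as np

def get_glove(glove_path, embedding_size):
    """Assumed sketch: build (emb_matrix, word2id, id2word) from a GloVe text file."""
    word2id, id2word, vectors = {}, {}, []
    # Reserve id 0 for PAD and id 1 for UNK (a common convention; the real code may differ).
    for idx, tok in enumerate(["<pad>", "<unk>"]):
        word2id[tok] = idx
        id2word[idx] = tok
        vectors.append(np.zeros(embedding_size, dtype=np.float32))
    with open(glove_path, "r") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            word, vec = parts[0], np.asarray(parts[1:], dtype=np.float32)
            idx = len(vectors)
            word2id[word] = idx
            id2word[idx] = word
            vectors.append(vec)
    return np.stack(vectors), word2id, id2word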
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" % unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception("ERROR: You must use Python 2 but you are running Python %i" %
                        sys.version_info[0])

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or \
        os.path.join(DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    timer.start("glove_getter")
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size)
    id2idf = get_idf(os.path.abspath(FLAGS.idf_path), word2id)
    logger.warn("Getting glove embeddings of size {} took {:.2f} s".format(
        FLAGS.embedding_size, timer.stop("glove_getter")))

    # Print out Tensorflow version
    # print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    ensumble = FLAGS.ensumble
    print(ensumble)
    if not ensumble and not FLAGS.attn_layer and not FLAGS.train_dir and FLAGS.mode != "official_eval":
        raise Exception("You need to specify either --attn_layer or --train_dir")

    # Define train_dir
    if not FLAGS.experiment_name:
        FLAGS.experiment_name = "A_{}_E_{}_D_{}".format(FLAGS.attn_layer,
                                                        FLAGS.embedding_size, FLAGS.dropout)
    checkptr_name = FLAGS.experiment_name + "/glove{}".format(FLAGS.embedding_size)
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, checkptr_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
    train_context_path = os.path.join(FLAGS.data_dir, "train.context")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
    dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
    dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    is_training = (FLAGS.mode == "train")
    if not ensumble:
        # Initialize model
        qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix, id2idf, is_training)
    else:
        ensumbler = Ensumbler(ensumble, config, id2word, word2id, emb_matrix, id2idf)

    # Split by mode
    if FLAGS.mode == "train":
        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:
            # Load most recent model
            qa_model.initialize_from_checkpoint(sess, FLAGS.train_dir, expect_exists=False)
            # Train
            qa_model.train(sess, train_context_path, train_qn_path, train_ans_path,
                           dev_qn_path, dev_context_path, dev_ans_path)

    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:
            # Load best model
            qa_model.initialize_from_checkpoint(sess, bestmodel_dir, expect_exists=True)
            # Show examples with F1/EM scores
            f1, em = qa_model.check_f1_em(sess, dev_context_path, dev_qn_path, dev_ans_path,
                                          "dev", num_samples=10, print_to_screen=True)
            logger.info("Dev: F1 = {:.3}, EM = {:.3}".format(f1, em))

    elif FLAGS.mode == "eval":
        if ensumble:
            # train
            train_f1, train_em = ensumbler.check_f1_em(train_context_path, train_qn_path,
                                                       train_ans_path, "train", FLAGS.n_eval)
            # dev
            dev_f1, dev_em = ensumbler.check_f1_em(dev_context_path, dev_qn_path,
                                                   dev_ans_path, "dev", FLAGS.n_eval)
        else:
            with tf.Session(config=config) as sess:
                # Load best model
                qa_model.initialize_from_checkpoint(sess, FLAGS.ckpt_load_dir, expect_exists=True)
                logger.info("Model initialized from checkpoint")
                # train
                train_f1, train_em = qa_model.check_f1_em(
                    sess, train_context_path, train_qn_path, train_ans_path, "train",
                    num_samples=10, print_to_screen=False)
                # dev
                dev_f1, dev_em = qa_model.check_f1_em(
                    sess, dev_context_path, dev_qn_path, dev_ans_path, "dev",
                    num_samples=10, print_to_screen=False)
        logger.error("Train: F1 = {:.3}, EM = {:.3}".format(train_f1, train_em))
        logger.error("Dev: F1 = {:.3}, EM = {:.3}".format(dev_f1, dev_em))

    elif FLAGS.mode == "official_eval":
        if not ensumble:
            if FLAGS.json_in_path == "":
                raise Exception("For official_eval mode, you need to specify --json_in_path")
            if FLAGS.ckpt_load_dir == "":
                raise Exception("For official_eval mode, you need to specify --ckpt_load_dir")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(FLAGS.json_in_path)

        if ensumble:
            answers_dict = ensumbler.generate_answers(qn_uuid_data, context_token_data,
                                                      qn_token_data)
        else:
            with tf.Session(config=config) as sess:
                # Load model from ckpt_load_dir
                qa_model.initialize_from_checkpoint(sess, FLAGS.ckpt_load_dir, expect_exists=True)
                # Get a predicted answer for each example in the data
                # Return a mapping answers_dict from uuid to answer
                answers_dict = generate_answers(sess, qa_model, word2id, id2idf,
                                                qn_uuid_data, context_token_data, qn_token_data)

        # Write the uuid->answer mapping to a json file in root dir
        print "Writing predictions to %s..." % FLAGS.json_out_path
        with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
            f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
        print "Wrote predictions to %s" % FLAGS.json_out_path

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" % unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception("ERROR: You must use Python 2 but you are running Python %i" %
                        sys.version_info[0])

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Define train_dir
    if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_eval":
        raise Exception("You need to specify either --experiment_name or --train_dir")
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name)

    # Initialize bestmodel directory
    global bestmodel_dir
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(
        DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    global emb_matrix, word2id, id2word
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size)

    # Initialize model
    global qa_model
    qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix)

    # Some GPU settings
    global config
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    global sess
    sess = tf.Session(config=config)

    global global_context
    global_context = 'INSOFE has awarded over Rs 3.2 Crores in merit scholarships in the last 2 years alone. INSOFE recognizes top performers and rewards them for demonstrating outstanding achievement at every phase of the program based on their performance and eligibility criteria. At each phase of the program, top performers are awarded rankings based on which scholarship winners are announced. Top performers can potentially win scholarships ranging from Rs 25,000 to entire program fee and this can be attained on the successful completion of the program.'

    global global_context_list
    global_context_list = [
        'INSOFE has awarded over Rs 3.2 Crores in merit scholarships in the last 2 years alone. INSOFE recognizes top performers and rewards them for demonstrating outstanding achievement at every phase of the program based on their performance and eligibility criteria. At each phase of the program, top performers are awarded rankings based on which scholarship winners are announced. Top performers can potentially win scholarships ranging from Rs 25,000 to entire program fee and this can be attained on the successful completion of the program.',
        'INSOFE is working on developing a video surveillance tool with enhanced smart capabilities. The tool identifies the violation and sends out instant automated response without requiring any manual interference. Since the current process involves manually going through the footage and checking for violations, it is not only a time-consuming process but also requires manual hours and effort. The tool makes the entire process automated with an Embedded Machine Learning chip Question',
        'Dr Dakshinamurthy, is the Founder and President of INSOFE. He did his PhD in Materials Science and Engineering from Carnegie Mellon University. He is known for simplifying complex ideas and communicating them clearly and excitingly. Dr Sridhar Pappu is the Executive VP - Academics of INSOFE. He leads the academic administration of the institute and ensures the highest standards in learning for the students. He teaches statistics. He loves data, soo much that he wears two fitness trackers.'
    ]

    # Load model from ckpt_load_dir
    initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True)

    # app.run(host='0.0.0.0', port=443, ssl_context=('/home/gem/.ssh/certificate.pem', '/home/gem/.ssh/private-key.pem'))
    app.run(host='0.0.0.0',
            port=443,
            ssl_context=('/etc/letsencrypt/live/gem.eastus2.cloudapp.azure.com/fullchain.pem',
                         '/etc/letsencrypt/live/gem.eastus2.cloudapp.azure.com/privkey.pem'))
try:
    import vocab
except ImportError:
    import sys
    sys.path.insert(0, '/Users/mbaumer/side_projects/ruth-bader-ginsbot/python/')
    import vocab

import keras
import nltk
import pandas as pd

data = pd.read_csv(
    '../data/supreme_court_dialogs_corpus_v1.01/supreme.conversations.txt',
    sep='\ \+\+\+\$\+\+\+\ ',
    names=['case_id', 'utterance_id', 'after_previous', 'speaker', 'isJustice',
           'justice_vote', 'presenting_side', 'utterance'])

justice_lines = data[(data['isJustice'] == 'JUSTICE')
                     & ((data['justice_vote'] == 'PETITIONER')
                        | (data['justice_vote'] == 'RESPONDENT'))]

tokens = map(nltk.word_tokenize, justice_lines['utterance'])

emb_matrix, word2id, id2word = vocab.get_glove('../data/glove/glove.6B.50d.txt', 50)

# Map each token to its vocab id (1 = UNK) and count how many words fall out of vocab
N_words = 0
N_unk = 0
list_list_tokens = []
for sentence in tokens:
    list_tokens = []
    for word in sentence:
        N_words += 1
        token_id = word2id.get(word, 1)
        list_tokens.append(token_id)
        if token_id == 1:
            N_unk += 1
    list_list_tokens.append(list_tokens)

print('Nwords:', N_words)
print('Nunk:', N_unk)
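# The id lists collected above are ragged; if they are going to be fed to the imported
# keras model they would typically be padded to a fixed length first. A minimal sketch,
# assuming keras.preprocessing.sequence.pad_sequences and an arbitrary MAX_LEN (both are
# my assumptions, not part of the original script):
from keras.preprocessing.sequence import pad_sequences

MAX_LEN = 50  # assumed; pick based on the utterance length distribution
# Pad/truncate every utterance so the whole corpus can be batched as one array.
padded = pad_sequences(list_list_tokens, maxlen=MAX_LEN, padding='post', truncating='post')
print('padded shape:', padded.shape)  # (num_utterances, MAX_LEN)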
def main(unused_argv):
    # First check that the FLAGS were entered correctly (format), plus python and tensorflow versions
    # Then check that train_dir or experiment_name is defined
    # Set bestmodel path, which is named best_checkpoint
    # Set context, question, ans paths
    # Read glove
    # Initialise the model architecture
    # GPU settings
    # Mode choice
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" % unused_argv)
    if sys.version_info[0] != 3:
        raise Exception("ERROR: You must use Python 3 but you are running Python %i" %
                        sys.version_info[0])
    print("This code was developed and tested on TensorFlow 1.8.0. Your TensorFlow version: %s"
          % tf.__version__)

    # Define train_dir
    if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_eval":
        raise Exception("You need to specify either --experiment_name or --train_dir")
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(
        DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load glove embeddings and character mappings
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size)
    char2id, id2char = get_char_embed()

    # Paths for context, question, ans_span
    train_context_path = os.path.join(FLAGS.data_dir, "train.context")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
    dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
    dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")

    # Initialise the model
    qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix, id2char, char2id)

    # Some GPU settings
    config = tf.ConfigProto()  # set configuration for sess.run
    config.gpu_options.allow_growth = True  # grow GPU memory usage only as needed

    # Different modes
    if FLAGS.mode == "train":
        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            # Note: changed from FLAGS.__flags to FLAGS.flag_values_dict() after tensorflow 1.5
            json.dump(FLAGS.flag_values_dict(), fout)

        # Make bestmodel dir
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:  # Added tfdbg
            # Load most recent model
            initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False)
            # Train
            qa_model.train(sess, train_context_path, train_qn_path, train_ans_path,
                           dev_qn_path, dev_context_path, dev_ans_path)

    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:
            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)
            # Show examples with F1/EM scores
            _, _ = qa_model.check_f1_em(sess, dev_context_path, dev_qn_path, dev_ans_path,
                                        "dev", num_samples=10, print_to_screen=True)

        # official_eval mode is currently disabled:
        """
        elif FLAGS.mode == "official_eval":
            if FLAGS.json_in_path == "":
                raise Exception("For official_eval mode, you need to specify --json_in_path")
            if FLAGS.ckpt_load_dir == "":
                raise Exception("For official_eval mode, you need to specify --ckpt_load_dir")

            # Read the JSON data from file
            qn_uuid_data, context_token_data, qn_token_data = get_json_data(FLAGS.json_in_path)

            with tf.Session(config=config) as sess:
                # Load model from ckpt_load_dir
                initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True)

                # Get a predicted answer for each example in the data
                # Return a mapping answers_dict from uuid to answer
                answers_dict = generate_answers(sess, qa_model, word2id, qn_uuid_data,
                                                context_token_data, qn_token_data)

                # Write the uuid->answer mapping to a json file in root dir
                print("Writing predictions to %s..." % FLAGS.json_out_path)
                with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                    f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
                print("Wrote predictions to %s" % FLAGS.json_out_path)
        """
    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" % unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception("ERROR: You must use Python 2 but you are running Python %i" %
                        sys.version_info[0])

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Define train_dir
    if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_competition_eval":
        raise Exception("You need to specify either --experiment_name or --train_dir")
    if not FLAGS.attention_type or not FLAGS.reduction_type:
        raise Exception(
            "You have to specify both --attention_type (dot_product, bidaf, self_attention) "
            "and --reduction_type (max, mean) to proceed.")
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name)

    # Initialize bestmodel directories
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")
    bestmodel_dir_dev_loss = os.path.join(FLAGS.train_dir, "best_checkpoint_dev_loss")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(
        DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size)

    # Get filepaths to train/dev/test datafiles for tokenized headlines, bodies and stances
    train_headline_path = os.path.join(FLAGS.data_dir, "train.headline")
    train_body_path = os.path.join(FLAGS.data_dir, "train.body")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.stance")
    dev_headline_path = os.path.join(FLAGS.data_dir, "dev.headline")
    dev_body_path = os.path.join(FLAGS.data_dir, "dev.body")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.stance")
    test_headline_path = os.path.join(FLAGS.data_dir, "test.headline")
    test_body_path = os.path.join(FLAGS.data_dir, "test.body")
    test_ans_path = os.path.join(FLAGS.data_dir, "test.stance")

    # Initialize model: create the entire computation graph, add loss, optimizer etc.
    qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Per-attention-type hyperparameter overrides
    attention_type = FLAGS.attention_type
    reduction_type = FLAGS.reduction_type
    if attention_type == 'dot_product' and reduction_type == 'max':
        FLAGS.max_gradient_norm = 10.0
    if attention_type == 'bidaf':
        FLAGS.hidden_size = 120
    if attention_type == 'self_attention':
        FLAGS.self_attn_zsize = 60
        FLAGS.hidden_size = 70

    # Split by mode
    if FLAGS.mode == "train":
        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dirs if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)
        if not os.path.exists(bestmodel_dir_dev_loss):
            os.makedirs(bestmodel_dir_dev_loss)

        with tf.Session(config=config) as sess:
            # Load most recent model
            initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False)
            # Train
            qa_model.custom_train(sess, train_body_path, train_headline_path, train_ans_path,
                                  dev_headline_path, dev_body_path, dev_ans_path)

    elif FLAGS.mode == "check_eval":
        if FLAGS.ckpt_load_dir == "":
            raise Exception("For check_eval mode, you need to specify --ckpt_load_dir")

        with tf.Session(config=config) as sess:
            # Load model from ckpt_load_dir
            initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True)
            dev_score = qa_model.check_score_cm(sess, dev_body_path, dev_headline_path,
                                                dev_ans_path, "dev", num_samples=0)
            print("Dev score:=>", dev_score)
            test_score = qa_model.check_score_cm(sess, test_body_path, test_headline_path,
                                                 test_ans_path, "test", num_samples=0)
            print("Test score:=>", test_score)

    elif FLAGS.mode == "official_competition_eval":
        if FLAGS.ckpt_load_dir == "":
            raise Exception("For official_competition_eval mode, you need to specify --ckpt_load_dir")

        # Dataset competition read from csv.
        competition_dataset = DataSet(name="competition_test", path=FLAGS.data_dir)

        # Retrieve list of body/article ids for the competition dataset
        comp_body_ids = list(competition_dataset.articles.keys())  # get a list of article ids
        # Retrieve stances for all body ids
        comp_stances_list = get_stances(competition_dataset, comp_body_ids)

        # Get body and headline tokens
        body_token_data_list, headline_token_data_list, input_body_id_list, headline_list = \
            get_preprocessed_data(competition_dataset, comp_stances_list, 'competition')

        with tf.Session(config=config) as sess:
            # Load model from ckpt_load_dir
            initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True)
            # Predicted labels as text, not numbers
            pred_label_answer_list = get_answers(sess, qa_model, word2id,
                                                 body_token_data_list, headline_token_data_list)
            # stance_df = pd.DataFrame()
            # stance_df['Stance'] = pred_label_answer_list
            # stance_df.to_csv(os.path.join(FLAGS.result_output_path, "stance.csv"), index=False)
            np.savetxt(os.path.join(FLAGS.result_output_path, "stance.csv"),
                       pred_label_answer_list, delimiter="\n", fmt='%s')

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" % unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception("ERROR: You must use Python 2 but you are running Python %i" %
                        sys.version_info[0])

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Define train_dir
    if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_eval":
        raise Exception("You need to specify either --experiment_name or --train_dir")
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(
        DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size)

    # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
    train_context_path = os.path.join(FLAGS.data_dir, "train.context")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
    dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
    dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")

    # Initialize model
    qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Split by mode
    if FLAGS.mode == "train":
        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:
            # Load most recent model
            initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False)
            # Train
            qa_model.train(sess, train_context_path, train_qn_path, train_ans_path,
                           dev_qn_path, dev_context_path, dev_ans_path)

    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:
            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)
            # Show examples with F1/EM scores
            _, _ = qa_model.check_f1_em(sess, dev_context_path, dev_qn_path, dev_ans_path,
                                        "dev", num_samples=10, print_to_screen=True)

    elif FLAGS.mode == "official_eval":
        if FLAGS.json_in_path == "":
            raise Exception("For official_eval mode, you need to specify --json_in_path")
        if FLAGS.ckpt_load_dir == "":
            raise Exception("For official_eval mode, you need to specify --ckpt_load_dir")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(FLAGS.json_in_path)

        with tf.Session(config=config) as sess:
            # Load model from ckpt_load_dir
            initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True)

            # Get a predicted answer for each example in the data
            # Return a mapping answers_dict from uuid to answer
            answers_dict = generate_answers(sess, qa_model, word2id, qn_uuid_data,
                                            context_token_data, qn_token_data)

            # Write the uuid->answer mapping to a json file in root dir
            print "Writing predictions to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
            print "Wrote predictions to %s" % FLAGS.json_out_path

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
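# NOTE: initialize_model() is called throughout these scripts but its body is not shown.
# The sketch below is an assumed, minimal TF 1.x version of what such a helper typically
# does: restore the newest checkpoint in train_dir, or (when expect_exists=False) fall
# back to fresh variable initialization. The `model.saver` attribute is an assumption.
import tensorflow as tf

def initialize_model(session, model, train_dir, expect_exists):
    """Assumed sketch: restore `model` from the newest checkpoint in `train_dir`."""
    ckpt = tf.train.get_checkpoint_state(train_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)  # assumes model exposes a tf.train.Saver
    else:
        if expect_exists:
            raise Exception("There is no saved checkpoint at %s" % train_dir)
        print("Created model with fresh parameters.")
        session.run(tf.global_variables_initializer())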
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" % unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception("ERROR: You must use Python 2 but you are running Python %i" %
                        sys.version_info[0])

    # Check for ensemble model param setting
    if FLAGS.enable_ensemble_model and (FLAGS.mode != "official_eval"
                                        or not FLAGS.ensemble_model_names):
        raise Exception(
            "ERROR: model ensemble is only supported in official_eval mode, "
            "you must specify ensemble_model_names")

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Define train_dir
    if (not FLAGS.enable_ensemble_model and not FLAGS.experiment_name and not FLAGS.train_dir
            and FLAGS.mode != "official_eval") or (FLAGS.enable_ensemble_model
                                                   and not FLAGS.ensemble_model_names):
        raise Exception(
            "You need to specify either --experiment_name or --train_dir, "
            "or ensemble_model_names if ensemble is enabled.")
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(
        DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size)

    # Build character level vocab mappings
    char2id, id2char = get_char_mapping()

    # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
    train_context_path = os.path.join(FLAGS.data_dir, "train.context")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
    dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
    dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")

    if not FLAGS.enable_ensemble_model:
        # Initialize model only when ensemble model is disabled.
        qa_model_name = FLAGS.model_name + '_model'
        QAModel = importlib.import_module(qa_model_name).QAModel
        print('model loaded from: %s' % qa_model_name)
        qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix, char2id, id2char)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Split by mode
    if FLAGS.mode == "train":
        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:
            # Load most recent model
            initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False)
            # Train
            qa_model.train(sess, train_context_path, train_qn_path, train_ans_path,
                           dev_qn_path, dev_context_path, dev_ans_path)

    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:
            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)
            # Show examples with F1/EM scores
            _, _ = qa_model.check_f1_em(sess, dev_context_path, dev_qn_path, dev_ans_path,
                                        "dev", num_samples=10, print_to_screen=True)

    elif FLAGS.mode == "official_eval":
        if FLAGS.json_in_path == "":
            raise Exception("For official_eval mode, you need to specify --json_in_path")
        if not FLAGS.enable_ensemble_model and FLAGS.ckpt_load_dir == "":
            raise Exception(
                "For official_eval mode, you need to specify --ckpt_load_dir "
                "or use ensemble_model_names")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(FLAGS.json_in_path)

        if FLAGS.enable_ensemble_model:
            print('FLAGS.ensemble_model_names: %s' % FLAGS.ensemble_model_names)
            print('FLAGS.sum_weights: %s' % FLAGS.sum_weights)

            # KV is 'label': ('model_file', 'exp_name', 'codalab_bundle_name', 'has_cnn', weight)
            ensemble_label_to_model_meta = {
                'binco_legacy': [
                    'binco_legacy_model',
                    'binco_30b15_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'binco_30b15', False, 0.6692
                ],  # 0.6900 (74.0, 63.5)
                'chsebinco_real': [
                    'chsebinco_model',
                    'chsebinco_real_1c999_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chsebinco_real_1c999', True, 0.6733
                ],  # 0.6958, (74.7, 64.0)
                'chsebinco_legacy': [
                    'chsebinco_legacy_model',
                    'chsebinco_4a81a_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chsebinco_4a81a', True, 0.6507
                ],  # 0.6954, (?, ?)
                'chgasebinco': [
                    'chgasebinco_model',
                    'chgasebinco_1c999_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chgasebinco_1c999', True, 0.7045
                ],  # 0.7101 (76.6, 66.4)
                'chgasebinco_91ca1': [
                    'chgasebinco_model',
                    'chgasebinco_91ca1_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chgasebinco_91ca1', True, 0.69
                ],  # 0.68 (? ?)
                'chgasebinco_888ca': [
                    'chgasebinco_model',
                    'chgasebinco_888ca_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chgasebinco_888ca', True, 0.69
                ],  # 0.67 (? ?)
                'chgasebinco_888ca_run2': [
                    'chgasebinco_model',
                    'chgasebinco_888ca_run2_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chgasebinco_888ca_run2', True, 0.69
                ],  # 0.6911 (? ?)
            }

            model_labels = FLAGS.ensemble_model_names.split(';')
            if len(model_labels) == 1 and model_labels[0].lower() == 'all':
                model_labels = ensemble_label_to_model_meta.keys()
            else:
                for label in model_labels:
                    assert label in ensemble_label_to_model_meta

            # A list to store the output of all predictions.
            # Each entry is a map storing the start and end dist for that batch:
            #   len(ensemble_model_pred) is len(model_labels)
            #   len(ensemble_model_pred[0]) is number of batches
            #   len(ensemble_model_pred[0]['start']) is batch_size
            #   len(ensemble_model_pred[0]['end']) is batch_size
            ensemble_model_pred = []
            sum_weights_list = []
            for label in model_labels:
                tf.reset_default_graph()
                model_name, model_exp_name, cl_bundle_name, has_cnn, weight = \
                    ensemble_label_to_model_meta[label]
                sum_weights_list.append(str(weight))
                print "Loading model: %s" % model_name
                # TODO(binbinx): change this to appropriate models
                QAModel = importlib.import_module(model_name).QAModel
                qa_model = (QAModel(FLAGS, id2word, word2id, emb_matrix, char2id, id2char)
                            if has_cnn else QAModel(FLAGS, id2word, word2id, emb_matrix))

                with tf.Session(config=config) as sess:
                    # Initialize bestmodel directory
                    ckpt_load_dir = (os.path.join(EXPERIMENTS_DIR, model_exp_name, "best_checkpoint")
                                     if not FLAGS.is_codalab_eval else cl_bundle_name)
                    # Load model from ckpt_load_dir
                    initialize_model(sess, qa_model, ckpt_load_dir, expect_exists=True)
                    # Get a predicted answer for each example in the data
                    # Return a mapping answers_dict from uuid to answer
                    # WE MUST USE A DEEPCOPY HERE!!
                    qn_uuid_data_ = copy.deepcopy(qn_uuid_data)
                    context_token_data_ = copy.deepcopy(context_token_data)
                    qn_token_data_ = copy.deepcopy(qn_token_data)
                    answers_dict = generate_answers(sess, qa_model, word2id, char2id,
                                                    qn_uuid_data_, context_token_data_,
                                                    qn_token_data_, ensemble_model_pred)

            sum_weights = (';'.join(sum_weights_list)
                           if FLAGS.sum_weights.lower() == 'default' else FLAGS.sum_weights)
            pred_start_batches, pred_end_batches = resolve_ensemble_model_preds(
                ensemble_model_pred, sum_weights=sum_weights)
            final_ans_dict = generate_answers_with_start_end(
                FLAGS, word2id, char2id, qn_uuid_data, context_token_data, qn_token_data,
                pred_start_batches, pred_end_batches)

            # Write the uuid->answer mapping to a json file in root dir
            print "Writing predictions to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(final_ans_dict, ensure_ascii=False)))
            print "Wrote predictions to %s" % FLAGS.json_out_path
        else:
            with tf.Session(config=config) as sess:
                # Load model from ckpt_load_dir
                initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True)

                # Get a predicted answer for each example in the data
                # Return a mapping answers_dict from uuid to answer
                answers_dict = generate_answers(sess, qa_model, word2id, char2id,
                                                qn_uuid_data, context_token_data, qn_token_data)

                # Write the uuid->answer mapping to a json file in root dir
                print "Writing predictions to %s..." % FLAGS.json_out_path
                with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                    f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
                print "Wrote predictions to %s" % FLAGS.json_out_path

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
def main():
    print('Housing Price Prediction Project')
    args = parser.parse_args()
    printArgs(args)

    # root = '/home/gangwu/cs224n/housingprice'
    root = '/home/ooo/projects/housingprice'
    exp_path = root + '/experiment/' + args.experiment_name
    os.system('mkdir -p ' + exp_path)
    print('experiment path: %s' % exp_path)

    # input_size = 128
    input_size = 224  # after crop
    testCSVfile = root + '/csvFiles/clean.csv'
    imageDir = root + '/images'
    glove_path = '../data/glove/glove.6B.50d.txt'
    hidden_dim = 50
    embedding_size = 50

    emb_matrix, word2id, id2word = get_glove(glove_path, embedding_size)
    dataset = readCSV(testCSVfile, imageDir, word2id)
    num_train, num_dev = splitTrainDevSet(dataset, 0.7)

    # percentage of data to load
    pct = 1.0
    batch_size = 128
    # pct = 0.005

    device = getDevice()
    model = getModel(args.mode, device, input_size, hidden_dim, emb_matrix)

    if args.mode == 'train':
        # resnet50 batch size: train = 100, dev = 256
        # p100: 64
        trainBatcher = Batcher(dataset, percent=pct, preload=False, batchSize=batch_size,
                               num_train=num_train, tgtSet='train')
        loader = trainBatcher.loader

        devBatcher = Batcher(dataset, percent=pct, preload=False, batchSize=batch_size,
                             num_train=num_train, tgtSet='dev')
        dev_loader = devBatcher.loader

        # optimizer = optim.SGD(model.getParameters(), lr=0.001, momentum=0.9)
        optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))

        trainer = Trainer(args.mode, model, loader, dev_loader, optimizer, device, exp_path)
        print('Start training...')
        trainer.train(epoch=60)

        # test mode is currently disabled:
        '''
        elif args.mode == 'test':
            testBatcher = Batcher(percent=pct, preload=False, batchSize=512, targetSet='test')
            test_loader = testBatcher.loader
            trainer = Trainer(model, None, None, None, device, exp_path)
            print('Start evaluation on test set...')
            trainer.eval(test_loader, 'test')
        '''
    else:
        raise Exception('Unknown mode %s. Exiting...' % args.mode)

    print('Done!')
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" % unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception("ERROR: You must use Python 2 but you are running Python %i" %
                        sys.version_info[0])

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Define train_dir
    if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_eval":
        raise Exception("You need to specify either --experiment_name or --train_dir")
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(
        DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size)

    # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
    train_context_path = os.path.join(FLAGS.data_dir, "train.graph")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.instruction")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.answer")
    dev_context_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".graph")
    dev_qn_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".instruction")
    dev_ans_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".answer")

    # Create vocabularies of the appropriate sizes for output answer.
    context_vocab_path = os.path.join(FLAGS.data_dir,
                                      "vocab%d.context" % FLAGS.context_vocabulary_size)
    # ans_vocab_path = os.path.join(FLAGS.data_dir, "vocab%d." % FLAGS.ans_vocabulary_size)

    # Initialize the vocabulary.
    context_vocab, rev_context_vocab = create_vocabulary(context_vocab_path, train_context_path,
                                                         FLAGS.context_vocabulary_size)

    # Initialize model
    qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix, context_vocab, rev_context_vocab,
                       context_vocab)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Split by mode
    if FLAGS.mode == "train":
        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        # with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
        #     json.dump(FLAGS(sys.argv), fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:
            # Load most recent model
            initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False)
            # Train
            qa_model.train(sess, train_context_path, train_qn_path, train_ans_path,
                           dev_qn_path, dev_context_path, dev_ans_path)

    elif FLAGS.mode == "show_examples":
        # To show a few examples without the attention map.
        with tf.Session(config=config) as sess:
            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)
            # summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
            eval_context_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".graph")
            eval_qn_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".instruction")
            eval_ans_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".answer")
            _, _, _, _ = qa_model.check_f1_em(
                sess, eval_context_path, eval_qn_path, eval_ans_path, FLAGS.file_in_path,
                num_samples=FLAGS.print_num,
                print_to_screen=True)  # , summary_writer=summary_writer)
            # summary_writer.close()

    elif FLAGS.mode == "show_attention":
        # To show a few examples with the attention map.
        with tf.Session(config=config) as sess:
            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)
            # summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
            eval_context_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".graph")
            eval_qn_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".instruction")
            eval_ans_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".answer")
            qa_model.demo(sess, eval_context_path, eval_qn_path, eval_ans_path,
                          FLAGS.file_in_path, num_samples=FLAGS.print_num,
                          print_to_screen=True, shuffle=False)  # , summary_writer=summary_writer)

    elif FLAGS.mode == "official_eval":
        with tf.Session(config=config) as sess:
            if FLAGS.ckpt_load_dir:
                # Load model from ckpt_load_dir
                initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True)
            else:
                # Load best model
                initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)
            eval_context_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".graph")
            eval_qn_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".instruction")
            eval_ans_path = os.path.join(FLAGS.data_dir, FLAGS.file_in_path + ".answer")
            f1, em, edit_dist, rem = qa_model.check_f1_em(
                sess, eval_context_path, eval_qn_path, eval_ans_path, FLAGS.file_in_path,
                num_samples=0, print_to_screen=False, write_out=FLAGS.write_out,
                file_out=FLAGS.file_out_path, shuffle=False)
            logging.info("F1 score: %f, EM score: %f, edit distance: %f, rough EM score: %f" %
                         (f1, em, edit_dist, rem))

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
def main(): print("Starting LSTM training for CLS GAN ...") ########## SAVED VARIABLES ######### new_epoch = 0 train_losses = {"generator": [], "discriminator": []} val_losses = {"generator": [], "discriminator": []} losses = {'train': train_losses, 'val': val_losses} model_options = constants.MAIN_MODEL_OPTIONS caption_dict = load_flowers_capt_dict( data_dir='Data') # filename --> [captions] img_dict = load_image_dict() # filename --> 28 x 28 image if os.path.exists('Data/vocab/glove_matrix.torch'): paths = [ 'Data/vocab/glove_matrix.torch', 'Data/vocab/word_to_idx.torch', 'Data/vocab/idx_to_word.torch' ] embeddings, word2id, id2word = load_glove(paths) else: emb_matrix, word2id, id2word = get_glove(constants.GLOVE_PATH, constants.EMBED_DIM) embeddings = torch.from_numpy(emb_matrix).float() torch.save(embeddings, 'Data/vocab/glove_matrix.torch') torch.save(word2id, 'Data/vocab/word_to_idx.torch') torch.save(id2word, 'Data/vocab/idx_to_word.torch') print("shape of embedding size: ", embeddings.size()) lstm = LSTM(model_options, embeddings) lstm_weights(lstm) if torch.cuda.is_available(): lstm = lstm.cuda() generator, discriminator = choose_model(model_options) g_optimizer, d_optimizer = choose_optimizer(generator, discriminator) lstm_optimizer = optim.Adam(lstm.parameters(), lr=constants.LR, betas=constants.BETAS) ########## RESUME OPTION ########## if args.resume: print("Resuming from epoch " + args.resume) checkpoint = torch.load(constants.SAVE_PATH + 'weights/epoch' + str(args.resume)) new_epoch = checkpoint['epoch'] + 1 generator.load_state_dict(checkpoint['g_dict']) discriminator.load_state_dict(checkpoint['d_dict']) lstm.load_state_dict(checkpoint['lstm_dict']) g_optimizer.load_state_dict(checkpoint['g_optimizer']) d_optimizer.load_state_dict(checkpoint['d_optimizer']) losses = torch.load(constants.SAVE_PATH + 'losses') ########## VARIABLES ########## noise_vec = torch.FloatTensor(constants.BATCH_SIZE, model_options['z_dim'], 1, 1) # text_vec = torch.FloatTensor(constants.BATCH_SIZE, model_options['caption_vec_len']) real_img = torch.FloatTensor(constants.BATCH_SIZE, model_options['image_channels'], constants.IMAGE_SIZE, constants.IMAGE_SIZE) real_caption = torch.FloatTensor(constants.BATCH_SIZE, model_options['caption_vec_len']) if constants.USE_CLS: wrong_img = torch.FloatTensor(constants.BATCH_SIZE, model_options['image_channels'], constants.IMAGE_SIZE, constants.IMAGE_SIZE) wrong_caption = torch.FloatTensor(constants.BATCH_SIZE, model_options['caption_vec_len']) # Add cuda GPU option if torch.cuda.is_available(): noise_vec = noise_vec.cuda() # text_vec = text_vec.cuda() real_img = real_img.cuda() # real_caption = real_caption.cuda() if constants.USE_CLS: wrong_img = wrong_img.cuda() ################################ # Now get batch of captions and glove embeddings # Use this batch as input to BiRNN w LSTM cells # Use generator loss to update lstm -- look into line 229, main.py # TODO: Loop over epochs in constants.NUM_EPOCHS ################################ num_iterations = 0 for epoch in range(constants.NUM_EPOCHS): print("Epoch %d" % (epoch)) st = time.time() for i, batch_iter in enumerate( grouper(caption_dict.keys(), constants.BATCH_SIZE)): batch_keys = [x for x in batch_iter if x is not None] if len(batch_keys) < constants.BATCH_SIZE: continue curr_batch_size = len(batch_keys) init_model(discriminator, generator, lstm) ########## BATCH DATA ######### noise_batch = torch.randn(curr_batch_size, model_options['z_dim'], 1, 1) caption_embeds, real_embeds = text_model(batch_keys, 
caption_dict, word2id, lstm) real_img_batch = torch.Tensor( choose_real_image(img_dict, batch_keys)) if constants.USE_CLS: wrong_img_batch = torch.Tensor( util.choose_wrong_image(train_image_dict, batch_keys)) if torch.cuda.is_available(): noise_batch = noise_batch.cuda() real_img_batch = real_img_batch.cuda() if constants.USE_CLS: wrong_img_batch = wrong_img_batch.cuda() # Fill in tensors with batch data noise_vec.resize_as_(noise_batch).copy_(noise_batch) # text_vec.resize_as_(caption_embeds).copy_(caption_embeds) # real_caption.resize_as_(real_embeds).copy_(real_embeds) real_img.resize_as_(real_img_batch).copy_(real_img_batch) if constants.USE_CLS: wrong_img.resize_as_(wrong_img_batch).copy_(wrong_img_batch) # Returns variable tensor of size (BATCH_SIZE, 1, 4800) # caption_embeds, real_embeds = text_model(batch_keys, caption_dict, word2id, lstm) # real_img_batch = torch.Tensor(choose_real_image(img_dict, batch_keys)) # wrong_img_batch = torch.Tensor(choose_wrong_image(img_dict, batch_keys)) # Run through generator gen_image = generator.forward(Variable(text_vec), Variable(noise_vec)) real_img_passed = discriminator.forward(Variable(real_img_batch), Variable(real_caption)) fake_img_passed = discriminator.forward(gen_image.detach(), Variable(real_caption)) wrong_img_passed = discriminator.forward(Variable(wrong_img_batch), Variable(real_caption)) ########## TRAIN DISCRIMINATOR ########## # Overall loss function for discriminator # L_D = log(y_r) + log(1 - y_f) # Loss of Vanilla GAN with CLS # log(1 - y_w) is the caption loss sensitivity CLS (makes sure that captions match the image) # L_D = log(y_r) + log(1 - y_w) + log(1 - y_f) # Add one-sided label smoothing to the real images of the discriminator d_real_loss = f.binary_cross_entropy( real_img_passed, torch.ones_like(real_img_passed) - model_options['label_smooth']) d_fake_loss = f.binary_cross_entropy( fake_img_passed, torch.zeros_like(fake_img_passed)) d_wrong_loss = f.binary_cross_entropy( wrong_img_passed, torch.zeros_like(wrong_img_passed)) d_loss = d_real_loss + d_fake_loss + d_wrong_loss d_loss.backward() d_optimizer.step() ########## TRAIN GENERATOR ########## generator.zero_grad() for p in discriminator.parameters(): p.requires_grad = False # Regenerate the image noise_vec = torch.randn(constants.BATCH_SIZE, model_options['z_dim'], 1, 1) if torch.cuda.is_available(): noise_vec = noise_vec.cuda() gen_image = generator.forward(Variable(text_vec), Variable(noise_vec)) new_fake_img_passed = discriminator.forward(gen_image, real_embeds) g_loss = f.binary_cross_entropy(new_fake_img_passed, torch.ones_like(fake_img_passed)) g_loss.backward() g_optimizer.step() ########## TRAIN LSTM ############## lstm.zero_grad() lstm_loss = g_loss lstm_loss.backward() lstm_optimizer.step() if i % constants.LOSS_SAVE_IDX == 0: losses['train']['generator'].append((g_loss.data[0], epoch, i)) losses['train']['discriminator'].append( (d_loss.data[0], epoch, i)) num_iterations += 1 print('batch ' + str(i) + ' complete.') print('Total number of iterations: ', num_iterations) print('Training G Loss: ', g_loss.data[0]) print('Training D Loss: ', d_loss.data[0]) epoch_time = time.time() - st print("Time: ", epoch_time) # Save losses torch.save(losses, constants.SAVE_PATH + 'losses') # Save images vutils.save_image(gen_image[0].data.cpu(), constants.SAVE_PATH + 'images/gen0_epoch' + str(epoch) + '.png', normalize=True) vutils.save_image(gen_image[1].data.cpu(), constants.SAVE_PATH + 'images/gen1_epoch' + str(epoch) + '.png', normalize=True) # Save model if 
epoch % constants.CHECKPOINT_FREQUENCY == 0 and epoch != 0 or epoch == constants.NUM_EPOCHS - 1: save_checkpoint = { 'epoch': epoch, 'g_dict': generator.state_dict(), 'd_dict': discriminator.state_dict(), 'lstm_dict': lstm.state_dict(), 'g_optimizer': g_optimizer.state_dict(), 'd_optimizer': d_optimizer.state_dict(), } torch.save(save_checkpoint, constants.SAVE_PATH + 'weights/epoch' + str(epoch))
def main(unused_argv):
    # Check the supplied arguments
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" % unused_argv)
    if not FLAGS.experiment_name:
        raise Exception("You need to specify --experiment_name")
    if not FLAGS.ckpt_load_dir and FLAGS.mode == "eval":
        raise Exception("You need to specify a directory to load the checkpoint for eval")
    if (not FLAGS.data_source) or (FLAGS.data_source != "ssd" and FLAGS.data_source != "ram"):
        raise Exception("You need to specify how to load data. Choose from ram and ssd.")

    # Absolute path of the directory containing main.py
    FLAGS.MAIN_DIR = os.path.dirname(os.path.abspath(__file__))
    # Absolute path of the data/ directory
    FLAGS.DATA_DIR = os.path.join(FLAGS.MAIN_DIR, "data")
    # Absolute path of the experiments/ directory
    FLAGS.EXPERIMENTS_DIR = os.path.join(FLAGS.MAIN_DIR, "experiments")
    FLAGS.train_dir = os.path.join(FLAGS.EXPERIMENTS_DIR, FLAGS.experiment_name)
    FLAGS.bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")
    # Store the prediction results (for evaluation) during training
    FLAGS.train_res_dir = os.path.join(FLAGS.train_dir, "myCaptions.json")
    FLAGS.glove_path = os.path.join(FLAGS.MAIN_DIR, "glove.6B.300d.trimmed.txt")
    FLAGS.goldAnn_train_dir = os.path.join(FLAGS.MAIN_DIR, "coco/annotations/captions_train2014.json")
    FLAGS.goldAnn_val_dir = os.path.join(FLAGS.MAIN_DIR, "coco/annotations/captions_val2014.json")

    # Load embedding matrix and vocab mappings
    random_init = (FLAGS.special_token == "train")
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, 300, random_init=random_init)

    # Initialize model
    caption_model = CaptionModel(FLAGS, id2word, word2id, emb_matrix)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    ####################################################################################
    if FLAGS.mode == "train":
        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Make bestmodel dir if necessary
        if not os.path.exists(FLAGS.bestmodel_dir):
            os.makedirs(FLAGS.bestmodel_dir)

        with tf.Session(config=config) as sess:
            # Load most recent model
            initialize_model(sess, caption_model, FLAGS.train_dir, expect_exists=False)
            caption_model.train(sess)

    ####################################################################################
    # Sample evaluation command:
    #   python main.py --mode=eval --experiment_name=baseline --ckpt_load_dir=./experiments/baseline/best_checkpoint
    elif FLAGS.mode == "eval":
        print("Starting official evaluation...")
        with tf.Session(config=config) as sess:
            initialize_model(sess, caption_model, FLAGS.ckpt_load_dir, expect_exists=True)
            # Replace mode with 'test' to evaluate on the test set
            scores = caption_model.check_metric(sess, mode='val', num_samples=0)
            for metric_name, metric_score in scores.items():
                print("{}: {}".format(metric_name, metric_score))

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
def main(unused_argv): # Print an error message if you've entered flags incorrectly if len(unused_argv) != 1: raise Exception("There is a problem with how you entered flags: %s" % unused_argv) # Check for Python 2 if sys.version_info[0] != 2: raise Exception( "ERROR: You must use Python 2 but you are running Python %i" % sys.version_info[0]) # Print out Tensorflow version print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__ # Define train_dir if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_eval": raise Exception( "You need to specify either --experiment_name or --train_dir") FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name) # Initialize bestmodel directory bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint") # Define path for glove vecs FLAGS.glove_path = FLAGS.glove_path or os.path.join( DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size)) # if FLAGS.mode != 'loadProbs': # Load embedding matrix and vocab mappings emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size) # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers train_context_path = os.path.join(FLAGS.data_dir, "train.context") train_qn_path = os.path.join(FLAGS.data_dir, "train.question") train_ans_path = os.path.join(FLAGS.data_dir, "train.span") dev_context_path = os.path.join(FLAGS.data_dir, "dev.context") dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question") dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span") # Initialize model qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix) # Some GPU settings config = tf.ConfigProto() config.gpu_options.allow_growth = True # Split by mode if FLAGS.mode == "train": # Setup train dir and logfile if not os.path.exists(FLAGS.train_dir): os.makedirs(FLAGS.train_dir) file_handler = logging.FileHandler( os.path.join(FLAGS.train_dir, "log.txt")) logging.getLogger().addHandler(file_handler) # Save a record of flags as a .json file in train_dir # with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout: # json.dump(FLAGS.__flags, fout) # Make bestmodel dir if necessary if not os.path.exists(bestmodel_dir): os.makedirs(bestmodel_dir) with tf.Session(config=config) as sess: # Load most recent model initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False) # Train qa_model.train(sess, train_context_path, train_qn_path, train_ans_path, dev_qn_path, dev_context_path, dev_ans_path) elif FLAGS.mode == "show_examples": with tf.Session(config=config) as sess: # Load best model initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True) # Show examples with F1/EM scores _, _ = qa_model.check_f1_em(sess, dev_context_path, dev_qn_path, dev_ans_path, "dev", num_samples=20, print_to_screen=True) elif FLAGS.mode == "official_eval": if FLAGS.json_in_path == "": raise Exception( "For official_eval mode, you need to specify --json_in_path") if FLAGS.ckpt_load_dir == "": raise Exception( "For official_eval mode, you need to specify --ckpt_load_dir") # Read the JSON data from file qn_uuid_data, context_token_data, qn_token_data = get_json_data( FLAGS.json_in_path) with tf.Session(config=config) as sess: # Load model from ckpt_load_dir initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True) # Get a predicted answer for each example in the data # Return a mapping answers_dict from uuid to answer answers_dict = generate_answers(sess, qa_model, 
word2id, qn_uuid_data, context_token_data, qn_token_data) # Write the uuid->answer mapping a to json file in root dir print "Writing predictions to %s..." % FLAGS.json_out_path with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(answers_dict, ensure_ascii=False))) print "Wrote predictions to %s" % FLAGS.json_out_path elif FLAGS.mode == "official_eval_with_bidaf": if FLAGS.json_in_path == "": raise Exception( "For official_eval mode, you need to specify --json_in_path") if FLAGS.ckpt_load_dir == "": raise Exception( "For official_eval mode, you need to specify --ckpt_load_dir") # Read the JSON data from file qn_uuid_data, context_token_data, qn_token_data = get_json_data( FLAGS.json_in_path) with tf.Session(config=config) as sess: # Load model from ckpt_load_dir initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True) # Get a predicted answer for each example in the data # Return a mapping answers_dict from uuid to answer answers_dict, bidaf_dict, self_dict1, self_dict2, out_dict = generate_answers_with_bidaf( sess, qa_model, word2id, qn_uuid_data, context_token_data, qn_token_data) # Write the uuid->answer mapping a to json file in root dir print "Writing predictions to %s..." % FLAGS.json_out_path with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(answers_dict, ensure_ascii=False))) print "Wrote predictions to %s" % FLAGS.json_out_path print "Writing sims to %s..." % FLAGS.json_out_path with io.open(FLAGS.json_out_path + '-bidaf', 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(bidaf_dict, ensure_ascii=False))) print "Wrote sims to %s" % FLAGS.json_out_path print "Writing self sims1 to %s..." % FLAGS.json_out_path with io.open(FLAGS.json_out_path + '-self1', 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(self_dict1, ensure_ascii=False))) print "Wrote self sims1 to %s" % FLAGS.json_out_path print "Writing self sims2 to %s..." % FLAGS.json_out_path with io.open(FLAGS.json_out_path + '-self2', 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(self_dict2, ensure_ascii=False))) print "Wrote self sims2 to %s" % FLAGS.json_out_path print "Writing preds to %s..." % FLAGS.json_out_path with io.open(FLAGS.json_out_path + '-preds', 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(out_dict, ensure_ascii=False))) print "Wrote preds to %s" % FLAGS.json_out_path elif FLAGS.mode == 'saveProbs': if FLAGS.json_in_path == "": raise Exception( "For official_eval mode, you need to specify --json_in_path") if FLAGS.ckpt_load_dir == "": raise Exception( "For official_eval mode, you need to specify --ckpt_load_dir") # Read the JSON data from file qn_uuid_data, context_token_data, qn_token_data = get_json_data( FLAGS.json_in_path) with tf.Session(config=config) as sess: # Load model from ckpt_load_dir initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True) # Get a predicted answer for each example in the data # Return a mapping answers_dict from uuid to answer answers_dict = save_answer_probs(sess, qa_model, word2id, qn_uuid_data, context_token_data, qn_token_data) # Write the uuid->answer mapping a to json file in root dir print "Writing predictions to %s..." 
% FLAGS.json_out_path with io.open(FLAGS.json_out_path, 'wb') as f: pickle.dump(answers_dict, f, protocol=2) # f.write(unicode(pickle.dumps(answers_dict, ensure_ascii=False))) # f.write(unicode(json.dumps(answers_dict, ensure_ascii=False))) print "Wrote predictions to %s" % FLAGS.json_out_path elif FLAGS.mode == 'loadProbs': if FLAGS.json_in_path == "": raise Exception( "For official_eval mode, you need to specify --json_in_path") if FLAGS.ckpt_load_dir == "": raise Exception( "For official_eval mode, you need to specify --ckpt_load_dir") # Read the JSON data from file qn_uuid_data, context_token_data, qn_token_data = get_json_data( FLAGS.json_in_path) # word2id = pickle.load(open('word2id', 'rb')) # pickle.dump(word2id, open('word2id', 'wb')) print 'Loaded data' dictLists = [] for file in os.listdir('./pickles'): f = os.path.join('./pickles', file) print 'Loading predictions from ', f prob_dict = pickle.load(open(f, 'rb')) dictLists += [prob_dict] # mainDict = {} # stdiDict = {} # for probs in dictLists: # for k in dictLists[0].keys(): # stdi = 1.0 / (np.std(np.array(probs[k][0])) + np.std(np.array(probs[k][1])) + 1e-2) # stdiDict[k] = stdi # try: # mainDict[k] = (mainDict[k][0] + stdi * np.array(probs[k][0]), mainDict[k][1] + stdi* np.array(probs[k][1])) # except KeyError: # mainDict[k] = (stdi* np.array(probs[k][0]), stdi*np.array(probs[k][1])) uuid2ans = {} # maps uuid to string containing predicted answer detokenizer = MosesDetokenizer() # for k in mainDict.keys(): # start_dist = mainDict[k][0] / stdiDict[k] # end_dist = mainDict[k][1] / stdiDict[k] # # Take argmax to get start_pos and end_post, both shape (batch_size) # end_dp = np.zeros(end_dist.shape) # # start_pos = np.argmax(start_dist) # # end_pos = np.argmax(end_dist) # end_dp[-1]=end_dist[-1] # for i in range(len(end_dist)-2,-1,-1): # end_dp[i]=np.amax([end_dist[i],end_dp[i+1]]) # start_pos=np.argmax(start_dist*end_dp) # end_pos = start_pos + np.argmax(end_dist[start_pos:]) # uuid2ans[k] = (start_pos, end_pos) for k in dictLists[0].keys(): spanDict = {} for probs in dictLists: start_dist = np.array(probs[k][0]) end_dist = np.array(probs[k][1]) # Take argmax to get start_pos and end_post, both shape (batch_size) end_dp = np.zeros(end_dist.shape) end_dp[-1] = end_dist[-1] for i in range(len(end_dist) - 2, -1, -1): end_dp[i] = np.amax([end_dist[i], end_dp[i + 1]]) start_pos = np.argmax(start_dist * end_dp) end_pos = start_pos + np.argmax(end_dist[start_pos:]) try: spanDict[(start_pos, end_pos)] += [ start_dist[start_pos] * end_dist[end_pos] ] except KeyError: spanDict[(start_pos, end_pos)] = [ start_dist[start_pos] * end_dist[end_pos] ] best_span = (0, 0) best_span_votes = 0 best_span_prob = 0 for span in spanDict.keys(): if len(spanDict[span]) > best_span_votes: best_span = span best_span_votes = len(spanDict[span]) best_span_prob = max(spanDict[span]) elif len( spanDict[span] ) == best_span_votes and best_span_prob < max(spanDict[span]): best_span = span best_span_votes = len(spanDict[span]) best_span_prob = max(spanDict[span]) uuid2ans[k] = (best_span[0], best_span[1]) result = {} data_size = len(qn_uuid_data) num_batches = ((data_size - 1) / FLAGS.batch_size) + 1 batch_num = 0 print "Generating answers..." 
for batch in get_batch_generator(word2id, qn_uuid_data, context_token_data, qn_token_data, FLAGS.batch_size, FLAGS.context_len, FLAGS.question_len): # For each example in the batch: for ex_idx in range(FLAGS.batch_size): # Detokenize and add to dict try: uuid = batch.uuids[ex_idx] pred_start, pred_end = uuid2ans[uuid] # Original context tokens (no UNKs or padding) for this example context_tokens = batch.context_tokens[ ex_idx] # list of strings # Check the predicted span is in range assert pred_start in range(len(context_tokens)) assert pred_end in range(len(context_tokens)) # Predicted answer tokens pred_ans_tokens = context_tokens[pred_start:pred_end + 1] # list of strings result[uuid] = detokenizer.detokenize(pred_ans_tokens, return_str=True) except IndexError: pass batch_num += 1 if batch_num % 10 == 0: print "Generated answers for %i/%i batches = %.2f%%" % ( batch_num, num_batches, batch_num * 100.0 / num_batches) print "Finished generating answers for dataset." answers_dict = result # Write the uuid->answer mapping a to json file in root dir print "Writing predictions to %s..." % FLAGS.json_out_path with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(answers_dict, ensure_ascii=False))) print "Wrote predictions to %s" % FLAGS.json_out_path else: raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
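The loadProbs branch above combines several models' saved start/end probabilities in two steps: each model's best span is found with a suffix-max dynamic program over the end distribution, and the models then vote on spans, with ties broken by the largest span probability. Below is a minimal, self-contained sketch of that procedure; the names best_span_single and vote_best_span are illustrative and not part of the original code.

# Sketch of the span-selection logic used in the loadProbs branch above.
# best_span_single picks argmax_i start_dist[i] * max(end_dist[i:]) via a
# suffix-max (end_dp); vote_best_span lets models vote, breaking ties by the
# highest span probability. Function names here are illustrative.
import numpy as np

def best_span_single(start_dist, end_dist):
    """Return (start, end, prob) for one model's start/end distributions."""
    start_dist = np.asarray(start_dist, dtype=np.float64)
    end_dist = np.asarray(end_dist, dtype=np.float64)
    # end_dp[i] = max(end_dist[i:]), computed right-to-left
    end_dp = np.zeros_like(end_dist)
    end_dp[-1] = end_dist[-1]
    for i in range(len(end_dist) - 2, -1, -1):
        end_dp[i] = max(end_dist[i], end_dp[i + 1])
    start_pos = int(np.argmax(start_dist * end_dp))
    end_pos = start_pos + int(np.argmax(end_dist[start_pos:]))
    return start_pos, end_pos, float(start_dist[start_pos] * end_dist[end_pos])

def vote_best_span(per_model_dists):
    """Majority vote over models; ties broken by the largest span probability."""
    span_probs = {}
    for start_dist, end_dist in per_model_dists:
        s, e, p = best_span_single(start_dist, end_dist)
        span_probs.setdefault((s, e), []).append(p)
    return max(span_probs.items(),
               key=lambda kv: (len(kv[1]), max(kv[1])))[0]

# Example: two models that agree on span (1, 2) outvote one preferring (0, 0).
dists = [([0.1, 0.7, 0.2], [0.1, 0.2, 0.7]),
         ([0.2, 0.6, 0.2], [0.1, 0.3, 0.6]),
         ([0.8, 0.1, 0.1], [0.7, 0.2, 0.1])]
print(vote_best_span(dists))  # -> (1, 2)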
def main(unused_argv): # Print an error message if you've entered flags incorrectly if len(unused_argv) != 1: raise Exception("There is a problem with how you entered flags: %s" % unused_argv) # Check for Python 2 if sys.version_info[0] != 2: raise Exception( "ERROR: You must use Python 2 but you are running Python %i" % sys.version_info[0]) # Print out Tensorflow version print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__ # Define train_dir if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_eval": raise Exception( "You need to specify either --experiment_name or --train_dir") FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name) # Initialize bestmodel directory bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint") # Define path for glove vecs FLAGS.glove_path = FLAGS.glove_path or os.path.join( DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size)) # Load embedding matrix and vocab mappings emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size) # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers train_context_path = os.path.join(FLAGS.data_dir, "train.context") train_qn_path = os.path.join(FLAGS.data_dir, "train.question") train_ans_path = os.path.join(FLAGS.data_dir, "train.span") dev_context_path = os.path.join(FLAGS.data_dir, "dev.context") dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question") dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span") # Initialize model qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix) # Some GPU settings config = tf.ConfigProto() config.gpu_options.allow_growth = True # Split by mode if FLAGS.mode == "train": # Setup train dir and logfile if not os.path.exists(FLAGS.train_dir): os.makedirs(FLAGS.train_dir) file_handler = logging.FileHandler( os.path.join(FLAGS.train_dir, "log.txt")) logging.getLogger().addHandler(file_handler) # Save a record of flags as a .json file in train_dir with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout: json.dump(FLAGS.__flags, fout) # Make bestmodel dir if necessary if not os.path.exists(bestmodel_dir): os.makedirs(bestmodel_dir) with tf.Session(config=config) as sess: # Load most recent model initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False) # Train qa_model.train(sess, train_context_path, train_qn_path, train_ans_path, dev_qn_path, dev_context_path, dev_ans_path) elif FLAGS.mode == "show_examples": with tf.Session(config=config) as sess: # Load best model initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True) # Show examples with F1/EM scores _, _ = qa_model.check_f1_em(sess, dev_context_path, dev_qn_path, dev_ans_path, "dev", num_samples=10, print_to_screen=True) elif FLAGS.mode == "official_eval": if FLAGS.json_in_path == "": raise Exception( "For official_eval mode, you need to specify --json_in_path") if FLAGS.ckpt_load_dir == "": raise Exception( "For official_eval mode, you need to specify --ckpt_load_dir") # Read the JSON data from file qn_uuid_data, context_token_data, qn_token_data = get_json_data( FLAGS.json_in_path) print "qn_uuid_data" print qn_uuid_data print "#" * 100 print "context_token_data" print context_token_data print "#" * 100 print "qn_token_data" print qn_token_data with tf.Session(config=config) as sess: # Load model from ckpt_load_dir initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True) # Get a predicted answer for 
each example in the data # Return a mapping answers_dict from uuid to answer answers_dict = generate_answers(sess, qa_model, word2id, qn_uuid_data, context_token_data, qn_token_data) # Write the uuid->answer mapping to a json file in root dir print "Writing predictions to %s..." % FLAGS.json_out_path with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(answers_dict, ensure_ascii=False))) print "Wrote predictions to %s" % FLAGS.json_out_path else: raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
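initialize_model is called in every mode above but is not defined in this file. As a rough guide to what it must do, here is a minimal sketch, assuming the model object exposes a tf.train.Saver as model.saver and that a missing checkpoint is only tolerated when expect_exists is False (TF 1.x API); this is an assumption, not the project's actual implementation.

# Plausible minimal sketch of initialize_model (assumed, not from this file):
# restore the newest checkpoint in checkpoint_dir, or fall back to fresh
# parameters when expect_exists is False.
import tensorflow as tf

def initialize_model(session, model, checkpoint_dir, expect_exists):
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("Restoring model from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    elif expect_exists:
        raise Exception("No checkpoint found in %s" % checkpoint_dir)
    else:
        print("Initializing model with fresh parameters")
        session.run(tf.global_variables_initializer())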
def get_glove(glove_dim, glove_file=None, glove_dir=glove_dir):
    if glove_file is None:
        glove_prefix = os.path.join(glove_dir, 'glove.6B.')
        glove_suffix = 'd.txt'
        glove_file = glove_prefix + str(glove_dim) + glove_suffix
    return vocab.get_glove(glove_file, glove_dim)
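For reference, a hypothetical call pattern for this wrapper, assuming glove_dir points at a directory containing the standard glove.6B.*d.txt files and that vocab.get_glove returns the usual (emb_matrix, word2id, id2word) triple; the explicit file path below is a placeholder.

# Illustrative calls (not from the original file)
emb_matrix, word2id, id2word = get_glove(100)  # loads glove.6B.100d.txt from glove_dir
emb_matrix, word2id, id2word = get_glove(300, glove_file="/path/to/vectors.300d.txt")  # placeholder path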
        self.context_len = context_len
        self.question_len = question_len
        self.embedding_size = embedding_size
        self.char_size = char_size
        self.num_of_char = num_of_char
        self.max_word_len = max_word_len
        self.dropout = dropout
        self.elmo_dir = elmo_dir


FLAGS = FLAGS(batch_size, hidden_size, context_len, question_len,
              embedding_size, char_size, num_of_char, max_word_len, dropout,
              elmo_dir)
glove_path = "/Users/lam/Desktop/Lam-cs224n/Projects/qa/squadV2/data/glove.6B.50d.txt"
emb_matrix, word2id, id2word = get_glove(glove_path, FLAGS.embedding_size)
char2id, id2char = get_char_embed()


class QAModel(object):
    def __init__(self, FLAGS, id2word, word2id, emb_matrix, id2char, char2id):
        self.FLAGS = FLAGS
        self.id2word = id2word
        self.word2id = word2id
        self.emb_matrix = emb_matrix
        self.id2char = id2char
        self.char2id = char2id
        self.batcher = Batcher(
            "/Users/lam/Desktop/Lam-cs224n/Projects/qa/squad/data/elmo/elmo_vocab.txt",
            50)
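The FLAGS container built above is a hand-rolled class that just holds hyperparameters. A lighter-weight equivalent, shown only as a sketch with placeholder values, is a namedtuple with the same fields; attribute access (FLAGS.embedding_size, FLAGS.dropout, ...) is unchanged.

# Sketch only: same fields as the constructor above, placeholder values.
from collections import namedtuple

HParams = namedtuple("HParams", [
    "batch_size", "hidden_size", "context_len", "question_len",
    "embedding_size", "char_size", "num_of_char", "max_word_len",
    "dropout", "elmo_dir",
])

hparams = HParams(batch_size=32, hidden_size=200, context_len=300,
                  question_len=30, embedding_size=50, char_size=20,
                  num_of_char=71, max_word_len=16, dropout=0.15,
                  elmo_dir="data/elmo")  # placeholder values
print(hparams.embedding_size)  # accessed exactly like FLAGS.embedding_size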
def main(unused_argv): # Print an error message if you've entered flags incorrectly if len(unused_argv) != 1: raise Exception("There is a problem with how you entered flags: %s" % unused_argv) # Check for Python 2 if sys.version_info[0] != 2: raise Exception( "ERROR: You must use Python 2 but you are running Python %i" % sys.version_info[0]) # Print out Tensorflow version print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__ # Define train_dir if not FLAGS.experiment_name and not FLAGS.train_dir and \ FLAGS.mode != "official_eval" and FLAGS.mode!= "ensemble_write" and FLAGS.mode!= "ensemble_predict": raise Exception( "You need to specify either --experiment_name or --train_dir") FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name) # Initialize bestmodel directory bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint") # Define path for glove vecs FLAGS.glove_path = FLAGS.glove_path or os.path.join( DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size)) # Load embedding matrix and vocab mappings emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size) # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers train_context_path = os.path.join(FLAGS.data_dir, "train.context") train_qn_path = os.path.join(FLAGS.data_dir, "train.question") train_ans_path = os.path.join(FLAGS.data_dir, "train.span") dev_context_path = os.path.join(FLAGS.data_dir, "dev.context") dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question") dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span") small_context_path = os.path.join(FLAGS.data_dir, "small.context") small_qn_path = os.path.join(FLAGS.data_dir, "small.question") small_ans_path = os.path.join(FLAGS.data_dir, "small.span") qa_model = None # Initialize model if FLAGS.model_name == "baseline": print("Using baseline model") qa_model = QABaselineModel(FLAGS, id2word, word2id, emb_matrix) elif FLAGS.model_name == "bidaf": qa_model = QABidafModel(FLAGS, id2word, word2id, emb_matrix) elif FLAGS.model_name == "selfattn": print("Using Self Attention") qa_model = QASelfAttnModel(FLAGS, id2word, word2id, emb_matrix) elif FLAGS.model_name == "stack": print("Using stack BIDAF/SA") qa_model = QAStackModel(FLAGS, id2word, word2id, emb_matrix) elif FLAGS.model_name == "pointer": print("Using pointer model") qa_model = QAPointerModel(FLAGS, id2word, word2id, emb_matrix) # Some GPU settings config = tf.ConfigProto() config.gpu_options.allow_growth = True # Split by mode if FLAGS.mode == "train": # Setup train dir and logfile if not os.path.exists(FLAGS.train_dir): os.makedirs(FLAGS.train_dir) file_handler = logging.FileHandler( os.path.join(FLAGS.train_dir, "log.txt")) logging.getLogger().addHandler(file_handler) # Save a record of flags as a .json file in train_dir with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout: json.dump(FLAGS.__flags, fout) # Make bestmodel dir if necessary if not os.path.exists(bestmodel_dir): os.makedirs(bestmodel_dir) with tf.Session(config=config) as sess: # Load most recent model initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False) # Train qa_model.train(sess, train_context_path, train_qn_path, train_ans_path, dev_qn_path, dev_context_path, dev_ans_path) elif FLAGS.mode == "test": # Setup train dir and logfile if not os.path.exists(FLAGS.train_dir): os.makedirs(FLAGS.train_dir) file_handler = logging.FileHandler( os.path.join(FLAGS.train_dir, "log.txt")) 
logging.getLogger().addHandler(file_handler) # Save a record of flags as a .json file in train_dir with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout: json.dump(FLAGS.__flags, fout) # Make bestmodel dir if necessary if not os.path.exists(bestmodel_dir): os.makedirs(bestmodel_dir) with tf.Session(config=config) as sess: # Load most recent model initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False) # Train qa_model.train(sess, small_context_path, small_qn_path, small_ans_path, dev_qn_path, dev_context_path, dev_ans_path) elif FLAGS.mode == "show_examples": with tf.Session(config=config) as sess: # Load best model initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True) # Show examples with F1/EM scores _, _ = qa_model.check_f1_em(sess, dev_context_path, dev_qn_path, dev_ans_path, "dev", num_samples=10, print_to_screen=True) elif FLAGS.mode == "visualize": with tf.Session(config=config) as sess: # Load best model initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True) # Get distribution of begin and end spans. begin_total, end_total, f1_em_scores = qa_model.get_spans( sess, dev_context_path, dev_qn_path, dev_ans_path, "dev") np.save(os.path.join(FLAGS.train_dir, "begin_span"), begin_total) np.save(os.path.join(FLAGS.train_dir, "end_span"), end_total) np.save(os.path.join(FLAGS.train_dir, "f1_em"), f1_em_scores) # Visualize distribution of Context to Question attention c2q_attn = qa_model.get_c2q_attention(sess, dev_context_path, dev_qn_path, dev_ans_path, "dev", num_samples=0) np.save(os.path.join(FLAGS.train_dir, "c2q_attn"), c2q_attn) q2c_attn = qa_model.get_q2c_attention(sess, dev_context_path, dev_qn_path, dev_ans_path, "dev", num_samples=0) if len(q2c_attn > 0): np.save(os.path.join(FLAGS.train_dir, "q2c_attn"), q2c_attn) else: print 'This model doesn\'t have question to context attention' self_attn = qa_model.get_self_attention(sess, dev_context_path, dev_qn_path, dev_ans_path, "dev", num_samples=20) if len(self_attn > 0): np.save(os.path.join(FLAGS.train_dir, "self_attn"), self_attn) else: print 'This model doesn\'t have self attention' elif FLAGS.mode == "ensemble_write": if FLAGS.json_in_path == "": raise Exception( "For ensembling mode, you need to specify --json_in_path") if FLAGS.ckpt_load_dir == "": raise Exception( "For ensembling mode, you need to specify --ckpt_load_dir") if FLAGS.ensemble_name == "": raise Exception( "For ensembling mode, you need to specify --ensemble_name") # Read the JSON data from file qn_uuid_data, context_token_data, qn_token_data = get_json_data( FLAGS.json_in_path) with tf.Session(config=config) as sess: # Load model initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True) distributions = generate_distributions(sess, qa_model, word2id, qn_uuid_data, context_token_data, qn_token_data) # np uuid -> [start_dist, end_dist] # Write the uuid->answer mapping a to json file in root dir save_path = os.path.join( FLAGS.ensemble_dir, "distribution_" + FLAGS.ensemble_name + '.json') print "Writing distributions to %s..." 
% save_path with io.open(save_path, 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(distributions, ensure_ascii=False))) print "Wrote distributions to %s" % save_path elif FLAGS.mode == "ensemble_predict": if FLAGS.json_in_path == "": raise Exception( "For ensembling mode, you need to specify --json_in_path") models = ['stack', 'pointer'] distributions = [ os.path.join(FLAGS.ensemble_dir, "distribution_" + m + ".json") for m in models ] total_dict = {} for d in distributions: with open(d) as prediction_file: print d predictions = json.load(prediction_file) for (key, item) in predictions.items(): if total_dict.get(key, None) is None: total_dict[key] = np.asarray(item) else: total_dict[key] += np.asarray(item) for (key, item) in total_dict.items(): total_dict[key][0] /= len(models) total_dict[key][1] /= len(models) # Read the JSON data from file qn_uuid_data, context_token_data, qn_token_data = get_json_data( FLAGS.json_in_path) answers_dict = generate_answers_from_dist(None, qa_model, total_dict, word2id, qn_uuid_data, context_token_data, qn_token_data) # Write the uuid->answer mapping a to json file in root dir print "Writing predictions to %s..." % FLAGS.json_out_path with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(answers_dict, ensure_ascii=False))) print "Wrote predictions to %s" % FLAGS.json_out_path elif FLAGS.mode == "official_eval": if FLAGS.json_in_path == "": raise Exception( "For official_eval mode, you need to specify --json_in_path") if FLAGS.ckpt_load_dir == "": raise Exception( "For official_eval mode, you need to specify --ckpt_load_dir") # Read the JSON data from file qn_uuid_data, context_token_data, qn_token_data = get_json_data( FLAGS.json_in_path) with tf.Session(config=config) as sess: # Load model from ckpt_load_dir initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True) # Get a predicted answer for each example in the data # Return a mapping answers_dict from uuid to answer answers_dict = generate_answers(sess, qa_model, word2id, qn_uuid_data, context_token_data, qn_token_data) # Write the uuid->answer mapping a to json file in root dir print "Writing predictions to %s..." % FLAGS.json_out_path with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f: f.write(unicode(json.dumps(answers_dict, ensure_ascii=False))) print "Wrote predictions to %s" % FLAGS.json_out_path else: raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
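The ensemble_predict branch above averages, per uuid, the [start_dist, end_dist] pairs written by ensemble_write. A self-contained sketch of that averaging step follows; it assumes every model produced distributions of the same length for a given uuid, and the file names are illustrative. Dividing the whole stacked array by the number of models is equivalent to the original code dividing the start and end rows separately.

# Sketch of the per-uuid distribution averaging done in ensemble_predict.
import json
import numpy as np

def average_distributions(paths):
    """paths: per-model JSON files mapping uuid -> [start_dist, end_dist]."""
    total = {}
    for path in paths:
        with open(path) as f:
            for uuid, dists in json.load(f).items():
                arr = np.asarray(dists, dtype=np.float64)  # shape (2, context_len)
                total[uuid] = total.get(uuid, 0.0) + arr
    return {uuid: arr / len(paths) for uuid, arr in total.items()}

# Illustrative usage (file names are placeholders):
# avg = average_distributions(["distribution_stack.json", "distribution_pointer.json"])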