def run(size):
    """Set the step-count flags from ``size`` and run the job chosen by ``FLAGS.mode``.

    ``size`` is written to both ``FLAGS.max_dec_steps`` and
    ``FLAGS.max_enc_steps``; ``FLAGS.min_dec_steps`` becomes ``size // 4``.
    The log directory is ``<log_path>/<FLAGS.exp_name>``, where ``log_path``
    is a module-level name.

    Raises:
        Exception: if the log directory is missing outside train mode, or if
            ``single_pass`` is set outside decode mode.
        ValueError: if the mode is not one of train/eval/decode.
    """
    FLAGS.min_dec_steps = size // 4
    FLAGS.max_dec_steps = size
    FLAGS.max_enc_steps = size

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Point log_root at <log_path>/<exp_name>; only train mode may create it.
    FLAGS.log_root = log_path
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode != "train":
            raise Exception("Logdir %s doesn't exist. Run in train mode to create it." % (FLAGS.log_root))
        os.makedirs(FLAGS.log_root)

    print("vocab path is ", FLAGS.vocab_path)
    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)

    # Decoding handles one example at a time, and the beam's hypotheses form
    # the batch, so the batch size has to equal the beam size.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    if FLAGS.mode != 'decode' and FLAGS.single_pass:
        raise Exception("The single_pass flag should only be True in decode mode")

    # Collect the hyperparameters the model needs into an HParams namedtuple.
    wanted = ['mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std',
              'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps',
              'max_enc_steps', 'coverage', 'cov_loss_wt', 'pointer_gen']
    hps_dict = {name: FLAGS[name].value for name in FLAGS if name in wanted}
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # fixed seed for reproducibility

    mode = hps.mode
    if mode == 'train':
        print("creating model...")
        setup_training(SummarizationModel(hps, vocab), batcher)
    elif mode == 'eval':
        run_eval(SummarizationModel(hps, vocab), batcher, vocab)
    elif mode == 'decode':
        # Beam search advances the decoder one step at a time, so the model is
        # built with max_dec_steps=1; the batcher keeps the full value.
        single_step_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(single_step_hps, vocab)
        BeamSearchDecoder(model, batcher, vocab).decode()
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def _load_model():
    """Build the decode-mode summarization model and restore it from disk.

    Populates the module globals ``_settings``, ``_hps``, ``_vocab``,
    ``_model`` and ``_sess``.  Reads the module-level ``_beam_size``,
    ``_vocab_path``, ``_vocab_size`` and ``_model_dir``.

    Raises:
        IOError: if no checkpoint state is available in ``_model_dir``.
    """
    global _settings, _hps, _vocab, _sess, _model

    # These imports are slow - lazy import.
    import tensorflow as tf
    from data import Vocab
    from model import Hps, Settings, SummarizationModel

    # Define settings and hyperparameters
    _settings = Settings(
        embeddings_path='',
        log_root='',
        trace_path='',
    )
    _hps = Hps(
        # parameters important for decoding
        attn_only_entities=False,
        batch_size=_beam_size,
        copy_only_entities=False,
        emb_dim=128,
        enc_hidden_dim=200,
        dec_hidden_dim=300,
        max_dec_steps=1,
        max_enc_steps=400,
        mode='decode',
        output_vocab_size=20000,
        restrictive_embeddings=False,
        save_matmul=False,
        tied_output=True,
        two_layer_lstm=True,
        # other parameters
        adagrad_init_acc=.1,
        adam_optimizer=True,
        copy_common_loss_wt=0.,
        cov_loss_wt=0.,
        high_attn_loss_wt=0.,
        lr=.15,
        max_grad_norm=2.,
        people_loss_wt=0.,
        rand_unif_init_mag=.02,
        scatter_loss_wt=0.,
        sharp_loss_wt=0.,
        trunc_norm_init_std=1e-4,
    )

    # Define model
    _vocab = Vocab(_vocab_path, _vocab_size)
    _model = SummarizationModel(_settings, _hps, _vocab)
    _model.build_graph()

    # Look up the checkpoint before opening a session: get_checkpoint_state
    # returns None when no checkpoint state is available, which previously
    # surfaced as an AttributeError after the session had been created.
    ckpt_state = tf.train.get_checkpoint_state(_model_dir)
    if ckpt_state is None or not ckpt_state.model_checkpoint_path:
        raise IOError("No checkpoint found in model directory %r" % (_model_dir,))

    # Load model from disk
    saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True)
    _sess = tf.Session(config=config)
    saver.restore(_sess, ckpt_state.model_checkpoint_path)
def main(unused_argv):
    """Evaluate every not-yet-scored training checkpoint and save the losses.

    Checkpoints are listed from ``<log_root>/train``; results are kept as a
    list of ``[ckpt_file, avg_loss]`` pairs pickled to
    ``<log_root>/eval/result.pkl``.  Checkpoints already present in an
    existing result file are skipped.  The file is rewritten after each
    checkpoint and once more at the end, sorted by loss in descending order.

    Args:
        unused_argv: leftover command-line arguments; must have length 1.

    Raises:
        Exception: if extra arguments were passed or the log directory does
            not exist.
    """
    if len(unused_argv) != 1:
        raise Exception('Problem with flags: %s' % unused_argv)

    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        raise Exception('log directory %s does not exist.' % FLAGS.log_root)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)
    hparam_list = ['mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std',
                   'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps',
                   'max_enc_steps', 'coverage', 'cov_loss_wt', 'pointer_gen']
    # items() rather than iteritems(): this function already branches on the
    # Python major version, and items() exists on both.
    hps_dict = {key: val for key, val in FLAGS.__flags.items() if key in hparam_list}
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    model = SummarizationModel(hps, vocab)
    model.build_graph()
    sess = tf.Session(config=get_config())

    trained_model_folder = os.path.join(FLAGS.log_root, 'train')
    evaluation_folder = os.path.join(FLAGS.log_root, 'eval')
    result_path = evaluation_folder + os.sep + 'result.pkl'
    ckpt_list = get_ckpt_list(trained_model_folder, max_ckpt_num=FLAGS.max_ckpt_num,
                              interval=FLAGS.interval)

    result_map = []
    if os.path.exists(result_path):
        # NOTE(review): unpickling runs arbitrary code; result.pkl must only
        # ever come from a previous run of this script.
        with open(result_path, 'rb') as result_file:
            result_map = cPickle.load(result_file)
        ckpt_list_included = [ckpt_file for ckpt_file, _ in result_map]
        already_done = set(ckpt_list_included)
        ckpt_list = [ckpt_file for ckpt_file in ckpt_list if ckpt_file not in already_done]
        print('%d ckpt already included in the existing result.pkl, skip ...' % len(ckpt_list_included))
    print('There are %d ckpts to evaluate' % len(ckpt_list))

    # Create the output folder up front so the final dump also works when
    # there is nothing new to evaluate.
    if not os.path.exists(evaluation_folder):
        os.makedirs(evaluation_folder)

    # One Saver is enough: building a new one per checkpoint adds another set
    # of save/restore ops to the graph each time.
    saver = tf.train.Saver(max_to_keep=3)
    for idx, ckpt_file in enumerate(ckpt_list):
        print('Start analyzing checkpoint %d/%d' % (idx + 1, len(ckpt_list)))
        load_ckpt(saver, sess, os.path.join(trained_model_folder, ckpt_file))
        batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=True)
        # `eval` is called with four arguments, so it is a project-level
        # function shadowing the builtin, not the builtin itself.
        avg_loss = eval(model, batcher, vocab, sess)
        print('check point:%s, Average loss in validation set: %.3f' % (ckpt_file, avg_loss))
        result_map.append([ckpt_file, avg_loss])
        # Persist progress after every checkpoint so an interrupted run resumes.
        with open(result_path, 'wb') as result_file:
            cPickle.dump(result_map, result_file)

    # Highest loss first; the key-based form works on Python 2 and 3 alike.
    result_map = sorted(result_map, key=lambda entry: entry[1], reverse=True)

    print('==Summary==')
    for ckpt, avg_loss in result_map:
        print('check point: %s, average loss: %.3f' % (ckpt, avg_loss))

    with open(result_path, 'wb') as result_file:
        cPickle.dump(result_map, result_file)
    print('results saved in %s' % (evaluation_folder + os.sep + 'result.pkl'))
def main(unused_argv, sess_config=None, server_target=None):
    """Run inference, training, evaluation or decoding as configured.

    ``default_setup()`` supplies the vocabulary and hyperparameters.  When
    ``hps.inference`` is truthy the raw-text batcher is decoded without
    ROUGE; otherwise ``hps.mode`` selects train/eval/decode.

    Args:
        unused_argv: ignored.
        sess_config: optional session configuration; when not None it is
            forwarded, together with ``server_target``, to the inference
            ``BeamSearchDecoder``.
        server_target: forwarded to the inference decoder alongside
            ``sess_config``.

    Raises:
        ValueError: if not in inference and the mode is not one of
            train/eval/decode.
    """
    vocab, hps = default_setup()

    if hps.inference:
        # print() with one argument behaves the same on Python 2 and 3; the
        # bare print statement was a SyntaxError on Python 3.
        print("Inference Mode")
        batcher = RawTextBatcher(FLAGS.data_path, vocab, hps, single_pass=FLAGS.single_pass)
        # Beam search runs one decoder step at a time, so the model gets
        # max_dec_steps=1 while the batcher keeps the full value.
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(decode_model_hps, vocab)
        if sess_config is None:
            decoder = BeamSearchDecoder(model, batcher, vocab)
        else:
            decoder = BeamSearchDecoder(model, batcher, vocab, sess_config, server_target)
        decoder.decode(withRouge=False)
        return

    # Create a batcher object that will create minibatches of data
    batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=FLAGS.single_pass)

    if hps.mode == 'train':
        print("creating model...")
        model = SummarizationModel(hps, vocab)
        setup_training(model, batcher)
    elif hps.mode == 'eval':
        model = SummarizationModel(hps, vocab)
        run_eval(model, batcher, vocab)
    elif hps.mode == 'decode':
        # Same single-step model configuration as in the inference branch.
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(decode_model_hps, vocab)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        decoder.decode()
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def calc_features(cnn_dm_train_data_path, hps, vocab, batcher, save_path):
    """Build a model and decoder, then compute importance features into ``save_path``.

    ``save_path`` is created if it does not exist.  The work is delegated to
    ``BeamSearchDecoder.calc_importance_features``, called with the data
    path, ``hps``, ``save_path`` and the constant 1000.
    """
    target_missing = not os.path.exists(save_path)
    if target_missing:
        os.makedirs(save_path)

    # The model is built with the caller's hyperparameters unchanged.
    summarizer = SummarizationModel(hps, vocab)
    searcher = BeamSearchDecoder(summarizer, batcher, vocab)
    searcher.calc_importance_features(cnn_dm_train_data_path, hps, save_path, 1000)
def main():
    """Decode with beam search using hyperparameters taken from ``FLAGS``.

    The batch size is unconditionally set to the beam size, and the log
    root is extended with the experiment name.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # log_root becomes <log_root>/<exp_name>.
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)

    FLAGS.batch_size = FLAGS.beam_size

    # Keep only the flags the model needs and freeze them in a namedtuple.
    wanted = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {
        name: value
        for name, value in FLAGS.__flags.iteritems()
        if name in wanted
    }
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # fixed seed

    # The model only ever runs one decoder step at a time during beam search,
    # so it is configured with max_dec_steps=1; the batcher keeps the full value.
    single_step_hps = hps._replace(max_dec_steps=1)
    model = SummarizationModel(single_step_hps, vocab)
    BeamSearchDecoder(model, batcher, vocab).decode()
def main():
    """Force decode-mode flags and publish model, vocab, hps and decoder as globals.

    A fixed set of flags (mode, paths, step limits, coverage, single_pass) is
    overwritten first.  The batcher is built with a hard-coded abstract and
    article.  Nothing is decoded here; only the objects are constructed.

    Raises:
        Exception: if the log directory does not exist (outside train mode),
            or if ``single_pass`` is set outside decode mode.
    """
    global model
    global vocab
    global hps
    global decoder

    # Hard-coded configuration.
    FLAGS.mode = 'decode'
    FLAGS.vocab_path = '../vocab'
    FLAGS.log_root = '../models'
    FLAGS.exp_name = 'pretrained_model_tf1.2.1'
    FLAGS.max_enc_steps = 400
    FLAGS.max_dec_steps = 120
    FLAGS.coverage = 1
    FLAGS.single_pass = True

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # log_root becomes <log_root>/<exp_name>; only train mode may create it.
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode != "train":
            raise Exception("Logdir %s doesn't exist. Run in train mode to create it." % (FLAGS.log_root))
        os.makedirs(FLAGS.log_root)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)

    # In decode mode the beam's hypotheses make up the batch.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    if FLAGS.mode != 'decode' and FLAGS.single_pass:
        raise Exception("The single_pass flag should only be True in decode mode")

    # Freeze the model-relevant flags into an HParams namedtuple.
    wanted = ['mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std',
              'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps',
              'max_enc_steps', 'coverage', 'cov_loss_wt', 'pointer_gen']
    hps_dict = {name: value for name, value in FLAGS.__flags.items() if name in wanted}
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    tf.set_random_seed(111)  # fixed seed

    # Beam search advances one decoder step at a time, hence max_dec_steps=1
    # for the model; the batcher receives the unmodified hps.
    single_step_hps = hps._replace(max_dec_steps=1)
    model = SummarizationModel(single_step_hps, vocab)

    abstract = "tim ist toll"
    article = "tim ist toll"
    batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=FLAGS.single_pass,
                      abstract=abstract, article=article)
    decoder = BeamSearchDecoder(model, batcher, vocab)
def main(unused_argv):
    """Validate the flags, build the batcher and model, and start training.

    Training is started regardless of ``FLAGS.mode``; the mode only affects
    log-directory creation, the batch size and the single_pass check.

    Args:
        unused_argv: leftover command-line arguments; must have length 1.

    Raises:
        Exception: on extra arguments, a missing log directory outside train
            mode, or ``single_pass`` set outside decode mode.
    """
    if len(unused_argv) != 1:
        # Anything beyond the program name means the flags were mistyped.
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # log_root becomes <log_root>/<exp_name>; only train mode may create it.
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode != "train":
            raise Exception(
                "Logdir %s doesn't exist. Run in train mode to create it." %
                (FLAGS.log_root))
        os.makedirs(FLAGS.log_root)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)

    # In decode mode the beam's hypotheses make up the batch.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    if FLAGS.mode != 'decode' and FLAGS.single_pass:
        raise Exception(
            "The single_pass flag should only be True in decode mode")

    # Freeze the model-relevant flags into an HParams namedtuple.
    wanted = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {name: value for name, value in FLAGS.__flags.items() if name in wanted}
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # fixed seed

    print("creating model...")
    setup_training(SummarizationModel(hps, vocab), batcher)
def main(unused_argv):
    """Train, evaluate or decode depending on ``FLAGS.mode``.

    Args:
        unused_argv: leftover command-line arguments; must have length 1.

    Raises:
        Exception: if extra arguments were passed.
        ValueError: if the mode is not one of train/eval/decode.
    """
    if len(unused_argv) != 1:
        raise Exception("Problem with flags: %s" % unused_argv)

    valid_modes = ['train', 'eval', 'decode']
    if FLAGS.mode not in valid_modes:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.set_random_seed(FLAGS.random_seed)
    print('INFO: Starting seq2seq_attention model in {} mode...'.format(
        FLAGS.mode))

    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)

    # When decoding, the batch size is set to the beam size.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # Freeze the model-relevant flags into an HParams namedtuple.
    wanted = [
        'mode', 'lr', 'adagrad_acc', 'norm_unif', 'norm_trunc', 'norm_grad',
        'pointer', 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps',
        'max_enc_steps'
    ]
    hps_dict = {
        name: value
        for name, value in FLAGS.__flags.iteritems()
        if name in wanted
    }
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)
    batcher = Batcher(FLAGS.data_path, vocab, hps, onetime=FLAGS.onetime)

    if hps.mode == 'train':
        print('INFO: creating model...')
        train(SummarizationModel(hps, vocab), batcher)
        return

    if hps.mode == 'eval':
        cval(SummarizationModel(hps, vocab), batcher, vocab)
        return

    if hps.mode == 'decode':
        # The decoding model is configured for a single decoder step.
        single_step_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(single_step_hps, vocab)
        BeamSearchDecoder(model, batcher, vocab)._decode()
def __init__(self, vocab_path, log_root):
    """Build a decode-mode model and beam-search decoder with fixed hyperparameters.

    Args:
        vocab_path: passed to ``Vocab`` together with a vocabulary size of 50000.
        log_root: stored on the instance and forwarded to the model and decoder.
    """
    self.pointer_gen = True
    self.single_pass = True
    self.beam_size = 4
    self.batch_size = self.beam_size  # batch size equals beam size
    self.vocab_size = 50000
    self.vocab_path = vocab_path
    self.log_root = log_root

    # Hyperparameters are hard-coded here and frozen in a namedtuple.
    hps_dict = {
        'mode': 'decode',
        'lr': 0.15,
        'adagrad_init_acc': 0.1,
        'rand_unif_init_mag': 0.02,
        'trunc_norm_init_std': 1e-4,
        'max_grad_norm': 2.0,
        'hidden_dim': 256,
        'emb_dim': 128,
        'batch_size': self.batch_size,
        'max_dec_steps': 100,
        'max_enc_steps': 400,
        'coverage': 1,
        'cov_loss_wt': 1.0,
        'pointer_gen': True,
        'min_dec_steps': 35,
        'beam_size': self.beam_size
    }
    hparams_type = namedtuple("HParams", hps_dict.keys())
    self.hps = hparams_type(**hps_dict)

    self.vocab = Vocab(self.vocab_path, self.vocab_size)

    tf.logging.set_verbosity(tf.logging.INFO)

    # The model is configured for one decoder step because beam search runs
    # the decoder a step at a time; self.hps keeps the full max_dec_steps.
    single_step_hps = self.hps._replace(max_dec_steps=1)

    tf.set_random_seed(111)  # fixed seed

    self.model = SummarizationModel(single_step_hps, self.vocab, self.log_root)
    self.decoder = BeamSearchDecoder(self.model, self.vocab, True, self.hps,
                                     self.pointer_gen, self.log_root)
def training_on_flink(context, sess_config, server_target):
    """Train the summarization model using a Flink-backed batcher and trainer.

    Args:
        context: passed to ``FlinkTrainBatcher``.
        sess_config: forwarded to ``FlinkTrainer``.
        server_target: forwarded to ``FlinkTrainer``.

    Raises:
        ValueError: if ``hps.mode`` from ``default_setup()`` is not 'train'.
    """
    vocab, hps = default_setup()

    # Only training is supported by this entry point.
    if hps.mode != 'train':
        raise ValueError("The 'mode' flag must be one of train/eval/decode")

    batcher = FlinkTrainBatcher(context, vocab, hps)
    tf.logging.info("creating model...")
    model = SummarizationModel(hps, vocab)
    FlinkTrainer(hps, model, batcher, sess_config, server_target).train()
def main(unused_argv):
    """Train, evaluate or decode depending on ``FLAGS.mode``.

    Args:
        unused_argv: ignored.

    Raises:
        Exception: if the log directory does not exist outside train mode.
        ValueError: if the mode is not one of train/eval/decode.
    """
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # log_root becomes <log_root>/<exp_name>; only train mode may create it.
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception("Logdir %s doesn't exist. Run in train mode to create it." % (FLAGS.log_root))

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # In decode mode the beam's hypotheses make up the batch.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    hparam_list = ['mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std',
                   'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps',
                   'max_enc_steps', 'coverage', 'cov_loss_wt', 'pointer_gen']
    # items() exists on Python 2 and 3; iteritems() is Python-2-only.
    hps_dict = {key: val for key, val in FLAGS.__flags.items() if key in hparam_list}
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Formatted into one string so the output is identical under the Python 2
    # print statement semantics and the Python 3 print function.
    print("hps.hidden_dim: %s" % (hps.hidden_dim,))

    # Create a batcher object that will create minibatches of data
    batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    if hps.mode == 'train':
        print("creating model...")
        model = SummarizationModel(hps, vocab)
        print("finish create model...")
        setup_training(model, batcher)
    elif hps.mode == 'eval':
        model = SummarizationModel(hps, vocab)
        run_eval(model, batcher)
    elif hps.mode == 'decode':
        # The decoding model is configured for a single decoder step.
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(decode_model_hps, vocab)
        run_decode(model, batcher, vocab)
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def main(unused_argv):
    """Start the extractive solver, build the abstractive model, and train them.

    Args:
        unused_argv: leftover command-line arguments; must have length 1.

    Raises:
        Exception: if extra arguments were passed.
        ValueError: if ``FLAGS.mode`` is not 'train'.
    """
    if len(unused_argv) != 1:
        raise Exception('Problem with flags: %s' % str(unused_argv))

    # Explicit check instead of `assert` wrapped in a bare `except`: asserts
    # are stripped under `python -O`, and the bare except also caught
    # unrelated errors.
    if FLAGS.mode != 'train':
        raise ValueError('mode must be "train" while now it is "%s"' % FLAGS.mode)

    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)

    data_manager = BinaryDataManager(binary_file=FLAGS.data_path, single_pass=True)
    data_manager.load_data()

    model_hp_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_enc_steps', 'max_dec_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    model_hp_dict = {}
    for key, value in FLAGS.__flags.iteritems():
        if key in model_hp_list:
            model_hp_dict[key] = value
    model_settings = namedtuple('HParams', model_hp_dict.keys())(**model_hp_dict)
    # The abstractive model is configured for a single decoder step.
    model_settings = model_settings._replace(max_dec_steps=1)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)

    # Launch extractive model.  The config path is made absolute first because
    # the working directory changes before it is parsed.
    cur_path = os.path.abspath('.')
    FLAGS.sentence_extract_config = os.path.abspath(
        FLAGS.sentence_extract_config)
    os.chdir(FLAGS.sentence_extract_root)
    try:
        # Relative entries: resolved against the extractive root while we are
        # inside it.
        sys.path.insert(0, 'run')
        sys.path.insert(0, 'util')
        import laucher
        import xml_parser
        laucher_params = xml_parser.parse(FLAGS.sentence_extract_config, flat=False)
        ext_solver = laucher.laucher(laucher_params)
        ext_solver.start()
    finally:
        # Restore the working directory even if the extractive launch fails.
        os.chdir(cur_path)

    # Launch abstractive model
    # NOTE(review): loaded_params is never read in this function.
    loaded_params = tf.global_variables()
    abs_model = SummarizationModel(model_settings, vocab, extra_info={})
    train_model(ext_solver=ext_solver, abs_model=abs_model, data_manager=data_manager)
def main():
    """Decode a JSON-sourced batch with beam search, then drop into pdb.

    Hyperparameters come from ``get_hps()``; the remaining settings are read
    from ``FLAGS``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    args = FLAGS
    vocab = Vocab(args.vocab_path, args.vocab_size)
    hps = get_hps()

    json_source = json_batch(args.json_path, hps, vocab)
    batcher = MyBatcher(json_source, vocab, hps, args.single_pass)

    # The decoding model is configured for a single decoder step.
    single_step_hps = hps._replace(max_dec_steps=1)
    model = SummarizationModel(single_step_hps, vocab)
    BeamSearchDecoder(model, batcher, vocab).decode()

    # NOTE(review): debugger breakpoint kept from the original; execution
    # stops here once decode() returns.
    import pdb
    pdb.set_trace()
def main(unused_argv):
    """Decode with beam search using hyperparameters taken from ``FLAGS``.

    Decoding is run regardless of ``FLAGS.mode``; the mode only decides
    whether the batch size is replaced by the beam size.

    Args:
        unused_argv: ignored.
    """
    # log_root becomes <log_root>/<exp_name>.
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)

    # In decode mode the beam's hypotheses make up the batch.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # Freeze the model-relevant flags into an HParams namedtuple.
    wanted = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {name: value for name, value in FLAGS.__flags.items() if name in wanted}
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # fixed seed

    # Beam search advances one decoder step at a time, hence max_dec_steps=1
    # for the model; the batcher keeps the full value.
    single_step_hps = hps._replace(max_dec_steps=1)
    model = SummarizationModel(single_step_hps, vocab)
    BeamSearchDecoder(model, batcher, vocab).decode()
def inference_on_flink(context, sess_config, server_target):
    """Run beam-search inference with Flink-backed input and output.

    Does nothing unless ``hps.inference`` (from ``default_setup()``) is truthy.

    Args:
        context: passed to ``FlinkInferenceBatcher`` and ``FlinkWriter``.
        sess_config: when not None, forwarded with ``server_target`` and the
            writer to ``BeamSearchDecoder``.
        server_target: forwarded to the decoder alongside ``sess_config``.
    """
    vocab, hps = default_setup()
    if not hps.inference:
        return

    # print() with one argument behaves the same on Python 2 and 3; the bare
    # print statement was a SyntaxError on Python 3.
    print("Inference Mode")
    batcher = FlinkInferenceBatcher(context, vocab, hps)
    writer = FlinkWriter(context)

    # Beam search runs one decoder step at a time, so the model gets
    # max_dec_steps=1 while the batcher keeps the full value.
    decode_model_hps = hps._replace(max_dec_steps=1)
    model = SummarizationModel(decode_model_hps, vocab)

    if sess_config is None:
        decoder = BeamSearchDecoder(model, batcher, vocab)
    else:
        decoder = BeamSearchDecoder(model, batcher, vocab, sess_config,
                                    server_target, writer)
    decoder.decode(withRouge=False)
def training_init(hps):
    """Prepare vocabulary, data, output directory and a built model for training.

    Args:
        hps: hyperparameters; ``vocab_path``, ``vocab_size``, ``data_path``
            and ``log_root`` are read from it.

    Returns:
        Tuple ``(model, vocab, batches, train_dir)`` where ``train_dir`` is
        ``<hps.log_root>/model`` (created if missing).
    """
    vocab = Vocab(hps.vocab_path, hps.vocab_size)
    batches = get_data(hps, vocab, hps.data_path)

    train_dir = os.path.join(hps.log_root, "model")
    directory_missing = not os.path.exists(train_dir)
    if directory_missing:
        os.makedirs(train_dir)

    # Start from an empty default graph before building the model.
    tf.reset_default_graph()

    lm = SummarizationModel(hps=hps, vocab=vocab)
    for build_step in (lm.BuildCoreGraph, lm.BuildTrainGraph, lm.summarizeGraph):
        build_step()

    return lm, vocab, batches, train_dir
def setup_summarizer(settings):
    """Build a decode-mode model on the CPU and wrap it in a ``Summarizer``.

    Args:
        settings: supplies ``vocab_path`` and ``vocab_size``; every other
            value is read from ``FLAGS``.

    Returns:
        ``Summarizer(decoder, vocab=vocab, hps=hps)``.

    Raises:
        ValueError: if the mode flag is not 'decode'.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Starting seq2seq_attention ')

    vocab = Vocab(settings.vocab_path, settings.vocab_size)

    # The beam's hypotheses make up the batch, so the sizes must match.
    FLAGS.batch_size = FLAGS.beam_size

    # Freeze the model-relevant flags into an HParams namedtuple.
    wanted = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {name: value for name, value in FLAGS.__flags.items() if name in wanted}
    hps = namedtuple("HParams", list(hps_dict.keys()))(**hps_dict)

    tf.set_random_seed(111)  # fixed seed

    if hps.mode != 'decode':
        raise ValueError("The 'mode' flag must be decode for serving")

    # Beam search advances one decoder step at a time, hence max_dec_steps=1
    # for the model; the Summarizer receives the unmodified hps.
    single_step_hps = hps._replace(max_dec_steps=1)

    serving_device = '/cpu:0'
    model = SummarizationModel(single_step_hps, vocab, default_device=serving_device)
    # No batcher is supplied to the decoder here.
    decoder = BeamSearchDecoder(model, None, vocab)
    return Summarizer(decoder, vocab=vocab, hps=hps)
def main():
    """Load the dataset, fill in data-dependent flags, and train or evaluate.

    Reads the module-level names ``datapath``, ``dataset`` and ``args``.
    The model is trained when ``args.train`` is not None and evaluated on
    the test split otherwise.
    """
    data, vocab, htree = get_data(datapath, FLAGS.test_size, FLAGS.val_size)

    train_x, train_y = data[0], data[1]
    val_x, val_y = data[2], data[3]
    test_x, test_y = data[4], data[5]
    idx2word, word2idx = vocab[0], vocab[1]

    # Flags derived from the loaded data.
    FLAGS.vocab_len = len(idx2word)
    # Original note: any key would give the correct max path len.
    FLAGS.max_depth = len(htree['<pad>'])
    FLAGS.num_samples = train_x.shape[0]
    FLAGS.sentence_len = train_x.shape[-1]
    FLAGS.timesteps = train_y.shape[-1]

    # Flags taken from the parsed arguments and module configuration.
    FLAGS.embedding_method = args.load_embed
    FLAGS.embedding_size = int(args.dim_embed)
    FLAGS.dataset_name = dataset

    if args.attlayer == "concat":
        FLAGS.multi_concat = False

    print('news headlines format:', train_y.shape)
    print('news descriptions format:', train_x.shape)
    print('number of tokens in the vocabulary', FLAGS.vocab_len)
    print("huffman tree max depth ", max(len(path) for path in htree.values()))

    Model = SummarizationModel(FLAGS, idx2word, htree)

    if args.train is None:
        # Evaluates the model
        Model.eval(test_x, test_y)
    else:
        # Trains the model
        Model.train(train_x, train_y, val_x, val_y)
def _create_pretrained_ckpt(model, hps):
    """Seed a freshly built graph from the pretrained checkpoint and save it.

    Builds the model graph, reads every tensor from the hard-coded pretrained
    checkpoint, assigns the matching trainable variables and writes a new
    checkpoint (step 0) under a directory chosen by the ``use_multi_*`` flags.

    Args:
        model: a ``SummarizationModel`` whose graph has not been built yet.
        hps: hyperparameter namedtuple whose fields are flag objects
            (read through ``.value``).
    """
    step = 0
    model.build_graph()

    print("get value")
    pretrained_ckpt = '/home/cs224u/pointer/log/pretrained_model_tf1.2.1/train/model-238410'
    reader = pywrap_tensorflow.NewCheckpointReader(pretrained_ckpt)
    value = {
        key: reader.get_tensor(key)
        for key in reader.get_variable_to_shape_map()
    }

    print("assign op")
    assign_op = []
    if hps.use_multi_pvocab.value:
        # The per-head projection biases have no counterpart in the pretrained
        # checkpoint; they are initialised from the single shared
        # ".../Linear/Bias" tensor instead.
        new_key = [
            "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear_0/Bias",
            "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear_1/Bias"
        ]
        for v in tf.trainable_variables():
            key = v.name.split(":")[0]
            if key in new_key:
                origin_key = ("seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear/"
                              + key.split("/")[-1])
                a_op = v.assign(tf.convert_to_tensor(value[origin_key]))
            else:
                # NOTE(review): raises KeyError for any trainable variable
                # that is absent from the pretrained checkpoint.
                a_op = v.assign(tf.convert_to_tensor(value[key]))
            assign_op.append(a_op)
    else:
        # Only the embedding matrix is transferred in the other configurations.
        for v in tf.trainable_variables():
            key = v.name.split(":")[0]
            if key == "seq2seq/embedding/embedding":
                assign_op.append(v.assign(tf.convert_to_tensor(value[key])))

    # Op to initialize the variables, and a saver for all of them.
    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session(config=util.get_config()) as sess:
        sess.run(init_op)
        # Overwrite the fresh initial values with the pretrained tensors.
        for a_op in assign_op:
            a_op.op.run()

        # Pick the output directory from the active architecture variant.
        if hps.use_multi_attn.value:
            ckpt_tag = "multi_attn_2_attn_proj"
        elif hps.use_multi_pgen.value:
            ckpt_tag = "multi_attn_2_pgen_proj"
        elif hps.use_multi_pvocab.value:
            ckpt_tag = "big_multi_attn_2_pvocab_proj"
        else:
            ckpt_tag = "pointer_proj"
        ckpt_to_save = ('/home/cs224u/pointer/log/ckpt/' + ckpt_tag
                        + '/model.ckpt-' + str(step))
        save_path = saver.save(sess, ckpt_to_save)
        print("Model saved in path: %s" % save_path)


def main(unused_argv):
    """Entry point: run the summarizer in train, eval or decode mode.

    In train mode with the ``create_ckpt`` flag set, no training happens;
    instead a starting checkpoint is seeded from the pretrained model (see
    ``_create_pretrained_ckpt``).

    Args:
        unused_argv: leftover command-line arguments; must have length 1.

    Raises:
        Exception: on stray command-line arguments, on a missing log directory
            outside train mode, or when ``single_pass`` is set outside decode
            mode.
        ValueError: if the ``mode`` flag is not train/eval/decode.
    """
    print("unused_argv: ", unused_argv)
    if len(unused_argv) != 1:  # flags were entered incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.set_verbosity(tf.logging.INFO)  # logging level
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name; create it if necessary.
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception(
                "Logdir %s doesn't exist. Run in train mode to create it." %
                (FLAGS.log_root))

    print("FLAGS.vocab_size: ", FLAGS.vocab_size)
    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary
    print("vocab size: ", vocab.size())

    # In decode mode one example is decoded at a time; the batch is made of
    # the beam_size hypotheses currently in the beam.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # single_pass only makes sense when decoding.
    if FLAGS.single_pass and FLAGS.mode != 'decode':
        raise Exception(
            "The single_pass flag should only be True in decode mode")

    # Namedtuple of the hyperparameters the model needs. The values stored are
    # the flag objects themselves, hence the `.value` accesses below.
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen', 'fine_tune', 'train_size', 'subred_size',
        'use_doc_vec', 'use_multi_attn', 'use_multi_pgen', 'use_multi_pvocab',
        'create_ckpt'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():
        if key in hparam_list:
            hps_dict[key] = val
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Batcher that produces minibatches of data.
    batcher = Batcher(FLAGS.data_path, vocab, hps,
                      single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    if hps.mode.value == 'train':
        print("creating model...")
        model = SummarizationModel(hps, vocab)
        if hps.create_ckpt.value:
            _create_pretrained_ckpt(model, hps)
        else:
            setup_training(model, batcher, hps)
    elif hps.mode.value == 'eval':
        model = SummarizationModel(hps, vocab)
        run_eval(model, batcher, vocab)
    elif hps.mode.value == 'decode':
        # The model is configured with max_dec_steps=1 because beam search
        # only ever runs one decoder step at a time. The batcher keeps the
        # full max_dec_steps because batches must contain whole summaries.
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(decode_model_hps, vocab)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        # Decode indefinitely (or exactly once when single_pass=True).
        decoder.decode()
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def main(unused_argv):
    """Entry point: prepare importance models if needed, then decode.

    Converts the raw dataset to TF examples when none are found, optionally
    fits the TF-IDF vectorizer or trains the SVR importance model (only when
    one of the ``pg_mmr*`` flags is set and the model file is missing), and
    finally runs beam-search decoding.

    Args:
        unused_argv: leftover command-line arguments; must have length 1.

    Raises:
        Exception: on stray command-line arguments, or when ``single_pass`` is
            set outside decode mode.
        ValueError: if the ``mode`` flag is not ``'decode'``.
    """
    if len(unused_argv) != 1:  # flags were entered incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    if FLAGS.dataset_name != "":
        FLAGS.data_path = os.path.join(FLAGS.data_root, FLAGS.dataset_name,
                                       FLAGS.dataset_split + '*')
    dataset_dir = os.path.join(FLAGS.data_root, FLAGS.dataset_name)
    if not os.path.exists(dataset_dir) or len(os.listdir(dataset_dir)) == 0:
        print('No TF example data found at %s so creating it from raw data.'
              % dataset_dir)
        convert_data.process_dataset(FLAGS.dataset_name)

    logging.set_verbosity(logging.INFO)  # logging level
    logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name, keeping the original
    # root around in actual_log_root for the importance-model files.
    FLAGS.exp_name = FLAGS.exp_name if FLAGS.exp_name != '' else FLAGS.dataset_name
    FLAGS.actual_log_root = FLAGS.log_root
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # In decode mode one example is decoded at a time; the batch is made of
    # the beam_size hypotheses currently in the beam.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # single_pass only makes sense when decoding.
    if FLAGS.single_pass and FLAGS.mode != 'decode':
        raise Exception(
            "The single_pass flag should only be True in decode mode")

    # Namedtuple of the hyperparameters the model needs.
    hparam_list = ['mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
                   'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim',
                   'emb_dim', 'batch_size', 'max_dec_steps', 'max_enc_steps',
                   'coverage', 'cov_loss_wt', 'pointer_gen']
    hps_dict = {}
    # .items() instead of .iteritems(): works on both Python 2 and Python 3.
    for key, val in FLAGS.__flags.items():
        if key in hparam_list:
            hps_dict[key] = val.value
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    if FLAGS.pg_mmr or FLAGS.pg_mmr_sim or FLAGS.pg_mmr_diff:

        # Fit the TFIDF vectorizer if not already fitted.
        if FLAGS.importance_fn == 'tfidf':
            tfidf_model_path = os.path.join(FLAGS.actual_log_root,
                                            'tfidf_vectorizer',
                                            FLAGS.dataset_name + '.dill')
            if not os.path.exists(tfidf_model_path):
                print('No TFIDF vectorizer model file found at %s, so fitting the model now.'
                      % tfidf_model_path)
                tfidf_vectorizer = fit_tfidf_vectorizer(hps, vocab)
                with open(tfidf_model_path, 'wb') as f:
                    dill.dump(tfidf_vectorizer, f)

        # Train the SVR model on the validation split if not already trained.
        if FLAGS.importance_fn == 'svr':
            save_path = os.path.join(FLAGS.data_root, 'svr_training_data')
            importance_model_path = os.path.join(FLAGS.actual_log_root,
                                                 'svr.pickle')
            dataset_split = 'val'
            if not os.path.exists(importance_model_path):
                if not os.path.exists(save_path) or len(os.listdir(save_path)) == 0:
                    print('No importance_feature instances found at %s so creating it from raw data.'
                          % save_path)
                    # max_dec_steps=1 because only one decoder step is ever
                    # run at a time; the batcher still needs full summaries.
                    decode_model_hps = hps._replace(
                        max_dec_steps=1, batch_size=100, mode='calc_features')
                    cnn_dm_train_data_path = os.path.join(
                        FLAGS.data_root, FLAGS.dataset_name,
                        dataset_split + '*')
                    batcher = Batcher(cnn_dm_train_data_path, vocab,
                                      decode_model_hps,
                                      single_pass=FLAGS.single_pass,
                                      cnn_500_dm_500=False)
                    calc_features(cnn_dm_train_data_path, decode_model_hps,
                                  vocab, batcher, save_path)

                print('No importance_feature SVR model found at %s so training it now.'
                      % importance_model_path)
                features_list = importance_features.get_features_list(True)
                sent_reps = importance_features.load_data(
                    os.path.join(save_path, dataset_split + '*'), -1)
                # Function-call form: valid on both Python 2 and Python 3.
                print('Loaded %d sentences representations' % len(sent_reps))
                x_y = importance_features.features_to_array(sent_reps,
                                                            features_list)
                train_x, train_y = x_y[:, :-1], x_y[:, -1]
                svr_model = importance_features.run_training(train_x, train_y)
                # NOTE(review): cPickle exists only on Python 2; switching to
                # pickle needs a change to the file's imports.
                with open(importance_model_path, 'wb') as f:
                    cPickle.dump(svr_model, f)

    # Batcher that produces minibatches of data.
    batcher = Batcher(FLAGS.data_path, vocab, hps,
                      single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    # Start decoding on multi-document inputs.
    if hps.mode == 'decode':
        # max_dec_steps=1 because beam search only ever runs one decoder step
        # at a time; the batcher keeps the full value.
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(decode_model_hps, vocab)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        # Decode indefinitely (or exactly once when single_pass=True).
        decoder.decode()
    else:
        # NOTE(review): only 'decode' is handled here although the message
        # lists train/eval as well.
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def main(unused_argv):
    """Entry point: load optional external models, then train/eval/decode.

    When ``FLAGS.external_config`` exists it is parsed first; a
    ``sent2vec_params`` section loads a pretrained convnet whose variables
    (minus its embedding matrix) are recorded as fixed, and the result is
    passed to the model as ``extra_info``.

    Args:
        unused_argv: leftover command-line arguments; must have length 1.

    Raises:
        Exception: on stray command-line arguments or a missing log directory
            outside train mode.
        NotImplementedError: if the external config has a ``key_phrases``
            section.
        ValueError: if the ``mode`` flag is not train/eval/decode.
    """
    if len(unused_argv) != 1:  # flags were entered incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    # Load the external information first.
    extra_info = {}
    if os.path.exists(FLAGS.external_config):
        external_params = xml_parser.parse(FLAGS.external_config, flat=False)

        if 'sent2vec_params' in external_params:
            sent2vec_params = external_params['sent2vec_params']
            convnet_params = sent2vec_params['convnet_params']
            convnet_model2load = sent2vec_params['model2load']
            gamma = sent2vec_params['gamma'] if 'gamma' in sent2vec_params else 0.2

            my_convnet = convnet.convnet(convnet_params)
            my_convnet.train_validate_test_init()
            my_convnet.load_params(file2load=convnet_model2load)

            # Every variable existing at this point is recorded as fixed,
            # except the convnet's embedding matrix.
            fixed_vars = tf.global_variables()
            fixed_vars.remove(my_convnet.embedding_matrix)

            extra_info['sent2vec'] = {'gamma': gamma, 'network': my_convnet}
            extra_info['fixed_vars'] = fixed_vars

        if 'key_phrases' in external_params:
            # TODO: parse parameters to import pre-extracted key phrases, or
            # parameters for online key-phrase extraction.
            extra_info['key_phrases'] = {}
            raise NotImplementedError(
                'Key phrases part has not been implemented yet')

    tf.logging.set_verbosity(tf.logging.INFO)  # logging level
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name; create it if necessary.
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception(
                "Logdir %s doesn't exist. Run in train mode to create it." %
                (FLAGS.log_root))

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # In decode mode one example is decoded at a time; the batch is made of
    # the beam_size hypotheses currently in the beam.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # Namedtuple of the hyperparameters the model needs.
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {}
    # .items() instead of .iteritems(): works on both Python 2 and Python 3.
    for key, val in FLAGS.__flags.items():
        if key in hparam_list:
            hps_dict[key] = val
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Batcher that produces minibatches of data.
    batcher = Batcher(FLAGS.data_path, vocab, hps,
                      single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    if hps.mode == 'train':
        # Function-call form: valid on both Python 2 and Python 3.
        print("creating model...")
        model = SummarizationModel(hps, vocab, extra_info)
        setup_training(model, batcher)
    elif hps.mode == 'eval':
        model = SummarizationModel(hps, vocab, extra_info)
        run_eval(model, batcher, vocab)
    elif hps.mode == 'decode':
        # max_dec_steps=1 because beam search only ever runs one decoder step
        # at a time. The batcher keeps the full value because batches must
        # contain whole summaries.
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(decode_model_hps, vocab, extra_info)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        # Decode indefinitely (or exactly once when single_pass=True).
        decoder.decode()
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def main(args):
    """Entry point: set up vocab/data/model and run train, test or eval.

    Seeds TensorFlow, ``random`` and NumPy, adjusts flags for non-train modes,
    prepares the output directories and then either scores existing results
    (``eval``) or builds one of the plain/hier/rlhier models and trains or
    tests it. Prints the total elapsed time at the end.

    Args:
        args: leftover command-line arguments; must have length 1.

    Raises:
        Exception: on stray command-line arguments.
        ValueError: if the ``model`` flag is not plain/hier/rlhier, or the
            ``mode`` flag is not train/test/eval.
    """
    main_start = time.time()

    tf.set_random_seed(2019)
    random.seed(2019)
    np.random.seed(2019)

    if len(args) != 1:
        raise Exception('Problem with flags: %s' % args)

    # Correcting a few flags for test/eval mode: the batch is the beam, and
    # (except for the "tx" model) the decoder is unrolled one step at a time.
    if FLAGS.mode != 'train':
        FLAGS.batch_size = FLAGS.beam_size
        FLAGS.bs_dec_steps = FLAGS.dec_steps
        if FLAGS.model.lower() != "tx":
            FLAGS.dec_steps = 1

    assert FLAGS.mode == 'train' or FLAGS.batch_size == FLAGS.beam_size, \
        "In test mode, batch size should be equal to beam size."
    assert FLAGS.mode == 'train' or FLAGS.dec_steps == 1 or FLAGS.model.lower() == "tx", \
        "In test mode, no. of decoder steps should be one."

    os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
    os.environ['CUDA_VISIBLE_DEVICES'] = ",".join(
        str(gpu_id) for gpu_id in FLAGS.GPUs)

    if not os.path.exists(FLAGS.PathToCheckpoint):
        os.makedirs(FLAGS.PathToCheckpoint)

    if FLAGS.mode == "test" and not os.path.exists(FLAGS.PathToResults):
        os.makedirs(FLAGS.PathToResults)
        # Plain concatenation is kept on purpose: the sub-directory names rely
        # on PathToResults ending with a path separator.
        os.makedirs(FLAGS.PathToResults + 'predictions')
        os.makedirs(FLAGS.PathToResults + 'groundtruths')

    if FLAGS.mode == 'eval':
        eval_model(FLAGS.PathToResults)
    else:
        start = time.time()
        vocab = Vocab(max_vocab_size=FLAGS.vocab_size,
                      emb_dim=FLAGS.dim,
                      dataset_path=FLAGS.PathToDataset,
                      glove_path=FLAGS.PathToGlove,
                      vocab_path=FLAGS.PathToVocab,
                      lookup_path=FLAGS.PathToLookups)

        model_name = FLAGS.model.lower()
        if model_name == "plain":
            print("Setting up the plain model.\n")
            data = DataGenerator(path_to_dataset=FLAGS.PathToDataset,
                                 max_inp_seq_len=FLAGS.enc_steps,
                                 max_out_seq_len=FLAGS.dec_steps,
                                 vocab=vocab,
                                 use_pgen=FLAGS.use_pgen,
                                 use_sample=FLAGS.sample)
            summarizer = SummarizationModel(vocab, data)
        elif model_name == "hier":
            print("Setting up the hier model.\n")
            data = DataGeneratorHier(
                path_to_dataset=FLAGS.PathToDataset,
                max_inp_sent=FLAGS.max_enc_sent,
                max_inp_tok_per_sent=FLAGS.max_enc_steps_per_sent,
                max_out_tok=FLAGS.dec_steps,
                vocab=vocab,
                use_pgen=FLAGS.use_pgen,
                use_sample=FLAGS.sample)
            summarizer = SummarizationModelHier(vocab, data)
        elif model_name == "rlhier":
            print("Setting up the Hier RL model.\n")
            data = DataGeneratorHier(
                path_to_dataset=FLAGS.PathToDataset,
                max_inp_sent=FLAGS.max_enc_sent,
                max_inp_tok_per_sent=FLAGS.max_enc_steps_per_sent,
                max_out_tok=FLAGS.dec_steps,
                vocab=vocab,
                use_pgen=FLAGS.use_pgen,
                use_sample=FLAGS.sample)
            summarizer = SummarizationModelHierSC(vocab, data)
        else:
            # The message lists the values the branches above actually accept
            # (it used to advertise bayesian/shared).
            raise ValueError(
                "model flag should be either of plain/hier/rlhier!! \n")

        end = time.time()
        print("Setting up vocab, data and model took {:.2f} sec.".format(
            end - start))

        summarizer.build_graph()

        if FLAGS.mode == 'train':
            summarizer.train()
        elif FLAGS.mode == "test":
            summarizer.test()
        else:
            raise ValueError("mode should be either train/test!! \n")

    main_end = time.time()
    print("Total time elapsed: %.2f \n" % (main_end - main_start))
def main(unused_argv):
    """Entry point: decode summaries from pre-selected sentence instances.

    Loads the pickled ``ssi.pkl`` list from the log directory, builds the
    hyperparameters from ``FLAGS`` and runs
    ``BeamSearchDecoder.decode_iteratively`` over the source examples. Relies
    on the module-level ``_dataset_articles``, ``log_dir``, ``data_dir``,
    ``dataset_split``, ``random_seed`` and ``names_to_types``.

    Args:
        unused_argv: leftover command-line arguments; must have length 1.

    Raises:
        Exception: on stray command-line arguments, or when ``single_pass`` is
            set outside decode mode.
    """
    if len(unused_argv) != 1:  # flags were entered incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    if FLAGS.singles_and_pairs == 'both':
        FLAGS.exp_name = FLAGS.exp_name + '_both'
        dataset_articles = _dataset_articles
    else:
        FLAGS.exp_name = FLAGS.exp_name + '_singles'
        dataset_articles = _dataset_articles + '_singles'
    my_log_dir = os.path.join(log_dir, FLAGS.ssi_exp_name)

    print('Running statistics on %s' % FLAGS.exp_name)

    if FLAGS.dataset_name != "":
        FLAGS.data_path = os.path.join(FLAGS.data_root, FLAGS.dataset_name,
                                       FLAGS.dataset_split + '*')
    dataset_dir = os.path.join(FLAGS.data_root, FLAGS.dataset_name)
    if not os.path.exists(dataset_dir) or len(os.listdir(dataset_dir)) == 0:
        print(('No TF example data found at %s so creating it from raw data.'
               % dataset_dir))
        convert_data.process_dataset(FLAGS.dataset_name)

    logging.set_verbosity(logging.INFO)  # logging level
    logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name, keeping the original
    # root in actual_log_root.
    FLAGS.exp_name = FLAGS.exp_name if FLAGS.exp_name != '' else FLAGS.dataset_name
    FLAGS.actual_log_root = FLAGS.log_root
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # In decode mode one example is decoded at a time; the batch is made of
    # the beam_size hypotheses currently in the beam.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # single_pass only makes sense when decoding.
    if FLAGS.single_pass and FLAGS.mode != 'decode':
        raise Exception(
            "The single_pass flag should only be True in decode mode")

    # Namedtuple of the hyperparameters the model needs.
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen', 'lambdamart_input'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():
        if key in hparam_list:
            hps_dict[key] = val.value
    hps = namedtuple("HParams", list(hps_dict.keys()))(**hps_dict)

    tf.set_random_seed(113)  # a seed value for randomness

    # max_dec_steps=1 because beam search only ever runs one decoder step at
    # a time.
    decode_model_hps = hps._replace(max_dec_steps=1)

    np.random.seed(random_seed)
    source_dir = os.path.join(data_dir, dataset_articles)
    source_files = sorted(glob.glob(source_dir + '/' + dataset_split + '*'))

    # Pickles are binary data: open in 'rb' so this also works on Python 3.
    with open(os.path.join(my_log_dir, 'ssi.pkl'), 'rb') as f:
        ssi_list = pickle.load(f)

    # Both substrings must be tested explicitly; a bare `'cnn' or ...` is
    # always truthy and would apply the x1000 factor to every dataset.
    if 'cnn' in dataset_articles or 'newsroom' in dataset_articles:
        # NOTE(review): presumably these datasets are sharded with up to 1000
        # examples per file -- confirm against the data conversion script.
        total = len(source_files) * 1000
    else:
        total = len(source_files)

    example_generator = data.example_generator(
        source_dir + '/' + dataset_split + '*', True, False,
        should_check_valid=False)

    # No batcher is used here; examples are fed from the generator instead.
    model = SummarizationModel(decode_model_hps, vocab)
    decoder = BeamSearchDecoder(model, None, vocab)
    decoder.decode_iteratively(example_generator, total, names_to_types,
                               ssi_list, hps)
def main(self, unused_argv):
    """Entry point: configure the run and dispatch on the ``mode`` flag.

    Dumps every flag to ``<log_root>/config.txt``, builds the vocabulary, the
    hyperparameter namedtuple and the full/partial batchers on ``self``, then
    trains, evaluates or decodes.

    Args:
        unused_argv: leftover command-line arguments; must have length 1.

    Raises:
        Exception: on stray command-line arguments, on a missing log directory
            outside train mode, or when ``single_pass`` is set outside decode
            mode.
        ValueError: if the ``mode`` flag is not train/eval/decode.
    """
    if len(unused_argv) != 1:  # flags were entered incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    tf.logging.set_verbosity(tf.logging.INFO)  # logging level
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # getattr avoids the private-name mangling that a literal FLAGS.__flags
    # would undergo inside a class body.
    flags = getattr(FLAGS, "__flags")

    # Create the log directory if necessary (train mode only).
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception("Logdir %s doesn't exist. Run in train mode to create it." % (FLAGS.log_root))

    # Record the full flag set next to the logs; `with` guarantees the file is
    # closed even if a write fails.
    with open('{}/config.txt'.format(FLAGS.log_root), 'w') as fw:
        for k, v in flags.items():
            fw.write('{}\t{}\n'.format(k, v))

    self.vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # In decode mode one example is decoded at a time; the batch is made of
    # the beam_size hypotheses currently in the beam.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # single_pass only makes sense when decoding.
    if FLAGS.single_pass and FLAGS.mode != 'decode':
        raise Exception("The single_pass flag should only be True in decode mode")

    # Namedtuple of the hyperparameters the model needs.
    hparam_list = ['mode', 'lr', 'gpu_num',
                   'gamma', 'eta', 'zeta',
                   'fixed_zeta', 'zeta_clipping', 'rl_start_step',
                   'fixed_eta', 'reward_function', 'intradecoder',
                   'use_temporal_attention', 'ac_training', 'rl_training', 'matrix_attention', 'calculate_true_q',
                   'enc_hidden_dim', 'dec_hidden_dim', 'k',
                   'scheduled_sampling', 'sampling_probability', 'fixed_sampling_probability',
                   'alpha', 'hard_argmax', 'greedy_scheduled_sampling',
                   'adagrad_init_acc', 'rand_unif_init_mag',
                   'trunc_norm_init_std', 'max_grad_norm',
                   'emb_dim', 'batch_size', 'max_dec_steps', 'max_enc_steps',
                   'dqn_scheduled_sampling', 'dqn_sleep_time', 'E2EBackProp',
                   'coverage', 'cov_loss_wt', 'pointer_gen', 'partial_rewarding']
    hps_dict = {}
    for key, val in flags.items():
        if key in hparam_list:
            hps_dict[key] = val.value
    self.hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Batchers that produce minibatches from the full and partial datasets.
    self.full_batcher = Batcher(FLAGS.full_data_path, self.vocab, self.hps,
                                single_pass=FLAGS.single_pass,
                                decode_after=FLAGS.decode_after)
    self.partial_batcher = Batcher(FLAGS.partial_data_path, self.vocab, self.hps,
                                   single_pass=FLAGS.single_pass,
                                   decode_after=FLAGS.decode_after)

    tf.set_random_seed(111)  # a seed value for randomness

    if self.hps.mode == 'train':
        print("creating model...")
        if FLAGS.rl_training:
            # Batches from the full and partial datasets are merged, so the
            # model sees twice the per-batcher batch size.
            self.hps = self.hps._replace(batch_size=2 * self.hps.batch_size)
        self.model = SummarizationModel(self.hps, self.vocab)
        self.setup_training()
    elif self.hps.mode == 'eval':
        if FLAGS.rl_training:
            # Same doubling as in train mode.
            self.hps = self.hps._replace(batch_size=2 * self.hps.batch_size)
        self.model = SummarizationModel(self.hps, self.vocab)
        self.run_eval()
    elif self.hps.mode == 'decode':
        # max_dec_steps=1 because beam search only ever runs one decoder step
        # at a time. The batchers keep the full value because batches must
        # contain whole summaries.
        decode_model_hps = self.hps._replace(max_dec_steps=1)
        print(decode_model_hps)
        model = SummarizationModel(decode_model_hps, self.vocab)
        if FLAGS.partial_decoding:
            decoder = BeamSearchDecoder(model, self.partial_batcher, self.vocab, dqn = None)
        else:
            decoder = BeamSearchDecoder(model, self.full_batcher, self.vocab, dqn = None)
        # Decode indefinitely (or exactly once when single_pass=True).
        decoder.decode()
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def main(unused_argv):
    """Entry point: decode summaries from extracted sentence instances.

    Derives the experiment name and pretrained-model path from the dataset,
    extractor and singles/pairs setting, loads the pickled ``ssi.pkl`` list
    (unless running the upper-bound evaluation), and runs
    ``BeamSearchDecoder.decode_iteratively`` over the source examples. Relies
    on the module-level ``log_dir``, ``dataset_split``, ``random_seed`` and
    ``names_to_types``.

    Args:
        unused_argv: leftover command-line arguments; must have length 1.

    Raises:
        Exception: on stray command-line arguments, when no TF example data is
            found for the dataset, or when ``single_pass`` is set outside
            decode mode.
    """
    if len(unused_argv) != 1:  # flags were entered incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    extractor = 'bert' if FLAGS.use_bert else 'lambdamart'
    # duc_2004 reuses the model pretrained on cnn_dm.
    pretrained_dataset = FLAGS.dataset_name
    if FLAGS.dataset_name == 'duc_2004':
        pretrained_dataset = 'cnn_dm'

    if FLAGS.singles_and_pairs == 'both':
        FLAGS.exp_name = FLAGS.dataset_name + '_' + FLAGS.exp_name + extractor + '_both'
        FLAGS.pretrained_path = os.path.join(FLAGS.log_root,
                                             pretrained_dataset + '_both')
        dataset_articles = FLAGS.dataset_name
    else:
        FLAGS.exp_name = FLAGS.dataset_name + '_' + FLAGS.exp_name + extractor + '_singles'
        FLAGS.pretrained_path = os.path.join(FLAGS.log_root,
                                             pretrained_dataset + '_singles')
        dataset_articles = FLAGS.dataset_name + '_singles'

    if FLAGS.upper_bound:
        FLAGS.exp_name = FLAGS.exp_name + '_upperbound'
        # Upper-bound evaluation: ssi_list comes straight from the groundtruth.
        ssi_list = None
    else:
        my_log_dir = os.path.join(
            log_dir, '%s_%s_%s' % (FLAGS.dataset_name, extractor,
                                   FLAGS.singles_and_pairs))
        with open(os.path.join(my_log_dir, 'ssi.pkl'), 'rb') as f:
            ssi_list = pickle.load(f)

    print('Running statistics on %s' % FLAGS.exp_name)

    if FLAGS.dataset_name != "":
        FLAGS.data_path = os.path.join(FLAGS.data_root, FLAGS.dataset_name,
                                       FLAGS.dataset_split + '*')
    dataset_dir = os.path.join(FLAGS.data_root, FLAGS.dataset_name)
    if not os.path.exists(dataset_dir) or len(os.listdir(dataset_dir)) == 0:
        raise Exception('No TF example data found at %s.' % dataset_dir)

    logging.set_verbosity(logging.INFO)  # logging level
    logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name, keeping the original
    # root in actual_log_root.
    FLAGS.exp_name = FLAGS.exp_name if FLAGS.exp_name != '' else FLAGS.dataset_name
    FLAGS.actual_log_root = FLAGS.log_root
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)

    print(util.bcolors.OKGREEN + "Experiment path: " + FLAGS.log_root +
          util.bcolors.ENDC)

    # duc_2004 also reuses the cnn_dm vocabulary.
    if FLAGS.dataset_name == 'duc_2004':
        vocab = Vocab(FLAGS.vocab_path + '_' + 'cnn_dm', FLAGS.vocab_size)
    else:
        vocab = Vocab(FLAGS.vocab_path + '_' + FLAGS.dataset_name,
                      FLAGS.vocab_size)

    # In decode mode one example is decoded at a time; the batch is made of
    # the beam_size hypotheses currently in the beam.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # single_pass only makes sense when decoding.
    if FLAGS.single_pass and FLAGS.mode != 'decode':
        raise Exception(
            "The single_pass flag should only be True in decode mode")

    # Every flag except the '?' help alias becomes a hyperparameter. A set
    # keeps the membership test below O(1); field order still follows
    # FLAGS.__flags.
    hparam_names = {
        name for name in FLAGS.flag_values_dict() if name != '?'
    }
    hps_dict = {}
    for key, val in FLAGS.__flags.items():
        if key in hparam_names:
            hps_dict[key] = val.value
    hps = namedtuple("HParams", list(hps_dict.keys()))(**hps_dict)

    tf.set_random_seed(113)  # a seed value for randomness

    # max_dec_steps=1 because beam search only ever runs one decoder step at
    # a time.
    decode_model_hps = hps._replace(max_dec_steps=1)

    np.random.seed(random_seed)
    source_dir = os.path.join(FLAGS.data_root, dataset_articles)
    source_files = sorted(glob.glob(source_dir + '/' + dataset_split + '*'))

    if 'cnn' in dataset_articles or 'xsum' in dataset_articles:
        # NOTE(review): presumably these datasets are sharded with up to 1000
        # examples per file -- confirm against the data conversion script.
        total = len(source_files) * 1000
    else:
        total = len(source_files)

    example_generator = data.example_generator(
        source_dir + '/' + dataset_split + '*', True, False,
        should_check_valid=False)

    # No batcher is used here; examples are fed from the generator instead.
    model = SummarizationModel(decode_model_hps, vocab)
    decoder = BeamSearchDecoder(model, None, vocab)
    decoder.decode_iteratively(example_generator, total, names_to_types,
                               ssi_list, hps)
def main(unused_argv):
    """Reserve a GPU, snapshot the run configuration, then train/eval/decode.

    Steps, in order: choose the visible CUDA device, open a temporary
    "placeholder" session, resolve the experiment log directory, build the
    vocabulary and hyperparameters, dump the flags to JSON and the local
    ``*.py`` files to a zip archive inside the log directory, and finally
    dispatch on ``FLAGS.mode``.

    Args:
        unused_argv: Arguments left over after flag parsing. Must contain
            exactly one element, otherwise the flags are considered
            malformed.

    Raises:
        Exception: If ``unused_argv`` does not have exactly one element, if
            the log directory is missing outside of train mode, or if
            ``single_pass`` is set outside of decode mode.
        ValueError: If ``FLAGS.mode`` is not one of train/eval/decode.
    """
    # GPU tricks: pick a device automatically when none was requested.
    if FLAGS.device is None:
        index_of_gpu = get_available_gpu()
        if index_of_gpu < 0:
            # NOTE(review): presumably a negative index means "no free GPU";
            # an empty CUDA_VISIBLE_DEVICES is used in that case.
            index_of_gpu = ''
        FLAGS.device = index_of_gpu
        tf.logging.info(bcolors.OKGREEN + 'using {}'.format(FLAGS.device) +
                        bcolors.ENDC)  # colored terminal output
    else:
        index_of_gpu = FLAGS.device
    os.environ["CUDA_VISIBLE_DEVICES"] = str(index_of_gpu)

    tf.logging.info('try to occupy GPU memory!')
    placeholder_session = tf.Session()
    # tf.contrib.memory_stats.BytesLimit() generates an op that measures the
    # total memory (in bytes) of a device; 1073741824 == 2**30 converts it
    # to GB for the log message.
    limit = placeholder_session.run(
        tf.contrib.memory_stats.BytesLimit()) / 1073741824
    tf.logging.info('occupy GPU memory %f GB', limit)

    if len(unused_argv) != 1:
        # Prints a message if you've entered flags incorrectly.
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if
    # necessary (only train mode is allowed to create it).
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception(
                "Logdir %s doesn't exist. Run in train mode to create it." %
                (FLAGS.log_root))

    tf.logging.info("vocab path is %s ", FLAGS.vocab_path)
    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # If in decode mode, set batch_size = beam_size.
    # Reason: in decode mode, we decode one example at a time. On each step,
    # we have beam_size-many hypotheses in the beam, so we need to make a
    # batch of these hypotheses.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # If single_pass=True, check we're in decode mode.
    if FLAGS.single_pass and FLAGS.mode != 'decode':
        raise Exception(
            "The single_pass flag should only be True in decode mode")

    # Make a namedtuple hps, containing the values of the hyperparameters
    # that the model needs. Every flag is additionally collected in
    # export_json so the complete configuration can be written to disk.
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {}
    export_json = {}
    for key, val in FLAGS.__flags.items():
        export_json[key] = val
        if key in hparam_list:
            hps_dict[key] = val
        tf.logging.info('{} {}'.format(key, val))
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    ######################
    # save parameters and python script
    ######################
    # Both artifacts are tagged with the start time and the mode.
    current_time_str = datetime.now().strftime('%m-%d-%H-%M')

    # save parameters
    tf.logging.info('saving parameters')
    json_path = os.path.join(
        FLAGS.log_root,
        'flags-' + current_time_str + '-' + FLAGS.mode + '.json')
    # Context manager guarantees the handle is closed even if dumping fails.
    with open(json_path, 'w') as json_para_file:
        json_para_file.write(json.dumps(export_json, indent=4) + '\n')

    # save python source code
    tf.logging.info('saving source code')
    python_list = glob.glob('./*.py')
    zip_path = os.path.join(
        FLAGS.log_root,
        'source_code_bak-' + current_time_str + '-' + FLAGS.mode + '.zip')
    with zipfile.ZipFile(zip_path, 'w') as zip_file:
        for source_path in python_list:
            zip_file.write(source_path)

    # Create a batcher object that will create minibatches of data.
    batcher = Batcher(FLAGS.data_path,
                      vocab,
                      hps,
                      single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    # In every mode the placeholder session is closed once the model has
    # been constructed, right before the real work starts.
    if hps.mode == 'train':
        tf.logging.info("creating model...")
        model = SummarizationModel(hps, vocab)
        placeholder_session.close()
        setup_training(model, batcher)
    elif hps.mode == 'eval':
        model = SummarizationModel(hps, vocab)
        placeholder_session.close()
        run_eval(model, batcher, vocab)
    elif hps.mode == 'decode':
        # The model is configured with max_dec_steps=1 because we only ever
        # run one step of the decoder at a time (to do beam search). Note
        # that the batcher is initialized with max_dec_steps equal to e.g.
        # 100 because the batches need to contain the full summaries.
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(decode_model_hps, vocab)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        placeholder_session.close()
        try:
            # Decode indefinitely (unless single_pass=True, in which case
            # decode the dataset exactly once).
            decoder.decode()
        except KeyboardInterrupt:
            tf.logging.info('stop decoding!')
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def main(_):
    """Build vocabulary, hyperparameters and batcher, then run ``FLAGS.mode``.

    The output directory is created first. The ``hps`` namedtuple stores the
    flag entries exactly as they come out of ``FLAGS.__flags`` (not their
    unwrapped values), which is why the mode is read via ``hps.mode.value``.

    Args:
        _: Ignored positional arguments.

    Raises:
        Exception: If ``single_pass`` is set while the mode is neither
            'decode' nor 'test'.
        ValueError: If the mode is not one of train/eval/decode.
    """
    tf.gfile.MakeDirs(FLAGS.output_dir)

    # Choose what level of logging you want.
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Create a vocabulary.
    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)

    # In decode mode one example is decoded at a time, and each step holds
    # beam_size-many hypotheses, so the batch must be exactly beam_size wide.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # single_pass is only accepted for the modes listed here.
    single_pass_modes = ['decode', 'test']
    if FLAGS.single_pass and not (FLAGS.mode in single_pass_modes):
        raise Exception(
            "The single_pass flag should only be True in decode mode")

    # NOTE(review): run_config is not referenced again inside this function.
    run_config = tf.contrib.tpu.RunConfig(
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps)

    # Names of the flags that are forwarded to the model as hyperparameters.
    hparam_list = [
        'init_checkpoint', 'mode', 'learning_rate', 'adagrad_init_acc',
        'rand_unif_init_mag', 'trunc_norm_init_std', 'max_grad_norm',
        'hidden_dim', 'emb_dim', 'batch_size', 'max_dec_steps',
        'max_enc_steps', 'coverage', 'cov_loss_wt', 'pointer_gen',
        'num_train_steps', 'use_tpu', 'num_train_steps', 'num_warmup_steps'
    ]
    hps_dict = {
        flag_name: flag_entry
        for flag_name, flag_entry in FLAGS.__flags.items()
        if flag_name in hparam_list
    }
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Create a batcher object that will create minibatches of data.
    batcher = Batcher(FLAGS.input_file,
                      vocab,
                      hps,
                      single_pass=FLAGS.single_pass)

    mode = hps.mode.value
    if mode == 'train' or mode == 'eval':
        training = mode == 'train'
        if training:
            tf.logging.info("***** Running training *****")
        else:
            tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Batch size = %d", FLAGS.batch_size)
        model = SummarizationModel(hps, vocab)
        if training:
            setup_training(model, batcher)
        else:
            run_eval(model, batcher, vocab)
    elif mode == 'decode':
        # The decoder model is built with max_dec_steps overridden to 1.
        single_step_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(single_step_hps, vocab)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        # When we set single_pass=True (default), decode the dataset exactly
        # once.
        decoder.decode()
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def main(unused_argv):
    """Prepare vocabulary, stop words, hyperparameters and batcher, then run.

    Args:
        unused_argv: Arguments left over after flag parsing; must contain
            exactly one element.

    Raises:
        Exception: If the flags were entered incorrectly, if the log
            directory is missing outside of train mode, or if
            ``single_pass`` is set outside of decode mode.
        ValueError: If ``FLAGS.mode`` is not one of train/eval/decode.
    """
    # A leftover argument count other than one means the flags were mistyped.
    if len(unused_argv) != 1:
        raise Exception("Problem with flags: %s" % unused_argv)

    # Choose what level of logging you want.
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Experiments live in <log_root>/<exp_name>; only train mode may create
    # that directory.
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode != "train":
            raise Exception(
                "Logdir %s doesn't exist. Run in train mode to create it." %
                (FLAGS.log_root))
        os.makedirs(FLAGS.log_root)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # Stop-word ids are loaded only when one of these options is enabled:
    # a pointer-generator feature listed below, or any of the
    # attention-weighting / markov-attention switches.
    pointer_feature_enabled = FLAGS.pointer_gen and (
        FLAGS.co_occurrence or FLAGS.prev_relation or FLAGS.co_occurrence_h
        or FLAGS.co_occurrence_i
        or (FLAGS.coverage and FLAGS.coverage_weighted))
    wants_stop_words = (pointer_feature_enabled or FLAGS.attention_weighted
                        or FLAGS.markov_attention
                        or FLAGS.markov_attention_contribution)
    if wants_stop_words:
        stop_word_ids = get_stop_word_ids(FLAGS.stop_words_path, vocab)
    else:
        stop_word_ids = None

    # In decode mode one example is decoded at a time, and each step holds
    # beam_size-many hypotheses, so the batch must be exactly beam_size wide.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # single_pass only makes sense when decoding.
    if FLAGS.single_pass and FLAGS.mode != 'decode':
        raise Exception(
            "The single_pass flag should only be True in decode mode")

    # Names of the flags that are forwarded to the model as hyperparameters.
    hparam_list = [
        'top_ten_kept', 'decode_only', 'generation_only', 'copy_only',
        'occurrence_window_size', 'max_title_len', 'title_engaged',
        'title_guided', 'ref_dir', 'tagger_encoding', 'tagger_attention',
        'source_siding_bridge', 'target_siding_bridge', 'language',
        'dropout', 'optimizer', 'mode', 'lr', 'adagrad_init_acc',
        'rand_unif_init_mag', 'trunc_norm_init_std', 'max_grad_norm',
        'hidden_dim', 'emb_dim', 'batch_size', 'beam_depth',
        'max_dec_steps', 'max_enc_steps', 'max_keyphrase_num',
        'attention_weighted', 'coverage', 'coverage_weighted',
        'coverage_weighted_expansion', 'co_occurrence', 'prev_relation',
        'co_occurrence_h', 'co_occurrence_i', 'cov_loss_wt', 'pointer_gen',
        'cell_type', 'markov_attention', 'markov_attention_contribution',
        'markov_attention_contribution_used_x'
    ]
    hps_dict = {
        flag_name: flag_value
        for flag_name, flag_value in FLAGS.__flags.items()
        if flag_name in hparam_list
    }
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Create a batcher object that will create minibatches of data.
    batcher = Batcher(FLAGS.data_path,
                      vocab,
                      hps,
                      single_pass=FLAGS.single_pass,
                      stop_words=stop_word_ids)

    tf.set_random_seed(111)  # a seed value for randomness

    mode = hps.mode
    if mode not in ('train', 'eval', 'decode'):
        raise ValueError("The 'mode' flag must be one of train/eval/decode")

    if mode == 'train':
        print("creating model...")
        model = SummarizationModel(hps, vocab)
        setup_training(model, batcher)
        return

    if mode == 'eval':
        model = SummarizationModel(hps, vocab)
        run_eval(model, batcher, vocab)
        return

    # Decode mode. The model is configured with max_dec_steps=1 because we
    # only ever run one step of the decoder at a time (to do beam search).
    # Note that the batcher is initialized with max_dec_steps equal to e.g.
    # 100 because the batches need to contain the full summaries.
    single_step_hps = hps._replace(max_dec_steps=1)
    model = SummarizationModel(single_step_hps, vocab)
    decoder = BeamSearchDecoder(model, batcher, vocab)
    # Decode indefinitely (unless single_pass=True, in which case decode the
    # dataset exactly once).
    decoder.decode()
def main(unused_argv):
    """Optionally reserve GPU memory, back up the sources, then run a mode.

    Supported modes are train, eval, decode and auto_decode. A zip archive
    of the local ``*.py`` files is written into the log directory and its
    absolute path is stored in ``FLAGS.current_source_code_zip`` before the
    hyperparameter tuple is built, so ``hps.current_source_code_zip``
    carries the path of the archive written by this run.

    Args:
        unused_argv: Arguments left over after flag parsing; must contain
            exactly one element.

    Raises:
        Exception: If the flags were entered incorrectly or if the log
            directory is missing outside of train mode.
        ValueError: If ``FLAGS.mode`` is not a supported mode.
    """
    # Configure logging before the first message so the INFO lines emitted
    # while reserving GPU memory honor the requested verbosity.
    tf.logging.set_verbosity(tf.logging.INFO)

    placeholder_session = None
    if FLAGS.placeholder:
        tf.logging.info('try to occupy GPU memory!')
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.per_process_gpu_memory_fraction = 0.8
        placeholder_session = tf.Session(config=config)
        # 1073741824 == 2**30 converts the byte limit to GB for the log.
        limit = placeholder_session.run(
            tf.contrib.memory_stats.BytesLimit()) / 1073741824
        tf.logging.info('occupy GPU memory %f GB', limit)

    def _release_placeholder():
        # Close the temporary session, if one was opened, in every mode.
        if placeholder_session is not None:
            placeholder_session.close()

    if len(unused_argv) != 1:
        # Prints a message if you've entered flags incorrectly.
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.info('Starting seq2seq_attention in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if
    # necessary (only train mode is allowed to create it).
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception(
                "Logdir %s doesn't exist. Run in train mode to create it." %
                (FLAGS.log_root))

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary

    # If in a decoding mode, set batch_size = beam_size.
    # Reason: we decode one example at a time. On each step, we have
    # beam_size-many hypotheses in the beam, so we need to make a batch of
    # these hypotheses.
    if FLAGS.mode in ('decode', 'auto_decode'):
        FLAGS.batch_size = FLAGS.beam_size

    # Save python source code. This happens before hps is assembled because
    # 'current_source_code_zip' is one of the exported hyperparameters.
    current_time_str = datetime.now().strftime('%m-%d-%H-%M')
    FLAGS.current_source_code_zip = os.path.abspath(
        os.path.join(
            FLAGS.log_root, 'source_code_bak-' + current_time_str + '-' +
            FLAGS.mode + '.zip'))
    tf.logging.info('saving source code: %s', FLAGS.current_source_code_zip)
    python_list = glob.glob('./*.py')
    # Context manager guarantees the archive is finalized and closed even if
    # adding a file fails.
    with zipfile.ZipFile(FLAGS.current_source_code_zip, 'w') as zip_file:
        for source_path in python_list:
            zip_file.write(source_path)

    # Make a namedtuple hps, containing the values of the hyperparameters
    # that the model needs.
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'max_side_steps',
        'coverage', 'cov_loss_wt', 'pointer_gen', 'epoch_num',
        'current_source_code_zip', 'multi_dec_steps'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val.value  # add its value to the dict
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Create a batcher object that will create minibatches of data.
    batcher = Batcher(FLAGS.data_path,
                      vocab,
                      hps,
                      single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    if hps.mode == 'train':
        print("creating model...")
        model = SummarizationModel(hps, vocab)
        _release_placeholder()
        setup_training(model, batcher)
    elif hps.mode == 'eval':
        model = SummarizationModel(hps, vocab)
        _release_placeholder()
        run_eval(model, batcher, vocab)
    elif hps.mode in ('decode', 'auto_decode'):
        # The model is configured with max_dec_steps=1 because we only ever
        # run one step of the decoder at a time (to do beam search). Note
        # that the batcher is initialized with max_dec_steps equal to e.g.
        # 100 because the batches need to contain the full summaries.
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(decode_model_hps, vocab)
        if hps.mode == 'auto_decode':
            # auto_decode additionally hands the epoch count to the decoder.
            decoder = BeamSearchDecoder(model, batcher, vocab, hps.epoch_num)
        else:
            decoder = BeamSearchDecoder(model, batcher, vocab)
        _release_placeholder()
        # In plain decode mode this decodes indefinitely (unless
        # single_pass=True, in which case the dataset is decoded once).
        decoder.decode()
    else:
        raise ValueError(
            "The 'mode' flag must be one of train/eval/decode/auto_decode")