def train(self, sess, baseline_steps=0, loss_function='xent', use_baseline=True, **kwargs):
    self.init_training(sess=sess, **kwargs)

    if (loss_function == 'reinforce' and use_baseline and baseline_steps > 0
            and self.baseline_step.eval(sess) < baseline_steps):
        utils.log('pre-training reinforce baseline')
        for i in range(baseline_steps - self.baseline_step.eval(sess)):
            self.seq2seq_model.reinforce_step(sess, next(self.batch_iterator), update_model=False,
                                              use_sgd=False, update_baseline=True)

    utils.log('starting training')
    while True:
        try:
            self.train_step(sess=sess, loss_function=loss_function, use_baseline=use_baseline, **kwargs)
        except utils.EvalException:
            self.save(sess)
            step, score = self.training.scores[-1]
            self.manage_best_checkpoints(step, score)
        except utils.CheckpointException:
            self.save(sess)
def load_checkpoint(sess, checkpoint_dir, filename=None, blacklist=(), prefix=None):
    """
    if `filename` is None, we load last checkpoint, otherwise
    we ignore `checkpoint_dir` and load the given checkpoint file.
    """
    if filename is None:  # load last checkpoint
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt is not None:
            filename = ckpt.model_checkpoint_path
    else:
        checkpoint_dir = os.path.dirname(filename)

    vars_ = []
    var_names = []
    for var in tf.global_variables():
        if prefix is None or var.name.startswith(prefix):
            name = var.name if prefix is None else var.name[len(prefix) + 1:]
            vars_.append(var)
            var_names.append(name)

    var_file = os.path.join(checkpoint_dir, 'vars.pkl')
    if os.path.exists(var_file):
        with open(var_file, 'rb') as f:
            old_names = pickle.load(f)
    else:
        old_names = list(var_names)

    name_mapping = {}
    for name in old_names:
        name_ = name
        for key, value in variable_mapping:
            name_ = re.sub(key, value, name_)
        name_mapping[name] = name_

    var_names_ = []
    for name in var_names:
        for key, value in reverse_mapping:
            name = re.sub(key, value, name)
        var_names_.append(name)

    vars_ = dict(zip(var_names_, vars_))

    variables = {
        old_name[:-2]: vars_[new_name] for old_name, new_name in name_mapping.items()
        if new_name in vars_ and not any(p in new_name for p in blacklist)
    }

    if filename is not None:
        utils.log('reading model parameters from {}'.format(filename))
        tf.train.Saver(variables).restore(sess, filename)

        utils.debug('retrieved parameters ({})'.format(len(variables)))
        for var in sorted(variables.values(), key=lambda var: var.name):
            utils.debug('  {} {}'.format(var.name, var.get_shape()))
def decode(self, output=None, remove_unk=False, raw_output=False, max_test_size=None, **kwargs):
    utils.log('starting decoding')

    # empty `test` means that we read from standard input, which is not possible with multiple encoders
    # assert len(self.src_ext) == 1 or self.filenames.test
    # check that there is the right number of files for decoding
    # assert not self.filenames.test or len(self.filenames.test) == len(self.src_ext)

    output_file = None
    try:
        output_file = sys.stdout if output is None else open(output, 'w')

        paths = self.filenames.test or [None]
        lines = utils.read_lines(paths, binary=self.binary)
        if max_test_size:
            lines = itertools.islice(lines, max_test_size)

        if not self.filenames.test:  # interactive mode
            batch_size = 1
        else:
            batch_size = self.batch_size
            lines = list(lines)

        hypothesis_iter = self.decode_batch(lines, batch_size, remove_unk=remove_unk)
        for hypothesis, raw in hypothesis_iter:
            if raw_output:
                hypothesis = raw
            output_file.write(hypothesis + '\n')
            output_file.flush()
    finally:
        # don't close sys.stdout when decoding in interactive mode
        if output_file is not None and output_file is not sys.stdout:
            output_file.close()
def eval_step(self):
    # compute loss on dev set
    for prefix, dev_batches in zip(self.dev_prefix, self.dev_batches):
        eval_loss = sum(
            self.seq2seq_model.step(batch, update_model=False).loss * len(batch)
            for batch in dev_batches)
        eval_loss /= sum(map(len, dev_batches))

        utils.log("  {} eval: loss {:.2f}".format(prefix, eval_loss))
def load_checkpoint(sess, checkpoint_dir, filename, variables):
    if filename is not None:
        ckpt_file = os.path.join(checkpoint_dir, filename)
        utils.log('reading model parameters from {}'.format(ckpt_file))
        tf.train.Saver(variables).restore(sess, ckpt_file)

        utils.debug('retrieved parameters ({})'.format(len(variables)))
        for var in sorted(variables, key=lambda var: var.name):
            utils.debug('  {} {}'.format(var.name, var.get_shape()))
def manage_best_checkpoints(self, step, score):
    score_filename = os.path.join(self.checkpoint_dir, 'scores.txt')
    # try loading previous scores
    try:
        with open(score_filename) as f:
            # list of pairs (score, step)
            scores = [(float(line.split()[0]), int(line.split()[1])) for line in f]
    except IOError:
        scores = []

    if any(step_ >= step for _, step_ in scores):
        utils.warn('inconsistent scores.txt file')

    best_scores = sorted(scores, reverse=True)[:self.keep_best]

    def full_path(filename):
        return os.path.join(self.checkpoint_dir, filename)

    if any(score_ < score for score_, _ in best_scores) or not best_scores:
        # if this checkpoint is in the top, save it under a special name
        prefix = 'translate-{}.'.format(step)
        dest_prefix = 'best-{}.'.format(step)

        absolute_best = all(score_ < score for score_, _ in best_scores)
        if absolute_best:
            utils.log('new best model')

        for filename in os.listdir(self.checkpoint_dir):
            if filename.startswith(prefix):
                dest_filename = filename.replace(prefix, dest_prefix)
                shutil.copy(full_path(filename), full_path(dest_filename))

                # also copy to `best` if this checkpoint is the absolute best
                if absolute_best:
                    dest_filename = filename.replace(prefix, 'best.')
                    shutil.copy(full_path(filename), full_path(dest_filename))

        best_scores = sorted(best_scores + [(score, step)], reverse=True)

        for _, step_ in best_scores[self.keep_best:]:
            # remove checkpoints that are not in the top anymore
            prefix = 'best-{}'.format(step_)
            for filename in os.listdir(self.checkpoint_dir):
                if filename.startswith(prefix):
                    os.remove(full_path(filename))

    # save scores
    scores.append((score, step))
    with open(score_filename, 'w') as f:
        for score_, step_ in scores:
            f.write('{:.2f} {}\n'.format(score_, step_))
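# A minimal, self-contained sketch (hypothetical helper, not part of the original
# code): read the scores.txt file written by manage_best_checkpoints above and
# return the current top entries. Each line has the format "<score> <step>".
import os

def best_steps(checkpoint_dir, keep_best=1):
    with open(os.path.join(checkpoint_dir, 'scores.txt')) as f:
        scores = [(float(line.split()[0]), int(line.split()[1])) for line in f]
    # list of (score, step) pairs, best score first, mirroring the pruning logic above
    return sorted(scores, reverse=True)[:keep_best]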
def eval_step(self, sess):
    # compute loss on dev set
    for dev_batches in self.dev_batches:
        eval_loss = sum(
            self.seq2seq_model.step(sess, batch, update_model=False, update_baseline=False).loss * len(batch)
            for batch in dev_batches)
        eval_loss /= sum(map(len, dev_batches))

        utils.log("  eval: loss {:.2f}".format(eval_loss))
def eval_step(self, sess):
    # compute perplexity on dev set
    for dev_batches in self.dev_batches:
        eval_loss = sum(
            self.model.step(sess, batch, forward_only=True).loss * len(batch)
            for batch in dev_batches)
        eval_loss /= sum(map(len, dev_batches))

        perplexity = math.exp(eval_loss) if eval_loss < 300 else float('inf')
        utils.log("  eval: perplexity {:.2f}".format(perplexity))
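# Quick numeric illustration of the loss-to-perplexity conversion used in
# eval_step above: perplexity is exp(cross-entropy), capped to avoid overflow.
import math

for loss in (2.0, 4.6, 350.0):
    perplexity = math.exp(loss) if loss < 300 else float('inf')
    print('loss {:.1f} -> perplexity {:.2f}'.format(loss, perplexity))
# loss 2.0 -> perplexity 7.39
# loss 4.6 -> perplexity 99.48
# loss 350.0 -> perplexity inf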
def calculate_true_alignments(self, encoder_inputs, targets, input_length):
    sum_align = []
    for m_inputs, m_targets in zip(encoder_inputs[0], targets[0]):
        single_align = self.calculate_single_align(
            [self.vocab_in[int(item)] for item in m_inputs],
            [self.vocab_out[int(item)] for item in m_targets])
        sum_align.append(single_align)

    alignments = np.array(sum_align)
    utils.log("alignment matrix")
    utils.log(alignments)
    utils.log(alignments.shape)
    return alignments
def save_checkpoint(sess, saver, checkpoint_dir, step=None, name=None):
    var_file = os.path.join(checkpoint_dir, 'vars.pkl')
    name = name or 'translate'

    os.makedirs(checkpoint_dir, exist_ok=True)

    with open(var_file, 'wb') as f:
        var_names = [var.name for var in tf.global_variables()]
        pickle.dump(var_names, f)

    utils.log('saving model to {}'.format(checkpoint_dir))
    checkpoint_path = os.path.join(checkpoint_dir, name)
    saver.save(sess, checkpoint_path, step, write_meta_graph=False)

    utils.log('finished saving model')
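# Hypothetical round trip over the two helpers in this file, assuming a graph
# whose variables are already built; the directory name is illustrative.
import tensorflow as tf

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=1)

    # writes vars.pkl (the variable names) plus the checkpoint files
    save_checkpoint(sess, saver, 'model/checkpoints', step=1000)

    # later: restore the latest checkpoint, skipping blacklisted variables
    load_checkpoint(sess, 'model/checkpoints', blacklist=('learning_rate',))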
def train(self, sess, **kwargs):
    for model in self.models:
        utils.log('initializing {}'.format(model.name))
        model.init_training(sess=sess, **kwargs)

    while True:
        i = np.random.choice(len(self.models), 1, p=self.ratios)[0]
        model = self.models[i]
        try:
            model.train_step(sess=sess, **kwargs)
        except utils.CheckpointException:
            if i == 0:  # only save main model (includes all variables)
                model.save(sess)
                step, score = model.training.scores[-1]
                model.manage_best_checkpoints(step, score)
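# The task scheduling above is plain proportional sampling; a standalone sanity
# check (the ratios here are made up):
import numpy as np

ratios = [0.7, 0.3]  # per-task sampling probabilities, must sum to 1
counts = [0, 0]
for _ in range(10000):
    i = np.random.choice(len(ratios), 1, p=ratios)[0]
    counts[i] += 1
print(counts)  # roughly [7000, 3000]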
def save_embedding(self, output_dir):
    os.makedirs(output_dir, exist_ok=True)

    for encoder_or_decoder, vocab in zip(self.encoders + self.decoders, self.vocabs):
        utils.log('saving embeddings for: {}'.format(encoder_or_decoder.name))
        if encoder_or_decoder.name != "edits":
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                embedding_var = tf.get_variable('embedding_' + encoder_or_decoder.name)
                embedding_value = embedding_var.eval()
                filename = os.path.join(output_dir, embedding_var.name + ".txt")
                with open(filename, 'w') as file_:
                    for word, i in vocab.vocab.items():
                        file_.write('%s %s\n' % (word, ' '.join(map(str, embedding_value[i]))))
def decode(self, sess, beam_size, output=None, remove_unk=False, early_stopping=True,
           use_edits=False, **kwargs):
    utils.log('starting decoding')

    # empty `test` means that we read from standard input, which is not possible with multiple encoders
    assert len(self.src_ext) == 1 or self.filenames.test
    # we can't read binary data from standard input
    assert self.filenames.test or self.src_ext[0] not in self.binary_input
    # check that there is the right number of files for decoding
    assert not self.filenames.test or len(self.filenames.test) == len(self.src_ext)

    output_file = None
    try:
        output_file = sys.stdout if output is None else open(output, 'w')

        lines = utils.read_lines(self.filenames.test, self.src_ext, self.binary_input)

        if self.filenames.test is None:  # interactive mode
            batch_size = 1
        else:
            batch_size = self.batch_size
            lines = list(lines)

        hypothesis_iter = self._decode_batch(sess, lines, batch_size, beam_size=beam_size,
                                             early_stopping=early_stopping, remove_unk=remove_unk,
                                             use_edits=use_edits)
        for hypothesis in hypothesis_iter:
            output_file.write(hypothesis + '\n')
            output_file.flush()
    finally:
        # don't close sys.stdout when decoding in interactive mode
        if output_file is not None and output_file is not sys.stdout:
            output_file.close()
def train(self, **kwargs):
    for model in self.models:
        utils.log('initializing {}'.format(model.name))
        model.init_training(**kwargs)

    utils.log('starting training')
    while True:
        i = np.random.choice(len(self.models), 1, p=self.ratios)[0]
        model = self.models[i]
        try:
            model.train_step(**kwargs)
        except (utils.FinishedTrainingException, KeyboardInterrupt):
            utils.log('exiting...')
            self.main_model.save()
            return
        except utils.EvalException:
            if i == 0:
                model.save()
                step, score = model.training.scores[-1]
                model.manage_best_checkpoints(step, score)
        except utils.CheckpointException:
            if i == 0:  # only save main model (includes all variables)
                model.save()
                step, score = model.training.scores[-1]
                model.manage_best_checkpoints(step, score)
def train(self, baseline_steps=0, loss_function='xent', use_baseline=True, **kwargs):
    self.init_training(**kwargs)

    if (loss_function == 'reinforce' and use_baseline and baseline_steps > 0
            and self.baseline_step.eval() < baseline_steps):
        utils.log('pre-training reinforce baseline')
        for i in range(baseline_steps - self.baseline_step.eval()):
            self.seq2seq_model.reinforce_step(next(self.batch_iterator), update_model=False,
                                              use_sgd=False, update_baseline=True)

    utils.log('starting training')
    while True:
        try:
            self.train_step(loss_function=loss_function, use_baseline=use_baseline, **kwargs)
            sys.stdout.flush()
        except (utils.FinishedTrainingException, KeyboardInterrupt):
            utils.log('exiting...')
            self.save()
            return
        except utils.EvalException:
            self.save()
            step, score = self.training.scores[-1]
            self.manage_best_checkpoints(step, score)
        except utils.CheckpointException:
            self.save()
def load_checkpoint(sess, checkpoint_dir, filename=None, blacklist=()):
    """
    `checkpoint_dir` should be unique to this model

    if `filename` is None, we load last checkpoint, otherwise
    we ignore `checkpoint_dir` and load the given checkpoint file.
    """
    if filename is None:  # load last checkpoint
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt is not None:
            filename = ckpt.model_checkpoint_path
    else:
        checkpoint_dir = os.path.dirname(filename)

    var_file = os.path.join(checkpoint_dir, 'vars.pkl')

    if os.path.exists(var_file):
        with open(var_file, 'rb') as f:
            var_names = pickle.load(f)
            variables = [var for var in tf.global_variables() if var.name in var_names]
    else:
        variables = tf.global_variables()

    # remove variables from blacklist
    variables = [var for var in variables if not any(prefix in var.name for prefix in blacklist)]

    if filename is not None:
        utils.log('reading model parameters from {}'.format(filename))
        tf.train.Saver(variables).restore(sess, filename)

        utils.debug('retrieved parameters ({})'.format(len(variables)))
        for var in variables:
            utils.debug('  {} {}'.format(var.name, var.get_shape()))
def decode(self, sess, beam_size, output=None, remove_unk=False, **kwargs):
    utils.log('starting decoding')

    # empty `test` means that we read from standard input, which is not possible with multiple encoders
    assert len(self.src_ext) == 1 or self.filenames.test
    # we can't read binary data from standard input
    assert self.filenames.test or self.src_ext[0] not in self.binary_input
    # check that there is the right number of files for decoding
    assert not self.filenames.test or len(self.filenames.test) == len(self.src_ext)

    output_file = None
    try:
        output_file = sys.stdout if output is None else open(output, 'w')

        for lines in utils.read_lines(self.filenames.test, self.src_ext, self.binary_input):
            trg_sentence = self._decode_sentence(sess, lines, beam_size, remove_unk)
            output_file.write(trg_sentence + '\n')
            output_file.flush()
    finally:
        # don't close sys.stdout when decoding in interactive mode
        if output_file is not None and output_file is not sys.stdout:
            output_file.close()
def save_checkpoint(sess, saver, checkpoint_dir, step=None, name=None):
    """ `checkpoint_dir` should be unique to this model """
    var_file = os.path.join(checkpoint_dir, 'vars.pkl')
    name = name or 'translate'

    if not os.path.exists(checkpoint_dir):
        utils.log("creating directory {}".format(checkpoint_dir))
        os.makedirs(checkpoint_dir)

    with open(var_file, 'wb') as f:
        var_names = [var.name for var in tf.all_variables()]
        pickle.dump(var_names, f)

    utils.log('saving model to {}'.format(checkpoint_dir))
    checkpoint_path = os.path.join(checkpoint_dir, name)
    saver.save(sess, checkpoint_path, step, write_meta_graph=False)

    utils.log('finished saving model')
def evaluate(self, score_functions, on_dev=True, output=None, remove_unk=False, max_dev_size=None,
             raw_output=False, fix_edits=True, max_test_size=None, post_process_script=None,
             unk_replace=False, **kwargs):
    """
    Decode a dev or test set, and perform evaluation with respect to gold standard, using the provided
    scoring function. If `output` is defined, also save the decoding output to this file.
    When evaluating development data (`on_dev` to True), several dev sets can be specified
    (`dev_prefix` parameter in configuration files), and a score is computed for each of them.

    :param score_functions: names of the scoring functions used to score and rank models
      (typically 'bleu_score')
    :param on_dev: if True, evaluate the dev corpus, otherwise evaluate the test corpus
    :param output: save the hypotheses to this file
    :param remove_unk: remove the UNK symbols from the output
    :param max_dev_size: maximum number of lines to read from dev files
    :param max_test_size: maximum number of lines to read from test files
    :param raw_output: save raw decoder output (don't do post-processing like UNK deletion or
      subword concatenation). The evaluation is still done with the post-processed output.
    :param fix_edits: when predicting edit operations, pad shorter hypotheses with KEEP symbols.
    :return: scores of each corpus to evaluate
    """
    utils.log('starting evaluation')

    if on_dev:
        filenames = self.filenames.dev
    else:
        filenames = [self.filenames.test]

    # convert `output` into a list, for zip
    if isinstance(output, str):
        output = [output]
    elif output is None:
        output = [None] * len(filenames)

    scores = []
    utils.debug('output files: {}'.format(output))

    # evaluation on multiple corpora
    for dev_id, (filenames_, output_, prefix) in enumerate(zip(filenames, output, self.dev_prefix)):
        if self.dev_batches:
            dev_batches = self.dev_batches[dev_id]
            dev_loss = sum(self.seq2seq_model.step(batch, update_model=False).loss * len(batch)
                           for batch in dev_batches)
            dev_loss /= sum(map(len, dev_batches))
        else:
            # TODO
            dev_loss = 0

        extensions = list(self.extensions)
        if self.ref_ext is not None:
            extensions.append(self.ref_ext)

        lines = list(utils.read_lines(filenames_, binary=self.binary))
        if on_dev and max_dev_size:
            lines = lines[:max_dev_size]
        elif not on_dev and max_test_size:
            lines = lines[:max_test_size]

        hypotheses = []
        references = []

        output_file = None
        try:
            if output_ is not None:
                output_file = open(output_, 'w', encoding='utf-8')

            lines_ = list(zip(*lines))
            src_sentences = list(zip(*lines_[:len(self.src_ext)]))
            trg_sentences = list(zip(*lines_[len(self.src_ext):]))

            hypothesis_iter = self.decode_batch(lines, self.batch_size, remove_unk=remove_unk,
                                                fix_edits=fix_edits, unk_replace=unk_replace)

            for i, (sources, hypothesis, reference) in enumerate(zip(src_sentences, hypothesis_iter,
                                                                     trg_sentences)):
                if self.ref_ext is not None and on_dev:
                    reference = reference[-1]
                else:
                    reference = reference[0]  # single output for now

                hypothesis, raw = hypothesis  # hypothesis is an n-best list of token sequences
                hypotheses.append(hypothesis)
                references.append(reference.strip().replace('@@ ', ''))

                if output_file is not None:
                    if raw_output:
                        hypothesis = raw
                    line = "source:\t" + str(sources) + "\nref:\t" + str(reference) + "\n"
                    for item in hypothesis:
                        line += str(item) + '\n'
                    line += "\n"
                    output_file.write(line)
                    output_file.flush()
        finally:
            if output_file is not None:
                output_file.close()

        if post_process_script is not None:
            data = '\n'.join(hypotheses).encode()
            data = Popen([post_process_script], stdout=PIPE, stdin=PIPE).communicate(input=data)[0].decode()
            hypotheses = data.splitlines()

        scores_ = []
        summary = None
        for score_function in score_functions:
            try:
                if score_function == 'loss':
                    score = dev_loss
                    reversed_ = True
                else:
                    fun = getattr(evaluation, 'corpus_' + score_function)
                    try:
                        reversed_ = fun.reversed
                    except AttributeError:
                        reversed_ = False
                    # score the best (first) hypothesis of each n-best list
                    func_arg = [item[0] for item in hypotheses]
                    score, score_summary = fun(func_arg, references)
                    summary = summary or score_summary

                scores_.append((score_function, score, reversed_))
            except:
                pass

        score_info = ['{}={:.2f}'.format(key, value) for key, value, _ in scores_]
        score_info.insert(0, prefix)
        if summary:
            score_info.append(summary)
        if self.name is not None:
            score_info.insert(0, self.name)

        utils.log(' '.join(map(str, score_info)))

        # main score
        _, score, reversed_ = scores_[0]
        scores.append(-score if reversed_ else score)

    return scores
def evaluate(self, sess, beam_size, score_function, on_dev=True, output=None, remove_unk=False,
             max_dev_size=None, script_dir='scripts', early_stopping=True, use_edits=False, **kwargs):
    """
    :param score_function: name of the scoring function used to score and rank models
      (typically 'bleu_score')
    :param on_dev: if True, evaluate the dev corpus, otherwise evaluate the test corpus
    :param output: save the hypotheses to this file
    :param remove_unk: remove the UNK symbols from the output
    :param max_dev_size: maximum number of lines to read from dev files
    :param script_dir: parameter of scoring functions
    :return: scores of each corpus to evaluate
    """
    utils.log('starting decoding')
    assert on_dev or len(self.filenames.test) == len(self.extensions)

    filenames = self.filenames.dev if on_dev else [self.filenames.test]

    # convert `output` into a list, for zip
    if isinstance(output, str):
        output = [output]
    elif output is None:
        output = [None] * len(filenames)

    scores = []

    for filenames_, output_ in zip(filenames, output):  # evaluation on multiple corpora
        lines = list(utils.read_lines(filenames_, self.extensions, self.binary_input))
        if on_dev and max_dev_size:
            lines = lines[:max_dev_size]

        hypotheses = []
        references = []

        output_file = None
        try:
            if output_ is not None:
                output_file = open(output_, 'w')

            *src_sentences, trg_sentences = zip(*lines)
            src_sentences = list(zip(*src_sentences))

            hypothesis_iter = self._decode_batch(sess, src_sentences, self.batch_size,
                                                 beam_size=beam_size, early_stopping=early_stopping,
                                                 remove_unk=remove_unk, use_edits=use_edits)
            for sources, hypothesis, reference in zip(src_sentences, hypothesis_iter, trg_sentences):
                if use_edits:
                    reference = utils.reverse_edits(sources[0], reference)

                hypotheses.append(hypothesis)
                references.append(reference.strip().replace('@@ ', ''))

                if output_file is not None:
                    output_file.write(hypothesis + '\n')
                    output_file.flush()
        finally:
            if output_file is not None:
                output_file.close()

        # default scoring function is utils.bleu_score
        score, score_summary = getattr(evaluation, score_function)(hypotheses, references,
                                                                   script_dir=script_dir)

        # print the scoring information
        score_info = []
        if self.name is not None:
            score_info.append(self.name)
        score_info.append('score={:.2f}'.format(score))
        if score_summary:
            score_info.append(score_summary)

        utils.log(' '.join(map(str, score_info)))
        scores.append(score)

    return scores
def main(args=None):
    args = parser.parse_args(args)

    # read config file and default config
    with open('config/default.yaml') as f:
        default_config = utils.AttrDict(yaml.safe_load(f))
    with open(args.config) as f:
        config = utils.AttrDict(yaml.safe_load(f))

    if args.learning_rate is not None:
        args.reset_learning_rate = True

    # command-line parameters have higher precedence than config file
    for k, v in vars(args).items():
        if v is not None:
            config[k] = v

    # set default values for parameters that are not defined
    for k, v in default_config.items():
        config.setdefault(k, v)

    if config.score_function:
        config.score_functions = evaluation.name_mapping[config.score_function]

    if args.crash_test:
        config.max_train_size = 0

    if not config.debug:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # disable TensorFlow's debugging logs

    decoding_mode = any(arg is not None for arg in (args.decode, args.eval, args.align))

    # enforce parameter constraints
    assert config.steps_per_eval % config.steps_per_checkpoint == 0, (
        'steps-per-eval should be a multiple of steps-per-checkpoint')
    assert decoding_mode or args.train or args.save or args.save_embedding, (
        'you need to specify at least one action (decode, eval, align, or train)')
    assert not (args.average and args.ensemble)

    if args.train and args.purge:
        utils.log('deleting previous model')
        shutil.rmtree(config.model_dir, ignore_errors=True)

    os.makedirs(config.model_dir, exist_ok=True)

    # copy config file to model directory
    config_path = os.path.join(config.model_dir, 'config.yaml')
    if args.train and not os.path.exists(config_path):
        with open(args.config) as config_file, open(config_path, 'w') as dest_file:
            content = config_file.read()
            content = re.sub(r'model_dir:.*?\n', 'model_dir: {}\n'.format(config.model_dir),
                             content, flags=re.MULTILINE)
            dest_file.write(content)

    # also copy default config
    config_path = os.path.join(config.model_dir, 'default.yaml')
    if args.train and not os.path.exists(config_path):
        shutil.copy('config/default.yaml', config_path)

    # copy source code to model directory
    tar_path = os.path.join(config.model_dir, 'code.tar.gz')
    if args.train and not os.path.exists(tar_path):
        with tarfile.open(tar_path, "w:gz") as tar:
            for filename in os.listdir('translate'):
                if filename.endswith('.py'):
                    tar.add(os.path.join('translate', filename), arcname=filename)

    logging_level = logging.DEBUG if args.verbose else logging.INFO
    # always log to stdout in decoding and eval modes (to avoid overwriting precious train logs)
    log_path = os.path.join(config.model_dir, config.log_file)
    logger = utils.create_logger(log_path if args.train else None)
    logger.setLevel(logging_level)

    utils.log('label: {}'.format(config.label))
    utils.log('description:\n  {}'.format('\n  '.join(config.description.strip().split('\n'))))

    utils.log(' '.join(sys.argv))  # print command line
    try:  # print git hash
        commit_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
        utils.log('commit hash {}'.format(commit_hash))
    except:
        pass

    utils.log('tensorflow version: {}'.format(tf.__version__))

    # log parameters
    utils.debug('program arguments')
    for k, v in sorted(config.items(), key=itemgetter(0)):
        utils.debug('  {:<20} {}'.format(k, pformat(v)))

    if isinstance(config.dev_prefix, str):
        config.dev_prefix = [config.dev_prefix]

    if config.tasks is not None:
        config.tasks = [utils.AttrDict(task) for task in config.tasks]
        tasks = config.tasks
    else:
        tasks = [config]

    for task in tasks:
        for parameter, value in config.items():
            task.setdefault(parameter, value)

        task.encoders = [utils.AttrDict(encoder) for encoder in task.encoders]
        task.decoders = [utils.AttrDict(decoder) for decoder in task.decoders]

        for encoder_or_decoder in task.encoders + task.decoders:
            for parameter, value in task.items():
                encoder_or_decoder.setdefault(parameter, value)

        if args.max_len:
            args.max_input_len = args.max_len
        if args.max_output_len:  # override decoder's max len
            task.decoders[0].max_len = args.max_output_len
        if args.max_input_len:  # override encoder's max len
            task.encoders[0].max_len = args.max_input_len

    config.checkpoint_dir = os.path.join(config.model_dir, 'checkpoints')

    # setting random seeds
    if config.seed is None:
        config.seed = random.randrange(sys.maxsize)
    if config.tf_seed is None:
        config.tf_seed = random.randrange(sys.maxsize)
    utils.log('python random seed: {}'.format(config.seed))
    utils.log('tf random seed:     {}'.format(config.tf_seed))
    random.seed(config.seed)
    tf.set_random_seed(config.tf_seed)

    device = None
    if config.no_gpu:
        device = '/cpu:0'
        device_id = None
    elif config.gpu_id is not None:
        device = '/gpu:{}'.format(config.gpu_id)
        device_id = config.gpu_id
    else:
        device_id = 0

    # hide other GPUs so that TensorFlow won't use memory on them
    os.environ['CUDA_VISIBLE_DEVICES'] = '' if device_id is None else str(device_id)

    utils.log('creating model')
    utils.log('using device: {}'.format(device))

    with tf.device(device):
        if config.weight_scale:
            if config.initializer == 'uniform':
                initializer = tf.random_uniform_initializer(minval=-config.weight_scale,
                                                            maxval=config.weight_scale)
            else:
                initializer = tf.random_normal_initializer(stddev=config.weight_scale)
        else:
            initializer = None

        tf.get_variable_scope().set_initializer(initializer)

        config.decode_only = decoding_mode  # exempt from creating gradient ops

        if config.tasks is not None:
            model = MultiTaskModel(**config)
        else:
            model = TranslationModel(**config)

    # count parameters, not counting those created by the training algorithm (e.g. Adam)
    variables = [var for var in tf.global_variables() if not var.name.startswith('gradients')]
    utils.log('model parameters ({})'.format(len(variables)))
    parameter_count = 0
    for var in sorted(variables, key=lambda var: var.name):
        utils.log('  {} {}'.format(var.name, var.get_shape()))
        v = 1
        for d in var.get_shape():
            v *= d.value
        parameter_count += v
    utils.log('number of parameters: {:.2f}M'.format(parameter_count / 1e6))

    tf_config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = config.allow_growth
    tf_config.gpu_options.per_process_gpu_memory_fraction = config.mem_fraction

    def average_checkpoints(main_sess, sessions):
        for var in tf.global_variables():
            avg_value = sum(sess.run(var) for sess in sessions) / len(sessions)
            main_sess.run(var.assign(avg_value))

    with tf.Session(config=tf_config) as sess:
        best_checkpoint = os.path.join(config.checkpoint_dir, 'best')

        params = {'variable_mapping': config.variable_mapping,
                  'reverse_mapping': config.reverse_mapping,
                  'rnn_lm_model_dir': None, 'rnn_mt_model_dir': None,
                  'rnn_lm_cell_name': None, 'origin_model_ckpt': None}

        if config.ensemble and len(config.checkpoints) > 1:
            model.initialize(config.checkpoints, **params)
        elif config.average and len(config.checkpoints) > 1:
            model.initialize(reset=True)
            sessions = [tf.Session(config=tf_config) for _ in config.checkpoints]
            for sess_, checkpoint in zip(sessions, config.checkpoints):
                model.initialize(sess=sess_, checkpoints=[checkpoint], **params)
            average_checkpoints(sess, sessions)
        elif (not config.checkpoints and decoding_mode
              and os.path.isfile(best_checkpoint + '.index')):
            # in decoding and evaluation mode, unless specified otherwise (by `checkpoints`),
            # try to load the best checkpoint
            model.initialize([best_checkpoint], **params)
        else:
            # loads last checkpoint, unless `reset` is true
            model.initialize(**config)

        if config.output is not None:
            dirname = os.path.dirname(config.output)
            if dirname:
                os.makedirs(dirname, exist_ok=True)

        try:
            if args.save:
                model.save()
            elif args.save_embedding:
                if config.embedding_output_dir is None:
                    output_dir = "."
                else:
                    output_dir = config.embedding_output_dir
                model.save_embedding(output_dir)
            elif args.decode is not None:
                if config.align is not None:
                    config.align = True
                model.decode(**config)
            elif args.eval is not None:
                model.evaluate(on_dev=False, **config)
            elif args.align is not None:
                model.align(**config)
            elif args.train:
                model.train(**config)
        except KeyboardInterrupt:
            sys.exit()
def initialize(self, checkpoints=None, reset=False, reset_learning_rate=False, max_to_keep=1,
               keep_every_n_hours=0, sess=None, whitelist=None, blacklist=None, **kwargs):
    """
    :param checkpoints: list of checkpoints to load (instead of latest checkpoint)
    :param reset: don't load latest checkpoint, reset learning rate and global step
    :param reset_learning_rate: reset the learning rate to its initial value
    :param max_to_keep: keep this many latest checkpoints at all times
    :param keep_every_n_hours: and keep checkpoints every n hours
    """
    sess = sess or tf.get_default_session()

    # check for None first, to avoid comparing None with an int
    if keep_every_n_hours is None or keep_every_n_hours <= 0:
        keep_every_n_hours = float('inf')

    self.saver = tf.train.Saver(max_to_keep=max_to_keep,
                                keep_checkpoint_every_n_hours=keep_every_n_hours, sharded=False)

    sess.run(tf.global_variables_initializer())

    # load pre-trained embeddings
    for encoder_or_decoder, vocab in zip(self.encoders + self.decoders, self.vocabs):
        if encoder_or_decoder.embedding_file:
            utils.log('loading embeddings from: {}'.format(encoder_or_decoder.embedding_file))
            embeddings = {}
            with open(encoder_or_decoder.embedding_file) as embedding_file:
                for line in embedding_file:
                    word, vector = line.split(' ', 1)
                    if word in vocab.vocab:
                        embeddings[word] = np.array(list(map(float, vector.split())))

            # standardize (mean of 0, std of 0.01)
            mean = sum(embeddings.values()) / len(embeddings)
            std = np.sqrt(sum((value - mean) ** 2 for value in embeddings.values())) / (len(embeddings) - 1)
            for key in embeddings:
                embeddings[key] = 0.01 * (embeddings[key] - mean) / std

            # change TensorFlow variable's value
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                embedding_var = tf.get_variable('embedding_' + encoder_or_decoder.name)
                embedding_value = embedding_var.eval()
                for word, i in vocab.vocab.items():
                    if word in embeddings:
                        embedding_value[i] = embeddings[word]
                sess.run(embedding_var.assign(embedding_value))

    if whitelist:
        with open(whitelist) as f:
            whitelist = list(line.strip() for line in f)

    if blacklist:
        with open(blacklist) as f:
            blacklist = list(line.strip() for line in f)
    else:
        blacklist = []

    blacklist.append('dropout_keep_prob')
    if reset_learning_rate or reset:
        blacklist.append('learning_rate')
    if reset:
        blacklist.append('global_step')

    params = {k: kwargs.get(k) for k in ('variable_mapping', 'reverse_mapping')}

    if checkpoints and len(self.models) > 1:
        assert len(self.models) == len(checkpoints)
        for i, checkpoint in enumerate(checkpoints, 1):
            load_checkpoint(sess, None, checkpoint, blacklist=blacklist, whitelist=whitelist,
                            prefix='model_{}'.format(i), **params)
    elif checkpoints:  # load partial checkpoints
        for checkpoint in checkpoints:  # checkpoint files to load
            load_checkpoint(sess, None, checkpoint, blacklist=blacklist, whitelist=whitelist, **params)
    elif not reset:
        load_checkpoint(sess, self.checkpoint_dir, blacklist=blacklist, whitelist=whitelist, **params)

    utils.debug('global step: {}'.format(self.global_step.eval()))
    utils.debug('baseline step: {}'.format(self.baseline_step.eval()))
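# Standalone sketch of the embedding standardization performed in initialize
# above, with toy vectors; it mirrors the in-place computation, including the
# division by len(embeddings) - 1 outside the square root.
import numpy as np

embeddings = {'cat': np.array([0.3, -1.2]), 'dog': np.array([0.5, 0.9]),
              'bird': np.array([-0.1, 0.4])}

mean = sum(embeddings.values()) / len(embeddings)
std = np.sqrt(sum((v - mean) ** 2 for v in embeddings.values())) / (len(embeddings) - 1)
embeddings = {w: 0.01 * (v - mean) / std for w, v in embeddings.items()}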
def main(args=None):
    args = parser.parse_args(args)

    # read config file and default config
    with open('config/default.yaml') as f:
        default_config = utils.AttrDict(yaml.safe_load(f))
    with open(args.config) as f:
        config = utils.AttrDict(yaml.safe_load(f))

    if args.learning_rate is not None:
        args.reset_learning_rate = True

    # command-line parameters have higher precedence than config file
    for k, v in vars(args).items():
        if v is not None:
            config[k] = v

    # set default values for parameters that are not defined
    for k, v in default_config.items():
        config.setdefault(k, v)

    # enforce parameter constraints
    assert config.steps_per_eval % config.steps_per_checkpoint == 0, (
        'steps-per-eval should be a multiple of steps-per-checkpoint')
    assert args.decode is not None or args.eval or args.train or args.align, (
        'you need to specify at least one action (decode, eval, align, or train)')
    assert not (args.avg_checkpoints and args.ensemble)

    if args.purge:
        utils.log('deleting previous model')
        shutil.rmtree(config.model_dir, ignore_errors=True)

    os.makedirs(config.model_dir, exist_ok=True)

    # copy config file to model directory
    config_path = os.path.join(config.model_dir, 'config.yaml')
    if not os.path.exists(config_path):
        shutil.copy(args.config, config_path)

    # also copy default config
    config_path = os.path.join(config.model_dir, 'default.yaml')
    if not os.path.exists(config_path):
        shutil.copy('config/default.yaml', config_path)

    # copy source code to model directory
    tar_path = os.path.join(config.model_dir, 'code.tar.gz')
    if not os.path.exists(tar_path):
        with tarfile.open(tar_path, "w:gz") as tar:
            for filename in os.listdir('translate'):
                if filename.endswith('.py'):
                    tar.add(os.path.join('translate', filename), arcname=filename)

    logging_level = logging.DEBUG if args.verbose else logging.INFO
    # always log to stdout in decoding and eval modes (to avoid overwriting precious train logs)
    log_path = os.path.join(config.model_dir, config.log_file)
    logger = utils.create_logger(log_path if args.train else None)
    logger.setLevel(logging_level)

    utils.log('label: {}'.format(config.label))
    utils.log('description:\n  {}'.format('\n  '.join(config.description.strip().split('\n'))))

    utils.log(' '.join(sys.argv))  # print command line
    try:  # print git hash
        commit_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
        utils.log('commit hash {}'.format(commit_hash))
    except:
        pass

    utils.log('tensorflow version: {}'.format(tf.__version__))

    # log parameters
    utils.debug('program arguments')
    for k, v in sorted(config.items(), key=itemgetter(0)):
        utils.debug('  {:<20} {}'.format(k, pformat(v)))

    if isinstance(config.dev_prefix, str):
        config.dev_prefix = [config.dev_prefix]

    if config.tasks is not None:
        config.tasks = [utils.AttrDict(task) for task in config.tasks]
        tasks = config.tasks
    else:
        tasks = [config]

    for task in tasks:
        for parameter, value in config.items():
            task.setdefault(parameter, value)

        task.encoders = [utils.AttrDict(encoder) for encoder in task.encoders]
        task.decoders = [utils.AttrDict(decoder) for decoder in task.decoders]

        for encoder_or_decoder in task.encoders + task.decoders:
            for parameter, value in task.items():
                encoder_or_decoder.setdefault(parameter, value)

    device = None
    if config.no_gpu:
        device = '/cpu:0'
    elif config.gpu_id is not None:
        device = '/gpu:{}'.format(config.gpu_id)

    utils.log('creating model')
    utils.log('using device: {}'.format(device))

    with tf.device(device):
        config.checkpoint_dir = os.path.join(config.model_dir, 'checkpoints')

        if config.weight_scale:
            if config.initializer == 'uniform':
                initializer = tf.random_uniform_initializer(minval=-config.weight_scale,
                                                            maxval=config.weight_scale)
            else:
                initializer = tf.random_normal_initializer(stddev=config.weight_scale)
        else:
            initializer = None

        tf.get_variable_scope().set_initializer(initializer)

        # exempt from creating gradient ops
        config.decode_only = args.decode is not None or args.eval or args.align

        if config.tasks is not None:
            model = MultiTaskModel(**config)
        else:
            model = TranslationModel(**config)

    # count parameters
    utils.log('model parameters ({})'.format(len(tf.global_variables())))
    parameter_count = 0
    for var in tf.global_variables():
        utils.log('  {} {}'.format(var.name, var.get_shape()))
        if not var.name.startswith('gradients'):
            # not counting parameters created by the training algorithm (e.g. Adam)
            v = 1
            for d in var.get_shape():
                v *= d.value
            parameter_count += v
    utils.log('number of parameters: {:.2f}M'.format(parameter_count / 1e6))

    tf_config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = config.allow_growth
    tf_config.gpu_options.per_process_gpu_memory_fraction = config.mem_fraction

    def average_checkpoints(main_sess, sessions):
        for var in tf.global_variables():
            avg_value = sum(sess.run(var) for sess in sessions) / len(sessions)
            main_sess.run(var.assign(avg_value))

    with tf.Session(config=tf_config) as sess:
        best_checkpoint = os.path.join(config.checkpoint_dir, 'best')

        if ((config.ensemble or config.avg_checkpoints)
                and (args.eval or args.decode is not None) and len(config.checkpoints) > 1):
            # create one session for each model in the ensemble
            sessions = [tf.Session() for _ in config.checkpoints]
            for sess_, checkpoint in zip(sessions, config.checkpoints):
                model.initialize(sess_, [checkpoint])

            if config.ensemble:
                sess = sessions
            else:
                sess = sessions[0]
                average_checkpoints(sess, sessions)
        elif (not config.checkpoints and (args.eval or args.decode is not None or args.align)
              and os.path.isfile(best_checkpoint + '.index')):
            # in decoding and evaluation mode, unless specified otherwise (by `checkpoints`),
            # try to load the best checkpoint
            model.initialize(sess, [best_checkpoint])
        else:
            # loads last checkpoint, unless `reset` is true
            model.initialize(sess, **config)

        if args.decode is not None:
            model.decode(sess, **config)
        elif args.eval:
            model.evaluate(sess, on_dev=False, **config)
        elif args.align:
            model.align(sess, **config)
        elif args.train:
            try:
                model.train(sess=sess, **config)
            except (KeyboardInterrupt, utils.FinishedTrainingException):
                utils.log('exiting...')
                model.save(sess)
                sys.exit()
def train_step(self, steps_per_checkpoint, model_dir, steps_per_eval=None, max_steps=0, max_epochs=0,
               eval_burn_in=0, decay_if_no_progress=None, decay_after_n_epoch=None,
               decay_every_n_epoch=None, sgd_after_n_epoch=None, sgd_learning_rate=None,
               min_learning_rate=None, loss_function='xent', use_baseline=True, **kwargs):
    if min_learning_rate is not None and self.learning_rate.eval() < min_learning_rate:
        utils.debug('learning rate is too small: stopping')
        raise utils.FinishedTrainingException

    if 0 < max_steps <= self.global_step.eval() or 0 < max_epochs <= self.epoch.eval():
        raise utils.FinishedTrainingException

    start_time = time.time()

    if loss_function == 'reinforce':
        step_function = self.seq2seq_model.reinforce_step
    else:
        step_function = self.seq2seq_model.step

    res = step_function(next(self.batch_iterator), update_model=True,
                        use_sgd=self.training.use_sgd, update_baseline=True)

    self.training.loss += res.loss
    self.training.baseline_loss += getattr(res, 'baseline_loss', 0)
    self.training.time += time.time() - start_time
    self.training.steps += 1

    global_step = self.global_step.eval()
    epoch = self.epoch.eval()

    if decay_after_n_epoch is not None and self.batch_size * global_step >= decay_after_n_epoch * self.train_size:
        if decay_every_n_epoch is not None and (self.batch_size * (global_step - self.training.last_decay)
                                                >= decay_every_n_epoch * self.train_size):
            self.learning_rate_decay_op.eval()
            utils.debug('  decaying learning rate to: {:.3g}'.format(self.learning_rate.eval()))
            self.training.last_decay = global_step

    if sgd_after_n_epoch is not None and epoch >= sgd_after_n_epoch:
        if not self.training.use_sgd:
            utils.debug('epoch {}, starting to use SGD'.format(epoch + 1))
            self.training.use_sgd = True
            if sgd_learning_rate is not None:
                self.learning_rate.assign(sgd_learning_rate).eval()
            self.training.last_decay = global_step  # reset learning rate decay

    if steps_per_checkpoint and global_step % steps_per_checkpoint == 0:
        loss = self.training.loss / self.training.steps
        baseline_loss = self.training.baseline_loss / self.training.steps
        step_time = self.training.time / self.training.steps

        summary = 'step {} epoch {} learning rate {:.3g} step-time {:.3f} loss {:.3f}'.format(
            global_step, epoch + 1, self.learning_rate.eval(), step_time, loss)
        if self.name is not None:
            summary = '{} {}'.format(self.name, summary)
        if use_baseline and loss_function == 'reinforce':
            summary = '{} baseline-loss {:.4f}'.format(summary, baseline_loss)

        utils.log(summary)

        if decay_if_no_progress and len(self.training.losses) >= decay_if_no_progress:
            if loss >= max(self.training.losses[:decay_if_no_progress]):
                self.learning_rate_decay_op.eval()

        self.training.losses.append(loss)
        self.training.loss, self.training.time, self.training.steps, self.training.baseline_loss = 0, 0, 0, 0

    if steps_per_eval and global_step % steps_per_eval == 0 and 0 <= eval_burn_in <= global_step:
        eval_dir = 'eval' if self.name is None else 'eval_{}'.format(self.name)
        eval_output = os.path.join(model_dir, eval_dir)
        os.makedirs(eval_output, exist_ok=True)

        # if there are several dev files, we define several output files
        output = [os.path.join(eval_output, '{}.{}.out'.format(prefix, global_step))
                  for prefix in self.dev_prefix]

        kwargs_ = dict(kwargs)
        kwargs_['output'] = output
        score, *_ = self.evaluate(on_dev=True, **kwargs_)
        self.training.scores.append((global_step, score))

    if steps_per_eval and global_step % steps_per_eval == 0:
        raise utils.EvalException
    elif steps_per_checkpoint and global_step % steps_per_checkpoint == 0:
        raise utils.CheckpointException
def train(self, sess, beam_size, steps_per_checkpoint, steps_per_eval=None, eval_output=None,
          max_steps=0, max_epochs=0, eval_burn_in=0, decay_if_no_progress=5,
          decay_after_n_epoch=None, decay_every_n_epoch=None, sgd_after_n_epoch=None,
          loss_function='xent', baseline_steps=0, reinforce_baseline=True, reward_function=None,
          use_edits=False, **kwargs):
    utils.log('reading training and development data')

    self.global_step = 0
    for model in self.models:
        model.read_data(**kwargs)
        # those parameters are used to track the progress of each task
        model.loss, model.time, model.steps = 0, 0, 0
        model.baseline_loss = 0
        model.previous_losses = []

        global_step = model.global_step.eval(sess)
        model.epoch = model.batch_size * global_step // model.train_size
        model.last_decay = global_step
        for _ in range(global_step):  # read all the data up to this step
            next(model.batch_iterator)
        self.global_step += global_step

    # pre-train baseline
    if loss_function == 'reinforce' and baseline_steps > 0 and reinforce_baseline:
        utils.log('pre-training baseline')
        for model in self.models:
            baseline_loss = 0
            for step in range(1, baseline_steps + 1):
                baseline_loss += model.baseline_step(sess, reward_function=reward_function,
                                                     use_edits=use_edits)
                if step % steps_per_checkpoint == 0:
                    loss = baseline_loss / steps_per_checkpoint
                    baseline_loss = 0
                    utils.log('{} step {} baseline loss {:.4f}'.format(model.name, step, loss))

    utils.log('starting training')
    while True:
        i = np.random.choice(len(self.models), 1, p=self.ratios)[0]
        model = self.models[i]

        start_time = time.time()
        res = model.train_step(sess, loss_function=loss_function, reward_function=reward_function,
                               use_edits=use_edits)

        model.loss += res.loss
        if loss_function == 'reinforce':
            model.baseline_loss += res.baseline_loss

        model.time += time.time() - start_time
        model.steps += 1
        self.global_step += 1

        model_global_step = model.global_step.eval(sess)
        epoch = model.batch_size * model_global_step / model.train_size
        model.epoch = int(epoch) + 1

        if decay_after_n_epoch is not None and epoch >= decay_after_n_epoch:
            if decay_every_n_epoch is not None and (model.batch_size * (model_global_step - model.last_decay)
                                                    >= decay_every_n_epoch * model.train_size):
                sess.run(model.learning_rate_decay_op)
                utils.debug('  decaying learning rate to: {:.4f}'.format(model.learning_rate.eval()))
                model.last_decay = model_global_step

        if sgd_after_n_epoch is not None and epoch >= sgd_after_n_epoch:
            if not model.use_sgd:
                utils.debug('  epoch {}, starting to use SGD'.format(model.epoch))
                model.use_sgd = True

        if steps_per_checkpoint and self.global_step % steps_per_checkpoint == 0:
            for model_ in self.models:
                if model_.steps == 0:
                    continue

                loss_ = model_.loss / model_.steps
                step_time_ = model_.time / model_.steps

                if loss_function == 'reinforce':
                    baseline_loss_ = ' baseline loss {:.4f}'.format(model_.baseline_loss / model_.steps)
                    model_.baseline_loss = 0
                else:
                    baseline_loss_ = ''

                utils.log('{} step {} epoch {} learning rate {:.4f} step-time {:.4f}{} loss {:.4f}'.format(
                    model_.name, model_.global_step.eval(sess), model.epoch,
                    model_.learning_rate.eval(), step_time_, baseline_loss_, loss_))

                if decay_if_no_progress and len(model_.previous_losses) >= decay_if_no_progress:
                    if loss_ >= max(model_.previous_losses[:decay_if_no_progress]):
                        sess.run(model_.learning_rate_decay_op)

                model_.previous_losses.append(loss_)
                model_.loss, model_.time, model_.steps = 0, 0, 0
                model_.eval_step(sess)

            self.save(sess)

        if steps_per_eval and self.global_step % steps_per_eval == 0 and 0 <= eval_burn_in <= self.global_step:
            score = 0
            for ratio, model_ in zip(self.ratios, self.models):
                if eval_output is None:
                    output = None
                elif len(model_.filenames.dev) > 1:
                    # if there are several dev files, we define several output files
                    # TODO: put dev_prefix into the name of the output file (also in the logging output)
                    output = ['{}.{}.{}.{}'.format(eval_output, i + 1, model_.name,
                                                   model_.global_step.eval(sess))
                              for i in range(len(model_.filenames.dev))]
                else:
                    output = '{}.{}.{}'.format(eval_output, model_.name, model_.global_step.eval(sess))

                # kwargs_ = {**kwargs, 'output': output}
                kwargs_ = dict(kwargs)
                kwargs_['output'] = output

                scores_ = model_.evaluate(sess, beam_size, on_dev=True, use_edits=use_edits, **kwargs_)
                score_ = scores_[0]  # in case there are several dev files, only the first one counts

                # if there is a main task, pick best checkpoint according to its score
                # otherwise use the average score across tasks
                if self.main_task is None:
                    score += ratio * score_
                elif model_.name == self.main_task:
                    score = score_

            self.manage_best_checkpoints(self.global_step, score)

        if 0 < max_steps <= self.global_step or 0 < max_epochs <= epoch:
            utils.log('finished training')
            # TODO: save models
            return
def evaluate(self, score_functions, on_dev=True, output=None, remove_unk=False, max_dev_size=None,
             raw_output=False, fix_edits=True, max_test_size=None, post_process_script=None,
             unk_replace=False, **kwargs):
    """
    Decode a dev or test set, and perform evaluation with respect to gold standard, using the provided
    scoring function. If `output` is defined, also save the decoding output to this file.
    When evaluating development data (`on_dev` to True), several dev sets can be specified
    (`dev_prefix` parameter in configuration files), and a score is computed for each of them.

    :param score_functions: names of the scoring functions used to score and rank models
      (typically 'bleu_score')
    :param on_dev: if True, evaluate the dev corpus, otherwise evaluate the test corpus
    :param output: save the hypotheses to this file
    :param remove_unk: remove the UNK symbols from the output
    :param max_dev_size: maximum number of lines to read from dev files
    :param max_test_size: maximum number of lines to read from test files
    :param raw_output: save raw decoder output (don't do post-processing like UNK deletion or
      subword concatenation). The evaluation is still done with the post-processed output.
    :param fix_edits: when predicting edit operations, pad shorter hypotheses with KEEP symbols.
    :return: scores of each corpus to evaluate
    """
    utils.log('starting evaluation')

    if on_dev:
        filenames = self.filenames.dev
    else:
        filenames = [self.filenames.test]

    # convert `output` into a list, for zip
    if isinstance(output, str):
        output = [output]
    elif output is None:
        output = [None] * len(filenames)

    scores = []

    # evaluation on multiple corpora
    for dev_id, (filenames_, output_, prefix) in enumerate(zip(filenames, output, self.dev_prefix)):
        if self.ref_ext is not None:
            filenames_ = filenames_[:len(self.src_ext)] + filenames_[-1:]

        if self.dev_batches:
            dev_batches = self.dev_batches[dev_id]
            dev_loss = sum(self.seq2seq_model.step(batch, update_model=False).loss * len(batch)
                           for batch in dev_batches)
            dev_loss /= sum(map(len, dev_batches))
        else:
            # TODO
            dev_loss = 0

        src_lines = list(utils.read_lines(filenames_[:len(self.src_ext)],
                                          binary=self.binary[:len(self.src_ext)]))
        trg_lines = list(utils.read_lines([filenames_[len(self.src_ext)]]))

        # the reference file may contain several references per source sentence
        assert len(trg_lines) % len(src_lines) == 0
        references = []
        ref_count = len(trg_lines) // len(src_lines)
        for i in range(len(src_lines)):
            ref = trg_lines[i * ref_count:(i + 1) * ref_count]
            ref = [ref_[0].strip().replace('@@ ', '').replace('@@', '') for ref_ in ref]
            references.append(ref)

        if on_dev and max_dev_size:
            max_size = max_dev_size
        elif not on_dev and max_test_size:
            max_size = max_test_size
        else:
            max_size = len(src_lines)

        src_lines = src_lines[:max_size]
        references = references[:max_size]

        hypotheses = []

        output_file = None
        try:
            if output_ is not None:
                output_file = open(output_, 'w')

            hypothesis_iter = self.decode_batch(src_lines, self.batch_size, remove_unk=remove_unk,
                                                fix_edits=fix_edits, unk_replace=unk_replace)

            if post_process_script is not None:
                # post-process all hypotheses in one external call, then rebuild the iterator
                # (use a fresh list to avoid appending to the sequence being iterated)
                raw_hypotheses, raw = zip(*hypothesis_iter)
                data = '\n'.join(raw_hypotheses).encode()
                data = Popen([post_process_script], stdout=PIPE, stdin=PIPE).communicate(input=data)[0].decode()
                hypothesis_iter = zip(data.splitlines(), raw)

            for i, hypothesis in enumerate(hypothesis_iter):
                hypothesis, raw = hypothesis
                hypotheses.append(hypothesis)

                if output_file is not None:
                    if raw_output:
                        hypothesis = raw
                    output_file.write(hypothesis + '\n')
                    output_file.flush()
        finally:
            if output_file is not None:
                output_file.close()

        scores_ = []
        summary = None
        for score_function in score_functions:
            try:
                if score_function != 'bleu':
                    references_ = [ref[0] for ref in references]
                else:
                    references_ = references

                if score_function == 'loss':
                    score = dev_loss
                    reversed_ = True
                else:
                    fun = getattr(evaluation, 'corpus_' + score_function)
                    try:
                        reversed_ = fun.reversed
                    except AttributeError:
                        reversed_ = False
                    score, score_summary = fun(hypotheses, references_)
                    summary = summary or score_summary

                scores_.append((score_function, score, reversed_))
            except:
                pass

        score_info = ['{}={:.2f}'.format(key, value) for key, value, _ in scores_]
        score_info.insert(0, prefix)
        if summary:
            score_info.append(summary)
        if self.name is not None:
            score_info.insert(0, self.name)

        utils.log(' '.join(map(str, score_info)))

        # main score
        _, score, reversed_ = scores_[0]
        scores.append(-score if reversed_ else score)

    return scores
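# The score functions resolved above via getattr(evaluation, 'corpus_' + name)
# share a (hypotheses, references) -> (score, summary) contract, with an
# optional `reversed` attribute for lower-is-better metrics. A hypothetical
# scorer matching that contract:
def corpus_error_rate(hypotheses, references):
    errors = sum(h.split() != r.split() for h, r in zip(hypotheses, references))
    score = 100.0 * errors / max(len(hypotheses), 1)
    return score, None

corpus_error_rate.reversed = True  # lower is better: `evaluate` negates it when ranking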
def load_checkpoint(sess, checkpoint_dir, filename=None, blacklist=()):
    """
    if `filename` is None, we load last checkpoint, otherwise
    we ignore `checkpoint_dir` and load the given checkpoint file.
    """
    if filename is None:  # load last checkpoint
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt is not None:
            filename = ckpt.model_checkpoint_path
    else:
        checkpoint_dir = os.path.dirname(filename)

    var_file = os.path.join(checkpoint_dir, 'vars.pkl')

    if os.path.exists(var_file):
        with open(var_file, 'rb') as f:
            var_names = pickle.load(f)

        variables = {}
        for var_name in var_names:
            # first, try to find a graph variable whose name maps back to this
            # checkpoint name through `reverse_mapping`
            skip = False
            for var in tf.global_variables():
                name = var.name
                for key, value in reverse_mapping:
                    name = re.sub(key, value, name)
                if var_name == name:
                    variables[var_name] = var
                    skip = True
                    break
            if skip:
                continue

            # otherwise, map the checkpoint name forward through `variable_mapping`
            name = var_name
            for key, value in variable_mapping:
                name = re.sub(key, value, name)
            for var in tf.global_variables():
                if var.name == name:
                    variables[var_name] = var
                    break
    else:
        variables = {var.name: var for var in tf.global_variables()}

    # remove variables from blacklist (and drop the ':0' suffix from names)
    variables = {name[:-2]: var for name, var in variables.items()
                 if not any(prefix in name for prefix in blacklist)}

    if filename is not None:
        utils.log('reading model parameters from {}'.format(filename))
        tf.train.Saver(variables).restore(sess, filename)

        utils.debug('retrieved parameters ({})'.format(len(variables)))
        for var in sorted(variables.values(), key=lambda var: var.name):
            utils.debug('  {} {}'.format(var.name, var.get_shape()))
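# Toy illustration of the regex-based renaming used above: `variable_mapping`
# (and `reverse_mapping`) are lists of (pattern, replacement) pairs applied with
# re.sub to translate old checkpoint names to current graph names. The pattern
# below is made up.
import re

variable_mapping = [(r'^seq2seq/', 'multi_encoder/')]

old_name = 'seq2seq/encoder_fr/embedding:0'
new_name = old_name
for pattern, replacement in variable_mapping:
    new_name = re.sub(pattern, replacement, new_name)
print(new_name)  # multi_encoder/encoder_fr/embedding:0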
def main(args=None):
    args = parser.parse_args(args)

    # read config file and default config
    with open('config/default.yaml') as f:
        default_config = utils.AttrDict(yaml.safe_load(f))
    with open(args.config) as f:
        config = utils.AttrDict(yaml.safe_load(f))

    if args.learning_rate is not None:
        args.reset_learning_rate = True

    # command-line parameters have higher precedence than config file
    for k, v in vars(args).items():
        if v is not None:
            config[k] = v

    # set default values for parameters that are not defined
    for k, v in default_config.items():
        config.setdefault(k, v)

    # enforce parameter constraints
    assert config.steps_per_eval % config.steps_per_checkpoint == 0, (
        'steps-per-eval should be a multiple of steps-per-checkpoint')
    assert args.decode is not None or args.eval or args.train or args.align, (
        'you need to specify at least one action (decode, eval, align, or train)')

    if args.purge:
        utils.log('deleting previous model')
        shutil.rmtree(config.model_dir, ignore_errors=True)

    logging_level = logging.DEBUG if args.verbose else logging.INFO
    # always log to stdout in decoding and eval modes (to avoid overwriting precious train logs)
    logger = utils.create_logger(config.log_file if args.train else None)
    logger.setLevel(logging_level)

    utils.log(' '.join(sys.argv))  # print command line
    try:  # print git hash
        commit_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
        utils.log('commit hash {}'.format(commit_hash))
    except:
        pass

    # list of encoder and decoder parameter names (each encoder and decoder can have
    # a different value for those parameters)
    model_parameters = [
        'cell_size', 'layers', 'vocab_size', 'embedding_size', 'attention_filters',
        'attention_filter_length', 'use_lstm', 'time_pooling', 'attention_window_size',
        'dynamic', 'binary', 'character_level', 'bidir', 'load_embeddings', 'pooling_avg',
        'swap_memory', 'parallel_iterations', 'input_layers', 'residual_connections', 'attn_size'
    ]
    # TODO: independent model dir for each task
    task_parameters = [
        'data_dir', 'train_prefix', 'dev_prefix', 'vocab_prefix', 'ratio', 'lm_file',
        'learning_rate', 'learning_rate_decay_factor', 'max_input_len', 'max_output_len',
        'encoders', 'decoder'
    ]

    # in case no task is defined (standard mono-task settings), define a "main" task
    config.setdefault('tasks', [{
        'encoders': config.encoders,
        'decoder': config.decoder,
        'name': 'main',
        'ratio': 1.0
    }])
    config.tasks = [utils.AttrDict(task) for task in config.tasks]

    for task in config.tasks:
        for parameter in task_parameters:
            task.setdefault(parameter, config.get(parameter))

        if isinstance(task.dev_prefix, str):  # for back-compatibility with old config files
            task.dev_prefix = [task.dev_prefix]

        # convert dicts to AttrDicts for convenience
        task.encoders = [utils.AttrDict(encoder) for encoder in task.encoders]
        task.decoder = utils.AttrDict(task.decoder)

        for encoder_or_decoder in task.encoders + [task.decoder]:
            # move parameters all the way up from base level to encoder/decoder level:
            # default values for encoder/decoder parameters can be defined at the task level
            # and base level; default values for tasks can be defined at the base level
            for parameter in model_parameters:
                if parameter in encoder_or_decoder:
                    continue
                elif parameter in task:
                    encoder_or_decoder[parameter] = task[parameter]
                else:
                    encoder_or_decoder[parameter] = config.get(parameter)

    # log parameters
    utils.log('program arguments')
    for k, v in sorted(config.items(), key=itemgetter(0)):
        if k == 'tasks':
            utils.log('  {:<20}\n{}'.format(k, pformat(v)))
        elif k not in model_parameters and k not in task_parameters:
            utils.log('  {:<20} {}'.format(k, pformat(v)))

    device = None
    if config.no_gpu:
        device = '/cpu:0'
    elif config.gpu_id is not None:
        device = '/gpu:{}'.format(config.gpu_id)

    utils.log('creating model')
    utils.log('using device: {}'.format(device))

    with tf.device(device):
        checkpoint_dir = os.path.join(config.model_dir, 'checkpoints')

        # All parameters except recurrent connections and attention parameters are initialized
        # with this. Recurrent connections are initialized with orthogonal matrices, and the
        # parameters of the attention model with a standard deviation of 0.001.
        if config.weight_scale:
            initializer = tf.random_normal_initializer(stddev=config.weight_scale)
        else:
            initializer = None

        tf.get_variable_scope().set_initializer(initializer)

        # exempt from creating gradient ops
        decode_only = args.decode is not None or args.eval or args.align
        model = MultiTaskModel(name='main', checkpoint_dir=checkpoint_dir,
                               decode_only=decode_only, **config)

    utils.log('model parameters ({})'.format(len(tf.global_variables())))
    parameter_count = 0
    for var in tf.global_variables():
        utils.log('  {} {}'.format(var.name, var.get_shape()))
        v = 1
        for d in var.get_shape():
            v *= d.value
        parameter_count += v
    utils.log('number of parameters: {}'.format(parameter_count))

    tf_config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = config.allow_growth
    tf_config.gpu_options.per_process_gpu_memory_fraction = config.mem_fraction

    with tf.Session(config=tf_config) as sess:
        best_checkpoint = os.path.join(checkpoint_dir, 'best')

        if config.ensemble and (args.eval or args.decode is not None):
            # create one session for each model in the ensemble
            sess = [tf.Session() for _ in config.checkpoints]
            for sess_, checkpoint in zip(sess, config.checkpoints):
                model.initialize(sess_, [checkpoint], reset=True)
        elif (not config.checkpoints and (args.eval or args.decode is not None or args.align)
              and os.path.isfile(best_checkpoint + '.index')):
            # in decoding and evaluation mode, unless specified otherwise (by `checkpoints`),
            # try to load the best checkpoint
            model.initialize(sess, [best_checkpoint], reset=True)
        else:
            # loads last checkpoint, unless `reset` is true
            model.initialize(sess, **config)

        if args.decode is not None:
            model.decode(sess, **config)
        elif args.eval:
            model.evaluate(sess, on_dev=False, **config)
        elif args.align:
            model.align(sess, **config)
        elif args.train:
            eval_output = os.path.join(config.model_dir, 'eval')
            try:
                model.train(sess, eval_output=eval_output, **config)
            except KeyboardInterrupt:
                utils.log('exiting...')
                model.save(sess)
                sys.exit()
def evaluate(self, score_function, on_dev=True, output=None, remove_unk=False, max_dev_size=None,
             raw_output=False, fix_edits=True, max_test_size=None, post_process_script=None,
             **kwargs):
    """
    Decode a dev or test set, and perform evaluation against the gold standard, using the
    provided scoring function. If `output` is defined, also save the decoding output to this
    file.
    When evaluating development data (`on_dev` set to True), several dev sets can be specified
    (`dev_prefix` parameter in configuration files), and a score is computed for each of them.

    :param score_function: name of the scoring function used to score and rank models
      (typically 'bleu_score')
    :param on_dev: if True, evaluate the dev corpus, otherwise evaluate the test corpus
    :param output: save the hypotheses to this file
    :param remove_unk: remove the UNK symbols from the output
    :param max_dev_size: maximum number of lines to read from dev files
    :param max_test_size: maximum number of lines to read from test files
    :param raw_output: save raw decoder output (don't do post-processing like UNK deletion or
      subword concatenation). The evaluation is still done on the post-processed output.
    :param fix_edits: when predicting edit operations, pad shorter hypotheses with KEEP symbols
    :param post_process_script: if defined, pipe the hypotheses through this script before scoring
    :return: scores of each corpus to evaluate
    """
    utils.log('starting decoding')

    if on_dev:
        filenames = self.filenames.dev
    else:
        filenames = [self.filenames.test]

    # convert `output` into a list, for zip
    if isinstance(output, str):
        output = [output]
    elif output is None:
        output = [None] * len(filenames)

    scores = []

    # evaluation on multiple corpora
    for filenames_, output_, prefix in zip(filenames, output, self.dev_prefix):
        extensions = list(self.extensions)
        if self.ref_ext is not None:
            extensions.append(self.ref_ext)

        lines = list(utils.read_lines(filenames_, binary=self.binary))
        if on_dev and max_dev_size:
            lines = lines[:max_dev_size]
        elif not on_dev and max_test_size:
            lines = lines[:max_test_size]

        hypotheses = []
        references = []

        output_file = None
        try:
            if output_ is not None:
                output_file = open(output_, 'w')

            lines_ = list(zip(*lines))
            src_sentences = list(zip(*lines_[:len(self.src_ext)]))
            trg_sentences = list(zip(*lines_[len(self.src_ext):]))

            hypothesis_iter = self.decode_batch(lines, self.batch_size, remove_unk=remove_unk,
                                                fix_edits=fix_edits)

            for i, (sources, hypothesis, reference) in enumerate(
                    zip(src_sentences, hypothesis_iter, trg_sentences)):
                if self.ref_ext is not None and on_dev:
                    reference = reference[-1]
                else:
                    reference = reference[0]  # single output for now

                hypothesis, raw = hypothesis
                hypotheses.append(hypothesis)
                references.append(reference.strip().replace('@@ ', ''))

                if output_file is not None:
                    if raw_output:
                        hypothesis = raw
                    output_file.write(hypothesis + '\n')
                    output_file.flush()
        finally:
            if output_file is not None:
                output_file.close()

        if post_process_script is not None:
            data = '\n'.join(hypotheses).encode()
            data = Popen([post_process_script], stdout=PIPE, stdin=PIPE).communicate(input=data)[0].decode()
            hypotheses = data.splitlines()

        # default scoring function is utils.bleu_score
        score, score_summary = getattr(evaluation, score_function)(hypotheses, references)

        # print scoring information
        score_info = [prefix, 'score={:.2f}'.format(score)]
        if score_summary:
            score_info.append(score_summary)
        if self.name is not None:
            score_info.insert(0, self.name)

        utils.log(' '.join(map(str, score_info)))
        scores.append(score)

    return scores
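# Minimal sketch of the `post_process_script` contract assumed by `evaluate` above: the
# script receives the raw hypotheses on stdin (one per line) and must write post-processed
# hypotheses to stdout, one per line. A hypothetical detokenizer script would look like:
#
#   #!/usr/bin/env python3
#   import sys
#   for line in sys.stdin:
#       sys.stdout.write(line.replace(' @-@ ', '-'))  # e.g., merge escaped hyphens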
def decode_batch(self, sentence_tuples, batch_size, remove_unk=False, fix_edits=True,
                 unk_replace=False, align=False, reverse=False, output=None):
    utils.log('starting batch decoding')

    if batch_size == 1:
        batches = ([sentence_tuple] for sentence_tuple in sentence_tuples)  # lazy
    else:
        batch_count = int(math.ceil(len(sentence_tuples) / batch_size))
        batches = [sentence_tuples[i * batch_size:(i + 1) * batch_size]
                   for i in range(batch_count)]

    def map_to_ids(sentence_tuple):
        token_ids = [
            # a `None` vocab means the corresponding input is binary features (no mapping)
            sentence if vocab is None else
            utils.sentence_to_token_ids(sentence, vocab.vocab,
                                        character_level=self.character_level.get(ext))
            for ext, vocab, sentence in zip(self.extensions, self.vocabs, sentence_tuple)
        ]
        return token_ids

    line_id = 0
    for batch_id, batch in enumerate(batches):
        token_ids = list(map(map_to_ids, batch))
        # `batch_token_ids` has shape (decoders, batch, beam, time),
        # e.g. (1, 64, 10, 50) for a single decoder with beam size 10
        batch_token_ids, batch_weights = self.seq2seq_model.greedy_decoding(
            token_ids, align=unk_replace or align)
        batch_token_ids = zip(*batch_token_ids)  # shape (batch, decoders, beam, time)

        for sentence_id, (src_tokens, trg_token_ids) in enumerate(zip(batch, batch_token_ids)):
            # `trg_token_ids` has shape (decoders, beam, time)
            line_id += 1

            trg_tokens = []
            for trg_token_ids_, vocab in zip(trg_token_ids, self.trg_vocab):
                # `trg_token_ids_` has shape (beam, time): one id sequence per hypothesis
                beam_trg_tokens = []
                for single_trg_token_ids in trg_token_ids_:
                    single_trg_token_ids = list(single_trg_token_ids)
                    # truncate at the first end-of-sentence symbol
                    if utils.EOS_ID in single_trg_token_ids:
                        single_trg_token_ids = single_trg_token_ids[
                            :single_trg_token_ids.index(utils.EOS_ID)]
                    # map ids back to token strings (out-of-vocabulary ids become UNK)
                    single_trg_tokens = [vocab.reverse[i] if i < len(vocab.reverse) else utils._UNK
                                         for i in single_trg_token_ids]
                    beam_trg_tokens.append(single_trg_tokens)
                trg_tokens.append(beam_trg_tokens)  # shape (decoders, beam, time)

            if align:
                weights_ = batch_weights[sentence_id].squeeze()
                max_len_ = weights_.shape[1]
                src_tokens_ = src_tokens[0].split()[:max_len_ - 1] + [utils._EOS]
                src_tokens_ = [token if token in self.src_vocab[0].vocab else utils._UNK
                               for token in src_tokens_]
                trg_tokens_ = trg_tokens[0][0][:weights_.shape[0] - 1] + [utils._EOS]
                weights_ = weights_[:len(trg_tokens_), :len(src_tokens_)]
                output_file = output and '{}.{}.pdf'.format(output, line_id)
                utils.heatmap(src_tokens_, trg_tokens_, weights_, reverse=reverse,
                              output_file=output_file)

            if unk_replace:
                weights = batch_weights[sentence_id]
                src_words = src_tokens[0].split()
                align_ids = np.argmax(weights[:, :len(src_words)], axis=1)

                def replace(token, align_id):
                    # replace UNK by the aligned source word, possibly translated by a lexicon
                    if token == utils._UNK:
                        token = src_words[align_id]
                        if not token[0].isupper() and self.lexicon is not None and token in self.lexicon:
                            token = self.lexicon[token]
                    return token

                for i in range(len(trg_tokens[0])):
                    trg_tokens[0][i] = [replace(token, align_id)
                                        for align_id, token in zip(align_ids, trg_tokens[0][i])]

            if self.pred_edits:
                # first output is ops, second output is words
                raw_hypothesis = ' '.join('_'.join(tokens) for tokens in zip(*trg_tokens))
                src_words = src_tokens[0].split()
                trg_tokens = utils.reverse_edits(src_words, trg_tokens, fix=fix_edits)
                trg_tokens = [token for token in trg_tokens
                              if token not in utils._START_VOCAB]  # FIXME: char-level
            else:
                trg_tokens = trg_tokens[0]  # hypotheses of the first (only) decoder
                raw_hypothesis = []
                for single_trg_tokens in trg_tokens:
                    single_raw_hypothesis = (''.join(single_trg_tokens) if self.char_output
                                             else ' '.join(single_trg_tokens))
                    raw_hypothesis.append(single_raw_hypothesis)

            if remove_unk:
                for i in range(len(trg_tokens)):
                    trg_tokens[i] = [token for token in trg_tokens[i] if token != utils._UNK]

            if self.char_output:
                hypothesis = [''.join(tokens) for tokens in trg_tokens]
            else:
                # merge subword units
                hypothesis = [' '.join(tokens).replace('@@ ', '') for tokens in trg_tokens]

            yield hypothesis, raw_hypothesis
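# Usage sketch (hypothetical data; assumes the model and its vocabularies are already
# initialized): `decode_batch` is a generator yielding one (hypothesis, raw_hypothesis)
# pair per input tuple. With the beam output built above, each element of the pair is a
# list of strings, one per beam hypothesis (presumably best-first, depending on how
# `greedy_decoding` orders the beam):
#
#   sentences = [('a source sentence',), ('another one',)]
#   for hypothesis, raw in model.decode_batch(sentences, batch_size=2):
#       print(hypothesis[0])  # top hypothesis, post-processed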