def main():
    """Evaluate the perplexity of a trained language model on each test set.

    Command-line options are read with ``parse()``. The configuration saved
    as ``conf.yml`` next to ``args.recog_model[0]`` is merged into ``args``,
    the model is restored from that checkpoint, and ``eval_ppl`` is run on
    every entry of ``args.recog_sets``. The per-set perplexity is printed to
    stdout; the elapsed time and the averaged perplexity are written to
    ``decode.log`` inside ``args.recog_dir``.

    Raises:
        ValueError: if ``args.recog_sets`` is empty (there would be nothing
            to evaluate and no average to report).
    """
    args = parse()

    # Load a conf file stored in the same directory as the checkpoint
    dir_name = os.path.dirname(args.recog_model[0])
    conf = load_config(os.path.join(dir_name, 'conf.yml'))

    # Overwrite conf: every saved option replaces the command-line value,
    # except options whose name contains 'recog', which stay as given
    for k, v in conf.items():
        if 'recog' not in k:
            setattr(args, k, v)

    # Setting for logging (always start from a fresh log file)
    log_path = os.path.join(args.recog_dir, 'decode.log')
    if os.path.isfile(log_path):
        os.remove(log_path)
    logger = set_logger(log_path, key='decoding')

    # Fail with a clear message instead of a ZeroDivisionError when the
    # average is computed after an empty loop
    n_sets = len(args.recog_sets)
    if n_sets == 0:
        raise ValueError('recog_sets is empty: no evaluation set to compute perplexity on')

    ppl_avg = 0
    for i, s in enumerate(args.recog_sets):
        # Load dataset
        dataset = Dataset(corpus=args.corpus,
                          tsv_path=s,
                          dict_path=os.path.join(dir_name, 'dict.txt'),
                          wp_model=os.path.join(dir_name, 'wp.model'),
                          unit=args.unit,
                          batch_size=args.recog_batch_size,
                          bptt=args.bptt,
                          serialize=args.serialize,
                          is_test=True)

        if i == 0:
            # Load the LM once; it is reused for all remaining sets
            if args.lm_type == 'gated_cnn':
                model = GatedConvLM(args)
            else:
                model = RNNLM(args)
            model, checkpoint = load_checkpoint(model, args.recog_model[0])
            epoch = checkpoint['epoch']
            model.save_path = dir_name

            logger.info('epoch: %d' % (epoch - 1))
            logger.info('batch size: %d' % args.recog_batch_size)
            logger.info('BPTT: %d' % (args.bptt))
            logger.info('cache size: %d' % (args.recog_n_caches))
            logger.info('cache theta: %.3f' % (args.recog_cache_theta))
            logger.info('cache lambda: %.3f' % (args.recog_cache_lambda))
            model.cache_theta = args.recog_cache_theta
            model.cache_lambda = args.recog_cache_lambda

            # GPU setting
            model.cuda()

        start_time = time.time()

        # TODO(hirofumi): ensemble
        ppl, _ = eval_ppl([model], dataset, batch_size=1, bptt=args.bptt,
                          n_caches=args.recog_n_caches, progressbar=True)
        ppl_avg += ppl
        print('PPL (%s): %.2f' % (dataset.set, ppl))
        logger.info('Elasped time: %.2f [sec]:' % (time.time() - start_time))

    logger.info('PPL (avg.): %.2f\n' % (ppl_avg / n_sets))
def main():
    """Plot the cache attention weights of a trained language model.

    Command-line options are read with ``parse()``. The configuration saved
    as ``conf.yml`` next to ``args.recog_model[0]`` is merged into ``args``
    and the model is restored from that checkpoint. For every entry of
    ``args.recog_sets`` the model is run token by token; each time
    ``args.recog_n_caches`` further tokens have been processed with a
    non-empty ``model.cache_attn``, the most recent attention weights are
    arranged into a ``[n_keys, n_queries]`` matrix and drawn with
    ``plot_cache_weights`` into ``<args.recog_dir>/cache/<fig_count>.png``.

    Raises:
        ValueError: if ``args.recog_n_caches`` is not positive.
        NotImplementedError: if ``args.unit`` is not one of
            'word', 'wp', 'char' or 'phone'.
    """
    args = parse()

    # Load a conf file stored in the same directory as the checkpoint
    dir_name = os.path.dirname(args.recog_model[0])
    conf = load_config(os.path.join(dir_name, 'conf.yml'))

    # Overwrite conf: every saved option replaces the command-line value,
    # except options whose name contains 'recog', which stay as given
    for k, v in conf.items():
        if 'recog' not in k:
            setattr(args, k, v)

    # Setting for logging (always start from a fresh log file)
    log_path = os.path.join(args.recog_dir, 'plot.log')
    if os.path.isfile(log_path):
        os.remove(log_path)
    logger = set_logger(log_path, key='decoding')

    # Validate the option explicitly (an assert would be stripped under -O)
    # and do it before the expensive dataset/model loading
    if args.recog_n_caches <= 0:
        raise ValueError('recog_n_caches must be positive to plot cache weights (got %s)'
                         % args.recog_n_caches)

    for i, s in enumerate(args.recog_sets):
        # Load dataset
        dataset = Dataset(corpus=args.corpus,
                          tsv_path=s,
                          dict_path=os.path.join(dir_name, 'dict.txt'),
                          wp_model=os.path.join(dir_name, 'wp.model'),
                          unit=args.unit,
                          batch_size=args.recog_batch_size,
                          bptt=args.bptt,
                          serialize=args.serialize,
                          is_test=True)

        if i == 0:
            # Load the LM once; it is reused for all remaining sets
            if args.lm_type == 'gated_cnn':
                model = GatedConvLM(args)
            else:
                model = RNNLM(args)
            epoch = model.load_checkpoint(args.recog_model[0])['epoch']
            model.save_path = dir_name

            logger.info('epoch: %d' % (epoch - 1))
            logger.info('batch size: %d' % args.recog_batch_size)
            logger.info('BPTT: %d' % (args.bptt))
            logger.info('cache size: %d' % (args.recog_n_caches))
            logger.info('cache theta: %.3f' % (args.recog_cache_theta))
            logger.info('cache lambda: %.3f' % (args.recog_cache_lambda))
            model.cache_theta = args.recog_cache_theta
            model.cache_lambda = args.recog_cache_lambda

            # GPU setting
            model.cuda()

        save_path = mkdir_join(args.recog_dir, 'cache')

        # Clean directory
        # NOTE(review): this runs for every recognition set and the figure
        # numbering restarts at 0 below, so with several sets only the last
        # set's figures appear to survive -- confirm this is intended.
        if save_path is not None and os.path.isdir(save_path):
            shutil.rmtree(save_path)
            os.mkdir(save_path)

        # Pick the index-to-token converter matching the modelling unit
        if args.unit == 'word':
            idx2token = dataset.idx2word
        elif args.unit == 'wp':
            idx2token = dataset.idx2wp
        elif args.unit == 'char':
            idx2token = dataset.idx2char
        elif args.unit == 'phone':
            idx2token = dataset.idx2phone
        else:
            raise NotImplementedError(args.unit)

        hidden = None
        fig_count = 0
        token_count = 0
        n_tokens = args.recog_n_caches
        while True:
            ys, is_new_epoch = dataset.next()

            for t in range(ys.shape[1] - 1):
                # Feed the current token together with its successor; only
                # the updated hidden state is needed here
                _, hidden = model(ys[:, t:t + 2], hidden, is_eval=True,
                                  n_caches=args.recog_n_caches)[:2]

                # Nothing to plot until the model has recorded cache attention
                if len(model.cache_attn) == 0:
                    continue

                # Plot once every `n_tokens` counted steps
                if token_count != n_tokens:
                    token_count += 1
                    continue

                tokens_keys = idx2token(model.cache_ids[:args.recog_n_caches],
                                        return_list=True)
                tokens_query = idx2token(model.cache_ids[-n_tokens:],
                                         return_list=True)

                # Slide attention matrix
                n_keys = len(tokens_keys)
                n_queries = len(tokens_query)
                cache_probs = np.zeros((n_keys, n_queries))  # `[n_keys, n_queries]`
                mask = np.zeros((n_keys, n_queries))
                # `q` is deliberately distinct from the per-set loop index `i`
                for q, aw in enumerate(model.cache_attn[-n_tokens:]):
                    # Column q takes the last `n_filled` weights of the first
                    # row of `aw`; the rows below are flagged in `mask`
                    n_filled = n_keys - n_queries + q + 1
                    cache_probs[:n_filled, q] = aw[0, -n_filled:]
                    mask[n_filled:, q] = 1

                plot_cache_weights(
                    cache_probs,
                    keys=tokens_keys,
                    queries=tokens_query,
                    save_path=mkdir_join(save_path, str(fig_count) + '.png'),
                    figsize=(40, 16),
                    mask=mask)
                token_count = 0
                fig_count += 1

            if is_new_epoch:
                break