def __init__(self, opt, shared=None):
    super().__init__(opt, shared)
    if shared:
        self.probs = shared['probs']
    else:
        # default minimum probability mass for all tokens
        self.probs = {k: 1e-7 for k in build_dict().keys()}
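# A minimal, hedged sketch (not part of the snippet above) of how this
# fixed-probability baseline could plug into the ConvAI2 eval_ppl protocol,
# where PerplexityWorld queries next_word_probability(partial_out) and
# normalizes the returned mass over the official dictionary. The body below
# is an illustration, not the original implementation.
def next_word_probability(self, partial_out):
    """Assign the same tiny probability mass to every dictionary token,
    regardless of the partial output produced so far."""
    # self.probs was built in __init__ as {token: 1e-7} for every dict entry
    return self.probs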
def eval_ppl(opt):
    """Evaluates the perplexity and f1 of a model (and hits@1 if the model
    has ranking enabled).
    """
    dict_agent = build_dict()

    # create agents
    agent = create_agent(opt)
    world = create_task(opt, [agent, dict_agent], default_world=PerplexityWorld)
    world.dict = dict_agent

    # set up logging
    log_time = Timer()
    tot_time = 0

    while not world.epoch_done():
        world.parley()  # process an example
        if log_time.time() > 1:  # log every 1 sec
            tot_time += log_time.time()
            report = world.report()
            print('{}s elapsed, {}% complete, {}'.format(
                int(tot_time),
                round_sigfigs(report['total'] / world.num_examples() * 100, 2),
                report))
            log_time.reset()
            if world.epoch_done():
                print('EPOCH DONE')
    tot_time += log_time.time()
    final_report = world.report()
    print('{}s elapsed: {}'.format(int(tot_time), final_report))
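# For context, a hedged sketch of the kind of __main__ driver that typically
# invokes eval_ppl in these scripts. setup_args() is the parser builder used
# elsewhere in this code; the model and task values below are placeholders,
# not taken from the snippet above.
if __name__ == '__main__':
    parser = setup_args()
    parser.set_params(
        model='repeat_label',  # placeholder agent, illustrative only
        task='convai2:self',
        batchsize=1,
    )
    opt = parser.parse_args(print_args=False)
    eval_ppl(opt)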
def __init__(self, opt, shared=None):
    super(TransformerAgentPpl, self).__init__(opt, shared)
    if shared:
        self.prefix2words = shared['prefix2words']
    else:
        print("Build prefix conversion map between convai dict and our bpe dict")
        convai_dict = build_dict()
        assert len(convai_dict) == 19304
        self.prefix2words = self.vocab.get_prefix2words(convai_dict)
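# Hedged sketch of how a prefix2words map might be consumed at evaluation
# time: fold the model's BPE-level next-token probabilities back onto the
# official ConvAI2 dictionary. The helper _next_bpe_token_probs and the exact
# weighting are assumptions for illustration; only the map structure
# (BPE prefix id -> {word: ratio}) is taken from this code.
def next_word_probability(self, partial_out):
    """Sketch: turn BPE next-token probabilities into a word distribution."""
    bpe_probs = self._next_bpe_token_probs(partial_out)  # hypothetical helper
    dist = {}
    for prefix_id, words in self.prefix2words.items():
        mass = bpe_probs[prefix_id].item()
        for word, ratio in words.items():
            # every dictionary word collects mass from each BPE prefix that
            # can begin it, weighted by the precomputed ratio
            dist[word] = dist.get(word, 0.0) + mass * ratio
    return dist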
def __init__(self, opt, shared=None):
    super().__init__(opt, shared)
    if not shared:
        # build official eval dictionary
        self.dict = build_dict()
    else:
        # only build dict once
        self.dict = shared['dict']
    max_freq = self.dict.max_freq()
    # set probability of each word, skipping the invalid words like __NULL__
    # (which have frequency more than max_freq)
    self.freqs = {k: f for k, f in self.dict.freqs().items() if f <= max_freq}
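# Hedged sketch of a frequency-based (unigram) next_word_probability built on
# self.freqs, assuming the same eval_ppl protocol as above. The normalization
# step is illustrative; the evaluation world may also renormalize the returned
# mass on its side.
def next_word_probability(self, partial_out):
    """Sketch: context-independent unigram distribution over the eval dict."""
    total = sum(self.freqs.values())
    # ignores partial_out entirely; the prediction is purely frequency-based
    return {word: count / total for word, count in self.freqs.items()}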
def __init__(self, opt, shared=None):
    super(TransformerAgent, self).__init__(opt, shared)

    args = AttrDict(opt)  # to keep most commands identical to the interact.py script
    self.args = args

    logging.basicConfig(level=logging.INFO)
    self.logger = logging.getLogger(__file__)
    self.logger.info(pformat(args))

    random.seed(args.seed)
    torch.random.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    if shared is None:
        self.logger.info("Get pretrained model and tokenizer")
        if args.model_checkpoint == "":
            args.model_checkpoint = download_pretrained_model()

        if 'gpt2' in args.model_checkpoint:
            self.tokenizer = GPT2Tokenizer.from_pretrained(args.model_checkpoint)
            model_class = GPT2DoubleHeadsModel if self.args.eval_type == "hits@1" else GPT2LMHeadModel
        else:
            self.tokenizer = OpenAIGPTTokenizer.from_pretrained(args.model_checkpoint)
            model_class = OpenAIGPTDoubleHeadsModel if self.args.eval_type == "hits@1" else OpenAIGPTLMHeadModel
        self.model_checkpoint = model_class.from_pretrained(args.model_checkpoint)
        self.model_checkpoint.to(args.device)

        self.logger.info("Build BPE prefix dictionary")
        convai_dict = build_dict()
        assert len(convai_dict) == 19304
        self.prefix2words = self.get_prefix2words(convai_dict)
    else:
        self.model_checkpoint = shared['model']
        self.tokenizer = shared['tokenizer']
        self.prefix2words = shared['prefix2words']

    add_special_tokens_(self.model_checkpoint, self.tokenizer)
    self.special_tokens_ids = self.tokenizer.convert_tokens_to_ids(SPECIAL_TOKENS)

    self.persona = []
    self.persona1 = []
    self.persona2 = []
    self.history = []
    self.labels = []
    self.reset()
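# Since this __init__ reads 'model', 'tokenizer' and 'prefix2words' out of
# shared, the matching share() override presumably exposes those same objects
# so agent copies (e.g. for batched evaluation) can reuse them. The body below
# is a hedged sketch; only the three keys are taken from the snippet above.
def share(self):
    shared = super().share()
    shared['model'] = self.model_checkpoint
    shared['tokenizer'] = self.tokenizer
    shared['prefix2words'] = self.prefix2words
    return shared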
def __init__(self, opt, shared=None):
    super().__init__(opt, shared)
    if not shared:
        # build official eval dictionary
        self.dict = build_dict()
    else:
        # only build dict once
        self.dict = shared['dict']
    max_freq = self.dict.max_freq()
    # set probability of each word, skipping the invalid words like __NULL__
    # (which have frequency more than max_freq)
    self.freqs = {k: f for k, f in self.dict.freqs().items() if f <= max_freq}

    self.persona = ''
    self.historical_utterances = ''
    self.this_turn_history = ''
    self.next_turn_history = ''
    self.this_thread_id = str(random.randint(100, 100000))
def eval_ppl(opt):
    """Evaluates the perplexity and f1 of a model (and hits@1 if the model
    has ranking enabled).
    """
    dict_agent = build_dict()

    # create agents
    agent = create_agent(opt)
    world = create_task(opt, [agent, dict_agent], default_world=PerplexityWorld)
    world.dict = dict_agent

    # set up logging
    log_time = Timer()
    tot_time = 0

    while not world.epoch_done():
        world.parley()  # process an example
        if log_time.time() > 1:  # log every 1 sec
            tot_time += log_time.time()
            report = world.report()
            print('{}s elapsed, {}% complete, {}'.format(
                int(tot_time),
                round_sigfigs(report['total'] / world.num_examples() * 100, 3),
                report))
            log_time.reset()
            if world.epoch_done():
                print('EPOCH DONE')
    tot_time += log_time.time()
    final_report = world.report()
    print('{}s elapsed: {}'.format(int(tot_time), final_report))
    print("============================")
    print("FINAL PPL: " + str(final_report['ppl']))
    if final_report.get('ppl', 0) == float('inf'):
        print('Note: you got inf perplexity. Consider adding (or raising) the '
              'minimum probability you assign to each possible word. If you '
              'assign zero probability to the correct token in the evaluation '
              'vocabulary, you get inf probability immediately.')
parser = setup_args()
parser.set_params(
    model='transformer',
    task='convai2:self',
    external_dict=DICT_FILE,
    # model_file='models:convai2/transformer/convai2_self_transformer_model',
    # dict_file='models:convai2/transformer/convai2_self_transformer_model.dict',
    model_file='./checkpoints/convai2_transformer_volta_[l=4,h=2,dw=256,dm=256,di=2048,dk=64,dv=64,src_tgt_share=False,tgt_prj=False,smooth=False]',
    dict_file='./checkpoints/convai2_transformer_volta_[l=4,h=2,dw=256,dm=256,di=2048,dk=64,dv=64,src_tgt_share=False,tgt_prj=False,smooth=False].dict',
    dict_lower=True,
    batchsize=1,
    numthreads=1,
)
opt = parser.parse_args(print_args=False)
if opt.get('model_file', '').find('convai2/transformer/convai2_self_transformer_model') != -1:
    opt['model_type'] = 'transformer'
    # fnames = ['convai2_self_transformer_model.tgz',
    #           'convai2_self_transformer_model.dict',
    #           'convai2_self_transformer_model.opt']
    fnames = [
        'convai2_transformer_volta_[l=4,h=2,dw=256,dm=256,di=2048,dk=64,dv=64,src_tgt_share=False,tgt_prj=False,smooth=False].tgz',
        'convai2_transformer_volta_[l=4,h=2,dw=256,dm=256,di=2048,dk=64,dv=64,src_tgt_share=False,tgt_prj=False,smooth=False].dict',
        'convai2_transformer_volta_[l=4,h=2,dw=256,dm=256,di=2048,dk=64,dv=64,src_tgt_share=False,tgt_prj=False,smooth=False].opt',
    ]
    download_models(opt, fnames, 'convai2', version='v3.0')
build_dict()  # make sure true dictionary is built
eval_wordstat(opt, print_parser=parser)
def __init__(self, opt, shared=None):
    super(TransformerAgent, self).__init__(opt, shared)

    args = AttrDict(opt)  # to keep most commands identical to the interact.py script
    self.args = args

    logging.basicConfig(level=logging.INFO)
    self.logger = logging.getLogger(__file__)
    self.logger.info(pformat(args))

    random.seed(args.seed)
    torch.random.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    if shared is None:
        self.logger.info("Get pretrained model and tokenizer")
        if args.model_checkpoint == "":
            args.model_checkpoint = download_pretrained_model()

        self.tokenizer = OpenAIGPTTokenizer.from_pretrained(args.model_checkpoint)
        if self.args.eval_type == "hits@1":
            self.model_checkpoint = OpenAIGPTDoubleHeadsModel.from_pretrained(args.model_checkpoint)
        else:
            self.model_checkpoint = OpenAIGPTLMHeadModel.from_pretrained(args.model_checkpoint)
        self.model_checkpoint.to(args.device)
        self.model_checkpoint.eval()

        self.logger.info("Build BPE prefix dictionary")
        convai_dict = build_dict()
        assert len(convai_dict) == 19304
        self.prefix2words = self.get_prefix2words(convai_dict)
    else:
        self.model_checkpoint = shared['model']
        self.tokenizer = shared['tokenizer']
        self.prefix2words = shared['prefix2words']

    self.special_tokens_ids = self.tokenizer.convert_tokens_to_ids(SPECIAL_TOKENS)

    self.persona = []
    self.history = []
    self.labels = []
    self.reward = []

    self.nli_scores = np.array([0, 0, 0])
    self.reward_scores = 0  # reward function
    self.c_scores = 0  # C score
    self.cnm = 0  # C_new
    self.sample_num = 0  # sample number
    self.con_en = np.array([0, 0, 0])  # whether the persona contains a contradicted/entailed profile (not applied)
    self.intrep_scores = 0  # internal repetition score
    self.lm_ppl_scores = 0  # fine-tuned GPT-based language model
    self.bleu_scores = 0  # BLEU-2 score

    # Load the NLI model
    reset_seed(args.seed)
    self.nli_tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
    output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
    output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)

    nli_config = BertConfig(output_config_file)
    self.nli_model = BertForSequenceClassification(nli_config, num_labels=3)
    self.nli_model.load_state_dict(torch.load(output_model_file))
    self.nli_model.to(args.device)
    self.nli_model.eval()

    # Load the LM model
    reset_seed(args.seed)
    self.lm_special_tokens = ['_start_', '_delimiter_', '_classify_']  # special tokens for LM

    # Load pre-trained model (weights)
    with torch.no_grad():
        lm_output_config_file = os.path.join(args.lm_output_dir, CONFIG_NAME)
        lm_config = OpenAIGPTConfig(lm_output_config_file)
        lm_output_model_file = os.path.join(args.lm_output_dir, WEIGHTS_NAME)
        lm_model_state_dict = torch.load(lm_output_model_file)
        self.lm_model = OpenAIGPTLMHeadModel(lm_config)
        self.lm_model.load_state_dict(lm_model_state_dict)

        # Load pre-trained model tokenizer (vocabulary)
        self.lm_tokenizer = OpenAIGPTTokenizer.from_pretrained(args.lm_model_path, special_tokens=self.lm_special_tokens)
        self.special_tokens_ids = list(self.lm_tokenizer.convert_tokens_to_ids(token) for token in self.lm_special_tokens)

    self.lm_model.to(args.device)
    self.lm_model.eval()

    reset_seed(args.seed)
    self.reset()
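# Hedged sketch (not from the snippet) of how the loaded NLI model could score
# a persona line against a generated response with the older
# pytorch_pretrained_bert-style API this code appears to use. The sequence
# layout and the meaning of the three output classes are assumptions for
# illustration only.
def nli_probabilities(self, premise, hypothesis):
    tokens_a = self.nli_tokenizer.tokenize(premise)
    tokens_b = self.nli_tokenizer.tokenize(hypothesis)
    tokens = ['[CLS]'] + tokens_a + ['[SEP]'] + tokens_b + ['[SEP]']
    segment_ids = [0] * (len(tokens_a) + 2) + [1] * (len(tokens_b) + 1)
    input_ids = self.nli_tokenizer.convert_tokens_to_ids(tokens)
    input_ids = torch.tensor([input_ids], device=self.args.device)
    segment_ids = torch.tensor([segment_ids], device=self.args.device)
    with torch.no_grad():
        logits = self.nli_model(input_ids, token_type_ids=segment_ids)
    # three class probabilities; how they map to entail/neutral/contradict
    # depends on the fine-tuned checkpoint
    return torch.softmax(logits, dim=-1).squeeze(0).tolist()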