Example #1
    def test_beamsearch_return_all_texts(self):
        """
        Test beam_texts for beam_size > 1.
        """
        size = 3

        agent = create_agent_from_model_file(
            'zoo:unittest/beam_blocking/model',
            opt_overrides={
                "beam_size": size,
                "inference": "beam"
            },
        )
        agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
        response = agent.act()
        self.assertTrue("beam_texts" in response)
        self.assertGreaterEqual(len(response["beam_texts"]), size)
        hyp, score = response["beam_texts"][0]
        self.assertTrue(isinstance(hyp, str))
        self.assertTrue(isinstance(score, float))

        agent = create_agent_from_model_file(
            'zoo:unittest/beam_blocking/model',
            opt_overrides={
                "beam_size": size,
                "inference": "topk"
            },
        )
        agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
        response = agent.act()
        self.assertTrue("beam_texts" in response)
        self.assertEqual(len(response["beam_texts"]), size)
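Outside the test harness, the same beam_texts field can be read directly; a minimal sketch, assuming the same unittest zoo model as above (any generator model works):

from parlai.core.agents import create_agent_from_model_file

# Sketch: inspect every beam hypothesis and its score.
agent = create_agent_from_model_file(
    'zoo:unittest/beam_blocking/model',
    opt_overrides={'beam_size': 3, 'inference': 'beam'},
)
agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
response = agent.act()
for hyp, score in response['beam_texts']:  # (text, score) pairs
    print(f'{score:.3f}\t{hyp}')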
Example #2
    def test_beamsearch_contextblocking(self):
        """
        Test beamsearch context blocking.
        """

        agent = create_agent_from_model_file(
            'zoo:unittest/context_blocking/model')
        agent.observe({'text': '5 4 3 2', 'episode_done': True})
        assert agent.act()['text'] == '5 4 3 2'

        agent = create_agent_from_model_file(
            'zoo:unittest/context_blocking/model',
            Opt(beam_context_block_ngram=1))
        agent.observe({'text': '5 4 3 2', 'episode_done': True})
        text = agent.act()['text']
        assert '5' not in text
        assert '4' not in text
        assert '3' not in text
        assert '2' not in text

        agent = create_agent_from_model_file(
            'zoo:unittest/context_blocking/model',
            Opt(beam_context_block_ngram=2))
        agent.observe({'text': '5 4 3 2', 'episode_done': True})
        text = agent.act()['text']
        assert '5' in text
        assert '5 4' not in text
        assert '4 3' not in text
        assert '3 2' not in text
Example #3
 def __init__(self, opt: Opt):
     self.opt = opt
     self.agents = []
     self.agent_dict = None
     self.generations = []
     self.input_type = 'Memory'
     self.delimiter = opt.get('memory_decoder_delimiter', '\n')
     self.one_line_memories = opt.get('memory_decoder_one_line_memories', False)
     model_file = modelzoo_path(opt['datapath'], opt['memory_decoder_model_file'])
     if model_file and os.path.exists(model_file):
         logging.info(f'Building Memory Decoder from file: {model_file}')
         logging.disable()
         overrides = {
             'skip_generation': False,
             'inference': 'beam',
             'beam_size': opt.get('memory_decoder_beam_size', 3),
             'beam_min_length': opt.get('memory_decoder_beam_min_length', 10),
             'beam_block_ngram': 3,
         }
         if self.opt.get('memory_decoder_truncate', -1) > 0:
             overrides['text_truncate'] = self.opt['memory_decoder_truncate']
             overrides['truncate'] = self.opt['memory_decoder_truncate']
         base_agent = create_agent_from_model_file(
             model_file, opt_overrides=overrides
         )
         assert isinstance(base_agent, TorchAgent)
         self.agents = [base_agent]
         assert isinstance(self.agents[0], TorchAgent)
         copies = max(100, (opt['batchsize'] * opt.get('rag_turn_n_turns', 1)))
         self.agents += [
             create_agent_from_shared(self.agents[0].share()) for _ in range(copies)
         ]
         self.agent_dict = self.agents[0].build_dictionary()
         logging.enable()
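The base-agent-plus-shared-copies pattern above generalizes beyond the memory decoder; a minimal sketch assuming ParlAI's share()/create_agent_from_shared API, with an illustrative zoo model standing in for the memory decoder:

from parlai.core.agents import (
    create_agent_from_model_file,
    create_agent_from_shared,
)

# One fully loaded agent; lightweight shared copies for per-row decoding.
base = create_agent_from_model_file('zoo:blender/blender_90M/model')
copies = [create_agent_from_shared(base.share()) for _ in range(3)]
inputs = ['hello there', 'how are you?', 'tell me about your day']
for agent, text in zip(copies, inputs):
    agent.observe({'text': text, 'episode_done': True})
replies = [agent.act()['text'] for agent in copies]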
Example #4
def get_classifier_model_and_dict(
    opt: Opt
) -> Tuple[Optional[TorchAgent], Optional[DictionaryAgent]]:
    """
    Build classifier model and dictionary.
    """
    model_file = modelzoo_path(
        opt['datapath'], opt['expanded_attention_classifier_model_file']
    )
    model, dictionary = None, None
    if model_file and os.path.exists(model_file):
        logging.info(f'Building polyencoder from path: {model_file}')
        logging.disable()
        overrides = {
            'model': 'return_code_weights_agent',
            'data_parallel': opt.get('data_parallel', False),
            'model_parallel': opt['model_parallel'],
            'delimiter': opt['delimiter'],
            'no_cuda': opt['no_cuda'],
            'fp16': opt['fp16'],
        }
        poly_agent = create_agent_from_model_file(model_file, overrides)
        logging.enable()
        logging.info('Poly Build Complete')
        dictionary = poly_agent.build_dictionary()
        model = poly_agent.model
    return model, dictionary
Example #5
    def __init__(self, opt, shared=None):

        # Define coefficients
        self.task_loss_coeff = opt['task_loss_coeff']
        self.encoder_loss_coeff = opt['encoder_loss_coeff']
        self.hidden_loss_coeff = opt['hidden_loss_coeff']
        self.pred_loss_coeff = opt['pred_loss_coeff']

        assert (
            opt.get('model_parallel', False) is False
        ), 'model_parallel is not currently supported for distillation!'

        # Create teacher model
        if shared is None:
            to_copy = {'no_cuda', 'model_parallel', 'fp16', 'fp16_impl', 'gpu'}
            override = {k: opt[k] for k in to_copy}
            # 'train:evalmode' ensures the teacher's optimizer is not initialized
            override['datatype'] = 'train:evalmode'
            teacher_agent = create_agent_from_model_file(opt['teacher_model'], override)
            self.teacher_agent_opt = teacher_agent.opt
            self.teacher_model = teacher_agent.model
            self.teacher_model.eval()

        super().__init__(opt, shared)
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_type',
                        default='blender',
                        help='Either `blender` or `dialogpt`.')
    parser.add_argument('--device',
                        default='cpu',
                        help='Either `cpu` or `cuda` for dialogpt.')
    parser.add_argument('--model_path', help='Path for dialogpt.')
    parser.add_argument('--tokenizer_path', help='Path for dialogpt.')
    parser.add_argument('--generated_file',
                        default='',
                        help='Evaluate previously generated samples.')
    parser.add_argument('--include_persona',
                        action='store_true',
                        help='Whether to include personas.')
    args = parser.parse_args()
    print(args)

    tokenizer = None
    model = None
    if args.model_type == 'blender':
        model = create_agent_from_model_file('zoo:blender/blender_90M/model')
    elif args.model_type == 'dialogpt':
        tokenizer = AutoTokenizer.from_pretrained(
            args.tokenizer_path if args.tokenizer_path else args.model_path)
        model = AutoModelWithLMHead.from_pretrained(args.model_path)
        model = model.to(args.device)
    args.model = model
    args.tokenizer = tokenizer

    global checklist
    checklist = BiasChecklist(args,
                              util.generate_response_fn,
                              personas=constants.PERSONAS,
                              generated_file=args.generated_file)

    # Test cases.
    case = TestCase('Test: Harmful Affirmation (XYZs are [DESCRIPTION].)',
                    negative_generator, neg_score)
    checklist.add(case)
    case = TestCase('Test: Harmful Refutations (XYZs are [DESCRIPTION].)',
                    positive_generator, pos_score)
    checklist.add(case)

    # Report.
    result, samples = checklist.report(with_persona=args.include_persona)
    fi = './' + args.model_type + '_harmful_affirm_refute'
    if args.include_persona:
        fi += '_persona'
    else:
        fi += '_nopersona'
    if args.generated_file:
        fi += '_regenerated'
    with open(fi + '.txt', 'w') as f:
        f.writelines('\n'.join(result))
    with open(fi + '_samples.tsv', 'w') as f:
        writer = csv.writer(f, delimiter='\t')
        for sample in samples:
            writer.writerow(sample)
Example #7
 def __init__(self, opt: Opt):
     self.opt = opt
     self.agents = []
     self.agent_dict = None
     self.generations = []
     self.input_type = 'Search'
     self.knowledge_access_method = KnowledgeAccessMethod(
         opt['knowledge_access_method']
     )
     model_file = modelzoo_path(opt['datapath'], opt['query_generator_model_file'])
     if model_file and os.path.exists(model_file):
         logging.info(f'Building Query Generator from file: {model_file}')
         logging.disable()
         overrides: Dict[str, Any] = {'skip_generation': False}
         overrides['inference'] = opt['query_generator_inference']
         overrides['beam_size'] = opt.get('query_generator_beam_size', 3)
         overrides['beam_min_length'] = opt.get('query_generator_beam_min_length', 2)
         if self.opt['query_generator_truncate'] > 0:
             overrides['text_truncate'] = self.opt['query_generator_truncate']
             overrides['truncate'] = self.opt['query_generator_truncate']
         base_agent = create_agent_from_model_file(
             model_file, opt_overrides=overrides
         )
         assert isinstance(base_agent, TorchAgent)
         self.agents = [base_agent]
         bsz = opt.get('batchsize', 1)
         rag_turn_n_turns = opt.get('rag_turn_n_turns', 1)
         if bsz > 1 or rag_turn_n_turns > 1:
             self.agents += [
                 create_agent_from_shared(self.agents[0].share())
                 for _ in range((bsz * rag_turn_n_turns) - 1)
             ]
         self.agent_dict = self.agents[0].build_dictionary()
         logging.enable()
Example #8
 def run(self):
     """
     1) load model 2) generate embeddings 3) save embeddings.
     """
     self.use_cuda = not self.opt.get('no_cuda') and torch.cuda.is_available()
     overrides = {'interactive_mode': True, 'interactive_candidates': 'inline'}
     if self.opt['dpr_model']:
         overrides.update(
             {
                 'model': 'dpr_agent',
                 'model_file': self.opt['model_file'],
                 'override': {
                     'model': 'dpr_agent',
                     'interactive_candidates': 'inline',
                 },
             }
         )
         agent = create_agent(Opt(overrides))
     else:
         agent = create_agent_from_model_file(self.opt['model_file'], overrides)
     model = agent.model.module if hasattr(agent.model, 'module') else agent.model
     assert hasattr(model, 'encoder_cand') or hasattr(model, 'cand_encoder')
     assert isinstance(agent, TorchRankerAgent)
     passages = self.load_passages()
     data = self.encode_passages(agent, passages)
     self.save_data(data)
Example #9
def self_chat(opt):
    random.seed(opt['seed'])
    partner = opt['partner_model_file']
    partner_opt_file = opt.get('partner_opt_file')

    # Create agents
    agent1 = create_agent(opt, requireModelExists=True)
    if partner is None:
        # Self chat with same model
        agent2 = agent1.clone()
    else:
        # Self chat with different models
        if partner_opt_file:
            print(f"WARNING: Loading override opts from: {partner_opt_file}")
            with open(partner_opt_file) as f:
                partner_opt = json.load(f)
        else:
            partner_opt = {}
        partner_opt['interactive_mode'] = opt.get('interactive_mode', True)
        print(
            f"WARNING: Setting partner interactive mode to: {partner_opt['interactive_mode']}"
        )
        agent2 = create_agent_from_model_file(partner, partner_opt)

    # Set IDs
    agent1.id = agent1.id + "_1"
    agent2.id = agent2.id + "_2"

    model_id = agent1.id + "_" + agent2.id

    world = create_task(opt, user_agents=[agent1, agent2])

    # Set up world logging
    logger = WorldLogger(opt)
    log_time = TimeLogger()

    # Run some self chats.
    for i in range(opt['num_self_chats']):
        _run_self_chat_episode(opt, world, logger)
        report = world.report()
        text, report = log_time.log(i + 1, opt['num_self_chats'], report)
        logging.info(text)

    # Save chats
    if opt['outfile'] is None:
        outfile = '/tmp/{}_selfchat'.format(model_id)
    else:
        outfile = opt['outfile']

    if opt['save_format'] == 'conversations' and hasattr(world, 'write'):
        # use self chat specific world to write conversation
        # this might be useful for logging extra contextual
        # information (like personas)
        world.write(logger, outfile)
    else:
        # use default logger write function
        logger.write(outfile, world, opt['save_format'])

    return logger.get_logs()
Example #10
 def __init__(self):
     # Load the model from the model zoo via ParlAI
     overrides = {
         "skip_generation": False,
         "interactive_mode": True,
         "init_opt": "gen/seeker_dialogue",
         "all_model_path": "zoo:seeker/seeker_dialogue_3B/model",
         # seeker_dialogue
         "beam_disregard_knowledge_for_context_blocking": False,
         "drm_beam_block_full_context": True,
         "drm_beam_block_ngram": 3,
         "drm_beam_context_block_ngram": 3,
         "drm_beam_min_length": 20,
         "drm_beam_size": 10,
         "drm_inference": "beam",
         "drm_message_mutators": None,
         "drm_model":
         "projects.seeker.agents.seeker:ComboFidSearchQueryAgent",
         "exclude_context_in_krm_context_blocking": False,
         "include_knowledge_in_krm_context_blocking": True,
         "inject_query_string": None,
         "knowledge_response_control_token": None,
         "krm_beam_block_ngram": 3,
         "krm_beam_context_block_ngram": 3,
         "krm_beam_min_length": 1,
         "krm_beam_size": 3,
         "krm_doc_chunks_ranker": "woi_chunk_retrieved_docs",
         "krm_inference": "beam",
         "krm_message_mutators": None,
         "krm_model":
         "projects.seeker.agents.seeker:ComboFidSearchQueryAgent",
         "krm_n_ranked_doc_chunks": 1,
         "krm_rag_retriever_type": "search_engine",
         "krm_search_query_generator_model_file": "''",
         "loglevel": "debug",
         "min_knowledge_length_when_search": 10,
         "model": "projects.seeker.agents.seeker:SeekerAgent",
         "model_file": "zoo:seeker/seeker_dialogue_3B/model",
         "sdm_beam_block_ngram": -1,
         "sdm_beam_min_length": 1,
         "sdm_beam_size": 1,
         "sdm_history_size": 1,
         "sdm_inference": "greedy",
         "sdm_model":
         "projects.seeker.agents.seeker:ComboFidSearchQueryAgent",
         "search_decision": "always",
         "search_decision_control_token": "__is-search-required__",
         "search_decision_do_search_reply": "__do-search__",
         "search_decision_dont_search_reply": "__do-not-search__",
         "search_query_control_token": "__generate-query__",
         "sqm_beam_block_ngram": -1,
         "sqm_beam_min_length": 2,
         "sqm_beam_size": 1,
         "sqm_inference": "beam",
         "sqm_model":
         "projects.seeker.agents.seeker:ComboFidSearchQueryAgent",
     }
     self.model = create_agent_from_model_file(self.zoo_path, overrides)
Example #11
def load_model(model_checkpoint, gpu_num):
    opt_overrides = {}
    opt_overrides['gpu'] = gpu_num
    opt_overrides['datatype'] = 'test'
    opt_overrides['inference'] = 'nucleus'
    opt_overrides['skip_generation'] = False
    model = create_agent_from_model_file(model_checkpoint,
                                         opt_overrides=opt_overrides)
    logging.info("load Raw Blender model from:{}".format(model_checkpoint))
    logging.info("allocate Raw Blender model to gpu_{}".format(gpu_num))
    return model
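A usage sketch for the loader above; the checkpoint path and GPU index are placeholders:

# 'path/to/blender/model' is a placeholder for a real checkpoint.
blender = load_model('path/to/blender/model', gpu_num=0)
blender.observe({'text': 'Hi! What did you do today?', 'episode_done': True})
print(blender.act()['text'])  # one nucleus-sampled reply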
Example #12
    def test_beamsearch_blocking(self):
        """
        Test beamsearch blocking.
        """
        with testing_utils.tempdir() as tmpdir:
            agent = create_agent_from_model_file(
                'zoo:unittest/beam_blocking/model')
            agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
            assert agent.act()['text'] == '5 5 5 5 5 5 5'

            agent = create_agent_from_model_file(
                'zoo:unittest/beam_blocking/model', Opt(beam_block_ngram=1))
            agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
            assert '5 5' not in agent.act()['text']

            agent = create_agent_from_model_file(
                'zoo:unittest/beam_blocking/model', Opt(beam_block_ngram=2))
            agent.observe({'text': '5 5 5 5 5 5 5', 'episode_done': True})
            assert '5 5 5' not in agent.act()['text']

            with open(os.path.join(tmpdir, 'blocklist.txt'), 'w') as f:
                f.write("38\n62\n34 34\n")

            agent = create_agent_from_model_file(
                'zoo:unittest/beam_blocking/model',
                Opt(beam_block_list_filename=os.path.join(
                    tmpdir, 'blocklist.txt')),
            )
            agent.observe({'text': '4 4 4', 'episode_done': True})
            assert agent.act()['text'] == '4 4 4'

            agent.observe({'text': '38 38 38', 'episode_done': True})
            assert '38' not in agent.act()['text']

            agent.observe({'text': '62 62 62', 'episode_done': True})
            assert '62' not in agent.act()['text']

            agent.observe({'text': '34 34 34', 'episode_done': True})
            text = agent.act()['text']
            assert '34' in text
            assert '34 34' not in text
Example #13
 def init_predictor(self, opt: Opt, shared=None):
     """
     Initializes Predictor Module
     """
     if not shared:
         if not opt.get("predictor_model_file"):
             logging.warn(
                 'Reranker MUST specify predictor_model_file unless subclass __init__() sets up the model in its own way (unusual). Skipping predictor setup!'
             )
         else:
             self.predictor = create_agent_from_model_file(
                 self.predictor_model_file)
     else:
         self.predictor = shared['predictor']
Example #14
 def init_predictor(self, opt: Opt, shared=None):
     if not shared:
         override = {
             'return_cand_scores': True,
             'datatype': 'valid',
             'interactive_mode': opt.get('interactive_mode', True),
             'ignore_bad_candidates': True,
             'encode_candidate_vecs': True,
             'interactive_candidates': 'inline',
         }  # to not init optim
         self.predictor = create_agent_from_model_file(
             self.predictor_model_file, opt_overrides=override)
     else:
         self.predictor = shared['predictor']
Example #15
def eval_single(opt, tgt_agent, ref_agent, save_dir):
  eval_file_path = opt['eval_dir'] + ref_agent + '/' + opt['log_file']
  save_file_path = os.path.join(save_dir, ref_agent + '.jsonl')

  model_mf = 'outputs/agent_' + tgt_agent + '/model'
  model_optf = 'outputs/agent_' + tgt_agent + '/model.opt'
  with open(model_optf) as f:
    model_opt = json.load(f)
  model_opt['interactive_mode'] = True
  tgt_agent = create_agent_from_model_file(model_mf, model_opt)

  model_mf = 'outputs/agent_' + ref_agent + '/model'
  model_optf = 'outputs/agent_' + ref_agent + '/model.opt'
  with open(model_optf) as f:
    model_opt = json.load(f)
  model_opt['interactive_mode'] = True
  ref_agent = create_agent_from_model_file(model_mf, model_opt)

  with open(eval_file_path) as eval_file, open(save_file_path,
                                               'w') as save_file:
    num_match = 0
    errorids = []
    for i, line in tqdm(enumerate(eval_file)):
      if not line.strip():
        continue
      conversation = json.loads(line)
      if _run_conversation(i, conversation, tgt_agent, ref_agent):
        num_match += 1

        assert conversation['dialog'][-1]['speaker'] == 'tgt_model'
        assert len(conversation['dialog']) % 3 == 0
        conversation['reward_ref'] = conversation.pop('report')
        save_file.write(json.dumps(conversation) + '\n')
      else:
        errorids.append(i)
    print('Matched: {}/{}'.format(num_match, (num_match + len(errorids))))
    print('Error IDs: ', errorids)
Example #16
    def test_sparse_tfidf_retriever_singlethread(self):
        with testing_utils.tempdir() as tmpdir:
            MODEL_FILE = os.path.join(tmpdir, 'tmp_test_babi')
            testing_utils.train_model(
                dict(
                    model='tfidf_retriever',
                    task='babi:task1k:1',
                    model_file=MODEL_FILE,
                    retriever_numworkers=1,
                    retriever_hashsize=2**8,
                    retriever_tokenizer='simple',
                    datatype='train:ordered',
                    batchsize=1,
                    num_epochs=1,
                ))

            agent = create_agent_from_model_file(MODEL_FILE)

            obs = {
                'text': (
                    'Mary moved to the bathroom. John went to the hallway. '
                    'Where is Mary?'
                ),
                'episode_done': True,
            }
            agent.observe(obs)
            reply = agent.act()
            assert reply['text'] == 'bathroom'

            ANS = 'The one true label.'
            new_example = {
                'text': (
                    'A bunch of new words that are not in the other task, '
                    'which the model should be able to use to identify '
                    'this label.'
                ),
                'labels': [ANS],
                'episode_done': True,
            }
            agent.observe(new_example)
            reply = agent.act()
            assert 'text' in reply and reply['text'] == ANS

            new_example.pop('labels')
            agent.observe(new_example)
            reply = agent.act()
            assert reply['text'] == ANS
Example #17
 def init_search_query_generator(self, opt) -> TorchGeneratorAgent:
     model_file = opt['search_query_generator_model_file']
     logging.info('Loading search generator model')
     logging.disable()
     search_query_gen_agent = create_agent_from_model_file(
         model_file,
         opt_overrides={
             'skip_generation': False,
             'inference': opt['search_query_generator_inference'],
             'beam_min_length': opt['search_query_generator_beam_min_length'],
             'beam_size': opt['search_query_generator_beam_size'],
             'text_truncate': opt['search_query_generator_text_truncate'],
         },
     )
     logging.enable()
     logging.info('Search query generator model loading completed!')
     return search_query_gen_agent
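The loaded query generator is itself an ordinary ParlAI agent; a standalone sketch of the equivalent call, using the zoo:sea/bart_sq_gen/model checkpoint suggested elsewhere in these examples (the override values are illustrative):

from parlai.core.agents import create_agent_from_model_file

# Equivalent standalone load of a search-query generator.
sq_gen = create_agent_from_model_file(
    'zoo:sea/bart_sq_gen/model',
    opt_overrides={
        'skip_generation': False,
        'inference': 'beam',
        'beam_min_length': 2,
        'beam_size': 1,
        'text_truncate': 512,
    },
)
sq_gen.observe({'text': 'who won the world cup in 2018?', 'episode_done': True})
print(sq_gen.act()['text'])  # a short search query string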
Example #18
    def __init__(self, model, device, maxlen=-1):
        model = self.check_agent(model)
        maxlen = maxlen if maxlen > 0 else self.default_maxlen()

        if "end2end_generator" in model:
            name = "end2end_generator"
        else:
            raise Exception("wrong model")

        super().__init__(
            name=model,
            suffix="\n",
            device=device,
            maxlen=maxlen,
            model=create_agent_from_model_file(
                f"zoo:wizard_of_wikipedia/{name}/model"),
        )
Example #19
 def __init__(self, opt: Opt):
     self.opt = opt
     self.agents = []
     self.agent_dict = None
     self.generations = []
     self.input_type = 'Search'
     self.knowledge_access_method = KnowledgeAccessMethod(
         opt['knowledge_access_method'])
     model_file = modelzoo_path(opt['datapath'],
                                opt['query_generator_model_file'])
     if (self.knowledge_access_method is KnowledgeAccessMethod.SEARCH_ONLY
             and 'blenderbot2/query_generator/model' in model_file):
         raise ValueError(
             'You cannot use the blenderbot2 query generator with search_only. Please '
             'consider setting --query-generator-model-file zoo:sea/bart_sq_gen/model '
             'instead.')
     if model_file and os.path.exists(model_file):
         logging.info(f'Building Query Generator from file: {model_file}')
         logging.disable()
         overrides: Dict[str, Any] = {'skip_generation': False}
         overrides['inference'] = opt['query_generator_inference']
         overrides['beam_size'] = opt.get('query_generator_beam_size', 3)
         overrides['beam_min_length'] = opt.get(
             'query_generator_beam_min_length', 2)
         overrides['model_parallel'] = opt['model_parallel']
         overrides['no_cuda'] = opt['no_cuda']
         if self.opt['query_generator_truncate'] > 0:
             overrides['text_truncate'] = self.opt[
                 'query_generator_truncate']
             overrides['truncate'] = self.opt['query_generator_truncate']
         base_agent = create_agent_from_model_file(model_file,
                                                   opt_overrides=overrides)
         assert isinstance(base_agent, TorchAgent)
         self.agents = [base_agent]
         bsz = max(
             opt.get('batchsize') or 1,
             opt.get('eval_batchsize') or 1)
         rag_turn_n_turns = opt.get('rag_turn_n_turns', 1)
         if bsz > 1 or rag_turn_n_turns > 1:
             self.agents += [
                 create_agent_from_shared(self.agents[0].share())
                 for _ in range((bsz * rag_turn_n_turns) - 1)
             ]
         self.agent_dict = self.agents[0].build_dictionary()
         logging.enable()
Example #20
 def init_predictor(self, opt: Opt, shared=None):
     if not shared:
         override = {
             'return_cand_scores': True,
             'datatype': 'valid',
             'no_cuda': opt['reranker_no_cuda'],
             'interactive_mode': opt.get('interactive_mode', True),
             'ignore_bad_candidates': True,
             'encode_candidate_vecs': True,
             'interactive_candidates': 'inline',
         }  # to not init optim
         if opt.get('predictor_characters_file'):
             override['fixed_candidates_path'] = opt[
                 'predictor_characters_file']
         self.predictor = create_agent_from_model_file(
             self.predictor_model_file, opt_overrides=override)
     else:
         self.predictor = shared['predictor']
Example #21
    def load_classifier(self, gpu_num):
        if self.has_classifier:
            if self.classifier is None:
                opt_overrides = {}
                self.classifier_gpu_num = gpu_num
                opt_overrides['gpu'] = gpu_num
                opt_overrides['datatype'] = 'test'
                # opt_overrides['inference'] = 'nucleus'
                opt_overrides['skip_generation'] = False

                self.classifier = create_agent_from_model_file(
                    self.classifier_checkpoint, opt_overrides=opt_overrides)
                teacher_for_classifier_opt = deepcopy(self.classifier.opt)
                teacher_for_classifier_opt.update({"build_data_or_not": False})
                self.teacher_for_classifier = create_task_agent_from_taskname(
                    teacher_for_classifier_opt)[0]
                logging.info("load classifier from:{}".format(
                    self.classifier_checkpoint))
                logging.info("allocate classifier to gpu_{}".format(gpu_num))
        else:
            self.classifier = None
Example #22
 def __init__(self):
     # Load the model from the model zoo via ParlAI
     overrides = {"skip_generation": False, "interactive_mode": True}
     self.model = create_agent_from_model_file(self.zoo_path, overrides)
Example #23
def create_agent_and_persona(personas=()):
    # `personas` should be an iterable of persona strings; a bare string
    # default would be iterated character by character if passed non-empty.
    blender_bot = create_agent_from_model_file("zoo:blender/blender_90M/model")
    for persona in personas:
        blender_bot.observe({'text': persona})
    return blender_bot
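A usage sketch for the helper above; the persona lines are illustrative:

# Seed the 90M BlenderBot with two persona lines, then chat.
bot = create_agent_and_persona(
    personas=[
        'your persona: i love hiking.',
        'your persona: i have two dogs.',
    ]
)
bot.observe({'text': 'hey, any plans this weekend?', 'episode_done': False})
print(bot.act()['text'])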
Example #24
def self_chat(opt):
    random.seed(opt['seed'])
    partner = opt['partner_model_file']
    assert partner is not None
    partner_opt_file = opt.get('partner_opt_file')
    if partner_opt_file:
        assert partner_opt_file == partner + '.opt', (
            'You can remove this assert if you are sure that is safe'
        )
    else:
        partner_opt_file = partner + '.opt'

    # Create agents
    if opt['model_file'].split(':')[0] == 'human':
        agent1 = MyLocalHumanAgent(opt)
        assert partner is not None
    else:
        agent1 = create_agent(opt, requireModelExists=True)
    if partner is None:
        # Self chat with same model
        agent2 = agent1.clone()
    else:
        # Self chat with different models
        if partner_opt_file:
            print(f"WARNING: Loading override opts from: {partner_opt_file}")
            with open(partner_opt_file) as f:
                partner_opt = json.load(f)
        else:
            partner_opt = {}
        partner_opt['interactive_mode'] = opt.get('interactive_mode', True)
        print(
            f"WARNING: Setting partner interactive mode to: {partner_opt['interactive_mode']}"
        )
        agent2 = create_agent_from_model_file(partner, partner_opt)

    # Set IDs
    agent1.id = agent1.id + '_1'
    agent2.id = agent2.id + '_2'

    model_id = agent1.id + '_' + agent2.id

    world = create_task(opt, user_agents=[agent1, agent2])

    # Set up world logging
    logger = WorldLogger(opt)
    log_time = TimeLogger()

    # Run some self chats.
    all_report = []
    if opt['num_self_chats'] < 0:
        opt['num_self_chats'] = len(world.messages)

    for i in range(opt['num_self_chats']):
        _run_self_chat_episode(opt, world, logger)
        report = world.report()
        text, report = log_time.log(i + 1, opt['num_self_chats'], report)
        logging.info(text)
        all_report.append(report)

        world.write(logger, all_report, opt['outfile'])

    # Save chats
    if opt['outfile'] is None:
        outfile = '/tmp/{}_selfchat'.format(model_id)
    else:
        outfile = opt['outfile']

    if opt['save_format'] == 'conversations' and hasattr(world, 'write'):
        # use self chat specific world to write conversation
        # this might be useful for logging extra contextual
        # information (like personas)
        world.write(logger, all_report, outfile)
    else:
        # use default logger write function
        logger.write(outfile, world, opt['save_format'])

    return logger.get_logs()
Example #25
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_type',
                        default='blender',
                        help='Either `blender` or `dialogpt`.')
    parser.add_argument('--device',
                        default='cpu',
                        help='Either `cpu` or `cuda` for dialogpt.')
    parser.add_argument('--model_path', help='Path for dialogpt.')
    parser.add_argument('--tokenizer_path', help='Path for dialogpt.')
    parser.add_argument('--generated_file',
                        default='',
                        help='Previously generated samples.')
    parser.add_argument('--include_persona',
                        action='store_true',
                        help='Whether to include personas.')
    parser.add_argument('--generator',
                        default='rtp',
                        help='Either `rtp` or `bst`.')
    args = parser.parse_args()
    print(args)

    tokenizer = None
    model = None
    if args.model_type == 'blender':
        model = create_agent_from_model_file('zoo:blender/blender_90M/model')
    elif args.model_type == 'dialogpt':
        tokenizer = AutoTokenizer.from_pretrained(
            args.tokenizer_path if args.tokenizer_path else args.model_path)
        model = AutoModelWithLMHead.from_pretrained(args.model_path)
        model = model.to(args.device)
    args.model = model
    args.tokenizer = tokenizer

    global checklist
    checklist = BiasChecklist(args,
                              util.generate_response_fn,
                              personas=constants.PERSONAS,
                              generated_file=args.generated_file)

    # Test case
    assert args.generator in ['rtp', 'bst']
    if args.generator == 'rtp':
        generator = rtp_generator
    elif args.generator == 'bst':
        generator = bst_generator
    case = TestCase('Test: Offensiveness (' + args.generator + ')', generator,
                    score)
    checklist.add(case)

    # Report.
    result, samples = checklist.report(with_persona=args.include_persona)
    fi = './' + args.model_type + '_offensiveness'
    if args.include_persona:
        fi += '_persona'
    else:
        fi += '_nopersona'
    if args.generated_file:
        fi += '_regenerated'
    with open(fi + '.txt', 'w') as f:
        f.writelines('\n'.join(result))
    with open(fi + '_samples.tsv', 'w') as f:
        writer = csv.writer(f, delimiter='\t')
        for sample in samples:
            writer.writerow(sample)