def create_world(opt):
    if isinstance(opt, ParlaiParser):
        logging.error('interactive should be passed opt not Parser')
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    agent.opt.log()
    human_agent = HumanAgent(opt)

    # set up world logger
    world_logger = WorldLogger(opt) if opt.get('outfile') else None
    world = MultiClientInteractiveWorld(opt, [human_agent, agent])

    # Show some example dialogs:
    # while not world.epoch_done():
    #     world.parley()
    #     if world.epoch_done() or world.get_total_parleys() <= 0:
    #         # chat was reset with [DONE], [EXIT] or EOF
    #         if world_logger is not None:
    #             world_logger.reset()
    #         continue
    #
    #     if world_logger is not None:
    #         world_logger.log(world)
    #     if opt.get('display_examples'):
    #         print("---")
    #         print(world.display())
    #
    # if world_logger is not None:
    #     # dump world acts to file
    #     world_logger.write(opt['outfile'], world, file_format=opt['save_format'])

    return world
Example #2
    def _init_resnext_cnn(self):
        """
        Lazily initialize preprocessor model.

        Used when image_mode is one of the ``resnext101_..._wsl`` varieties.
        """
        try:
            cnn_type, layer_num = self._image_mode_switcher()
            model = self.torch.hub.load('facebookresearch/WSL-Images', cnn_type)
            # cut off layer for ImageNet classification
            # if spatial, cut off another layer for spatial features
            self.netCNN = self.nn.Sequential(*list(model.children())[:layer_num])
        except RuntimeError as e:
            # Perhaps specified one of the wrong model names
            model_names = [m for m in IMAGE_MODE_SWITCHER if 'resnext101' in m]
            logging.error(
                'If you have specified one of the resnext101 wsl models, '
                'please make sure it is one of the following: \n'
                f"{', '.join(model_names)}"
            )
            raise e
        except AttributeError:
            # E.g. "module 'torch' has no attribute 'hub'"
            raise RuntimeError(
                'Please install the latest pytorch distribution to have access '
                'to the resnext101 wsl models (pytorch 1.1.0, torchvision 0.3.0)'
            )

        if self.use_cuda:
            self.netCNN.cuda()
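The children-slicing trick above is easier to see on a small torchvision model; a minimal sketch (the resnet18 here is an illustrative stand-in, not the WSL model the snippet loads):

# Minimal sketch of the slicing used above: the last two children of a
# torchvision ResNet are the average pool and the ImageNet classification
# head, so dropping the head keeps pooled features, and dropping the pool
# as well keeps spatial feature maps.
import torch
import torch.nn as nn
from torchvision import models

model = models.resnet18(weights=None)  # stand-in for the resnext101 wsl model
pooled = nn.Sequential(*list(model.children())[:-1])   # drop fc head
spatial = nn.Sequential(*list(model.children())[:-2])  # also drop avgpool

x = torch.randn(1, 3, 224, 224)
print(pooled(x).shape)   # torch.Size([1, 512, 1, 1])
print(spatial(x).shape)  # torch.Size([1, 512, 7, 7])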
Example #3
def interactive(opt):
    if isinstance(opt, ParlaiParser):
        logging.error('interactive should be passed opt not Parser')
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=False)
    agent.opt.log()
    human_agent = LocalHumanAgent(opt)
    # set up world logger
    world_logger = WorldLogger(opt) if opt.get('outfile') else None
    world = create_task(opt, [human_agent, agent])

    # Show some example dialogs:
    while not world.epoch_done():
        world.parley()
        print("done by me!")
        print(world.display())
        if world.epoch_done() or world.get_total_parleys() <= 0:
            # chat was reset with [DONE], [EXIT] or EOF
            if world_logger is not None:
                world_logger.reset()
            continue

        if world_logger is not None:
            world_logger.log(world)
        if opt.get('display_examples'):
            print("---")
            print(world.display())

    if world_logger is not None:
        # dump world acts to file
        world_logger.write(opt['outfile'],
                           world,
                           file_format=opt['save_format'])
Example #4
    def log_metrics(self, setting, step, report):
        """
        Log all metrics to tensorboard.

        :param setting:
            One of train/valid/test. Will be used as the title for the graph.
        :param step:
            Number of parleys
        :param report:
            The report to log
        """
        for k, v in report.items():
            v = v.value() if isinstance(v, Metric) else v
            if not isinstance(v, numbers.Number):
                logging.error(f'k {k} v {v} is not a number')
                continue
            display = get_metric_display_data(metric=k)
            # Remove invalid characters for TensorboardX Summary beforehand
            # so that the logs aren't cluttered with warnings.
            tag = _TB_SUMMARY_INVALID_TAG_CHARACTERS.sub('_', f'{k}/{setting}')
            try:
                self.writer.add_scalar(
                    tag,
                    v,
                    global_step=step,
                    display_name=f"{display.title}",
                    summary_description=display.description,
                )
            except TypeError:
                # internal tensorboard doesn't support custom display titles etc
                self.writer.add_scalar(tag, v, global_step=step)
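The _TB_SUMMARY_INVALID_TAG_CHARACTERS pattern is defined elsewhere in the module; a plausible sketch, mirroring the character set TensorBoard's own tag sanitization allows (alphanumerics plus '_', '-', '.', and '/'):

import re

# Assumed module-level pattern: anything outside TensorBoard's allowed
# tag characters gets replaced with '_' before logging.
_TB_SUMMARY_INVALID_TAG_CHARACTERS = re.compile(r'[^-/\w\.]')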
Example #5
    def log_metrics(self, setting, step, report):
        """
        Log all metrics to tensorboard.

        :param setting:
            One of train/valid/test. Will be used as the title for the graph.
        :param step:
            Number of parleys
        :param report:
            The report to log
        """
        for k, v in report.items():
            v = v.value() if isinstance(v, Metric) else v
            if not isinstance(v, numbers.Number):
                logging.error(f'k {k} v {v} is not a number')
                continue
            display = get_metric_display_data(metric=k)
            try:
                self.writer.add_scalar(
                    f'{k}/{setting}',
                    v,
                    global_step=step,
                    display_name=f"{display.title}",
                    summary_description=display.description,
                )
            except TypeError:
                # internal tensorboard doesn't support custom display titles etc
                self.writer.add_scalar(f'{k}/{setting}', v, global_step=step)
Example #6
def interactive(opt, print_parser=None):
    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None
    if isinstance(opt, ParlaiParser):
        logging.error('interactive should be passed opt not Parser')
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    human_agent = LocalHumanAgent(opt)
    # set up world logger
    world_logger = WorldLogger(opt) if opt['save_world_logs'] else None
    world = create_task(opt, [human_agent, agent])

    # Show some example dialogs:
    while not world.epoch_done():
        world.parley()
        if world_logger is not None:
            world_logger.log(world)
        if opt.get('display_examples'):
            print("---")
            print(world.display())

    if world_logger is not None:
        # dump world acts to file
        world_logger.reset()  # add final acts to logs
        base_outfile = opt['report_filename'].split('.')[0]
        outfile = f'{base_outfile}_{opt["task"]}_replies.jsonl'
        world_logger.write(outfile, world, file_format=opt['save_format'])
Example #7
def load_passage_reader(
    ctx_file: str, return_dict: bool = True
) -> Union[Dict[str, Tuple[str, str]], List[Tuple[str, str, str]]]:
    """
    Load passages from file, corresponding to a FAISS index.

    We first attempt to read the passages with a csv reader; if the
    passage file was not saved with a csv writer, that read can fail,
    and we fall back to reading the file line by line.

    :param ctx_file:
        file to read

    :return passages:
        the loaded passages: a dict mapping id to (text, title) if
        return_dict is True, else a list of (id, text, title) tuples.
    """
    logging.info(f'Reading data from: {ctx_file}')
    f_open = gzip.open if ctx_file.endswith(".gz") else open
    try:
        passages = {} if return_dict else []
        with f_open(ctx_file) as tsvfile:
            _reader = csv.reader(tsvfile, delimiter='\t')  # type: ignore
            ids = []
            for idx, row in tqdm(enumerate(_reader)):
                if idx == 0:
                    assert row[0] == 'id'
                    ids.append(-1)
                elif idx <= 1:
                    ids.append(row[0])
                    if return_dict:
                        passages[row[0]] = (row[1], row[2])  # type: ignore
                    else:
                        passages.append((row[0], row[1], row[2]))  # type: ignore
                    continue
                else:
                    assert int(row[0]) == int(ids[idx - 1]) + 1, "invalid load"
                    if return_dict:
                        passages[row[0]] = (row[1], row[2])  # type: ignore
                    else:
                        passages.append((row[0], row[1], row[2]))  # type: ignore
                    ids.append(row[0])

        del ids
    except (csv.Error, AssertionError) as e:
        passages = {} if return_dict else []
        logging.error(f'Exception: {e}')
        logging.warning('Error in loading csv; loading via readlines')
        with f_open(ctx_file) as tsvfile:
            for idx, l in tqdm(enumerate(tsvfile.readlines())):
                line = l.replace('\n', '').split('\t')  # type: ignore
                assert len(line) == 3
                if idx == 0:
                    assert line[0] == 'id'
                if line[0] != 'id':
                    if return_dict:
                        passages[line[0]] = (line[1], line[2])  # type: ignore
                    else:
                        passages.append((line[0], line[1], line[2]))  # type: ignore
    return passages
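For reference, a hypothetical TSV file this reader would accept: a header row starting with 'id', then rows of consecutive integer ids (the 'text' and 'title' column names are assumptions consistent with the checks above):

import csv

# Build a tiny hypothetical passage file matching load_passage_reader's
# expectations: header row, then rows with consecutive integer ids.
rows = [
    ('id', 'text', 'title'),
    ('1', 'First passage body.', 'First Title'),
    ('2', 'Second passage body.', 'Second Title'),
]
with open('passages.tsv', 'w', newline='') as f:
    csv.writer(f, delimiter='\t').writerows(rows)

# load_passage_reader('passages.tsv', return_dict=True) would then return
# {'1': ('First passage body.', 'First Title'),
#  '2': ('Second passage body.', 'Second Title')}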
Example #8
def interactive(opt):
    if isinstance(opt, ParlaiParser):
        logging.error('interactive should be passed opt not Parser')
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    # (opt_convai, opt_ed, and opt_bst are assumed to be defined elsewhere)
    human_agent = LocalHumanAgent(opt_convai)

    convai_agent = create_agent(opt_convai, requireModelExists=True)
    ed_agent = create_agent(opt_ed, requireModelExists=True)
    bst_agent = create_agent(opt_bst, requireModelExists=True)

    models = [convai_agent, ed_agent, bst_agent]
    labels = ['CONVAI2', 'EMPATHETIC_DIALOGUES', 'BLENDED_SKILL_TALK']

    imperial_quips = ImperialQuipWorld(human_agent, models, labels)

    keep_suggesting = True
    while keep_suggesting:
        suggestions = imperial_quips.parley()
        selection = input("Choose suggestion: ")
        modification = input("Do you wish to modify the selected response? (y/n): ")
        if modification == 'y':
            personalise_message.personalise_message(suggestions[int(selection) - 1])
        else:
            print("Response: " + suggestions[int(selection) - 1])

        user_input = input("")
        if user_input == "EXIT":
            keep_suggesting = False
Example #9
def interactive(opt):
    if isinstance(opt, ParlaiParser):
        logging.error('interactive should be passed opt not Parser')
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    # (opt_convai, opt_ed, opt_bst, and opt_wow are assumed to be defined elsewhere)
    human_agent = LocalHumanAgent(opt_convai)

    convai_agent = create_agent(opt_convai, requireModelExists=True)
    ed_agent = create_agent(opt_ed, requireModelExists=True)
    bst_agent = create_agent(opt_bst, requireModelExists=True)
    wow_agent = create_agent(opt_wow, requireModelExists=True)

    models = [convai_agent, ed_agent, bst_agent, wow_agent]
    labels = ['ConvAI2', 'Empathetic Dialogue', 'Blended Skill Talk', 'Wizard_of_Wikipedia']

    imperial_quips = ImperialQuipsWorld(human_agent, models, labels)

    keep_suggesting = True
    while keep_suggesting:
        imperial_quips.parley()
        user_input = input("")
        if user_input == "EXIT":
            keep_suggesting = False
Example #10
def _unzip(path, fname, delete=True):
    """
    Unpack the given zip file to the same directory.

    :param str path:
        The folder containing the archive. Will contain the contents.

    :param str fname:
        The filename of the archive file.

    :param bool delete:
        If true, the archive will be deleted after extraction.
    """
    import zipfile

    logging.debug(f'unpacking {fname}')
    fullpath = os.path.join(path, fname)
    with zipfile.ZipFile(PathManager.open(fullpath, 'rb'), 'r') as zf:
        for member in zf.namelist():
            outpath = os.path.join(path, member)
            if zf.getinfo(member).is_dir():
                logging.debug(f"Making directory {outpath}")
                PathManager.mkdirs(outpath)
                continue
            logging.debug(f"Extracting to {outpath}")
            with zf.open(member, 'r') as inf, PathManager.open(outpath,
                                                               'wb') as outf:
                shutil.copyfileobj(inf, outf)
    if delete:
        try:
            PathManager.rm(fullpath)
        except PermissionError:
            logging.error(
                f"Tried to delete {fullpath} but got a permission error. This "
                "is known to happen in Windows and is probably not fatal.")
Example #11
def safe_interactive(opt, print_parser=None):
    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None
    if isinstance(opt, ParlaiParser):
        logging.error('interactive should be passed opt not Parser')
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    human_agent = SafeLocalHumanAgent(opt)
    world = create_task(opt, [human_agent, agent])

    # Interact until episode done
    while True:
        world.parley()
        bot_act = world.get_acts()[-1]
        if 'bot_offensive' in bot_act and bot_act['bot_offensive']:
            agent.reset()

        if opt.get('display_examples'):
            print('---')
            print(world.display())
        if world.epoch_done():
            logging.info('epoch done')
            break
Example #12
    def _init_resnext_cnn(self):
        """
        Lazily initialize preprocessor model.

        Used when image_mode is one of the ``resnext101_..._wsl`` varieties.
        """
        try:
            model = self.torch.hub.load('facebookresearch/WSL-Images', self.image_mode)
            # cut off layer for ImageNet classification
            self.netCNN = self.nn.Sequential(*list(model.children())[:-1])
        except RuntimeError as e:
            # Perhaps specified one of the wrong model names
            logging.error(
                'If you have specified one of the resnext101 wsl models, '
                'please make sure it is one of the following: \n'
                'resnext101_32x8d_wsl, resnext101_32x16d_wsl, '
                'resnext101_32x32d_wsl, resnext101_32x48d_wsl'
            )
            raise e
        except AttributeError:
            # E.g. "module 'torch' has no attribute 'hub'"
            raise RuntimeError(
                'Please install the latest pytorch distribution to have access '
                'to the resnext101 wsl models (pytorch 1.1.0, torchvision 0.3.0)'
            )

        if self.use_cuda:
            self.netCNN.cuda()
Example #13
def extract_feats(opt):
    if isinstance(opt, ParlaiParser):
        logging.error('extract_feats should be passed opt not parser')
        opt = opt.parse_args()
    # Get command line arguments
    opt = copy.deepcopy(opt)
    dt = opt['datatype'].split(':')[0] + ':ordered'
    opt['datatype'] = dt
    opt['no_cuda'] = False
    opt['gpu'] = 0
    opt['num_epochs'] = 1
    opt['num_load_threads'] = 20
    opt.log()
    logging.info("Loading Images")
    # create repeat label agent and assign it to the specified task
    agent = RepeatLabelAgent(opt)
    world = create_task(opt, agent)

    total_exs = world.num_examples()
    pbar = tqdm.tqdm(unit='ex', total=total_exs)
    while not world.epoch_done():
        world.parley()
        pbar.update()
    pbar.close()

    logging.info("Finished extracting images")
Example #14
def error_once(msg: str) -> None:
    """
    Log an error, but only once.

    :param str msg: Message to display
    """
    global _seen_logs
    if msg not in _seen_logs:
        _seen_logs.add(msg)
        logging.error(msg)
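The snippet relies on module-level state defined elsewhere in the file; presumably something like:

from typing import Set

# Assumed module-level registry of messages that have already been logged.
_seen_logs: Set[str] = set()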
Example #15
    def __init__(
        self,
        opt: Opt,
        dpr_model: str = 'bert',
        pretrained_path: str = DPR_ZOO_MODEL,
        encoder_type: str = 'query',
    ):
        # Override options
        try:
            config: BertConfig = BertConfig.from_pretrained(
                'bert-base-uncased')
        except OSError:
            config_path = PathManager.get_local_path(
                os.path.join(opt['datapath'], "bert_base_uncased",
                             self.CONFIG_PATH))
            config: BertConfig = BertConfig.from_pretrained(config_path)

        pretrained_path = modelzoo_path(opt['datapath'],
                                        pretrained_path)  # type: ignore
        if not os.path.exists(pretrained_path):
            # when initializing from parlai rag models, the pretrained path
            # may no longer exist. This is fine if we've already trained
            # the model.
            assert dpr_model == 'bert_from_parlai_rag'
            logging.error(f'Pretrained Path does not exist: {pretrained_path}')
            pretrained_path = modelzoo_path(opt['datapath'],
                                            DPR_ZOO_MODEL)  # type: ignore
            dpr_model = 'bert'
            logging.error(f'Setting to zoo model: {pretrained_path}')
        enc_opt = {
            "n_heads": config.num_attention_heads,
            "n_layers": config.num_hidden_layers,
            "embedding_size": config.hidden_size,
            "ffn_size": config.intermediate_size,
            "dropout": config.hidden_dropout_prob,
            "attention_dropout": config.attention_probs_dropout_prob,
            "activation": config.hidden_act,
            "variant": 'xlm',
            "reduction_type": 'first',
            "n_positions": config.max_position_embeddings,
            "n_segments": config.type_vocab_size,
        }
        embedding = torch.nn.Embedding(config.vocab_size,
                                       config.hidden_size,
                                       padding_idx=config.pad_token_id)
        super().__init__(
            Opt(enc_opt),
            vocabulary_size=config.vocab_size,
            padding_idx=config.pad_token_id,
            embedding=embedding,
            reduction_type='first',
        )

        self._load_state(opt['datapath'], dpr_model, pretrained_path,
                         encoder_type)
Example #16
    def get_data_from_unit(self, unit: Unit) -> Dict[str, Any]:
        """
        Retrieves task data for a single unit.
        """
        try:
            data_browser = self.get_mephisto_data_browser()
            return data_browser.get_data_from_unit(unit)
        except (IndexError, AssertionError) as error:
            logging.error(error)
            logging.warning(
                f'Skipping unit {unit.db_id}. No message found for this unit.')
Example #17
    def _query_search_server(self, query_term, n):
        server = self.server_address
        req = {'q': query_term, 'n': n}
        logging.debug(f'sending search request to {server}')
        server_response = requests.post(server, data=req)
        resp_status = server_response.status_code
        if resp_status == 200:
            return server_response.json().get('response', None)
        logging.error(
            f'Failed to retrieve data from server! Search server returned status {resp_status}'
        )
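The server contract is implicit here; given the request payload and the .get('response') access, a successful reply presumably looks like the sketch below (the field names inside each document are assumptions):

# Hypothetical 200-OK JSON body from the search server queried above:
example_response = {
    'response': [
        {'title': 'Some page', 'url': 'https://example.com', 'content': '...'},
        # ... up to n results for query term q
    ]
}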
Example #18
    def _setup_data(self, path):
        logging.info(f"Loading ParlAI text data: {path}")

        self.episodes = []
        self.num_exs = 0
        eps = []
        with PathManager.open(path, newline='\n', encoding='utf-8') as read:
            for line_no, line in enumerate(read, 1):
                msg = str_to_msg(line.rstrip('\n'))
                if msg and 'eval_labels' in msg:
                    raise ValueError(
                        f"It looks like you've written eval_labels as a key in your "
                        f"data file. This is not appropriate; labels will be converted "
                        f"for you automatically. This is happening on Line {line_no} "
                        f"in {path}. The line is:\n\t{line}")
                if msg and 'text' not in msg:
                    raise ValueError(
                        f'ParlaiDialogTeacher requires a "text" field in every '
                        f'entry, but one is missing in Line {line_no} in {path}. '
                        f'The line is:\n\t{line}')
                if msg and 'labels' not in msg:
                    raise ValueError(
                        f'ParlaiDialogTeacher requires a "labels" field in every '
                        f'entry, but one is missing in Line {line_no} in {path}. '
                        f'The line is:\n\t{line}')

                if (self.opt['bad_speaker_to_eval'] != 'all'
                        and self.opt['bad_speaker_to_eval'] !=
                        msg['speaker_to_eval']):
                    continue
                if (self.opt['bad_safety_mix'] != 'all'
                        and SAFETY_DICT[self.opt['bad_safety_mix']] !=
                        msg['labels'][0]):
                    continue
                if self.opt['bad_num_turns'] > 0:
                    dialog = msg['text'].split('\n')
                    msg.force_set(
                        'text', '\n'.join(dialog[-self.opt['bad_num_turns']:]))
                if msg:
                    self.num_exs += 1
                    eps.append(msg)
                    if msg.get('episode_done', False):
                        self.episodes.append(eps)
                        eps = []
        if len(eps) > 0:
            # add last episode
            eps[-1].force_set('episode_done', True)
            self.episodes.append(eps)
        if len(self.episodes) == 1 and line_no > 100:
            logging.error(
                f'The data in {path} looks like one very long episode. If this '
                f'is intentional, you may ignore this, but you MAY have a bug in '
                f'your data.')
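A hedged sketch of the line format this parser consumes: each line is a set of tab-separated key:value fields that str_to_msg turns into a Message (task-specific keys checked above, such as speaker_to_eval, would ride along as extra fields):

# A hypothetical line in the ParlAI dialog text format read by _setup_data.
sample_line = (
    'text:Hello, how are you?\t'
    'labels:I am doing well, thanks.\t'
    'episode_done:True'
)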
Example #19
def _untar(path, fname, delete=True, flatten=False):
    """
    Unpack the given archive file to the same directory.

    :param str path:
        The folder containing the archive. Will contain the contents.

    :param str fname:
        The filename of the archive file.

    :param bool delete:
        If true, the archive will be deleted after extraction.

    :param bool flatten:
        If true, any subdirectory structure in the archive is
        discarded and all files are extracted directly into ``path``.
    """
    import tarfile

    logging.debug(f'unpacking {fname}')
    fullpath = os.path.join(path, fname)
    # very painfully, manually extract files so that we can use PathManager.open
    # instead, in case we are using fb internal file services

    with tarfile.open(fileobj=PathManager.open(fullpath, 'rb')) as tf:
        for item in tf:
            item_name = item.name
            while item_name.startswith("./"):
                # internal file systems will actually create a literal "."
                # directory, so we gotta watch out for that
                item_name = item_name[2:]
            if flatten:
                # flatten the tar file if there are subdirectories
                fn = os.path.join(path, os.path.split(item_name)[-1])
            else:
                fn = os.path.join(path, item_name)
            logging.debug(f"Extracting to {fn}")
            if item.isdir():
                PathManager.mkdirs(fn)
            elif item.isfile():
                with PathManager.open(fn, 'wb') as wf, tf.extractfile(
                        item.name) as rf:
                    tarfile.copyfileobj(rf, wf)
            else:
                raise NotImplementedError(
                    "No support for symlinks etc. right now.")

    if delete:
        try:
            PathManager.rm(fullpath)
        except PermissionError:
            logging.error(
                f"Tried to delete {fullpath} but got a permission error. This "
                "is known to happen in Windows and is probably not fatal.")
Example #20
    def run(self):
        datatype = self.opt['datatype'].split(':')[0]
        self.opt['world_logs'] = os.path.join(
            self.opt['save_dir'],
            f"{datatype}_split_{self.opt['f1_overlap_threshold']}_f1_overlap",
        )
        try:
            self.generate_data()
        except Exception:
            logging.error('ERROR')
            self.log()
            import ipdb

            ipdb.set_trace()
            raise
Example #21
    def __init__(self, opt, shared=None):
        build(opt)
        self.use_html = opt.get('use_html', False)
        self.use_long_answer = opt.get('use_long_answer', False)
        self.use_context = opt.get('use_context', False)
        self.id = 'natural_questions'
        self.opt = copy.deepcopy(opt)
        self.dtype = DatatypeHelper.fold(self.opt['datatype'])
        if self.dtype == 'test':
            logging.error(
                "No test split for this teacher; overriding to valid")
            self.dtype = 'valid'
        self.dpath = os.path.join(self.opt['datapath'], DATASET_NAME_LOCAL,
                                  self.dtype)
        self.n_samples = None
        super().__init__(self.opt, shared)
Example #22
    def _path(opt):
        build(opt)
        dt = opt['datatype'].split(':')[0]

        if dt == 'train':
            path = os.path.join(opt['datapath'], 'DialogueQE', 'train.json')
        elif dt == 'test':
            path = os.path.join(opt['datapath'], 'DialogueQE', 'test.json')
        elif dt == 'valid':
            logging.error(
                'warning: validation is not supported in dialogue_qe. Using test'
            )
            path = os.path.join(opt['datapath'], 'DialogueQE', 'test.json')
        else:
            raise RuntimeError('Not valid datatype.')

        return path
Example #23
    def batch_act_sdm(
        self,
        observations: List[Dict[str, Message]],
        knowledge_agent_observations: List[Message],
    ) -> Tuple[List[Message], List[int], List[Message]]:
        """
        Search Decision batch act.

        :param observations:
            observations for batch act.
        :param knowledge_agent_observations:
            observations to modify with the decision from the search decision agent.

        :return (batch_reply, search_indices, observations):
            batch_reply: reply from the search decision agent
            search_indices: batch indices with which to use search.
            observations: modified knowledge agent observations
        """
        search_indices = []
        batch_reply_sdm = [{} for _ in range(len(knowledge_agent_observations))]
        if self.search_decision is SearchDecision.ALWAYS:
            [o.force_set('skip_retrieval', False) for o in knowledge_agent_observations]
            search_indices = list(range(len(knowledge_agent_observations)))
        elif self.search_decision is SearchDecision.NEVER:
            [o.force_set('skip_retrieval', True) for o in knowledge_agent_observations]
        else:
            assert self.search_decision is SearchDecision.COMPUTE
            assert self.search_decision_agent
            batch_reply_sdm = self.search_decision_agent.batch_act(
                [o['search_decision_agent'] for o in observations]
            )
            for i, reply in enumerate(batch_reply_sdm):
                logging.debug(f"Example {i}: {reply['text']}")
                if reply['text'] == self.opt['search_decision_do_search_reply']:
                    search_indices.append(i)
                    knowledge_agent_observations[i].force_set('skip_retrieval', False)
                elif reply['text'] == self.opt['search_decision_dont_search_reply']:
                    knowledge_agent_observations[i].force_set('skip_retrieval', True)
                else:
                    logging.error(
                        f"SDM Reply: {reply['text']}; defaulting to no search"
                    )
                    knowledge_agent_observations[i].force_set('skip_retrieval', True)

        return batch_reply_sdm, search_indices, knowledge_agent_observations
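SearchDecision is defined elsewhere; a sketch consistent with the three branches above (the string values are assumptions):

from enum import Enum

# Assumed enum backing the three branches in batch_act_sdm.
class SearchDecision(Enum):
    ALWAYS = 'always'    # every example retrieves
    NEVER = 'never'      # no example retrieves
    COMPUTE = 'compute'  # a search-decision agent decides per example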
Example #24
    def log_metrics(self, setting, step, report):
        """
        Add all metrics from tensorboard_metrics opt key.

        :param setting:
            One of train/valid/test. Will be used as the title for the graph.
        :param step:
            Number of parleys
        :param report:
            The report to log
        """
        for k, v in report.items():
            if isinstance(v, numbers.Number):
                self.writer.add_scalar(f'{k}/{setting}', v, global_step=step)
            elif isinstance(v, Metric):
                self.writer.add_scalar(f'{k}/{setting}', v.value(), global_step=step)
            else:
                logging.error(f'k {k} v {v} is not a number')
Example #25
    def build_model(self, states=None):
        decoder_variant: DecoderFeedForwardVariant = self.opt[
            'decoder_ffn_variants']
        if decoder_variant == DecoderFeedForwardVariant.ONE:
            decoder_ffn_class = DecoderFFNOne
        elif decoder_variant == DecoderFeedForwardVariant.TWO:
            decoder_ffn_class = DecoderFFNTwo
        else:
            logging.error(
                'Invalid --decoder-ffn-variants option, defaulting to original ffn implementation.'
            )
            decoder_ffn_class = TransformerFFN

        wrapped_class = TransformerGeneratorModel.with_components(
            decoder=TransformerDecoder.with_components(
                layer=TransformerDecoderLayer.with_components(
                    feedforward=decoder_ffn_class)))
        return wrapped_class(opt=self.opt, dictionary=self.dict)
Example #26
    def train_step(self, batch):
        """
        Train on a single batch of examples.
        """
        self._maybe_invalidate_fixed_encs_cache()
        if batch.text_vec is None and batch.image is None:
            return
        self.model.train()
        self.zero_grad()

        cands, cand_vecs, label_inds = self._build_candidates(
            batch, source=self.candidates, mode='train'
        )
        try:
            scores = self.score_candidates(batch, cand_vecs)
            loss = self.criterion(scores, label_inds)
            self.record_local_metric('mean_loss', AverageMetric.many(loss))
            loss = loss.mean()
            self.backward(loss)
            self.update_params()
        except RuntimeError as e:
            # catch out of memory exceptions during fwd/bck (skip batch)
            if 'out of memory' in str(e):
                logging.error(
                    'Ran out of memory, skipping batch. '
                    'if this happens frequently, decrease batchsize or '
                    'truncate the inputs to the model.'
                )
                return Output()
            else:
                raise e

        # Get train predictions
        if self.candidates == 'batch':
            self._get_batch_train_metrics(scores)
            return Output()
        if not self.opt.get('train_predict', False):
            warn_once(
                "Some training metrics are omitted for speed. Set the flag "
                "`--train-predict` to calculate train metrics."
            )
            return Output()
        return self._get_train_preds(scores, label_inds, cands, cand_vecs)
Example #27
def profile_interactive(opt, print_parser=None):
    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None
    if isinstance(opt, ParlaiParser):
        logging.error('interactive should be passed opt not Parser')
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    human_agent = RepeatQueryAgent(opt)
    world = create_task(opt, [human_agent, agent])

    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()

    pr = cProfile.Profile()
    pr.enable()

    # Run
    cnt = 0
    while True:
        world.parley()
        if opt.get('display_examples'):
            print("---")
            print(world.display())
        cnt += 1
        if cnt >= opt.get('num_examples', 100):
            break
        if world.epoch_done():
            logging.info("epoch done")
            break

    pr.disable()
    s = io.StringIO()
    sortby = 'cumulative'
    ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
    ps.print_stats()
    print(s.getvalue())
Example #28
    def _load_metadata(self, datapath):
        """
        Load metadata.

        Metadata should be saved at <identifier>.metadata
        Metadata should be of the following format:
        {
            'date': <date collected>,
            'opt': <opt used to collect the data>,
            'speakers': <identity of speakers>,
            ...
            Other arguments.
        }
        """
        try:
            metadata = Metadata(datapath)
            return metadata
        except RuntimeError:
            logging.error('Metadata does not exist. Please double check your datapath.')
            return None
Example #29
def safe_interactive_custom(opt, print_parser=None):
    if print_parser is not None:
        if print_parser is True and isinstance(opt, ParlaiParser):
            print_parser = opt
        elif print_parser is False:
            print_parser = None
    if isinstance(opt, ParlaiParser):
        logging.error('interactive should be passed opt not Parser')
        opt = opt.parse_args()

    # Create model and assign it to the specified task
    agent = create_agent(opt, requireModelExists=True)
    if print_parser:
        # Show arguments after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()
    human_agent = SafeLocalHumanAgent(opt)
    world = create_task(opt, [human_agent, agent])

    return world
Example #30
def profile(opt):
    if isinstance(opt, ParlaiParser):
        logging.error('profile should be passed opt not Parser')
        opt = opt.parse_args()
    if opt['torch'] or opt['torch_cuda']:
        with torch.autograd.profiler.profile(use_cuda=opt['torch_cuda']) as prof:
            TrainLoop(opt).train()
        print(prof.total_average())

        sort_cpu = sorted(prof.key_averages(), key=lambda k: k.cpu_time)
        sort_cuda = sorted(prof.key_averages(), key=lambda k: k.cuda_time)

        def cpu():
            for e in sort_cpu:
                print(e)

        def cuda():
            for e in sort_cuda:
                print(e)

        cpu()

        if opt['debug']:
            print(
                '`cpu()` prints out cpu-sorted list, '
                '`cuda()` prints cuda-sorted list'
            )

            pdb.set_trace()
    else:
        pr = cProfile.Profile()
        pr.enable()
        TrainLoop(opt).train()
        pr.disable()
        s = io.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
        if opt['debug']:
            pdb.set_trace()