Beispiel #1
0
 def add_cmdline_args(cls, argparser):
     """
     Register this agent's command-line options.

     Base ranker options are added first, then a dedicated argument group.
     """
     TorchRankerAgent.add_cmdline_args(argparser)
     group = argparser.add_argument_group('ExampleBagOfWordsModel Arguments')
     group.add_argument('--hidden-dim', type=int, default=512)
Beispiel #2
0
 def add_cmdline_args(argparser):
     """
     Register command-line options specific to this agent.

     Adds the base TorchRankerAgent options, then the knowledge-retriever
     group; returns the group so callers can extend it further.
     """
     TorchRankerAgent.add_cmdline_args(argparser)
     group = argparser.add_argument_group('KnowledgeRetriever Arguments')
     group.add_argument(
         '--retriever-model-file', type=str, default=RETRIEVER_FILE
     )
     group.add_argument(
         '--selector-model-file', type=str, default=SELECTOR_FILE
     )
     group.add_argument(
         '--num-retrieved',
         type=int,
         default=7,
         help='how many passages to retrieve for each category',
     )
     group.add_argument(
         '--add-token-knowledge',
         type='bool',
         default=False,
         help='Add knowledge token to retrieved knowledge',
     )
     group.add_argument('--debug', type='bool', default=False)
     return group
Beispiel #3
0
 def add_cmdline_args(argparser):
     """
     Register MemNN command-line options and parser-wide defaults.

     Returns the 'MemNN Arguments' group.
     """
     group = argparser.add_argument_group('MemNN Arguments')
     group.add_argument(
         '-esz',
         '--embedding-size',
         type=int,
         default=128,
         help='size of token embeddings',
     )
     group.add_argument(
         '-hops',
         '--hops',
         type=int,
         default=3,
         help='number of memory hops',
     )
     group.add_argument(
         '--memsize',
         type=int,
         default=32,
         help='size of memory, set to 0 for "nomemnn" model which just '
              'embeds query and candidates and picks most similar candidate',
     )
     group.add_argument(
         '-tf',
         '--time-features',
         type='bool',
         default=True,
         help='use time features for memory embeddings',
     )
     group.add_argument(
         '-pe',
         '--position-encoding',
         type='bool',
         default=False,
         help='use position encoding instead of bag of words embedding',
     )
     # defaults apply to the whole parser, not just this group
     argparser.set_defaults(split_lines=True, add_p1_after_newln=True)
     TorchRankerAgent.add_cmdline_args(argparser)
     MemnnAgent.dictionary_class().add_cmdline_args(argparser)
     return group
Beispiel #4
0
def add_common_args(parser):
    """
    Add command line arguments for this agent.

    Registers the base TorchRankerAgent options, a 'Bert Ranker Arguments'
    group, and ranker-friendly defaults (truncation, learning rate,
    candidate sources).
    """
    TorchRankerAgent.add_cmdline_args(parser)
    # keep the group in its own name instead of shadowing ``parser``
    group = parser.add_argument_group('Bert Ranker Arguments')
    group.add_argument(
        '--add-transformer-layer',
        type='bool',
        default=False,
        help='Also add a transformer layer on top of Bert',
    )
    group.add_argument(
        '--pull-from-layer',
        type=int,
        default=-1,
        help='Which layer of Bert do we use? Default=-1=last one.',
    )
    group.add_argument(
        '--out-dim',
        type=int,
        default=768,
        help='For biencoder, output dimension',
    )
    group.add_argument(
        '--topn',
        type=int,
        default=10,
        help='For the biencoder: select how many elements to return',
    )
    group.add_argument(
        '--data-parallel',
        type='bool',
        default=False,
        help='use model in data parallel, requires '
        'multiple gpus. NOTE This is incompatible'
        ' with distributed training',
    )
    group.add_argument(
        '--type-optimization',
        type=str,
        default='all_encoder_layers',
        choices=[
            'additional_layers',
            'top_layer',
            'top4_layers',
            'all_encoder_layers',
            'all',
        ],
        help='Which part of the encoders do we optimize. '
             '(Default: all_encoder_layers.)',
    )
    group.set_defaults(
        label_truncate=300,
        text_truncate=300,
        learningrate=0.00005,
        eval_candidates='inline',
        candidates='batch',
    )
    def encode_passages(
        self, agent: TorchRankerAgent, passages: List[Tuple[str, str, str]]
    ) -> Tuple[torch.Tensor, List[str]]:
        """
        Encode passages with model, using candidate encoder.

        Iterates over ``passages`` in batches of ``self.opt['batchsize']``,
        vectorizes ``"<title> <text>"`` for each passage, and runs the
        agent's candidate encoder under ``torch.no_grad``.

        :param agent:
            parlai agent
        :param passages:
            passages to encode; tuples are unpacked as ``(id, text, title)``
            (element 0 is used as the passage id below)

        :return encodings:
            return passage encodings (rows concatenated on dim 0, moved to
            CPU) and the list of passage ids, aligned by row
        """
        # inference mode — disables dropout etc. for deterministic encodings
        agent.model.eval()
        n = len(passages)
        bsz = self.opt['batchsize']
        total_enc = 0
        results: List[torch.Tensor] = []
        results_ids: List[str] = []

        for batch_start in range(0, n, bsz):
            batch_passages = passages[batch_start : batch_start + bsz]
            # title and text are concatenated, tokenized, then truncated to
            # the agent's text_truncate limit before batching
            batch_msgs = [
                Message(
                    {
                        "text_vec": agent._check_truncate(
                            agent.dict.txt2vec(f'{title} {text}'), agent.text_truncate
                        )
                    }
                )
                for _, text, title in batch_passages
            ]
            # we call batchify here rather than _pad_tensor directly.
            batch = agent.batchify(batch_msgs)
            if self.use_cuda:
                batch = batch.to('cuda')
            with torch.no_grad():
                if isinstance(agent, TransformerRankerAgent):
                    # transformer ranker: second model output is the encoding
                    _, encoding = agent.model(None, None, batch.text_vec)
                else:
                    assert isinstance(agent, DropoutPolyAgent) or isinstance(
                        agent, PolyencoderAgent
                    )
                    # polyencoder-style model: encode each row as a single
                    # candidate, then drop the singleton candidate dim
                    _, _, encoding = agent.model(
                        cand_tokens=batch.text_vec.unsqueeze(1)
                    )
                    encoding = encoding.squeeze(1)

            ids = [r[0] for r in batch_passages]
            assert len(ids) == encoding.size(0)
            # move encodings off GPU immediately to bound device memory use
            results.append(encoding.cpu())
            results_ids += ids

            total_enc += len(ids)
            # NOTE(review): this only fires when total_enc lands exactly on a
            # 10*bsz boundary, so progress is logged only for full batches
            if total_enc % (10 * bsz) == 0:
                logging.info(f'Encoded {total_enc} out of {n} passages')

        # NOTE(review): torch.cat raises on an empty list — assumes at least
        # one passage is supplied; confirm callers never pass an empty list.
        return torch.cat(results).cpu(), results_ids
Beispiel #6
0
 def add_cmdline_args(cls, argparser):
     """
     Register CLI options for the dual-encoder agent.
     """
     TorchRankerAgent.add_cmdline_args(argparser)
     group = argparser.add_argument_group('DualEncoder Arguments')
     # (flag, value type, default) for each model hyperparameter
     for flag, value_type, default in (
         ('--hiddensize', int, 512),
         ('--embedding_size', int, 128),
         ('--num_layers', int, 1),
         ('--dropout', float, 0),
     ):
         group.add_argument(flag, type=value_type, default=default)
Beispiel #7
0
def add_common_args(parser):
    """
    Add command line arguments for this agent.

    Registers the base TorchRankerAgent options followed by a 'Bert Ranker
    Arguments' group (pretrained model/vocab paths are mandatory).

    :param parser:
        the argument parser to extend
    """
    TorchRankerAgent.add_cmdline_args(parser)
    parser = parser.add_argument_group('Bert Ranker Arguments')
    parser.add_argument('--pretrained-bert-path',
                        type=str,
                        default=None,
                        # FIX: was required="true" — a truthy *string*, not a
                        # bool; argparse expects a boolean here.
                        required=True,
                        help="path to the tgz of the pretrained model.\n"
                        "See: https://github.com/huggingface/"
                        "pytorch-pretrained-BERT")
    parser.add_argument('--bert-vocabulary-path',
                        type=str,
                        default=None,
                        # FIX: same string-vs-bool issue as above.
                        required=True,
                        help="path to the vocabulary file\n"
                        "See: https://github.com/huggingface/"
                        "pytorch-pretrained-BERT")
    parser.add_argument('--add-transformer-layer',
                        type="bool",
                        default=False,
                        help="Also add a transformer layer on top of Bert")
    parser.add_argument(
        '--pull-from-layer',
        type=int,
        default=-1,
        help="Which layer of Bert do we use? Default=-1=last one.")
    parser.add_argument('--out-dim',
                        type=int,
                        default=768,
                        help="For biencoder, output dimension")
    parser.add_argument(
        '--topn',
        type=int,
        default=10,
        help="For the biencoder: select how many elements to return")
    parser.add_argument('--data-parallel',
                        type='bool',
                        default=False,
                        help='use model in data parallel, requires '
                        'multiple gpus. NOTE This is incompatible'
                        ' with distributed training')
    parser.add_argument(
        '--type-optimization',
        type=str,
        default="additional_layers",
        choices=[
            "additional_layers", "top_layer", "top4_layers",
            "all_encoder_layers", "all"
        ],
        help="Which part of the encoders do we optimize. "
             "(Default: the top one.)")
Beispiel #8
0
 def vectorize(self, *args, **kwargs):
     """
     Vectorize without prepending/appending start and end tokens.

     Overrides BiencoderAgent behavior by forcing both flags off.
     """
     kwargs.update(add_start=False, add_end=False)
     return TorchRankerAgent.vectorize(self, *args, **kwargs)
Beispiel #9
0
 def vectorize(self, *args, **kwargs):
     """
     Vectorize the text, forcing start and end tokens to be added.
     """
     kwargs.update(add_start=True, add_end=True)
     return TorchRankerAgent.vectorize(self, *args, **kwargs)
Beispiel #10
0
    def __init__(self, opt: Opt, shared=None):
        """
        Initialize multi-objective ranking state from ``opt``.
        """
        super().__init__(opt, shared)
        self.multiobjective_losses = opt['multiobjective_loss'].split(',')
        # every configured loss name must be recognized
        assert all(
            loss in VALID_MULTIOBJ_LOSSES for loss in self.multiobjective_losses
        )
        self.partial_loss_threshold = opt['partial_output_loss_threshold']
        self.sliced_loss_threshold = opt['sliced_output_loss_threshold']
        # a negative threshold counts as already crossed
        self.crossed_partial_loss_threshold = self.partial_loss_threshold < 0
        self.crossed_sliced_loss_threshold = self.sliced_loss_threshold < 0

        # reuse the shared criterion when running in a shared (batch) context
        self.multiobj_criterion = (
            shared['multiobj_criterion']
            if shared
            else TorchRankerAgent.build_criterion(self)
        )
Beispiel #11
0
    def add_cmdline_args(argparser):
        """
        Add command-line arguments for the MacNet agent.

        Registers the base TorchRankerAgent options, a 'MacNet Arguments'
        group, and the agent's dictionary arguments.

        :param argparser:
            the parser to extend
        :return:
            the 'MacNet Arguments' group
        """
        TorchRankerAgent.add_cmdline_args(argparser)
        agent = argparser.add_argument_group("MacNet Arguments")

        agent.add_argument("-dim",
                           "--dimension",
                           type=int,
                           default=512,
                           help="Dimension for all layers")
        agent.add_argument("-nrh",
                           "--num-reasoning-hops",
                           type=int,
                           default=12,
                           help="Number of reasoning hops")
        # FIX: ``type=bool`` is the classic argparse pitfall — bool("False")
        # is True, so the flag could never be set to False from the command
        # line. Use the 'bool' string type like the other agents here do.
        agent.add_argument("-mtt",
                           "--mac-to-tensorboard",
                           type='bool',
                           default=False,
                           help="Save MAC Cells weights to tensorboard")

        MacNetAgent.dictionary_class().add_cmdline_args(argparser)

        return agent
 def _setup_parser(self) -> Opt:
     """
     Build a parser carrying reranker and ranker args, then parse defaults.

     :return:
         the parsed options, with --predictor-model-file preset
     """
     parser = ParlaiParser(True, True)
     # each call may return a wrapped parser, so keep the reassignment
     for agent_cls in (RPAReranker, TorchRankerAgent):
         parser = agent_cls.add_cmdline_args(parser, {})
     return parser.parse_args(['--predictor-model-file', RPA_RERANKER])