Beispiel #1
0
 def __init__(
         self,
         vocab: Vocabulary,
         question_embedder: TextFieldEmbedder,
         action_embedding_dim: int,
         encoder: Seq2SeqEncoder,
         entity_encoder: Seq2VecEncoder,
         decoder_beam_search: BeamSearch,
         max_decoding_steps: int,
         attention: Attention,
         mixture_feedforward: FeedForward = None,
         add_action_bias: bool = True,
         training_beam_size: int = None,
         use_neighbor_similarity_for_linking: bool = False,
         dropout: float = 0.0,
         num_linking_features: int = 10,
         rule_namespace: str = 'rule_labels',
         tables_directory: str = '/wikitables/') -> None:
     """Build the parser: parent setup, then beam search, trainer and decoder step.

     Most arguments are forwarded unchanged to the parent constructor.
     The ones consumed here are:

     decoder_beam_search
         Stored as ``self._beam_search``.
     training_beam_size
         Passed to ``MaximumMarginalLikelihood`` (may be ``None``).
     attention
         Passed to ``LinkingTransitionFunction`` as ``input_attention``.
     mixture_feedforward
         Passed to ``LinkingTransitionFunction`` (may be ``None``).
     action_embedding_dim, dropout
         Forwarded to the parent *and* to ``LinkingTransitionFunction``.

     ``self._encoder``, ``self._num_start_types`` and
     ``self._add_action_bias`` are read after the parent constructor runs,
     so the parent is expected to have set them.
     """
     # Forward the shared configuration to the parent constructor unchanged.
     super().__init__(
         vocab=vocab,
         question_embedder=question_embedder,
         action_embedding_dim=action_embedding_dim,
         encoder=encoder,
         entity_encoder=entity_encoder,
         max_decoding_steps=max_decoding_steps,
         add_action_bias=add_action_bias,
         use_neighbor_similarity_for_linking=use_neighbor_similarity_for_linking,
         dropout=dropout,
         num_linking_features=num_linking_features,
         rule_namespace=rule_namespace,
         tables_directory=tables_directory,
     )

     self._beam_search = decoder_beam_search
     self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)

     # The decoder step is sized from the encoder's output dimension.
     encoder_output_dim = self._encoder.get_output_dim()
     self._decoder_step = LinkingTransitionFunction(
         encoder_output_dim=encoder_output_dim,
         action_embedding_dim=action_embedding_dim,
         input_attention=attention,
         num_start_types=self._num_start_types,
         predict_start_type_separately=True,
         add_action_bias=self._add_action_bias,
         mixture_feedforward=mixture_feedforward,
         dropout=dropout,
     )
    def __init__(
            self,
            vocab: Vocabulary,
            question_embedder: TextFieldEmbedder,
            action_embedding_dim: int,
            encoder: Seq2SeqEncoder,
            decoder_beam_search: BeamSearch,
            max_decoding_steps: int,
            attention: Attention,
            mixture_feedforward: FeedForward = None,
            add_action_bias: bool = True,
            dropout: float = 0.0,
            num_linking_features: int = 0,
            num_entity_bits: int = 0,
            entity_bits_output: bool = True,
            use_entities: bool = False,
            denotation_only: bool = False,
            # Deprecated parameter to load older models
            entity_encoder: Seq2VecEncoder = None,  # pylint: disable=unused-argument
            entity_similarity_mode: str = "dot_product",
            rule_namespace: str = 'rule_labels') -> None:
        """Set up embedders, metrics and (unless ``denotation_only``) the decoder.

        Parameters
        ----------
        vocab
            Passed to the parent constructor; also queried for the size of
            ``rule_namespace`` to size the action embeddings.
        question_embedder
            Stored; its output dimension becomes ``self._embedding_dim``.
        action_embedding_dim
            Dimension of the action embeddings, of the entity-type decoder
            embedding and of the learned first-action vector.
        encoder
            Stored; its output dimension (plus ``num_entity_bits`` when
            ``entity_bits_output`` is true) becomes ``self._encoder_output_dim``.
        decoder_beam_search
            Stored as ``self._beam_search``.
        max_decoding_steps
            Stored as ``self._max_decoding_steps``.
        attention
            Passed to ``LinkingTransitionFunction`` as ``input_attention``.
        mixture_feedforward
            Passed to ``LinkingTransitionFunction``.
        add_action_bias
            When true, a 1-dimensional ``Embedding`` per action is created as
            ``self._action_biases``; also forwarded to the transition function.
        dropout
            When ``> 0`` a ``torch.nn.Dropout`` is stored, otherwise an
            identity callable; also forwarded to the transition function.
        num_linking_features
            When ``> 0`` a ``Linear(num_linking_features, 1)`` is created as
            ``self._linking_params``; otherwise that attribute is ``None``.
        num_entity_bits
            Added to the encoder output dimension when ``entity_bits_output``.
        entity_bits_output
            Stored; controls whether ``num_entity_bits`` widens the encoder
            output dimension.
        use_entities
            Stored as ``self._use_entities``.
        denotation_only
            When true, only a denotation classifier and its accuracy metric
            are built and the constructor returns early.
        entity_encoder
            Deprecated and unused; accepted so that older models still load.
        entity_similarity_mode
            Either ``"dot_product"`` or ``"weighted_dot_product"``.
        rule_namespace
            Vocabulary namespace used to count actions.

        Raises
        ------
        ValueError
            If ``entity_similarity_mode`` is not one of the two accepted values.
        """
        super(QuarelSemanticParser, self).__init__(vocab)
        self._question_embedder = question_embedder
        self._encoder = encoder
        self._beam_search = decoder_beam_search
        self._max_decoding_steps = max_decoding_steps
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            # Identity stand-in with the same call shape as the Dropout module.
            self._dropout = lambda x: x
        self._rule_namespace = rule_namespace
        # Running-average metrics.
        self._denotation_accuracy = Average()
        self._action_sequence_accuracy = Average()
        self._has_logical_form = Average()

        self._embedding_dim = question_embedder.get_output_dim()
        self._use_entities = use_entities

        # Note: there's only one non-trivial entity type in QuaRel for now, so most of the
        # entity_type stuff is irrelevant
        self._num_entity_types = 4  # TODO(mattg): get this in a more principled way somehow?
        self._num_start_types = 1  # Hardcoded until we feed lf syntax into the model
        # One entity-type embedding sized for the question embedding space and
        # one sized for the action embedding space.
        self._entity_type_encoder_embedding = Embedding(
            self._num_entity_types, self._embedding_dim)
        self._entity_type_decoder_embedding = Embedding(
            self._num_entity_types, action_embedding_dim)

        # The similarity layer exists only in "weighted_dot_product" mode; it
        # stays None for plain "dot_product".
        self._entity_similarity_layer = None
        self._entity_similarity_mode = entity_similarity_mode
        if self._entity_similarity_mode == "weighted_dot_product":
            self._entity_similarity_layer = \
                TimeDistributed(torch.nn.Linear(self._embedding_dim, 1, bias=False))
            # Center initial values around unweighted dot product
            # (adds 1 to every weight of the wrapped Linear layer).
            self._entity_similarity_layer._module.weight.data += 1  # pylint: disable=protected-access
        elif self._entity_similarity_mode == "dot_product":
            pass
        else:
            raise ValueError("Invalid entity_similarity_mode: {}".format(
                self._entity_similarity_mode))

        # Optional linear scorer over the linking features.
        if num_linking_features > 0:
            self._linking_params = torch.nn.Linear(num_linking_features, 1)
        else:
            self._linking_params = None

        self._decoder_trainer = MaximumMarginalLikelihood()

        # Encoder output is widened by the entity bits when they are enabled.
        self._encoder_output_dim = self._encoder.get_output_dim()
        if entity_bits_output:
            self._encoder_output_dim += num_entity_bits

        self._entity_bits_output = entity_bits_output

        # NOTE(review): not used inside this constructor; presumably a budget
        # for debug output elsewhere in the class -- confirm.
        self._debug_count = 10

        self._num_denotation_cats = 2  # Hardcoded for simplicity
        self._denotation_only = denotation_only
        if self._denotation_only:
            self._denotation_accuracy_cat = CategoricalAccuracy()
            self._denotation_classifier = torch.nn.Linear(
                self._encoder_output_dim, self._num_denotation_cats)
            # Rest of init not needed for denotation only where no decoding to actions needed
            # (so _action_embedder, _add_action_bias, _decoder_step, etc. are
            # never set on the instance in this mode).
            return

        self._action_padding_index = -1  # the padding value used by IndexField
        num_actions = vocab.get_vocab_size(self._rule_namespace)
        self._num_actions = num_actions
        self._action_embedder = Embedding(num_embeddings=num_actions,
                                          embedding_dim=action_embedding_dim)
        # We are tying the action embeddings used for input and output: the
        # output embedder is the same object as the input embedder rather
        # than a second Embedding.
        self._output_action_embedder = self._action_embedder  # tied weights
        self._add_action_bias = add_action_bias
        if self._add_action_bias:
            # One scalar per action.
            self._action_biases = Embedding(num_embeddings=num_actions,
                                            embedding_dim=1)

        # This is what we pass as input in the first step of decoding, when we don't have a
        # previous action, or a previous question attention.
        # The tensors are allocated uninitialized and then filled by normal_.
        self._first_action_embedding = torch.nn.Parameter(
            torch.FloatTensor(action_embedding_dim))
        self._first_attended_question = torch.nn.Parameter(
            torch.FloatTensor(self._encoder_output_dim))
        torch.nn.init.normal_(self._first_action_embedding)
        torch.nn.init.normal_(self._first_attended_question)

        # Transition function used for decoding, sized from the (possibly
        # widened) encoder output dimension.
        self._decoder_step = LinkingTransitionFunction(
            encoder_output_dim=self._encoder_output_dim,
            action_embedding_dim=action_embedding_dim,
            input_attention=attention,
            num_start_types=self._num_start_types,
            predict_start_type_separately=False,
            add_action_bias=self._add_action_bias,
            mixture_feedforward=mixture_feedforward,
            dropout=dropout)
Beispiel #3
0
    def __init__(self,
                 question_embedder: TextFieldEmbedder,
                 input_memory_embedder: TextFieldEmbedder,
                 output_memory_embedder: TextFieldEmbedder,
                 question_encoder: Seq2SeqEncoder,
                 input_memory_encoder: Seq2VecEncoder,
                 output_memory_encoder: Seq2VecEncoder,
                 decoder_beam_search: BeamSearch,
                 input_attention: Attention,
                 past_attention: Attention,
                 action_embedding_dim: int,
                 max_decoding_steps: int,
                 nhop: int,
                 decoding_nhop: int,
                 vocab: Vocabulary,
                 dataset_path: str = 'dataset',
                 parse_sql_on_decoding: bool = True,
                 training_beam_size: int = None,
                 add_action_bias: bool = True,
                 decoder_self_attend: bool = True,
                 decoder_num_layers: int = 1,
                 dropout: float = 0.0,
                 rule_namespace: str = 'rule_labels') -> None:
        """Set up embedders, memory encoders, the transition function and metrics.

        Parameters
        ----------
        question_embedder
            Stored as ``self.question_embedder``.
        input_memory_embedder, output_memory_embedder
            Stored as ``self._input_mm_embedder`` / ``self._output_mm_embedder``.
        question_encoder
            Stored; its output dimension sizes the first-attended-utterance
            parameter, the transition function and ``MemAttn``, and half of it
            (floor division) sizes the entity-type encoder embedding.
        input_memory_encoder, output_memory_encoder
            Wrapped in ``TimeDistributed`` and stored.
        decoder_beam_search
            Stored as ``self._beam_search``.
        input_attention
            Passed to the transition function.
        past_attention
            Passed to ``AttendPastSchemaItemsTransitionFunction``; unused when
            ``decoder_self_attend`` is false.
        action_embedding_dim
            Dimension of the action and entity-type decoder embeddings.
        max_decoding_steps
            Stored as ``self._max_decoding_steps``.
        nhop
            Passed to ``MemAttn``.
        decoding_nhop
            Passed to ``AttendPastSchemaItemsTransitionFunction``; unused when
            ``decoder_self_attend`` is false.
        vocab
            Passed to the parent constructor; also queried for the size of
            ``rule_namespace``.
        dataset_path
            Directory expected to contain ``database`` and ``tables.json``,
            which are bound into ``self._evaluate_func``.
        parse_sql_on_decoding
            Stored as ``self.parse_sql_on_decoding``.
        training_beam_size
            Passed to ``MaximumMarginalLikelihood`` (may be ``None``).
        add_action_bias
            When true, ``self._action_embedder`` gets one extra dimension;
            also forwarded to the transition function.
        decoder_self_attend
            Selects ``AttendPastSchemaItemsTransitionFunction`` (true) or
            ``LinkingTransitionFunction`` (false).
        decoder_num_layers
            Passed to the transition function as ``num_layers``.
        dropout
            When ``> 0`` a ``torch.nn.Dropout`` is stored, otherwise an
            identity callable; also forwarded to the transition function.
        rule_namespace
            Vocabulary namespace used to count actions.
        """
        super().__init__(vocab)

        self.question_embedder = question_embedder
        self._input_mm_embedder = input_memory_embedder
        self._output_mm_embedder = output_memory_embedder
        self._question_encoder = question_encoder
        self._input_mm_encoder = TimeDistributed(input_memory_encoder)
        self._output_mm_encoder = TimeDistributed(output_memory_encoder)

        self.parse_sql_on_decoding = parse_sql_on_decoding
        self._self_attend = decoder_self_attend
        self._max_decoding_steps = max_decoding_steps
        self._add_action_bias = add_action_bias
        self._rule_namespace = rule_namespace
        num_actions = vocab.get_vocab_size(self._rule_namespace)
        # One extra embedding dimension when a per-action bias is requested
        # (presumably that slot carries the bias -- confirm against forward()).
        input_action_dim = (action_embedding_dim + 1 if self._add_action_bias
                            else action_embedding_dim)
        self._action_embedder = Embedding(num_embeddings=num_actions,
                                          embedding_dim=input_action_dim)
        self._input_action_embedder = Embedding(
            num_embeddings=num_actions, embedding_dim=action_embedding_dim)
        self._output_action_embedder = Embedding(
            num_embeddings=num_actions, embedding_dim=action_embedding_dim)

        # NOTE(review): hard-coded entity type count; confirm it matches the
        # dataset reader.
        self._num_entity_types = 9
        self._entity_type_decoder_input_embedding = Embedding(
            self._num_entity_types, action_embedding_dim)
        self._entity_type_decoder_output_embedding = Embedding(
            self._num_entity_types, action_embedding_dim)

        # Query the encoder width once; every later use shares this value.
        encoder_output_dim = question_encoder.get_output_dim()

        # Integer floor division: same result as the former truncating float
        # division for non-negative dimensions, without the float round trip.
        self._entity_type_encoder_embedding = Embedding(
            self._num_entity_types,
            encoder_output_dim // 2)

        self._decoder_num_layers = decoder_num_layers
        self._action_embedding_dim = action_embedding_dim

        self._ent2ent_ff = FeedForward(action_embedding_dim, 1,
                                       action_embedding_dim,
                                       Activation.by_name('relu')())

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            # Identity stand-in with the same call shape as the Dropout module.
            self._dropout = lambda x: x

        # Learned vectors for the first decoding step; the tensors are
        # allocated uninitialized and then filled by normal_.
        self._first_action_embedding = torch.nn.Parameter(
            torch.FloatTensor(action_embedding_dim))
        self._first_attended_utterance = torch.nn.Parameter(
            torch.FloatTensor(encoder_output_dim))
        torch.nn.init.normal_(self._first_action_embedding)
        torch.nn.init.normal_(self._first_attended_utterance)

        if self._self_attend:
            self._transition_function = AttendPastSchemaItemsTransitionFunction(
                encoder_output_dim=encoder_output_dim,
                action_embedding_dim=action_embedding_dim,
                input_attention=input_attention,
                past_attention=past_attention,
                decoding_nhop=decoding_nhop,
                predict_start_type_separately=False,
                add_action_bias=self._add_action_bias,
                dropout=dropout,
                num_layers=self._decoder_num_layers)
        else:
            self._transition_function = LinkingTransitionFunction(
                encoder_output_dim=encoder_output_dim,
                action_embedding_dim=action_embedding_dim,
                input_attention=input_attention,
                predict_start_type_separately=False,
                add_action_bias=self._add_action_bias,
                dropout=dropout,
                num_layers=self._decoder_num_layers)

        self._mm_attn = MemAttn(encoder_output_dim, nhop)

        self._beam_search = decoder_beam_search
        self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)

        self._action_padding_index = -1  # the padding value used by IndexField

        # Running-average metrics.
        self._exact_match = Average()
        self._sql_evaluator_match = Average()
        self._action_similarity = Average()
        self._acc_single = Average()
        self._acc_multi = Average()
        self._beam_hit = Average()

        # TODO: the 'database' and 'tables.json' names under dataset_path are
        # still hard-coded.
        self._evaluate_func = partial(
            evaluate,
            db_dir=os.path.join(dataset_path, 'database'),
            table=os.path.join(dataset_path, 'tables.json'),
            check_valid=False)