def test_sql_accuracy_is_scored_correctly(self):
    # SqlExecutor comes from the executors module (in allennlp-semparse,
    # allennlp_semparse.parsimonious_languages.executors); self._database_file
    # points at the ATIS SQLite database set up by the test fixture.
    sql_query_label = (
        "( SELECT airport_service . airport_code "
        "FROM airport_service "
        "WHERE airport_service . city_code IN ( "
        "SELECT city . city_code FROM city "
        "WHERE city.city_name = 'BOSTON' ) ) ;"
    )

    executor = SqlExecutor(self._database_file)
    postprocessed_sql_query_label = executor.postprocess_query_sqlite(sql_query_label)
    # If the predicted query and the label are the same, then we should get 1.
    assert (
        executor.evaluate_sql_query(
            postprocessed_sql_query_label, [postprocessed_sql_query_label]
        )
        == 1
    )

    predicted_sql_query = (
        "( SELECT airport_service . airport_code "
        "FROM airport_service "
        "WHERE airport_service . city_code IN ( "
        "SELECT city . city_code FROM city "
        "WHERE city.city_name = 'SEATTLE' ) ) ;"
    )
    postprocessed_predicted_sql_query = executor.postprocess_query_sqlite(
        predicted_sql_query
    )
    # If the predicted query and the label are different, we should get 0.
    assert (
        executor.evaluate_sql_query(
            postprocessed_predicted_sql_query, [postprocessed_sql_query_label]
        )
        == 0
    )
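
# For illustration: a minimal, self-contained sketch of the denotation check
# that evaluate_sql_query performs above. The function name denotation_match
# and its signature are hypothetical; the real SqlExecutor also postprocesses
# queries and compares against a list of acceptable labels, so treat this as
# an assumption-laden approximation, not the library's implementation.
import sqlite3


def denotation_match(predicted_sql: str, label_sql: str, db_path: str) -> int:
    """Return 1 if both queries yield identical rows on the database, else 0."""
    with sqlite3.connect(db_path) as connection:
        cursor = connection.cursor()
        try:
            predicted_rows = cursor.execute(predicted_sql).fetchall()
        except sqlite3.Error:
            return 0  # an unexecutable prediction counts as a miss
        label_rows = cursor.execute(label_sql).fetchall()
    return int(predicted_rows == label_rows)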
def __init__(
    self,
    vocab: Vocabulary,
    utterance_embedder: TextFieldEmbedder,
    action_embedding_dim: int,
    encoder: Seq2SeqEncoder,
    decoder_beam_search: BeamSearch,
    max_decoding_steps: int,
    input_attention: Attention,
    add_action_bias: bool = True,
    training_beam_size: int = None,
    decoder_num_layers: int = 1,
    dropout: float = 0.0,
    rule_namespace: str = "rule_labels",
    database_file: str = "/atis/atis.db",
) -> None:
    # ATIS semantic parser init.
    super().__init__(vocab)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace

    # Metrics tracked during training and evaluation.
    self._exact_match = Average()
    self._valid_sql_query = Average()
    self._action_similarity = Average()
    self._denotation_accuracy = Average()

    self._executor = SqlExecutor(database_file)
    self._action_padding_index = -1  # the padding value used by IndexField

    # Action embeddings; the input-side embedding gets one extra dimension
    # when a learned bias is appended to each action embedding.
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    if self._add_action_bias:
        input_action_dim = action_embedding_dim + 1
    else:
        input_action_dim = action_embedding_dim
    self._action_embedder = Embedding(
        num_embeddings=num_actions, embedding_dim=input_action_dim
    )
    self._output_action_embedder = Embedding(
        num_embeddings=num_actions, embedding_dim=action_embedding_dim
    )

    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(
        torch.FloatTensor(action_embedding_dim)
    )
    self._first_attended_utterance = torch.nn.Parameter(
        torch.FloatTensor(encoder.get_output_dim())
    )
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)

    self._num_entity_types = 2  # TODO(kevin): get this in a more principled way somehow?
    self._entity_type_decoder_embedding = Embedding(
        num_embeddings=self._num_entity_types, embedding_dim=action_embedding_dim
    )

    self._decoder_num_layers = decoder_num_layers

    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)
    self._transition_function = LinkingTransitionFunction(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        add_action_bias=self._add_action_bias,
        dropout=dropout,
        num_layers=self._decoder_num_layers,
    )
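
# For illustration: a hedged sketch of constructing the parser this __init__
# belongs to. The class name AtisSemanticParser and all import paths below are
# assumptions based on the allennlp / allennlp-semparse layout; adjust them,
# and the "atis.db" path, to match your installation.
import torch
from allennlp.data import Vocabulary
from allennlp.modules.attention import DotProductAttention
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp_semparse.models.atis.atis_semantic_parser import AtisSemanticParser
from allennlp_semparse.state_machines import BeamSearch

vocab = Vocabulary()
# The rule_labels namespace would normally be populated from the ATIS grammar;
# one dummy production keeps the action embedders non-empty for this sketch.
vocab.add_token_to_namespace("statement -> [query, ';']", namespace="rule_labels")

parser = AtisSemanticParser(
    vocab=vocab,
    utterance_embedder=BasicTextFieldEmbedder(
        {"tokens": Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                             embedding_dim=50)}
    ),
    action_embedding_dim=50,
    encoder=PytorchSeq2SeqWrapper(torch.nn.LSTM(50, 100, batch_first=True)),
    decoder_beam_search=BeamSearch(beam_size=10),
    max_decoding_steps=200,
    input_attention=DotProductAttention(),
    database_file="atis.db",  # hypothetical local path to the ATIS SQLite file
)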