def __init__(self, vocab: Vocabulary, mydatabase: str, schema_path: str,
             utterance_embedder: TextFieldEmbedder, action_embedding_dim: int,
             encoder: Seq2SeqEncoder, decoder_beam_search: BeamSearch,
             max_decoding_steps: int, input_attention: Attention,
             add_action_bias: bool = True, dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    self._dropout = torch.nn.Dropout(p=dropout)
    self._exact_match = Average()
    self._action_similarity = Average()
    self._valid_sql_query = SqlValidity(mydatabase=mydatabase)
    self._token_match = TokenSequenceAccuracy()
    self._kb_match = KnowledgeBaseConstsAccuracy(schema_path=schema_path)
    self._schema_free_match = GlobalTemplAccuracy(schema_path=schema_path)
    self._coverage_loss = CoverageAttentionLossMetric()
    # the padding value used by IndexField
    self._action_padding_index = -1
    num_actions = vocab.get_vocab_size("rule_labels")
    input_action_dim = action_embedding_dim
    if self._add_action_bias:
        input_action_dim += 1
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)
    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(beam_size=1)
    self._transition_function = BasicTransitionFunction(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        add_action_bias=self._add_action_bias,
        dropout=dropout)
    initializer(self)
def __init__(self, vocab: Vocabulary, utterance_embedder: TextFieldEmbedder,
             action_embedding_dim: int, encoder: Seq2SeqEncoder,
             decoder_beam_search: BeamSearch, max_decoding_steps: int,
             input_attention: Attention, add_action_bias: bool = True,
             training_beam_size: int = None, decoder_num_layers: int = 1,
             dropout: float = 0.0, rule_namespace: str = 'rule_labels',
             database_file='/atis/atis.db') -> None:
    # Atis semantic parser init
    super().__init__(vocab)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace
    self._exact_match = Average()
    self._valid_sql_query = Average()
    self._action_similarity = Average()
    self._denotation_accuracy = Average()
    self._executor = SqlExecutor(database_file)
    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    if self._add_action_bias:
        input_action_dim = action_embedding_dim + 1
    else:
        input_action_dim = action_embedding_dim
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)
    self._num_entity_types = 2  # TODO(kevin): get this in a more principled way somehow?
    self._entity_type_decoder_embedding = Embedding(self._num_entity_types, action_embedding_dim)
    self._decoder_num_layers = decoder_num_layers
    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(training_beam_size)
    self._transition_function = LinkingTransitionFunction(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        add_action_bias=self._add_action_bias,
        dropout=dropout,
        num_layers=self._decoder_num_layers)
def __init__(self, vocab: Vocabulary, question_embedder: TextFieldEmbedder,
             action_embedding_dim: int, encoder: Seq2SeqEncoder,
             entity_encoder: Seq2VecEncoder, max_decoding_steps: int,
             use_neighbor_similarity_for_linking: bool = False,
             dropout: float = 0.0, num_linking_features: int = 10,
             rule_namespace: str = 'rule_labels',
             tables_directory: str = '/wikitables/') -> None:
    super(WikiTablesSemanticParser, self).__init__(vocab)
    self._question_embedder = question_embedder
    self._encoder = encoder
    self._entity_encoder = TimeDistributed(entity_encoder)
    self._max_decoding_steps = max_decoding_steps
    self._use_neighbor_similarity_for_linking = use_neighbor_similarity_for_linking
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace
    self._denotation_accuracy = WikiTablesAccuracy(tables_directory)
    self._action_sequence_accuracy = Average()
    self._has_logical_form = Average()
    self._action_padding_index = -1  # the padding value used by IndexField
    num_actions = vocab.get_vocab_size(self._rule_namespace)
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    self._action_biases = Embedding(num_embeddings=num_actions, embedding_dim=1)
    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous question attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_question = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_question)
    check_dimensions_match(entity_encoder.get_output_dim(), question_embedder.get_output_dim(),
                           "entity word average embedding dim", "question embedding dim")
    self._num_entity_types = 4  # TODO(mattg): get this in a more principled way somehow?
    self._num_start_types = 5  # TODO(mattg): get this in a more principled way somehow?
    self._embedding_dim = question_embedder.get_output_dim()
    self._type_params = torch.nn.Linear(self._num_entity_types, self._embedding_dim)
    self._neighbor_params = torch.nn.Linear(self._embedding_dim, self._embedding_dim)
    if num_linking_features > 0:
        self._linking_params = torch.nn.Linear(num_linking_features, 1)
    else:
        self._linking_params = None
    if self._use_neighbor_similarity_for_linking:
        self._question_entity_params = torch.nn.Linear(1, 1)
        self._question_neighbor_params = torch.nn.Linear(1, 1)
    else:
        self._question_entity_params = None
        self._question_neighbor_params = None
def get_embeddings(embedder_type, vocab, embedding_dim=300, bert_trainable=True):
    if embedder_type not in valid_embedders:
        raise Exception(f'Unknown embedder type {embedder_type}')
    vocab_size = vocab.get_vocab_size('tokens')
    token_embedders = {}
    if embedder_type == 'random':
        token_embedding = Embedding(vocab_size, embedding_dim, trainable=True)
        token_embedders['tokens'] = token_embedding
    if embedder_type in ['glove', 'elmo_and_glove']:
        weights = load_glove_weights(vocab)
        token_embedding = Embedding(vocab_size, embedding_dim, weight=weights, trainable=True)
        token_embedders['tokens'] = token_embedding
    if embedder_type in ['elmo', 'elmo_and_glove']:
        elmo_token_embedder = ElmoTokenEmbedder(
            'embeddings/elmo_2x4096_512_2048cnn_2xhighway_options.json',
            'embeddings/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5',
            do_layer_norm=False,
            dropout=0.5)
        token_embedders['elmo'] = elmo_token_embedder
    if 'bert' in embedder_type:
        token_embedders['bert'] = BertEmbedder(bert_type=embedder_type, trainable=bert_trainable)
    word_embeddings = BasicTextFieldEmbedder(token_embedders)
    return word_embeddings
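# A minimal, self-contained sketch of the 'random' branch above: wrap an Embedding in a
# BasicTextFieldEmbedder keyed by the 'tokens' namespace. The toy vocabulary and the
# 8-dimensional size are illustrative only, not values from the original module.
import torch
from allennlp.data import Vocabulary
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding

toy_vocab = Vocabulary()
for token in ["the", "cat", "sat"]:
    toy_vocab.add_token_to_namespace(token, namespace="tokens")
toy_embedding = Embedding(num_embeddings=toy_vocab.get_vocab_size("tokens"), embedding_dim=8)
toy_embedder = BasicTextFieldEmbedder({"tokens": toy_embedding})
assert toy_embedder.get_output_dim() == 8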
def __init__(self, config, vocab: Vocabulary = None):
    super(GLTEmbeddings, self).__init__()
    self.word_embeddings = Embedding(config.vocab_size, config.hidden_size,
                                     padding_index=0, vocab_namespace='tokens')
    if hasattr(config, 'glove_path') and config.glove_path:
        assert vocab is not None
        self.word_embeddings_glove = Embedding.from_vocab_or_file(
            vocab, 300,
            pretrained_file=config.glove_path,
            projection_dim=config.hidden_size,
            trainable=False)
        self.word_embeddings_glove._pretrained_file = config.glove_path
        self.use_glove = bool(config.glove_path)
    else:
        self.use_glove = False
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
    self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
    self.dropout = nn.Dropout(config.layer_dropout_prob)
    self.use_position_embeddings = config.use_position_embeddings
def __init__(
    self,
    num_heads: int,
    input_dim: int,
    attention_dim: int,
    num_semantic_labels: int,
    values_dim: int,
    output_projection_dim: int = None,
    attention_dropout_prob: float = 0.1,
    use_semantic_views=True,
    multi_head_attention_batch_computation=False,
    use_separate_label_embeddings_for_q_and_k=True,
) -> None:
    super(MultiHeadSemanticFlatSelfAttention, self).__init__()
    self.use_semantic_views = use_semantic_views
    self.use_separate_label_embeddings_for_q_and_k = use_separate_label_embeddings_for_q_and_k
    self.multi_head_attention_batch_computation = multi_head_attention_batch_computation
    self._single_head_attention_dim = int(attention_dim / num_heads)
    self._semantic_label_embedding_q_w = Embedding(
        num_embeddings=num_semantic_labels,
        embedding_dim=input_dim * self._single_head_attention_dim)
    self._semantic_label_embedding_q_b = Embedding(
        num_embeddings=num_semantic_labels,
        embedding_dim=self._single_head_attention_dim)
    self._semantic_label_embedding_k_w = Embedding(
        num_embeddings=num_semantic_labels,
        embedding_dim=input_dim * self._single_head_attention_dim)
    self._semantic_label_embedding_k_b = Embedding(
        num_embeddings=num_semantic_labels,
        embedding_dim=self._single_head_attention_dim)
    self._num_heads = num_heads
    self._input_dim = input_dim
    self._output_dim = output_projection_dim or input_dim
    self._attention_dim = attention_dim
    self._values_dim = values_dim
    if attention_dim % num_heads != 0:
        raise ValueError(f"Key size ({attention_dim}) must be divisible by the number of "
                         f"attention heads ({num_heads}).")
    if values_dim % num_heads != 0:
        raise ValueError(f"Value size ({values_dim}) must be divisible by the number of "
                         f"attention heads ({num_heads}).")
    self._combined_projection = Linear(input_dim, 2 * attention_dim + values_dim)
    self._values_projection = Linear(input_dim, values_dim)
    self._scale = (input_dim // num_heads) ** 0.5
    self._output_projection = Linear(values_dim, self._output_dim)
    self._attention_dropout = Dropout(attention_dropout_prob)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder,
             share_encoder: Seq2VecEncoder = None, private_encoder: Seq2VecEncoder = None,
             dropout: float = None, input_dropout: float = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: RegularizerApplicator = None) -> None:
    super(JointSentimentClassifier, self).__init__(vocab=vocab, regularizer=regularizer)
    self._text_field_embedder = text_field_embedder
    if share_encoder is None and private_encoder is None:
        share_rnn = nn.LSTM(input_size=self._text_field_embedder.get_output_dim(),
                            hidden_size=150,
                            batch_first=True,
                            dropout=dropout,
                            bidirectional=True)
        share_encoder = PytorchSeq2SeqWrapper(share_rnn)
        private_rnn = nn.LSTM(input_size=self._text_field_embedder.get_output_dim(),
                              hidden_size=150,
                              batch_first=True,
                              dropout=dropout,
                              bidirectional=True)
        private_encoder = PytorchSeq2SeqWrapper(private_rnn)
        logger.info("Using LSTM as encoder")
    self._domain_embeddings = Embedding(len(TASKS_NAME), self._text_field_embedder.get_output_dim())
    self._share_encoder = share_encoder
    self._s_domain_discriminator = Discriminator(share_encoder.get_output_dim(), len(TASKS_NAME))
    self._p_domain_discriminator = Discriminator(private_encoder.get_output_dim(), len(TASKS_NAME))
    # TODO individual valid discriminator
    self._valid_discriminator = Discriminator(self._domain_embeddings.get_output_dim(), 2)
    for task in TASKS_NAME:
        tagger = SentimentClassifier(
            vocab=vocab,
            text_field_embedder=self._text_field_embedder,
            share_encoder=self._share_encoder,
            private_encoder=copy.deepcopy(private_encoder),
            s_domain_discriminator=self._s_domain_discriminator,
            p_domain_discriminator=self._p_domain_discriminator,
            valid_discriminator=self._valid_discriminator,
            dropout=dropout,
            input_dropout=input_dropout,
            label_smoothing=0.1,
            initializer=initializer)
        self.add_module("_tagger_{}".format(task), tagger)
    logger.info("Multi-Task Learning Model has been instantiated.")
def __init__(self, vocab: Vocabulary, utterance_embedder: TextFieldEmbedder,
             action_embedding_dim: int, encoder: Seq2SeqEncoder,
             decoder_beam_search: BeamSearch, max_decoding_steps: int,
             input_attention: Attention, database_file: str,
             add_action_bias: bool = True, dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self._utterance_embedder = utterance_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._add_action_bias = add_action_bias
    self._dropout = torch.nn.Dropout(p=dropout)
    self._exact_match = Average()
    self._valid_sql_query = Average()
    self._action_similarity = Average()
    self._denotation_accuracy = Average()
    self._executor = SqlExecutor(database_file)
    # the padding value used by IndexField
    self._action_padding_index = -1
    num_actions = vocab.get_vocab_size("rule_labels")
    input_action_dim = action_embedding_dim
    if self._add_action_bias:
        input_action_dim += 1
    self._action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=input_action_dim)
    self._output_action_embedder = Embedding(num_embeddings=num_actions, embedding_dim=action_embedding_dim)
    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action, or a previous utterance attention.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    self._first_attended_utterance = torch.nn.Parameter(torch.FloatTensor(encoder.get_output_dim()))
    torch.nn.init.normal_(self._first_action_embedding)
    torch.nn.init.normal_(self._first_attended_utterance)
    self._beam_search = decoder_beam_search
    self._decoder_trainer = MaximumMarginalLikelihood(beam_size=1)
    self._transition_function = BasicTransitionFunction(
        encoder_output_dim=self._encoder.get_output_dim(),
        action_embedding_dim=action_embedding_dim,
        input_attention=input_attention,
        predict_start_type_separately=False,
        add_action_bias=self._add_action_bias,
        dropout=dropout)
    initializer(self)
def __init__(
    self,
    left_seq2vec: Seq2VecEncoder = None,  # PytorchSeq2VecWrapper
    right_seq2vec: Seq2VecEncoder = None,  # word drop out should be applied before embedding?
    ff_seq2vecs: FeedForward = None,
    ff_context: FeedForward = None,  # last feed forward layer [vm, vlocal]
    ff_type: FeedForward = None,
    initializer: InitializerApplicator = InitializerApplicator(),  # todo how to init normal 0, 0.01
    regularizer: Optional[RegularizerApplicator] = None,  # todo how to use this?
    use_coherence: bool = False,
    use_type: bool = False,
    device: str = "cpu",
) -> None:
    super(EnityLinknigModel, self).__init__({}, regularizer)
    self.use_coherence = use_coherence
    self.use_type = use_type
    if device == "cuda" and torch.cuda.is_available():
        self.device = "cuda"
    else:
        self.device = "cpu"
    # context
    self.left_seq2vec = left_seq2vec
    self.right_seq2vec = right_seq2vec
    self.ff_seq2vecs = ff_seq2vecs
    self.ff_context = ff_context
    if self.use_coherence:
        assert ff_context is not None
        self.coherence_embedder = Embedding(num_embeddings=1561683, embedding_dim=100, sparse=False)
        self.coherence_embedder_relu = ReLU(inplace=False)
    # entity
    self.entity_embedder = Embedding(num_embeddings=614129, embedding_dim=200, sparse=False)
    # type
    self.ff_type = ff_type
    if self.use_type:
        assert ff_type is not None
    # losses
    self.loss_context = CrossEntropyLoss()
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=2)
    }
    initializer(self)
def test_auto_regressive_seq_decoder_init(self):
    decoder_inout_dim = 4
    vocab, decoder_net = create_vocab_and_decoder_net(decoder_inout_dim)
    AutoRegressiveSeqDecoder(
        vocab, decoder_net, 10, Embedding(vocab.get_vocab_size(), decoder_inout_dim)
    )
    with pytest.raises(ConfigurationError):
        AutoRegressiveSeqDecoder(
            vocab, decoder_net, 10, Embedding(vocab.get_vocab_size(), decoder_inout_dim + 1)
        )
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder,
             dropout: float = 0.0, input_dropout: float = 0.0, label_smoothing: float = 0.1,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SentimentClassifier, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    share_rnn = nn.LSTM(input_size=self._text_field_embedder.get_output_dim(),
                        hidden_size=150,
                        batch_first=True,
                        # dropout=dropout,
                        bidirectional=True)
    share_encoder = PytorchSeq2SeqWrapper(share_rnn)
    self._encoder = RNNEncoder(vocab, share_encoder, input_dropout, regularizer)
    self._seq_vec = CnnEncoder(self._encoder.get_output_dim(), 25)
    self._de_dim = len(TASKS_NAME)
    weight = torch.empty(self._de_dim, self._text_field_embedder.get_output_dim())
    torch.nn.init.orthogonal_(weight)
    self._domain_embeddings = Embedding(self._de_dim,
                                        self._text_field_embedder.get_output_dim(),
                                        weight=weight)
    self._de_attention = BilinearAttention(self._seq_vec.get_output_dim(),
                                           self._domain_embeddings.get_output_dim())
    self._de_feedforward = FeedForward(self._domain_embeddings.get_output_dim(), 1,
                                       self._seq_vec.get_output_dim(),
                                       Activation.by_name("elu")())
    self._num_classes = self.vocab.get_vocab_size("label")
    self._sentiment_discriminator = Discriminator(self._seq_vec.get_output_dim(), self._num_classes)
    self._s_domain_discriminator = Discriminator(self._seq_vec.get_output_dim(), len(TASKS_NAME))
    self._valid_discriminator = Discriminator(self._domain_embeddings.get_output_dim(), 2)
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._label_smoothing = label_smoothing
    self.metrics = {
        "s_domain_acc": CategoricalAccuracy(),
        "valid_acc": CategoricalAccuracy()
    }
    for task_name in TASKS_NAME:
        self.metrics["{}_stm_acc".format(task_name)] = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    self._domain_loss = torch.nn.CrossEntropyLoss()
    # TODO torch.nn.BCELoss
    self._valid_loss = torch.nn.BCEWithLogitsLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder,
             encoder: MTLWeightSharer, tasks: List[AMTask],
             pos_tag_embedding: Embedding = None, lemma_embedding: Embedding = None,
             ne_embedding: Embedding = None, input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             tok2vec: Optional[TokenToVec] = None) -> None:
    super(GraphDependencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.tok2vec = tok2vec
    self._pos_tag_embedding = pos_tag_embedding or None
    self._lemma_embedding = lemma_embedding
    self._ne_embedding = ne_embedding
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    if self._lemma_embedding is not None:
        representation_dim += lemma_embedding.get_output_dim()
    if self._ne_embedding is not None:
        representation_dim += ne_embedding.get_output_dim()
    assert len(tasks) > 0, "List of tasks must not be empty"
    self.tasks: Dict[str, AMTask] = {t.name: t for t in tasks}
    if self.tok2vec:
        representation_dim += self.tok2vec.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    for t in tasks:
        t.check_all_dimensions_match(encoder.get_output_dim())
    for formalism, task in sorted(self.tasks.items(), key=lambda nt: nt[0]):  # sort by name of formalism for consistent ordering
        self.add_module(formalism, task)
    initializer(self)
def __init__(self, weight, vocab_size, embedding_dim, rnn_size_in=(1024 + 300,),
             rnn_size_out=(1024,), max_l=600, mlp_d=1024, num_of_class=3,
             drop_r=0.5, activation_type='relu'):
    super(Model, self).__init__()
    self.glove_embd_layer = Embedding(vocab_size, embedding_dim,
                                      weight=weight, padding_index=0)
    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    num_of_elmo = 1
    self.max_l = max_l
    self.elmo_embd_layer = Elmo(options_file, weight_file, num_of_elmo, dropout=0)
    self.esim_layer = FastMaxout(rnn_size_in, rnn_size_out, max_l, mlp_d,
                                 num_of_class, drop_r, activation_type)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder, binary_feature_dim: int,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             label_smoothing: float = None) -> None:
    super(Model2, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(num_embeddings=2, embedding_dim=binary_feature_dim)
    self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(), self.num_classes))
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim,
                           encoder.get_input_dim(),
                           "text embedding dim + verb indicator embedding dim",
                           "encoder input dim")
    initializer(self)
def __init__(self, vocab: Vocabulary) -> None:
    super().__init__(vocab)
    weight = torch.ones(vocab.get_vocab_size(), 10)
    token_embedding = Embedding(
        num_embeddings=vocab.get_vocab_size(), embedding_dim=10, weight=weight, trainable=False
    )
    self.embedder = BasicTextFieldEmbedder({"words": token_embedding})
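# Hedged sanity check of the frozen-embedding setup above (assumption: AllenNLP's Embedding
# exposes its weight matrix as `weight` and sets requires_grad from `trainable`, as in released versions).
import torch
from allennlp.modules.token_embedders import Embedding

frozen = Embedding(num_embeddings=4, embedding_dim=10, weight=torch.ones(4, 10), trainable=False)
assert not frozen.weight.requires_grad  # the constant weights stay fixed during training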
def create_model(
    vocab: Vocabulary,
    embedding_dim: int,
    max_filter_size: int,
    num_filters: int,
    output_dim: int,
    dropout: float,
):
    model = BasicClassifier(
        text_field_embedder=BasicTextFieldEmbedder(
            {"tokens": Embedding(embedding_dim=embedding_dim, trainable=True, vocab=vocab)}
        ),
        seq2vec_encoder=CnnEncoder(
            ngram_filter_sizes=range(2, max_filter_size),
            num_filters=num_filters,
            embedding_dim=embedding_dim,
            output_dim=output_dim,
        ),
        dropout=dropout,
        vocab=vocab,
    )
    return model
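# Hedged usage sketch for create_model. It assumes an AllenNLP version where Embedding accepts
# a `vocab` argument (as the call above already implies); the toy vocabulary contents and the
# hyperparameter values are illustrative only.
from allennlp.data import Vocabulary

vocab = Vocabulary()
for token in ["the", "movie", "was", "great"]:
    vocab.add_token_to_namespace(token, namespace="tokens")
for label in ["pos", "neg"]:
    vocab.add_token_to_namespace(label, namespace="labels")
model = create_model(vocab, embedding_dim=16, max_filter_size=4,
                     num_filters=8, output_dim=16, dropout=0.2)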
def __init__(self, vocab: Vocabulary, action_embedding_dim: int,
             text_field_embedder: TextFieldEmbedder = None, dropout: float = 0.0,
             rule_namespace: str = 'rule_labels', debug: bool = False,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DROPParserBase, self).__init__(vocab=vocab, regularizer=regularizer)
    self._denotation_accuracy = Average()
    self._consistency = Average()
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace
    # This flag turns on the debugging mode which prints a bunch of stuff in self.decode
    # (inside functions as well)
    self._debug = debug
    self._action_embedder = Embedding(num_embeddings=vocab.get_vocab_size(self._rule_namespace),
                                      embedding_dim=action_embedding_dim,
                                      vocab_namespace=self._rule_namespace)
    self._action_embedding_dim = action_embedding_dim
    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    torch.nn.init.normal_(self._first_action_embedding, mean=0.0, std=0.001)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             edge_model: graph_dependency_parser.components.edge_models.EdgeModel,
             loss_function: graph_dependency_parser.components.losses.EdgeLoss,
             pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0, input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             validation_evaluator: Optional[ValidationEvaluator] = None) -> None:
    super(GraphDependencyParser, self).__init__(vocab, regularizer)
    self.validation_evaluator = validation_evaluator
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    self._head_sentinel = torch.nn.Parameter(torch.randn([1, 1, encoder.get_output_dim()]))
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(encoder.get_output_dim(), edge_model.encoder_dim(),
                           "encoder output dim", "input dim edge model")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)
    self.edge_model = edge_model
    self.loss_function = loss_function
    # Being able to detect what state we are in, probably not the best idea.
    self.current_epoch = 1
    self.pass_over_data_just_started = True
def __init__(self, vocab: Vocabulary, sentence_embedder: TextFieldEmbedder,
             action_embedding_dim: int, encoder: Seq2SeqEncoder,
             dropout: float = 0.0, rule_namespace: str = 'rule_labels') -> None:
    super(NlvrSemanticParser, self).__init__(vocab=vocab)
    self._sentence_embedder = sentence_embedder
    self._denotation_accuracy = Average()
    self._consistency = Average()
    self._encoder = encoder
    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    self._rule_namespace = rule_namespace
    self._action_embedder = Embedding(num_embeddings=vocab.get_vocab_size(self._rule_namespace),
                                      embedding_dim=action_embedding_dim)
    # This is what we pass as input in the first step of decoding, when we don't have a
    # previous action.
    self._first_action_embedding = torch.nn.Parameter(torch.FloatTensor(action_embedding_dim))
    torch.nn.init.normal_(self._first_action_embedding)
def build_seq2seq_model(flags, data_reader, vocab: Vocabulary,
                        source_namespace: str = 'source_tokens',
                        target_namespace: str = 'target_tokens') -> Model:
    source_embedding = Embedding(vocab.get_vocab_size(namespace=source_namespace),
                                 embedding_dim=flags.source_embedding_dim)
    source_embedder = BasicTextFieldEmbedder({'tokens': source_embedding})
    lstm_encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(flags.source_embedding_dim, flags.encoder_hidden_dim,
                      batch_first=True, bidirectional=flags.encoder_bidirectional))
    attention = DotProductAttention()
    model = SimpleSeq2Seq(vocab, source_embedder, lstm_encoder, flags.max_decode_length,
                          target_embedding_dim=flags.decoder_hidden_dim,
                          target_namespace=target_namespace,
                          attention=attention,
                          beam_size=flags.beam_size,
                          use_bleu=True)
    return model
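# Hedged sketch of the `flags` object consumed above: any object exposing these attributes
# works (an argparse.Namespace is used here); the numbers are illustrative defaults, not
# values from the original training script.
from argparse import Namespace

flags = Namespace(source_embedding_dim=128, encoder_hidden_dim=256, encoder_bidirectional=True,
                  max_decode_length=50, decoder_hidden_dim=256, beam_size=5)
# model = build_seq2seq_model(flags, data_reader, vocab)  # data_reader and vocab come from the caller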
def __init__(self, weight, vocab_size, embedding_dim, rnn_size_in=(1024 + 300, 1024 + 300),
             rnn_size_out=(300, 300), max_l=150, max_span_l=50, mlp_d=300,
             num_of_class=3, drop_r=0.5, activation_type='relu',
             use_extra_lex_feature=True):
    super(Model, self).__init__()
    self.glove_embd_layer = Embedding(vocab_size, embedding_dim,
                                      weight=weight, padding_index=0)
    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    num_of_elmo = 1
    self.max_l = max_l
    self.elmo_embd_layer = Elmo(options_file, weight_file, num_of_elmo, dropout=0)
    self.esim_layer = ESIM_SENT_WISE(rnn_size_in, rnn_size_out, max_l, max_span_l,
                                     mlp_d, num_of_class, drop_r, activation_type)
    self.use_extra_lex_feature = use_extra_lex_feature
def test_auto_regressive_seq_decoder_forward(self):
    batch_size, time_steps, decoder_inout_dim = 2, 3, 4
    vocab, decoder_net = create_vocab_and_decoder_net(decoder_inout_dim)
    auto_regressive_seq_decoder = AutoRegressiveSeqDecoder(
        vocab,
        decoder_net,
        10,
        Embedding(num_embeddings=vocab.get_vocab_size(), embedding_dim=decoder_inout_dim),
    )
    encoded_state = torch.rand(batch_size, time_steps, decoder_inout_dim)
    source_mask = torch.ones(batch_size, time_steps).bool()
    target_tokens = {"tokens": {"tokens": torch.ones(batch_size, time_steps).long()}}
    source_mask[0, 1:] = False
    encoder_out = {"source_mask": source_mask, "encoder_outputs": encoded_state}
    assert auto_regressive_seq_decoder.forward(encoder_out) == {}
    loss = auto_regressive_seq_decoder.forward(encoder_out, target_tokens)["loss"]
    assert loss.shape == torch.Size([]) and loss.requires_grad
    auto_regressive_seq_decoder.eval()
    assert "predictions" in auto_regressive_seq_decoder.forward(encoder_out)
def test_auto_regressive_seq_decoder_tensor_and_token_based_metric(self):
    # set all seeds to a fixed value (torch, numpy, etc.).
    # this enables deterministic behavior of the `auto_regressive_seq_decoder`
    # below (i.e., parameter initialization and `encoded_state = torch.randn(..)`)
    prepare_environment(Params({}))
    batch_size, time_steps, decoder_inout_dim = 2, 3, 4
    vocab, decoder_net = create_vocab_and_decoder_net(decoder_inout_dim)
    auto_regressive_seq_decoder = AutoRegressiveSeqDecoder(
        vocab,
        decoder_net,
        10,
        Embedding(vocab.get_vocab_size(), decoder_inout_dim),
        tensor_based_metric=BLEU(),
        token_based_metric=DummyMetric(),
    ).eval()
    encoded_state = torch.randn(batch_size, time_steps, decoder_inout_dim)
    source_mask = torch.ones(batch_size, time_steps).long()
    target_tokens = {"tokens": torch.ones(batch_size, time_steps).long()}
    source_mask[0, 1:] = 0
    encoder_out = {"source_mask": source_mask, "encoder_outputs": encoded_state}
    auto_regressive_seq_decoder.forward(encoder_out, target_tokens)
    assert auto_regressive_seq_decoder.get_metrics()["BLEU"] == 1.388809517005903e-11
    assert auto_regressive_seq_decoder.get_metrics()["em"] == 0.0
    assert auto_regressive_seq_decoder.get_metrics()["f1"] == 1 / 3
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder, tag_representation_dim: int,
             arc_representation_dim: int, pos_tag_embedding: Embedding = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.0, input_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(BiaffineDependencyParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    encoder_dim = encoder.get_output_dim()
    self.head_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.child_arc_projection = torch.nn.Linear(encoder_dim, arc_representation_dim)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("head_tags")
    self.head_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.child_tag_projection = torch.nn.Linear(encoder_dim, tag_representation_dim)
    self.tag_bilinear = torch.nn.modules.Bilinear(tag_representation_dim,
                                                  tag_representation_dim,
                                                  num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    tags = self.vocab.get_token_to_index_vocabulary("pos")
    punctuation_tag_indices = {tag: index for tag, index in tags.items() if tag in POS_TO_IGNORE}
    self._pos_to_ignore = set(punctuation_tag_indices.values())
    logger.info(f"Found POS tags corresponding to the following punctuation : {punctuation_tag_indices}. "
                "Ignoring words with these POS tags for evaluation.")
    self._attachment_scores = AttachmentScores()
    initializer(self)
def use_glove():
    embedding_dim = 300
    project_dim = 200
    train_reader = StanfordSentimentTreeBankDatasetReader()
    dev_reader = StanfordSentimentTreeBankDatasetReader(use_subtrees=False)
    train_dataset = train_reader.read('~/nlp/dataset/sst/trees/train.txt')
    dev_dataset = dev_reader.read('~/nlp/dataset/sst/trees/dev.txt')
    print(f"total train samples: {len(train_dataset)}, dev samples: {len(dev_dataset)}")

    # Build the vocabulary from the datasets.
    vocab = Vocabulary.from_instances(train_dataset + dev_dataset)
    vocab_dim = vocab.get_vocab_size('tokens')
    print("vocab: ", vocab.get_vocab_size('labels'), vocab_dim)

    glove_embeddings_file = '~/nlp/pretrainedEmbeddings/glove/glove.840B.300d.txt'
    # If you want to actually load a pretrained embedding file,
    # you currently need to do that by calling Embedding.from_params()
    # see https://github.com/allenai/allennlp/issues/2694
    token_embedding = Embedding.from_params(vocab=vocab,
                                            params=Params({
                                                'pretrained_file': glove_embeddings_file,
                                                'embedding_dim': embedding_dim,
                                                'trainable': False
                                            }))
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    print(word_embeddings.get_output_dim())

    # use batch_to_ids to convert sentences to character ids
    sentence_lists = [["I", 'have', 'a', "dog"],
                      ["How", 'are', 'you', ',', 'today', 'is', "Monday"]]
    sentence_ids = batch_to_ids(sentence_lists, vocab)
    embeddings = token_embedding(sentence_ids)
    for sentence in sentence_lists:
        for text in sentence:
            indice = vocab.get_token_index(text)
            print(f"text: {text}, indice: {indice}")

    # calculate distance based on elmo embedding
    import scipy.spatial.distance
    tokens = [["dog", "ate", "an", "apple", "for", "breakfast"]]
    tokens2 = [["cat", "ate", "an", "carrot", "for", "breakfast"]]
    token_ids = batch_to_ids(tokens, vocab)
    token_ids2 = batch_to_ids(tokens2, vocab)
    vectors = token_embedding(token_ids)
    vectors2 = token_embedding(token_ids2)
    print('embedding shape ', vectors.shape)
    print('\nvector ', vectors[0][0], vectors2[0][0])
    distance = scipy.spatial.distance.cosine(vectors[0][0], vectors2[0][0])
    print(f"embedding distance: {distance}")
def get_model(vocab: Vocabulary) -> CrfTagger:
    hidden_dimension = 256
    layers = 2
    bidirectional = True
    total_embedding_dim = 0

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                                embedding_dim=100, trainable=True)
    total_embedding_dim += 100

    params = Params({
        "embedding": {
            "embedding_dim": 16,
            "vocab_namespace": "token_characters"
        },
        "encoder": {
            "type": "cnn",
            "embedding_dim": 16,
            "num_filters": 128,
            "ngram_filter_sizes": [3],
            "conv_layer_activation": "relu",
        },
    })
    char_embedding = TokenCharactersEncoder.from_params(vocab=vocab, params=params)
    total_embedding_dim += 128

    active_embedders = {
        "tokens": token_embedding,
        "token_characters": char_embedding,
    }
    word_embeddings = BasicTextFieldEmbedder(active_embedders)

    network = LSTM(total_embedding_dim, hidden_dimension, num_layers=layers,
                   batch_first=True, bidirectional=bidirectional)
    encoder = PytorchSeq2SeqWrapper(network, stateful=True)

    # Finally, we can instantiate the model.
    model = CrfTagger(
        vocab=vocab,
        text_field_embedder=word_embeddings,
        encoder=encoder,
        label_encoding="BIO",
        constrain_crf_decoding=True,
        calculate_span_f1=True,
    )
    return model
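# Hedged usage sketch for get_model: build a tiny Vocabulary with the three namespaces the
# function reads ('tokens', 'token_characters', and BIO 'labels'). The namespace contents are
# illustrative; a real vocabulary would come from Vocabulary.from_instances on the training data.
from allennlp.data import Vocabulary

vocab = Vocabulary()
for token in ["John", "lives", "in", "Berlin"]:
    vocab.add_token_to_namespace(token, namespace="tokens")
for char in "JohnlivesinBerlin":
    vocab.add_token_to_namespace(char, namespace="token_characters")
for label in ["O", "B-PER", "I-PER", "B-LOC", "I-LOC"]:
    vocab.add_token_to_namespace(label, namespace="labels")
model = get_model(vocab)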
def test_auto_regressive_seq_decoder_init(self):
    decoder_inout_dim = 4
    vocab, decoder_net = create_vocab_and_decoder_net(decoder_inout_dim)
    AutoRegressiveSeqDecoder(
        vocab,
        decoder_net,
        Embedding(num_embeddings=vocab.get_vocab_size(), embedding_dim=decoder_inout_dim),
        beam_search=Lazy(BeamSearch, constructor_extras={"max_steps": 10}),
    )
    with pytest.raises(ConfigurationError):
        AutoRegressiveSeqDecoder(
            vocab,
            decoder_net,
            Embedding(num_embeddings=vocab.get_vocab_size(), embedding_dim=decoder_inout_dim + 1),
            beam_search=Lazy(BeamSearch, constructor_extras={"max_steps": 10}),
        )
def test_auto_regressive_seq_decoder_indices_to_tokens(self):
    decoder_inout_dim = 4
    vocab, decoder_net = create_vocab_and_decoder_net(decoder_inout_dim)
    auto_regressive_seq_decoder = AutoRegressiveSeqDecoder(
        vocab, decoder_net, 10, Embedding(vocab.get_vocab_size(), decoder_inout_dim)
    )
    predictions = torch.tensor([[3, 2, 5, 0, 0], [2, 2, 3, 5, 0]])
    tokens_ground_truth = [["B", "A"], ["A", "A", "B"]]
    predicted_tokens = auto_regressive_seq_decoder.indices_to_tokens(predictions.numpy())
    assert predicted_tokens == tokens_ground_truth
def from_params(cls, vocab: Vocabulary, params: Params) -> 'HierarchicalCRF':
    embedder_params = params.pop('text_field_embedder')
    text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
    duration_embedder = Embedding.from_params(None, params.pop('duration_embedder'))
    label_embedder = Embedding.from_params(vocab, params.pop('label_embedder'))
    inner_encoder = Seq2VecEncoder.from_params(params.pop('inner_encoder'))
    outer_encoder = Seq2SeqEncoder.from_params(params.pop('outer_encoder'))
    segment_embedder = SegmentEmbedder.from_params(params.pop('segment_embedder'))
    weight_function = WeightFunction.from_params(params.pop('weight_function'))
    label_namespace = params.pop('label_namespace', 'labels')
    max_length = params.pop_int('max_length', None)
    dropout = params.pop_float('dropout', None)
    initializer = InitializerApplicator.from_params(params.pop('initializer', []))
    regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))
    params.assert_empty(cls.__name__)
    return cls(vocab=vocab,
               text_field_embedder=text_field_embedder,
               duration_embedder=duration_embedder,
               label_embedder=label_embedder,
               inner_encoder=inner_encoder,
               outer_encoder=outer_encoder,
               segment_embedder=segment_embedder,
               weight_function=weight_function,
               label_namespace=label_namespace,
               max_length=max_length,
               dropout=dropout,
               initializer=initializer,
               regularizer=regularizer)
def build_searnn_decoder(decoder_input_dim, embedder=None, symbols=["A", "B"], **kwargs):
    vocab, decoder_net, loss_criterion = \
        create_vocab_decoder_net_and_criterion(decoder_input_dim, symbols=symbols)
    embedder = embedder or Embedding(num_embeddings=vocab.get_vocab_size(),
                                     embedding_dim=decoder_input_dim)
    return LMPLReinforceDecoder(vocab, 10, decoder_net, embedder, loss_criterion, **kwargs)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder, tag_representation_dim: int,
             arc_representation_dim: int, tag_feedforward: FeedForward = None,
             arc_feedforward: FeedForward = None, pos_tag_embedding: Embedding = None,
             dropout: float = 0.0, input_dropout: float = 0.0,
             edge_prediction_threshold: float = 0.5,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(GraphParser, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.edge_prediction_threshold = edge_prediction_threshold
    if not 0 < edge_prediction_threshold < 1:
        raise ConfigurationError(f"edge_prediction_threshold must be between "
                                 f"0 and 1 (exclusive) but found {edge_prediction_threshold}.")
    encoder_dim = encoder.get_output_dim()
    self.head_arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim, Activation.by_name("elu")())
    self.child_arc_feedforward = copy.deepcopy(self.head_arc_feedforward)
    self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                 arc_representation_dim,
                                                 use_input_biases=True)
    num_labels = self.vocab.get_vocab_size("labels")
    self.head_tag_feedforward = tag_feedforward or \
        FeedForward(encoder_dim, 1, tag_representation_dim, Activation.by_name("elu")())
    self.child_tag_feedforward = copy.deepcopy(self.head_tag_feedforward)
    self.tag_bilinear = BilinearMatrixAttention(tag_representation_dim,
                                                tag_representation_dim,
                                                label_dim=num_labels)
    self._pos_tag_embedding = pos_tag_embedding or None
    self._dropout = InputVariationalDropout(dropout)
    self._input_dropout = Dropout(input_dropout)
    representation_dim = text_field_embedder.get_output_dim()
    if pos_tag_embedding is not None:
        representation_dim += pos_tag_embedding.get_output_dim()
    check_dimensions_match(representation_dim, encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    check_dimensions_match(tag_representation_dim, self.head_tag_feedforward.get_output_dim(),
                           "tag representation dim", "tag feedforward output dim")
    check_dimensions_match(arc_representation_dim, self.head_arc_feedforward.get_output_dim(),
                           "arc representation dim", "arc feedforward output dim")
    self._unlabelled_f1 = F1Measure(positive_label=1)
    self._arc_loss = torch.nn.BCEWithLogitsLoss(reduction='none')
    self._tag_loss = torch.nn.CrossEntropyLoss(reduction='none')
    initializer(self)