def __init__(self, N_word, N_h, N_depth, gpu, use_hs):
    super().__init__(None)
    self.N_h = N_h
    self.gpu = gpu
    self.use_hs = use_hs

    self.q_lstm = PytorchSeq2SeqWrapper(nn.LSTM(input_size=N_word, hidden_size=N_h // 2,
                                                num_layers=N_depth, batch_first=True,
                                                dropout=0.3, bidirectional=True))
    self.hs_lstm = PytorchSeq2SeqWrapper(nn.LSTM(input_size=N_word, hidden_size=N_h // 2,
                                                 num_layers=N_depth, batch_first=True,
                                                 dropout=0.3, bidirectional=True))
    self.col_lstm = PytorchSeq2SeqWrapper(nn.LSTM(input_size=N_word, hidden_size=N_h // 2,
                                                  num_layers=N_depth, batch_first=True,
                                                  dropout=0.3, bidirectional=True))

    self.q_att = nn.Linear(N_h, N_h)
    self.hs_att = nn.Linear(N_h, N_h)
    self.rt_out_q = nn.Linear(N_h, N_h)
    self.rt_out_hs = nn.Linear(N_h, N_h)
    self.rt_out_c = nn.Linear(N_h, N_h)
    self.rt_out = nn.Sequential(nn.Tanh(), nn.Linear(N_h, 2))  # for 2 operators

    self.softmax = nn.Softmax()  # dim=1
    self.CE = nn.CrossEntropyLoss()
    self.log_softmax = nn.LogSoftmax()
    self.mlsml = nn.MultiLabelSoftMarginLoss()
    self.bce_logit = nn.BCEWithLogitsLoss()
    self.sigm = nn.Sigmoid()

    if gpu:
        self.cuda()
def test_get_output_dim(self):
    input_dim = 10
    hidden_dim = 15

    lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(input_dim, hidden_dim, bidirectional=True, batch_first=True))
    embedder = Seq2SeqEncoderTokenEmbedder(lstm)
    assert embedder.get_output_dim() == hidden_dim * 2

    lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(input_dim, hidden_dim, bidirectional=False, batch_first=True))
    embedder = Seq2SeqEncoderTokenEmbedder(lstm)
    assert embedder.get_output_dim() == hidden_dim

    lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(input_dim, hidden_dim, bidirectional=True, batch_first=True))
    embedder = Seq2SeqEncoderTokenEmbedder(lstm, 100)
    assert embedder.get_output_dim() == 100
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             share_encoder: Seq2VecEncoder = None,
             private_encoder: Seq2VecEncoder = None,
             dropout: float = None,
             input_dropout: float = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: RegularizerApplicator = None) -> None:
    super(JointSentimentClassifier, self).__init__(vocab=vocab, regularizer=regularizer)

    self._text_field_embedder = text_field_embedder
    if share_encoder is None and private_encoder is None:
        share_rnn = nn.LSTM(input_size=self._text_field_embedder.get_output_dim(),
                            hidden_size=150,
                            batch_first=True,
                            dropout=dropout,
                            bidirectional=True)
        share_encoder = PytorchSeq2SeqWrapper(share_rnn)
        private_rnn = nn.LSTM(input_size=self._text_field_embedder.get_output_dim(),
                              hidden_size=150,
                              batch_first=True,
                              dropout=dropout,
                              bidirectional=True)
        private_encoder = PytorchSeq2SeqWrapper(private_rnn)
        logger.info("Using LSTM as encoder")

    self._domain_embeddings = Embedding(len(TASKS_NAME),
                                        self._text_field_embedder.get_output_dim())
    self._share_encoder = share_encoder

    self._s_domain_discriminator = Discriminator(share_encoder.get_output_dim(), len(TASKS_NAME))
    self._p_domain_discriminator = Discriminator(private_encoder.get_output_dim(), len(TASKS_NAME))
    # TODO individual valid discriminator
    self._valid_discriminator = Discriminator(self._domain_embeddings.get_output_dim(), 2)

    for task in TASKS_NAME:
        tagger = SentimentClassifier(
            vocab=vocab,
            text_field_embedder=self._text_field_embedder,
            share_encoder=self._share_encoder,
            private_encoder=copy.deepcopy(private_encoder),
            s_domain_discriminator=self._s_domain_discriminator,
            p_domain_discriminator=self._p_domain_discriminator,
            valid_discriminator=self._valid_discriminator,
            dropout=dropout,
            input_dropout=input_dropout,
            label_smoothing=0.1,
            initializer=initializer)
        self.add_module("_tagger_{}".format(task), tagger)
    logger.info("Multi-Task Learning Model has been instantiated.")
def test_get_dimension_is_correct(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=2, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)
    assert encoder.get_output_dim() == 14
    assert encoder.get_input_dim() == 2

    lstm = LSTM(bidirectional=False, num_layers=3, input_size=2, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)
    assert encoder.get_output_dim() == 7
    assert encoder.get_input_dim() == 2
def get_masked_copynet_with_attention(vocab: Vocabulary,
                                      max_decoding_steps: int = 20,
                                      beam_size: int = 1) -> MaskedCopyNet:
    word_embeddings = Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                                embedding_dim=EMB_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": word_embeddings})

    masker_embeddings = Embedding(num_embeddings=vocab.get_vocab_size("mask_tokens"),
                                  embedding_dim=MASK_EMB_DIM)
    masker_embeddings = BasicTextFieldEmbedder({"tokens": masker_embeddings})

    attention = AdditiveAttention(vector_dim=HID_DIM * 2, matrix_dim=HID_DIM * 2)
    mask_attention = AdditiveAttention(vector_dim=HID_DIM * 2, matrix_dim=MASK_EMB_DIM)
    lstm = PytorchSeq2SeqWrapper(nn.LSTM(EMB_DIM, HID_DIM, batch_first=True, bidirectional=True))

    return MaskedCopyNet(
        vocab=vocab,
        embedder=word_embeddings,
        encoder=lstm,
        max_decoding_steps=max_decoding_steps,
        attention=attention,
        mask_embedder=masker_embeddings,
        mask_attention=mask_attention,
        beam_size=beam_size
    )
def test_rnn_sentence_extractor(self):
    # Hyperparameters
    batch_size = 3
    num_sents = 5
    input_hidden_size = 7
    hidden_size = 11

    # Setup a model
    gru = GRU(input_size=input_hidden_size, hidden_size=hidden_size,
              bidirectional=True, batch_first=True)
    rnn = PytorchSeq2SeqWrapper(gru)
    feed_forward = FeedForward(input_dim=hidden_size * 2,
                               num_layers=2,
                               hidden_dims=[10, 1],
                               activations=[Activation.by_name('tanh')(),
                                            Activation.by_name('linear')()])
    extractor = RNNSentenceExtractor(rnn, feed_forward)

    # Setup some dummy data
    sentence_encodings = torch.randn(batch_size, num_sents, input_hidden_size)
    mask = torch.ones(batch_size, num_sents)

    # Pass the data through and verify the size of the output
    extraction_scores = extractor(sentence_encodings, mask)
    assert extraction_scores.size() == (batch_size, num_sents)
def test_forward_pulls_out_correct_tensor_for_unsorted_batches(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)
    tensor = torch.rand([5, 7, 3])
    tensor[0, 3:, :] = 0
    tensor[1, 4:, :] = 0
    tensor[2, 2:, :] = 0
    tensor[3, 6:, :] = 0
    mask = torch.ones(5, 7)
    mask[0, 3:] = 0
    mask[1, 4:] = 0
    mask[2, 2:] = 0
    mask[3, 6:] = 0

    input_tensor = Variable(tensor)
    mask = Variable(mask)
    sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
    sorted_inputs, sorted_sequence_lengths, restoration_indices, _ = sort_batch_by_length(
        input_tensor, sequence_lengths)
    packed_sequence = pack_padded_sequence(sorted_inputs,
                                           sorted_sequence_lengths.data.tolist(),
                                           batch_first=True)
    lstm_output, _ = lstm(packed_sequence)
    encoder_output = encoder(input_tensor, mask)
    lstm_tensor, _ = pad_packed_sequence(lstm_output, batch_first=True)
    assert_almost_equal(encoder_output.data.numpy(),
                        lstm_tensor.index_select(0, restoration_indices).data.numpy())
def test_forward_pulls_out_correct_tensor_with_sequence_lengths(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)

    input_tensor = torch.rand([5, 7, 3])
    input_tensor[1, 6:, :] = 0
    input_tensor[2, 4:, :] = 0
    input_tensor[3, 2:, :] = 0
    input_tensor[4, 1:, :] = 0
    mask = torch.ones(5, 7).bool()
    mask[1, 6:] = False
    mask[2, 4:] = False
    mask[3, 2:] = False
    mask[4, 1:] = False

    sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
    packed_sequence = pack_padded_sequence(input_tensor,
                                           sequence_lengths.data.tolist(),
                                           batch_first=True)
    lstm_output, _ = lstm(packed_sequence)
    encoder_output = encoder(input_tensor, mask)
    lstm_tensor, _ = pad_packed_sequence(lstm_output, batch_first=True)
    assert_almost_equal(encoder_output.data.numpy(), lstm_tensor.data.numpy())
def __init__(self,
             input_size: int,
             hidden_size: int,
             num_layers: int = 1,
             dropout: float = 0.0,
             residual: bool = True,
             rnn_type: str = "lstm") -> None:
    super(ResidualRNN, self).__init__()
    self._input_size = input_size
    self._hidden_size = hidden_size
    self._dropout = torch.nn.Dropout(p=dropout)
    self._residual = residual

    rnn_type = rnn_type.lower()
    if rnn_type == "lstm":
        rnn_cell = torch.nn.LSTM
    elif rnn_type == "gru":
        rnn_cell = torch.nn.GRU
    else:
        raise ConfigurationError(f"Unknown RNN cell type {rnn_type}")

    layers = []
    for layer_index in range(num_layers):
        # Use hidden size on later layers so that the first layer projects
        # and all other layers are residual
        input_ = input_size if layer_index == 0 else hidden_size
        rnn = rnn_cell(input_, hidden_size, bidirectional=True, batch_first=True)
        layer = PytorchSeq2SeqWrapper(rnn)
        layers.append(layer)
        self.add_module("rnn_layer_{}".format(layer_index), layer)
    self._layers = layers
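# Sketch only (not taken from the original source): one plausible forward pass for
# this ResidualRNN constructor. It sums the two LSTM directions so every layer's
# output stays at hidden_size, which is what lets later layers take hidden_size
# input and makes the residual addition after the first, projecting layer line up.
def forward(self, inputs: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:  # hypothetical
    output = inputs
    for index, layer in enumerate(self._layers):
        layer_out = layer(output, mask)
        # (batch, seq, 2 * hidden) -> (batch, seq, hidden) by summing directions.
        layer_out = layer_out[..., :self._hidden_size] + layer_out[..., self._hidden_size:]
        layer_out = self._dropout(layer_out)
        if self._residual and index > 0:
            layer_out = layer_out + output
        output = layer_out
    return output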
def __init__(self,
             vocab: Vocabulary,
             embed: TextFieldEmbedder,
             encoder_size: int,
             decoder_size: int,
             num_layers: int,
             beam_size: int,
             max_decoding_steps: int,
             use_bleu: bool = True,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    super().__init__(vocab)
    self.START, self.END = self.vocab.get_token_index(START_SYMBOL), self.vocab.get_token_index(END_SYMBOL)
    self.OOV = self.vocab.get_token_index(self.vocab._oov_token)  # pylint: disable=protected-access
    self.PAD = self.vocab.get_token_index(self.vocab._padding_token)  # pylint: disable=protected-access
    self.COPY = self.vocab.get_token_index("@@COPY@@")
    self.KEEP = self.vocab.get_token_index("@@KEEP@@")
    self.DROP = self.vocab.get_token_index("@@DROP@@")
    self.SYMBOL = (self.START, self.END, self.PAD, self.KEEP, self.DROP)
    self.vocab_size = vocab.get_vocab_size()

    self.EMB = embed
    self.emb_size = self.EMB.token_embedder_tokens.output_dim
    self.encoder_size, self.decoder_size = encoder_size, decoder_size

    self.FACT_ENCODER = FeedForward(3 * self.emb_size, 1, encoder_size, nn.Tanh())
    self.ATTN = AdditiveAttention(encoder_size + decoder_size, encoder_size)
    self.COPY_ATTN = AdditiveAttention(decoder_size, encoder_size)
    module = nn.LSTM(self.emb_size, encoder_size // 2, num_layers,
                     bidirectional=True, batch_first=True)
    self.BUFFER = PytorchSeq2SeqWrapper(module)  # BiLSTM to encode draft text
    self.STREAM = nn.LSTMCell(2 * encoder_size, decoder_size)  # Store revised text
    self.BEAM = BeamSearch(self.END, max_steps=max_decoding_steps, beam_size=beam_size)

    self.U = nn.Sequential(nn.Linear(2 * encoder_size, decoder_size), nn.Tanh())
    self.ADD = nn.Sequential(nn.Linear(self.emb_size, encoder_size), nn.Tanh())
    self.P = nn.Sequential(nn.Linear(encoder_size + decoder_size, decoder_size), nn.Tanh())
    self.W = nn.Linear(decoder_size, self.vocab_size)
    self.G = nn.Sequential(nn.Linear(decoder_size, 1), nn.Sigmoid())

    initializer(self)
    self._bleu = BLEU(exclude_indices=set(self.SYMBOL)) if use_bleu else None
def trainModel(train_dataset, validation_dataset, vocab):
    EMBEDDING_DIM = 6
    HIDDEN_DIM = 6

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    lstm = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM,
                                               bidirectional=False, batch_first=True))
    model = LstmTagger(word_embeddings, lstm, vocab)

    if torch.cuda.is_available():
        cuda_device = 0
        model = model.cuda(cuda_device)
    else:
        cuda_device = -1

    # optimizer = optim.AdamW(model.parameters(), lr=1e-4, eps=1e-8)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    iterator = BucketIterator(batch_size=2, sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      validation_dataset=validation_dataset,
                      patience=10,
                      num_epochs=100,
                      cuda_device=cuda_device)
    trainer.train()
    return model
def prepare1():
    """
    First part of preparing data for training
    :return: biLSTM model object, biLSTM vocabulary, data for training, data for validation,
             cuda biLSTM object, biLSTM reader object
    """
    reader = PosDatasetReader()
    train_dataset = reader.read(train_path)
    validation_dataset = reader.read(validation_path)
    vocab = Vocabulary.from_instances(train_dataset + validation_dataset)

    EMBEDDING_DIM = 200
    HIDDEN_DIM = 200
    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    lstm = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM,
                                               batch_first=True, bidirectional=True))
    model = LstmTagger(word_embeddings, lstm, vocab)

    if torch.cuda.is_available():
        cuda_device = 0
        model = model.cuda(cuda_device)
    else:
        cuda_device = -1
    return model, vocab, train_dataset, validation_dataset, cuda_device, reader
def running_NER():
    reader = PosDatasetReader()
    train_dataset = reader.read('../data/700_multi_data/600_ner_train.txt')
    validation_dataset = reader.read('../data/700_multi_data/66_ner_test.txt')
    vocab = Vocabulary.from_files("../model_store/vocabulary")
    # '''vocab part'''
    # train_1 = reader.read('../data/train/train.json')
    # train_2 = reader.read('../data/train/dev.json')

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    lstm = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
    model = LstmTagger(word_embeddings, lstm, vocab)

    optimizer = optim.SGD(model.parameters(), lr=0.1)
    iterator = BucketIterator(batch_size=2, sorting_keys=[("sentence", "num_tokens")])
    iterator.index_with(vocab)

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      validation_dataset=validation_dataset,
                      patience=10,
                      num_epochs=1000)
    trainer.train()
def test_forward_pulls_out_correct_tensor_without_sequence_lengths(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=2, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)
    input_tensor = Variable(torch.FloatTensor([[[.7, .8], [.1, 1.5]]]))
    lstm_output = lstm(input_tensor)
    encoder_output = encoder(input_tensor, None)
    assert_almost_equal(encoder_output.data.numpy(), lstm_output[0].data.numpy())
def test_forward_works_even_with_empty_sequences(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)

    tensor = torch.autograd.Variable(torch.rand([5, 7, 3]))
    tensor[1, 6:, :] = 0
    tensor[2, :, :] = 0
    tensor[3, 2:, :] = 0
    tensor[4, :, :] = 0
    mask = torch.autograd.Variable(torch.ones(5, 7))
    mask[1, 6:] = 0
    mask[2, :] = 0
    mask[3, 2:] = 0
    mask[4, :] = 0

    results = encoder.forward(tensor, mask)

    for i in (0, 1, 3):
        assert not (results[i] == 0.).data.all()
    for i in (2, 4):
        assert (results[i] == 0.).data.all()
def create_seq2seqmodel(vocab, src_embedders, tgt_embedders, hidden_dim=100, num_layers=1,
                        encoder=None, max_decoding_steps=20, beam_size=1, use_bleu=True, device=0):
    encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(src_embedders.get_output_dim(), hidden_dim, batch_first=True))
    model = SimpleSeq2Seq(vocab, src_embedders, encoder, max_decoding_steps,
                          target_namespace="target_tokens",
                          target_embedding_dim=tgt_embedders.get_output_dim(),
                          beam_size=beam_size, use_bleu=use_bleu)
    # encoder = BartEncoder('facebook/bart-base', use_pretrained_embeddings=True)
    # encoder = PretrainedTransformerEmbedder(model_name='facebook/bart-base', sub_module="encoder")
    # model = Bart(model_name='facebook/bart-base', vocab=vocab, max_decoding_steps=max_decoding_steps,
    #              beam_size=beam_size, encoder=encoder)
    model.to(device)
    return model
def create_seq2seqmodel(vocab, src_embedders, tgt_embedders, hidden_dim=100, num_layers=1,
                        max_decoding_steps=20, beam_size=1, use_bleu=True, device=0):
    encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(src_embedders.get_output_dim(), hidden_dim, batch_first=True))
    decoder_net = LstmCellDecoderNet(decoding_dim=encoder.get_output_dim(),
                                     target_embedding_dim=tgt_embedders.get_output_dim())
    decoder = AutoRegressiveSeqDecoder(vocab, decoder_net, max_decoding_steps,
                                       tgt_embedders, beam_size=beam_size)
    model = ComposedSeq2Seq(vocab, src_embedders, encoder, decoder)
    # model = SimpleSeq2Seq(vocab, src_embedders, encoder, max_decoding_steps, target_namespace="target_tokens",
    #                       target_embedding_dim=tgt_embedders.get_output_dim(), beam_size=beam_size,
    #                       use_bleu=use_bleu)
    model.to(device)
    return model
def test_wrapper_stateful(self):
    lstm = LSTM(bidirectional=True, num_layers=2, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm, stateful=True)

    # To test the stateful functionality we need to call the encoder multiple times.
    # Different batch sizes further test some of the logic.
    batch_sizes = [5, 10, 8]
    sequence_lengths = [4, 6, 7]
    states = []
    for batch_size, sequence_length in zip(batch_sizes, sequence_lengths):
        tensor = Variable(torch.rand([batch_size, sequence_length, 3]))
        mask = Variable(torch.ones(batch_size, sequence_length))
        mask.data[0, 3:] = 0
        encoder_output = encoder(tensor, mask)
        states.append(encoder._states)  # pylint: disable=protected-access

    # Check that the output is masked properly.
    assert_almost_equal(encoder_output[0, 3:, :].data.numpy(), numpy.zeros((4, 14)))

    for k in range(2):
        assert_almost_equal(states[-1][k][:, -2:, :].data.numpy(),
                            states[-2][k][:, -2:, :].data.numpy())
def build_seq2seq_model(flags,
                        data_reader,
                        vocab: Vocabulary,
                        source_namespace: str = 'source_tokens',
                        target_namespace: str = 'target_tokens') -> Model:
    source_embedding = Embedding(vocab.get_vocab_size(namespace=source_namespace),
                                 embedding_dim=flags.source_embedding_dim)
    source_embedder = BasicTextFieldEmbedder({'tokens': source_embedding})
    lstm_encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(flags.source_embedding_dim,
                      flags.encoder_hidden_dim,
                      batch_first=True,
                      bidirectional=flags.encoder_bidirectional))
    attention = DotProductAttention()
    model = SimpleSeq2Seq(vocab,
                          source_embedder,
                          lstm_encoder,
                          flags.max_decode_length,
                          target_embedding_dim=flags.decoder_hidden_dim,
                          target_namespace=target_namespace,
                          attention=attention,
                          beam_size=flags.beam_size,
                          use_bleu=True)
    return model
def generate_res_file():
    reader = PosDatasetReader()
    vocab = Vocabulary.from_files("../model_store/vocabulary")

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    lstm = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
    model2 = LstmTagger(word_embeddings, lstm, vocab)
    with open("../model_store/model.th", 'rb') as f:
        model2.load_state_dict(torch.load(f))

    predictor2 = SentenceTaggerPredictor(model2, dataset_reader=reader)
    train_read_file = open('../data/only_sentence/raw_test.json', 'r')
    train_write_file = open('../data/only_sentence/ner_test.json', 'w')
    for line in train_read_file:
        tag_logits2 = predictor2.predict(
            line.replace('.', '').replace(',', '').replace('\n', ''))['tag_logits']
        tag_ids = np.argmax(tag_logits2, axis=-1)
        res = [model2.vocab.get_token_from_index(i, 'labels') for i in tag_ids]
        for i in range(len(res)):
            train_write_file.write(res[i] + ' ')
        # train_write_file.write(str(tag_logits2))
        train_write_file.write('\n')
        train_write_file.flush()
    train_read_file.close()
    train_write_file.close()
    print('finish')

# generate_res_file()
def get_encoder(input_dim, output_dim, encoder_type, args):
    if encoder_type == "pass":
        return PassThroughEncoder(input_dim)
    if encoder_type == "bilstm":
        return PytorchSeq2SeqWrapper(
            AllenNLPSequential(torch.nn.ModuleList(
                [get_encoder(input_dim, output_dim, "bilstm-unwrapped", args)]),
                input_dim,
                output_dim,
                bidirectional=True,
                residual_connection=args.residual_connection,
                dropout=args.dropout))
    if encoder_type == "bilstm-unwrapped":
        return torch.nn.LSTM(
            input_dim,
            output_dim,
            batch_first=True,
            bidirectional=True,
            dropout=args.dropout,
        )
    if encoder_type == "self_attention":
        return IntraSentenceAttentionEncoder(input_dim=input_dim, projection_dim=output_dim)
    if encoder_type == "stacked_self_attention":
        return StackedSelfAttentionEncoder(
            input_dim=input_dim,
            hidden_dim=output_dim,
            projection_dim=output_dim,
            feedforward_hidden_dim=output_dim,
            num_attention_heads=5,
            num_layers=3,
            dropout_prob=args.dropout,
        )
    raise RuntimeError(f"Unknown encoder type={encoder_type}")
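# Illustrative call only (not from the original source): `args` is a stand-in
# namespace carrying the attributes get_encoder reads (residual_connection, dropout).
from argparse import Namespace

args = Namespace(residual_connection=False, dropout=0.1)
attention_encoder = get_encoder(input_dim=128, output_dim=64,
                                encoder_type="stacked_self_attention", args=args)
identity_encoder = get_encoder(input_dim=128, output_dim=128, encoder_type="pass", args=args)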
def __init__(self,
             embedder: TextFieldEmbedder,
             hidden_dim: int,
             latent_dim: int,
             vocab: Vocabulary,
             device: torch.device,
             word_dropout_rate: float = 0.2,
             anneal_steps: int = 500,
             embedding_dropout_rate: float = 0.0):
    super().__init__(vocab)
    self.embedder = embedder
    self.embedding_dim = embedder.get_output_dim()
    self.hidden_dim = hidden_dim
    self.latent_dim = latent_dim
    self.vocab = vocab
    self.label_size = self.vocab.get_vocab_size("class_labels")
    self.device = device
    self.word_dropout_rate = word_dropout_rate
    self.anneal_steps = anneal_steps

    self.embedding_dropout = nn.Dropout(embedding_dropout_rate)
    self.encoder_rnn = PytorchSeq2SeqWrapper(
        torch.nn.GRU(self.embedding_dim, self.hidden_dim, num_layers=1, batch_first=True))
    self.decoder_rnn = PytorchSeq2SeqWrapper(
        torch.nn.GRU(self.embedding_dim, self.hidden_dim, num_layers=1, batch_first=True))

    self.hidden2mean = nn.Linear(self.hidden_dim, self.latent_dim)
    self.hidden2log_var = nn.Linear(self.hidden_dim, self.latent_dim)
    self.latent2hidden = nn.Linear(self.latent_dim + self.label_size, self.hidden_dim)
    self.outputs2vocab = nn.Linear(self.hidden_dim, self.vocab.get_vocab_size())

    self.metrics = {}
    self.step = 0
def get_wrapped_encoder(encoder_list):
    return PytorchSeq2SeqWrapper(
        AllenNLPSequential(torch.nn.ModuleList(encoder_list),
                           elmo_embedding_dim,
                           hidden_dim,
                           bidirectional=True,
                           residual_connection=residual_connection,
                           dropout=dropout))
def test_wrapper_raises_if_batch_first_is_false(self):
    with pytest.raises(ConfigurationError):
        lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7)
        _ = PytorchSeq2SeqWrapper(lstm)
def multitask_learning():
    # load datasetreader
    # Save logging to a local file
    # Multitasking
    log.getLogger().addHandler(log.FileHandler(directory + "/log.log"))

    lr = 0.00001
    batch_size = 2
    epochs = 10
    max_seq_len = 512
    max_span_width = 30

    # token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,)
    token_indexer = PretrainedBertIndexer("bert-base-cased", do_lowercase=False)
    conll_reader = ConllCorefBertReader(max_span_width=max_span_width,
                                        token_indexers={"tokens": token_indexer})
    swag_reader = SWAGDatasetReader(tokenizer=token_indexer.wordpiece_tokenizer,
                                    lazy=True,
                                    token_indexers=token_indexer)
    EMBEDDING_DIM = 1024
    HIDDEN_DIM = 200
    conll_datasets, swag_datasets = load_datasets(conll_reader, swag_reader, directory)

    conll_vocab = Vocabulary()
    swag_vocab = Vocabulary()
    conll_iterator = BasicIterator(batch_size=batch_size)
    conll_iterator.index_with(conll_vocab)

    swag_vocab = Vocabulary()
    swag_iterator = BasicIterator(batch_size=batch_size)
    swag_iterator.index_with(swag_vocab)

    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
    bert_embedder = PretrainedBertEmbedder(pretrained_model="bert-base-cased",
                                           top_layer_only=True,
                                           requires_grad=True)
    word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder}, allow_unmatched_keys=True)
    BERT_DIM = word_embedding.get_output_dim()

    seq2seq = PytorchSeq2SeqWrapper(torch.nn.LSTM(BERT_DIM, HIDDEN_DIM,
                                                  batch_first=True, bidirectional=True))
    seq2vec = PytorchSeq2VecWrapper(torch.nn.LSTM(BERT_DIM, HIDDEN_DIM,
                                                  batch_first=True, bidirectional=True))
    mention_feedforward = FeedForward(input_dim=2336, num_layers=2, hidden_dims=150,
                                      activations=torch.nn.ReLU())
    antecedent_feedforward = FeedForward(input_dim=7776, num_layers=2, hidden_dims=150,
                                         activations=torch.nn.ReLU())
    model1 = CoreferenceResolver(vocab=conll_vocab,
                                 text_field_embedder=word_embedding,
                                 context_layer=seq2seq,
                                 mention_feedforward=mention_feedforward,
                                 antecedent_feedforward=antecedent_feedforward,
                                 feature_size=768,
                                 max_span_width=max_span_width,
                                 spans_per_word=0.4,
                                 max_antecedents=250,
                                 lexical_dropout=0.2)
    model2 = SWAGExampleModel(vocab=swag_vocab,
                              text_field_embedder=word_embedding,
                              phrase_encoder=seq2vec)
    optimizer1 = optim.Adam(model1.parameters(), lr=lr)
    optimizer2 = optim.Adam(model2.parameters(), lr=lr)

    swag_train_iterator = swag_iterator(swag_datasets[0], num_epochs=1, shuffle=True)
    conll_train_iterator = conll_iterator(conll_datasets[0], num_epochs=1, shuffle=True)
    swag_val_iterator = swag_iterator(swag_datasets[1], num_epochs=1, shuffle=True)
    conll_val_iterator = conll_iterator(conll_datasets[1], num_epochs=1, shuffle=True)

    task_infos = {
        "swag": {"model": model2, "optimizer": optimizer2, "loss": 0.0,
                 "iterator": swag_iterator, "train_data": swag_datasets[0],
                 "val_data": swag_datasets[1], "num_train": len(swag_datasets[0]),
                 "num_val": len(swag_datasets[1]), "lr": lr,
                 "score": {"accuracy": 0.0}},
        "conll": {"model": model1, "iterator": conll_iterator, "loss": 0.0,
                  "val_data": conll_datasets[1], "train_data": conll_datasets[0],
                  "optimizer": optimizer1, "num_train": len(conll_datasets[0]),
                  "num_val": len(conll_datasets[1]), "lr": lr,
                  "score": {"coref_prediction": 0.0, "coref_recall": 0.0,
                            "coref_f1": 0.0, "mention_recall": 0.0}},
    }
    USE_GPU = 1
    trainer = MultiTaskTrainer(
        task_infos=task_infos,
        num_epochs=epochs,
        serialization_dir=directory + "saved_models/multitask/"
    )
    metrics = trainer.train()
def __init__(self,
             input_dim: int,
             hidden_dim: int,
             num_layers: int = 2,
             bias: bool = True,
             dropout: float = 0.0,
             bidirectional: bool = False,
             maxout: bool = False) -> None:
    super().__init__()
    self._input_dim = input_dim
    self._hidden_dim = hidden_dim
    self._num_layers = num_layers
    self._maxout = maxout
    self._num_directions = 2 if bidirectional else 1

    self._lstm_layers = [
        PytorchSeq2SeqWrapper(
            torch.nn.LSTM(
                input_dim,
                hidden_dim,
                num_layers=1,
                bias=bias,
                dropout=dropout,
                bidirectional=bidirectional,
                batch_first=True,
            ))
    ]
    if self._num_layers > 1:
        for _ in range(1, self._num_layers):
            self._lstm_layers.append(
                PytorchSeq2SeqWrapper(
                    torch.nn.LSTM(
                        self._num_directions * hidden_dim,
                        hidden_dim,
                        num_layers=1,
                        bias=bias,
                        dropout=dropout,
                        bidirectional=bidirectional,
                        batch_first=True,
                    )))
    for i, lstm_layer in enumerate(self._lstm_layers):
        self.add_module('lstm_layer_%d' % i, lstm_layer)
def gru_seq2seq(input_dim: int, output_dim: int,
                num_layers: int = 1,
                bidirectional: bool = False,
                dropout: float = 0.0) -> Seq2SeqEncoder:
    """
    Our encoder is going to be a GRU. We have to wrap it for AllenNLP, though.
    """
    return PytorchSeq2SeqWrapper(torch.nn.GRU(
        input_dim,
        output_dim,
        batch_first=True,
        num_layers=num_layers,
        bidirectional=bidirectional,
        dropout=dropout))
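# Illustrative usage only (not from the original source): run the wrapped GRU over a
# padded batch. The shapes below are arbitrary stand-ins.
import torch

encoder = gru_seq2seq(input_dim=50, output_dim=100, num_layers=2, bidirectional=True)
inputs = torch.rand(4, 12, 50)              # (batch_size, seq_len, input_dim)
mask = torch.ones(4, 12, dtype=torch.bool)  # every position is real (no padding)
encoded = encoder(inputs, mask)             # (4, 12, 200): 2 directions * output_dim
assert encoded.shape == (4, 12, 200)
assert encoder.get_output_dim() == 200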
def create_model(vocab):
    # prepare model
    EMBEDDING_DIM = 100
    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    HIDDEN_DIM = 100
    lstm = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
    model = LstmTagger(word_embeddings, lstm, vocab)
    return model
def test_forward_does_not_compress_tensors_padded_to_greater_than_the_max_sequence_length(self):
    lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
    encoder = PytorchSeq2SeqWrapper(lstm)
    input_tensor = torch.rand([5, 8, 3])
    input_tensor[:, 7, :] = 0
    mask = torch.ones(5, 8)
    mask[:, 7] = 0

    encoder_output = encoder(input_tensor, mask)
    assert encoder_output.size(1) == 8
def get_model(vocab: Vocabulary) -> CrfTagger:
    hidden_dimension = 256
    layers = 2
    bidirectional = True
    total_embedding_dim = 0

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                                embedding_dim=100,
                                trainable=True)
    total_embedding_dim += 100

    params = Params({
        "embedding": {
            "embedding_dim": 16,
            "vocab_namespace": "token_characters"
        },
        "encoder": {
            "type": "cnn",
            "embedding_dim": 16,
            "num_filters": 128,
            "ngram_filter_sizes": [3],
            "conv_layer_activation": "relu",
        },
    })
    char_embedding = TokenCharactersEncoder.from_params(vocab=vocab, params=params)
    total_embedding_dim += 128

    active_embedders = {
        "tokens": token_embedding,
        "token_characters": char_embedding,
    }
    word_embeddings = BasicTextFieldEmbedder(active_embedders)

    network = LSTM(total_embedding_dim,
                   hidden_dimension,
                   num_layers=layers,
                   batch_first=True,
                   bidirectional=bidirectional)
    encoder = PytorchSeq2SeqWrapper(network, stateful=True)

    # Finally, we can instantiate the model.
    model = CrfTagger(
        vocab=vocab,
        text_field_embedder=word_embeddings,
        encoder=encoder,
        label_encoding="BIO",
        constrain_crf_decoding=True,
        calculate_span_f1=True,
    )
    return model