def __init__(self, N_word, N_h, N_depth, gpu, use_hs):
        super().__init__(None)
        self.N_h = N_h
        self.gpu = gpu
        self.use_hs = use_hs

        self.q_lstm = PytorchSeq2SeqWrapper(nn.LSTM(input_size=N_word, hidden_size=N_h//2,
                num_layers=N_depth, batch_first=True,
                dropout=0.3, bidirectional=True))

        self.hs_lstm = PytorchSeq2SeqWrapper(nn.LSTM(input_size=N_word, hidden_size=N_h//2,
                num_layers=N_depth, batch_first=True,
                dropout=0.3, bidirectional=True))

        self.col_lstm = PytorchSeq2SeqWrapper(nn.LSTM(input_size=N_word, hidden_size=N_h//2,
                num_layers=N_depth, batch_first=True,
                dropout=0.3, bidirectional=True))

        self.q_att = nn.Linear(N_h, N_h)
        self.hs_att = nn.Linear(N_h, N_h)
        self.rt_out_q = nn.Linear(N_h, N_h)
        self.rt_out_hs = nn.Linear(N_h, N_h)
        self.rt_out_c = nn.Linear(N_h, N_h)
        self.rt_out = nn.Sequential(nn.Tanh(), nn.Linear(N_h, 2)) #for 2 operators

        self.softmax = nn.Softmax(dim=1)
        self.CE = nn.CrossEntropyLoss()
        self.log_softmax = nn.LogSoftmax()
        self.mlsml = nn.MultiLabelSoftMarginLoss()
        self.bce_logit = nn.BCEWithLogitsLoss()
        self.sigm = nn.Sigmoid()
        if gpu:
            self.cuda()
Example no. 2
    def test_get_output_dim(self):
        input_dim = 10
        hidden_dim = 15

        lstm = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(input_dim,
                          hidden_dim,
                          bidirectional=True,
                          batch_first=True))
        embedder = Seq2SeqEncoderTokenEmbedder(lstm)
        assert embedder.get_output_dim() == hidden_dim * 2

        lstm = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(input_dim,
                          hidden_dim,
                          bidirectional=False,
                          batch_first=True))
        embedder = Seq2SeqEncoderTokenEmbedder(lstm)
        assert embedder.get_output_dim() == hidden_dim

        lstm = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(input_dim,
                          hidden_dim,
                          bidirectional=True,
                          batch_first=True))
        embedder = Seq2SeqEncoderTokenEmbedder(lstm, 100)
        assert embedder.get_output_dim() == 100
Example no. 3
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 share_encoder: Seq2VecEncoder = None,
                 private_encoder: Seq2VecEncoder = None,
                 dropout: float = None,
                 input_dropout: float = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: RegularizerApplicator = None) -> None:
        super(JointSentimentClassifier, self).__init__(vocab=vocab,
                                                       regularizer=regularizer)

        self._text_field_embedder = text_field_embedder
        if share_encoder is None and private_encoder is None:
            share_rnn = nn.LSTM(
                input_size=self._text_field_embedder.get_output_dim(),
                hidden_size=150,
                batch_first=True,
                dropout=dropout,
                bidirectional=True)
            share_encoder = PytorchSeq2SeqWrapper(share_rnn)
            private_rnn = nn.LSTM(
                input_size=self._text_field_embedder.get_output_dim(),
                hidden_size=150,
                batch_first=True,
                dropout=dropout,
                bidirectional=True)
            private_encoder = PytorchSeq2SeqWrapper(private_rnn)
            logger.info("Using LSTM as encoder")
            self._domain_embeddings = Embedding(
                len(TASKS_NAME), self._text_field_embedder.get_output_dim())
        self._share_encoder = share_encoder

        self._s_domain_discriminator = Discriminator(
            share_encoder.get_output_dim(), len(TASKS_NAME))

        self._p_domain_discriminator = Discriminator(
            private_encoder.get_output_dim(), len(TASKS_NAME))

        # TODO individual valid discriminator
        self._valid_discriminator = Discriminator(
            self._domain_embeddings.get_output_dim(), 2)

        for task in TASKS_NAME:
            tagger = SentimentClassifier(
                vocab=vocab,
                text_field_embedder=self._text_field_embedder,
                share_encoder=self._share_encoder,
                private_encoder=copy.deepcopy(private_encoder),
                s_domain_discriminator=self._s_domain_discriminator,
                p_domain_discriminator=self._p_domain_discriminator,
                valid_discriminator=self._valid_discriminator,
                dropout=dropout,
                input_dropout=input_dropout,
                label_smoothing=0.1,
                initializer=initializer)
            self.add_module("_tagger_{}".format(task), tagger)

        logger.info("Multi-Task Learning Model has been instantiated.")
Example no. 4
    def test_get_dimension_is_correct(self):
        lstm = LSTM(bidirectional=True, num_layers=3, input_size=2, hidden_size=7, batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm)
        assert encoder.get_output_dim() == 14
        assert encoder.get_input_dim() == 2
        lstm = LSTM(bidirectional=False, num_layers=3, input_size=2, hidden_size=7, batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm)
        assert encoder.get_output_dim() == 7
        assert encoder.get_input_dim() == 2
Example no. 5
def get_masked_copynet_with_attention(vocab: Vocabulary,
                                      max_decoding_steps: int = 20,
                                      beam_size: int = 1) -> MaskedCopyNet:

    word_embeddings = Embedding(
        num_embeddings=vocab.get_vocab_size("tokens"),
        embedding_dim=EMB_DIM
    )
    word_embeddings = BasicTextFieldEmbedder({"tokens": word_embeddings})

    masker_embeddings = Embedding(
        num_embeddings=vocab.get_vocab_size("mask_tokens"),
        embedding_dim=MASK_EMB_DIM
    )
    masker_embeddings = BasicTextFieldEmbedder({"tokens": masker_embeddings})

    attention = AdditiveAttention(vector_dim=HID_DIM * 2, matrix_dim=HID_DIM * 2)
    mask_attention = AdditiveAttention(vector_dim=HID_DIM * 2, matrix_dim=MASK_EMB_DIM)
    lstm = PytorchSeq2SeqWrapper(nn.LSTM(EMB_DIM, HID_DIM, batch_first=True, bidirectional=True))

    return MaskedCopyNet(
        vocab=vocab,
        embedder=word_embeddings,
        encoder=lstm,
        max_decoding_steps=max_decoding_steps,
        attention=attention,
        mask_embedder=masker_embeddings,
        mask_attention=mask_attention,
        beam_size=beam_size
    )
Example no. 6
    def test_rnn_sentence_extractor(self):
        # Hyperparameters
        batch_size = 3
        num_sents = 5
        input_hidden_size = 7
        hidden_size = 11

        # Setup a model
        gru = GRU(input_size=input_hidden_size,
                  hidden_size=hidden_size,
                  bidirectional=True,
                  batch_first=True)
        rnn = PytorchSeq2SeqWrapper(gru)
        feed_forward = FeedForward(input_dim=hidden_size * 2,
                                   num_layers=2,
                                   hidden_dims=[10, 1],
                                   activations=[Activation.by_name('tanh')(), Activation.by_name('linear')()])
        extractor = RNNSentenceExtractor(rnn, feed_forward)

        # Setup some dummy data
        sentence_encodings = torch.randn(batch_size, num_sents, input_hidden_size)
        mask = torch.ones(batch_size, num_sents)

        # Pass the data through and verify the size of the output
        extraction_scores = extractor(sentence_encodings, mask)
        assert extraction_scores.size() == (batch_size, num_sents)
Example no. 7
    def test_forward_pulls_out_correct_tensor_for_unsorted_batches(self):
        lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm)
        tensor = torch.rand([5, 7, 3])
        tensor[0, 3:, :] = 0
        tensor[1, 4:, :] = 0
        tensor[2, 2:, :] = 0
        tensor[3, 6:, :] = 0
        mask = torch.ones(5, 7)
        mask[0, 3:] = 0
        mask[1, 4:] = 0
        mask[2, 2:] = 0
        mask[3, 6:] = 0

        input_tensor = Variable(tensor)
        mask = Variable(mask)
        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        sorted_inputs, sorted_sequence_lengths, restoration_indices, _ = sort_batch_by_length(input_tensor,
                                                                                              sequence_lengths)
        packed_sequence = pack_padded_sequence(sorted_inputs,
                                               sorted_sequence_lengths.data.tolist(),
                                               batch_first=True)
        lstm_output, _ = lstm(packed_sequence)
        encoder_output = encoder(input_tensor, mask)
        lstm_tensor, _ = pad_packed_sequence(lstm_output, batch_first=True)
        assert_almost_equal(encoder_output.data.numpy(),
                            lstm_tensor.index_select(0, restoration_indices).data.numpy())
Example no. 8
    def test_forward_pulls_out_correct_tensor_with_sequence_lengths(self):
        lstm = LSTM(bidirectional=True,
                    num_layers=3,
                    input_size=3,
                    hidden_size=7,
                    batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm)
        input_tensor = torch.rand([5, 7, 3])
        input_tensor[1, 6:, :] = 0
        input_tensor[2, 4:, :] = 0
        input_tensor[3, 2:, :] = 0
        input_tensor[4, 1:, :] = 0
        mask = torch.ones(5, 7).bool()
        mask[1, 6:] = False
        mask[2, 4:] = False
        mask[3, 2:] = False
        mask[4, 1:] = False

        sequence_lengths = get_lengths_from_binary_sequence_mask(mask)
        packed_sequence = pack_padded_sequence(input_tensor,
                                               sequence_lengths.data.tolist(),
                                               batch_first=True)
        lstm_output, _ = lstm(packed_sequence)
        encoder_output = encoder(input_tensor, mask)
        lstm_tensor, _ = pad_packed_sequence(lstm_output, batch_first=True)
        assert_almost_equal(encoder_output.data.numpy(),
                            lstm_tensor.data.numpy())
Example no. 9
    def __init__(self,
                 input_size: int,
                 hidden_size: int,
                 num_layers: int = 1,
                 dropout: float = 0.0,
                 residual: bool = True,
                 rnn_type: str = "lstm") -> None:
        super(ResidualRNN, self).__init__()

        self._input_size = input_size
        self._hidden_size = hidden_size
        self._dropout = torch.nn.Dropout(p=dropout)
        self._residual = residual

        rnn_type = rnn_type.lower()
        if rnn_type == "lstm":
            rnn_cell = torch.nn.LSTM
        elif rnn_type == "gru":
            rnn_cell = torch.nn.GRU
        else:
            raise ConfigurationError(f"Unknown RNN cell type {rnn_type}")

        layers = []
        for layer_index in range(num_layers):
            # Use hidden size on later layers so that the first layer projects and all other layers are residual
            input_ = input_size if layer_index == 0 else hidden_size
            rnn = rnn_cell(input_, hidden_size, bidirectional=True, batch_first=True)
            layer = PytorchSeq2SeqWrapper(rnn)
            layers.append(layer)
            self.add_module("rnn_layer_{}".format(layer_index), layer)
        self._layers = layers
Example no. 10
    def __init__(
        self,
        vocab: Vocabulary,
        embed: TextFieldEmbedder,
        encoder_size: int,
        decoder_size: int,
        num_layers: int,
        beam_size: int,
        max_decoding_steps: int,
        use_bleu: bool = True,
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:
        super().__init__(vocab)

        self.START, self.END = self.vocab.get_token_index(
            START_SYMBOL), self.vocab.get_token_index(END_SYMBOL)
        self.OOV = self.vocab.get_token_index(self.vocab._oov_token)  # pylint: disable=protected-access
        self.PAD = self.vocab.get_token_index(self.vocab._padding_token)  # pylint: disable=protected-access
        self.COPY = self.vocab.get_token_index("@@COPY@@")
        self.KEEP = self.vocab.get_token_index("@@KEEP@@")
        self.DROP = self.vocab.get_token_index("@@DROP@@")

        self.SYMBOL = (self.START, self.END, self.PAD, self.KEEP, self.DROP)
        self.vocab_size = vocab.get_vocab_size()
        self.EMB = embed

        self.emb_size = self.EMB.token_embedder_tokens.output_dim
        self.encoder_size, self.decoder_size = encoder_size, decoder_size
        self.FACT_ENCODER = FeedForward(3 * self.emb_size, 1, encoder_size,
                                        nn.Tanh())
        self.ATTN = AdditiveAttention(encoder_size + decoder_size,
                                      encoder_size)
        self.COPY_ATTN = AdditiveAttention(decoder_size, encoder_size)
        module = nn.LSTM(self.emb_size,
                         encoder_size // 2,
                         num_layers,
                         bidirectional=True,
                         batch_first=True)
        self.BUFFER = PytorchSeq2SeqWrapper(
            module)  # BiLSTM to encode draft text
        self.STREAM = nn.LSTMCell(2 * encoder_size,
                                  decoder_size)  # Store revised text

        self.BEAM = BeamSearch(self.END,
                               max_steps=max_decoding_steps,
                               beam_size=beam_size)

        self.U = nn.Sequential(nn.Linear(2 * encoder_size, decoder_size),
                               nn.Tanh())
        self.ADD = nn.Sequential(nn.Linear(self.emb_size, encoder_size),
                                 nn.Tanh())

        self.P = nn.Sequential(
            nn.Linear(encoder_size + decoder_size, decoder_size), nn.Tanh())
        self.W = nn.Linear(decoder_size, self.vocab_size)
        self.G = nn.Sequential(nn.Linear(decoder_size, 1), nn.Sigmoid())

        initializer(self)
        self._bleu = BLEU(
            exclude_indices=set(self.SYMBOL)) if use_bleu else None
Example no. 11
def trainModel(train_dataset, validation_dataset, vocab):
    EMBEDDING_DIM = 6
    HIDDEN_DIM = 6
    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    lstm = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, bidirectional=False, batch_first=True))
    model = LstmTagger(word_embeddings, lstm, vocab)
    if torch.cuda.is_available():
        cuda_device = 0
        model = model.cuda(cuda_device)
    else:
        cuda_device = -1
    # optimizer = optim.AdamW(model.parameters(), lr=1e-4, eps=1e-8)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    iterator = BucketIterator(batch_size=2, sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      validation_dataset=validation_dataset,
                      patience=10,
                      num_epochs=100,
                      cuda_device=cuda_device)
    trainer.train()
    return model
Example no. 12
def prepare1():
    """
    First part of preparing the data for training.
    :return: biLSTM model, vocabulary, training dataset, validation dataset, cuda device id,
             dataset reader
    """
    reader = PosDatasetReader()
    train_dataset = reader.read(train_path)
    validation_dataset = reader.read(validation_path)

    vocab = Vocabulary.from_instances(train_dataset + validation_dataset)

    EMBEDDING_DIM = 200
    HIDDEN_DIM = 200

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'), embedding_dim=EMBEDDING_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    lstm = PytorchSeq2SeqWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True))

    model = LstmTagger(word_embeddings, lstm, vocab)
    if torch.cuda.is_available():
        cuda_device = 0
        model = model.cuda(cuda_device)
    else:
        cuda_device = -1

    return model, vocab, train_dataset, validation_dataset, cuda_device, reader
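
A minimal usage sketch (not part of the original example), showing only the order in which prepare1 returns its values:

model, vocab, train_dataset, validation_dataset, cuda_device, reader = prepare1()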
Example no. 13
def running_NER():
    reader = PosDatasetReader()
    train_dataset = reader.read('../data/700_multi_data/600_ner_train.txt')
    validation_dataset = reader.read('../data/700_multi_data/66_ner_test.txt')

    vocab = Vocabulary.from_files("../model_store/vocabulary")

    # '''vocab part'''
    # train_1 = reader.read('../data/train/train.json')
    # train_2 = reader.read('../data/train/dev.json')

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
    model = LstmTagger(word_embeddings, lstm, vocab)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    iterator = BucketIterator(batch_size=2,
                              sorting_keys=[("sentence", "num_tokens")])
    iterator.index_with(vocab)
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      validation_dataset=validation_dataset,
                      patience=10,
                      num_epochs=1000)
    trainer.train()
Example no. 14
    def test_forward_pulls_out_correct_tensor_without_sequence_lengths(self):
        lstm = LSTM(bidirectional=True, num_layers=3, input_size=2, hidden_size=7, batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm)
        input_tensor = Variable(torch.FloatTensor([[[.7, .8], [.1, 1.5]]]))
        lstm_output = lstm(input_tensor)
        encoder_output = encoder(input_tensor, None)
        assert_almost_equal(encoder_output.data.numpy(), lstm_output[0].data.numpy())
Example no. 15
    def test_forward_works_even_with_empty_sequences(self):
        lstm = LSTM(bidirectional=True,
                    num_layers=3,
                    input_size=3,
                    hidden_size=7,
                    batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm)

        tensor = torch.autograd.Variable(torch.rand([5, 7, 3]))
        tensor[1, 6:, :] = 0
        tensor[2, :, :] = 0
        tensor[3, 2:, :] = 0
        tensor[4, :, :] = 0
        mask = torch.autograd.Variable(torch.ones(5, 7))
        mask[1, 6:] = 0
        mask[2, :] = 0
        mask[3, 2:] = 0
        mask[4, :] = 0

        results = encoder.forward(tensor, mask)

        for i in (0, 1, 3):
            assert not (results[i] == 0.).data.all()
        for i in (2, 4):
            assert (results[i] == 0.).data.all()
Example no. 16
def create_seq2seqmodel(vocab,
                        src_embedders,
                        tgt_embedders,
                        hidden_dim=100,
                        num_layers=1,
                        encoder=None,
                        max_decoding_steps=20,
                        beam_size=1,
                        use_bleu=True,
                        device=0):
    encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(src_embedders.get_output_dim(),
                      hidden_dim,
                      batch_first=True))
    model = SimpleSeq2Seq(vocab,
                          src_embedders,
                          encoder,
                          max_decoding_steps,
                          target_namespace="target_tokens",
                          target_embedding_dim=tgt_embedders.get_output_dim(),
                          beam_size=beam_size,
                          use_bleu=use_bleu)
    # encoder = BartEncoder('facebook/bart-base', use_pretrained_embeddings=True)
    # encoder = PretrainedTransformerEmbedder(model_name='facebook/bart-base', sub_module="encoder")
    # model = Bart(model_name='facebook/bart-base', vocab=vocab, max_decoding_steps=max_decoding_steps,
    #              beam_size=beam_size, encoder=encoder)
    model.to(device)
    return model
Example no. 17
def create_seq2seqmodel(vocab,
                        src_embedders,
                        tgt_embedders,
                        hidden_dim=100,
                        num_layers=1,
                        max_decoding_steps=20,
                        beam_size=1,
                        use_bleu=True,
                        device=0):
    encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(src_embedders.get_output_dim(),
                      hidden_dim,
                      batch_first=True))
    decoder_net = LstmCellDecoderNet(
        decoding_dim=encoder.get_output_dim(),
        target_embedding_dim=tgt_embedders.get_output_dim())
    decoder = AutoRegressiveSeqDecoder(vocab,
                                       decoder_net,
                                       max_decoding_steps,
                                       tgt_embedders,
                                       beam_size=beam_size)
    model = ComposedSeq2Seq(vocab, src_embedders, encoder, decoder)
    # model = SimpleSeq2Seq(vocab, src_embedders, encoder, max_decoding_steps, target_namespace="target_tokens",
    #                        target_embedding_dim=tgt_embedders.get_output_dim(), beam_size=beam_size,
    #                        use_bleu=use_bleu)
    model.to(device)
    return model
Example no. 18
    def test_wrapper_stateful(self):
        lstm = LSTM(bidirectional=True,
                    num_layers=2,
                    input_size=3,
                    hidden_size=7,
                    batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm, stateful=True)

        # To test the stateful functionality we need to call the encoder multiple times.
        # Different batch sizes further test some of the logic.
        batch_sizes = [5, 10, 8]
        sequence_lengths = [4, 6, 7]
        states = []
        for batch_size, sequence_length in zip(batch_sizes, sequence_lengths):
            tensor = Variable(torch.rand([batch_size, sequence_length, 3]))
            mask = Variable(torch.ones(batch_size, sequence_length))
            mask.data[0, 3:] = 0
            encoder_output = encoder(tensor, mask)
            states.append(encoder._states)  # pylint: disable=protected-access

        # Check that the output is masked properly.
        assert_almost_equal(encoder_output[0, 3:, :].data.numpy(),
                            numpy.zeros((4, 14)))

        for k in range(2):
            assert_almost_equal(states[-1][k][:, -2:, :].data.numpy(),
                                states[-2][k][:, -2:, :].data.numpy())
Example no. 19
def build_seq2seq_model(flags,
                        data_reader,
                        vocab: Vocabulary,
                        source_namespace: str = 'source_tokens',
                        target_namespace: str = 'target_tokens') -> Model:
    source_embedding = Embedding(
        vocab.get_vocab_size(namespace=source_namespace),
        embedding_dim=flags.source_embedding_dim)
    source_embedder = BasicTextFieldEmbedder({'tokens': source_embedding})
    lstm_encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(flags.source_embedding_dim,
                      flags.encoder_hidden_dim,
                      batch_first=True,
                      bidirectional=flags.encoder_bidirectional))
    attention = DotProductAttention()
    model = SimpleSeq2Seq(vocab,
                          source_embedder,
                          lstm_encoder,
                          flags.max_decode_length,
                          target_embedding_dim=flags.decoder_hidden_dim,
                          target_namespace=target_namespace,
                          attention=attention,
                          beam_size=flags.beam_size,
                          use_bleu=True)
    return model
Example no. 20
def generate_res_file():
    reader = PosDatasetReader()
    vocab = Vocabulary.from_files("../model_store/vocabulary")

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))

    model2 = LstmTagger(word_embeddings, lstm, vocab)

    with open("../model_store/model.th", 'rb') as f:
        model2.load_state_dict(torch.load(f))
    predictor2 = SentenceTaggerPredictor(model2, dataset_reader=reader)

    train_read_file = open('../data/only_sentence/raw_test.json', 'r')
    train_write_file = open('../data/only_sentence/ner_test.json', 'w')
    for line in train_read_file:
        tag_logits2 = predictor2.predict(
            line.replace('.', '').replace(',', '').replace('\n',
                                                           ''))['tag_logits']
        tag_ids = np.argmax(tag_logits2, axis=-1)
        res = [model2.vocab.get_token_from_index(i, 'labels') for i in tag_ids]
        for i in range(len(res)):
            train_write_file.write(res[i] + ' ')
        # train_write_file.write(str(tag_logits2))
        train_write_file.write('\n')
        train_write_file.flush()
    train_read_file.close()
    train_write_file.close()
    print('finish')


# generate_res_file()
Example no. 21
def get_encoder(input_dim, output_dim, encoder_type, args):
    if encoder_type == "pass":
        return PassThroughEncoder(input_dim)
    if encoder_type == "bilstm":
        return PytorchSeq2SeqWrapper(
            AllenNLPSequential(torch.nn.ModuleList(
                [get_encoder(input_dim, output_dim, "bilstm-unwrapped",
                             args)]),
                               input_dim,
                               output_dim,
                               bidirectional=True,
                               residual_connection=args.residual_connection,
                               dropout=args.dropout))
    if encoder_type == "bilstm-unwrapped":
        return torch.nn.LSTM(
            input_dim,
            output_dim,
            batch_first=True,
            bidirectional=True,
            dropout=args.dropout,
        )
    if encoder_type == "self_attention":
        return IntraSentenceAttentionEncoder(input_dim=input_dim,
                                             projection_dim=output_dim)
    if encoder_type == "stacked_self_attention":
        return StackedSelfAttentionEncoder(
            input_dim=input_dim,
            hidden_dim=output_dim,
            projection_dim=output_dim,
            feedforward_hidden_dim=output_dim,
            num_attention_heads=5,
            num_layers=3,
            dropout_prob=args.dropout,
        )
    raise RuntimeError(f"Unknown encoder type={encoder_type}")
Example no. 22
    def __init__(self,
                 embedder: TextFieldEmbedder,
                 hidden_dim: int,
                 latent_dim: int,
                 vocab: Vocabulary,
                 device: torch.device,
                 word_dropout_rate: float = 0.2,
                 anneal_steps: int = 500,
                 embedding_dropout_rate: float = 0.0):
        super().__init__(vocab)

        self.embedder = embedder
        self.embedding_dim = embedder.get_output_dim()
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim

        self.vocab = vocab
        self.label_size = self.vocab.get_vocab_size("class_labels")

        self.device = device
        self.word_dropout_rate = word_dropout_rate
        self.anneal_steps = anneal_steps
        self.embedding_dropout = nn.Dropout(embedding_dropout_rate)

        self.encoder_rnn = PytorchSeq2SeqWrapper(
            torch.nn.GRU(self.embedding_dim,
                         self.hidden_dim,
                         num_layers=1,
                         batch_first=True))

        self.decoder_rnn = PytorchSeq2SeqWrapper(
            torch.nn.GRU(self.embedding_dim,
                         self.hidden_dim,
                         num_layers=1,
                         batch_first=True))

        self.hidden2mean = nn.Linear(self.hidden_dim, self.latent_dim)
        self.hidden2log_var = nn.Linear(self.hidden_dim, self.latent_dim)

        self.latent2hidden = nn.Linear(self.latent_dim + self.label_size,
                                       self.hidden_dim)
        self.outputs2vocab = nn.Linear(self.hidden_dim,
                                       self.vocab.get_vocab_size())

        self.metrics = {}
        self.step = 0
Example no. 23
    def get_wrapped_encoder(encoder_list):
        return PytorchSeq2SeqWrapper(
            AllenNLPSequential(torch.nn.ModuleList(encoder_list),
                               elmo_embedding_dim,
                               hidden_dim,
                               bidirectional=True,
                               residual_connection=residual_connection,
                               dropout=dropout))
Example no. 24
    def test_wrapper_raises_if_batch_first_is_false(self):

        with pytest.raises(ConfigurationError):
            lstm = LSTM(bidirectional=True,
                        num_layers=3,
                        input_size=3,
                        hidden_size=7)
            _ = PytorchSeq2SeqWrapper(lstm)
Example no. 25
def multitask_learning():
    # load datasetreader 
    # Save logging to a local file
    # Multitasking
    log.getLogger().addHandler(log.FileHandler(directory+"/log.log"))

    lr = 0.00001
    batch_size = 2
    epochs = 10 
    max_seq_len = 512
    max_span_width = 30
    #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,)
    token_indexer = PretrainedBertIndexer("bert-base-cased", do_lowercase=False)
    conll_reader = ConllCorefBertReader(max_span_width = max_span_width, token_indexers = {"tokens": token_indexer})
    swag_reader = SWAGDatasetReader(tokenizer=token_indexer.wordpiece_tokenizer,lazy=True, token_indexers=token_indexer)
    EMBEDDING_DIM = 1024
    HIDDEN_DIM = 200
    conll_datasets, swag_datasets = load_datasets(conll_reader, swag_reader, directory)
    conll_vocab = Vocabulary()
    swag_vocab = Vocabulary()
    conll_iterator = BasicIterator(batch_size=batch_size)
    conll_iterator.index_with(conll_vocab)

    swag_vocab = Vocabulary()
    swag_iterator = BasicIterator(batch_size=batch_size)
    swag_iterator.index_with(swag_vocab)


    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder

    bert_embedder = PretrainedBertEmbedder(pretrained_model="bert-base-cased",top_layer_only=True, requires_grad=True)

    word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder}, allow_unmatched_keys=True)
    BERT_DIM = word_embedding.get_output_dim()

    seq2seq = PytorchSeq2SeqWrapper(torch.nn.LSTM(BERT_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True))
    seq2vec = PytorchSeq2VecWrapper(torch.nn.LSTM(BERT_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True))
    mention_feedforward = FeedForward(input_dim = 2336, num_layers = 2, hidden_dims = 150, activations = torch.nn.ReLU())
    antecedent_feedforward = FeedForward(input_dim = 7776, num_layers = 2, hidden_dims = 150, activations = torch.nn.ReLU())
    model1 = CoreferenceResolver(vocab=conll_vocab,
                                 text_field_embedder=word_embedding,
                                 context_layer=seq2seq,
                                 mention_feedforward=mention_feedforward,
                                 antecedent_feedforward=antecedent_feedforward,
                                 feature_size=768,
                                 max_span_width=max_span_width,
                                 spans_per_word=0.4,
                                 max_antecedents=250,
                                 lexical_dropout=0.2)

    model2 = SWAGExampleModel(vocab=swag_vocab, text_field_embedder=word_embedding, phrase_encoder=seq2vec)
    optimizer1 = optim.Adam(model1.parameters(), lr=lr)
    optimizer2 = optim.Adam(model2.parameters(), lr=lr)

    swag_train_iterator = swag_iterator(swag_datasets[0], num_epochs=1, shuffle=True)
    conll_train_iterator = conll_iterator(conll_datasets[0], num_epochs=1, shuffle=True)
    swag_val_iterator = swag_iterator(swag_datasets[1], num_epochs=1, shuffle=True)
    conll_val_iterator = conll_iterator(conll_datasets[1], num_epochs=1, shuffle=True)
    task_infos = {
        "swag": {"model": model2, "optimizer": optimizer2, "loss": 0.0,
                 "iterator": swag_iterator, "train_data": swag_datasets[0],
                 "val_data": swag_datasets[1], "num_train": len(swag_datasets[0]),
                 "num_val": len(swag_datasets[1]), "lr": lr,
                 "score": {"accuracy": 0.0}},
        "conll": {"model": model1, "iterator": conll_iterator, "loss": 0.0,
                  "val_data": conll_datasets[1], "train_data": conll_datasets[0],
                  "optimizer": optimizer1, "num_train": len(conll_datasets[0]),
                  "num_val": len(conll_datasets[1]), "lr": lr,
                  "score": {"coref_prediction": 0.0, "coref_recall": 0.0,
                            "coref_f1": 0.0, "mention_recall": 0.0}},
    }
    USE_GPU = 1
    trainer = MultiTaskTrainer(
        task_infos=task_infos, 
        num_epochs=epochs,
        serialization_dir=directory + "saved_models/multitask/"
    ) 
    metrics = trainer.train()
Example no. 26
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 num_layers: int = 2,
                 bias: bool = True,
                 dropout: float = 0.0,
                 bidirectional: bool = False,
                 maxout: bool = False) -> None:
        super().__init__()
        self._input_dim = input_dim
        self._hidden_dim = hidden_dim
        self._num_layers = num_layers
        self._maxout = maxout

        self._num_directions = 2 if bidirectional else 1

        self._lstm_layers = [
            PytorchSeq2SeqWrapper(
                torch.nn.LSTM(
                    input_dim,
                    hidden_dim,
                    num_layers=1,
                    bias=bias,
                    dropout=dropout,
                    bidirectional=bidirectional,
                    batch_first=True,
                ))
        ]
        if self._num_layers > 1:
            for _ in range(1, self._num_layers):
                self._lstm_layers.append(
                    PytorchSeq2SeqWrapper(
                        torch.nn.LSTM(
                            self._num_directions * hidden_dim,
                            hidden_dim,
                            num_layers=1,
                            bias=bias,
                            dropout=dropout,
                            bidirectional=bidirectional,
                            batch_first=True,
                        )))
        for i, lstm_layer in enumerate(self._lstm_layers):
            self.add_module('lstm_layer_%d' % i, lstm_layer)
Example no. 27
def gru_seq2seq(input_dim: int, output_dim: int, num_layers: int = 1,
                bidirectional: bool = False, dropout: float = 0.0
                ) -> Seq2SeqEncoder:
    """
    Our encoder is going to be a GRU. We have to wrap it for AllenNLP,
    though.
    """
    return PytorchSeq2SeqWrapper(torch.nn.GRU(
        input_dim, output_dim, batch_first=True, num_layers=num_layers,
        bidirectional=bidirectional, dropout=dropout))
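
A minimal usage sketch (assumed, not from the original example): for a bidirectional GRU the wrapper reports twice the hidden size as its output dimension.

# Illustrative only: the dimensions here are assumptions, not part of the original snippet.
encoder = gru_seq2seq(input_dim=100, output_dim=50, num_layers=2, bidirectional=True)
assert encoder.get_input_dim() == 100
assert encoder.get_output_dim() == 100  # 2 * 50 because bidirectional=True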
Example no. 28
def create_model(vocab):
    # prepare model
    EMBEDDING_DIM = 100
    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    HIDDEN_DIM = 100
    lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
    model = LstmTagger(word_embeddings, lstm, vocab)
    return model
Example no. 29
    def test_forward_does_not_compress_tensors_padded_to_greater_than_the_max_sequence_length(self):

        lstm = LSTM(bidirectional=True, num_layers=3, input_size=3, hidden_size=7, batch_first=True)
        encoder = PytorchSeq2SeqWrapper(lstm)
        input_tensor = torch.rand([5, 8, 3])
        input_tensor[:, 7, :] = 0
        mask = torch.ones(5, 8)
        mask[:, 7] = 0

        encoder_output = encoder(input_tensor, mask)
        assert encoder_output.size(1) == 8
Example no. 30
def get_model(vocab: Vocabulary) -> CrfTagger:
    hidden_dimension = 256
    layers = 2
    bidirectional = True
    total_embedding_dim = 0

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                                embedding_dim=100,
                                trainable=True)

    total_embedding_dim += 100

    params = Params({
        "embedding": {
            "embedding_dim": 16,
            "vocab_namespace": "token_characters"
        },
        "encoder": {
            "type": "cnn",
            "embedding_dim": 16,
            "num_filters": 128,
            "ngram_filter_sizes": [3],
            "conv_layer_activation": "relu",
        },
    })
    char_embedding = TokenCharactersEncoder.from_params(vocab=vocab,
                                                        params=params)
    total_embedding_dim += 128

    active_embedders = {
        "tokens": token_embedding,
        "token_characters": char_embedding,
    }

    word_embeddings = BasicTextFieldEmbedder(active_embedders)

    network = LSTM(total_embedding_dim,
                   hidden_dimension,
                   num_layers=layers,
                   batch_first=True,
                   bidirectional=bidirectional)

    encoder = PytorchSeq2SeqWrapper(network, stateful=True)

    # Finally, we can instantiate the model.
    model = CrfTagger(
        vocab=vocab,
        text_field_embedder=word_embeddings,
        encoder=encoder,
        label_encoding="BIO",
        constrain_crf_decoding=True,
        calculate_span_f1=True,
    )
    return model