Code example #1
 def test_lstms_are_interleaved(self):
     lstm = StackedAlternatingLstm(3, 7, 8)
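     # Layers alternate direction: even-indexed layers run forward in time, odd-indexed layers backward.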
     for i, layer in enumerate(lstm.lstm_layers):
         if i % 2 == 0:
             assert layer.go_forward
         else:
             assert not layer.go_forward
Code example #2
    def test_wrapper_works_with_alternating_lstm(self):
        model = PytorchSeq2VecWrapper(
            StackedAlternatingLstm(input_size=4, hidden_size=5, num_layers=3))

        input_tensor = torch.randn(2, 3, 4)
        mask = torch.ones(2, 3).bool()
        output = model(input_tensor, mask)
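        # A Seq2Vec wrapper returns one vector per sequence (the final hidden state), so the time dimension is gone.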
        assert tuple(output.size()) == (2, 5)
Code example #3
 def test_stacked_alternating_lstm_completes_forward_pass(self):
     input_tensor = torch.rand(4, 5, 3)
     input_tensor[1, 4:, :] = 0.0
     input_tensor[2, 2:, :] = 0.0
     input_tensor[3, 1:, :] = 0.0
     input_tensor = pack_padded_sequence(input_tensor, [5, 4, 2, 1], batch_first=True)
     lstm = StackedAlternatingLstm(3, 7, 3)
     output, _ = lstm(input_tensor)
     output_sequence, _ = pad_packed_sequence(output, batch_first=True)
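     # Padded positions beyond each sequence's true length must come back as exact zeros.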
     numpy.testing.assert_array_equal(output_sequence.data[1, 4:, :].numpy(), 0.0)
     numpy.testing.assert_array_equal(output_sequence.data[2, 2:, :].numpy(), 0.0)
     numpy.testing.assert_array_equal(output_sequence.data[3, 1:, :].numpy(), 0.0)
Code example #4
    def get_models_and_inputs(batch_size, input_size, output_size, num_layers,
                              timesteps, dropout_prob):

        # Import is here because the layer requires a GPU.
        from allennlp.modules.alternating_highway_lstm import AlternatingHighwayLSTM

        baseline = StackedAlternatingLstm(
            input_size,
            output_size,
            num_layers,
            dropout_prob,
            use_input_projection_bias=False).cuda()
        kernel_version = AlternatingHighwayLSTM(input_size, output_size,
                                                num_layers,
                                                dropout_prob).cuda()

        # Copy weights from non-cuda version into cuda version,
        # so we are starting from exactly the same place.
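        # The fused kernel stores all layer parameters in flat weight/bias tensors, laid out
        # per layer as [input projection, state projection] and transposed relative to the
        # per-layer Linear modules, hence the running offsets below.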
        weight_index = 0
        bias_index = 0
        for layer_index in range(num_layers):

            layer = getattr(baseline, "layer_%d" % layer_index)
            input_weight = layer.input_linearity.weight
            state_weight = layer.state_linearity.weight
            bias = layer.state_linearity.bias

            kernel_version.weight.data[weight_index: weight_index + input_weight.nelement()]\
                .view_as(input_weight.t()).copy_(input_weight.data.t())
            weight_index += input_weight.nelement()

            kernel_version.weight.data[weight_index: weight_index + state_weight.nelement()]\
                .view_as(state_weight.t()).copy_(state_weight.data.t())
            weight_index += state_weight.nelement()

            kernel_version.bias.data[bias_index:bias_index +
                                     bias.nelement()].copy_(bias.data)
            bias_index += bias.nelement()

        baseline_input = torch.randn(batch_size,
                                     timesteps,
                                     input_size,
                                     requires_grad=True).cuda()
        # Clone variable so different models are
        # completely separate in the graph.
        kernel_version_input = baseline_input.clone()

        lengths = [timesteps - i // 2 for i in range(batch_size)]

        return baseline, kernel_version, baseline_input, kernel_version_input, lengths
Code example #5
 def __init__(
     self,
     input_size: int,
     hidden_size: int,
     num_layers: int,
     recurrent_dropout_probability: float = 0.0,
     use_highway: bool = True,
     use_input_projection_bias: bool = True,
 ) -> None:
     module = StackedAlternatingLstm(
         input_size=input_size,
         hidden_size=hidden_size,
         num_layers=num_layers,
         recurrent_dropout_probability=recurrent_dropout_probability,
         use_highway=use_highway,
         use_input_projection_bias=use_input_projection_bias,
     )
     super().__init__(module=module)
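
The constructor above presumably belongs to a Seq2SeqEncoder wrapper whose enclosing class is not shown in the excerpt. Below is a minimal usage sketch of the same wiring assembled by hand, assuming the AllenNLP 0.x import paths used elsewhere on this page; the shapes are illustrative only.

import torch
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.stacked_alternating_lstm import StackedAlternatingLstm

# Wrap the stacked alternating LSTM as a Seq2Seq encoder (what the constructor above appears to do via super()).
encoder = PytorchSeq2SeqWrapper(
    StackedAlternatingLstm(input_size=4, hidden_size=6, num_layers=3))
inputs = torch.randn(2, 5, 4)      # (batch_size, timesteps, input_size)
mask = torch.ones(2, 5).bool()     # no padding in this toy batch
outputs = encoder(inputs, mask)    # per-timestep outputs: (2, 5, 6)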
Code example #6
File: model.py  Project: CaiYufan-sjtu/GCNOIE
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 binary_feature_dim: int,
                 embedding_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 label_smoothing: Optional[float] = None,
                 ignore_span_metric: bool = False,
                 srl_eval_path: str = DEFAULT_SRL_EVAL_PATH) -> None:
        super(GCN_model, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")

        if srl_eval_path is not None:
            # For the span based evaluation, we don't want to consider labels
            # for verb, because the verb index is provided to the model.
            self.span_metric = SrlEvalScorer(srl_eval_path, ignore_classes=["V"])
        else:
            self.span_metric = None

        self.encoder = encoder
        self.gcn_layer = GCN(nfeat=self.encoder.get_output_dim(), nhid=200, nclass=64, dropout=0.1)
        self.decoder = PytorchSeq2SeqWrapper(
            StackedAlternatingLstm(input_size=64, hidden_size=32,
                                   num_layers=2, recurrent_dropout_probability=0.1, use_highway=True))
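        # The GCN's output dim (nclass=64) must match the decoder LSTM's input_size, and the
        # decoder's hidden_size (32) must match the tag projection layer's input dim below.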
        self.tag_projection_layer = TimeDistributed(Linear(32, self.num_classes))
        # self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(), self.num_classes))

        # There are exactly 2 binary features for the verb predicate embedding.
        self.binary_feature_embedding = Embedding(2, binary_feature_dim)
        self.embedding_dropout = Dropout(p=embedding_dropout)
        self._label_smoothing = label_smoothing
        self.ignore_span_metric = ignore_span_metric

        check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim,
                               encoder.get_input_dim(),
                               "text embedding dim + verb indicator embedding dim",
                               "encoder input dim")
        initializer(self)
Code example #7
def main():
    parser = create_parser()
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    model_id = create_model_id(args)

    if not path.exists(args.out_dir):
        print("# Create directory: {}".format(args.out_dir))
        os.mkdir(args.out_dir)

    # log file
    out_dir = path.join(args.out_dir, "out-" + model_id)
    print("# Create output directory: {}".format(out_dir))
    os.mkdir(out_dir)
    log = StandardLogger(path.join(out_dir, "log-" + model_id + ".txt"))
    log.write(args=args)
    write_args_log(args, path.join(out_dir, "args.json"))

    # dataset reader
    token_indexers = {
        "tokens": SingleIdTokenIndexer(),
        "elmo": ELMoTokenCharactersIndexer(),
        "bert": PretrainedBertIndexer(BERT_MODEL, use_starting_offsets=True),
        "xlnet": PretrainedTransformerIndexer(XLNET_MODEL, do_lowercase=False)
    }

    reader = SrlDatasetReader(token_indexers)

    # dataset
    train_dataset = reader.read_with_ratio(args.train, args.data_ratio)
    validation_dataset = reader.read_with_ratio(args.dev, 100)
    pseudo_dataset = reader.read_with_ratio(
        args.pseudo, args.data_ratio) if args.pseudo else []
    all_dataset = train_dataset + validation_dataset + pseudo_dataset
    if args.test:
        test_dataset = reader.read_with_ratio(args.test, 100)
        all_dataset += test_dataset

    vocab = Vocabulary.from_instances(all_dataset)

    # embedding
    input_size = args.binary_dim * 2 if args.multi_predicate else args.binary_dim
    if args.glove:
        token_embedding = Embedding(
            num_embeddings=vocab.get_vocab_size('tokens'),
            embedding_dim=GLOVE_DIM,
            trainable=True,
            pretrained_file=GLOVE)
        input_size += GLOVE_DIM
    else:
        token_embedding = Embedding(
            num_embeddings=vocab.get_vocab_size('tokens'),
            embedding_dim=args.embed_dim,
            trainable=True)
        input_size += args.embed_dim
    token_embedders = {"tokens": token_embedding}

    if args.elmo:
        elmo_embedding = ElmoTokenEmbedder(options_file=ELMO_OPT,
                                           weight_file=ELMO_WEIGHT)
        token_embedders["elmo"] = elmo_embedding
        input_size += ELMO_DIM

    if args.bert:
        bert_embedding = PretrainedBertEmbedder(BERT_MODEL)
        token_embedders["bert"] = bert_embedding
        input_size += BERT_DIM

    if args.xlnet:
        xlnet_embedding = PretrainedTransformerEmbedder(XLNET_MODEL)
        token_embedders["xlnet"] = xlnet_embedding
        input_size += XLNET_DIM

    word_embeddings = BasicTextFieldEmbedder(token_embedders=token_embedders,
                                             allow_unmatched_keys=True,
                                             embedder_to_indexer_map={
                                                 "bert":
                                                 ["bert", "bert-offsets"],
                                                 "elmo": ["elmo"],
                                                 "tokens": ["tokens"],
                                                 "xlnet": ["xlnet"]
                                             })
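    # embedder_to_indexer_map ties each embedder to the indexer outputs it consumes;
    # the BERT embedder additionally receives the wordpiece offsets produced by PretrainedBertIndexer.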
    # encoder
    if args.highway:
        lstm = PytorchSeq2SeqWrapper(
            StackedAlternatingLstm(input_size=input_size,
                                   hidden_size=args.hidden_dim,
                                   num_layers=args.n_layers,
                                   recurrent_dropout_probability=args.dropout))
    else:
        pytorch_lstm = torch.nn.LSTM(input_size=input_size,
                                     hidden_size=args.hidden_dim,
                                     num_layers=int(args.n_layers / 2),
                                     batch_first=True,
                                     dropout=args.dropout,
                                     bidirectional=True)
        # initialize
        for name, param in pytorch_lstm.named_parameters():
            if 'weight_ih' in name:
                torch.nn.init.xavier_uniform_(param.data)
            elif 'weight_hh' in name:
                # W_hi, W_hf, W_hg, W_ho: orthogonal init for each hidden-to-hidden gate block
                for n in range(4):
                    torch.nn.init.orthogonal_(
                        param.data[args.hidden_dim * n:args.hidden_dim *
                                   (n + 1)])
            elif 'bias' in name:
                param.data.fill_(0)

        lstm = PytorchSeq2SeqWrapper(pytorch_lstm)

    # model
    hidden_dim = args.hidden_dim if args.highway else args.hidden_dim * 2  # the bidirectional torch.nn.LSTM concatenates both directions, so the output dim doubles
    model = SemanticRoleLabelerWithAttention(
        vocab=vocab,
        text_field_embedder=word_embeddings,
        encoder=lstm,
        binary_feature_dim=args.binary_dim,
        embedding_dropout=args.embed_dropout,
        attention_dropout=0.0,
        use_attention=args.attention,
        use_multi_predicate=args.multi_predicate,
        hidden_dim=hidden_dim)

    if args.model:
        print("# Load model parameter: {}".format(args.model))
        with open(args.model, 'rb') as f:
            state_dict = torch.load(f, map_location='cpu')
            model.load_state_dict(state_dict)

    if torch.cuda.is_available():
        cuda_device = 0
        model = model.cuda(cuda_device)
    else:
        cuda_device = -1

    # optimizer
    if args.optimizer == "Adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    elif args.optimizer == "SGD":
        optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate)
    elif args.optimizer == "Adadelta":
        optimizer = torch.optim.Adadelta(model.parameters(), rho=0.95)
    else:
        raise ValueError("unsupported value: '{}'".format(args.optimizer))

    # iterator
    # iterator = BucketIterator(batch_size=args.batch, sorting_keys=[("tokens", "num_tokens")])
    iterator = BasicIterator(batch_size=args.batch)
    iterator.index_with(vocab)

    if not args.test_only:
        # Train
        print("# Train Method: {}".format(args.train_method))
        print("# Start Train", flush=True)
        if args.train_method == "concat":
            trainer = Trainer(model=model,
                              optimizer=optimizer,
                              iterator=iterator,
                              train_dataset=train_dataset + pseudo_dataset,
                              validation_dataset=validation_dataset,
                              validation_metric="+f1-measure-overall",
                              patience=args.early_stopping,
                              num_epochs=args.max_epoch,
                              num_serialized_models_to_keep=5,
                              grad_clipping=args.grad_clipping,
                              serialization_dir=out_dir,
                              cuda_device=cuda_device)
            trainer.train()
        elif args.train_method == "pre-train":
            pre_train_out_dir = path.join(out_dir, "pre-train")
            fine_tune_out_dir = path.join(out_dir, "fine-tune")
            os.mkdir(pre_train_out_dir)
            os.mkdir(fine_tune_out_dir)

            trainer = Trainer(model=model,
                              optimizer=optimizer,
                              iterator=iterator,
                              train_dataset=pseudo_dataset,
                              validation_dataset=validation_dataset,
                              validation_metric="+f1-measure-overall",
                              patience=args.early_stopping,
                              num_epochs=args.max_epoch,
                              num_serialized_models_to_keep=3,
                              grad_clipping=args.grad_clipping,
                              serialization_dir=pre_train_out_dir,
                              cuda_device=cuda_device)
            trainer.train()

            if args.optimizer == "Adam":
                optimizer = torch.optim.Adam(model.parameters(),
                                             lr=args.learning_rate)
            elif args.optimizer == "SGD":
                optimizer = torch.optim.SGD(model.parameters(),
                                            lr=args.learning_rate)
            elif args.optimizer == "Adadelta":
                optimizer = torch.optim.Adadelta(model.parameters(), rho=0.95)
            else:
                raise ValueError("unsupported value: '{}'".format(
                    args.optimizer))
            trainer = Trainer(model=model,
                              optimizer=optimizer,
                              iterator=iterator,
                              train_dataset=train_dataset,
                              validation_dataset=validation_dataset,
                              validation_metric="+f1-measure-overall",
                              patience=args.early_stopping,
                              num_epochs=args.max_epoch,
                              num_serialized_models_to_keep=3,
                              grad_clipping=args.grad_clipping,
                              serialization_dir=fine_tune_out_dir,
                              cuda_device=cuda_device)
            trainer.train()
        else:
            raise ValueError("Unsupported Value '{}'".format(
                args.train_method))

    # Test
    if args.test:
        print("# Test")
        result = evaluate(model=model,
                          instances=test_dataset,
                          data_iterator=iterator,
                          cuda_device=cuda_device,
                          batch_weight_key="")
        with open(path.join(out_dir, "test.score"), 'w') as fo:
            json.dump(result, fo)

    log.write_endtime()
Code example #8
            if label[0] == "I":
                start_transitions[i] = float("-inf")

        return start_transitions


reader = SrlReader()
train_dataset = reader.read(cached_path("data/train"))
validation_dataset = reader.read(cached_path("data/dev"))
vocab = Vocabulary.from_instances(train_dataset + validation_dataset)
token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'), embedding_dim=100,
                            pretrained_file="https://s3-us-west-2.amazonaws.com/allennlp/"
                                            "datasets/glove/glove.6B.100d.txt.gz", trainable=True)
encoder = PytorchSeq2SeqWrapper(StackedAlternatingLstm(input_size=100,
                                                       hidden_size=300,
                                                       num_layers=4,
                                                       recurrent_dropout_probability=0.1,
                                                       use_highway=True))
source_embedder = BasicTextFieldEmbedder({"tokens": token_embedding})
model = SemanticRoleLabeler(vocab, source_embedder, encoder, binary_feature_dim=100)
optimizer = optim.Adadelta(model.parameters(), rho=0.95)
iterator = BucketIterator(batch_size=100, sorting_keys=[("tokens", "num_tokens")])
iterator.index_with(vocab)
trainer = Trainer(
                model=model,
                optimizer=optimizer,
                iterator=iterator,
                train_dataset=train_dataset,
                validation_dataset=validation_dataset,
                validation_metric="+f1-measure-overall",
                grad_clipping=1.0,