Example 1: a test verifying that layer dropout and recurrent dropout each change the outputs and final states of a StackedBidirectionalLstm relative to an identically initialized module without dropout.
    def test_stacked_bidirectional_lstm_dropout_version_is_different(self, dropout_name: str):
        stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11,
                                                num_layers=3)
        if dropout_name == 'layer_dropout_probability':
            dropped_stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11,
                                                            num_layers=3,
                                                            layer_dropout_probability=0.9)
        elif dropout_name == 'recurrent_dropout_probability':
            dropped_stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11,
                                                            num_layers=3,
                                                            recurrent_dropout_probability=0.9)
        else:
            raise ValueError('Do not recognise the following dropout name '
                             f'{dropout_name}')
        # Initialize all weights to the constant value 0.5.
        constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
        initializer = InitializerApplicator([(".*", constant_init)])
        initializer(stacked_lstm)
        initializer(dropped_stacked_lstm)

        initial_state = torch.randn([3, 5, 11])
        initial_memory = torch.randn([3, 5, 11])

        tensor = torch.rand([5, 7, 10])
        sequence_lengths = torch.LongTensor([7, 7, 7, 7, 7])

        sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(tensor, sequence_lengths)
        lstm_input = pack_padded_sequence(sorted_tensor, sorted_sequence.data.tolist(), batch_first=True)

        stacked_output, stacked_state = stacked_lstm(lstm_input, (initial_state, initial_memory))
        dropped_output, dropped_state = dropped_stacked_lstm(lstm_input, (initial_state, initial_memory))
        dropped_output_sequence, _ = pad_packed_sequence(dropped_output, batch_first=True)
        stacked_output_sequence, _ = pad_packed_sequence(stacked_output, batch_first=True)
        if dropout_name == 'layer_dropout_probability':
            with pytest.raises(AssertionError):
                numpy.testing.assert_array_almost_equal(dropped_output_sequence.data.numpy(),
                                                        stacked_output_sequence.data.numpy(), decimal=4)
        if dropout_name == 'recurrent_dropout_probability':
            with pytest.raises(AssertionError):
                numpy.testing.assert_array_almost_equal(dropped_state[0].data.numpy(),
                                                        stacked_state[0].data.numpy(), decimal=4)
            with pytest.raises(AssertionError):
                numpy.testing.assert_array_almost_equal(dropped_state[1].data.numpy(),
                                                        stacked_state[1].data.numpy(), decimal=4)
Example 2: a test verifying that a forward pass over a packed, padded batch completes and leaves the padded timesteps at zero.
 def test_stacked_bidirectional_lstm_completes_forward_pass(self):
     input_tensor = torch.rand(4, 5, 3)
     input_tensor[1, 4:, :] = 0.
     input_tensor[2, 2:, :] = 0.
     input_tensor[3, 1:, :] = 0.
     input_tensor = pack_padded_sequence(input_tensor, [5, 4, 2, 1], batch_first=True)
     lstm = StackedBidirectionalLstm(3, 7, 3)
     output, _ = lstm(input_tensor)
     output_sequence, _ = pad_packed_sequence(output, batch_first=True)
     numpy.testing.assert_array_equal(output_sequence.data[1, 4:, :].numpy(), 0.0)
     numpy.testing.assert_array_equal(output_sequence.data[2, 2:, :].numpy(), 0.0)
     numpy.testing.assert_array_equal(output_sequence.data[3, 1:, :].numpy(), 0.0)
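
The two tests above exercise the module's raw interface directly. As a quick orientation, here is a minimal sketch of that contract, assuming the AllenNLP StackedBidirectionalLstm API used above: the module consumes a PackedSequence (optionally with an initial (h, c) state) and returns a PackedSequence together with the final state tuple.

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from allennlp.modules.stacked_bidirectional_lstm import StackedBidirectionalLstm

# Batch of 4 sequences with lengths 5, 4, 2, 1, already sorted by length.
lstm = StackedBidirectionalLstm(input_size=3, hidden_size=7, num_layers=2)
packed_input = pack_padded_sequence(torch.rand(4, 5, 3), [5, 4, 2, 1], batch_first=True)

packed_output, (final_h, final_c) = lstm(packed_input)
padded_output, lengths = pad_packed_sequence(packed_output, batch_first=True)
# Each timestep concatenates both directions: 2 * hidden_size features.
assert padded_output.shape[-1] == 2 * 7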
Example 3: a constructor that builds a StackedBidirectionalLstm from its arguments and passes it to the parent wrapper class via super().__init__(module=module).
 def __init__(
     self,
     input_size: int,
     hidden_size: int,
     num_layers: int,
     recurrent_dropout_probability: float = 0.0,
     layer_dropout_probability: float = 0.0,
     use_highway: bool = True,
 ) -> None:
     module = StackedBidirectionalLstm(
         input_size=input_size,
         hidden_size=hidden_size,
         num_layers=num_layers,
         recurrent_dropout_probability=recurrent_dropout_probability,
         layer_dropout_probability=layer_dropout_probability,
         use_highway=use_highway,
     )
     super().__init__(module=module)
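
Several of the remaining examples wrap the module in AllenNLP's PytorchSeq2SeqWrapper, which handles packing and unpacking internally and exposes a padded-tensor-plus-mask interface. A minimal usage sketch under that assumption (the shapes and the boolean-mask convention follow the Seq2SeqEncoder API; the sizes are illustrative):

import torch
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.stacked_bidirectional_lstm import StackedBidirectionalLstm

encoder = PytorchSeq2SeqWrapper(
    StackedBidirectionalLstm(input_size=10, hidden_size=11, num_layers=2))

inputs = torch.rand(4, 7, 10)              # (batch, timesteps, input_size)
mask = torch.ones(4, 7, dtype=torch.bool)  # True where a timestep is real
mask[2, 5:] = False                        # third sequence has length 5

encoded = encoder(inputs, mask)            # (batch, timesteps, 2 * hidden_size)
assert encoder.get_output_dim() == 22      # bidirectional, so 2 * hidden_size
assert encoded.shape == (4, 7, 22)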
Example 4: a module that encodes its input with a single-layer StackedBidirectionalLstm (via PytorchSeq2SeqWrapper) before a BidirectionalEndpointSpanExtractor.
    def __init__(self,
                 input_dim: int,
                 combination: str = "x,y",
                 num_width_embeddings: int = None,
                 span_width_embedding_dim: int = None,
                 bucket_widths: bool = False,
                 use_exclusive_start_indices: bool = False) -> None:
        super().__init__()

        self._input_dim = input_dim
        self._combination = combination

        self._encoder = PytorchSeq2SeqWrapper(
            StackedBidirectionalLstm(self._input_dim,
                                     int(floor(self._input_dim / 2)), 1))
        self._span_extractor = BidirectionalEndpointSpanExtractor(
            self._input_dim, "y", "y", num_width_embeddings,
            span_width_embedding_dim, bucket_widths)
Example 5: a chat classifier that pools each turn with BertPooler and encodes the sequence of turns with a StackedBidirectionalLstm.
 def __init__(self,
              vocab,
              pretrained_model: str = "bert-base-uncased",
              requires_grad: bool = True):
     super(ChatClassification, self).__init__()
     self.vocab = vocab
     self.turn_pooler = BertPooler(pretrained_model,
                                   requires_grad,
                                   dropout=0.0)
     #self.turn_pooler =
     self.chat_encoder = StackedBidirectionalLstm(
         hidden_size=400,
         input_size=768,
         num_layers=1,
         recurrent_dropout_probability=0.3,
         use_highway=True)
     self.classif_layer = torch.nn.Linear(
         in_features=self.chat_encoder.hidden_size, out_features=2)
     self.accuracy = CategoricalAccuracy()
Example 6: a Tree module whose 'rnn' option aggregates span children with a single-layer StackedBidirectionalLstm wrapped in a PytorchSeq2SeqWrapper.
    def __init__(self,
                 vocab: Vocabulary,
                 span_emb_dim: int,
                 tree_prop: int = 1,
                 tree_dropout: float = 0.0,
                 tree_children: str = 'attention',
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(Tree, self).__init__(vocab, regularizer)

        self._span_emb_dim = span_emb_dim
        assert span_emb_dim % 2 == 0

        self._f_network = FeedForward(input_dim=2 * span_emb_dim,
                                      num_layers=1,
                                      hidden_dims=span_emb_dim,
                                      activations=torch.nn.Sigmoid(),
                                      dropout=0)

        self._tree_prop = tree_prop

        self._tree_children = tree_children
        if self._tree_children == 'attention':
            self._global_attention = TimeDistributed(
                torch.nn.Linear(span_emb_dim, 1))
        elif self._tree_children == 'pooling':
            pass
        elif self._tree_children == 'conv':
            self._conv = torch.nn.Conv1d(span_emb_dim,
                                         span_emb_dim,
                                         kernel_size=3,
                                         padding=1)
        elif self._tree_children == 'rnn':
            self._encoder = PytorchSeq2SeqWrapper(
                StackedBidirectionalLstm(span_emb_dim,
                                         int(floor(span_emb_dim / 2)), 1))
        else:
            raise RuntimeError('invalid tree_children option: {}'.format(
                self._tree_children))

        self._dropout = torch.nn.Dropout(p=tree_dropout)

        initializer(self)
Example 7: an LSTM layer that converts keep-probabilities into AllenNLP dropout probabilities (dropout = 1 - keep_prob) and disables the highway connections.
    def __init__(self, input_size, hidden_size, num_layers,
                 input_keep_prob,
                 recurrent_keep_prob,
                 layer_norm=False,
                 first_dropout=0,
                 bidirectional=True
                 ):
        super(AllenNLPLSTMLayer, self).__init__()
        from allennlp.modules.stacked_bidirectional_lstm import StackedBidirectionalLstm
        self.rnn = StackedBidirectionalLstm(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            recurrent_dropout_probability=1 - recurrent_keep_prob,
            layer_dropout_probability=1 - input_keep_prob,
            use_highway=False
        )

        self.layer_norm = LayerNorm(hidden_size * 2) if layer_norm else None
        self.first_dropout = Dropout(first_dropout)
        # self.reset_parameters()
        self.output_dim = hidden_size * (2 if bidirectional else 1)
Example 8: a sequence-labeling model whose 'stacked_bidirectional_lstm' encoder option wraps the module in a PytorchSeq2SeqWrapper.
    def __init__(self, conf: Dict,
                 input_batchers: Dict[str, Union[WordBatch, CharacterBatch]],
                 n_class: int, use_cuda: bool):
        super(SeqLabelModel, self).__init__()
        self.n_class = n_class
        self.use_cuda = use_cuda
        self.input_dropout = torch.nn.Dropout2d(p=conf["dropout"])
        self.dropout = InputVariationalDropout(p=conf['dropout'])

        input_layers = {}
        for i, c in enumerate(conf['input']):
            if c['type'] == 'embeddings':
                if 'pretrained' in c:
                    embs = load_embedding_txt(c['pretrained'], c['has_header'])
                    logger.info('loaded {0} embedding entries.'.format(
                        len(embs[0])))
                else:
                    embs = None
                name = c['name']
                mapping = input_batchers[name].mapping
                layer = Embeddings(c['dim'],
                                   mapping,
                                   fix_emb=c['fixed'],
                                   embs=embs,
                                   normalize=c.get('normalize', False),
                                   input_field_name=name)
                logger.info('embedding for field {0} '
                            'created with {1} x {2}.'.format(
                                c['field'], layer.n_V, layer.n_d))
                input_layers[name] = layer

            elif c['type'] == 'cnn_encoder' or c['type'] == 'lstm_encoder':
                name = c['name']
                mapping = input_batchers[name].mapping
                embeddings = Embeddings(
                    c['dim'],
                    mapping,
                    fix_emb=False,
                    embs=None,
                    normalize=False,
                    input_field_name='{0}_ch_emb'.format(name))
                logger.info('character embedding for field {0} '
                            'created with {1} x {2}.'.format(
                                c['field'], embeddings.n_V, embeddings.n_d))
                if c['type'] == 'lstm_encoder':
                    layer = LstmTokenEmbedder(c['dim'],
                                              embeddings,
                                              conf['dropout'],
                                              use_cuda,
                                              input_field_name=name)
                elif c['type'] == 'cnn_encoder':
                    layer = ConvTokenEmbedder(c['dim'],
                                              embeddings,
                                              c['filters'],
                                              c.get('n_highway', 1),
                                              c.get('activation', 'relu'),
                                              use_cuda,
                                              input_field_name=name)
                else:
                    raise ValueError('Unknown type: {}'.format(c['type']))
                input_layers[name] = layer

            elif c['type'] == 'elmo':
                name = c['name']
                layer = ContextualizedWordEmbeddings(name, c['path'], use_cuda)
                input_layers[name] = layer

            else:
                raise ValueError('{} unknown input layer'.format(c['type']))

        self.input_layers = torch.nn.ModuleDict(input_layers)
        input_encoders = []
        input_dim = 0
        for i, c in enumerate(conf['input_encoder']):
            input_info = {
                name: self.input_layers[name].get_output_dim()
                for name in c['input']
            }

            if c['type'] == 'affine':
                input_encoder = AffineTransformInputEncoder(
                    input_info, c['dim'], use_cuda)
            elif c['type'] == 'sum':
                input_encoder = SummationInputEncoder(input_info, use_cuda)
            elif c['type'] == 'concat':
                input_encoder = ConcatenateInputEncoder(input_info, use_cuda)
            else:
                raise ValueError('{} unknown input encoder'.format(c['type']))

            input_dim += input_encoder.get_output_dim()
            input_encoders.append(input_encoder)

        self.input_encoders = torch.nn.ModuleList(input_encoders)

        encoder_name = conf['encoder']['type'].lower()
        if encoder_name == 'stacked_bidirectional_lstm':
            lstm = StackedBidirectionalLstm(
                input_size=input_dim,
                hidden_size=conf['encoder']['hidden_dim'],
                num_layers=conf['encoder']['n_layers'],
                recurrent_dropout_probability=conf['dropout'],
                layer_dropout_probability=conf['dropout'],
                use_highway=conf['encoder'].get('use_highway', True))
            self.encoder = PytorchSeq2SeqWrapper(lstm, stateful=False)
            encoded_input_dim = self.encoder.get_output_dim()
        elif encoder_name == 'project':
            self.encoder = ProjectedEncoder(input_dim,
                                            conf['encoder']['hidden_dim'],
                                            dropout=conf['dropout'])
            encoded_input_dim = self.encoder.get_output_dim()
        elif encoder_name == 'dummy':
            self.encoder = DummyEncoder()
            encoded_input_dim = input_dim
        else:
            raise ValueError('Unknown input encoder: {}'.format(encoder_name))

        if conf["classifier"]["type"].lower() == 'crf':
            self.classify_layer = CRFLayer(encoded_input_dim, n_class,
                                           use_cuda)
        else:
            self.classify_layer = ClassifyLayer(encoded_input_dim, n_class,
                                                use_cuda)

        self.encode_time = 0
        self.emb_time = 0
        self.classify_time = 0
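
For reference, the 'stacked_bidirectional_lstm' branch above reads only a handful of configuration keys. An illustrative (hypothetical) fragment covering just those keys might look as follows; the 'input', 'input_encoder', and 'classifier' sections are elided.

conf = {
    "dropout": 0.33,  # shared by the input dropout and both LSTM dropouts above
    "encoder": {
        "type": "stacked_bidirectional_lstm",
        "hidden_dim": 200,
        "n_layers": 2,
        "use_highway": True,  # optional; defaults to True via .get()
    },
    # 'input', 'input_encoder', and 'classifier' omitted in this sketch.
}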
Example 9: a biaffine dependency parser whose context encoder can be a StackedBidirectionalLstm or an input-dropouted variant, selected via the configuration.
    def __init__(self, n_relations: int, conf: Dict,
                 input_batchers: Dict[str, InputBatch], use_cuda: bool):
        super(BiaffineParser, self).__init__()
        self.n_relations = n_relations
        self.conf = conf
        self.use_cuda = use_cuda
        self.use_mst_decoding_for_validation = conf[
            'use_mst_decoding_for_validation']

        input_layers = {}
        for i, c in enumerate(conf['input']):
            if c['type'] == 'embeddings':
                if 'pretrained' in c:
                    embs = load_embedding_txt(c['pretrained'], c['has_header'])
                    logger.info('loaded {0} embedding entries.'.format(
                        len(embs[0])))
                else:
                    embs = None
                name = c['name']
                mapping = input_batchers[name].mapping
                layer = Embeddings(name,
                                   c['dim'],
                                   mapping,
                                   fix_emb=c['fixed'],
                                   embs=embs,
                                   normalize=c.get('normalize', False))
                logger.info('embedding for field {0} '
                            'created with {1} x {2}.'.format(
                                c['field'], layer.n_V, layer.n_d))
                input_layers[name] = layer

            elif c['type'] == 'cnn_encoder' or c['type'] == 'lstm_encoder':
                name = c['name']
                mapping = input_batchers[name].mapping
                embeddings = Embeddings('{0}_ch_emb'.format(name),
                                        c['dim'],
                                        mapping,
                                        fix_emb=False,
                                        embs=None,
                                        normalize=False)
                logger.info('character embedding for field {0} '
                            'created with {1} x {2}.'.format(
                                c['field'], embeddings.n_V, embeddings.n_d))
                if c['type'] == 'lstm_encoder':
                    layer = LstmTokenEmbedder(name, c['dim'], embeddings,
                                              conf['dropout'], use_cuda)
                elif c['type'] == 'cnn_encoder':
                    layer = ConvTokenEmbedder(name, c['dim'], embeddings,
                                              c['filters'],
                                              c.get('n_highway', 1),
                                              c.get('activation',
                                                    'relu'), use_cuda)
                else:
                    raise ValueError('Unknown type: {}'.format(c['type']))
                input_layers[name] = layer

            elif c['type'] == 'elmo':
                name = c['name']
                layer = ContextualizedWordEmbeddings(name, c['path'], use_cuda)
                input_layers[name] = layer

            else:
                raise ValueError('{} unknown input layer'.format(c['type']))

        self.input_layers = torch.nn.ModuleDict(input_layers)

        input_encoders = []
        input_dim = 0
        for i, c in enumerate(conf['input_encoder']):
            input_info = {
                name: [
                    entry['dim'] for entry in conf['input']
                    if entry['name'] == name
                ][0]
                for name in c['input']
            }

            if c['type'] == 'affine':
                input_encoder = AffineTransformInputEncoder(
                    input_info, c['dim'], use_cuda)
            elif c['type'] == 'sum':
                input_encoder = SummationInputEncoder(input_info, use_cuda)
            elif c['type'] == 'concat':
                input_encoder = ConcatenateInputEncoder(input_info, use_cuda)
            else:
                raise ValueError('{} unknown input encoder'.format(c['type']))

            input_dim += input_encoder.get_output_dim()
            input_encoders.append(input_encoder)

        self.input_encoders = torch.nn.ModuleList(input_encoders)

        c = conf['context_encoder']
        if c['type'] == 'stacked_bidirectional_lstm_dozat':
            self.encoder = PytorchSeq2SeqWrapper(
                InputDropoutedStackedBidirectionalLstm(
                    DozatLstmCell,
                    num_layers=c['num_layers'],
                    input_size=input_dim,
                    hidden_size=c['hidden_dim'],
                    recurrent_dropout_probability=c[
                        'recurrent_dropout_probability'],
                    layer_dropout_probability=c['layer_dropout_probability'],
                    activation=Activation.by_name("leaky_relu")()),
                stateful=False)
        elif c['type'] == 'stacked_bidirectional_lstm_ma':
            self.encoder = PytorchSeq2SeqWrapper(
                InputDropoutedStackedBidirectionalLstm(
                    MaLstmCell,
                    num_layers=c['num_layers'],
                    input_size=input_dim,
                    hidden_size=c['hidden_dim'],
                    recurrent_dropout_probability=c[
                        'recurrent_dropout_probability'],
                    layer_dropout_probability=c['layer_dropout_probability'],
                    activation=Activation.by_name("tanh")()),
                stateful=False)
        elif c['type'] == 'stacked_bidirectional_lstm':
            self.encoder = PytorchSeq2SeqWrapper(StackedBidirectionalLstm(
                num_layers=c['num_layers'],
                input_size=input_dim,
                hidden_size=c['hidden_dim'],
                recurrent_dropout_probability=c[
                    'recurrent_dropout_probability'],
                layer_dropout_probability=c['layer_dropout_probability']),
                                                 stateful=False)
        else:
            self.encoder = DummyContextEncoder()

        encoder_dim = self.encoder.get_output_dim()
        c = conf['biaffine_parser']
        self.arc_representation_dim = arc_representation_dim = c[
            'arc_representation_dim']
        self.tag_representation_dim = tag_representation_dim = c[
            'tag_representation_dim']

        self.head_sentinel_ = torch.nn.Parameter(
            torch.randn([1, 1, encoder_dim]))

        self.head_arc_feedforward = FeedForward(encoder_dim, 1,
                                                arc_representation_dim,
                                                Activation.by_name("elu")())
        self.child_arc_feedforward = FeedForward(encoder_dim, 1,
                                                 arc_representation_dim,
                                                 Activation.by_name("elu")())

        self.head_tag_feedforward = FeedForward(encoder_dim, 1,
                                                tag_representation_dim,
                                                Activation.by_name("elu")())
        self.child_tag_feedforward = FeedForward(encoder_dim, 1,
                                                 tag_representation_dim,
                                                 Activation.by_name("elu")())

        arc_attention_version = c.get('arc_attention_version', 'v1')
        if arc_attention_version == 'v2':
            self.arc_attention = BilinearMatrixAttentionV2(
                arc_representation_dim,
                arc_representation_dim,
                use_input_biases=True)
        else:
            self.arc_attention = BilinearMatrixAttention(
                arc_representation_dim,
                arc_representation_dim,
                use_input_biases=True)

        self.tag_bilinear = BilinearWithBias(tag_representation_dim,
                                             tag_representation_dim,
                                             n_relations)

        self.input_dropout_ = torch.nn.Dropout2d(p=conf['dropout'])
        self.dropout_ = InputVariationalDropout(p=conf['dropout'])

        self.input_encoding_timer = TimeRecoder()
        self.context_encoding_timer = TimeRecoder()
        self.classification_timer = TimeRecoder()