def test_preprocess_transform_returns_vectors_of_given_max_len(self):
    """Each preprocessed stream keeps shape (batch, max_len, embedding_dim)."""
    model = ElmoModel(
        max_len=50,
        fasttext_model=self.fasttext_model,
        elmo_embedder=self.elmo_embedder,
        rnn_units=100,
        bidirectional=False,
        **self.model_args,
    )
    sentences = ["Esto no es agresivo", "Esto sí es agresivo"]
    model.preprocess_fit(sentences)
    transformed = model.preprocess_transform(sentences)
    # One array per embedding stream: ELMo first, fastText second.
    self.assertEqual(len(transformed), 2)
    self.assertEqual(transformed[0].shape, (2, 50, self.elmo_dim))
    self.assertEqual(transformed[1].shape, (2, 50, self.ft_dim))
def test_it_is_created_with_both(self):
    """With both embedders set, layer 3 expects their concatenated width."""
    model = ElmoModel(
        max_len=50,
        fasttext_model=self.fasttext_model,
        elmo_embedder=self.elmo_embedder,
        **self.model_args,
    )
    expected = (None, 50, self.ft_dim + self.elmo_dim)
    self.assertEqual(model.layers[3].input_shape, expected)
def test_it_creates_with_embedding_size(self):
    """A unidirectional RNN of 100 units produces a (None, 100) output at layer 4."""
    model = ElmoModel(
        max_len=50,
        fasttext_model=self.fasttext_model,
        elmo_embedder=self.elmo_embedder,
        rnn_units=100,
        bidirectional=False,
        **self.model_args,
    )
    rnn_output_shape = model.layers[4].output_shape
    self.assertEqual(rnn_output_shape, (None, 100))
def test_with_real_embedder(self):
    """Smoke test: compile and fit two sentences against a real on-disk ELMo embedder."""
    model = ElmoModel(
        max_len=50,
        fasttext_model=None,
        elmo_embedder=Embedder("models/elmo/es/"),
        **self.model_args,
    )
    sentences = ["Esto no es agresivo", "Esto sí es agresivo"]
    labels = np.array([0, 1]).reshape(-1, 1)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    model.fit(sentences, labels, epochs=2)
def create_model(params, embedder):
    """Build and compile the char + ELMo + BoW merged model from a flat params dict.

    Keys prefixed ``char__``, ``elmo__``, ``bow__`` are routed to the matching
    submodel; ``dropout``, ``recursive_class`` and ``dense_last_layer`` are
    shared across all three; ``lr`` and ``decay`` configure the Adam optimizer.
    Every key is consumed via ``pop`` so that typos/leftovers are detected.

    Args:
        params: flat hyperparameter dict (copied, not mutated).
        embedder: ELMo embedder instance passed to ``ElmoModel``.

    Returns:
        A compiled ``MergeModel``.

    Raises:
        AssertionError: if any params key was not consumed (likely a typo).
        KeyError: if an expected hyperparameter key is missing.
    """
    params = params.copy()  # don't mutate the caller's dict
    K.clear_session()  # drop stale graph state between hyperparameter trials

    # Hyperparameters shared by every submodel.
    dropout = params.pop('dropout')
    recursive_class = params.pop('recursive_class')
    dense_last_layer = params.pop('dense_last_layer')

    char_model = CharModel(
        vocab_size=params.pop('char__vocab_size'),
        max_charlen=params.pop('char__max_charlen'),
        embedding_dim=params.pop('char__embedding_dim'),
        tokenize_args={
            "stem": params.pop('char__stem'),
            "alpha_only": params.pop('char__alpha_only'),
        },
        filters=params.pop('char__filters'),
        kernel_size=params.pop('char__kernel_size'),
        pooling_size=params.pop('char__pooling_size'),
        dense_units=dense_last_layer,
        recursive_class=recursive_class,
        dropout=dropout,
    )

    elmo_model = ElmoModel(
        max_len=50,
        embedder=embedder,
        lstm_units=params.pop('elmo__lstm_units'),
        tokenize_args={'deaccent': params.pop('elmo__deaccent')},
        dense_units=dense_last_layer,
        recursive_class=recursive_class,
        dropout=dropout,
    )

    bow_model = BowModel(
        num_words=params.pop('bow__num_words'),
        dense_units=[1024, dense_last_layer],
        dropout=dropout,
    )

    merge_model = MergeModel([char_model, elmo_model, bow_model])

    optimizer_args = {"lr": params.pop('lr'), "decay": params.pop('decay')}
    merge_model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(**optimizer_args),
        metrics=['accuracy'],
    )

    # Every hyperparameter must have been consumed; leftovers indicate a typo
    # in the search-space definition. Name the offenders in the failure.
    assert not params, f"unconsumed params: {sorted(params)}"
    return merge_model
def test_it_can_be_fitted(self):
    """The model accepts raw sentences and binary labels and trains for 2 epochs."""
    model = ElmoModel(
        max_len=50,
        fasttext_model=self.fasttext_model,
        elmo_embedder=self.elmo_embedder,
        rnn_units=100,
        bidirectional=False,
        **self.model_args,
    )
    sentences = ["Esto no es agresivo", "Esto sí es agresivo"]
    labels = np.array([0, 1]).reshape(-1, 1)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    model.fit(sentences, labels, epochs=2)