def test_fit_and_predict_with_new_labels_set_via_method(self):
    # Replace the label mapping several times through set_label_mapping,
    # rebuilding the model in between, then run fit and predict with the
    # fixture's mapping in place.

    # start from a three-label mapping and build the model for it
    first_mapping = {"PAD": 0, "BACKGROUND": 1, "test3": 2}
    model = CharacterLevelCnnModel(first_mapping)
    model._construct_model()

    # drop to a two-label mapping and call _reconstruct_model
    second_mapping = {"PAD": 0, "BACKGROUND": 1}
    model.set_label_mapping(second_mapping)
    model._reconstruct_model()

    # finally switch to the mapping provided by the test fixture
    model.set_label_mapping(self.label_mapping)

    # one batch of [x_data, y_data]; the last y axis has one slot per label id
    # (3400 is presumably the model's default max_length -- TODO confirm)
    x_data = np.array([['test']])
    y_data = np.zeros((1, 3400, max(self.label_mapping.values()) + 1))
    train_batches = [[x_data, y_data]]
    validation_batches = train_batches  # the very same object is reused
    model._construct_model()

    # set the labels again on the freshly constructed model, then train;
    # the unpacking requires fit to hand back exactly three values
    model.set_label_mapping(self.label_mapping)
    _history, _f1, _f1_report = model.fit(train_batches, validation_batches)

    # predict using only the text portion of the batch
    model.predict(train_batches[0][0])
def test_param_validation(self):
    # A fully customised, valid parameter set must be accepted and stored
    # unchanged; a parameter set containing bad values and an unknown key
    # must be rejected with ValueError.
    valid_params = {
        'max_length': 10,
        'max_char_encoding_id': 11,
        'size_fc': [64, 64],
        'dropout': 0.9,
        'size_conv': 11,
        'default_label': "BACKGROUND",
        'num_fil': [48, 48],
    }
    bad_params = {
        'max_length': -1,
        'max_char_encoding_id': "words",
        'size_fc': 5,
        'dropout': 0.9,
        'size_conv': 11,
        'optimizer': 6,
        'num_fil': [48, 48],
        'fake_extra_param': "fails",
    }

    # valid parameters: the model builds and keeps exactly what was passed
    model = CharacterLevelCnnModel(
        label_mapping=self.label_mapping,
        parameters=valid_params,
    )
    model._construct_model()
    self.assertDictEqual(valid_params, model._parameters)

    # invalid parameters: construction itself must raise
    with self.assertRaises(ValueError):
        CharacterLevelCnnModel(
            label_mapping=self.label_mapping,
            parameters=bad_params,
        )
def test_model_construct(self):
    # Build the model with default parameters and check how many layers
    # and labels it ends up with.
    model = CharacterLevelCnnModel(label_mapping=self.label_mapping)
    model._construct_model()

    # details() is only called here; nothing about its output is asserted
    model.details()

    # reference layer names; only the length of this list is compared below
    expected_layers = [
        "input_1", "lambda", "embedding",
        "conv1d", "dropout", "batch_normalization",
        "conv1d_1", "dropout_1", "batch_normalization_1",
        "conv1d_2", "dropout_2", "batch_normalization_2",
        "conv1d_3", "dropout_3", "batch_normalization_3",
        "dense", "dropout_4",
        "dense_1", "dropout_5",
        "dense_2",
        "tf_op_layer_ArgMax",
        "thresh_arg_max_layer",
    ]
    actual_layers = [built_layer.name for built_layer in model._model.layers]

    self.assertEqual(len(expected_layers), len(actual_layers))
    self.assertEqual(17, model.num_labels)
def test_param_validation(self):
    # Every supported parameter can be overridden and is stored as given;
    # a parameter set with bad values and an unknown key raises ValueError.
    accepted = {
        "max_length": 10,
        "max_char_encoding_id": 11,
        "size_fc": [64, 64],
        "dropout": 0.9,
        "size_conv": 11,
        "default_label": "UNKNOWN",
        "num_fil": [48, 48],
    }
    rejected = {
        "max_length": -1,
        "max_char_encoding_id": "words",
        "size_fc": 5,
        "dropout": 0.9,
        "size_conv": 11,
        "optimizer": 6,
        "num_fil": [48, 48],
        "fake_extra_param": "fails",
    }

    # the accepted set must survive construction unchanged
    model = CharacterLevelCnnModel(
        label_mapping=self.label_mapping,
        parameters=accepted,
    )
    model._construct_model()
    self.assertDictEqual(accepted, model._parameters)

    # the rejected set must fail already in the constructor
    with self.assertRaises(ValueError):
        CharacterLevelCnnModel(
            label_mapping=self.label_mapping,
            parameters=rejected,
        )
def test_validation_evaluate_and_classification_report(self, *mocks):
    # Run _validate_training on one labelled sample and check that an f1
    # score and a report come back, with the ADDRESS support equal to the
    # number of positions labelled below.
    model = CharacterLevelCnnModel(self.label_mapping)
    model._construct_model()

    # validation batch: one text plus an all-zero label array whose last
    # axis has one slot per label id
    text = np.array([['123 fake st']])
    labels = np.zeros((1, 3400, max(self.label_mapping.values()) + 1))
    validation_batches = [[text, labels]]

    # flag the first 11 positions (the length of '123 fake st') as ADDRESS
    address_id = self.label_mapping['ADDRESS']
    validation_batches[0][1][:, :11, address_id] = 1

    f1, f1_report = model._validate_training(
        validation_batches, 32, True, True)

    self.assertIsNotNone(f1)
    self.assertIsNotNone(f1_report)
    self.assertEqual(11, f1_report['ADDRESS']['support'])
def test_validation(self):
    # Check that _validate_training runs on a minimal batch; nothing about
    # its return value is asserted.
    model = CharacterLevelCnnModel(label_mapping=self.label_mapping)
    model._construct_model()

    # one [x_data, y_data] batch with an all-zero label array
    x_data = np.array([['test']])
    y_data = np.zeros((1, 3400, max(self.label_mapping.values()) + 1))
    validation_batches = [[x_data, y_data]]

    # run validation with logging on and keras output off
    model._validate_training(
        validation_batches,
        batch_size_test=32,
        verbose_log=True,
        verbose_keras=False,
    )
def test_model_construct(self):
    # Construct the default model and verify its layer count and label count.
    default_model = CharacterLevelCnnModel(label_mapping=self.label_mapping)
    default_model._construct_model()

    # call details() without asserting anything about what it produces
    default_model.details()

    # names listed for reference; the assertion below compares lengths only
    expected_layers = [
        'input_1',
        'lambda',
        'embedding',
        'conv1d', 'dropout', 'batch_normalization',
        'conv1d_1', 'dropout_1', 'batch_normalization_1',
        'conv1d_2', 'dropout_2', 'batch_normalization_2',
        'conv1d_3', 'dropout_3', 'batch_normalization_3',
        'dense', 'dropout_4',
        'dense_1', 'dropout_5',
        'dense_2',
        'tf_op_layer_ArgMax',
        'thresh_arg_max_layer',
    ]
    built_names = [entry.name for entry in default_model._model.layers]

    self.assertEqual(len(expected_layers), len(built_names))
    self.assertEqual(17, default_model.num_labels)
def test_fit_and_predict_with_new_labels(self):
    # Pass the label mapping straight to fit, then check that predict runs
    # on the same text.
    model = CharacterLevelCnnModel(self.label_mapping)

    # one [x_data, y_data] batch; the last y axis has one slot per label id
    x_data = np.array([['test']])
    y_data = np.zeros((1, 3400, max(self.label_mapping.values()) + 1))
    train_batches = [[x_data, y_data]]
    validation_batches = train_batches  # the very same object is reused
    model._construct_model()

    # fit while supplying the labels as a keyword argument; the unpacking
    # requires fit to hand back exactly three values
    _history, _f1, _f1_report = model.fit(
        train_batches,
        validation_batches,
        label_mapping=self.label_mapping,
    )

    # predict after fitting, using only the text portion of the batch
    model.predict(train_batches[0][0])