def test_evaluate(nlp: Pipeline):
    """Check the accuracy `nlp.evaluate` reports on SemEval restaurant test data."""
    test_set = load_examples(dataset='semeval', domain='restaurant', test=True)
    accuracy = tf.metrics.Accuracy()

    # The model is expected to label the first 40 examples perfectly.
    first_score = nlp.evaluate(test_set[:40], accuracy, batch_size=10).numpy()
    assert first_score == 1

    # The very same metric object is handed over for the next 10 examples.
    # NOTE(review): 0.98 presumably is the running accuracy over all 50
    # examples (49 correct) -- confirm `evaluate` does not reset the metric.
    second_score = nlp.evaluate(test_set[40:50], accuracy, batch_size=10)
    assert np.isclose(second_score, 0.98)
def test_predict(nlp: Pipeline):
    """Run tokenize -> encode -> predict on two examples and check the outputs."""
    long_text = ("We are great fans of Slack, but we wish the subscriptions "
                 "were more accessible to small startups.")
    short_text = "We are great fans of Slack"
    aspect = "Slack"
    examples = [Example(text, aspect) for text in (long_text, short_text)]

    tokenized = nlp.tokenize(examples)
    outputs = nlp.predict(nlp.encode(tokenized))

    # Shapes expected for this batch of two examples.
    assert outputs.scores.shape == [2, 3]
    assert outputs.hidden_states.shape == [2, 13, 25, 768]
    assert outputs.attentions.shape == [2, 12, 12, 25, 25]
    assert outputs.attention_grads.shape == [2, 12, 12, 25, 25]

    # For both examples the highest score must sit at index 2.
    top_classes = np.argmax(outputs.scores.numpy(), axis=-1)
    assert top_classes.tolist() == [2, 2]
def _evaluate(nlp: Pipeline, domain: str, name: str) -> np.ndarray:
    """Pair every reference label with the label obtained after masking.

    The SemEval test examples of `domain` are processed in batches of 32.
    Each batch is predicted once as is, and once more after every prediction
    has been passed through `mask_tokens(..., k=2)`.

    Returns an array built from `(reference label, label after masking)`
    pairs. `name` is not read in the body.
    NOTE(review): `name` presumably only distinguishes cached results --
    confirm against the caching decorator/caller.
    """
    label_pairs = []
    test_examples = absa.load_examples('semeval', domain, test=True)
    for batch in absa.utils.batches(test_examples, batch_size=32):
        # Materialize the predictions: they are read twice below.
        reference = list(nlp.transform(batch))
        masked_batch = [mask_tokens(nlp, item, k=2) for item in reference]
        masked = nlp.transform(masked_batch)
        reference_labels = [p.sentiment.value for p in reference]
        masked_labels = [p.sentiment.value for p in masked]
        label_pairs.extend(zip(reference_labels, masked_labels))
    # Return a concrete array rather than a generator, because the result
    # of this function is cached.
    return np.array(label_pairs)
def test_preprocess(nlp: Pipeline):
    """Check that `preprocess` expands a document into per-aspect subtasks."""

    def split_on_newline(text):
        # Each line of the document becomes a separate span.
        return text.split('\n')

    nlp.text_splitter = split_on_newline
    raw_document = ("This is the test sentence 1.\n"
                    "This is the test sentence 2.\n"
                    "This is the test sentence 3.")
    aspects = ['aspect_1', 'aspect_2']
    task = nlp.preprocess(text=raw_document, aspects=aspects)

    # Two aspects times three spans give six examples, grouped per aspect.
    assert isinstance(task, Task)
    assert len(task.subtasks) == 2
    assert list(task.subtasks) == ['aspect_1', 'aspect_2']
    assert len(task.examples) == 6
    assert task.indices == [(0, 3), (3, 6)]

    # Iterating over the task yields its subtasks.
    first, second = task
    assert first.text == second.text == raw_document
    assert first.aspect == 'aspect_1'
    assert len(first.examples) == 3
def nlp() -> Pipeline:
    """Build a `Pipeline` from the 'absa/classifier-rest-0.2' checkpoint."""
    checkpoint = 'absa/classifier-rest-0.2'
    tokenizer = transformers.BertTokenizer.from_pretrained(checkpoint)
    # The config is passed explicitly here, although it could also be
    # downloaded automatically.
    model = BertABSClassifier.from_pretrained(
        checkpoint,
        config=BertABSCConfig.from_pretrained(checkpoint),
    )
    # A bare professor: neither pattern nor reference recognizers are set.
    professor = Professor()
    return Pipeline(model, tokenizer, professor, text_splitter=None)
def mask_examples(nlp: Pipeline, domain: str, is_test: bool):
    """Yield each SemEval example unmasked, then once per single masked token.

    Every item is a tuple `(example index, masked token index, example)`.
    The unmasked example comes first and carries `-1` as the token index.
    """
    examples = absa.load_examples('semeval', domain, is_test)
    for example_id, example in enumerate(examples):
        # The reference item: predict without any mask.
        yield example_id, -1, example
        tokenized, = nlp.tokenize([example])
        token_count = len(tokenized.text_tokens)
        yield from (
            (example_id, position,
             mask_tokens(nlp, tokenized, indices=[position]))
            for position in range(token_count)
        )
def _retrieve_labels(nlp: Pipeline, domain: str, is_test: bool) -> np.ndarray:
    """Predict a sentiment label for every item produced by `mask_examples`.

    Returns an array built from rows of
    `(example index, masked token index, predicted label)`.
    """
    rows = []
    masked_stream = mask_examples(nlp, domain, is_test)
    for batch in absa.utils.batches(masked_stream, batch_size=32):
        # Transpose the batch of tuples into three parallel sequences.
        example_ids, token_ids, batch_examples = zip(*batch)
        labels = [p.sentiment.value for p in nlp.transform(batch_examples)]
        rows.extend(zip(example_ids, token_ids, labels))
    return np.array(rows)
def test_postprocess(nlp: Pipeline):
    """Check that `postprocess` merges span predictions into aspect results."""
    text = ("We are great fans of Slack.\n"
            "The Slack often has bugs.\n"
            "best of all is the warm vibe")

    def naive_splitter(text):
        # A naive text splitter: one span per line.
        return text.split('\n')

    nlp.text_splitter = naive_splitter
    task = nlp.preprocess(text, aspects=['slack', 'price'])
    predictions = nlp.transform(task.examples)
    completed_task = nlp.postprocess(task, predictions)

    # Two aspects times three spans, grouped per aspect.
    assert len(completed_task.examples) == 6
    assert completed_task.indices == [(0, 3), (3, 6)]
    slack, price = completed_task
    assert slack.text == price.text == text

    # The fragments disagree about the sentiment; the scores are normalized.
    expected_slack = [0.03, 0.48, 0.48]
    assert np.allclose(slack.scores, expected_slack, atol=0.01)
    # Mind that the neutral sentiment is a known problem here.
    expected_price = [0.02, 0.49, 0.49]
    assert np.allclose(price.scores, expected_price, atol=0.01)
def test_review(nlp: Pipeline):
    """Check that `review` turns model outputs into labeled predictions."""
    positive_text = ("We are great fans of Slack, but we wish the subscriptions "
                     "were more accessible to small startups.")
    negative_text = "The Slack often has bugs."
    aspect = "slack"
    examples = [Example(text, aspect)
                for text in (positive_text, negative_text)]

    tokenized = nlp.tokenize(examples)
    outputs = nlp.predict(nlp.encode(tokenized))
    first, second = list(nlp.review(tokenized, outputs))

    assert first.sentiment == Sentiment.positive
    assert second.sentiment == Sentiment.negative

    # Check the structure of a single prediction.
    assert isinstance(first, PredictedExample)
    assert isinstance(first.scores, list)
    assert isinstance(first.review, Review)
    # The review is expected to carry neither a reference flag nor patterns.
    assert not first.review.is_reference
    assert not first.review.patterns
def _retrieve_labels(
        nlp: Pipeline,
        domain: str,
        part_parts: Tuple[int, int]
) -> np.ndarray:
    """Predict a sentiment label for every item produced by `mask_examples`.

    Each item is `(example index, *masked token indices, example)`. The
    returned array is built from the same rows, with the example replaced
    by its predicted label.
    """
    rows = []
    masked_stream = mask_examples(nlp, domain, part_parts)
    for batch in absa.utils.batches(masked_stream, batch_size=32):
        # Transpose the batch; the middle columns are the masked indices.
        example_ids, *token_id_columns, batch_examples = zip(*batch)
        labels = [p.sentiment.value for p in nlp.transform(batch_examples)]
        rows.extend(zip(example_ids, *token_id_columns, labels))
    return np.array(rows)
def test_encode(nlp: Pipeline):
    """Check token ids, padding, attention mask and segment ids of `encode`."""
    long_text = ("We are great fans of Slack, but we wish the subscriptions "
                 "were more accessible to small startups.")
    short_text = "We are great fans of Slack"
    aspect = "Slack"
    examples = [Example(text, aspect) for text in (long_text, short_text)]
    batch = nlp.encode(nlp.tokenize(examples))
    assert isinstance(batch.token_ids, tf.Tensor)

    # 101 is the CLS token and 102 is the SEP token.
    expected_ids = [101, 2057, 2024, 2307, 4599, 1997, 19840, 102, 19840, 102]
    token_ids = batch.token_ids.numpy()
    assert token_ids[1, :10].tolist() == expected_ids
    assert token_ids[0, :7].tolist() == expected_ids[:7]

    # The second (shorter) sequence must be padded with zeros, and the
    # attention mask must be switched off over the padding.
    assert np.allclose(token_ids[1, 10:], 0)
    attention_mask = batch.attention_mask.numpy()
    assert np.allclose(attention_mask[1, 10:], 0)

    # Segment ids: only the last two positions of the first sequence are 1.
    segment_ids = batch.token_type_ids.numpy()
    assert segment_ids[0, -2:].tolist() == [1, 1]
    assert np.allclose(segment_ids[0, :-2], 0)
def mask_examples(nlp: Pipeline, domain: str, part_parts: Tuple[int, int]):
    """Yield examples of one dataset chunk with every pair of tokens masked.

    `part_parts` is `(part, parts)`: the SemEval test set of `domain` is
    split into `parts` chunks and only chunk number `part` is processed.

    Every item is `(index within the chunk, i, j, example)`. The unmasked
    example comes first with `-1, -1` as token indices, followed by one
    masked example per token pair `i < j`.
    """
    test_examples = absa.load_examples('semeval', domain, test=True)
    # The dataset is split so that results can be cached more frequently.
    part, parts = part_parts
    chunk = utils.split(test_examples, n=parts)[part]
    for example_id, example in enumerate(chunk):
        # The reference item: predict without any mask.
        yield example_id, -1, -1, example
        tokenized, = nlp.tokenize([example])
        token_count = len(tokenized.text_tokens)
        # `k=1` leaves the diagonal out, so the two indices always differ.
        for pair in zip(*np.triu_indices(token_count, k=1)):
            masked = mask_tokens(nlp, tokenized, indices=pair)
            yield (example_id, *pair, masked)
def mask_examples(
        nlp: Pipeline,
        domain: str,
        part_parts: Tuple[int, int]
):
    """Yield examples of one dataset chunk with every token triple masked.

    Only the SemEval test examples of `domain` that are labeled negative
    and contain neither a key token nor a key token pair are considered.
    `part_parts` is `(part, parts)`: the filtered examples are split into
    `parts` chunks and only chunk number `part` is processed.

    Every item is `(index within the chunk, i, j, k, example)`. The
    unmasked example comes first with `-1, -1, -1` as token indices,
    followed by one masked example per token triple `i < j < k`.
    """
    dataset = absa.load_examples('semeval', domain, test=True)

    # Filter out examples that contain a key token or a pair of key tokens,
    # and those that are other than negative.
    y_ref, _, mask_1 = key_token_labels(nlp, domain, is_test=True)
    y_ref, _, mask_2 = key_token_pair_labels(nlp, domain, parts=10)
    # `&` binds tighter than `==`, so the label comparison has to be
    # evaluated on its own before it is combined with the key-token masks.
    is_negative = (y_ref == Sentiment.negative.value)
    mask = ~(mask_1 | mask_2) & is_negative
    dataset = [e for e, is_correct in zip(dataset, mask) if is_correct]

    # Split a dataset because it's better to cache more frequently.
    part, parts = part_parts
    chunks = utils.split(dataset, n=parts)
    dataset_chunk = chunks[part]
    for i, example in enumerate(dataset_chunk):
        yield i, -1, -1, -1, example  # Predict without a mask.
        [tokenized_example] = nlp.tokenize([example])
        n = len(tokenized_example.text_tokens)

        # A strictly upper-triangular boolean matrix: entry [a, b] is set
        # only if a < b. It is broadcast twice to select i < j and j < k.
        upper = np.zeros(shape=[n, n])
        upper[np.triu_indices(n, k=1)] = 1
        upper = upper.astype(bool)
        ij = upper.reshape([n, n, 1])
        jk = upper.reshape([1, n, n])
        matrix_ijk = np.where(ij & jk)
        for ijk in zip(*matrix_ijk):
            new_example = mask_tokens(nlp, tokenized_example, indices=ijk)
            yield (i, *ijk, new_example)
def test_sanity_classifier():
    """Sanity check (and walkthrough) of training, saving and serving a classifier."""
    np.random.seed(1)
    tf.random.set_seed(1)

    # This sanity test verifies, and shows, how to train a classifier. The
    # model is built from a config that holds everything required to create
    # the `BertABSClassifier` (including the BERT language model). The
    # default parameters are used here, but custom ones can be passed as
    # well (e.g. to change the number of polarities or the BERT properties).
    # NOTE(review): the strategy scope is assumed to cover only the four
    # construction statements below -- confirm against the original layout.
    pretrained_name = 'bert-base-uncased'
    strategy = tf.distribute.OneDeviceStrategy('CPU')
    with strategy.scope():
        config = BertABSCConfig.from_pretrained(pretrained_name)
        model = BertABSClassifier.from_pretrained(pretrained_name, config=config)
        tokenizer = transformers.BertTokenizer.from_pretrained(pretrained_name)
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

    # Training starts with a dataset: the non-differentiable part of the
    # training pipeline, which turns human-readable examples into batches
    # the model understands. Using the dataset class is optional; any
    # iterable that yields classifier train batches would do.
    example = LabeledExample(
        text='The breakfast was delicious, really great.',
        aspect='breakfast',
        sentiment=Sentiment.positive,
    )
    dataset = absa.training.ClassifierDataset(
        examples=[example, example],
        tokenizer=tokenizer,
        batch_size=2,
    )

    # Instead of the built-in `fit`, custom training loops (routines) are
    # used, so that each optimization step controls which parameters are
    # updated and how. The routine iterates over the dataset, performs the
    # train/test steps, and collects results through callbacks (with an
    # interface similar to `tf.keras.Callback`). See `train_classifier`.
    logger = Logger()
    history = LossHistory()
    absa.training.train_classifier(
        model, optimizer, dataset,
        epochs=10,
        callbacks=[logger, history],
        strategy=strategy,
    )

    # The model should easily overfit in just 10 iterations.
    assert 0.1 < history.train[1] < 1
    assert history.train[10] < 1e-4

    # Finally, save the model. `BertABSClassifier` inherits from
    # `TFBertPreTrainedModel`, so serialization comes for free.
    model.save_pretrained('.')

    # To make sure that serving works, the config and the model are loaded
    # once again, and the loss is checked on a single batch.
    del model, config
    config = BertABSCConfig.from_pretrained('.')
    model = BertABSClassifier.from_pretrained('.', config=config)
    batch = next(iter(dataset))
    logits, *_ = model.call(
        batch.token_ids,
        attention_mask=batch.attention_mask,
        token_type_ids=batch.token_type_ids,
    )
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        batch.target_labels, logits, axis=-1, name='Loss')
    reloaded_loss = per_example_loss.numpy().mean()
    assert reloaded_loss < 1e-4

    # The training procedure is roughly verified. The tuned model can now
    # back a pipeline, the high-level interface for predictions. It should
    # be highly confident that the example is positive (see the scores).
    professor = Professor()
    nlp = Pipeline(model, tokenizer, professor)
    (breakfast,) = nlp(example.text, aspects=['breakfast'])
    assert breakfast.sentiment == Sentiment.positive
    assert np.allclose(breakfast.scores, [0.0, 0.0, 0.99], atol=0.01)

    # That's all: remove the configuration and the temporarily saved model.
    for leftover in ('config.json', 'tf_model.h5'):
        os.remove(leftover)