def build_dataset(domain: str, unknown_token: str) -> Iterable[LabeledExample]:
    """Build the SemEval test set with every aspect hidden.

    Each loaded example keeps its text and sentiment, while its aspect is
    replaced by `unknown_token`.

    Args:
        domain: Domain name forwarded to `absa.load_examples`.
        unknown_token: Special token substituted for every aspect.

    Returns:
        A lazy iterable of `LabeledExample` objects.
    """

    def hide_aspect(example):
        # Remove the information about the aspect by swapping in the
        # special token.
        return LabeledExample(example.text, unknown_token, example.sentiment)

    test_examples = absa.load_examples('semeval', domain, test=True)
    return map(hide_aspect, test_examples)
def evaluate(domain: str):
    """Evaluate the pretrained model of `domain` on the SemEval test set.

    Args:
        domain: Key into `PRETRAINED_MODEL_NAMES`; also selects the dataset.

    Returns:
        The result of `nlp.evaluate` with a 3-class `ConfusionMatrix`
        metric, converted with `.numpy()`.
    """
    model_name = PRETRAINED_MODEL_NAMES[domain]
    test_examples = absa.load_examples('semeval', domain, test=True)
    pipeline = absa.load(model_name)
    matrix_metric = absa.training.ConfusionMatrix(num_classes=3)
    result = pipeline.evaluate(test_examples, matrix_metric, batch_size=32)
    return result.numpy()
def test_evaluate(nlp: Pipeline):
    """Check `nlp.evaluate` with an accuracy metric on restaurant examples."""
    test_examples = load_examples(dataset='semeval',
                                  domain='restaurant',
                                  test=True)
    accuracy = tf.metrics.Accuracy()

    first_score = nlp.evaluate(test_examples[:40], accuracy, batch_size=10)
    first_score = first_score.numpy()
    # The model predicts the first 40 labels perfectly.
    assert first_score == 1

    # The very same metric object is passed again for the next 10 examples.
    # NOTE(review): presumably the metric keeps its state, so 0.98 would be
    # the accuracy over all 50 examples -- confirm.
    second_score = nlp.evaluate(test_examples[40:50], accuracy, batch_size=10)
    assert np.isclose(second_score, 0.98)
def mask_examples(nlp: Pipeline, domain: str, is_test: bool):
    """Yield every example unmasked, then once per masked token.

    Args:
        nlp: Pipeline used to tokenize examples; also passed to
            `mask_tokens`.
        domain: Domain name forwarded to `absa.load_examples`.
        is_test: Forwarded positionally as the third argument of
            `absa.load_examples`.

    Yields:
        Tuples `(example_index, token_index, example)`. The token index is
        -1 for the original example, otherwise it is the index of the single
        token passed to `mask_tokens`.
    """
    examples = absa.load_examples('semeval', domain, is_test)
    for example_id, example in enumerate(examples):
        # Predict without a mask.
        yield example_id, -1, example

        [tokenized] = nlp.tokenize([example])
        token_count = len(tokenized.text_tokens)
        for token_index in range(token_count):
            masked = mask_tokens(nlp, tokenized, indices=[token_index])
            yield example_id, token_index, masked
def test_semeval_classification_restaurant():
    """Check the accuracy of 'absa/classifier-rest-0.2' on the test set.

    The accuracy is derived from the confusion matrix: the diagonal sum
    divided by the sum of all entries.
    """
    test_examples = absa.load_examples(dataset='semeval',
                                       domain='restaurant',
                                       test=True)
    classifier = absa.load('absa/classifier-rest-0.2')
    matrix_metric = ConfusionMatrix(num_classes=3)
    matrix = classifier.evaluate(test_examples, matrix_metric, batch_size=32)
    matrix = matrix.numpy()

    correct = np.diagonal(matrix).sum()
    total = matrix.sum()
    accuracy = correct / total
    assert round(accuracy, 4) >= .8517
def build_train_dataset(domain: str, seed: int = 0) -> Iterable[LabeledExample]:
    """Build a binary training set from the SemEval train examples.

    Every loaded example appears twice: first with its own aspect and the
    label 1, then (after all the positives) with a randomly chosen noun as
    the aspect and the label 0.

    Args:
        domain: Domain name forwarded to `absa.load_examples`.
        seed: Seed of the random generator which picks the nouns.

    Returns:
        A lazy iterable: all positive examples followed by all negative ones.
    """
    examples = absa.load_examples('semeval', domain, test=False)
    nouns = ['lamp', 'window', 'table', 'cap', 'backpack', 'key', 'chair']

    # Keep seeding the global generator, as callers may rely on this
    # long-standing side effect.
    random.seed(seed)
    # The nouns are drawn lazily, while the dataset is consumed. Drawing
    # from a private generator makes the result depend only on `seed`, not
    # on whatever touches the global `random` state in the meantime.
    rng = random.Random(seed)

    pos_examples = (LabeledExample(e.text, e.aspect, 1) for e in examples)
    neg_examples = (LabeledExample(e.text, rng.choice(nouns), 0)
                    for e in examples)
    return itertools.chain(pos_examples, neg_examples)
def _evaluate(nlp: Pipeline, domain: str, name: str) -> np.ndarray:
    """Compare predictions before and after masking tokens.

    The SemEval test examples are processed in batches of 32. Each batch is
    transformed, every prediction is passed through `mask_tokens(..., k=2)`,
    and the masked batch is transformed again.

    Args:
        nlp: Pipeline used for the predictions; also passed to `mask_tokens`.
        domain: Domain name forwarded to `absa.load_examples`.
        name: Not used in the body.
            NOTE(review): presumably it only distinguishes cached results
            -- confirm against the caller.

    Returns:
        An array built from `(reference, masked)` sentiment value pairs,
        one pair per example.
    """
    label_pairs = []
    examples = absa.load_examples('semeval', domain, test=True)
    for batch in absa.utils.batches(examples, batch_size=32):
        # Keep the predictions in memory because they are read twice below.
        reference = list(nlp.transform(batch))
        masked_batch = [mask_tokens(nlp, item, k=2) for item in reference]
        masked = nlp.transform(masked_batch)

        reference_labels = [item.sentiment.value for item in reference]
        masked_labels = [item.sentiment.value for item in masked]
        label_pairs.extend(zip(reference_labels, masked_labels))
    # It's not a generator because we cache function results.
    return np.array(label_pairs)
def mask_examples(nlp: Pipeline, domain: str, part_parts: Tuple[int, int]):
    """Yield one chunk of examples unmasked, then once per masked token pair.

    Args:
        nlp: Pipeline used to tokenize examples; also passed to
            `mask_tokens`.
        domain: Domain name forwarded to `absa.load_examples`.
        part_parts: A pair `(part, parts)`: the dataset is split into `parts`
            chunks and only the chunk at index `part` is processed.

    Yields:
        Tuples `(example_index, i, j, example)`. Both token indices are -1
        for the original example, otherwise `i < j` are the indices passed
        to `mask_tokens`. The example index is relative to the chunk.
    """
    examples = absa.load_examples('semeval', domain, test=True)
    # Split a dataset because it's better to cache more frequently.
    part, parts = part_parts
    selected = utils.split(examples, n=parts)[part]

    for example_id, example in enumerate(selected):
        # Predict without a mask.
        yield example_id, -1, -1, example

        [tokenized] = nlp.tokenize([example])
        token_count = len(tokenized.text_tokens)
        # The offset k=1 excludes the diagonal, so a token is never
        # paired with itself.
        rows, cols = np.triu_indices(token_count, k=1)
        for pair in zip(rows, cols):
            masked = mask_tokens(nlp, tokenized, indices=pair)
            yield (example_id, *pair, masked)
def build_dataset(domain: str, seed: int) -> Iterable[LabeledExample]:
    """Build a test set of unrelated aspects with the neutral sentiment.

    Only the positive and negative SemEval test examples are kept. Each of
    them gets a randomly chosen noun as the aspect, and the expected
    sentiment is set to neutral.

    Args:
        domain: Domain name forwarded to `absa.load_examples`.
        seed: Seed of the random generator which picks the nouns.

    Returns:
        A lazy iterable of `LabeledExample` objects.
    """
    examples = absa.load_examples('semeval', domain, test=True)
    # Process only positive/negative examples.
    polar = (Sentiment.positive, Sentiment.negative)
    nouns = ['car', 'plane', 'bottle', 'bag', 'desk', 'fridge', 'sink']

    # Keep seeding the global generator, as callers may rely on this
    # long-standing side effect.
    random.seed(seed)
    # The nouns are drawn lazily, while the dataset is consumed. Drawing
    # from a private generator makes the result depend only on `seed`, not
    # on whatever touches the global `random` state in the meantime.
    rng = random.Random(seed)

    # Map unrelated aspects (verified) and expect the neutral sentiment.
    return (
        LabeledExample(e.text, rng.choice(nouns), Sentiment.neutral)
        for e in examples
        if e.sentiment in polar
    )
def build_dataset(domain: str,
                  template: str,
                  template_sent: Sentiment,
                  seed: int) -> Iterable[LabeledExample]:
    """Build a test set with a templated sentence appended to each text.

    Only examples whose sentiment differs from `template_sent` are kept.
    The template is formatted with a randomly chosen noun and appended to
    the text after a single space; the aspect and the sentiment stay
    untouched.

    Args:
        domain: Domain name forwarded to `absa.load_examples`.
        template: Format string with a `{noun}` placeholder.
        template_sent: Sentiment of the template; either
            `Sentiment.negative` or `Sentiment.positive`.
        seed: Seed of the random generator which picks the nouns.

    Returns:
        A lazy iterable of `LabeledExample` objects.

    Raises:
        KeyError: If `template_sent` is neither negative nor positive.
    """
    examples = absa.load_examples('semeval', domain, test=True)
    mapping = {
        Sentiment.negative: [Sentiment.neutral, Sentiment.positive],
        Sentiment.positive: [Sentiment.neutral, Sentiment.negative]
    }
    # Look the sentiments up once, so that an unsupported `template_sent`
    # fails right here rather than in the middle of the iteration.
    allowed = mapping[template_sent]
    nouns = ['car', 'plane', 'bottle', 'bag', 'desk', 'fridge', 'sink']

    # Keep seeding the global generator, as callers may rely on this
    # long-standing side effect.
    random.seed(seed)
    # The nouns are drawn lazily, while the dataset is consumed. Drawing
    # from a private generator makes the result depend only on `seed`, not
    # on whatever touches the global `random` state in the meantime.
    rng = random.Random(seed)

    def convert(e):
        sentence = template.format(noun=rng.choice(nouns))
        return LabeledExample(text=e.text + " " + sentence,
                              aspect=e.aspect,
                              sentiment=e.sentiment)

    return (convert(e) for e in examples if e.sentiment in allowed)
def mask_examples(
        nlp: Pipeline,
        domain: str,
        part_parts: Tuple[int, int]
):
    """Yield one chunk of examples unmasked, then once per masked triplet.

    Before chunking, the SemEval test set is filtered down to the examples
    that are flagged neither by `key_token_labels` nor by
    `key_token_pair_labels`, and whose reference label is negative.

    Args:
        nlp: Pipeline used to tokenize examples; also passed to the label
            helpers and to `mask_tokens`.
        domain: Domain name forwarded to `absa.load_examples` and to the
            label helpers.
        part_parts: A pair `(part, parts)`: the filtered dataset is split
            into `parts` chunks and only the chunk at index `part` is
            processed.

    Yields:
        Tuples `(example_index, i, j, k, example)`. All token indices are -1
        for the original example, otherwise `i < j < k` are the indices
        passed to `mask_tokens`. The example index is relative to the chunk.
    """
    dataset = absa.load_examples('semeval', domain, test=True)

    # Filter out examples that contain a key token or a pair of key tokens,
    # and that are other than negative.
    _, _, mask_1 = key_token_labels(nlp, domain, is_test=True)
    y_ref, _, mask_2 = key_token_pair_labels(nlp, domain, parts=10)
    # The parentheses around the comparison are required: `&` binds tighter
    # than `==`, so without them the expression would compare the result of
    # `~(...) & y_ref` with the label value.
    is_negative = (y_ref == Sentiment.negative.value)
    mask = ~(mask_1 | mask_2) & is_negative
    dataset = [e for e, is_correct in zip(dataset, mask) if is_correct]

    # Split a dataset because it's better to cache more frequently.
    part, parts = part_parts
    chunks = utils.split(dataset, n=parts)
    dataset_chunk = chunks[part]

    for i, example in enumerate(dataset_chunk):
        # Predict without a mask.
        yield i, -1, -1, -1, example

        [tokenized_example] = nlp.tokenize([example])
        n = len(tokenized_example.text_tokens)

        # Strictly upper-triangular matrix: upper[a, b] is True iff a < b.
        upper = np.zeros(shape=[n, n])
        upper[np.triu_indices(n, k=1)] = 1
        upper = upper.astype(bool)
        # Broadcast it along two different axes, so the conjunction marks
        # every triplet with i < j (first view) and j < k (second view).
        ij = upper.reshape([n, n, 1])
        jk = upper.reshape([1, n, n])
        matrix_ijk = np.where(ij & jk)

        for ijk in zip(*matrix_ijk):
            new_example = mask_tokens(nlp, tokenized_example, indices=ijk)
            yield (i, *ijk, new_example)
def test_load_semeval_classifier_examples():
    """Check sizes, types, and the train/test overlap of SemEval datasets."""
    # (domain, expected number of train examples, expected number of
    # test examples)
    expected_sizes = [
        ('laptop', 2313, 638),
        ('restaurant', 3602, 1120),
    ]
    for domain, train_size, test_size in expected_sizes:
        train_examples = load_examples(dataset='semeval',
                                       domain=domain,
                                       test=False)
        assert len(train_examples) == train_size
        assert isinstance(train_examples[0], LabeledExample)

        test_examples = load_examples(dataset='semeval',
                                      domain=domain,
                                      test=True)
        assert len(test_examples) == test_size
        assert isinstance(test_examples[0], LabeledExample)

        # Interesting: in both the laptop and the restaurant dataset there
        # is exactly one sentence which appears in both the train and the
        # test set.
        train_texts = {e.text for e in train_examples}
        test_texts = {e.text for e in test_examples}
        assert len(train_texts & test_texts) == 1

    with pytest.raises(NotFound):
        load_examples(dataset='mistake')
def experiment(ID: int,
               domain: str,
               base_model_name: str,
               epochs: int,
               batch_size: int = 32,
               learning_rate: float = 3e-5,
               beta_1: float = 0.9,
               beta_2: float = 0.999,
               seed: int = 1) -> float:
    """Train a `BertABSClassifier` and store the results of the experiment.

    Everything is written into `ROOT_DIR/results/classifier-{domain}-{ID:03}`:
    the checkpoints, the log file, the serialized callbacks, and finally
    the best model together with its tokenizer.

    Args:
        ID: Experiment number; part of the experiment directory name.
        domain: Domain of the examples to load; also part of the directory
            name.
        base_model_name: Name passed to both `from_pretrained` calls (the
            model and the tokenizer).
        epochs: Number of epochs passed to `train_classifier`.
        batch_size: Batch size of the train and the test dataset.
        learning_rate: Learning rate of the Adam optimizer.
        beta_1: `beta_1` of the Adam optimizer.
        beta_2: `beta_2` of the Adam optimizer.
        seed: Seed of the NumPy and the TensorFlow random generators.

    Returns:
        `best_result` of the accuracy history callback, which can serve as
        the objective of hyper-parameter tuning.

    Raises:
        FileExistsError: If the experiment directory already exists.
    """
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Prepare the directory of this experiment and every path inside it.
    # An existing directory is an error, so results are never overwritten.
    run_name = f'classifier-{domain}-{ID:03}'
    run_dir = os.path.join(ROOT_DIR, 'results', run_name)
    os.makedirs(run_dir, exist_ok=False)
    checkpoints_dir = os.path.join(run_dir, 'checkpoints')
    log_path = os.path.join(run_dir, 'experiment.log')
    callbacks_path = os.path.join(run_dir, 'callbacks.bin')

    # The logger lives in global state, so drop the handlers which the
    # previous experiment may have left behind.
    logging.getLogger('absa').handlers = []

    # Load the labeled examples and hold 10% of them out. The held-out
    # *test* part only monitors the training (strictly speaking, it is
    # the dev set). Note that the split uses a fixed random state.
    all_examples = absa.load_examples(domain=domain)
    train_examples, test_examples = train_test_split(all_examples,
                                                     test_size=0.1,
                                                     random_state=1)

    # Build the model, the tokenizer, and the optimizer under the strategy
    # scope to distribute the training. Here it is a single GPU; multi-GPU
    # training via MirroredStrategy can be used as well. The pretrained
    # model is loaded with extra options passed to `from_pretrained`.
    strategy = tf.distribute.OneDeviceStrategy('GPU')
    with strategy.scope():
        model = absa.BertABSClassifier.from_pretrained(
            base_model_name,
            output_attentions=True,
            output_hidden_states=True)
        tokenizer = transformers.BertTokenizer.from_pretrained(
            base_model_name)
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                             beta_1=beta_1,
                                             beta_2=beta_2,
                                             epsilon=1e-8)

    # A dataset is the non-differentiable part of the training pipeline:
    # it turns human-readable examples into batches the model understands.
    # Any iterable which produces such batches would do as well.
    train_dataset = ClassifierDataset(train_examples, batch_size, tokenizer)
    test_dataset = ClassifierDataset(test_examples, batch_size, tokenizer)

    # The training runs in a custom loop (a routine) instead of the
    # built-in `fit`; the results are collected by callbacks. Please take
    # a look at the `train_classifier` function for more details.
    logger = Logger(file_path=log_path)
    loss_history = LossHistory()
    acc_history = CategoricalAccuracyHistory()
    early_stopping = EarlyStopping(loss_history, patience=3, min_delta=0.001)
    checkpoints = ModelCheckpoint(model, loss_history, checkpoints_dir)
    callbacks = [
        logger,
        loss_history,
        acc_history,
        checkpoints,
        early_stopping,
    ]
    absa.training.train_classifier(model, optimizer, train_dataset, epochs,
                                   test_dataset, callbacks, strategy)

    # In the end, save the model. Reload the best checkpoint and store it
    # next to the tokenizer and the serialized callbacks.
    best_model = absa.BertABSClassifier.from_pretrained(
        checkpoints.best_model_dir)
    best_model.save_pretrained(run_dir)
    tokenizer.save_pretrained(run_dir)
    absa.utils.save([logger, loss_history, acc_history], callbacks_path)

    # Return the experiment metric value to do the hyper-parameters tuning.
    return acc_history.best_result
def evaluate(domain: str, nlp_modified: bool = False) -> np.ndarray:
    """Evaluate `nlp` on the SemEval test set of `domain`.

    The pipeline `nlp` is not a parameter; it is read from the enclosing
    scope.

    Args:
        domain: Domain name forwarded to `absa.load_examples`.
        nlp_modified: Not used in the body.
            NOTE(review): presumably it only distinguishes cached results
            of the original and the modified pipeline -- confirm against
            the caller.

    Returns:
        The result of `nlp.evaluate` with a 3-class `ConfusionMatrix`
        metric, converted with `.numpy()`.
    """
    test_examples = absa.load_examples('semeval', domain, test=True)
    matrix_metric = absa.training.ConfusionMatrix(num_classes=3)
    result = nlp.evaluate(test_examples, matrix_metric, batch_size=32)
    return result.numpy()