Example #1
0
    def predict_into_collection(
            self,
            collection: TargetTextCollection,
            key_mapping: Dict[str, str],
            batch_size: Optional[int] = None,
            append_if_exists: bool = True) -> TargetTextCollection:
        '''
        Predicts on every sample in `collection` and stores the predictions
        back into the collection under the keys given by `key_mapping`.

        :param collection: The TargetTextCollection that is to be predicted on
                           and to be the store of the predicted data.
        :param key_mapping: Dictionary mapping the prediction keys that contain
                            the prediction values to the keys that will store
                            those prediction values within the collection that
                            has been predicted on.
        :param batch_size: Specify the batch size to predict on. If left None
                           defaults to 64 unless it is specified in the
                           `model_param_fp` within the constructor then
                           the batch size from the param file is used.
        :param append_if_exists: If False and a TargetText within the collection
                                 already has a prediction within the given key
                                 based on the `key_mapping` then KeyError is
                                 raised.
        :returns: The collection that was predicted on with the new predictions
                  within the collection stored in keys that are the values of
                  the `key_mapping` argument. Note that all predictions are
                  stored within Lists within their respective keys in the
                  collection.
        :raises KeyError: If any key from `key_mapping` is not within the
                          prediction dictionary.
        :raises KeyError: If `append_if_exists` is False and a TargetText
                          within the collection already has a prediction within
                          the given key based on the `key_mapping`.
        '''
        for prediction, original_target in self._predict_iter(
                collection.dict_iterator(),
                batch_size=batch_size,
                yield_original_target=True):
            text_id = original_target['text_id']
            # Validate every mapped key first so that the KeyError is raised
            # before any data is added to the TargetTextCollection.
            for prediction_key in key_mapping:
                if prediction_key not in prediction:
                    raise KeyError(
                        f'The key {prediction_key} from `key_mapping`'
                        f' {key_mapping} is not within the prediction'
                        f' {prediction} for the following TargetText'
                        f' {original_target}')

            for prediction_key, collection_key in key_mapping.items():
                if collection_key not in collection[text_id]:
                    collection[text_id][collection_key] = []
                elif not append_if_exists:
                    raise KeyError(
                        f'The key {collection_key} from `key_mapping`'
                        f' {key_mapping} already exists within the'
                        f' following TargetText {original_target}')
                collection[text_id][collection_key].append(
                    prediction[prediction_key])
        return collection
Example #2
0
def text_classification_prediction(model: AllenNLPModel,
                                   dataset: TargetTextCollection,
                                   prediction_key: str) -> None:
    for value in model._predict_iter(dataset.dict_iterator(),
                                     yield_original_target=True):
        prediction_object, target_object = value
        predicted_sentiment = prediction_object['label']
        true_sentiment = target_object['target_sentiments']
        number_sentiments = len(true_sentiment)
        predicted_sentiment = [predicted_sentiment] * number_sentiments
        text_id = target_object['text_id']
        if prediction_key not in dataset[text_id]:
            dataset[text_id][prediction_key] = []
        dataset[text_id][prediction_key].append(predicted_sentiment)
Example #3
0
def tokens_per_sentence(collection: TargetTextCollection, 
                        tokeniser: Callable[[str], List[str]]) -> Dict[int, int]:
    '''
    :param collection: The collection to generate the statistic for.
    :param tokeniser: The tokenizer to use to split the sentences/texts into 
                      tokens. If the collection has already been tokenised then 
                      it will use the tokens in the `tokenized_text` key within 
                      each sample in the collection else it will produce the 
                      tokens within this function and save them to that key as 
                      well. For a module of comptabile tokenisers 
                      :py:mod:`target_extraction.tokenizers`
    :returns: A dictionary of sentence lengths and their frequency.
              **This is a defaultdict where the value will be 0 if the key 
              does not exist.**
    '''
    if_tokenised = 'tokenized_text' in next(collection.dict_iterator())
    if not if_tokenised:
        collection.tokenize(tokeniser)
    length_count = defaultdict(lambda: 0)
    for target_text in collection.values():
        length_count[len(target_text['tokenized_text'])] += 1
    return length_count