Example #1
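# Imports assumed by the snippets below (a sketch, not taken verbatim from the
# original file). `TargetCollection`, `Target` and `TargetPredictor` are
# project-specific classes; import them from wherever they live in your copy
# of the code base.
import json
import re
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, Generator, List, Tuple, Union

from allennlp.common import Params        # module paths may differ between
from allennlp.data import DatasetReader   # AllenNLP versions
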
        def split_tests(data_: TargetCollection, train_: TargetCollection, 
                        test_: TargetCollection, test_split: float):
            data_size = len(data_)
            train_size = len(train_)
            test_size = len(test_)
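            # Worked example (illustrative numbers only): with data_size == 100
            # and test_split == 0.2 the test split should hold
            # int(100 * 0.2) == 20 samples and the training split the other 80.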
            assert train_size == (data_size - int(data_size * test_split))
            assert test_size == int(data_size * test_split)
            assert data_size == (train_size + test_size)

            train_ids = []
            test_ids = []
            # Extract the numeric part of each sample's `target_id` so the two
            # splits can be compared on the underlying ids.
            for data in train_.data_dict():
                train_ids.append(re.findall(r'\d+', data['target_id'])[0])
            for data in test_.data_dict():
                test_ids.append(re.findall(r'\d+', data['target_id'])[0])
            # Ids must be unique within each split and the two splits must be
            # disjoint, i.e. no sample occurs in both train and test.
            assert len(train_ids) == len(set(train_ids))
            assert len(test_ids) == len(set(test_ids))
            for train_id in train_ids:
                assert train_id not in test_ids
            for test_id in test_ids:
                assert test_id not in train_ids
            return train_ids, test_ids
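
# A minimal, standalone sketch of the id handling in `split_tests` above. The
# `target_id` strings here are hypothetical; only the numeric part extracted
# by the regular expression is compared between splits.
_example_train_ids = [re.findall(r'\d+', tid)[0]
                      for tid in ('target_12_0', 'target_34_0')]
_example_test_ids = [re.findall(r'\d+', tid)[0]
                     for tid in ('target_56_0', 'target_78_0')]
assert set(_example_train_ids).isdisjoint(_example_test_ids)
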
    def _data_to_json(data: TargetCollection, file_path: Path) -> None:
        '''
        Converts the data into JSON format (one JSON object per line) and 
        saves it to the given file path. The AllenNLP models read the data 
        from these JSON formatted files.

        :param data: data to be saved into json format.
        :param file_path: file location to save the data to.
        '''
        target_data = data.data_dict()
        with file_path.open('w+') as json_file:
            for index, sample in enumerate(target_data):
                # Ensure `epoch_number` is a JSON serialisable list.
                if 'epoch_number' in sample:
                    sample['epoch_number'] = list(sample['epoch_number'])
                json_encoded_data = json.dumps(sample)
                # Newline delimit every record after the first so the file is
                # in JSON Lines format.
                if index != 0:
                    json_encoded_data = f'\n{json_encoded_data}'
                json_file.write(json_encoded_data)
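
# A minimal round-trip sketch of the JSON Lines format `_data_to_json`
# produces: one `json.dumps` record per line, newline separated. The sample
# dictionaries and the file name are hypothetical.
_example_samples = [{'target_id': 'target_1_0', 'sentiment': 'positive'},
                    {'target_id': 'target_2_0', 'sentiment': 'negative'}]
_example_fp = Path('example_train.json')
with _example_fp.open('w+') as json_file:
    json_file.write('\n'.join(json.dumps(sample) for sample in _example_samples))
with _example_fp.open('r') as json_file:
    assert [json.loads(line) for line in json_file] == _example_samples
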
def generate_stats(data_path: Path) -> Dict[str, Union[int, float]]:
    '''
    Reads a JSON Lines formatted dataset of Target samples and returns its 
    size, the fraction of samples whose sentence contains 1 or 2 distinct 
    sentiments, and the fraction of samples per sentiment label.

    :param data_path: file path to a JSON Lines formatted dataset.
    :return: dictionary mapping each statistic name to its value.
    '''
    target_data = []
    with data_path.open('r') as data_lines:
        for line in data_lines:
            line = json.loads(line)
            # Spans are stored as lists in JSON; convert them back to tuples.
            line['spans'] = [tuple(span) for span in line['spans']]
            target_data.append(Target(**line))
    target_data = TargetCollection(target_data)
    target_stats = defaultdict(int)
    data_size = len(target_data)
    target_stats['size'] = data_size
    # Number of samples whose sentence contains `i` distinct sentiments.
    for i in range(1, 3):
        target_stats[f'Distinct sentiment {i}'] = len(
            target_data.subset_by_sentiment(i))
    # Count how many samples carry each sentiment label.
    for data in target_data.data_dict():
        target_stats[data['sentiment']] += 1
    # Normalise everything except the dataset size into fractions.
    for key, value in target_stats.items():
        if key == 'size':
            continue
        target_stats[key] = value / data_size
    return target_stats
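
# Usage sketch for `generate_stats`; the dataset location is hypothetical and
# the call is guarded so the sketch is a no-op when the file does not exist.
_train_fp = Path('data', 'train.json')
if _train_fp.exists():
    _train_stats = generate_stats(_train_fp)
    print(_train_stats['size'], _train_stats['Distinct sentiment 1'])
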
def augmented_dataset(target_related_words_sim: Dict[str, List[Tuple[str,
                                                                     float]]],
                      dataset: TargetCollection, save_fp: Path,
                      lower: bool) -> None:
    '''
    Given a dictionary mapping each target word from the training dataset to 
    a list of its related words and their similarity scores, writes every 
    sample of the TDSA training dataset whose target appears as a key in that 
    dictionary to the save file, together with the related targets and 
    similarity scores (sorted by descending similarity) under the keys 
    `alternative_targets` and `alternative_similarity`.

    :param target_related_words_sim: dictionary mapping a target word to a 
                                     list of (related word, similarity score) 
                                     tuples.
    :param dataset: TDSA training dataset to augment.
    :param save_fp: file path to write the augmented samples to (JSON Lines 
                    format).
    :param lower: whether to lower case a sample's target before looking it 
                  up in the dictionary.
    '''
    training_targets_in_embeddings = set(target_related_words_sim)
    with save_fp.open('w+') as save_file:
        count = 0
        for target_dict in dataset.data_dict():
            original_target = target_dict['target']
            if lower:
                original_target = original_target.lower()
            if original_target in training_targets_in_embeddings:
                alt_targets_similarity = target_related_words_sim[
                    original_target]
                alt_targets_similarity = sorted(alt_targets_similarity,
                                                key=lambda x: x[1],
                                                reverse=True)
                different_targets = [
                    target for target, _ in alt_targets_similarity
                ]
                alternative_similarity = [
                    similarity for _, similarity in alt_targets_similarity
                ]
                target_dict['alternative_targets'] = different_targets
                target_dict['alternative_similarity'] = alternative_similarity
                # Ensure `epoch_number` is a JSON serialisable list.
                target_dict['epoch_number'] = list(target_dict['epoch_number'])
                json_target_dict = json.dumps(target_dict)
                # Newline delimit every record after the first (JSON Lines).
                if count != 0:
                    json_target_dict = f'\n{json_target_dict}'
                count += 1
                save_file.write(json_target_dict)
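
# Each line written by `augmented_dataset` is a normal sample dictionary with
# two extra, aligned lists. A reading-back sketch with a hypothetical save
# path, guarded so it is a no-op when the file does not exist.
_augmented_fp = Path('augmented_train.json')
if _augmented_fp.exists():
    with _augmented_fp.open('r') as aug_file:
        for line in aug_file:
            sample = json.loads(line)
            # Alternatives are sorted by descending similarity, so the first
            # entry in each list is the most similar replacement target.
            best_target = sample['alternative_targets'][0]
            best_score = sample['alternative_similarity'][0]
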
    def _predict_iter(self, data: TargetCollection
                      ) -> Generator[Dict[str, Any], None, None]:
        '''
        Iterates over the predictions and yields one prediction at a time.

        This is a useful wrapper as it performs the data pre-processing and 
        assertion checks.

        :param data: Data to predict on
        :yields: A dictionary containing `class_probabilities` and `label`.
        '''
        no_model_error = 'There is no model to make predictions, either fit '\
                         'or load a model.'
        assert self.model, no_model_error
        self.model.eval()

        all_model_params = Params.from_file(self._param_fp)

        reader_params = all_model_params.get("dataset_reader")
        dataset_reader = DatasetReader.from_params(reader_params)
        predictor = TargetPredictor(self.model, dataset_reader)

        # Use the batch size from the training configuration's iterator if one
        # was given, otherwise fall back to a default of 64.
        batch_size = 64
        if 'iterator' in all_model_params:
            iter_params = all_model_params.get("iterator")
            if 'batch_size' in iter_params:
                batch_size = iter_params['batch_size']
        
        json_data = data.data_dict()
        # Reference
        # https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
        for i in range(0, len(json_data), batch_size):
            json_data_batch = json_data[i:i + batch_size]
            predictions = predictor.predict_batch_json(json_data_batch)
            for prediction in predictions:
                yield prediction
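
# A minimal, model-free sketch of the batching idiom used in `_predict_iter`:
# consecutive `batch_size` sized slices cover the data exactly once, with the
# final slice holding whatever remains.
_example_batch_size = 4
_example_data = list(range(10))
_example_batches = [_example_data[i:i + _example_batch_size]
                    for i in range(0, len(_example_data), _example_batch_size)]
assert _example_batches == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]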