Example #1
0
    def test_merge_dicts(self):
        """Check that ``merge_dicts_`` updates its first argument in place.

        The value of a key present in both dicts is expected to be the sum
        of the two values; keys present in only one dict keep their value.
        """
        target = {"a": 3, "b": 4}
        addition = {"b": 5, "c": 6}

        # Called for its side effect on ``target``; any return value is
        # deliberately ignored here.
        merge_dicts_(target, addition)

        self.assertEqual({"a": 3, "b": 9, "c": 6}, target)
def calc_stats(dest_dir, threshold):
    """Split projects by their share of logged entries and sum up the rest.

    Iterates over the ``(logged_stats, proj_name)`` pairs produced by
    ``file_mapper(dest_dir, calc_logged_stats,
    ContextsDataset.LABEL_FILE_EXT)``. For each pair the share
    ``WITH_LOGGING / (WITH_LOGGING + NO_LOGGING)`` is compared against
    ``threshold`` (given in percent, i.e. scaled by 0.01 before comparing).

    A project whose counters are both zero has no defined share; it is
    treated as having a share of 0.0 instead of raising
    ``ZeroDivisionError``.

    Args:
        dest_dir: Passed through to ``file_mapper`` as its first argument.
        threshold: Percentage; projects whose share of logged entries is
            less than or equal to it are ignored.

    Returns:
        A tuple ``(projects_to_ignore, res_logged_stats)``:
        ``projects_to_ignore`` is the list of project names at or below the
        threshold, and ``res_logged_stats`` is the dict into which the
        stats of all remaining projects were merged with ``merge_dicts_``.
    """
    projects_to_ignore = []
    res_logged_stats = {}
    for logged_stats, proj_name in file_mapper(dest_dir, calc_logged_stats,
                                               ContextsDataset.LABEL_FILE_EXT):
        with_logging = logged_stats[WITH_LOGGING]
        total = with_logging + logged_stats[NO_LOGGING]

        # Guard against projects without any counted entries: dividing by
        # a zero total would abort the whole run.
        if total == 0:
            logged_ratio = 0.0
        else:
            logged_ratio = float(with_logging) / total

        if logged_ratio <= threshold * 0.01:
            projects_to_ignore.append(proj_name)
        else:
            # merge_dicts_ is used for its in-place effect on the
            # accumulator; its return value is not needed here.
            merge_dicts_(res_logged_stats, logged_stats)
    return projects_to_ignore, res_logged_stats
Example #3
0
    def add_vocab(self, partial_vocab: 'PartialVocab') -> List[str]:
        """Fold the word counts and stats of ``partial_vocab`` into this one.

        The word counts of both vocabularies are combined via
        ``merge_dicts_``, the file counter is increased by the other
        vocabulary's ``n_files``, and ``self.stats`` is extended with the
        other vocabulary's stats followed by one new entry of the form
        ``(n_files, vocab_size, non_eng_count, non_eng_content_count)``.

        Returns:
            The second value returned by ``merge_dicts_`` (presumably the
            words that were not in this vocabulary before -- confirm
            against ``merge_dicts_``).
        """
        merged_counts, added_words = merge_dicts_(
            self.merged_word_counts, partial_vocab.merged_word_counts)
        self.merged_word_counts = merged_counts
        vocab_size = len(self.merged_word_counts)

        self.n_files += partial_vocab.n_files

        # Snapshot taken after the merge, so all numbers describe the
        # combined vocabulary.
        non_eng_count = self.merged_word_counts[placeholders['non_eng']]
        non_eng_content_count = self.merged_word_counts[
            placeholders['non_eng_content']]
        snapshot = (self.n_files, vocab_size,
                    non_eng_count, non_eng_content_count)

        self.stats.extend(partial_vocab.stats + [snapshot])
        return added_words
Example #4
0
    def add_vocab(self, partial_vocab) -> List[str]:
        """Merge another ``PartialVocab`` into this one.

        Combines the word counts of both vocabularies via ``merge_dicts_``,
        adds the other vocabulary's ``n_files`` to this one's, and extends
        ``self.stats`` with the other vocabulary's stats plus a new entry
        ``(n_files, vocab_size, non_eng_count, non_eng_content_count)``
        describing the merged state.

        Raises:
            TypeError: If ``partial_vocab`` is not a ``PartialVocab``.

        Returns:
            The second value returned by ``merge_dicts_`` (presumably the
            words newly added to this vocabulary -- confirm against
            ``merge_dicts_``).
        """
        if isinstance(partial_vocab, PartialVocab):
            merge_result = merge_dicts_(
                self.merged_word_counts, partial_vocab.merged_word_counts)
        else:
            raise TypeError(
                f'Vocab must be a PartialVocab, but is {type(partial_vocab)}')

        self.merged_word_counts, added_words = merge_result
        vocab_size = len(self.merged_word_counts)

        self.n_files += partial_vocab.n_files

        # The entry is built after the merge so that it reflects the
        # combined counts.
        counts = self.merged_word_counts
        merged_entry = (
            self.n_files,
            vocab_size,
            counts[placeholders['non_eng']],
            counts[placeholders['non_eng_content']],
        )
        self.stats.extend(partial_vocab.stats + [merged_entry])
        return added_words