Beispiel #1
0
    def preprocess_corpus(self, ents_corpus):
        """Spell-correct rare entity strings and threshold them by count.

        The ``'ents_proc1'`` entry of every item in ``ents_corpus`` is
        collected. Entity strings that occur fewer than
        ``self.n_spell_check_thres`` times across the corpus are passed
        through ``self._spell_check``; all others are kept unchanged. The
        corrected entities are then recounted, and any entity whose count
        is not greater than ``self.n_thres`` is mapped to ``None``.

        Args:
            ents_corpus: Container that is iterated for its keys and indexed
                by them; each value must provide an ``'ents_proc1'`` entry
                (presumably a list of entity strings per article -- confirm
                against the caller).

        Returns:
            The result of ``self._update_article_dict`` called with
            ``ents_corpus`` and the two lookup tables built here
            (original -> corrected, corrected -> final-or-``None``).
        """
        # Gather the per-article entity collections in iteration order.
        articles = []
        for article_id in ents_corpus:
            articles.append(ents_corpus[article_id]['ents_proc1'])
        all_mentions = flatten_nested_list(articles)

        # Distinct strings are what gets spell checked; the corpus-wide
        # frequencies decide which of them are rare enough to need it.
        distinct_ents = list(set(all_mentions))
        raw_counts = Counter(all_mentions)
        checker = self._create_spell_checker_dict(raw_counts,
                                                  self.n_spell_check_thres)

        # Lookup table: original entity string -> spell-corrected string.
        orig_to_proc = {}
        for raw_ent in distinct_ents:
            if raw_counts[raw_ent] < self.n_spell_check_thres:
                orig_to_proc[raw_ent] = self._spell_check(raw_ent, checker)
            else:
                orig_to_proc[raw_ent] = raw_ent

        corrected_articles = self._replace_ents(articles, orig_to_proc)

        # Recount the corrected entities.
        if self.ignore_article_counts:
            # Each article contributes at most one count per distinct
            # entity; the counted key is the lower-cased string.
            final_counts = Counter()
            for article in corrected_articles:
                for ent in set(article):
                    final_counts[ent.lower()] += 1
        else:
            final_counts = Counter(flatten_nested_list(corrected_articles))

        # Lookup table: corrected entity -> itself if frequent enough,
        # otherwise None.
        proc_to_final = {}
        for ent, count in final_counts.items():
            if count > self.n_thres:
                proc_to_final[ent] = ent
            else:
                proc_to_final[ent] = None

        return self._update_article_dict(ents_corpus, orig_to_proc,
                                         proc_to_final)
Beispiel #2
0
 def _list_luns(self):
     """Return the set of ``luns`` gathered from every item in ``self.tpgs``.

     ``self._check_self()`` is called first; the per-TPG ``luns`` are then
     flattened with ``flatten_nested_list`` and de-duplicated.
     """
     self._check_self()
     luns_per_tpg = []
     for tpg in self.tpgs:
         luns_per_tpg.append(tpg.luns)
     return set(flatten_nested_list(luns_per_tpg))
Beispiel #3
0
 def _list_network_portals(self):
     """Return the set of ``network_portals`` of every item in ``self.tpgs``.

     ``self._check_self()`` is called first; the per-TPG collections are
     then flattened with ``flatten_nested_list`` and de-duplicated.
     """
     self._check_self()
     portals_per_tpg = []
     for tpg in self.tpgs:
         portals_per_tpg.append(tpg.network_portals)
     return set(flatten_nested_list(portals_per_tpg))
Beispiel #4
0
 def _list_node_acls(self):
     """Return the set of ``node_acls`` of every item in ``self.tpgs``.

     ``self._check_self()`` is called first; the per-TPG collections are
     then flattened with ``flatten_nested_list`` and de-duplicated.
     """
     self._check_self()
     acls_per_tpg = []
     for tpg in self.tpgs:
         acls_per_tpg.append(tpg.node_acls)
     return set(flatten_nested_list(acls_per_tpg))
Beispiel #5
0
 def _list_tpgs(self):
     """Return the set of ``tpgs`` gathered from every item in ``self.targets``.

     ``self._check_self()`` is called first; the per-target ``tpgs`` are
     then flattened with ``flatten_nested_list`` and de-duplicated.
     """
     self._check_self()
     tpgs_per_target = []
     for target in self.targets:
         tpgs_per_target.append(target.tpgs)
     return set(flatten_nested_list(tpgs_per_target))
Beispiel #6
0
 def _list_storage_objects(self):
     """Return the set of ``storage_objects`` of every item in ``self.backstores``.

     ``self._check_self()`` is called first; the per-backstore collections
     are then flattened with ``flatten_nested_list`` and de-duplicated.
     """
     self._check_self()
     objects_per_backstore = []
     for store in self.backstores:
         objects_per_backstore.append(store.storage_objects)
     return set(flatten_nested_list(objects_per_backstore))