Esempio n. 1
0
 def __init__(self, output=None, corpus=None,
              attribute="categories",
              categories=None, mode="combined"):
     """
     Create a CategoryToCorpus module, which loads a corpus with tagged
     documents.

     Arguments:
     output -- stored as self.output; not otherwise used here.
     corpus -- an existing corpus to add to.  When None, a new Corpus
         is constructed instead.
     attribute -- stored as self.attribute (presumably the name of the
         document attribute that holds the category tags; confirm
         against the code that reads it).
     categories -- stored as self.categories (presumably limits which
         categories are loaded; confirm against the code that reads it).
     mode -- the corpus loading method to use.  If set to "combined",
         all documents in a category are concatenated to a single
         document.  Otherwise each document is loaded separately.
     """
     self.output = output
     self.corpora = {}
     # combined mode has a single corpus
     # Use an identity test: "== None" would be routed through any
     # __eq__ defined by the corpus type and could misreport a real
     # corpus object as missing.
     if corpus is None:
         self.corpus = Corpus()
     else:
         self.corpus = corpus
     self.module_type = enumModuleType(enumModuleType.Document)
     self.module_processing_type = \
         enumModuleProcessingType(enumModuleProcessingType.PostProcess)
     self.attribute = attribute
     self.categories = categories
     self.mode = mode
     self.pp = pprint.PrettyPrinter(indent=4)
Esempio n. 2
0
 def __init__(self, output=None, corpus=None):
     """
     Set up a Document / PostProcess module around a corpus.

     Arguments:
     output -- stored as self.output; not otherwise used here.
     corpus -- an existing corpus to use.  When None, a new Corpus is
         constructed instead.
     """
     self.output = output
     # Identity test rather than "== None", so a corpus type that
     # defines __eq__ cannot be mistaken for a missing argument.
     self.corpus = Corpus() if corpus is None else corpus
     self.module_type = enumModuleType(enumModuleType.Document)
     self.module_processing_type = \
         enumModuleProcessingType(enumModuleProcessingType.PostProcess)
Esempio n. 3
0
 def __init__(self, output=None):
     """
     Set up a Document / PostProcess module with its own Graph.

     Arguments:
     output -- stored as self.output; not otherwise used here.

     A freshly constructed Graph is stored as self.cooccur_graph.
     """
     self.output = output
     self.cooccur_graph = Graph()

     # Module classification: document-level, run as a post-process step.
     document_kind = enumModuleType.Document
     self.module_type = enumModuleType(document_kind)
     post_process_kind = enumModuleProcessingType.PostProcess
     self.module_processing_type = enumModuleProcessingType(post_process_kind)
Esempio n. 4
0
 def __init__(self):
     """
     Set up a Document / PostProcess module with an empty index.

     self.index starts out as an empty dict.
     """
     # Module classification: document-level, run as a post-process step.
     document_kind = enumModuleType.Document
     self.module_type = enumModuleType(document_kind)
     post_process_kind = enumModuleProcessingType.PostProcess
     self.module_processing_type = enumModuleProcessingType(post_process_kind)

     self.index = {}