Example #1
0
 def __init__(self, id, label, edges=None, **metas):
     """
     Set up the instance by running both base-class initialisers.

     whitelist.WhitelistFile.__init__ is invoked first with no extra
     arguments; PyTextMiner.__init__ then receives an empty dict as
     content, followed by id, label, edges and any extra metas.

     Afterwards self.storage is taken from self._get_storage() and
     self.corpus starts out as an empty dict.
     """
     # two base classes: each initialiser is called explicitly
     whitelist.WhitelistFile.__init__(self)
     initial_content = {}
     PyTextMiner.__init__(self, initial_content, id, label, edges, **metas)
     # storage handle; per the original note this is the same database
     # as tinasoft, but a temporary one
     self.storage = self._get_storage()
     # per-instance cache for corpus objects
     self.corpus = {}
Example #2
0
 def __init__(self, id, content=None, edges=None, **metas):
     """
     Initialise through PyTextMiner, using id as the label too.

     When content is None the id itself is passed as the content.
     edges and any extra metas are forwarded unchanged.
     """
     payload = id if content is None else content
     # id is deliberately given twice: once as id, once as label
     PyTextMiner.__init__(self, payload, id, id, edges, **metas)
Example #3
0
 def __init__(self, content, id, label, edges=None, **metas):
     """
     Initialise through PyTextMiner, then ensure a 'keyword' edge entry.

     All arguments are forwarded unchanged to PyTextMiner.__init__.
     If self.edges has no 'keyword' key afterwards, one is created
     holding an empty dict; an existing entry is left untouched.
     """
     PyTextMiner.__init__(self, content, id, label, edges, **metas)
     if 'keyword' in self.edges:
         # already present: keep whatever the caller supplied
         return
     self.edges['keyword'] = {}
Example #4
0
 def __init__(self, tokenlist, id=None, label=None, edges=None, postag=None, **metas):
     """
     Build the instance from a list of tokens.

     Every item of tokenlist is passed through NGram.normalize; the
     resulting list is the content handed to PyTextMiner.__init__ and
     is also registered through self.addForm.

     postag is stored in metas under "postag"; when it is None the
     placeholder list ["?"] is stored instead. When edges is None it
     defaults to a dict with empty 'label' and 'postag' entries.

     NOTE(review): the original docstring states that normalisation
     "must be local value for pickling reasons" - confirm what
     constraint this refers to.
     """
     # normalised form of every incoming token
     forms = list(map(NGram.normalize, tokenlist))
     # POS tags travel with the metas; use a placeholder when absent
     metas["postag"] = ["?"] if postag is None else postag
     # default empty edges
     if edges is None:
         edges = {'label': {}, 'postag': {}}
     PyTextMiner.__init__(self, forms, id, label, edges, **metas)
     # register the form (with its tags and the value 1) before returning
     self.addForm(forms, metas["postag"], 1)
Example #5
0
 def __init__(self, name, edges=None, **metas):
     """
     Initialise through PyTextMiner with name used as both id and label.

     The content is a list of corpus ids: the keys of edges['Corpus']
     when edges is given and contains a 'Corpus' entry, otherwise an
     empty list. edges and any extra metas are forwarded unchanged.
     """
     # list of corpus ids
     content = []
     if edges is not None and 'Corpus' in edges:
         # Copy the keys into a real list: on Python 3, keys() returns a
         # live view tied to the edges dict, which is not a list and
         # cannot be pickled. On Python 2 this is just a list copy.
         content = list(edges['Corpus'].keys())
     PyTextMiner.__init__(self, content, name, name, edges=edges, **metas)
Example #6
0
 def __init__(self, id, label, edges=None, **metas):
     """
     Initialise through PyTextMiner with an empty dict as content.

     id, label, edges and any extra metas are forwarded unchanged.
     Afterwards self.storage is taken from self._get_storage() and
     self.corpus starts out as an empty dict.
     """
     initial_content = {}
     PyTextMiner.__init__(self, initial_content, id, label, edges, **metas)
     # storage handle; per the original note this is the same database
     # as tinasoft, but a temporary one
     self.storage = self._get_storage()
     # per-instance cache for corpus objects
     self.corpus = {}