def __init__(self, id, label, edges=None, **metas):
    """
    Set up the instance by running both parent initialisers explicitly.

    ``whitelist.WhitelistFile.__init__`` is called first with no arguments,
    then ``PyTextMiner.__init__`` receives an empty dict as content together
    with ``id``, ``label``, ``edges`` and any extra ``metas``.
    Afterwards ``self.storage`` is taken from ``self._get_storage()`` and
    ``self.corpus`` starts out as an empty dict.
    """
    # two base classes: each initialiser is invoked by hand
    whitelist.WhitelistFile.__init__(self)
    empty_content = {}
    PyTextMiner.__init__(self, empty_content, id, label, edges, **metas)
    # original author's note: same database as tinasoft, but temporary
    storage_handle = self._get_storage()
    self.storage = storage_handle
    # corpus cache, initially empty
    self.corpus = {}
def __init__(self, id, content=None, edges=None, **metas):
    """
    Initialise the object, using ``id`` as its label as well.

    When ``content`` is omitted (``None``), ``id`` is used as the content
    too. Everything is then forwarded to ``PyTextMiner.__init__``.
    """
    effective_content = id if content is None else content
    PyTextMiner.__init__(self, effective_content, id, id, edges, **metas)
def __init__(self, content, id, label, edges=None, **metas):
    """
    Initialise through ``PyTextMiner.__init__`` and guarantee that
    ``self.edges`` has a ``'keyword'`` entry.

    An existing ``'keyword'`` entry is left untouched; a missing one is
    created as an empty dict.
    """
    PyTextMiner.__init__(self, content, id, label, edges, **metas)
    has_keyword_edges = 'keyword' in self.edges
    if not has_keyword_edges:
        self.edges['keyword'] = {}
def __init__(self, tokenlist, id=None, label=None, edges=None, postag=None, **metas):
    """
    Build the object from a list of tokens.

    Each token is passed through ``NGram.normalize`` (original author's
    note: normalize must be a local value for pickling reasons) and the
    resulting list becomes the content given to ``PyTextMiner.__init__``.

    ``postag`` is stored in ``metas["postag"]``; when it is not supplied
    the placeholder ``["?"]`` is used instead. When ``edges`` is not
    supplied it defaults to empty ``'label'`` and ``'postag'`` edges.
    """
    # fall back to the placeholder tag when no postag was given
    metas["postag"] = ["?"] if postag is None else postag
    # default edges: empty 'label' and 'postag' mappings
    if edges is None:
        edges = {'label': {}, 'postag': {}}
    # normalized form of every incoming token
    tokens = [NGram.normalize(token) for token in tokenlist]
    PyTextMiner.__init__(self, tokens, id, label, edges, **metas)
    # register the normalized tokens and their postag as a form
    # before the instance is handed back
    self.addForm(tokens, metas["postag"], 1)
def __init__(self, name, edges=None, **metas):
    """
    Initialise the object, using ``name`` as both id and label.

    The content passed to ``PyTextMiner.__init__`` is the list of keys
    found under ``edges['Corpus']``. It is an empty list when ``edges`` is
    ``None`` or has no ``'Corpus'`` entry.

    :param name: identifier, also used as the label
    :param edges: optional edges mapping; its ``'Corpus'`` entry, if any,
        must provide ``keys()``
    :param metas: extra keyword arguments forwarded to ``PyTextMiner.__init__``
    """
    # list of corpus ids
    corpus_ids = []
    if edges is not None and 'Corpus' in edges:
        # Take a real list snapshot: on Python 3 ``keys()`` returns a live
        # view that is not a list and tracks later changes to the mapping.
        # On Python 2 this yields the same list as before.
        corpus_ids = list(edges['Corpus'].keys())
    PyTextMiner.__init__(self, corpus_ids, name, name, edges=edges, **metas)
def __init__(self, id, label, edges=None, **metas):
    """
    Initialise through ``PyTextMiner.__init__`` with an empty dict as
    content, then attach the storage handle and an empty corpus cache.

    ``self.storage`` is taken from ``self._get_storage()`` and
    ``self.corpus`` starts out as an empty dict.
    """
    empty_content = {}
    PyTextMiner.__init__(self, empty_content, id, label, edges, **metas)
    # original author's note: same database as tinasoft, but temporary
    storage_handle = self._get_storage()
    self.storage = storage_handle
    # corpus cache, initially empty
    self.corpus = {}