def __init__(self, root, fileids='.*'):
    """
    Corpus reader designed to work with the National Corpus of Polish.
    See http://nkjp.pl/ for more details about NKJP.

    The given file ids are rewritten before being handed to
    ``XMLCorpusReader.__init__``: a string ``fileids`` gets
    ``'.*/header.xml'`` appended, while every element of a non-string
    iterable gets ``'/header.xml'`` appended.  Afterwards the result of
    ``self.get_paths()`` is stored in ``self._paths``.

    Usage example::

        import nltk
        import nkjp
        from nkjp import NKJPCorpusReader

        # obtain the whole corpus
        x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='')
        x.header()
        x.raw()
        x.words()
        # Link to find more tags: nkjp.pl/poliqarp/help/ense2.html
        x.tagged_words(tags=['subst', 'comp'])
        x.sents()

        # obtain particular file(s)
        x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='Wilk*')
        x.header(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'])
        x.tagged_words(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'],
                       tags=['subst', 'comp'])
    """
    # Build the header.xml selector first, then initialize the parent once.
    if isinstance(fileids, string_types):
        header_fileids = fileids + '.*/header.xml'
    else:
        header_fileids = [fileid + '/header.xml' for fileid in fileids]

    XMLCorpusReader.__init__(self, root, header_fileids)
    self._paths = self.get_paths()
def __init__(self, root, fileids='.*'):
    """
    Corpus reader designed to work with the National Corpus of Polish.
    See http://nkjp.pl/ for more details about NKJP.

    The given file ids are rewritten before being handed to
    ``XMLCorpusReader.__init__``: a string ``fileids`` gets
    ``'.*/header.xml'`` appended, while every element of a non-string
    iterable gets ``'/header.xml'`` appended.  Afterwards the result of
    ``self.get_paths()`` is stored in ``self._paths``.

    Usage example::

        import nltk
        import nkjp
        from nkjp import NKJPCorpusReader

        # obtain the whole corpus
        x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='')
        x.header()
        x.raw()
        x.words()
        # Link to find more tags: nkjp.pl/poliqarp/help/ense2.html
        x.tagged_words(tags=['subst', 'comp'])
        x.sents()

        # obtain particular file(s)
        x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='Wilk*')
        x.header(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'])
        x.tagged_words(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'],
                       tags=['subst', 'comp'])
    """
    # Build the header.xml selector first, then initialize the parent once.
    if isinstance(fileids, string_types):
        header_fileids = fileids + '.*/header.xml'
    else:
        header_fileids = [fileid + '/header.xml' for fileid in fileids]

    XMLCorpusReader.__init__(self, root, header_fileids)
    self._paths = self.get_paths()
def __init__(self, *args, **kwargs):
    """
    Initialize the reader from positional and keyword arguments.

    All positional arguments are forwarded to ``XMLCorpusReader.__init__``.
    The keyword-argument dictionary is passed, as a single positional
    argument, to ``CategorizedCorpusReader.__init__``.

    The optional ``textid_file`` keyword is stored in ``self._textids``
    (``None`` when it is not supplied) before ``self._init_textids()`` is
    called as the final step.
    """
    # dict.get yields None for a missing key, matching the explicit default.
    self._textids = kwargs.get('textid_file')

    XMLCorpusReader.__init__(self, *args)
    CategorizedCorpusReader.__init__(self, kwargs)

    self._init_textids()
def __init__(self, *args, **kwargs):
    """
    Initialize the reader from positional and keyword arguments.

    All positional arguments are forwarded to ``XMLCorpusReader.__init__``.
    The keyword-argument dictionary is passed, as a single positional
    argument, to ``CategorizedCorpusReader.__init__``.

    The optional ``textid_file`` keyword is stored in ``self._textids``
    (``None`` when it is not supplied) before ``self._init_textids()`` is
    called as the final step.
    """
    # dict.get yields None for a missing key, matching the explicit default.
    self._textids = kwargs.get('textid_file')

    XMLCorpusReader.__init__(self, *args)
    CategorizedCorpusReader.__init__(self, kwargs)

    self._init_textids()
def __init__(self, root, fileids, wrap_etree=False):
    """
    Initialize the reader and build its lookup tables.

    ``root``, ``fileids`` and ``wrap_etree`` are forwarded unchanged to
    ``XMLCorpusReader.__init__``.  Four empty index structures are then
    created and ``self._quick_index()`` is called to fill them.
    """
    XMLCorpusReader.__init__(self, root, fileids, wrap_etree)

    # Maps verb lemma strings to lists of verbnet class identifiers.
    self._lemma_to_class = defaultdict(list)

    # Maps wordnet identifier strings to lists of verbnet class identifiers.
    self._wordnet_to_class = defaultdict(list)

    # Maps class identifiers to their corresponding file identifiers.
    # The keys of this dictionary provide a complete list of all classes
    # and subclasses.
    self._class_to_fileid = {}

    self._shortid_to_longid = {}

    # Populate the dictionaries with the quick (regexp-based) indexer
    # rather than the slow (xml-based) one: it runs 2-30 times faster.
    self._quick_index()
def __init__(self, root, fileids, wrap_etree=False):
    """
    Initialize the reader and build its lookup tables.

    ``root``, ``fileids`` and ``wrap_etree`` are forwarded unchanged to
    ``XMLCorpusReader.__init__``.  Four empty index structures are then
    created and ``self._quick_index()`` is called to fill them.
    """
    XMLCorpusReader.__init__(self, root, fileids, wrap_etree)

    # Maps verb lemma strings to lists of verbnet class identifiers.
    self._lemma_to_class = defaultdict(list)

    # Maps wordnet identifier strings to lists of verbnet class identifiers.
    self._wordnet_to_class = defaultdict(list)

    # Maps class identifiers to their corresponding file identifiers.
    # The keys of this dictionary provide a complete list of all classes
    # and subclasses.
    self._class_to_fileid = {}

    self._shortid_to_longid = {}

    # Populate the dictionaries with the quick (regexp-based) indexer
    # rather than the slow (xml-based) one: it runs 2-30 times faster.
    self._quick_index()
def __init__(self, root, fileids, wordnet, lazy=True):
    """
    Initialize the reader and remember its wordnet object and lazy flag.

    :param root: forwarded unchanged to ``XMLCorpusReader.__init__``.
    :param fileids: forwarded unchanged to ``XMLCorpusReader.__init__``.
    :param wordnet: stored as ``self._wordnet``.
    :param lazy: stored as ``self._lazy``; defaults to ``True``.
    """
    XMLCorpusReader.__init__(self, root, fileids)

    # Keep the caller-supplied collaborators for later use by the reader.
    self._wordnet = wordnet
    self._lazy = lazy
def __init__(self, root, fileids='.*'):
    """
    Initialize the reader by delegating to ``XMLCorpusReader.__init__``.

    :param root: forwarded unchanged to the parent initializer.
    :param fileids: forwarded unchanged to the parent initializer;
        defaults to ``'.*'`` (presumably a pattern selecting every file
        under ``root`` -- confirm against ``XMLCorpusReader``).
    """
    XMLCorpusReader.__init__(self, root, fileids)
def __init__(self, root, fileids, lazy=True):
    """
    Initialize the reader and remember the ``lazy`` flag.

    :param root: forwarded unchanged to ``XMLCorpusReader.__init__``.
    :param fileids: forwarded unchanged to ``XMLCorpusReader.__init__``.
    :param lazy: stored as ``self._lazy``; defaults to ``True``.
    """
    XMLCorpusReader.__init__(self, root, fileids)
    # Kept on the instance; how it is consumed is not visible in this method.
    self._lazy = lazy
def __init__(self, root, fileids, lazy=True):
    """
    Initialize the reader and remember the ``lazy`` flag.

    :param root: forwarded unchanged to ``XMLCorpusReader.__init__``.
    :param fileids: forwarded unchanged to ``XMLCorpusReader.__init__``.
    :param lazy: stored as ``self._lazy``; defaults to ``True``.
    """
    XMLCorpusReader.__init__(self, root, fileids)
    # Kept on the instance; how it is consumed is not visible in this method.
    self._lazy = lazy
def __init__(self, root, fileids, wordnet, lazy=True):
    """
    Initialize the reader and remember its wordnet object and lazy flag.

    :param root: forwarded unchanged to ``XMLCorpusReader.__init__``.
    :param fileids: forwarded unchanged to ``XMLCorpusReader.__init__``.
    :param wordnet: stored as ``self._wordnet``.
    :param lazy: stored as ``self._lazy``; defaults to ``True``.
    """
    XMLCorpusReader.__init__(self, root, fileids)

    # Keep the caller-supplied collaborators for later use by the reader.
    self._wordnet = wordnet
    self._lazy = lazy
def __init__(self, root, fileids):
    """
    Initialize the reader by delegating to ``XMLCorpusReader.__init__``.

    :param root: forwarded unchanged to the parent initializer.
    :param fileids: forwarded unchanged to the parent initializer.
    """
    XMLCorpusReader.__init__(self, root, fileids)
def __init__(self, root, fileid):
    """
    Initialize the reader for a single file and record its path.

    :param root: forwarded to ``XMLCorpusReader.__init__`` and used as the
        left operand when building ``self.path``.
    :param fileid: forwarded to ``XMLCorpusReader.__init__`` and used as
        the right operand when building ``self.path``.
    """
    # NOTE(review): plain ``+`` concatenation inserts no path separator, so
    # this presumably relies on ``root`` already ending with one -- confirm
    # against the callers.
    self.path = root + fileid
    XMLCorpusReader.__init__(self, root, fileid)
def __init__(self, root, fileid):
    """
    Initialize the reader for a single file and load its contents eagerly.

    After delegating to ``XMLCorpusReader.__init__`` the constructor stores:

    - ``self._fileid``: the first entry of ``self._fileids`` (presumably
      populated by the parent initializer -- confirm),
    - ``self.elt``: the value returned by ``self.xml()``,
    - ``self.data``: the result of ``_xml_to_dict`` applied to that value.

    :param root: forwarded unchanged to the parent initializer.
    :param fileid: forwarded unchanged to the parent initializer.
    """
    XMLCorpusReader.__init__(self, root, fileid)
    self._fileid = self._fileids[0]

    # Parse once, then keep both the parsed tree and its dict conversion.
    tree = self.xml()
    self.elt = tree
    self.data = _xml_to_dict(tree)