Beispiel #1
0
 def __init__(self):
     """Load the Japanese word table stored next to this module.

     Looks up the ``corpora/wordnet`` resource with ``nltk.data.find``, passes
     it to ``WordNetCorpusReader.__init__``, then reads ``wnjpn-ok.tab`` from
     this module's directory into ``self._jword2offset``.

     Each usable line is tab-separated: ``<offset>-<pos>``, then the word,
     then an optional third column that is ignored here.  The mapping keeps a
     single ``{'offset': int, 'pos': str}`` entry per word, so a later line
     for the same word replaces the earlier one.

     Lines that cannot be parsed are reported on stdout and skipped.
     """
     root = nltk.data.find('corpora/wordnet')
     # dirname() is empty when the module is run from the current directory.
     cd = os.path.dirname(__file__) or "."
     filename = cd + '/wnjpn-ok.tab'
     WordNetCorpusReader.__init__(self, root)
     self._jword2offset = {}
     with codecs.open(filename, encoding="utf-8") as f:
         # enumerate() gives the real 0-based position of every line, so the
         # reported number stays correct after earlier failures (a counter
         # that only advances on success drifts).
         for index, line in enumerate(f):
             cells = line.strip().split('\t')
             try:
                 word = cells[1]
                 offset, pos = cells[0].split('-')
                 entry = {'offset': int(offset), 'pos': pos}
             except (IndexError, ValueError):
                 # IndexError: fewer than two columns.
                 # ValueError: first column is not "<integer>-<pos>".
                 # Anything else (e.g. KeyboardInterrupt) must propagate.
                 print("failed to lead line %d" % index)
                 continue
             self._jword2offset[word] = entry
Beispiel #2
0
 def __init__(self, root, filename):
     """Initialise the reader and load a word -> synset-offset table.

     Args:
         root: Passed unchanged to ``WordNetCorpusReader.__init__``.
         filename: Path of a UTF-8, tab-separated file.  Column one is
             ``<offset>-<pos>``, column two is the word; any further
             columns are ignored.

     After construction ``self._jword2offset`` maps each word to a list of
     ``{'offset': int, 'pos': str}`` dicts, one per line on which the word
     appeared, in file order.

     Raises:
         IndexError: A line has fewer than two tab-separated columns.
         ValueError: Column one is not of the form ``<integer>-<pos>``.
     """
     WordNetCorpusReader.__init__(self, root)
     import codecs
     self._jword2offset = {}
     # The context manager closes the file even when a malformed line raises.
     with codecs.open(filename, encoding="utf-8") as f:
         for line in f:
             cells = line.strip().split('\t')
             offset_pos = cells[0]
             word = cells[1]
             offset, pos = offset_pos.split('-')
             # Build the entry before touching the dict so a bad offset
             # cannot leave an empty list behind for this word.
             entry = {'offset': int(offset), 'pos': pos}
             # setdefault replaces the bare try/except that was used to
             # detect a missing key and could mask unrelated errors.
             self._jword2offset.setdefault(word, []).append(entry)
Beispiel #3
0
 def __init__(self, root, filename):
     """Set up the reader and read the word table from ``filename``.

     Args:
         root: Forwarded as-is to ``WordNetCorpusReader.__init__``.
         filename: Path to a UTF-8 text file with tab-separated columns:
             ``<offset>-<pos>`` first, the word second.  Extra columns are
             not used.

     The result is stored in ``self._jword2offset``: a dict from word to a
     list of ``{'offset': int, 'pos': str}`` dicts, appended in the order
     the lines occur in the file.

     Raises:
         IndexError: If a line has no second column.
         ValueError: If the first column does not split into exactly an
             integer offset and a part-of-speech tag around one ``-``.
     """
     WordNetCorpusReader.__init__(self, root)
     import codecs
     self._jword2offset = {}
     # Use a with-block so the handle is released on success and on error.
     with codecs.open(filename, encoding="utf-8") as f:
         for line in f:
             cells = line.strip().split('\t')
             offset_pos = cells[0]
             word = cells[1]
             offset, pos = offset_pos.split('-')
             # Convert first; only a fully valid entry reaches the mapping.
             entry = {
                 'offset': int(offset),
                 'pos': pos
             }
             # Explicit "create list on first sight" instead of catching
             # every exception to detect a missing key.
             self._jword2offset.setdefault(word, []).append(entry)
Beispiel #4
0
 def __init__(self):
     """Load the data.

     Finds the ``corpora/wordnet`` resource through ``nltk.data.find``, hands
     it to ``WordNetCorpusReader.__init__``, and then parses ``wnjpn-ok.tab``
     located in the same directory as this module.

     Every parsable line (``<offset>-<pos>``, a tab, the word, optionally
     more columns) yields ``self._jword2offset[word] =
     {'offset': int, 'pos': str}``.  Only the last line seen for a given
     word is kept.

     Unparsable lines are reported on stdout and skipped.
     """
     root = nltk.data.find('corpora/wordnet')
     # Fall back to the current directory when __file__ has no directory part.
     cd = os.path.dirname(__file__) or "."
     filename = cd + '/wnjpn-ok.tab'
     WordNetCorpusReader.__init__(self, root)
     import codecs
     self._jword2offset = {}
     with codecs.open(filename, encoding="utf-8") as f:
         # The 0-based index from enumerate() identifies the failing line
         # itself; a success-only counter goes stale after the first failure.
         for index, line in enumerate(f):
             cells = line.strip().split('\t')
             try:
                 word = cells[1]
                 offset, pos = cells[0].split('-')
                 entry = {'offset': int(offset), 'pos': pos}
             except (IndexError, ValueError):
                 # Only malformed-line errors are expected here; let
                 # everything else surface to the caller.
                 # Parenthesised single-argument print behaves the same on
                 # Python 2 and Python 3.
                 print("failed to lead line %d" % index)
                 continue
             self._jword2offset[word] = entry