Python zen2han Examples

Programming Language: Python

Namespace/Package Name: utils

Method/Function: zen2han

Examples at hotexamples.com: 3

Python zen2han - 3 examples found. These are the top-rated real-world Python examples of utils.zen2han, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: selection.py Project: yasusii/fooling

 def __init__(self, s):
   """Build the predicate from the query string `s`.

   `s` is first passed through zen2han() (presumably full-width to
   half-width normalization -- confirm against utils.zen2han) and the
   result is stored as self.q.  A leading '-' or '!' marks the predicate
   as negated; that prefix is removed before the remainder is handed to
   setup_feats().
   """
   Predicate.__init__(self)
   s = zen2han(s)
   # self.q keeps the normalized query, including any negation prefix.
   self.q = s
   # NOTE(review): self.neg is only assigned on the negated path; the
   # non-negated default is presumably supplied elsewhere -- confirm.
   if s.startswith(('-', '!')):
     s = s[1:]
     self.neg = True
   # Give each attribute its own list.  A chained "a = b = c = []" would
   # bind all three names to one shared list object.
   self.r0, self.r1, self.r2 = [], [], []
   self.sentid = None
   self.setup_feats(s)
   return

Example #2

Show file

 def __init__(self, s):
     """Build the predicate from the query string `s`.

     `s` is first passed through zen2han() (presumably full-width to
     half-width normalization -- confirm against utils.zen2han) and the
     result is stored as self.q.  A leading '-' or '!' marks the predicate
     as negated; that prefix is removed before the remainder is handed to
     setup_feats().
     """
     Predicate.__init__(self)
     s = zen2han(s)
     # self.q keeps the normalized query, including any negation prefix.
     self.q = s
     # NOTE(review): self.neg is only assigned on the negated path; the
     # non-negated default is presumably supplied elsewhere -- confirm.
     if s.startswith(('-', '!')):
         s = s[1:]
         self.neg = True
     # Give each attribute its own list.  A chained "a = b = c = []" would
     # bind all three names to one shared list object.
     self.r0, self.r1, self.r2 = [], [], []
     self.sentid = None
     self.setup_feats(s)
     return

Example #3

Show file

File: indexer.py Project: yasusii/fooling

 def index_doc(self, doc, maxsents=100000):
   """Index `doc` and, recursively, each of its non-empty subdocuments.

   The document is appended to self.docinfo under a 1-based docid.  Its
   location labels and its own features are registered with sentence
   id 0.  The title (if any) and then each non-empty sentence are
   normalized with zen2han(rmsp(...)), stored in self.maker as UTF-8
   under a packed (PROP_SENT, docid, sentid) key, and their terms are
   added to self.terms.

   Parameters:
     doc:      document object providing loc, get_feats(), get_title(),
               get_sents(), splitterms() and get_subdocs().
     maxsents: upper bound on the number of sentence records (title
               included) stored per document; passed on to subdocuments.

   Returns:
     True, always.
   """
   if self.maker is None:
     self.create_new_idx()
   docid = len(self.docinfo)+1
   self.docinfo.append((docid, doc))
   if 2 <= self.verbose:
     # write() instead of the Python 2-only "print >>" statement; the
     # emitted text is the same.
     sys.stderr.write('Reading: %r\n' % (doc,))
   elif 1 <= self.verbose:
     sys.stderr.write('.'); sys.stderr.flush()
   terms = self.terms
   # other features
   add_features(terms, docid, 0,
                ( PROP_LABEL+x for x in self.corpus.loc_labels(doc.loc) ))
   add_features(terms, docid, 0, doc.get_feats())
   # sents
   sentid = 0
   title = doc.get_title()
   if title and sentid < maxsents:
     title = zen2han(rmsp(title))
     self.maker.add(pack('>cii', PROP_SENT, docid, sentid), title.encode('utf-8'))
     add_features(terms, docid, sentid, set(doc.splitterms(title)))
     sentid += 1
   # Skip the loop entirely when the quota is already used up (maxsents
   # <= 0, or the title alone filled it); the in-loop check only fires
   # after a sentence has been stored.
   if sentid < maxsents:
     for sent in doc.get_sents():
       sent = zen2han(rmsp(sent))
       # Sentences that are empty after normalization get no record.
       if not sent: continue
       self.maker.add(pack('>cii', PROP_SENT, docid, sentid), sent.encode('utf-8'))
       add_features(terms, docid, sentid, set(doc.splitterms(sent)))
       sentid += 1
       if maxsents <= sentid: break
   # Flush once either configured threshold (if set) has been reached.
   if ((self.max_docs_threshold and self.max_docs_threshold <= len(self.docinfo)) or
       (self.max_terms_threshold and self.max_terms_threshold <= len(terms))):
     self.flush()
   for subdoc in doc.get_subdocs():
     if subdoc:
       self.index_doc(subdoc, maxsents=maxsents)
   return True