예제 #1
0
파일: split.py 프로젝트: Oneplus/cnccgbank
 def process_annotator_into_substs(self, fn):
     """Read an annotator file and build a table of category substitutions.

     Every line of the file must hold exactly three whitespace-separated
     fields: a category string, a replacement mode string and a slash index.
     The characters ``-``, ``.``, ``*`` and ``@`` are removed from the
     category string before it is used as a key.

     Args:
         fn: path of the annotator file to read.

     Returns:
         A dict mapping each stripped category string to the object returned
         by ``parse_category`` for it, with ``mode`` overwritten on every
         slash for which the file supplied a replacement mode.

     Raises:
         FilterException: if a line does not have exactly three fields. The
             reported line number is 1-based.
     """
     def has_index(wanted_index):
         # Predicate for find(): matches the (slash index, mode) pair whose
         # slash index equals wanted_index.
         def predicate(pair):
             index, _ = pair
             return index == wanted_index
         return predicate

     substs = {}

     # Stripped category string -> set of (slash index, mode index) pairs.
     slashes = defaultdict(set)
     with open(fn, 'r') as f:
         # Count from 1 so the number in the error message matches the line
         # number shown by an editor.
         for lineno, line in enumerate(f, 1):
             # split() with no arguments already ignores trailing whitespace
             # and the newline.
             fields = line.split()
             if len(fields) != 3:
                 raise FilterException(
                     "Missing field at line %d of annotator file %s."
                     % (lineno, self.anno_filename))

             category_string, replacement_mode_string, slash_index = fields
             plain_category = re.sub(r'[-.*@]', '', category_string)
             mode_index = self.mode_string_to_index(replacement_mode_string)
             debug("Slash %s of %s goes to %s=%d", slash_index, plain_category, replacement_mode_string, mode_index)
             slashes[plain_category].add((int(slash_index), mode_index))

     # The file is no longer needed from here on, so this phase runs after
     # the with-block has closed it.
     for category_string, replacements in slashes.items():
         moded_category = parse_category(category_string)
         # NOTE(review): presumably numbers the slashes in place so that
         # slashes() yields indices comparable to the file's -- confirm.
         moded_category.labelled()

         for subcategory, slash_index in moded_category.slashes():
             result = find(has_index(slash_index), replacements)
             if result:
                 _, replacement_mode = result
                 debug("Setting mode of slash %s of %s to %s", slash_index, moded_category, replacement_mode)
                 subcategory.mode = replacement_mode

         substs[category_string] = moded_category

     return substs
예제 #2
0
파일: split.py 프로젝트: Oneplus/cnccgbank
 def accept_leaf(self, leaf):
     """Correct the mode on one slash of ``leaf``'s category if needed.

     Looks up ``leaf.cat`` among the ``(category, slash index)`` pairs in
     ``self.cats_to_split``. When a pair is found and the category is
     complex, the entry at that slash index in the list produced by
     ``applications_per_slash(leaf, False)`` is inspected:

     * if its string form ends in ``'comp'`` and the slash's current mode is
       not ``COMP``, ``fix_cat_for`` is called with ``"comp"``;
     * if it ends in ``'appl'`` and the current mode is not ``APPLY``,
       ``fix_cat_for`` is called with ``"apply"``.

     An ``IndexError`` raised during that step is silently ignored.

     Args:
         leaf: the leaf node to examine; it must expose a ``cat`` attribute.
     """
     def is_leaf_category(entry):
         # Entries of cats_to_split are (category, slash index) pairs.
         candidate, _ = entry
         return candidate == leaf.cat

     match = find(is_leaf_category, self.cats_to_split)
     if not match or not leaf.cat.is_complex():
         return

     _, slash_index = match

     applications = list(applications_per_slash(leaf, False))
     try:
         rule_name = str(applications[slash_index])
         if rule_name.endswith('comp'):
             wanted_mode, mode_label = COMP, "comp"
         elif rule_name.endswith('appl'):
             wanted_mode, mode_label = APPLY, "apply"
         else:
             # Neither a composition nor an application: nothing to fix.
             return

         if self.mode_on_slash(leaf.cat, slash_index) != wanted_mode:
             self.fix_cat_for(leaf, slash_index, mode_label)
     except IndexError:
         # No entry at this slash index; leave the leaf untouched.
         pass