def process_annotator_into_substs(self, fn):
    """Build a category substitution table from an annotator file.

    Every line of the file must hold exactly three whitespace-separated
    fields, in this order: a category string, a replacement mode string and
    a slash index.  The characters ``-``, ``.``, ``*`` and ``@`` are removed
    from the category string before it is used as a key.

    Args:
        fn: path of the annotator file to read.

    Returns:
        A dict mapping each stripped category string to the object returned
        by ``parse_category`` for it, after ``labelled()`` has been called on
        that object and the ``mode`` of every slash named in the file has
        been set to the index given by ``self.mode_string_to_index``.

    Raises:
        FilterException: if a line does not have exactly three fields.
        ValueError: if a slash index field is not an integer literal.
    """
    # Requested replacements per stripped category string, stored as a set
    # of (slash_index, mode_index) pairs.
    slashes = defaultdict(set)

    with open(fn, 'r') as f:
        # Count from 1 so the reported line number matches what an editor
        # shows for the offending line.
        for lineno, line in enumerate(f, 1):
            # split() with no argument already ignores surrounding whitespace.
            fields = line.split()
            if len(fields) != 3:
                # Report the file actually being read.
                raise FilterException(
                    "Missing field at line %d of annotator file %s."
                    % (lineno, fn))

            category_string, replacement_mode_string, slash_index = fields
            stripped_category = re.sub(r'[-.*@]', '', category_string)
            mode_index = self.mode_string_to_index(replacement_mode_string)

            debug("Slash %s of %s goes to %s=%d",
                  slash_index, stripped_category,
                  replacement_mode_string, mode_index)
            slashes[stripped_category].add((int(slash_index), mode_index))

    substs = {}
    for category_string, replacements in slashes.items():
        moded_category = parse_category(category_string)
        # Return value is discarded; called for its effect on the category.
        # NOTE(review): presumably assigns the slash indices that slashes()
        # yields below -- confirm against the category implementation.
        moded_category.labelled()

        for subcategory, slash_index in moded_category.slashes():
            # NOTE(review): find() is assumed to return the first pair
            # satisfying the predicate, or a falsy value when none does.
            result = find(lambda pair: pair[0] == slash_index, replacements)
            if result:
                replacement_mode = result[1]
                debug("Setting mode of slash %s of %s to %s",
                      slash_index, moded_category, replacement_mode)
                subcategory.mode = replacement_mode

        substs[category_string] = moded_category

    return substs
def accept_leaf(self, leaf):
    """Adjust the slash mode of a leaf's category to match its application.

    The leaf is only considered when ``self.cats_to_split`` has an entry
    whose first element equals ``leaf.cat`` and ``leaf.cat.is_complex()`` is
    true.  The second element of that entry is used as a slash index into
    the sequence produced by ``applications_per_slash(leaf, False)``.  If the
    string form of the selected item ends in ``'comp'`` or ``'appl'`` and
    ``self.mode_on_slash`` reports a mode other than ``COMP`` / ``APPLY``
    respectively, ``self.fix_cat_for`` is called with ``"comp"`` /
    ``"apply"``.

    Args:
        leaf: the leaf node to inspect; must expose a ``cat`` attribute.

    Returns:
        None.  Any effect comes from ``self.fix_cat_for``.
    """
    # NOTE(review): find() is assumed to return the first matching
    # (category, slash_index) entry, or a falsy value when none matches.
    match = find(lambda entry: entry[0] == leaf.cat, self.cats_to_split)
    if not match:
        return
    if not leaf.cat.is_complex():
        return

    slash_index = match[1]
    appls = list(applications_per_slash(leaf, False))

    try:
        appl_name = str(appls[slash_index])

        # Decide which mode this slash should carry, if any.
        if appl_name.endswith('comp'):
            wanted_mode, mode_name = COMP, "comp"
        elif appl_name.endswith('appl'):
            wanted_mode, mode_name = APPLY, "apply"
        else:
            return

        if self.mode_on_slash(leaf.cat, slash_index) != wanted_mode:
            self.fix_cat_for(leaf, slash_index, mode_name)
    except IndexError:
        # An IndexError raised anywhere in the lookup or the fix-up above
        # (e.g. slash_index beyond the end of appls) leaves the leaf as is.
        pass