def __init__(self, name, idx=0):
    """Initialise the variable from a name and an optional index.

    Args:
        name: The variable name. If it is a DRSVar instance it is first
            converted with future_string().
        idx: The index to use. Defaults to 0.

    When the name matches the class pattern ``_NumSuffix``, group 1 of the
    match becomes the stored name. Group 2 is parsed with int() and used
    as the index only when ``idx`` is 0 and group 2 is non-empty; in every
    other case ``idx`` is stored unchanged.
    """
    if isinstance(name, DRSVar):
        name = future_string(name)

    suffix_match = self._NumSuffix.match(name)
    if suffix_match is None:
        # No pattern match: keep the name and index exactly as given.
        self._name, self._idx = name, idx
        return

    self._name = suffix_match.group(1)
    digits = suffix_match.group(2)
    if len(digits) == 0 or idx != 0:
        # An explicit non-zero idx takes precedence over the matched digits.
        self._idx = idx
    else:
        self._idx = int(digits)
def _list_ldc_auto_files(ldcpath):
    """Return paths of the regular files found one directory level below ldcpath."""
    allfiles = []
    for dir1 in os.listdir(ldcpath):
        ldcpath1 = os.path.join(ldcpath, dir1)
        if not os.path.isdir(ldcpath1):
            continue
        for dir2 in os.listdir(ldcpath1):
            ldcpath2 = os.path.join(ldcpath1, dir2)
            if os.path.isfile(ldcpath2):
                allfiles.append(ldcpath2)
    return allfiles


def _write_failure_report(path, entries):
    """Write the encoded failure entries to path, separated by newlines."""
    # The entries are joined with a bytes separator, so the file has to be
    # opened in binary mode; text mode rejects bytes on Python 3.
    with open(path, 'wb') as fd:
        fd.write(b'\n'.join(entries))


def build_from_ldc_ccgbank(fn_dict, outdir, verbose=False, verify=True):
    """Add functor templates derived from the LDC ccgbank AUTO files to fn_dict.

    Every file found under ``<projdir>/data/ldc/ccgbank_1_1/data/AUTO/<dir>/``
    is read; every second line (index 1, 3, 5, ...) is parsed as a CCG
    derivation and its predicate-argument categories are collected. A
    template is then created for each collected category and stored in
    ``fn_dict`` under the signature of the cleaned category.

    Args:
        fn_dict: Dictionary of templates keyed by category signature. It is
            updated in place.
        outdir: Directory that receives ``parse_ccg_derivation_failed.dat``
            and ``functor_ldc_templates_failed.dat`` when failures occur.
        verbose: If true, failure messages are also printed.
        verify: If true, a template whose signature is already present in
            ``fn_dict`` is compared (by string form) with the existing entry
            and a mismatch is recorded as a failed rule.

    Returns:
        The same ``fn_dict`` object that was passed in.
    """
    print('Building function templates from LDC ccgbank...')

    ldcpath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data', 'AUTO')
    allfiles = _list_ldc_auto_files(ldcpath)

    # failed_parse holds two entries per failure: the derivation text
    # followed by the error message.
    failed_parse = []
    failed_rules = []
    rules = []
    progress = 0
    for fn in allfiles:
        progress = print_progress(progress, 10)
        with open(fn, 'r') as fd:
            lines = fd.readlines()
        # Lines are consumed in pairs; only the second line of each pair
        # (the derivation) is used, the first one is skipped.
        for ccgbank in lines[1::2]:
            pt = None
            try:
                pt = parse_ccg_derivation(ccgbank)
                extract_predarg_categories_from_pt(pt, rules)
            except Exception as e:
                failed_parse.append(safe_utf8_encode('CCGBANK: ' + ccgbank.strip()))
                failed_parse.append(safe_utf8_encode('Error: %s' % e))
            # Now attempt to track undefined unary rules
            if pt is not None:
                try:
                    builder = Ccg2Drs()
                    builder.build_execution_sequence(pt)
                    # Calling this will track undefined
                    builder.get_predarg_ccgbank()
                except Exception:
                    # Deliberately best-effort: this pass is only run for
                    # its tracking side effect, failures are not reported.
                    pass

    # Rescale the counter from ticks of 10 to ticks of 1000. Floor division
    # keeps it an integer on Python 3 as well as Python 2.
    progress = (progress // 10) * 1000
    for predarg in rules:
        progress = print_progress(progress, 1000)
        try:
            catkey = predarg.clean(True)
            template = FunctorTemplate.create_from_category(predarg)
            if template is None:
                continue
            if catkey.signature not in fn_dict:
                fn_dict[catkey.signature] = template
            elif verify:
                f1 = fn_dict[catkey.signature]
                t1 = future_string(f1)
                t2 = future_string(template)
                if t1 != t2:
                    # Raised explicitly (not via `assert`) so the check is
                    # not stripped under `python -O`; the handler below
                    # records it as a failed rule.
                    raise AssertionError(
                        'verify failed\n t1=%s\n t2=%s\n f1=%s\n f2=%s'
                        % (t1, t2, f1.predarg_category, predarg))
        except Exception as e:
            failed_rules.append(safe_utf8_encode('%s: %s' % (predarg, e)))

    print_progress(progress, done=True)

    if failed_parse:
        print('Warning: ldc - %d parses failed' % (len(failed_parse) // 2))
        _write_failure_report(
            os.path.join(outdir, 'parse_ccg_derivation_failed.dat'), failed_parse)
        if verbose:
            # Every second entry is the error message of a failure.
            for m in failed_parse[1::2]:
                print(m)

    if failed_rules:
        print('Warning: ldc - %d rules failed' % len(failed_rules))
        _write_failure_report(
            os.path.join(outdir, 'functor_ldc_templates_failed.dat'), failed_rules)
        if verbose:
            for m in failed_rules:
                print(m)

    return fn_dict
def __repr__(self):
    """Return the clean category signature and this object's string form.

    The two parts are joined with ':' and the result is passed through
    native_string() before being returned.
    """
    signature = self._clean_category.signature
    text = signature + ':' + future_string(self)
    return native_string(text)
def __init__(self, s=None):
    """Store the string form of ``s`` together with its hash.

    Args:
        s: Value converted with future_string(); when None, the empty
            string is stored instead.
    """
    super(ConstString, self).__init__()
    if s is None:
        self._s = ''
    else:
        self._s = future_string(s)
    # The hash of the stored string is computed once, at construction.
    self._h = hash(self._s)
def __repr__(self):
    """Return future_string(self) as the representation of this object."""
    text = future_string(self)
    return text