Beispiel #1
0
 def __init__(self, name, idx=0):
     """Initialise the name and index of this object.

     Args:
         name: The name. A DRSVar instance is first converted with
             future_string().
         idx: The index. Defaults to 0.

     The name is matched against self._NumSuffix (defined outside this
     method). Without a match, name and idx are stored unchanged. With a
     match, group 1 becomes the stored name, and group 2 is converted to
     int and used as the index only when it is non-empty and idx is 0;
     otherwise idx is stored.
     """
     if isinstance(name, DRSVar):
         name = future_string(name)
     match = self._NumSuffix.match(name)
     if match is not None:
         self._name = match.group(1)
         suffix = match.group(2)
         # An explicit non-zero idx takes precedence over a parsed suffix.
         if len(suffix) != 0 and idx == 0:
             self._idx = int(suffix)
         else:
             self._idx = idx
         return
     self._name = name
     self._idx = idx
Beispiel #2
0
def build_from_ldc_ccgbank(fn_dict, outdir, verbose=False, verify=True):
    """Build functor templates from the LDC ccgbank AUTO derivations.

    Every file found two levels below ``<projdir>/data/ldc/ccgbank_1_1/data/AUTO``
    is read as alternating header/derivation lines. Each derivation is parsed,
    its predicate-argument categories are collected, and a functor template is
    created for each collected category and stored in ``fn_dict`` under the
    signature of the cleaned category.

    Args:
        fn_dict: Mapping from category signature to template. It is updated
            in place and also returned.
        outdir: Directory that receives the failure logs
            ``parse_ccg_derivation_failed.dat`` and
            ``functor_ldc_templates_failed.dat`` (written only on failures).
        verbose: When true, failure messages are also printed.
        verify: When true and a signature is already present in ``fn_dict``,
            the string forms of the existing and the new template are
            compared; a mismatch is recorded as a failed rule.

    Returns:
        The ``fn_dict`` passed in.
    """
    print('Building function templates from LDC ccgbank...')

    # Collect every regular file that sits exactly two directory levels
    # below the AUTO root.
    ldcpath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data', 'AUTO')
    allfiles = []
    for dir1 in os.listdir(ldcpath):
        ldcpath1 = os.path.join(ldcpath, dir1)
        if not os.path.isdir(ldcpath1):
            continue
        for dir2 in os.listdir(ldcpath1):
            ldcpath2 = os.path.join(ldcpath1, dir2)
            if os.path.isfile(ldcpath2):
                allfiles.append(ldcpath2)

    # failed_parse is a flat list holding two entries per failure:
    # the offending derivation followed by the error text.
    failed_parse = []
    failed_rules = []
    rules = []
    progress = 0
    for fn in allfiles:
        progress = print_progress(progress, 10)
        with open(fn, 'r') as fd:
            lines = fd.readlines()
        # Lines alternate header/derivation; only the derivation lines
        # (odd offsets) are used. A trailing unpaired header is ignored.
        for ccgbank in lines[1::2]:
            pt = None
            try:
                pt = parse_ccg_derivation(ccgbank)
                extract_predarg_categories_from_pt(pt, rules)
            except Exception as e:
                failed_parse.append(safe_utf8_encode('CCGBANK: ' + ccgbank.strip()))
                failed_parse.append(safe_utf8_encode('Error: %s' % e))
            # Now attempt to track undefined unary rules
            if pt is not None:
                try:
                    builder = Ccg2Drs()
                    builder.build_execution_sequence(pt)
                    # Calling this will track undefined
                    builder.get_predarg_ccgbank()
                except Exception:
                    # Deliberately best effort: this pass only exists for
                    # its tracking side effect, so failures are ignored.
                    pass

    progress = (progress / 10) * 1000
    for predarg in rules:
        progress = print_progress(progress, 1000)
        try:
            catkey = predarg.clean(True)
            template = FunctorTemplate.create_from_category(predarg)
            if template is None:
                continue
            if catkey.signature not in fn_dict:
                fn_dict[catkey.signature] = template
            elif verify:
                f1 = fn_dict[catkey.signature]
                t1 = future_string(f1)
                t2 = future_string(template)
                # Raised explicitly (rather than via `assert`) so the check
                # still runs under `python -O`; it is caught just below and
                # recorded as a failed rule.
                if t1 != t2:
                    raise AssertionError(
                        'verify failed\n  t1=%s\n  t2=%s\n  f1=%s\n  f2=%s'
                        % (t1, t2, f1.predarg_category, predarg))
        except Exception as e:
            failed_rules.append(safe_utf8_encode('%s: %s' % (predarg, e)))

    print_progress(progress, done=True)

    if failed_parse:
        print('Warning: ldc - %d parses failed' % (len(failed_parse) // 2))
        # Entries are encoded byte strings, so the log is written in binary mode.
        with open(os.path.join(outdir, 'parse_ccg_derivation_failed.dat'), 'wb') as fd:
            fd.write(b'\n'.join(failed_parse))
        if verbose:
            # Odd offsets hold the error text of each failure.
            for m in failed_parse[1::2]:
                print(m)

    if failed_rules:
        print('Warning: ldc - %d rules failed' % len(failed_rules))
        with open(os.path.join(outdir, 'functor_ldc_templates_failed.dat'), 'wb') as fd:
            fd.write(b'\n'.join(failed_rules))
        if verbose:
            for m in failed_rules:
                print(m)

    return fn_dict
Beispiel #3
0
 def __repr__(self):
     """Return the model as a string.

     The result is the signature of self._clean_category, a ':' and the
     future_string() form of this object, passed through native_string().
     """
     signature = self._clean_category.signature
     text = future_string(self)
     return native_string(signature + ':' + text)
Beispiel #4
0
 def __init__(self, s=None):
     """Store the string form of s together with its hash.

     Args:
         s: Value converted with future_string(); None is stored as the
             empty string.
     """
     super(ConstString, self).__init__()
     if s is None:
         self._s = ''
     else:
         self._s = future_string(s)
     # Hash is computed once here from the stored string.
     self._h = hash(self._s)
Beispiel #5
0
 def __repr__(self):
     """Return future_string() applied to this object."""
     text = future_string(self)
     return text