Ejemplo n.º 1
0
def mkmodel(config):
   """Build the model file and the generated source file for one configuration.

   Reads the corpus ``<name>.tok`` from ``CORPORA_DIR``, hands its contexts to
   ``dump_model`` to write ``models/<name>.mdl``, then hands the version that
   ``dump_model`` returned to ``dump_code`` to write ``src/<name>.cm``. Both
   output paths are resolved against the directory two levels above the one
   that contains this file.

   Args:
      config: Mapping with a ``"name"`` entry (base name shared by the corpus
         and both output files) and a ``"features"`` entry (an iterable whose
         items are either a single string or a sequence of strings).
   """
   this_dir = os.path.dirname(os.path.abspath(__file__))
   root_dir = os.path.dirname(os.path.dirname(this_dir))

   name = config["name"]
   # Normalize every entry to a sequence: a bare string becomes a one-element
   # list. A conditional expression is used instead of the ``and``/``or``
   # trick, which silently breaks whenever its middle operand is falsy.
   features = [[f] if isinstance(f, str) else f for f in config["features"]]
   # Order the entries by the UTF-8 bytes of their joined form.
   features.sort(key=lambda fs: ftrs.FEATURE_JOIN_STRING.join(fs).encode("UTF-8"))

   with open(os.path.join(CORPORA_DIR, name + ".tok")) as fp:
      ctxs = context.read_contexts(fp)
   with open(os.path.join(root_dir, "models", name + ".mdl"), "w") as fp:
      # The version produced while dumping the model is passed on to dump_code.
      version = dump_model(name, features, ctxs, fp)
   with open(os.path.join(root_dir, "src", name + ".cm"), "w") as fp:
      dump_code(name, version, features, fp)
Ejemplo n.º 2
0
def mkmodel(config):
    """Build the model file and the generated source file for one configuration.

    Reads the corpus ``<name>.tok`` from ``CORPORA_DIR``, hands its contexts to
    ``dump_model`` to write ``models/<name>.mdl``, then hands the version that
    ``dump_model`` returned to ``dump_code`` to write ``src/<name>.cm``. Both
    output paths are resolved against the directory two levels above the one
    that contains this file.

    Args:
        config: Mapping with a ``"name"`` entry (base name shared by the
            corpus and both output files) and a ``"features"`` entry (an
            iterable whose items are either a single string or a sequence of
            strings).
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir = os.path.dirname(os.path.dirname(this_dir))

    name = config["name"]
    # Normalize every entry to a sequence: a bare string becomes a one-element
    # list. A conditional expression is used instead of the ``and``/``or``
    # trick, which silently breaks whenever its middle operand is falsy.
    features = [[f] if isinstance(f, str) else f for f in config["features"]]
    # Order the entries by the UTF-8 bytes of their joined form.
    features.sort(
        key=lambda fs: ftrs.FEATURE_JOIN_STRING.join(fs).encode("UTF-8"))

    with open(os.path.join(CORPORA_DIR, name + ".tok")) as fp:
        ctxs = context.read_contexts(fp)
    with open(os.path.join(root_dir, "models", name + ".mdl"), "w") as fp:
        # The version produced while dumping the model is passed on to
        # dump_code.
        version = dump_model(name, features, ctxs, fp)
    with open(os.path.join(root_dir, "src", name + ".cm"), "w") as fp:
        dump_code(name, version, features, fp)
Ejemplo n.º 3
0
#!/usr/bin/env python3
"""
Reads a corpus from the standard input (see context.py for the expected format),
extracts features from it, and outputs a dataset file for use with bayes_fss.
"""

import sys, features, context

# Read the (context, label) pairs of the corpus from standard input.
ctxs = context.read_contexts(sys.stdin, allow_empty_features=False)
# Order the extractors by name so the columns come out in a fixed order.
fns = list(features.EXTRACTORS.items())
fns.sort()

# Header row: a leading tab (empty label column), then the extractor names.
header = "\t".join(name for name, _ in fns)
print("\t" + header)
# One row per context: its label followed by each extractor's output.
for ctx, label in ctxs:
    fields = [fn(ctx) for _, fn in fns]
    print(label, "\t".join(fields), sep="\t")
Ejemplo n.º 4
0
#!/usr/bin/env python3

"""
Reads a corpus from the standard input (see context.py for the expected format),
extracts features from it, and outputs a dataset file for use with bayes_fss.
"""

import sys, features, context

# Read the (context, label) pairs of the corpus from standard input.
ctxs = context.read_contexts(sys.stdin, allow_empty_features=False)
# Order the extractors by name so the columns come out in a fixed order.
fns = list(features.EXTRACTORS.items())
fns.sort()

# Header row: a leading tab (empty label column), then the extractor names.
header = "\t".join(name for name, _ in fns)
print("\t" + header)
# One row per context: its label followed by each extractor's output.
for ctx, label in ctxs:
   fields = [fn(ctx) for _, fn in fns]
   print(label, "\t".join(fields), sep="\t")