def mkmodel(config):
    """Generate the model file and the generated-code file for one model.

    ``config`` is indexed with ``"name"`` and ``"features"``. Each entry of
    ``config["features"]`` that is a plain string is wrapped into a
    one-element list; any other entry is used as given. The resulting
    feature groups are ordered by the UTF-8 bytes of their members joined
    with ``ftrs.FEATURE_JOIN_STRING``.

    Contexts are read from ``<CORPORA_DIR>/<name>.tok``. The model is
    written to ``<root>/models/<name>.mdl`` and the code to
    ``<root>/src/<name>.cm``, where ``<root>`` is two directories above the
    directory containing this file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(os.path.dirname(here))
    name = config["name"]

    # Normalize every feature spec to a list of feature names.
    features = []
    for entry in config["features"]:
        features.append([entry] if isinstance(entry, str) else entry)

    def joined_bytes(group):
        # Sorting on encoded bytes rather than on the str itself.
        return ftrs.FEATURE_JOIN_STRING.join(group).encode("UTF-8")

    features.sort(key=joined_bytes)

    corpus_path = os.path.join(CORPORA_DIR, name + ".tok")
    with open(corpus_path) as corpus:
        # NOTE(review): ctxs is used after this file is closed, so
        # read_contexts presumably materializes its result -- confirm.
        ctxs = context.read_contexts(corpus)

    model_path = os.path.join(project_root, "models", name + ".mdl")
    with open(model_path, "w") as model_out:
        # dump_model's return value is passed on to dump_code as the version.
        version = dump_model(name, features, ctxs, model_out)

    code_path = os.path.join(project_root, "src", name + ".cm")
    with open(code_path, "w") as code_out:
        dump_code(name, version, features, code_out)
def mkmodel(config):
    """Write ``<name>.mdl`` and ``<name>.cm`` for the model in ``config``.

    Reads ``config["name"]`` and ``config["features"]``. String feature
    entries become single-item lists, other entries are kept unchanged, and
    the groups are sorted by the UTF-8 encoding of their members joined with
    ``ftrs.FEATURE_JOIN_STRING``.

    The contexts come from ``<CORPORA_DIR>/<name>.tok``. Output goes to the
    ``models`` and ``src`` directories located two levels above the
    directory that holds this file.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    top_dir = os.path.dirname(os.path.dirname(script_dir))
    name = config["name"]

    # Wrap bare strings so that every feature is a list of names, then order
    # the groups by their joined, UTF-8 encoded form.
    features = sorted(
        ([spec] if isinstance(spec, str) else spec
         for spec in config["features"]),
        key=lambda group: ftrs.FEATURE_JOIN_STRING.join(group).encode("UTF-8"),
    )

    # Step 1: load the contexts from the tokenized corpus.
    with open(os.path.join(CORPORA_DIR, name + ".tok")) as tok_file:
        ctxs = context.read_contexts(tok_file)

    # Step 2: dump the model; the value it returns is reused in step 3.
    with open(os.path.join(top_dir, "models", name + ".mdl"), "w") as mdl_file:
        version = dump_model(name, features, ctxs, mdl_file)

    # Step 3: dump the code for the same name, version and features.
    with open(os.path.join(top_dir, "src", name + ".cm"), "w") as cm_file:
        dump_code(name, version, features, cm_file)
#!/usr/bin/env python3

"""
Read a corpus from standard input (see context.py for the expected format),
run every registered feature extractor over each context, and print the
resulting tab-separated dataset to standard output for use with bayes_fss.
"""

import sys
import features
import context

corpus = context.read_contexts(sys.stdin, allow_empty_features=False)

# Extractors in sorted (name, function) order, so columns are stable.
extractors = sorted(features.EXTRACTORS.items())

# Header row: the first column (the label) has no name, hence the leading tab.
column_names = [name for name, _ in extractors]
print("\t" + "\t".join(column_names))

# One row per context: its label followed by each extractor's value.
for ctx, label in corpus:
    row = "\t".join([extract(ctx) for _, extract in extractors])
    print("%s\t%s" % (label, row))