parser.add_argument("--no_domain_ig", action="store_true", default=False, help="use only per-langugage IG in LD calculation") parser.add_argument("model", metavar='MODEL_DIR', help="read index and produce output in MODEL_DIR") args = parser.parse_args() lang_w_path = os.path.join(args.model, 'IGweights.lang.bin') domain_w_path = os.path.join(args.model, 'IGweights.domain') feature_path = args.output if args.output else os.path.join(args.model, 'LDfeats') # display paths if not SILENT: print "model path:", args.model print "lang weights path:", lang_w_path print "domain weights path:", domain_w_path print "feature output path:", feature_path lang_w = read_weights(lang_w_path) domain_w = read_weights(domain_w_path) if not args.no_domain_ig else None features_per_lang = select_LD_features(lang_w, domain_w, args.feats_per_lang, ignore_domain=args.no_domain_ig) if args.per_lang: with open(feature_path + '.perlang', 'w') as f: writer = csv.writer(f) for i in range(len(features_per_lang)): writer.writerow(map(repr,features_per_lang[i])) final_feature_set = reduce(set.union, map(set, features_per_lang.values())) if not SILENT: print 'selected %d features' % len(final_feature_set) write_features(sorted(final_feature_set), feature_path)
parser.add_argument("--per_lang", action="store_true", default=False, help="produce a list of features selecter per-language") parser.add_argument("--no_domain_ig", action="store_true", default=False, help="use only per-langugage IG in LD calculation") parser.add_argument("model", metavar='MODEL_DIR', help="read index and produce output in MODEL_DIR") args = parser.parse_args() lang_w_path = os.path.join(args.model, 'IGweights.lang.bin') domain_w_path = os.path.join(args.model, 'IGweights.domain') feature_path = args.output if args.output else os.path.join(args.model, 'LDfeats') # display paths print "model path:", args.model print "lang weights path:", lang_w_path print "domain weights path:", domain_w_path print "feature output path:", feature_path lang_w = read_weights(lang_w_path) domain_w = read_weights(domain_w_path) features_per_lang = select_LD_features(lang_w, domain_w, args.feats_per_lang, ignore_domain=args.no_domain_ig) if args.per_lang: with open(feature_path + '.perlang', 'w') as f: writer = csv.writer(f) for i in range(len(features_per_lang)): writer.writerow(map(repr,features_per_lang[i])) final_feature_set = reduce(set.union, map(set, features_per_lang.values())) print 'selected %d features' % len(final_feature_set) write_features(sorted(final_feature_set), feature_path) print 'wrote features to "%s"' % feature_path
metavar='MODEL_DIR', help="read index and produce output in MODEL_DIR") args = parser.parse_args() lang_w_path = os.path.join(args.model, 'IGweights.lang.bin') domain_w_path = os.path.join(args.model, 'IGweights.domain') feature_path = args.output if args.output else os.path.join( args.model, 'LDfeats') # display paths print "model path:", args.model print "lang weights path:", lang_w_path print "domain weights path:", domain_w_path print "feature output path:", feature_path lang_w = read_weights(lang_w_path) domain_w = read_weights(domain_w_path) if not args.no_domain_ig else None features_per_lang = select_LD_features(lang_w, domain_w, args.feats_per_lang, ignore_domain=args.no_domain_ig) if args.per_lang: with open(feature_path + '.perlang', 'w') as f: writer = csv.writer(f) for i in range(len(features_per_lang)): writer.writerow(map(repr, features_per_lang[i])) final_feature_set = reduce(set.union, map(set, features_per_lang.values())) print 'selected %d features' % len(final_feature_set)
"""
Select the most highly-weighted N features across any number of files.

Every FILE is loaded with ``read_weights``; the N keys with the largest
weights in each file are kept, and the sorted union over all files is handed
to ``write_features`` together with the output path.

Marco Lui, February 2013
"""

import argparse

from common import read_weights, write_features

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-n", "--number", type=int, default=200, metavar='N',
                        help="keep top N features per file")
    parser.add_argument("output", metavar='PATH', help="output to PATH")
    parser.add_argument("files", metavar="FILE", nargs='*',
                        help="read weighted features from FILE")
    args = parser.parse_args()

    # No file handle is opened here: write_features receives the output path
    # itself, and "output" is a required positional, so there is no stdout
    # fallback to provide.
    feats = set()
    for path in args.files:
        # presumably a mapping of feature -> weight; only iteration over the
        # keys and .get() are relied on here
        w = read_weights(path)
        # Rank the keys by weight, highest first, and keep the first N.
        feats |= set(sorted(w, key=w.get, reverse=True)[:args.number])

    write_features(sorted(feats), args.output)
metavar='MODEL_DIR', help="read index and produce output in MODEL_DIR") args = parser.parse_args() lang_w_path = os.path.join(args.model, 'IGweights.lang.bin') domain_w_path = os.path.join(args.model, 'IGweights.domain') feature_path = args.output if args.output else os.path.join( args.model, 'LDfeats') # display paths print "model path:", args.model print "lang weights path:", lang_w_path print "domain weights path:", domain_w_path print "feature output path:", feature_path lang_w = read_weights(lang_w_path) domain_w = read_weights(domain_w_path) features_per_lang = select_LD_features(lang_w, domain_w, args.feats_per_lang, ignore_domain=args.no_domain_ig) if args.per_lang: with open(feature_path + '.perlang', 'w') as f: writer = csv.writer(f) for i in range(len(features_per_lang)): writer.writerow(map(repr, features_per_lang[i])) final_feature_set = reduce(set.union, map(set, features_per_lang.values())) print 'selected %d features' % len(final_feature_set)