コード例 #1
0
  parser.add_argument("--no_domain_ig", action="store_true", default=False, help="use only per-langugage IG in LD calculation")
  parser.add_argument("model", metavar='MODEL_DIR', help="read index and produce output in MODEL_DIR")
  args = parser.parse_args()

  lang_w_path = os.path.join(args.model, 'IGweights.lang.bin')
  domain_w_path = os.path.join(args.model, 'IGweights.domain')
  feature_path = args.output if args.output else os.path.join(args.model, 'LDfeats')

  # display paths
  if not SILENT:
    print "model path:", args.model
    print "lang weights path:", lang_w_path
    print "domain weights path:", domain_w_path
    print "feature output path:", feature_path

  lang_w = read_weights(lang_w_path)
  domain_w = read_weights(domain_w_path) if not args.no_domain_ig else None

  features_per_lang = select_LD_features(lang_w, domain_w, args.feats_per_lang, ignore_domain=args.no_domain_ig)
  if args.per_lang:
    with open(feature_path + '.perlang', 'w') as f:
      writer = csv.writer(f)
      for i in range(len(features_per_lang)):
        writer.writerow(map(repr,features_per_lang[i]))
      

  final_feature_set = reduce(set.union, map(set, features_per_lang.values()))
  if not SILENT:
    print 'selected %d features' % len(final_feature_set)

  write_features(sorted(final_feature_set), feature_path)
コード例 #2
0
  parser.add_argument("--per_lang", action="store_true", default=False, help="produce a list of features selecter per-language")
  parser.add_argument("--no_domain_ig", action="store_true", default=False, help="use only per-langugage IG in LD calculation")
  parser.add_argument("model", metavar='MODEL_DIR', help="read index and produce output in MODEL_DIR")
  args = parser.parse_args()

  lang_w_path = os.path.join(args.model, 'IGweights.lang.bin')
  domain_w_path = os.path.join(args.model, 'IGweights.domain')
  feature_path = args.output if args.output else os.path.join(args.model, 'LDfeats')

  # display paths
  print "model path:", args.model
  print "lang weights path:", lang_w_path
  print "domain weights path:", domain_w_path
  print "feature output path:", feature_path

  lang_w = read_weights(lang_w_path)
  domain_w = read_weights(domain_w_path)

  features_per_lang = select_LD_features(lang_w, domain_w, args.feats_per_lang, ignore_domain=args.no_domain_ig)
  if args.per_lang:
    with open(feature_path + '.perlang', 'w') as f:
      writer = csv.writer(f)
      for i in range(len(features_per_lang)):
        writer.writerow(map(repr,features_per_lang[i]))
      

  final_feature_set = reduce(set.union, map(set, features_per_lang.values()))
  print 'selected %d features' % len(final_feature_set)

  write_features(sorted(final_feature_set), feature_path)
  print 'wrote features to "%s"' % feature_path 
コード例 #3
0
ファイル: LDfeatureselect.py プロジェクト: pkusp/train-langid
                        metavar='MODEL_DIR',
                        help="read index and produce output in MODEL_DIR")
    args = parser.parse_args()

    lang_w_path = os.path.join(args.model, 'IGweights.lang.bin')
    domain_w_path = os.path.join(args.model, 'IGweights.domain')
    feature_path = args.output if args.output else os.path.join(
        args.model, 'LDfeats')

    # display paths
    print "model path:", args.model
    print "lang weights path:", lang_w_path
    print "domain weights path:", domain_w_path
    print "feature output path:", feature_path

    lang_w = read_weights(lang_w_path)
    domain_w = read_weights(domain_w_path) if not args.no_domain_ig else None

    features_per_lang = select_LD_features(lang_w,
                                           domain_w,
                                           args.feats_per_lang,
                                           ignore_domain=args.no_domain_ig)
    if args.per_lang:
        with open(feature_path + '.perlang', 'w') as f:
            writer = csv.writer(f)
            for i in range(len(features_per_lang)):
                writer.writerow(map(repr, features_per_lang[i]))

    final_feature_set = reduce(set.union, map(set, features_per_lang.values()))
    print 'selected %d features' % len(final_feature_set)
コード例 #4
0
"""
Select the most highly-weighted N features across any number of files.

Marco Lui, February 2013
"""

import argparse

from common import read_weights, write_features

if __name__ == "__main__":
  # Command-line entry point: take the N highest-weighted features from each
  # input file, merge them into a single set, and write the sorted result to
  # the output path.
  parser = argparse.ArgumentParser()
  parser.add_argument("-n","--number", type=int, default=200, metavar='N', 
    help="keep top N features per file")
  parser.add_argument("output", metavar='PATH', help="output to PATH")
  parser.add_argument("files", metavar="FILE", nargs='*', help="read weighted features from FILE")
  args = parser.parse_args()

  # No file handle is opened here: the output path is passed straight to
  # write_features below. A handle opened at this point would never be used
  # or closed, and would truncate the output file before any input was read.
  # NOTE(review): presumably write_features creates/overwrites the file at
  # the given path itself -- confirm against common.write_features.
  feats = set()

  for path in args.files:
    # w is used as a mapping: iterating it yields the features and w.get
    # supplies the weight each feature is ranked by (highest first).
    w = read_weights(path)
    feats |= set(sorted(w, key=w.get, reverse=True)[:args.number])

  # Sorted so that the emitted feature order is deterministic.
  write_features(sorted(feats), args.output)
コード例 #5
0
                        metavar='MODEL_DIR',
                        help="read index and produce output in MODEL_DIR")
    args = parser.parse_args()

    lang_w_path = os.path.join(args.model, 'IGweights.lang.bin')
    domain_w_path = os.path.join(args.model, 'IGweights.domain')
    feature_path = args.output if args.output else os.path.join(
        args.model, 'LDfeats')

    # display paths
    print "model path:", args.model
    print "lang weights path:", lang_w_path
    print "domain weights path:", domain_w_path
    print "feature output path:", feature_path

    lang_w = read_weights(lang_w_path)
    domain_w = read_weights(domain_w_path)

    features_per_lang = select_LD_features(lang_w,
                                           domain_w,
                                           args.feats_per_lang,
                                           ignore_domain=args.no_domain_ig)
    if args.per_lang:
        with open(feature_path + '.perlang', 'w') as f:
            writer = csv.writer(f)
            for i in range(len(features_per_lang)):
                writer.writerow(map(repr, features_per_lang[i]))

    final_feature_set = reduce(set.union, map(set, features_per_lang.values()))
    print 'selected %d features' % len(final_feature_set)