Exemple #1
0
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("file", help="file to read")
    parser.add_argument(
        "-c", "--column", type=int,
        help="project a specific column")
    parser.add_argument(
        "-n", "--number", type=int,
        help="output top N features")
    parser.add_argument(
        "-v", "--value", action="store_true",
        help="output the value used for ranking")
    parser.add_argument(
        "-p", "--printfeat", action="store_true",
        help="print the actual feature (default is to print repr)")
    parser.add_argument(
        "--output", "-o", type=argparse.FileType("w"), default=sys.stdout,
        help="write to OUTPUT")
    args = parser.parse_args()

    w = read_weights(args.file)
    # Without -n there is no cut-off: every entry of w is a candidate.
    n = len(w) if args.number is None else args.number

    def show(feat):
        """Return feat unchanged under --printfeat, otherwise its repr()."""
        return feat if args.printfeat else repr(feat)

    if args.column is not None:
        # Order the keys of w by the value in the chosen column, largest
        # first, and write the first n of them, one per line.
        ranked = sorted(w, key=lambda x: w[x][args.column], reverse=True)
        for key in ranked[:n]:
            if args.value:
                line = "{0},{1}\n".format(show(key), w[key][args.column])
            else:
                line = "{0}\n".format(show(key))
            args.output.write(line)
    else:
    else:
        raise ValueError("no suitable feature list")

    # Diagnostic output goes to stderr (Python 2 print-chevron syntax).
    print >>sys.stderr, "considering {0} features".format(len(feats))

    # records maps each feature to a dict of named statistics for it;
    # headers lists the statistic names in the order they were added.
    records = dict((k, {}) for k in feats)
    headers = []

    # Every feature gets a "len" entry: len() of the feature itself.
    headers.append("len")
    for k in feats:
        records[k]["len"] = len(k)

    # Document Frequency
    # Optional: only added when the DF_all model file exists.
    if os.path.exists(model_file("DF_all")):
        print >>sys.stderr, "found weights for document frequency"
        w = read_weights(model_file("DF_all"))
        headers.append("DF")
        for k in feats:
            # Only the first value of the feature's weight entry is kept.
            # NOTE(review): assumes w has an entry for every feature in
            # feats -- confirm against read_weights.
            records[k]["DF"] = w[k][0]

    # IG weights for the all-languages event
    # Optional: only added when the IGweights.lang model file exists.
    if os.path.exists(model_file("IGweights.lang")):
        print >>sys.stderr, "found weights for lang"
        # w is rebound here; the document-frequency weights are not
        # referenced again in this section.
        w = read_weights(model_file("IGweights.lang"))
        headers.append("IGlang")
        for k in feats:
            # As above, only the first value of each entry is recorded.
            records[k]["IGlang"] = w[k][0]

    # IG weights for the all-domains event
    if os.path.exists(model_file("IGweights.domain")):
        print >>sys.stderr, "found weights for domain"
Exemple #3
0
    else:
        raise ValueError("no suitable feature list")

    # Progress messages are written to stderr.
    print("considering {0} features".format(len(feats)), file=sys.stderr)

    # One (initially empty) dict of statistics per feature, keyed by the
    # feature; headers accumulates the statistic names in insertion order.
    records = dict((k, {}) for k in feats)
    headers = []

    # 'len' is always present: the length of the feature itself.
    headers.append('len')
    for k in feats:
        records[k]['len'] = len(k)

    # Document Frequency
    # Skipped entirely when the DF_all model file is absent.
    if os.path.exists(model_file('DF_all')):
        print("found weights for document frequency", file=sys.stderr)
        w = read_weights(model_file('DF_all'))
        headers.append('DF')
        for k in feats:
            # Keeps only element 0 of the weight entry for this feature.
            # NOTE(review): presumes every feature in feats is a key of w
            # -- verify against read_weights.
            records[k]['DF'] = w[k][0]

    # IG weights for the all-languages event
    # Skipped entirely when the IGweights.lang model file is absent.
    if os.path.exists(model_file('IGweights.lang')):
        print("found weights for lang", file=sys.stderr)
        # Rebinds w to the weights read from IGweights.lang.
        w = read_weights(model_file('IGweights.lang'))
        headers.append('IGlang')
        for k in feats:
            # Element 0 of the entry, same convention as the DF section.
            records[k]['IGlang'] = w[k][0]

    # IG weights for the all-domains event
    if os.path.exists(model_file('IGweights.domain')):
        print("found weights for domain", file=sys.stderr)
    else:
        raise ValueError("no suitable feature list")

    # Status line on stderr, using the Python 2 print statement.
    print >> sys.stderr, "considering {0} features".format(len(feats))

    # Build an empty statistics dict for each feature; headers keeps the
    # names of the statistics in the order they are filled in below.
    records = dict((k, {}) for k in feats)
    headers = []

    # First statistic: len() of the feature.
    headers.append('len')
    for k in feats:
        records[k]['len'] = len(k)

    # Document Frequency
    # Recorded only if the DF_all model file is present on disk.
    if os.path.exists(model_file('DF_all')):
        print >> sys.stderr, "found weights for document frequency"
        w = read_weights(model_file('DF_all'))
        headers.append('DF')
        for k in feats:
            # The first item of the feature's weight entry is stored.
            # NOTE(review): w[k] presumes each feature in feats appears in
            # the weights that were read -- confirm.
            records[k]['DF'] = w[k][0]

    # IG weights for the all-languages event
    # Recorded only if the IGweights.lang model file is present on disk.
    if os.path.exists(model_file('IGweights.lang')):
        print >> sys.stderr, "found weights for lang"
        # w now refers to the IGweights.lang weights.
        w = read_weights(model_file('IGweights.lang'))
        headers.append('IGlang')
        for k in feats:
            # Again the first item of the entry is the one stored.
            records[k]['IGlang'] = w[k][0]

    # IG weights for the all-domains event
    if os.path.exists(model_file('IGweights.domain')):
        print >> sys.stderr, "found weights for domain"
import argparse, os, csv, sys

from langid.train.common import read_weights

if __name__ == "__main__":
  parser = argparse.ArgumentParser()
  parser.add_argument('file', help="file to read")
  parser.add_argument(
    '-c', '--column', type=int, help="project a specific column")
  parser.add_argument(
    '-n', '--number', type=int, help="output top N features")
  parser.add_argument(
    '-v', '--value', action="store_true",
    help="output the value used for ranking")
  parser.add_argument(
    '-p', '--printfeat', action="store_true",
    help="print the actual feature (default is to print repr)")
  parser.add_argument(
    '--output', "-o", type=argparse.FileType('w'), default=sys.stdout,
    help="write to OUTPUT")
  args = parser.parse_args()

  w = read_weights(args.file)
  n = args.number
  if n is None:
    # No -n given: do not truncate the listing.
    n = len(w)

  def show(feat):
    # The feature itself with --printfeat, its repr() otherwise.
    if not args.printfeat:
      return repr(feat)
    return feat

  if args.column is not None:
    # Keys of w in descending order of their value in the selected column,
    # cut off after the first n.
    top_keys = sorted(w, key=lambda x: w[x][args.column], reverse=True)[:n]
    for key in top_keys:
      if not args.value:
        args.output.write("{0}\n".format(show(key)))
      else:
        args.output.write("{0},{1}\n".format(show(key), w[key][args.column]))
  else: