if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('file', help="file to read") parser.add_argument('-c', '--column', help="project a specific column", type=int) parser.add_argument('-n', '--number', help="output top N features", type=int) parser.add_argument('-v', '--value', help="output the value used for ranking", action="store_true") parser.add_argument('-p', '--printfeat', help="print the actual feature (default is to print repr)", action="store_true") parser.add_argument('--output', "-o", default=sys.stdout, type=argparse.FileType('w'), help="write to OUTPUT") args = parser.parse_args() w = read_weights(args.file) n = args.number if args.number is not None else len(w) def show(feat): if args.printfeat: return feat else: return repr(feat) if args.column is not None: for key in sorted(w, key=lambda x: w[x][args.column], reverse=True)[:n]: if args.value: args.output.write("{0},{1}\n".format(show(key), w[key][args.column])) else: args.output.write("{0}\n".format(show(key))) else:
else: raise ValueError("no suitable feature list") print >>sys.stderr, "considering {0} features".format(len(feats)) records = dict((k, {}) for k in feats) headers = [] headers.append("len") for k in feats: records[k]["len"] = len(k) # Document Frequency if os.path.exists(model_file("DF_all")): print >>sys.stderr, "found weights for document frequency" w = read_weights(model_file("DF_all")) headers.append("DF") for k in feats: records[k]["DF"] = w[k][0] # IG weights for the all-languages event if os.path.exists(model_file("IGweights.lang")): print >>sys.stderr, "found weights for lang" w = read_weights(model_file("IGweights.lang")) headers.append("IGlang") for k in feats: records[k]["IGlang"] = w[k][0] # IG weights for the all-domains event if os.path.exists(model_file("IGweights.domain")): print >>sys.stderr, "found weights for domain"
else: raise ValueError("no suitable feature list") print("considering {0} features".format(len(feats)), file=sys.stderr) records = dict((k, {}) for k in feats) headers = [] headers.append('len') for k in feats: records[k]['len'] = len(k) # Document Frequency if os.path.exists(model_file('DF_all')): print("found weights for document frequency", file=sys.stderr) w = read_weights(model_file('DF_all')) headers.append('DF') for k in feats: records[k]['DF'] = w[k][0] # IG weights for the all-languages event if os.path.exists(model_file('IGweights.lang')): print("found weights for lang", file=sys.stderr) w = read_weights(model_file('IGweights.lang')) headers.append('IGlang') for k in feats: records[k]['IGlang'] = w[k][0] # IG weights for the all-domains event if os.path.exists(model_file('IGweights.domain')): print("found weights for domain", file=sys.stderr)
else: raise ValueError("no suitable feature list") print >> sys.stderr, "considering {0} features".format(len(feats)) records = dict((k, {}) for k in feats) headers = [] headers.append('len') for k in feats: records[k]['len'] = len(k) # Document Frequency if os.path.exists(model_file('DF_all')): print >> sys.stderr, "found weights for document frequency" w = read_weights(model_file('DF_all')) headers.append('DF') for k in feats: records[k]['DF'] = w[k][0] # IG weights for the all-languages event if os.path.exists(model_file('IGweights.lang')): print >> sys.stderr, "found weights for lang" w = read_weights(model_file('IGweights.lang')) headers.append('IGlang') for k in feats: records[k]['IGlang'] = w[k][0] # IG weights for the all-domains event if os.path.exists(model_file('IGweights.domain')): print >> sys.stderr, "found weights for domain"
import argparse, os, csv, sys from langid.train.common import read_weights if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('file', help="file to read") parser.add_argument('-c','--column',help="project a specific column", type=int) parser.add_argument('-n','--number',help="output top N features", type=int) parser.add_argument('-v','--value',help="output the value used for ranking", action="store_true") parser.add_argument('-p','--printfeat',help="print the actual feature (default is to print repr)", action="store_true") parser.add_argument('--output', "-o", default=sys.stdout, type=argparse.FileType('w'), help = "write to OUTPUT") args = parser.parse_args() w = read_weights(args.file) n = args.number if args.number is not None else len(w) def show(feat): if args.printfeat: return feat else: return repr(feat) if args.column is not None: for key in sorted(w, key=lambda x:w[x][args.column], reverse=True)[:n]: if args.value: args.output.write("{0},{1}\n".format(show(key),w[key][args.column])) else: args.output.write("{0}\n".format(show(key))) else: