def main():
    """Command-line entry point: guess the language of each file in ``sys.argv``.

    Usage: ``PROG [-v] [FILE]...``

    With no arguments, or with ``-h`` / ``--help`` as the first argument, the
    usage text is printed and the function returns.  Otherwise, for every
    FILE, at most the first 1024 bytes/characters are read and handed to
    ``pytextcat.classify``; every candidate whose score is below 1.05 is
    reported as a probable language.  If no candidate qualifies, the file is
    reported as "unknown" instead of raising ``IndexError``.

    When the first argument is ``-v`` the full ranking is printed too, each
    score shown as a whole percentage worse than the best score.

    Every ``print`` call receives exactly one string, so the output is the
    same whether ``print`` is the Python 2 statement or the Python 3 function.

    Returns:
        None.

    Raises:
        IOError/OSError: if a FILE cannot be opened or read.
    """
    if len(sys.argv) < 2 or sys.argv[1] in ('-h', '--help'):
        usage = "\n".join((
            "%(0)s [-v] [FILE]...",
            "",
            "Determine the language of each FILE. If -v is present, then full ranking",
            "information will be given for each file.",
            "",
            "Examples:",
            "",
            "    %(0)s test_texts/*.txt",
            "    %(0)s -v README",
        ))
        print(usage % {'0': sys.argv[0]})
        return

    verbose = sys.argv[1] == '-v'
    fnames = sys.argv[2:] if verbose else sys.argv[1:]

    # Width of the language column in the verbose table; the header and the
    # data rows share it so that the two columns line up.
    name_width = 32

    for fname in fnames:
        # Only a leading sample of the file is classified.  The ``with``
        # block closes the handle right away instead of leaking it.
        with open(fname) as source:
            text = source.read(1024)

        likely = []  # display names of the candidates scoring below 1.05
        table = []   # (display name, percent worse than best) for every candidate
        # Rows are collected in a fresh list rather than written back into
        # the list owned by pytextcat.
        for rank, (lang, enc) in pytextcat.classify(text):
            lang = lang.replace('_', ' ').title()
            if enc:
                lang = "%s (%s)" % (lang, enc)
            if rank < 1.05:
                likely.append(lang)
            table.append((lang, int(100 * rank) - 100))

        if not likely:
            # No candidate at all (or none close enough to the best score).
            guess = "unknown"
        elif len(likely) == 1:
            guess = likely[0]
        elif len(likely) == 2:
            guess = "%s or %s" % (likely[0], likely[1])
        else:
            guess = "%s, or %s" % (', '.join(likely[:-1]), likely[-1])
        print("%s is probably %s" % (fname, guess))

        if verbose:
            print("")
            print("Full ranking information is given below:")
            print("")
            print("Language".ljust(name_width) + "Score")
            for lang, percent in table:
                print("%s%2d%% worse than best score"
                      % (lang.ljust(name_width), percent))
            print("")
            print("")
def post(self):
    """Handle a form submission: classify the posted text and render the result.

    Reads the ``text`` request parameter.  When it is missing or empty the
    page is rendered with no context.  Otherwise the text is encoded as
    UTF-8, passed to ``classify``, and the page is rendered with:

    * ``text``  -- the encoded text after ``cgi.escape``
    * ``ans``   -- ``humanize_list`` applied (with ``'or'``) to the display
      names of all candidates scoring below 1.05
    * ``ranks`` -- the list returned by ``classify``, overwritten in place
      with ``(display name, int(100 * score) - 100)`` pairs
      (presumably "percent worse than the best score" -- confirm against
      ``classify``)
    """
    submitted = self.request.get('text')
    if submitted:
        encoded = submitted.encode('utf-8')
        ranks = classify(encoded)
        likely = []
        for position, entry in enumerate(ranks):
            score, (language, encoding) = entry
            # "some_language" -> "Some Language", plus the encoding if any.
            label = language.replace('_', ' ').title()
            if encoding:
                label = "%s (%s)" % (label, encoding)
            if score < 1.05:
                likely.append(label)
            ranks[position] = (label, int(100 * score) - 100)
        context = {
            'text': cgi.escape(encoded),
            'ans': humanize_list(likely, 'or'),
            'ranks': ranks,
        }
        self.render(context)
    else:
        # Nothing submitted: show the page without any result.
        self.render()
def post(self):
    """Classify the text posted in the ``text`` field and render the outcome.

    An absent or empty ``text`` parameter re-renders the page with no
    context.  Otherwise the text is UTF-8 encoded and given to ``classify``;
    each ``(score, (language, encoding))`` entry of the result is replaced in
    place by ``(display name, int(100 * score) - 100)``, and the display
    names of entries scoring below 1.05 are joined by ``humanize_list`` with
    ``'or'``.  The page is then rendered with the escaped text, that joined
    answer and the rewritten ranking.
    """

    def display_name(language, encoding):
        # Underscores become spaces and words are title-cased; a non-empty
        # encoding is appended in parentheses.
        pretty = language.replace('_', ' ').title()
        return "%s (%s)" % (pretty, encoding) if encoding else pretty

    text = self.request.get('text')
    if not text:
        self.render()
        return

    text = text.encode('utf-8')
    ranks = classify(text)

    candidates = []
    for index, (score, (language, encoding)) in enumerate(ranks):
        shown = display_name(language, encoding)
        if score < 1.05:
            candidates.append(shown)
        ranks[index] = (shown, int(100 * score) - 100)

    self.render({
        'text': cgi.escape(text),
        'ans': humanize_list(candidates, 'or'),
        'ranks': ranks,
    })