コード例 #1
0
ファイル: textcat.py プロジェクト: davidar/pytextcat
def main():
    if len(sys.argv) < 2 or sys.argv[1] in ('-h','--help'):
        print """
%(0)s [-v] [FILE]...

Determine the language of each FILE.

If -v is present, then full ranking information will be given for each file.

Examples:
%(0)s test_texts/*.txt
%(0)s -v README
        """.strip() % {'0': sys.argv[0]}
        return
    elif sys.argv[1] == '-v':
        verbose = True
        fnames = sys.argv[2:]
    else:
        verbose = False
        fnames = sys.argv[1:]
    
    for fname in fnames:
        text = open(fname).read(1024)
        ranks = pytextcat.classify(text)
        ans = []
        
        for i,(rank,(lang,enc)) in enumerate(ranks):
            lang = lang.replace('_',' ').title()
            if enc: lang = "%s (%s)" % (lang,enc)
            if rank < 1.05: ans.append(lang)
            rank = int(100*rank) - 100
            ranks[i] = (lang,rank)
        
        print fname, "is probably",
        if len(ans) == 1:
            print ans[0]
        elif len(ans) == 2:
            print "%s or %s" % (ans[0], ans[1])
        else:
            print "%s, or %s" % (', '.join(ans[:-1]), ans[-1])
        
        if verbose:
            print
            print "Full ranking information is given below:"
            print
            print "Language                        Score"
            for lang,rank in ranks:
                print "%s%s%2d%% worse than best score" \
                    % (lang,' '*(32-len(lang)),rank)
            print
            print
コード例 #2
0
ファイル: __init__.py プロジェクト: JimmyJune/da.vidr.cc
    def post(self):
        """Classify the submitted 'text' field and render the outcome.

        With no text in the request, ``self.render()`` is called without
        arguments. Otherwise the text is UTF-8 encoded and passed to
        ``classify``; every result is turned into a ``(label, percent)``
        pair, and labels whose score is under 1.05 are collected as the
        likely answers. The page is then rendered with the escaped text,
        the answers joined by ``humanize_list(..., 'or')``, and the ranking.
        """
        submitted = self.request.get('text')
        if submitted:
            text = submitted.encode('utf-8')
            ranking = classify(text)
            likely = []
            for idx, (score, (name, charset)) in enumerate(ranking):
                label = name.replace('_', ' ').title()
                if charset:
                    label = "%s (%s)" % (label, charset)
                if score < 1.05:
                    likely.append(label)
                # Presumably the percentage by which this score exceeds the
                # best one -- verify against classify.
                percent = int(100 * score) - 100
                ranking[idx] = (label, percent)
            escaped = cgi.escape(text)
            summary = humanize_list(likely, 'or')
            self.render({'text': escaped, 'ans': summary, 'ranks': ranking})
        else:
            self.render()
コード例 #3
0
ファイル: __init__.py プロジェクト: VijayEluri/da.vidr.cc
    def post(self):
        """Handle a form submission: classify the posted text and render it.

        An empty or missing 'text' field renders the page with no arguments.
        Otherwise the UTF-8 encoded text goes through ``classify``; each
        result is replaced in place by a ``(label, percent)`` pair, labels
        scoring under 1.05 become the candidate answers, and everything is
        handed to ``self.render`` together with the escaped text.
        """
        raw = self.request.get('text')
        if not raw:
            self.render()
            return

        encoded = raw.encode('utf-8')
        results = classify(encoded)
        candidates = []
        for position, result in enumerate(results):
            score, (language, encoding) = result
            label = language.replace('_', ' ').title()
            if encoding:
                label = "%s (%s)" % (label, encoding)
            if score < 1.05:
                candidates.append(label)
            # Stored as an integer offset from 100; presumably "percent worse
            # than the best score" -- confirm against classify.
            results[position] = (label, int(100 * score) - 100)

        context = {
            'text': cgi.escape(encoded),
            'ans': humanize_list(candidates, 'or'),
            'ranks': results,
        }
        self.render(context)