Ejemplo n.º 1
0
from pastepm.lib.pyclassifier import Classifier
import os

# Train a classifier on every file found below ./training and export the
# result to training.pckl. The directory directly below 'training' supplies
# the language label; each file is trained with a (language, extension) pair.
c = Classifier()

for dirname, dirnames, filenames in os.walk('training'):
    # The walk root ('training') contains no path separator, so it has no
    # language component; files sitting directly in it are skipped.
    if os.sep not in dirname:
        continue

    # Second path component, i.e. the directory directly below 'training'.
    language = dirname.split(os.sep)[1]
    for f in filenames:
        # The "extension" is the second dot-separated piece of the file name;
        # a name without any dot is used whole as its own extension.
        try:
            extension = f.split(".")[1]
        except IndexError:
            extension = f

        full_path = os.path.join(dirname, f)
        # Close every training file right after reading it instead of leaving
        # the handle open until garbage collection.
        with open(full_path) as source:
            contents = source.read()
        c.train(contents, (language, extension))

# The context manager guarantees the exported data is flushed and the file is
# closed even if export raises.
with open('training.pckl', 'w+') as output:
    c.export(output)
Ejemplo n.º 2
0
from pastepm.config import config
from pastepm.lib.pyclassifier import Classifier

# Load the trained classifier from the path returned by
# config.get('pyclassifier', 'file'), closing the file once loading is done.
# NOTE(review): assumes Classifier.from_data consumes the stream eagerly and
# does not keep reading from it later — confirm against pyclassifier.
with open(config.get('pyclassifier', 'file')) as _classifier_file:
    c = Classifier.from_data(_classifier_file)

# Classes known to the classifier; iterated below as (language, extension) pairs.
language_ext_pairs = c.get_classes()

def language_detect(code):
    """Classify *code* using the module-level classifier ``c``.

    Returns whatever ``c.identify`` yields for the given input.
    """
    identified = c.identify(code)
    return identified

def get_language_from_extension(extension):
    """Return the lower-cased language of the first pair matching *extension*.

    Scans ``language_ext_pairs`` in order, comparing each pair's extension
    with ``extension`` for exact equality. Returns ``None`` when no pair
    matches.
    """
    matches = (
        name.lower()
        for name, known_ext in language_ext_pairs
        if known_ext == extension
    )
    return next(matches, None)