def test_classify(self):
    """Train a fresh db on three fixtures and check classification of hello.m.

    The db is trained with one "Ruby" sample and two "Objective-C" samples.
    The "Objective-C/hello.m" fixture is then classified twice: once passing
    the fixture value directly and once passing the output of
    ``Tokenizer.tokenize``. In both cases the first element of the first
    result must equal "Objective-C".
    """
    db = {}

    # (language, fixture name) pairs, trained in this exact order.
    training_samples = (
        ("Ruby", "Ruby/foo.rb"),
        ("Objective-C", "Objective-C/Foo.h"),
        ("Objective-C", "Objective-C/Foo.m"),
    )
    for language, fixture_name in training_samples:
        Classifier.train(db, language, self.fixture(fixture_name))

    # Classify the fixture value as returned by self.fixture.
    results = Classifier.classify(db, self.fixture("Objective-C/hello.m"))
    best_match = results[0][0]
    assert "Objective-C" == best_match

    # Classify again, this time handing over already-tokenized input.
    tokens = Tokenizer.tokenize(self.fixture("Objective-C/hello.m"))
    results = Classifier.classify(db, tokens)
    best_match = results[0][0]
    assert "Objective-C" == best_match
def tokenize(self, data='', is_path=None):
    """Tokenize ``data`` or the contents of a sample file named by ``data``.

    Args:
        data: The input handed to ``Tokenizer.tokenize``. When ``is_path``
            is truthy, ``str(data)`` is instead treated as a path relative
            to the "samples" directory under ``ROOT_DIR`` and that file's
            contents are tokenized.
        is_path: When truthy, read the sample file before tokenizing.

    Returns:
        Whatever ``Tokenizer.tokenize`` returns for the resolved input.
    """
    if is_path:
        samples_dir = join(ROOT_DIR, "samples")
        sample_path = join(samples_dir, str(data))
        # Use a context manager so the file handle is closed promptly
        # instead of being left for the garbage collector.
        with open(sample_path) as sample_file:
            data = sample_file.read()
    return Tokenizer.tokenize(data)