def classify_ambiguous_languages(sample):
    """Assert that the classifier ranks the sample's own language first.

    The candidate set passed to the classifier is limited to the names of
    the languages returned by ``Language.find_by_filename`` for the sample
    path; the top-ranked result must equal the name of the language looked
    up from ``sample['language']``.

    Args:
        sample: Mapping with a ``'language'`` key (a language name) and a
            ``'path'`` key (path of the sample file to read).

    NOTE(review): ``self`` is not a parameter here, so it must come from an
    enclosing scope (presumably a test method) -- confirm against the caller.
    """
    expected = Language.find_by_name(sample['language'])
    candidates = Language.find_by_filename(sample['path'])
    candidate_names = [candidate.name for candidate in candidates]

    # Read through a context manager so the file handle is closed promptly
    # instead of being left to the garbage collector.
    with open(sample['path']) as source_file:
        content = source_file.read()

    results = Classifier.classify(DATA, content, candidate_names)
    self.assertEqual(expected.name, results[0][0])
def test_restricted_classify(self):
    """Classifying with a one-language candidate list ranks that language first.

    A fresh database is trained on one Ruby fixture and two Objective-C
    fixtures; the same Objective-C fixture is then classified twice, once
    per single-language restriction.
    """
    db = {}
    training_set = (
        ("Ruby", "Ruby/foo.rb"),
        ("Objective-C", "Objective-C/Foo.h"),
        ("Objective-C", "Objective-C/Foo.m"),
    )
    for language_name, fixture_path in training_set:
        Classifier.train(db, language_name, self.fixture(fixture_path))

    # The input stays the same; only the allowed language changes.
    for only_language in ("Objective-C", "Ruby"):
        ranked = Classifier.classify(
            db, self.fixture("Objective-C/hello.m"), [only_language]
        )
        assert only_language == ranked[0][0]
def test_classify(self):
    """An Objective-C fixture is ranked as Objective-C, from fixture data or tokens.

    A fresh database is trained on one Ruby fixture and two Objective-C
    fixtures. Classification is checked twice: once passing the value
    returned by ``self.fixture`` directly, and once passing the result of
    ``Tokenizer.tokenize`` on that same fixture.
    """
    db = {}
    training_set = (
        ("Ruby", "Ruby/foo.rb"),
        ("Objective-C", "Objective-C/Foo.h"),
        ("Objective-C", "Objective-C/Foo.m"),
    )
    for language_name, fixture_path in training_set:
        Classifier.train(db, language_name, self.fixture(fixture_path))

    # First pass: hand the fixture data straight to the classifier.
    ranked_from_data = Classifier.classify(db, self.fixture("Objective-C/hello.m"))
    assert "Objective-C" == ranked_from_data[0][0]

    # Second pass: tokenize up front and classify the tokens.
    tokens = Tokenizer.tokenize(self.fixture("Objective-C/hello.m"))
    ranked_from_tokens = Classifier.classify(db, tokens)
    assert "Objective-C" == ranked_from_tokens[0][0]
def test_instance_classify_none(self):
    """Classifying ``None`` against ``DATA`` yields an empty list."""
    ranked = Classifier.classify(DATA, None)
    assert [] == ranked
def test_instance_classify_empty(self):
    """Classifying an empty string gives a first result whose second element is below 0.5."""
    top_result = Classifier.classify(DATA, "")[0]
    # Index 1 of the result entry is the value compared against the
    # threshold; the whole entry is shown if the assertion fails.
    score = top_result[1]
    assert score < 0.5, str(top_result)