def runExperiment(self, classifier_name):
    """Run every fold of a three-fold experiment for one classifier.

    The two datasets returned by ``testData()`` are concatenated and
    repeated three times, the classifier called ``classifier_name`` is
    built by a ``ClassifierMaker`` over the returned vectors, and each
    fold of an ``EntailmentExperiment`` is run in order.

    Returns a list with the ``runFold`` result of each fold.
    """
    first_set, second_set, term_vectors = testData()
    replicated = (first_set + second_set) * 3
    model = ClassifierMaker(term_vectors).make(classifier_name)
    fold_count = 3
    runner = EntailmentExperiment(replicated, model, fold_count)
    fold_results = []
    for fold_index in range(fold_count):
        fold_results.append(runner.runFold(fold_index))
    return fold_results
def testClassifierMakerClassifiers(self):
    """Check that all classifiers that can be made are valid.

    Every name reported by the maker is built, fitted on the first dataset
    from ``testData()`` and asked to predict the second one.  There must be
    one prediction per test item, and every predicted value must occur as
    the third field of some training item.
    """
    train_items, held_out_items, term_vectors = testData()
    known_labels = set(item[2] for item in train_items)
    settings = {'beta': [1.0, 2.0], 'costs': [1.0], 'k': [1]}
    factory = ClassifierMaker(term_vectors, settings)
    for classifier_name in factory.get_names():
        model = factory.make(classifier_name)
        model.fit(train_items)
        predicted = model.predict(held_out_items)
        self.assertEqual(len(predicted), len(held_out_items))
        self.assertTrue(set(predicted).issubset(known_labels))
def testClassifierMakerClassifiers(self):
    """Check that all classifiers that can be made are valid.

    For each classifier name the maker offers: build it, fit it on the
    first dataset from ``testData()``, predict the second dataset, then
    verify the number of predictions and that no predicted value falls
    outside the values seen in the third field of the training items.
    """
    fit_data, eval_data, vectors = testData()
    allowed = set(row[2] for row in fit_data)
    maker = ClassifierMaker(vectors, {'beta': [1.0, 2.0], 'costs': [1.0]})
    available = maker.get_names()
    for current in available:
        candidate = maker.make(current)
        candidate.fit(fit_data)
        outcome = candidate.predict(eval_data)
        self.assertEqual(len(outcome), len(eval_data))
        unexpected_free = set(outcome) <= allowed
        self.assertTrue(unexpected_free)
def runExperiment(self, classifier_name):
    """Run a three-fold experiment for the classifier ``classifier_name``.

    Both datasets from ``testData()`` are joined and repeated three times.
    The classifier is made by a ``ClassifierMaker`` configured with
    ``{'k': [1]}``, and the folds of an ``EntailmentExperiment`` are run
    one after another.

    Returns the list of per-fold ``runFold`` results.
    """
    part_one, part_two, vectors = testData()
    combined = part_one + part_two
    everything = combined * 3
    factory = ClassifierMaker(vectors, params={'k': [1]})
    chosen = factory.make(classifier_name)
    total_folds = 3
    trial = EntailmentExperiment(everything, chosen, total_folds)
    outcomes = []
    for index in range(total_folds):
        outcomes.append(trial.runFold(index))
    return outcomes
def testClassifierMakerNames(self):
    """Check that a default ClassifierMaker reports at least one name."""
    # Only the vectors are needed here; the two datasets are discarded.
    _, _, vectors = testData()
    maker = ClassifierMaker(vectors)
    names = maker.get_names()
    # The check is len(names) > 0, so the message must say "at least one".
    self.assertGreater(len(names), 0,
                       "Maker should have at least one classifier")
# Choose the dataset, vector database and classifier for this run.  The
# 'datadir' entry read below is not set in this section.
params['dataset'] = 'wn-noun-dependencies-original'
params['vectors'] = 'nouns-deps.mi.db'
# Alternative classifier, currently disabled:
#params['classifier'] = 'widthdiff'
params['classifier']='invCLP'

if __name__ == "__main__":
    # Script entry point: fit the selected classifier on the whole dataset
    # and print its predictions for that same data next to the stored values.
    print "Testing baseline function"
    datadir = params['datadir']
    # The dataset is a JSON file named after params['dataset'] in datadir.
    dataset_path = os.path.join(datadir, params['dataset'] + '.json')
    # Seed the RNG from the string form of params, so the seed depends on
    # the parameter settings.
    random.seed(abs(hash(str(params))))
    with open(dataset_path) as dataset_file:
        dataset = json.load(dataset_file)
    vectors_path = os.path.join(datadir, params['vectors'])
    print "DB path: ", vectors_path
    vectors = TermDB(vectors_path)
    maker = ClassifierMaker(vectors, params)
    classifier = maker.make(params['classifier'])
    # Third field of every dataset entry as an int array -- presumably the
    # class label; confirm against the dataset format.
    target = np.array([p[2] for p in dataset], dtype=int)
    # Fit and predict on the same data; nothing is held out here.
    classifier.fit(dataset)
    predictions=classifier.predict(dataset)
    print "Predictions:", predictions
    print "Actual:", target