def setUp(self):
    """Build a fresh classifier and the hand-assigned label list for a test.

    The classifier is created for the work "Pride and Prejudice" against the
    ``TEST_REDIS`` datastore with ``simple=True``. ``self.labels`` holds 30
    zero/one values (presumably one per record of the training file --
    confirm against the tests that consume them).
    """
    work_classifier = CustomWorkClassifier(
        name="Pride and Prejudice",
        datastore=TEST_REDIS,
        simple=True,
    )
    self.classifier = work_classifier

    # Kept as a literal so individual entries can be annotated; the trailing
    # comment on the first entry looks like a record identifier (unverified).
    hand_labels = [
        0,  # b1629290
        0, 1, 1, 1, 1, 1, 0, 1, 1,
        1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ]
    self.labels = hand_labels
def setUp(self):
    """Prepare the per-test fixtures: a classifier and its 0/1 labels.

    Sets ``self.classifier`` to a ``CustomWorkClassifier`` named
    "Pride and Prejudice" (datastore ``TEST_REDIS``, ``simple=True``) and
    ``self.labels`` to a list of 30 integers, each 0 or 1.
    """
    classifier_options = {
        "name": "Pride and Prejudice",
        "datastore": TEST_REDIS,
        "simple": True,
    }
    self.classifier = CustomWorkClassifier(**classifier_options)

    # First entry carries the original annotation (looks like a record id --
    # TODO confirm); the remaining 29 entries follow in their original order.
    first_label = [
        0,  # b1629290
    ]
    remaining_labels = [
        0, 1, 1, 1, 1, 1, 0, 1, 1,
        1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ]
    self.labels = first_label + remaining_labels
class TestCustomPrideAndPrejudice(unittest.TestCase):
    """Stage-by-stage tests of ``CustomWorkClassifier`` on Pride and Prejudice data.

    Every test starts from a fresh classifier (see ``setUp``) and re-runs the
    pipeline up to the stage it checks: load MARC records, assign labels,
    build the vocabulary, build the training matrix, train naive Bayes and
    finally classify token lists.
    """

    def setUp(self):
        """Create the classifier under test and the 30 hand-assigned labels."""
        self.classifier = CustomWorkClassifier(name="Pride and Prejudice", datastore=TEST_REDIS, simple=True)
        self.labels = [
            0,  # b1629290
            0, 1, 1, 1, 1, 1, 0, 1, 1,
            1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ]

    def _load_marc(self):
        """Load the Pride and Prejudice MARC training file into the classifier."""
        # Relative path; how it is resolved is up to load_training_marc.
        self.classifier.load_training_marc(os.path.join("ColoradoCollege", "pride-and-prejudice.mrc"))

    def _build_vocabulary(self):
        """Load the training file, apply ``self.labels`` and build the vocabulary."""
        self._load_marc()
        self.classifier.generate_training_labels(labels=self.labels)
        self.classifier.generate_training_vocabulary()

    def _build_matrix(self):
        """Run the pipeline through training-matrix generation."""
        self._build_vocabulary()
        self.classifier.generate_training_matrix()

    def _train(self):
        """Run the full pipeline through naive Bayes training."""
        self._build_matrix()
        self.classifier.train_naive_bayes()

    def test_init(self):
        """The fixtures exist: a classifier instance and exactly 30 labels."""
        self.assertIsNotNone(self.classifier)
        self.assertEqual(len(self.labels), 30)

    def test_load_training_marc(self):
        """Loading the MARC file yields 30 training records."""
        self._load_marc()
        self.assertEqual(len(self.classifier.training_data), 30)

    def test_generate_training_labels(self):
        """Labels passed in are stored unchanged as ``training_labels``."""
        labels = 30 * [0]
        self._load_marc()
        self.classifier.generate_training_labels(labels=labels)
        self.assertEqual(labels, self.classifier.training_labels)

    def test_generate_training_vocabulary(self):
        """The generated vocabulary equals the expected set of tokens."""
        self._build_vocabulary()
        self.assertEqual(
            self.classifier.training_vocabulary,
            {
                "affair",
                "bebris",
                "helen",
                "matters",
                "1817",
                "prejudice",
                "jane",
                "impressions",
                "pride",
                "crawford",
                "1958",
                "first",
                "sentimental",
                "carrie",
                "1883",
                "prejudiceor",
                "mansfield",
                "1775",
                "austen",
                "comedy",
                "jerome",
                "mystery",
                "emma",
                "annotated",
                "edition",
                "darcy",
                "acknowledged",
                "mr",
                "park",
                "revisited",
                "mrs",
                "prescience",
                "suspense",
                "truth",
                "sense",
                "sensibility",
                "universally",
            },
        )

    def test_tokens2vectors(self):
        """Known tokens are mapped onto the expected 0/1 vector."""
        self._build_vocabulary()
        # NOTE(review): this expected vector has 21 entries while the
        # vocabulary asserted in test_generate_training_vocabulary has 37
        # distinct tokens, and the positions depend on vocabulary ordering --
        # presumably one of the two expectations is stale; confirm.
        self.assertEqual(
            self.classifier.tokens2vectors(["austen", "first", "matters"]),
            [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        )

    def test_generate_training_matrix(self):
        """The first training row has a 1 in column 6."""
        self._build_matrix()
        print("IN training matrix test={0}".format(self.classifier.training_matrix))
        self.assertEqual(self.classifier.training_matrix[0][6], 1)

    def test_train_naive_bayes(self):
        """Training produces the expected prior and per-token probabilities."""
        self._train()
        # Floats are compared with assertAlmostEqual (7 decimal places by
        # default): exact equality is brittle, and 0.16666667 is a rounded
        # literal that cannot match a double-precision 1/6 exactly.
        self.assertAlmostEqual(self.classifier.pWork, 0.47368421052631576)
        self.assertAlmostEqual(self.classifier.p0Vector[0], 0.029411764705882353)
        self.assertAlmostEqual(self.classifier.p1Vector[6], 0.16666667)

    def test_classify(self):
        """Tokens of the work classify as 1; unrelated tokens classify as 0."""
        self._train()
        self.assertEqual(self.classifier.classify(["jane", "austen", "pride", "prejudice"]), 1)
        self.assertEqual(self.classifier.classify(["jane", "eyre", "anne", "green"]), 0)

    def tearDown(self):
        """Nothing to clean up; kept as an explicit no-op."""
        pass
class TestCustomPrideAndPrejudice(unittest.TestCase):
    """Stage-by-stage tests of ``CustomWorkClassifier`` on Pride and Prejudice data.

    Every test starts from a fresh classifier (see ``setUp``) and re-runs the
    pipeline up to the stage it checks: load MARC records, assign labels,
    build the vocabulary, build the training matrix, train naive Bayes and
    finally classify token lists.
    """

    def setUp(self):
        """Create the classifier under test and the 30 hand-assigned labels."""
        self.classifier = CustomWorkClassifier(name="Pride and Prejudice",
                                               datastore=TEST_REDIS,
                                               simple=True)
        self.labels = [0,  # b1629290
                       0, 1, 1, 1, 1, 1, 0, 1, 1,
                       1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

    def _load_marc(self):
        """Load the Pride and Prejudice MARC training file into the classifier."""
        # Relative path; how it is resolved is up to load_training_marc.
        self.classifier.load_training_marc(
            os.path.join('ColoradoCollege', 'pride-and-prejudice.mrc'))

    def _build_vocabulary(self):
        """Load the training file, apply ``self.labels`` and build the vocabulary."""
        self._load_marc()
        self.classifier.generate_training_labels(labels=self.labels)
        self.classifier.generate_training_vocabulary()

    def _build_matrix(self):
        """Run the pipeline through training-matrix generation."""
        self._build_vocabulary()
        self.classifier.generate_training_matrix()

    def _train(self):
        """Run the full pipeline through naive Bayes training."""
        self._build_matrix()
        self.classifier.train_naive_bayes()

    def test_init(self):
        """The fixtures exist: a classifier instance and exactly 30 labels."""
        self.assertIsNotNone(self.classifier)
        self.assertEqual(len(self.labels), 30)

    def test_load_training_marc(self):
        """Loading the MARC file yields 30 training records."""
        self._load_marc()
        self.assertEqual(len(self.classifier.training_data), 30)

    def test_generate_training_labels(self):
        """Labels passed in are stored unchanged as ``training_labels``."""
        labels = 30*[0]
        self._load_marc()
        self.classifier.generate_training_labels(labels=labels)
        self.assertEqual(labels, self.classifier.training_labels)

    def test_generate_training_vocabulary(self):
        """The generated vocabulary equals the expected set of tokens."""
        self._build_vocabulary()
        expected_vocabulary = {
            'affair', 'bebris', 'helen', 'matters', '1817', 'prejudice',
            'jane', 'impressions', 'pride', 'crawford', '1958', 'first',
            'sentimental', 'carrie', '1883', 'prejudiceor', 'mansfield',
            '1775', 'austen', 'comedy', 'jerome', 'mystery', 'emma',
            'annotated', 'edition', 'darcy', 'acknowledged', 'mr', 'park',
            'revisited', 'mrs', 'prescience', 'suspense', 'truth', 'sense',
            'sensibility', 'universally'}
        self.assertEqual(self.classifier.training_vocabulary,
                         expected_vocabulary)

    def test_tokens2vectors(self):
        """Known tokens are mapped onto the expected 0/1 vector."""
        self._build_vocabulary()
        # NOTE(review): this expected vector has 21 entries while the
        # vocabulary asserted in test_generate_training_vocabulary has 37
        # distinct tokens, and the positions depend on vocabulary ordering --
        # presumably one of the two expectations is stale; confirm.
        self.assertEqual(
            self.classifier.tokens2vectors(['austen', 'first', 'matters']),
            [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

    def test_generate_training_matrix(self):
        """The first training row has a 1 in column 6."""
        self._build_matrix()
        print("IN training matrix test={0}".format(self.classifier.training_matrix))
        self.assertEqual(self.classifier.training_matrix[0][6], 1)

    def test_train_naive_bayes(self):
        """Training produces the expected prior and per-token probabilities."""
        self._train()
        # Floats are compared with assertAlmostEqual (7 decimal places by
        # default): exact equality is brittle, and 0.16666667 is a rounded
        # literal that cannot match a double-precision 1/6 exactly.
        self.assertAlmostEqual(self.classifier.pWork, 0.47368421052631576)
        self.assertAlmostEqual(self.classifier.p0Vector[0],
                               0.029411764705882353)
        self.assertAlmostEqual(self.classifier.p1Vector[6], 0.16666667)

    def test_classify(self):
        """Tokens of the work classify as 1; unrelated tokens classify as 0."""
        self._train()
        self.assertEqual(
            self.classifier.classify(['jane', 'austen', 'pride', 'prejudice']),
            1)
        self.assertEqual(
            self.classifier.classify(['jane', 'eyre', 'anne', 'green']),
            0)

    def tearDown(self):
        """Nothing to clean up; kept as an explicit no-op."""
        pass