def testDocumentVector(self):
    """Check the string form of two documents built by one factory.

    Two documents are created from overlapping token lists and their
    ``str()`` output is compared against the expected
    ``Document(<docnum>, SupportVector({...}))`` text. The expected
    strings encode that the shared token ``'b'`` appears under the same
    key (2) in both documents.
    """
    factory = DocumentFactory()
    d1 = factory.new(['a', 'b', 'c'])
    d2 = factory.new(['b', 'd', 'e'])
    # Debug output of the factory's ``nums`` attribute. The single-argument
    # call form prints the same text on Python 2 and Python 3, whereas the
    # bare ``print x`` statement is a SyntaxError on Python 3.
    print(factory.nums)
    self.assertEqual(
        'Document(1, SupportVector({1: 1.0, 2: 1.0, 3: 1.0}))', str(d1))
    self.assertEqual(
        'Document(2, SupportVector({2: 1.0, 4: 1.0, 5: 1.0}))', str(d2))
def getDocs():
    """Return a list of documents, one per sample sentence.

    Every sentence is split on whitespace and the resulting token list is
    handed to the same ``DocumentFactory`` instance, in the order the
    sentences are listed.
    """
    sentences = (
        "this is a nice long document",
        "this is another nice long document",
        "this is rather a short document",
        "a horrible document",
        "another horrible document",
    )
    factory = DocumentFactory()
    documents = []
    for sentence in sentences:
        documents.append(factory.new(sentence.split()))
    return documents
def test_document_vector(self):
    """Verify ``str()`` of two documents produced by the same factory."""
    factory = DocumentFactory()
    first = factory.new(['a', 'b', 'c'])
    second = factory.new(['b', 'd', 'e'])
    # Debug output of the factory's ``nums`` attribute after both documents
    # have been created.
    print(factory.nums)

    # (expected text, document) pairs, checked in creation order.
    cases = (
        ('Document(1, SupportVector({1: 1.0, 2: 1.0, 3: 1.0}))', first),
        ('Document(2, SupportVector({2: 1.0, 4: 1.0, 5: 1.0}))', second),
    )
    for expected, document in cases:
        self.assertEqual(expected, str(document))
def getDocs():
    """Build the sample document list from five fixed sentences.

    Each sentence is tokenised with ``str.split()`` and turned into a
    document by one shared ``DocumentFactory``; the returned list follows
    the order of the sentences.
    """
    factory = DocumentFactory()

    def make_document(text):
        # Whitespace-split the sentence and let the factory build the document.
        return factory.new(text.split())

    corpus = [
        "this is a nice long document",
        "this is another nice long document",
        "this is rather a short document",
        "a horrible document",
        "another horrible document",
    ]
    return list(map(make_document, corpus))
def get_docs():
    """Create one document per sample sentence using a single factory.

    Returns:
        A list with the result of ``DocumentFactory.new`` for the
        whitespace-separated tokens of each sentence, in listed order.
    """
    sample_texts = (
        'this is a nice long document',
        'this is another nice long document',
        'this is rather a short document',
        'a horrible document',
        'another horrible document',
    )
    factory = DocumentFactory()
    token_lists = (text.split() for text in sample_texts)
    return [factory.new(tokens) for tokens in token_lists]
from svmlight import DocumentFactory, Learner


def _read_lines(path):
    """Return the lines of the file at *path*, each stripped of whitespace."""
    # The context manager closes the file as soon as it has been read,
    # instead of leaving the handle open for the garbage collector.
    with open(path) as handle:
        return [line.strip() for line in handle]


# The three inputs are indexed in parallel below: entry i of each list is
# used together with entry i of the others.
artists = _read_lines('artistList.txt')
tags = _read_lines('tagData.txt')
classes = _read_lines('classification.txt')

# One document per line of tag data; its tokens are the comma-separated fields.
f = DocumentFactory()
docs = [f.new(x.split(',')) for x in tags]

l = Learner()
# NOTE(review): kernel type 0 is presumably the linear kernel (SVM-light
# numbering) -- confirm against the svmlight binding.
l.set_kernel_type(0)

# Train on everything after the first 50 entries, then classify the first 50.
model = l.learn(docs[50:], [int(s) for s in classes[50:]])
judgments = [model.classify(d) for d in docs[:50]]

# Single-argument print(...) calls produce the same text on Python 2 and 3;
# the two values are joined with one space, as ``print a, b`` did.
print('%s %s' % (model.plane, model.bias))
print(judgments)

# Report each classified entry: a non-negative judgment prints 'yes'.
for i, judgment in enumerate(judgments):
    print(str(i) + '. ' + artists[i])
    print('yes' if judgment >= 0.0 else 'no')
def testDocumentNums(self):
    """Check ``docnum`` of the first two documents a new factory creates.

    The first document is expected to carry ``docnum`` 1 and the second
    ``docnum`` 2.
    """
    factory = DocumentFactory()
    first = factory.new([1, 2, 3])
    second = factory.new([4, 5, 6])

    first_num = first.docnum
    self.assertEqual(1, first_num)
    second_num = second.docnum
    self.assertEqual(2, second_num)
def test_document_nums(self):
    """Verify that documents from one factory have ``docnum`` 1, then 2."""
    factory = DocumentFactory()
    # Both documents are created up front, before any assertion runs.
    documents = [factory.new(tokens) for tokens in ([1, 2, 3], [4, 5, 6])]

    # Expected docnum is the 1-based creation position.
    for expected_num, document in enumerate(documents, 1):
        self.assertEqual(expected_num, document.docnum)