def testDocumentVector(self):
    """Check the string form of two documents made by one DocumentFactory.

    The expected strings pin the document numbers (1 and 2). Key 2 appears
    in both vectors, presumably for the shared token 'b' -- confirm against
    DocumentFactory's token numbering.
    """
    factory = DocumentFactory()
    d1 = factory.new(['a', 'b', 'c'])
    d2 = factory.new(['b', 'd', 'e'])
    # A parenthesised single-argument print parses on Python 2 and 3 alike
    # and emits the same text; the bare statement form only parses on 2.
    print(factory.nums)
    self.assertEqual(
        'Document(1, SupportVector({1: 1.0, 2: 1.0, 3: 1.0}))', str(d1))
    self.assertEqual(
        'Document(2, SupportVector({2: 1.0, 4: 1.0, 5: 1.0}))', str(d2))
Beispiel #2
0
def getDocs():
    """Return one DocumentFactory.new() result per sample sentence.

    Every sentence is split on whitespace and passed, in order, to the
    same factory instance.
    """
    sentences = (
        "this is a nice long document",
        "this is another nice long document",
        "this is rather a short document",
        "a horrible document",
        "another horrible document",
    )
    factory = DocumentFactory()
    documents = []
    for sentence in sentences:
        documents.append(factory.new(sentence.split()))
    return documents
Beispiel #3
0
 def test_document_vector(self):
     """Compare str() of two factory-made documents with fixed strings.

     Both documents come from the same DocumentFactory; the factory's
     ``nums`` attribute is printed before the assertions run.
     """
     expected_first = 'Document(1, SupportVector({1: 1.0, 2: 1.0, 3: 1.0}))'
     expected_second = 'Document(2, SupportVector({2: 1.0, 4: 1.0, 5: 1.0}))'

     maker = DocumentFactory()
     first = maker.new(['a', 'b', 'c'])
     second = maker.new(['b', 'd', 'e'])
     print(maker.nums)

     self.assertEqual(expected_first, str(first))
     self.assertEqual(expected_second, str(second))
Beispiel #4
0
def getDocs():
    """Build the sample corpus: one factory-made document per sentence."""
    maker = DocumentFactory()
    corpus = [
        "this is a nice long document",
        "this is another nice long document",
        "this is rather a short document",
        "a horrible document",
        "another horrible document",
    ]
    # Tokenise on whitespace first; the factory then sees the token lists
    # in the same order as the sentences above.
    token_lists = [text.split() for text in corpus]
    return [maker.new(tokens) for tokens in token_lists]
Beispiel #5
0
def get_docs():
    """Return the sample documents, all created by one DocumentFactory.

    Each of the five sentences is whitespace-split and handed to the
    factory's ``new`` method in listing order.
    """
    make_document = DocumentFactory().new
    texts = (
        'this is a nice long document',
        'this is another nice long document',
        'this is rather a short document',
        'a horrible document',
        'another horrible document',
    )
    result = [make_document(text.split()) for text in texts]
    return result
Beispiel #6
0
from svmlight import DocumentFactory, Learner

def _read_stripped_lines(path):
    """Return every line of *path* with surrounding whitespace stripped."""
    # The with-block closes the handle as soon as the lines are read,
    # rather than leaving the open file to be reclaimed whenever the
    # interpreter gets around to it.
    with open(path) as handle:
        return [line.strip() for line in handle]


# Three input files, indexed by position: artists[i], tags[i] and
# classes[i] are used together below.
artists = _read_stripped_lines('artistList.txt')
tags = _read_stripped_lines('tagData.txt')
classes = _read_stripped_lines('classification.txt')

# One document per tag line; the comma-separated fields are the tokens.
f = DocumentFactory()
docs = [f.new(x.split(',')) for x in tags]
l = Learner()
# NOTE(review): 0 is presumably SVMlight's linear kernel -- confirm.
l.set_kernel_type(0)
# Train on everything after the first 50 rows, then classify those
# first 50 rows with the resulting model.
model = l.learn(docs[50:], [int(s) for s in classes[50:]])
judgments = [model.classify(d) for d in docs[:50]]
# Explicit formatting keeps the "plane bias" output identical on
# Python 2 and 3 (a parenthesised pair would print as a tuple on 2).
print('%s %s' % (model.plane, model.bias))
print(judgments)

# Report each classified row as "<index>. <artist>" followed by yes/no;
# a judgment of zero or above counts as "yes".
for index, judgment in enumerate(judgments):
    print(str(index) + '. ' + artists[index])
    print('yes' if judgment >= 0.0 else 'no')
 def testDocumentNums(self):
     """Assert that two documents from one factory get docnum 1 and 2."""
     maker = DocumentFactory()
     # Create both documents before any assertion runs.
     created = [maker.new(features) for features in ([1, 2, 3], [4, 5, 6])]
     for expected_num, document in enumerate(created, 1):
         self.assertEqual(expected_num, document.docnum)
Beispiel #8
0
 def test_document_nums(self):
     """Verify the docnum of the first and second factory-made document."""
     first_expected, second_expected = 1, 2

     maker = DocumentFactory()
     first = maker.new([1, 2, 3])
     second = maker.new([4, 5, 6])

     self.assertEqual(first_expected, first.docnum)
     self.assertEqual(second_expected, second.docnum)