Example #1
0
class TestMetrics(TestCase):
    """Unit tests for the vector measures provided by ``Metrics``.

    Exercises ``jaccard_vectors``, ``euclid_vectors`` and ``cos_vectors``:
    each is expected to raise ``ValueError`` when the two vectors differ in
    length, and to return a known value for a few fixed inputs.
    """

    def setUp(self):
        # Every test gets its own Metrics instance.
        self.m = Metrics()

    # --- jaccard_vectors -------------------------------------------------

    def test_jaccard_same_len(self):
        # A two-element and a four-element vector must be rejected.
        self.assertRaises(
            ValueError, self.m.jaccard_vectors, [0, 1], [0, 1, 2, 3])

    def test_jaccard_empty(self):
        # Two empty vectors are expected to score 1.
        self.assertEqual(self.m.jaccard_vectors([], []), 1)

    def test_jaccard_int(self):
        # Integer vectors; the expected score for this pair is 0.75.
        left = [0, 2, 1, 3]
        right = [0, 1, 2, 3]
        self.assertEqual(self.m.jaccard_vectors(left, right), 0.75)

    def test_jaccard_bool(self):
        # Boolean vectors; the expected score for this pair is 0.4.
        left = [False, False, True, True, True]
        right = [False, True, True, True, False]
        self.assertEqual(self.m.jaccard_vectors(left, right), 0.4)

    # --- euclid_vectors --------------------------------------------------

    def test_euclid_same_len(self):
        # A four-element and a two-element vector must be rejected.
        self.assertRaises(
            ValueError, self.m.euclid_vectors, [0, 1, 2, 3], [0, 1])

    def test_euclid(self):
        # (1, 1) -> (4, 5) is a 3-4-5 triangle, so the distance is 5.
        self.assertEqual(self.m.euclid_vectors([1, 1], [4, 5]), 5)

    # --- cos_vectors -----------------------------------------------------

    def test_cos_same_len(self):
        # A three-element and a two-element vector must be rejected.
        self.assertRaises(
            ValueError, self.m.cos_vectors, [0, 1, 2], [1, 1])

    def test_cos_0(self):
        # No overlapping non-zero component: expect 0 after rounding.
        cosine = self.m.cos_vectors([1, 0, 1], [0, 1, 0])
        self.assertEqual(round(cosine, 5), 0.0)

    def test_cos_1(self):
        # Identical vectors: expect 1 after rounding to 5 decimals.
        cosine = self.m.cos_vectors([1, 1, 1], [1, 1, 1])
        self.assertEqual(round(cosine, 5), 1.0)
Example #2
0
class TestMetrics(unittest.TestCase):
    """Checks ``Metrics.euclid_vectors`` and ``Metrics.cos_vectors``."""

    def setUp(self):
        # Fresh Metrics instance for each test.
        self.m = Metrics()

    def test_metrics(self):
        metrics = self.m

        # (1, 1) -> (4, 5) is a 3-4-5 triangle, so the distance is 5.
        self.assertEqual(metrics.euclid_vectors([1, 1], [4, 5]), 5)

        # Cosine results are rounded to 5 decimals before comparing:
        # identical vectors give 1, the second pair gives 0.
        cosine_cases = (
            ([1, 1, 1], [1, 1, 1], 1.0),
            ([1, 0, 1], [0, 1, 0], 0.0),
        )
        for left, right, expected in cosine_cases:
            cosine = metrics.cos_vectors(left, right)
            self.assertEqual(round(cosine, 5), expected)
Example #3
0
class TestMetrics(unittest.TestCase):
    """Checks the Euclidean and cosine measures of ``Metrics``."""

    def setUp(self):
        # Fresh Metrics instance for each test.
        self.m = Metrics()

    def test_metrics(self):
        # Euclidean distance of a 3-4-5 triangle.
        distance = self.m.euclid_vectors([1, 1], [4, 5])
        self.assertEqual(distance, 5)

        # Identical vectors: cosine rounds to 1.
        same = self.m.cos_vectors([1, 1, 1], [1, 1, 1])
        self.assertEqual(round(same, 5), 1.0)

        # No overlapping non-zero component: cosine rounds to 0.
        disjoint = self.m.cos_vectors([1, 0, 1], [0, 1, 0])
        self.assertEqual(round(disjoint, 5), 0.0)
Example #4
0
class QA:
    """Small question-answering bot backed by a term/document matrix.

    Question/answer pairs are read from ``self.file_name``.  Each question is
    tokenized and added to ``self.mx`` as a document.  ``ask`` prints the
    answer of the stored question whose term vector has the smallest
    Euclidean distance to the query vector.
    """

    def __init__(self):
        self.file_name = 'qa.txt'  # file with "q:<text>" / "a:<text>" lines
        self.qa_list = {}          # qa_id -> {'q': question, 'a': answer}
        self.qa_id = 0             # id of the most recently read question
        self.prep = Preprocessor()
        self.mx = Matrix()
        self.metric = Metrics()

    def randomize(self, a):
        """Overwrite every element of *a*, in place, with a random 0 or 1."""
        for i, _ in enumerate(a):
            a[i] = random.randint(0, 1)

    def readfile(self):
        """Load question/answer pairs from ``self.file_name``.

        Every line is stripped, lower-cased and split on ``':'``.  Lines that
        do not yield exactly two fields are skipped.  A ``q`` line starts a
        new entry (with an empty answer) and registers the question's terms
        in the matrix; an ``a`` line sets the answer of the most recently
        read question.  An ``a`` line that appears before any question has
        nothing to attach to and is ignored.
        """
        # The context manager closes the file even if parsing raises.
        with open(self.file_name, 'r') as fd:
            for raw_line in fd:
                fields = raw_line.strip().lower().split(':')
                if len(fields) != 2:
                    continue
                tag, text = fields

                if tag == 'q':
                    self.qa_id += 1
                    self.qa_list[self.qa_id] = {'q': text, 'a': ''}
                    terms = self.prep.ngram_tokenizer(text=text)
                    self.mx.add_doc(doc_id=self.qa_id, doc_terms=terms,
                                    frequency=True, do_padding=True)
                elif tag == 'a' and self.qa_id in self.qa_list:
                    self.qa_list[self.qa_id]['a'] = text

    def ask(self, q=''):
        """Print the answer of the stored question closest to *q*.

        The query is tokenized and converted to a vector by the matrix.  An
        all-zero query vector is replaced by random 0/1 values before
        matching.  The output line is ``Tarek: <answer>``; when the matrix
        holds no documents the answer part is empty.
        """
        terms = self.prep.ngram_tokenizer(text=q)
        q_vector = self.mx.query_to_vector(terms, frequency=False)

        if sum(q_vector) == 0:
            self.randomize(q_vector)

        # Start from infinity so that any real distance, however large,
        # selects a document.
        best_id = None
        best_distance = float('inf')
        for doc in self.mx.docs:
            distance = self.metric.euclid_vectors(doc['terms'], q_vector)
            if distance < best_distance:
                best_distance = distance
                best_id = doc['id']

        answer = '' if best_id is None else self.qa_list[best_id]['a']

        # One parenthesised argument prints the same text under Python 2's
        # print statement and Python 3's print function.
        print('Tarek: ' + answer)