Esempio n. 1
0
        mi_list.sort(self.cmp_mi_list, reverse=True)
        return mi_list

    def top_n_terms(self, n=100):
        '''
        Return the 'term' value of the first n entries of terms_sorted().

        The entries are taken in the order terms_sorted() yields them
        (presumably highest MI first -- the ordering is decided there,
        not here). Fewer than n terms are returned when fewer entries
        are available.
        '''
        ranked = self.terms_sorted()
        return [entry['term'] for entry in ranked[:n]]


if __name__ == '__main__':

    mx = MatrixExpress()
    #mx = Matrix()
    mx.add_doc(doc_id='1',
               doc_terms=['apple', 'mac', 'iphone', 'mac'],
               doc_class='apple',
               frequency=True,
               do_padding=True)
    mx.add_doc(doc_id='2',
               doc_terms=['windows', 'word', 'excel', 'office'],
               doc_class='microsoft',
               frequency=True,
               do_padding=True)
    mx.add_doc(doc_id='3',
               doc_terms=['computer', 'mac', 'iphone', 'ipad'],
               doc_class='apple',
               frequency=True,
Esempio n. 2
0
File: mi.py Progetto: Volkan61/irlib
        return mi_list

    def top_n_terms(self, n=100):
        '''
        Collect the terms of the leading n records from terms_sorted().

        Each record is expected to expose its term under the 'term' key;
        the ranking itself (presumably by descending MI) comes from
        terms_sorted(), this method only truncates and projects.
        '''
        leading = self.terms_sorted()[:n]
        return [record['term'] for record in leading]
                            
                          
if __name__ == '__main__':
    
        
    mx = MatrixExpress()
    #mx = Matrix()
    mx.add_doc(doc_id='1',
               doc_terms=['apple', 'mac', 'iphone', 'mac'],
               doc_class= 'apple',
               frequency=True, do_padding=True)
    mx.add_doc(doc_id='2',
               doc_terms=['windows', 'word', 'excel', 'office'],
               doc_class= 'microsoft',
               frequency=True, do_padding=True)
    mx.add_doc(doc_id='3',
               doc_terms=['computer', 'mac', 'iphone', 'ipad'],
               doc_class= 'apple',
               frequency=True, do_padding=True)
    mx.add_doc(doc_id='4',
               doc_terms=['excel', 'computer', 'office', 'xp'],