Ejemplo n.º 1
0
def process_books(library, calibre_ids=None, book_text_files=None, multi_thread=False):
    '''Resolve the given book identifiers to library books and process them.

    Every entry of calibre_ids and book_text_files is looked up in library.
    When a lookup returns a falsy value, a new Book.Book is created for that
    entry and added to the library. The ids of all resolved books are then
    handed, in order, to fingerprint_initializer and book_comparator.

    Args:
        library: object providing get_book_cid(), get_book_textfile() and
            add_book().
        calibre_ids: optional iterable of calibre ids; skipped when falsy.
        book_text_files: optional iterable of text-file keys, each passed to
            library.get_book_textfile(); skipped when falsy.
        multi_thread: when false (the default) the work runs serially in the
            calling thread. A true value is not supported yet and raises.

    Raises:
        TBD: when multi_thread is true.
    '''
    bookids = []
    if calibre_ids:
        for calibre_id in calibre_ids:
            book = library.get_book_cid(calibre_id)
            if not book:
                book = Book.Book(calibreid=calibre_id)
                library.add_book(book)
            # bookids is a list, so it must be extended with append();
            # the previous add() call raised AttributeError here.
            bookids.append(book.id)
    if book_text_files:
        for text_file in book_text_files:
            book = library.get_book_textfile(text_file)
            if not book:
                book = Book.Book(textfile=text_file)
                library.add_book(book)
            bookids.append(book.id)
    if not multi_thread:
        # Just run through our tasks serially.
        fingerprint_initializer(library, bookids)
        book_comparator(library, bookids)
    else:
        # NOTE(review): TBD is not defined in the code visible here; confirm
        # it is importable, otherwise this line raises NameError instead.
        raise TBD('Multithreading is not yet implimented')
    #TODO: must add the completed scan uuid to books when finished
        
if __name__ == '__main__':
    # Executed as a script: hand the controller test case to the zUnitTest
    # runner. The import stays local so that importing this module does not
    # pull in the test module.
    import zUnitTest
    run_testcase = zUnitTest.run_testcase
    run_testcase(zUnitTest.ControllerTest)
   
Ejemplo n.º 2
0
                    if myhash < minhashes[h]:
                        minhashes[h] = myhash
        else:
            hashedwords = len(words)
            hashes = OptimizeCompare.HashSequence()
            hashes.resize(hashedwords, 0)
            for i in xrange(hashedwords):
                hashes[i] = hash(words[i])
            myhashes = OptimizeCompare.HashSequence()
            myhashes = OptimizeCompare.shingle_and_hash(hashes, Utility.myMasks.masks, L_tables, shingle_size)
            minhashes = array('l',myhashes) #Convert to native python type because this array is small, but will be accessed frequently!
        return minhashes
                
            
    def __generate_minhashes(self, words, shingle_size=5):
        '''Return the minhashes for words, or an empty array for tiny inputs.

        Inputs with fewer than four words produce an empty array('l').
        When there are fewer words than shingle_size, a shingle size of 1 is
        used instead. All remaining work is delegated to
        self.__shingle_and_hash, which also receives the minhash_tables
        setting read from Compare.myMinHashParams.
        '''
        minhash_tables = Compare.myMinHashParams.minhash_tables
        word_count = len(words)
        if word_count < 4:
            return array('l', [])
        effective_size = 1 if word_count < shingle_size else shingle_size
        # NOTE(review): a disabled follow-up call to self.__find_mins() used
        # to sit after this return; it was unreachable and is not invoked.
        return self.__shingle_and_hash(words, effective_size, minhash_tables)
                    
        
        
        
    
if __name__ == '__main__':
    # Executed as a script: hand the fingerprint test case to the zUnitTest
    # runner. The import stays local so that importing this module does not
    # pull in the test module.
    import zUnitTest
    run_testcase = zUnitTest.run_testcase
    run_testcase(zUnitTest.FingerprintTest)