def my_vacuum(dbpathnew, dbpathold):
    """
    My own vacuum function on cover_hash_table.
    It works by copying and is slow!
    My main use: transform the page size to 4096.
    It lives here because of its use of 'aggregate_dbs'.
    """
    if os.path.exists(dbpathnew):
        print 'ERROR: %s already exists.' % dbpathnew
        return
    if not os.path.isfile(dbpathold):
        print 'ERROR: %s is not a file.' % dbpathold
        return
    # create new db
    import cover_hash_table as CHT
    conn = sqlite3.connect(dbpathnew)
    conn.execute('PRAGMA temp_store = MEMORY;')
    conn.execute('PRAGMA synchronous = OFF;')
    conn.execute('PRAGMA journal_mode = OFF;')  # no ROLLBACK!
    conn.execute('PRAGMA page_size = 4096;')
    conn.execute('PRAGMA cache_size = 500000;')  # page_size=4096, 500000 pages -> ~2GB cache
    CHT.init_db(conn)
    # copy
    aggregate_dbs(conn, dbpathold)
    # reindex
    CHT.reindex(conn)
    # done
    conn.commit()
    conn.close()
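
# The helper below is a sketch (an assumption, not part of the original
# module) for verifying the result of my_vacuum.
def check_page_size(dbpath, expected=4096):
    """
    Sketch: quick sanity check that a database rewritten by my_vacuum really
    uses the expected SQLite page size. The function name and the
    'dbpath'/'expected' arguments are hypothetical; sqlite3 is assumed to be
    imported at module level, as elsewhere in this file.
    Example: check_page_size('covers_4096.db') -> True/False
    """
    conn = sqlite3.connect(dbpath)
    try:
        page_size = conn.execute('PRAGMA page_size;').fetchone()[0]
    finally:
        conn.close()
    return page_size == expected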
def create_fill_one_partial_db(filelist=None, outputdb=None):
    """
    This is the main function called by each process.
    """
    # make sure we got the params
    assert filelist is not None and outputdb is not None, "internal arg passing error...!"
    # must be imported there... maybe... because of local num_hash_tables count
    import cover_hash_table as CHT
    # other imports
    import quick_query_test as QQT  # should be replaced in the future
    import fingerprint_hash as FH
    # create output db, including PRAGMA
    conn = sqlite3.connect(outputdb)
    conn.execute('PRAGMA temp_store = MEMORY;')
    conn.execute('PRAGMA synchronous = OFF;')
    conn.execute('PRAGMA journal_mode = OFF;')  # no ROLLBACK!
    conn.execute('PRAGMA cache_size = 1000000;')  # default=2000, page_size=1024
    CHT.init_db(conn)
    # iterate over files
    cnt_tid_added = 0
    for filepath in filelist:
        # get btchroma
        btchroma = QQT.get_cpressed_btchroma(filepath, compression=COMPRESSION)
        if btchroma is None:
            continue
        # get tid from filepath (faster than querying h5 file, less robust)
        tid = os.path.split(os.path.splitext(filepath)[0])[1]
        # get jumps
        landmarks = FH.get_landmarks(btchroma, decay=DECAY,
                                     max_per_frame=MAX_PER_FRAME)
        jumps = FH.get_jumps(landmarks, win=WIN)
        cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN)
        # add them
        jumpcodes = map(lambda cj: FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN),
                        cjumps)
        CHT.add_jumpcodes(conn, tid, jumpcodes, normalize=NORMALIZE, commit=False)
        cnt_tid_added += 1
        if cnt_tid_added % 1000 == 0:
            conn.commit()
        # debug
        if cnt_tid_added % 500 == 0:
            print 'We added %d tids to the hash table(s) of %s.' % (cnt_tid_added,
                                                                    outputdb)
    # we index
    CHT.reindex(conn)
    # commit any pending inserts (we only committed every 1000 tids above)
    conn.commit()
    # close connection
    conn.close()
    # done
    return
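
# The sketch below (an assumption, not part of the original module) shows one
# way the per-process worker above could be hooked into multiprocessing.Pool.map,
# which passes a single argument per task.
def _create_fill_one_partial_db_star(args):
    """
    Sketch: unpacking wrapper for create_fill_one_partial_db. The wrapper name
    and the idea of zipping file chunks with temporary db paths are
    hypothetical, e.g.:
        pool.map(_create_fill_one_partial_db_star, zip(file_chunks, tmpdbs))
    """
    filelist, outputdb = args
    return create_fill_one_partial_db(filelist=filelist, outputdb=outputdb)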
# (tail of the multiprocessing first pass: exception handling around the pool)
print 'MULTIPROCESSING'
print 'got exception: %r, terminating the pool' % (e,)
pool.terminate()
pool.join()

# SECOND PASS, AGGREGATE (ONE THREAD)
if nthreads == 1:
    print 'We are done (there was one thread, no aggregation!)'
    sys.exit(0)
# create final output
import cover_hash_table as CHT
conn = sqlite3.connect(outputdb)
conn.execute('PRAGMA temp_store = MEMORY;')
conn.execute('PRAGMA synchronous = OFF;')
conn.execute('PRAGMA journal_mode = OFF;')  # no ROLLBACK!
conn.execute('PRAGMA page_size = 4096;')
conn.execute('PRAGMA cache_size = 500000;')  # page_size=4096, 500000 pages -> ~2GB cache
CHT.init_db(conn)
print 'Final db initialized (including PRAGMA settings)'
# iterate over temporary dbs
for tdb in tmpdbs:
    aggregate_dbs(conn, tdb)
# index the final db
CHT.reindex(conn)
# all done
conn.commit()
conn.close()
print 'ALL DONE! you should delete the temporary databases...'
print tmpdbs
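
# Cleanup sketch (assumption, not in the original script): the temporary
# per-process databases listed above are not removed automatically. If one
# wanted to delete them here instead of doing it by hand, something like the
# following would do it (left commented out to preserve the original
# behavior of only printing a reminder):
#     for tdb in tmpdbs:
#         if os.path.isfile(tdb):
#             os.remove(tdb)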