def my_vacuum(dbpathnew, dbpathold):
    """
    My own vacuum function on cover_hash_table.
    It works by copying the old database into a freshly created one
    and is slow! Main use: transform the page size to 4096.
    Lives here because of its use of 'aggregate_dbs'.

    PARAMS
      dbpathnew - path for the new (vacuumed) database; must not exist
      dbpathold - path of the existing database to copy; must be a file
    RETURN
      None; prints an error and returns early on a bad path.
    """
    # refuse to clobber an existing target
    if os.path.exists(dbpathnew):
        print('ERROR: %s already exists.' % dbpathnew)
        return
    if not os.path.isfile(dbpathold):
        print('ERROR: %s is not a file.' % dbpathold)
        return
    # create new db
    import cover_hash_table as CHT
    conn = sqlite3.connect(dbpathnew)
    try:
        # speed-over-safety settings: no journal means no ROLLBACK!
        conn.execute('PRAGMA temp_store = MEMORY;')
        conn.execute('PRAGMA synchronous = OFF;')
        conn.execute('PRAGMA journal_mode = OFF;')  # no ROLLBACK!
        conn.execute('PRAGMA page_size = 4096;')
        conn.execute('PRAGMA cache_size = 500000;')  # page_size=4096, 500000->2GB
        CHT.init_db(conn)
        # copy the old db content into the new one
        aggregate_dbs(conn, dbpathold)
        # reindex
        CHT.reindex(conn)
        # done
        conn.commit()
    finally:
        # BUGFIX: always release the connection, even if the copy
        # or the reindex raises; the original leaked it on error.
        conn.close()
def my_vacuum(dbpathnew, dbpathold):
    """
    My own vacuum function on cover_hash_table.
    It works by copying and is slow!
    My main use: transform the page size to 4096.
    Here because of its use of 'aggregate_dbs'.

    PARAMS
      dbpathnew - destination path for the vacuumed db (must not exist)
      dbpathold - existing db file to copy from
    RETURN
      None; prints an error and bails out if either path is wrong.
    """
    # NOTE(review): this function is defined twice in this file;
    # this later definition is the one that wins at import time.
    if os.path.exists(dbpathnew):
        print('ERROR: %s already exists.' % dbpathnew)
        return
    if not os.path.isfile(dbpathold):
        print('ERROR: %s is not a file.' % dbpathold)
        return
    # create new db
    import cover_hash_table as CHT
    conn = sqlite3.connect(dbpathnew)
    try:
        conn.execute('PRAGMA temp_store = MEMORY;')
        conn.execute('PRAGMA synchronous = OFF;')
        conn.execute('PRAGMA journal_mode = OFF;')  # no ROLLBACK!
        conn.execute('PRAGMA page_size = 4096;')
        conn.execute('PRAGMA cache_size = 500000;')  # page_size=4096, 500000->2GB
        CHT.init_db(conn)
        # copy everything from the old db
        aggregate_dbs(conn, dbpathold)
        # reindex
        CHT.reindex(conn)
        # done
        conn.commit()
    finally:
        # BUGFIX: close the connection on all paths; the original
        # leaked it when aggregate_dbs or reindex raised.
        conn.close()
def create_fill_one_partial_db(filelist=None, outputdb=None):
    """
    This is the main function called by each process.
    Creates 'outputdb', fills it with jumpcodes computed from every
    file in 'filelist', then reindexes it.

    PARAMS
      filelist - list of paths to song files (required)
      outputdb - path of the SQLite db to create and fill (required)
    RETURN
      None
    """
    # assert we have the params
    assert filelist is not None and outputdb is not None, \
        "internal arg passing error...!"
    # must be imported there... maybe... because of local num_hash_tables count
    import cover_hash_table as CHT
    # other imports
    import quick_query_test as QQT  # should be replaced in the future
    import fingerprint_hash as FH
    # create output db, including PRAGMA (speed over safety)
    conn = sqlite3.connect(outputdb)
    conn.execute('PRAGMA temp_store = MEMORY;')
    conn.execute('PRAGMA synchronous = OFF;')
    conn.execute('PRAGMA journal_mode = OFF;')  # no ROLLBACK!
    conn.execute(
        'PRAGMA cache_size = 1000000;')  # default=2000, page_size=1024
    CHT.init_db(conn)
    # iterate over files
    cnt_tid_added = 0
    for filepath in filelist:
        # get btchroma; skip files that cannot be read/compressed
        btchroma = QQT.get_cpressed_btchroma(filepath, compression=COMPRESSION)
        if btchroma is None:
            continue
        # get tid from filepath (faster than querying h5 file, less robust)
        tid = os.path.split(os.path.splitext(filepath)[0])[1]
        # get jumps
        landmarks = FH.get_landmarks(btchroma,
                                     decay=DECAY,
                                     max_per_frame=MAX_PER_FRAME)
        jumps = FH.get_jumps(landmarks, win=WIN)
        cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN)
        # add them
        jumpcodes = map(
            lambda cj: FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN),
            cjumps)
        CHT.add_jumpcodes(conn,
                          tid,
                          jumpcodes,
                          normalize=NORMALIZE,
                          commit=False)
        cnt_tid_added += 1
        # commit in batches to bound the open transaction size
        if cnt_tid_added % 1000 == 0:
            conn.commit()
        # debug
        if cnt_tid_added % 500 == 0:
            print('We added %d tid in the hash table(s) of %s.' % (
                cnt_tid_added, outputdb))
    # we index
    CHT.reindex(conn)
    # BUGFIX: commit the tail of the last batch (and the reindex)
    # before closing; the original closed with a pending transaction,
    # discarding every row added since the last 1000-file commit.
    conn.commit()
    # close connection
    conn.close()
    # done
    return
def create_fill_one_partial_db(filelist=None, outputdb=None):
    """
    This is the main function called by each process.
    It builds 'outputdb' from scratch and adds the jumpcodes of
    every file in 'filelist' to it, then reindexes.

    PARAMS
      filelist - list of song file paths (required)
      outputdb - SQLite db path to create (required)
    RETURN
      None
    """
    # NOTE(review): this function is defined twice in this file;
    # this later definition shadows the earlier one.
    # assert we have the params
    assert filelist is not None and outputdb is not None, \
        "internal arg passing error...!"
    # must be imported there... maybe... because of local num_hash_tables count
    import cover_hash_table as CHT
    # other imports
    import quick_query_test as QQT  # should be replaced in the future
    import fingerprint_hash as FH
    # create output db, including PRAGMA
    conn = sqlite3.connect(outputdb)
    conn.execute('PRAGMA temp_store = MEMORY;')
    conn.execute('PRAGMA synchronous = OFF;')
    conn.execute('PRAGMA journal_mode = OFF;')  # no ROLLBACK!
    conn.execute('PRAGMA cache_size = 1000000;')  # default=2000, page_size=1024
    CHT.init_db(conn)
    # iterate over files
    cnt_tid_added = 0
    for filepath in filelist:
        # get btchroma; files we cannot process are skipped
        btchroma = QQT.get_cpressed_btchroma(filepath, compression=COMPRESSION)
        if btchroma is None:
            continue
        # get tid from filepath (faster than querying h5 file, less robust)
        tid = os.path.split(os.path.splitext(filepath)[0])[1]
        # get jumps
        landmarks = FH.get_landmarks(btchroma, decay=DECAY,
                                     max_per_frame=MAX_PER_FRAME)
        jumps = FH.get_jumps(landmarks, win=WIN)
        cjumps = FH.get_composed_jumps(jumps, levels=LEVELS, win=WIN)
        # add them
        jumpcodes = map(
            lambda cj: FH.get_jumpcode_from_composed_jump(cj, maxwin=WIN),
            cjumps)
        CHT.add_jumpcodes(conn, tid, jumpcodes, normalize=NORMALIZE,
                          commit=False)
        cnt_tid_added += 1
        # batch commits: keep the open transaction bounded
        if cnt_tid_added % 1000 == 0:
            conn.commit()
        # debug
        if cnt_tid_added % 500 == 0:
            print('We added %d tid in the hash table(s) of %s.' % (
                cnt_tid_added, outputdb))
    # we index
    CHT.reindex(conn)
    # BUGFIX: final commit was missing; without it the rows added
    # after the last 1000-file commit (and the reindex) were left in
    # a pending transaction when the connection closed.
    conn.commit()
    # close connection
    conn.close()
    # done
    return
        # NOTE(review): this span is the tail of a multiprocessing
        # driver whose beginning (def / try / pool setup) is outside
        # this view -- presumably an 'except' clause around the pool
        # work; confirm against the full file.
        print 'MULTIPROCESSING'
        print 'got exception: %r, terminating the pool' % (e, )
        pool.terminate()
        pool.join()

    # SECOND PASS, AGGREGATE (ONE THREAD)
    # with a single thread there is only one db, nothing to aggregate
    if nthreads == 1:
        print 'We are done (there was one thread, no aggregation!)'
        sys.exit(0)
    # create final output db with speed-over-safety PRAGMAs
    import cover_hash_table as CHT
    conn = sqlite3.connect(outputdb)
    conn.execute('PRAGMA temp_store = MEMORY;')
    conn.execute('PRAGMA synchronous = OFF;')
    conn.execute('PRAGMA journal_mode = OFF;')  # no ROLLBACK!
    conn.execute('PRAGMA page_size = 4096;')
    conn.execute('PRAGMA cache_size = 500000;')  # page_size=4096, 500000->2GB
    CHT.init_db(conn)
    print 'Final db initialized (including PRAGMA settings)'

    # iterate over temporary dbs, merging each one into the final db
    for tdb in tmpdbs:
        aggregate_dbs(conn, tdb)
    # index the final db
    CHT.reindex(conn)
    # all done: persist and release
    conn.commit()
    conn.close()
    print 'ALL DONE! you should delete the temporary databases...'
    print tmpdbs
        # NOTE(review): duplicate copy of the driver tail above; its
        # enclosing function/try block is not visible in this chunk --
        # presumably exception handling for the worker pool; verify
        # against the full file.
        print 'MULTIPROCESSING'
        print 'got exception: %r, terminating the pool' % (e,)
        pool.terminate()
        pool.join()

    # SECOND PASS, AGGREGATE (ONE THREAD)
    # a single thread produced a single db: no aggregation needed
    if nthreads == 1:
        print 'We are done (there was one thread, no aggregation!)'
        sys.exit(0)
    # create final output db (no journal => no ROLLBACK on failure)
    import cover_hash_table as CHT
    conn = sqlite3.connect(outputdb)
    conn.execute('PRAGMA temp_store = MEMORY;')
    conn.execute('PRAGMA synchronous = OFF;')
    conn.execute('PRAGMA journal_mode = OFF;') # no ROLLBACK!
    conn.execute('PRAGMA page_size = 4096;')
    conn.execute('PRAGMA cache_size = 500000;') # page_size=4096, 500000->2GB
    CHT.init_db(conn)
    print 'Final db initialized (including PRAGMA settings)'

    # merge every temporary (per-process) db into the final one
    for tdb in tmpdbs:
        aggregate_dbs(conn, tdb)
    # index the final db
    CHT.reindex(conn)
    # all done: commit and close
    conn.commit()
    conn.close()
    print 'ALL DONE! you should delete the temporary databases...'
    print tmpdbs