def queryHash(self, hashp, ident="unknownId", hashLen=1):
    """Wrap a raw hash in a fresh pHash datapoint and query the tree with it.

    Args:
        hashp: hash value; it is copied with ``pHash.copy_ulong64Ptr``.
        ident: value stored in the datapoint's ``id`` field.
        hashLen: value stored in the datapoint's ``hash_length`` field.

    Returns:
        Whatever ``self.query`` returns for the constructed datapoint.

    Raises:
        PHashException: if ``pHash.DP()`` yields ``None``.
    """
    datapoint = pHash.DP()

    # Guard clause: bail out early when no datapoint could be created.
    if datapoint is None:
        self.log.error("mem alloc error")
        raise PHashException("mem alloc error")

    # The ownership flag (thisown) is deliberately left at its default.
    datapoint.id = ident
    datapoint.hash = pHash.copy_ulong64Ptr(hashp)
    datapoint.hash_length = hashLen

    matches = self.query(datapoint)
    return matches
def queryHash(self, hashp, ident="unknownId", hashLen=1):
    """Query the tree for a hash value.

    A new ``pHash.DP`` is created, filled with ``ident``, a copy of
    ``hashp`` (made by ``pHash.copy_ulong64Ptr``) and ``hashLen``, then
    handed to ``self.query``.

    Args:
        hashp: hash value to look up.
        ident: identifier written to the datapoint's ``id`` field.
        hashLen: length written to the datapoint's ``hash_length`` field.

    Returns:
        The result of ``self.query`` on the new datapoint.

    Raises:
        PHashException: if the datapoint could not be created.
    """
    dp = pHash.DP()

    if dp is not None:
        # thisown is not modified here; the default ownership is kept.
        dp.id = ident
        dp.hash = pHash.copy_ulong64Ptr(hashp)
        dp.hash_length = hashLen
        return self.query(dp)

    failure = "mem alloc error"
    self.log.error(failure)
    raise PHashException(failure)
def main(argv):
    """Query an MVP tree with the DCT hash of every image in a directory.

    Args:
        argv: command-line arguments *without* the program name:
            ``[directory, dbname, radius, knearest, threshold]``. The last
            three are optional and default to 30.0, 20 and 15.0.

    Returns:
        0 on completion, -1 when fewer than two arguments are given,
        -3 when a pHash allocation returns ``None``.
    """
    # nb_calcs is a module-level counter; it is reset before every query.
    # NOTE(review): presumably incremented by distancefunc - confirm.
    global nb_calcs

    logging.basicConfig(level=logging.DEBUG)
    print(pHash.ph_about())

    if len(argv) < 2:
        print("not enough input arguments")
        print("usage: %s directory dbname [radius] [knearest] [threshold]"
              % (sys.argv[0]))
        return -1

    dir_name = argv[0]  # directory holding the query images
    filename = argv[1]  # MVP tree database file

    mvpfile = pHash.MVPFile()
    mvpfile.filename = filename
    # The distance function is installed through the binding helper.
    pHash.my_set_callback(mvpfile, distancefunc)
    mvpfile.hash_type = pHash.UINT64ARRAY

    print("using db %s" % (filename))
    print("using dir %s for query files" % (dir_name))
    print("dir name: %s" % (dir_name))

    # Only the files directly inside dir_name are used. The old loop kept
    # the listing of whichever sub-directory os.walk visited last and left
    # `files` as None for a missing directory.
    files = []
    for root, dirs, names in os.walk(dir_name):
        files = sorted(os.path.join(root, name) for name in names)
        break
    nbfiles = len(files)
    print("nb query files = %d" % (nbfiles))

    query = pHash.ph_malloc_datapoint(mvpfile.hash_type)
    if query is None:
        print("mem alloc error")
        return -3
    # The datapoint is released explicitly at the end of this function.
    query.thisown = 0

    # Optional arguments follow the two mandatory ones, i.e. they start at
    # argv[2] (the program name is not part of argv).
    radius = 30.0
    threshold = 15.0
    knearest = 20
    if len(argv) >= 3:
        radius = float(argv[2])
    if len(argv) >= 4:
        knearest = int(argv[3])
    if len(argv) >= 5:
        threshold = float(argv[4])
    print("radius = %f" % (radius))
    print("knearest = %d" % (knearest))
    print("threshold = %f" % (threshold))

    # Output array that receives up to `knearest` matches per query.
    results = pHash.DPptrArray(knearest)
    if results is None:
        pHash.ph_free_datapoint(query)
        return -3

    count = 0       # number of queries that completed
    sum_calcs = 0   # accumulated nb_calcs over the completed queries
    for i, path in enumerate(files):
        ret = pHash.ph_dct_imagehash(path)
        if isinstance(ret, int):
            # A bare int is returned when hashing failed.
            print("unable to get hash")
            continue
        _, tmphash = ret
        print("query[%d]: %s %x" % (i, path, tmphash))

        query.id = path
        query.hash = pHash.copy_ulong64Ptr(tmphash)
        query.hash_length = 1

        nb_calcs = 0
        ret = pHash.ph_query_mvptree(mvpfile, query, knearest, radius,
                                     threshold, results.cast())
        if isinstance(ret, int):
            # Only an error code came back; report that code itself.
            print("could not complete query, %d" % (ret))
            continue
        retcode, nbfound = ret
        if retcode != pHash.PH_SUCCESS and retcode != pHash.PH_ERRCAP:
            print("could not complete query, %d" % (retcode))
            continue

        count += 1
        sum_calcs += nb_calcs
        print(" %d files found" % (nbfound))
        for j in range(nbfound):
            d = distancefunc(query, results[j])
            print(" %d %s distance = %f" % (j, results[j].id, d))
        print("nb distance calcs: %d" % (nb_calcs))

        # Release the datapoints handed back by the query.
        for j in range(nbfound):
            results[j].id = None
            results[j].hash = None
            pHash.ph_free_datapoint(results[j])

    if count:
        ave_calcs = float(sum_calcs) / float(count)
        print("ave calcs/query: %f" % (ave_calcs))
    else:
        # Avoid dividing by zero when no query succeeded.
        print("ave calcs/query: n/a (no query completed)")

    pHash.ph_free_datapoint(query)
    del results
    return 0
def main(argv):
    """Build a new MVP tree database from the images of a directory.

    Args:
        argv: command-line arguments *without* the program name:
            ``[dirname, filename]`` - the image directory and the database
            file to write.

    Returns:
        0 once the save has been attempted (its return code is printed),
        -1 when fewer than two arguments are given, -4 when a datapoint
        could not be created.
    """
    logging.basicConfig(level=logging.DEBUG)
    print(pHash.ph_about())

    if len(argv) < 2:
        print("not enough input args")
        print("usage: %s dirname filename" % (sys.argv[0]))
        return -1

    dir_name = argv[0]  # directory to read image files from
    filename = argv[1]  # file the database is saved to

    mvpfile = pHash.MVPFile()
    mvpfile.branchfactor = 2
    mvpfile.pathlength = 5
    mvpfile.leafcapacity = 23
    mvpfile.pgsize = 4096
    mvpfile.filename = filename
    # The distance function is installed through the binding helper.
    pHash.my_set_callback(mvpfile, distancefunc)
    mvpfile.hash_type = pHash.UINT64ARRAY

    print("dir name: %s" % (dir_name))
    # Only the files directly inside dir_name are hashed; one tree is saved.
    paths = []
    for root, dirs, names in os.walk(dir_name):
        paths = [os.path.normpath(os.path.join(root, name))
                 for name in names]
        break
    nbfiles = len(paths)
    print("nbfiles = %d" % nbfiles)

    # Array of datapoint pointers handed to ph_save_mvptree.
    hashlist = pHash.DPptrArray(nbfiles)
    count = 0  # number of populated slots in hashlist
    for path in paths:
        # Hash first, so that no datapoint is allocated (and leaked) for a
        # file that cannot be hashed.
        ret = pHash.ph_dct_imagehash(path)
        if isinstance(ret, int) or ret[0] < 0:
            # Failure is reported as a bare int or as a negative status.
            print("unable to get hash")
            continue
        tmphash = ret[1]

        tmp = pHash.DP()
        if tmp is None:
            print("mem alloc error")
            for k in range(count):
                pHash.ph_free_datapoint(hashlist[k])
            return -4
        tmp.hash_type = mvpfile.hash_type
        # Keep Python from releasing the datapoint; it is freed below.
        tmp.thisown = 0
        hashlist[count] = tmp

        # .hash is a pointer field, so the integer hash is copied into
        # pointer storage instead of being assigned directly.
        hashlist[count].hash = pHash.copy_ulong64Ptr(tmphash)
        hashlist[count].id = path
        hashlist[count].hash_length = 1
        count += 1

    ret = pHash.ph_save_mvptree(mvpfile, hashlist.cast(), count)
    print("save: ret code %d" % (ret))

    # Free only the slots that were filled; the rest were never assigned.
    for k in range(count):
        pHash.ph_free_datapoint(hashlist[k])
    return 0
def main(argv):
    """Add the images of a directory to an existing MVP tree database.

    Args:
        argv: command-line arguments *without* the program name:
            ``[directory, dbname]`` - the image directory and the database
            file to extend.

    Returns:
        0 on success, -1 when fewer than two arguments are given, -3 when
        the pointer array could not be created, -4 when a datapoint could
        not be allocated, -6 when ``ph_add_mvptree`` returns only an error
        code.
    """
    logging.basicConfig(level=logging.DEBUG)
    print(pHash.ph_about())

    if len(argv) < 2:
        print("not enough input arguments")
        # This script takes no radius/knearest/threshold options.
        print("usage: %s directory dbname" % (sys.argv[0]))
        return -1

    dir_name = argv[0]  # directory holding the images to add
    filename = argv[1]  # MVP tree database file

    mvpfile = pHash.MVPFile()
    mvpfile.filename = filename
    # The distance function is installed through the binding helper.
    pHash.my_set_callback(mvpfile, distancefunc)
    mvpfile.hash_type = pHash.UINT64ARRAY

    print("dir name: %s" % (dir_name))
    # Only the files directly inside dir_name are used. The old loop kept
    # the listing of whichever sub-directory os.walk visited last and left
    # `files` as None for a missing directory.
    files = []
    for root, dirs, names in os.walk(dir_name):
        files = sorted(os.path.join(root, name) for name in names)
        break
    nbfiles = len(files)
    print("nbfiles = %d" % (nbfiles))

    # Array of datapoint pointers handed to ph_add_mvptree.
    hashlist = pHash.DPptrArray(nbfiles)
    if hashlist is None:
        print("mem alloc error")
        return -3

    count = 0  # number of populated slots in hashlist
    for i, path in enumerate(files):
        print("file[%d] = %s" % (i, path))
        # Hash first, so that a failing file never owns a datapoint that
        # would have to be released again.
        ret = pHash.ph_dct_imagehash(path)
        if isinstance(ret, int):
            print("unable to get hash")
            continue
        _, tmphash = ret

        tmpdp = pHash.ph_malloc_datapoint(mvpfile.hash_type)
        if tmpdp is None:
            print("mem alloc error")
            for k in range(count):
                hashlist[k].hash = None
                pHash.ph_free_datapoint(hashlist[k])
            return -4
        # Keep Python from releasing the datapoint; it is freed below.
        tmpdp.thisown = 0
        hashlist[count] = tmpdp

        hashlist[count].id = path
        hashlist[count].hash = pHash.copy_ulong64Ptr(tmphash)
        hashlist[count].hash_length = 1
        count += 1

    ret = pHash.ph_add_mvptree(mvpfile, hashlist.cast(), count)
    if isinstance(ret, int):
        print("error on ph_add_mvptree")
        status = -6
    else:
        res, nbsaved = ret
        print("number saved %d out of %d, ret code %d"
              % (nbsaved, count, res))
        status = 0

    # Free only the slots that were filled; the rest were never assigned.
    for k in range(count):
        hashlist[k].hash = None
        pHash.ph_free_datapoint(hashlist[k])
    hashlist = None
    return status
def main(argv):
    """Query an MVP tree with the DCT hash of every image in a directory.

    Args:
        argv: command-line arguments *without* the program name:
            ``[directory, dbname, radius, knearest, threshold]``. The last
            three are optional and default to 30.0, 20 and 15.0.

    Returns:
        0 on completion, -1 when fewer than two arguments are given,
        -3 when a pHash allocation returns ``None``.
    """
    # nb_calcs is a module-level counter; it is reset before every query.
    # NOTE(review): presumably incremented by distancefunc - confirm.
    global nb_calcs

    logging.basicConfig(level=logging.DEBUG)
    print(pHash.ph_about())

    if len(argv) < 2:
        print("not enough input arguments")
        print("usage: %s directory dbname [radius] [knearest] [threshold]"
              % (sys.argv[0]))
        return -1

    dir_name = argv[0]  # directory holding the query images
    filename = argv[1]  # MVP tree database file

    mvpfile = pHash.MVPFile()
    mvpfile.filename = filename
    # The distance function is installed through the binding helper.
    pHash.my_set_callback(mvpfile, distancefunc)
    mvpfile.hash_type = pHash.UINT64ARRAY

    print("using db %s" % (filename))
    print("using dir %s for query files" % (dir_name))
    print("dir name: %s" % (dir_name))

    # Only the files directly inside dir_name are used. The old loop kept
    # the listing of whichever sub-directory os.walk visited last and left
    # `files` as None for a missing directory.
    files = []
    for root, dirs, names in os.walk(dir_name):
        files = sorted(os.path.join(root, name) for name in names)
        break
    nbfiles = len(files)
    print("nb query files = %d" % (nbfiles))

    query = pHash.ph_malloc_datapoint(mvpfile.hash_type)
    if query is None:
        print("mem alloc error")
        return -3
    # The datapoint is released explicitly at the end of this function.
    query.thisown = 0

    # Optional arguments follow the two mandatory ones, i.e. they start at
    # argv[2] (the program name is not part of argv).
    radius = 30.0
    threshold = 15.0
    knearest = 20
    if len(argv) >= 3:
        radius = float(argv[2])
    if len(argv) >= 4:
        knearest = int(argv[3])
    if len(argv) >= 5:
        threshold = float(argv[4])
    print("radius = %f" % (radius))
    print("knearest = %d" % (knearest))
    print("threshold = %f" % (threshold))

    # Output array that receives up to `knearest` matches per query.
    results = pHash.DPptrArray(knearest)
    if results is None:
        pHash.ph_free_datapoint(query)
        return -3

    count = 0       # number of queries that completed
    sum_calcs = 0   # accumulated nb_calcs over the completed queries
    for i, path in enumerate(files):
        ret = pHash.ph_dct_imagehash(path)
        if isinstance(ret, int):
            # A bare int is returned when hashing failed.
            print("unable to get hash")
            continue
        _, tmphash = ret
        print("query[%d]: %s %x" % (i, path, tmphash))

        query.id = path
        query.hash = pHash.copy_ulong64Ptr(tmphash)
        query.hash_length = 1

        nb_calcs = 0
        ret = pHash.ph_query_mvptree(mvpfile, query, knearest, radius,
                                     threshold, results.cast())
        if isinstance(ret, int):
            # Only an error code came back; report that code itself.
            print("could not complete query, %d" % (ret))
            continue
        retcode, nbfound = ret
        if retcode != pHash.PH_SUCCESS and retcode != pHash.PH_ERRCAP:
            print("could not complete query, %d" % (retcode))
            continue

        count += 1
        sum_calcs += nb_calcs
        print(" %d files found" % (nbfound))
        for j in range(nbfound):
            d = distancefunc(query, results[j])
            print(" %d %s distance = %f" % (j, results[j].id, d))
        print("nb distance calcs: %d" % (nb_calcs))

        # Release the datapoints handed back by the query.
        for j in range(nbfound):
            results[j].id = None
            results[j].hash = None
            pHash.ph_free_datapoint(results[j])

    if count:
        ave_calcs = float(sum_calcs) / float(count)
        print("ave calcs/query: %f" % (ave_calcs))
    else:
        # Avoid dividing by zero when no query succeeded.
        print("ave calcs/query: n/a (no query completed)")

    pHash.ph_free_datapoint(query)
    del results
    return 0
class MVPTree():
    """Object wrapper around a pHash MVP tree database file.

    The instance keeps the query parameters (radius, threshold, knearest),
    a hasher chosen from the content type, and a ``pHash.MVPFile`` that is
    rebuilt before every query.
    """

    log = None          # logger named after the class
    db = None           # database file name
    radius = 30.0       # query radius passed to ph_query_mvptree
    threshold = 15.0    # query threshold passed to ph_query_mvptree
    knearest = 20       # maximum number of results per query
    hashType = None     # pHash hash type constant
    callback = None     # distance function installed on the MVPFile
    mvpfile = None      # pHash.MVPFile instance
    # __hasher = None

    def __init__(self, dbname, contentType=IMAGE, radius=30.0,
                 threshold=15.0, knearest=20, hashType=pHash.UINT64ARRAY,
                 callback=None):
        """Set up the tree wrapper.

        Args:
            dbname: database file name.
            contentType: IMAGE, VIDEO or AUDIO; selects the hasher.
            radius: query radius.
            threshold: query threshold.
            knearest: maximum number of results per query.
            hashType: pHash hash type constant.
            callback: optional distance function overriding the one chosen
                by the content type.

        Raises:
            TypeError: if ``contentType`` is not a known content type.
        """
        self.log = logging.getLogger(self.__class__.__name__)
        self.db = dbname
        self.initContentType(contentType)
        self.radius = radius
        self.threshold = threshold
        self.knearest = knearest
        self.hashType = hashType
        # An explicit callback wins over the content-type default.
        if callback is not None:
            self.callback = callback
        self.initMVPFile()
        self.log.debug(
            "Setup: radius:%f threshold:%f knearest:%d (hashType:%d)" %
            (self.radius, self.threshold, self.knearest, self.hashType))

    def initMVPFile(self):
        """Create and initialise a fresh ``pHash.MVPFile`` for this db."""
        self.mvpfile = pHash.MVPFile()
        pHash.ph_mvp_init(self.mvpfile)
        self.mvpfile.filename = self.db
        pHash.my_set_callback(self.mvpfile, self.callback)
        self.mvpfile.hash_type = self.hashType
        return

    def initContentType(self, content):
        """Select the hasher and distance callback for a content type.

        Raises:
            TypeError: if ``content`` is not IMAGE, VIDEO or AUDIO.
        """
        if content == IMAGE:
            self.__hasher = ImageHasher()
        elif content == VIDEO:
            self.__hasher = VideoHasher()
        elif content == AUDIO:
            self.__hasher = AudioHasher()
        else:
            raise TypeError("unknown content type: %r" % (content,))
        # Every content type currently shares the same distance function.
        self.callback = distancefunc

    @staticmethod
    def _listFiles(dirname):
        """Return the sorted paths of the files directly inside dirname."""
        for root, dirs, names in os.walk(dirname):
            # The first entry yielded by os.walk is dirname itself.
            return sorted(os.path.join(root, name) for name in names)
        return []

    ############################ ADD FUNCTIONS #############################

    def addFile(self, filename):
        """Add a single file to the database."""
        self.addFiles([filename])
        return

    def addFilesFrom(self, dirname):
        """Add every file found directly inside ``dirname``."""
        self.addFiles(self._listFiles(dirname))
        return

    def addFiles(self, files):
        """Hash the given files and add them to the MVP database.

        The database is saved from scratch when it does not exist yet,
        otherwise the datapoints are added to the existing tree.

        Args:
            files: list of file names.

        Raises:
            MemoryError: if the pointer array cannot be created.
            PHashException: if hashing a file fails or pHash reports an
                error while saving or adding.
        """
        nbfiles = len(files)
        hashlist = pHash.DPptrArray(nbfiles)
        if hashlist is None:
            self.log.error("mem alloc error")
            raise MemoryError("mem alloc error")

        # One datapoint per file; Python must not release them itself.
        count = 0
        for count, f in enumerate(files, 1):
            tmpdp = self.makeDatapoint(f)
            tmpdp.thisown = 0
            hashlist[count - 1] = tmpdp
            self.log.debug("file[%d] = %s" % (count - 1, f))

        self.log.debug("add %d files to file %s" % (count, self.db))
        # NOTE(review): __DbExists is not defined in the visible part of
        # this class - confirm it is provided elsewhere.
        if not self.__DbExists():
            # New database: the save call yields only a return code.
            retcode = pHash.ph_save_mvptree(self.mvpfile, hashlist.cast(),
                                            count)
            nbsaved = count
        else:
            ret = pHash.ph_add_mvptree(self.mvpfile, hashlist.cast(), count)
            if isinstance(ret, int):
                self.log.error("error on ph_add_mvptree")
                raise PHashException("error on ph_add_mvptree")
            (retcode, nbsaved) = ret

        # Common error handling for both paths.
        if retcode != pHash.PH_SUCCESS and retcode != pHash.PH_ERRCAP:
            self.log.warning("could not complete query, %d" % (retcode))
            raise PHashException("could not complete query, %d" % (retcode))
        self.log.debug("number saved %d out of %d, ret code %d" %
                       (nbsaved, count, retcode))
        return

    ############################ QUERY FUNCTIONS ###########################

    def queryFilesFrom(self, dirname, ident=None):
        """Query the tree for every file directly inside ``dirname``.

        ``ident`` is accepted but not used.

        Returns:
            A list ``[(srcfilename, [(match, score), ...]), ...]``.
        """
        return self.queryFiles(self._listFiles(dirname))

    def queryFiles(self, files):
        """Query the tree for each file; return ``[(file, matches), ...]``."""
        return [(f, self.queryFile(f)) for f in files]

    def queryFile(self, filename, ident=None):
        """Hash a file with the configured hasher and query the tree.

        The file name is used as the datapoint id; ``ident`` is accepted
        but not used.

        Raises:
            IOError: if ``filename`` does not exist.
        """
        if not os.access(filename, os.F_OK):
            raise IOError('file not found: %s' % (filename))
        hashp, hashLen = self.__hasher.makeHash(filename)
        return self.queryHash(hashp, ident=filename, hashLen=hashLen)

    def queryHash(self, hashp, ident="unknownId", hashLen=1):
        """Wrap a hash in a new datapoint and query the tree with it.

        Raises:
            PHashException: if ``pHash.DP()`` yields ``None``.
        """
        query = pHash.DP()
        if query is None:
            self.log.error("mem alloc error")
            raise PHashException("mem alloc error")
        # thisown is left at its default for this datapoint.
        query.id = ident
        query.hash = pHash.copy_ulong64Ptr(hashp)
        query.hash_length = hashLen
        return self.query(query)

    def query(self, datapoint, callback=distancefunc):
        """Query the tree for a datapoint.

        Args:
            datapoint: a ``pHash.DP`` instance.
            callback: function ``(datapoint, match) -> distance`` used to
                score each match in the returned list.

        Returns:
            A list of ``(match_datapoint, distance)`` tuples.

        Raises:
            TypeError: if ``datapoint`` is not a ``pHash.DP``.
            MemoryError: if the result array cannot be created.
            PHashException: if pHash cannot complete the query.
        """
        if not isinstance(datapoint, pHash.DP):
            raise TypeError("expected a pHash.DP instance")
        # Rebuild the MVPFile so the query runs on fresh state.
        self.initMVPFile()

        results = pHash.DPptrArray(self.knearest)
        if results is None:
            raise MemoryError("mem alloc error")

        ret = pHash.ph_query_mvptree(self.mvpfile, datapoint, self.knearest,
                                     self.radius, self.threshold,
                                     results.cast())
        if isinstance(ret, int):
            # Only an error code came back; report that code itself.
            self.log.error("could not complete query, %d" % (ret))
            raise PHashException("could not complete query, %d" % (ret))
        retcode, nbfound = ret
        if retcode != pHash.PH_SUCCESS and retcode != pHash.PH_ERRCAP:
            self.log.warning("could not complete query, %d" % (retcode))
            raise PHashException("could not complete query, %d" % (retcode))

        self.log.debug(" %d files found" % (nbfound))
        # The returned datapoints (and their id/hash) are not freed here.
        return [(results[j], callback(datapoint, results[j]))
                for j in range(nbfound)]

    ############################ UTILS FUNCTIONS ###########################

    def makeDatapoint(self, filename):
        """Allocate a datapoint and fill it with the hash of ``filename``.

        The datapoint has ``thisown`` set to 0, so the caller is
        responsible for releasing it.

        Raises:
            MemoryError: if the datapoint cannot be allocated.
            PHashException: if the hasher cannot hash the file.
        """
        tmpdp = pHash.ph_malloc_datapoint(self.mvpfile.hash_type)
        if tmpdp is None:
            self.log.error("mem alloc error")
            raise MemoryError("mem alloc error")
        tmpdp.thisown = 0

        try:
            hashp, hashLen = self.__hasher.makeHash(filename)
        except PHashException:
            self.log.error("unable to get hash: %s" % (filename))
            # Do not leak the datapoint allocated above.
            pHash.ph_free_datapoint(tmpdp)
            raise PHashException("unable to get hash: %s" % (filename))

        tmpdp.id = filename
        # TODO: the copy helper is hash-type dependent (voidPtr?).
        tmpdp.hash = pHash.copy_ulong64Ptr(hashp)
        # Use the hasher-reported length, as queryHash does.
        tmpdp.hash_length = hashLen
        return tmpdp
def main(argv):
    """Build a new MVP tree database from the images of a directory.

    Args:
        argv: command-line arguments *without* the program name:
            ``[dirname, filename]`` - the image directory and the database
            file to write.

    Returns:
        0 once the save has been attempted (its return code is printed),
        -1 when fewer than two arguments are given, -4 when a datapoint
        could not be created.
    """
    logging.basicConfig(level=logging.DEBUG)
    print(pHash.ph_about())

    if len(argv) < 2:
        print("not enough input args")
        print("usage: %s dirname filename" % (sys.argv[0]))
        return -1

    dir_name = argv[0]  # directory to read image files from
    filename = argv[1]  # file the database is saved to

    mvpfile = pHash.MVPFile()
    mvpfile.branchfactor = 2
    mvpfile.pathlength = 5
    mvpfile.leafcapacity = 23
    mvpfile.pgsize = 4096
    mvpfile.filename = filename
    # The distance function is installed through the binding helper.
    pHash.my_set_callback(mvpfile, distancefunc)
    mvpfile.hash_type = pHash.UINT64ARRAY

    print("dir name: %s" % (dir_name))
    # Only the files directly inside dir_name are hashed; one tree is saved.
    image_paths = []
    for root, dirs, names in os.walk(dir_name):
        image_paths = [os.path.normpath(os.path.join(root, name))
                       for name in names]
        break
    nbfiles = len(image_paths)
    print("nbfiles = %d" % nbfiles)

    # Array of datapoint pointers handed to ph_save_mvptree.
    hashlist = pHash.DPptrArray(nbfiles)
    count = 0  # number of populated slots in hashlist
    for image_path in image_paths:
        # Hash before allocating, so a file that cannot be hashed never
        # leaves an orphaned datapoint behind.
        ret = pHash.ph_dct_imagehash(image_path)
        if isinstance(ret, int) or ret[0] < 0:
            # Failure is reported as a bare int or as a negative status.
            print("unable to get hash")
            continue
        tmphash = ret[1]

        tmp = pHash.DP()
        if tmp is None:
            print("mem alloc error")
            for k in range(count):
                pHash.ph_free_datapoint(hashlist[k])
            return -4
        tmp.hash_type = mvpfile.hash_type
        # Keep Python from releasing the datapoint; it is freed below.
        tmp.thisown = 0
        hashlist[count] = tmp

        # .hash is a pointer field, so the integer hash is copied into
        # pointer storage instead of being assigned directly.
        hashlist[count].hash = pHash.copy_ulong64Ptr(tmphash)
        hashlist[count].id = image_path
        hashlist[count].hash_length = 1
        count += 1

    ret = pHash.ph_save_mvptree(mvpfile, hashlist.cast(), count)
    print("save: ret code %d" % (ret))

    # Free only the slots that were filled; the rest were never assigned.
    for k in range(count):
        pHash.ph_free_datapoint(hashlist[k])
    return 0