# words to be added word_present = [ 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'bloom', 'blossom', 'bolster', 'bonny', 'bonus', 'bonuses', 'coherent', 'cohesive', 'colorful', 'comely', 'comfort', 'gems', 'generosity', 'generous', 'generously', 'genial' ] # word not added word_absent = [ 'bluff', 'cheater', 'hate', 'war', 'humanity', 'racism', 'hurt', 'nuke', 'gloomy', 'facebook', 'geeksforgeeks', 'twitter' ] for item in word_present: bloomf._add(item) _size, _hash_count, _bytestring = bloomf._filter_array_for_storing() ##################################### b_a = bitarray(0) b_a.frombytes(_bytestring) shuffle(word_present) shuffle(word_absent) test_words = word_present[:10] + word_absent shuffle(test_words) for word in test_words: if bloomf._check(word): if word in word_absent:
def _log_both(message):
    """Print *message* to stdout and then to the module-level log file ``f``."""
    print(message)
    print(message, file=f)


def _save_bloom_filter(db, filter_db, bloom, name, image_count):
    """Store the current state of *bloom* in the DB and validate the stored copy.

    :param db: DB wrapper providing ``add_blmFilter`` and
        ``validation_saved_data``
    :param filter_db: Bloom-filter record object providing ``set_blm`` and
        ``get_dict``
    :param bloom: the in-memory Bloom filter being checkpointed
    :param name: name under which the filter is stored
    :param image_count: image counter reported in the log message
    :return: None; terminates the process with ``sys.exit(-1)`` when
        ``validation_saved_data`` returns a value greater than zero
    """
    _, _, blm_bystr = bloom._filter_array_for_storing()
    filter_db.set_blm(name, bloom.get_size(), bloom.get_hash_count(),
                      blm_bystr)
    db.add_blmFilter(filter_db.get_dict())

    # NOTE(review): ``get_bit_array`` is formatted without being called, so if
    # it is a method this logs its repr rather than the bit array. Left as-is
    # because its definition is not visible here - confirm and add ``()``.
    print("\n{}".format(bloom.get_bit_array), file=f)

    if db.validation_saved_data(filter_db.get_dict()) > 0:
        print(" Validation for adding bloom filter failed!!!!!")
        if f:
            print(" Validation for adding bloom filter failed!!!!!", file=f)
            f.close()
        sys.exit(-1)

    _log_both("Bloom {} bit array saved after {} image processed".format(
        name, image_count))


def main(arglist):
    """
    Flow for adding the feature vectors of a set of images to the DB-backed
    Bloom filter.

    The named Bloom filter is loaded from the DB when it exists there,
    otherwise a new one is created and stored. Every image listed by
    ``list_img_files`` is then processed with ``hog_statest`` and its feature
    vector string is offered to the filter. The filter is written back to the
    DB whenever the image counter is a multiple of 10, and once more after
    the loop if additions were made since the last write.

    All progress messages go to stdout and to the module-level log file ``f``.

    :param arglist: positional arguments; index 0 is not used here
        (presumably the script name), then
        [1] DB host, [2] database name, [3] Bloom filter name,
        [4] base path of the images, [5] and [6] the first two arguments
        passed to ``BloomFilter`` (presumably expected item count and
        false-positive probability - confirm against ``BloomFilter``)
    :return: 0 on completion; the process exits with status -1 if validation
        of a stored filter fails
    """
    _log_both("\n{}\n".format(arglist))
    for i, arg in enumerate(arglist):
        _log_both("arg[{}]={}".format(i, arg))

    _host = arglist[1]
    _database = arglist[2]
    _NAME = arglist[3]
    _path_base = arglist[4]
    _N_IMAGES = arglist[5]
    _PB = arglist[6]

    db = hogDB(_host, _database, f)

    # Set up the Bloom filter: reuse the stored one or create and store a new one.
    _blmFilterDB = blmFilter(f)
    if _blmFilterDB.existindb(db, _NAME):
        stored = _blmFilterDB.get_dict()
        _blmFilter = BloomFilter(_N_IMAGES, _PB, f, None,
                                 stored["blm_bystr"],
                                 stored["blm_size"],
                                 stored["blm_hashes"])
    else:
        _blmFilter = BloomFilter(_N_IMAGES, _PB, f)
        _, _, blm_bystr = _blmFilter._filter_array_for_storing()
        _blmFilterDB.set_blm(_NAME, _blmFilter.get_size(),
                             _blmFilter.get_hash_count(), blm_bystr)
        db.add_blmFilter(_blmFilterDB.get_dict())

    # Read the filter record back from the DB and show it (stdout only).
    blm_name, blm_size, blm_hashes, blm_bystr = db.get_blmFilter(_NAME)
    print("blm_name ={}, blm_size={}, blm_hashes={}, blm_bystr=\n{}".format(
        blm_name, blm_size, blm_hashes, blm_bystr))

    train_images = list_img_files(_path_base)
    number_of_images = len(train_images)
    image_count = 0  # advanced only for images that were not discarded
    added_to_filter = 0
    existed_in_filter = 0
    discarded = 0
    unsaved_additions = False  # True while the filter holds unsaved entries

    for fname in train_images:
        _log_both("\n**** {} image from {} processing started.... {}".format(
            image_count, number_of_images, fname))

        fimage = hog_statest(fname, 64, 128, _host, _database, f)
        try:
            fimage._run()
            fimage._save_fvect()
        except Exception as e:
            # Best effort: a failing image is counted and skipped, but the
            # reason is recorded so discarded images can be diagnosed.
            _log_both("exception on image {}".format(fname))
            _log_both("    reason: {!r}".format(e))
            discarded += 1
            del fimage
            continue

        if _blmFilter._add(fimage._str_fvect):
            _log_both(
                "The feature vector for {} added to Bloom filter data".format(
                    fname))
            added_to_filter += 1
            unsaved_additions = True
        else:
            _log_both(
                "The feature vector for {} exists in Bloom filter data".format(
                    fname))
            existed_in_filter += 1
        del fimage

        if image_count % 10 == 0:
            # Periodic checkpoint of the bit array.
            _save_bloom_filter(db, _blmFilterDB, _blmFilter, _NAME,
                               image_count)
            unsaved_additions = False

        _log_both("\n**** {} image from {} processing finished\n\n".format(
            image_count, number_of_images))
        image_count += 1

    # Final checkpoint: without it, entries added after the last periodic
    # save would be dropped when the filter object is deleted below.
    if unsaved_additions:
        _save_bloom_filter(db, _blmFilterDB, _blmFilter, _NAME, image_count)

    del _blmFilter
    del _blmFilterDB

    _log_both(
        "passed : {} images\n added: {}\n existed: {}\n discarded: {}".format(
            number_of_images, added_to_filter, existed_in_filter, discarded))
    return 0