def dumpData(self):
    print("Start logging data to tmp folder")
    # ClientState
    # utilities.dump_data_to_file(self.padding_ds, "tmp", "paddingset")
    utilities.dump_data_to_file(self.getClientState(), "tmp", "client")
    self.cached_data_clusters.getAllLocalCacheWithoutLock()
    # utilities.dump_data_to_file(self.cached_data_clusters.getAllLocalCacheWithoutLock(), "tmp", "cache")
    utilities.dump_data_to_file(self.keywords_tracking, "tmp", "keywords_tracking")

    # Print throughput, bogus pairs, local cache pairs and cache size,
    # total pairs in the EDB, and average result length in the padded DB.
    print("Throughput " + str(self.accumulated_throughput))
    print("Batch count " + str(self.batch_count))
    print("Average batch processing time (ms) " + str((self.total_time_pad_enc / self.batch_count) * 1000))
    print("Bogus pairs " + str(self.accumulated_bogus))
    # content = self.cached_data_clusters.getClusterSizeAllWithoutLock()
    # print("Cache pairs " + str(content))
    # print("Cache (mb) " + str(os.path.getsize(os.path.join("tmp", "cache")) / (1024 * 1024.0)))
    print("EDB size " + str(utilities.monitor_dict_size(self.keywords_tracking)))
    print("EDB size (mb) " + str(utilities.get_size("shield.db") / (1024 * 1024.0)))
    print("Avg result length " + str(utilities.monitor_dict_size(self.keywords_tracking) / len(self.keywords_tracking)))
def generatePaddingData(clusters_keywords_props, *args):
    # Format: [[('a', 0.23), ('b', 0.2), ...], [], [], [], []]
    # Options are passed as a single dict in the first positional argument,
    # e.g. {'max_bogus_size': ..., 'writable': ..., 'dir': ..., 'file_name': ...}.
    max_bogus_size = args[0]['max_bogus_size']
    file_count = [(100000 + i) for i in range(1, max_bogus_size + 1)]
    padding_ds = {}
    count = 0
    for cluster in clusters_keywords_props:
        count += 1
        print("Processing cluster " + str(count) + "/" + str(len(clusters_keywords_props)))
        for keyword_prop in cluster:
            keyword = keyword_prop[0]
            probability = keyword_prop[1]
            bogus_file_no = int(probability * max_bogus_size * 1500)  # scaled by the average number of documents per file
            if bogus_file_no < max_bogus_size / 2:
                bogus_file_no = int(max_bogus_size / 2)
            random.shuffle(file_count)
            bogus_ids = file_count[0:bogus_file_no]
            padding_ds[keyword] = bogus_ids
    # Dump to file
    if args[0].get('writable'):
        utilities.dump_data_to_file(padding_ds, args[0]['dir'], args[0]['file_name'])
    return padding_ds
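# --- Hedged usage sketch (not from the source) ---
# Illustrates the dict-as-first-positional-argument convention of the
# module-level generatePaddingData above. The cluster contents and option
# values are made-up examples; only the dict keys mirror the function body.
def _example_generate_padding_module_level():
    example_clusters = [
        [('alice', 0.23), ('bob', 0.20)],  # cluster 1: (keyword, probability) pairs
        [('carol', 0.05)],                 # cluster 2
    ]
    example_options = {
        'max_bogus_size': 1000,   # size of the bogus file-ID pool
        'writable': False,        # set True to persist via utilities.dump_data_to_file
        'dir': 'tmp',
        'file_name': 'paddingset',
    }
    return generatePaddingData(example_clusters, example_options)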
def getAllLocalCacheWithoutLock(self):
    # Snapshot the cache under the lock, then dump it and report its size.
    with self.lock:
        img = copy.deepcopy(self.cached_data_clusters)
    utilities.dump_data_to_file(img, "tmp", "cache")
    content = self.getClusterSizeAllWithoutLock()
    print("Cache pairs " + str(content))
    print("Cache (mb) " + str(os.path.getsize(os.path.join("tmp", "cache")) / (1024 * 1024.0)))
def run(self):
    try:
        self.service_connector.open_connection()
        n_query_number = int(0.1 * len(self.keywords_tracking))
        print("No. keywords " + str(len(self.keywords_tracking)))
        # serving counting attack
        access_patterns = []
        # total ids
        # total_ids = 0
        # Select keywords; the search token is generated to perform the search later.
        n_query_keywords = random.sample(list(self.keywords_tracking.keys()), n_query_number)
        query_time = 0
        for query_keyword in n_query_keywords:
            start_time = timer()  # in seconds
            search_token = self.sse_client.generateToken(query_keyword)
            encrypted_IDs = self.service_connector.search_connect(search_token)
            if encrypted_IDs is not None:
                raw_ids = self.sse_client.decryptIDs(encrypted_IDs)
                access_patterns.append(raw_ids)
            else:
                print("Keyword " + query_keyword + " is not tracked")
                _ = self.search_local_cache(query_keyword)
            end_time = timer()
            # total_ids += len(encrypted_IDs)
            query_time += (end_time - start_time)
        avg_cur_trial = (query_time / n_query_number) * 1000
        print("Avg search time (ms) per keyword: " + str(avg_cur_trial))
        # print("Total ids " + str(total_ids))
        # Write search data to file
        utilities.dump_data_to_file(n_query_keywords, "tmp", "query")
        utilities.dump_data_to_file(access_patterns, "tmp", "access")
    # except:
    #     print("Search exception")
    finally:
        self.service_connector.close_connection()
def generatePaddingData(self, clusters_keywords_props, max_bogus_size=550000, **kwargs):
    # Format: [[('a', 0.23), ('b', 0.2), ...], [], [], [], []]
    file_count = [(100000 + i) for i in range(1, max_bogus_size + 1)]
    self.padding_ds = {}
    for cluster in clusters_keywords_props:
        for keyword_prop in cluster:
            keyword = keyword_prop[0]
            probability = keyword_prop[1]
            bogus_file_no = int(probability * max_bogus_size * 100)  # 100 is the average documents per file
            random.shuffle(file_count)
            bogus_ids = file_count[0:bogus_file_no]
            self.padding_ds[keyword] = bogus_ids
    # Dump to file
    if kwargs.get('writable'):
        utilities.dump_data_to_file(self.padding_ds, kwargs['dir'], kwargs['file_name'])
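# --- Hedged usage sketch (not from the source) ---
# Shows how the method variant above might be called: padding options are
# passed as keyword arguments rather than a positional dict. `sse_owner` is a
# hypothetical instance of the enclosing class; the values are illustrative.
def _example_generate_padding_method(sse_owner, clusters_keywords_props):
    sse_owner.generatePaddingData(
        clusters_keywords_props,
        max_bogus_size=550000,
        writable=True,
        dir='tmp',
        file_name='paddingset',
    )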
def exportClusters(self, datadir, cluster_keyword_file, cluster_prob_file):
    utilities.dump_data_to_file(self.clusters_keywords, datadir, cluster_keyword_file)
    utilities.dump_data_to_file(self.cluster_probabilities, datadir, cluster_prob_file)
def run(self):
    # This dictionary holds the real IDs per keyword, which we later pad and transfer to the server.
    cur_cluster_real_ids = dict()
    outsourced_key_count = 0
    padding_count = 0
    self.service_connector.open_connection()

    ######## ground test
    total_ids = 0
    temp_total_search_time = 0.0
    for queried_keyword in self.queried_keywords:
        start_temp_search_time = timer()
        search_token = self.sse_client.generateToken(queried_keyword)
        if search_token is not None:
            encrypted_IDs = self.service_connector.search_connect(search_token)
            # time to decryptIDs as well
            _ = self.sse_client.decryptIDs(encrypted_IDs)  # considered as access patterns
            total_ids += len(encrypted_IDs)
            # FAIR COMPARISON: DO NOT INCLUDE LOCAL SEARCH
            # consider the time it takes to search the local cache
            # _ = self.search_local_cache(queried_keyword)
        end_temp_search_time = timer()
        temp_total_search_time += (end_temp_search_time - start_temp_search_time)
    print(">>> GROUND TEST searching queried keywords (ms) " + str((temp_total_search_time / len(self.queried_keywords)) * 1000))
    print(">>> Total ids " + str(total_ids))
    self.service_connector.close_connection()
    ######## ending test

    print("> Phase 1: Query back and deletion at DB, add back to padding dataset")
    self.service_connector.open_connection()
    start_time = timer()
    for keyword in self.rebuild_keywords:
        search_token = self.sse_client.generateToken(keyword)
        if search_token is not None:
            encrypted_IDs = self.service_connector.search_del_connect(search_token)
            raw_ids = self.sse_client.decryptIDs(encrypted_IDs)
            outsourced_key_count += 1
            # Reset tracking, as later we add the keyword back with different numbers of real and padding IDs.
            self.keywords_tracking[keyword] = 0
            # Delete in client state
            self.sse_client.deleteState(keyword)
            # Filter real documents
            (real_ids, bogus_ids) = utilities.split_real_documents(raw_ids, 100000)
            padding_count += len(bogus_ids)
            if keyword in self.padding_dataset:
                cur_padding = self.padding_dataset[keyword]
                mergedlist = list(set(cur_padding + bogus_ids))
                self.padding_dataset[keyword] = mergedlist
            # Keep track of the real IDs for later re-encryption
            cur_cluster_real_ids[keyword] = real_ids
    end_time = timer()
    self.service_connector.close_connection()
    # print("updated keyword state " + str(len(self.sse_client.getKeywordState())))
    # Rebuild time in phase 1 includes:
    # query time + deletion in server and client state + filtering and putting back into the padding dataset + resetting tracking
    print(">>> rebuild time (ms) phase 1st " + str((end_time - start_time) * 1000))
    print(">>> no. outsourced keys of the cluster " + str(outsourced_key_count))
    print(">>> no. of original keys of the cluster " + str(len(self.rebuild_keywords)))
    print(">>> no. real downloaded ids " + str(utilities.monitor_len_dataset(cur_cluster_real_ids)))
    print(">>> no. bogus downloaded ids " + str(padding_count))

    # Re-test the queried keywords
    self.service_connector.open_connection()
    # How often the rebuilt keywords occur in the query test
    count_in_query = len(set(self.rebuild_keywords).intersection(set(self.queried_keywords)))
    total_ids = 0
    temp_total_search_time = 0.0
    for queried_keyword in self.queried_keywords:
        start_temp_search_time = timer()
        search_token = self.sse_client.generateToken(queried_keyword)
        if search_token is not None:
            encrypted_IDs = self.service_connector.search_connect(search_token)
            # time to decryptIDs as well
            _ = self.sse_client.decryptIDs(encrypted_IDs)  # considered as access patterns
            total_ids += len(encrypted_IDs)
            # consider the time it takes to search the local cache
            # _ = self.search_local_cache(queried_keyword)
        end_temp_search_time = timer()
        temp_total_search_time += (end_temp_search_time - start_temp_search_time)
    self.service_connector.close_connection()
    print(">>> no. of queried keywords " + str(len(self.queried_keywords)))
    print(">>> rebuild keywords in queried set " + str(count_in_query))
    print(">>> re-test searching queried keywords (ms) " + str((temp_total_search_time / len(self.queried_keywords)) * 1000))
    print(">>> total ids " + str(total_ids))

    print("> Phase 2: Pad the downloaded keyword ids using high mode, and outsource encrypted data to server")
    padding_count = 0
    self.service_connector.open_connection()
    start_re_encrypt_time = timer()
    max_length = 0
    for _, value in cur_cluster_real_ids.items():
        if max_length < len(value):
            max_length = len(value)
    # Finalise the padding bogus for each keyword
    for keyword, value in cur_cluster_real_ids.items():
        bogus_count = max_length - len(value)
        padding_count += bogus_count
        if bogus_count > 0:
            # Select random bogus IDs and append them to the real ID list
            random.shuffle(self.padding_dataset[keyword])
            bogus_files = self.padding_dataset[keyword][0:bogus_count]
            value.extend(bogus_files)
            # Remove the used bogus IDs from the padding dataset
            self.padding_dataset[keyword] = self.padding_dataset[keyword][bogus_count:]
        # Reset tracking
        self.keywords_tracking[keyword] = len(value)
    # Encrypt and transfer to server
    encrypted_batch = self.sse_client.streaming(cur_cluster_real_ids)
    self.service_connector.streaming_connect(encrypted_batch)
    end_re_encrypt_time = timer()
    self.service_connector.close_connection()
    print(">>> Re-encrypt time (ms) " + str((end_re_encrypt_time - start_re_encrypt_time) * 1000))
    print(">>> Bogus used " + str(padding_count))

    print("> Phase 3: Re-search queried keywords")
    self.service_connector.open_connection()
    total_ids = 0
    temp_total_search_time = 0.0
    for queried_keyword in self.queried_keywords:
        start_temp_search_time = timer()
        search_token = self.sse_client.generateToken(queried_keyword)
        if search_token is not None:
            encrypted_IDs = self.service_connector.search_connect(search_token)
            # time to decryptIDs as well
            _ = self.sse_client.decryptIDs(encrypted_IDs)  # considered as access patterns
            total_ids += len(encrypted_IDs)
            # consider the time it takes to search the local cache
            # _ = self.search_local_cache(queried_keyword)
        end_temp_search_time = timer()
        temp_total_search_time += (end_temp_search_time - start_temp_search_time)
    print(">>> re-test searching queried keywords (ms) " + str((temp_total_search_time / len(self.queried_keywords)) * 1000))
    print(">>> total ids " + str(total_ids))
    self.service_connector.close_connection()

    # Overwrite self.keywords_tracking, self.padding_dataset and the client state on disk
    utilities.dump_data_to_file(self.padding_dataset, "tmp", "paddingset")
    utilities.dump_data_to_file(self.sse_client.exportClientState(), "tmp", "client")
    utilities.dump_data_to_file(self.keywords_tracking, "tmp", "keywords_tracking")
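# --- Hedged sketch (assumption, not from the source) ---
# utilities.split_real_documents is not shown in this section. Based on the
# bogus IDs being generated as (100000 + i) in generatePaddingData and the
# call split_real_documents(raw_ids, 100000) above, it plausibly partitions a
# decrypted result list around that offset. A minimal sketch of that assumed
# behaviour:
def _split_real_documents_sketch(ids, bogus_offset):
    """Return (real_ids, bogus_ids), treating IDs above the offset as bogus."""
    real_ids = [doc_id for doc_id in ids if doc_id <= bogus_offset]
    bogus_ids = [doc_id for doc_id in ids if doc_id > bogus_offset]
    return real_ids, bogus_ids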