def makeJsonPathFile_LK(self):
    print(f"[{type(self).__name__}]Running ---> makeJsonPathFile_LK")
    wb = xlrd.open_workbook(self.lkFileName)
    sheet = wb.sheet_by_index(0)
    # Skip the header row; each remaining row holds a comma-separated pair
    # of linked resource paths.
    for x in range(1, sheet.nrows):
        CommonUtilities.progressBar(x, sheet.nrows)
        row = CommonUtilities.splitOnComma(sheet.cell_value(x, 0))
        left_r, righ_r = self.__getLeftRightElment(row)
        source_lr = CommonUtilities.getSourceFromPath(left_r)
        source_rr = CommonUtilities.getSourceFromPath(righ_r)
        # Register the left resource once, keyed by its own path.
        if left_r not in self.dictionary_lk_path and left_r not in self.resources_founded:
            self.dictionary_lk_path[left_r] = {source_lr: [left_r]}
            self.resources_founded.append(left_r)
            self.total_object_linked += 1
            self.total_file_linkage += 1
        # Attach the right resource to the left one, grouped by its source.
        if righ_r not in self.resources_founded:
            if source_rr not in self.dictionary_lk_path[left_r]:
                self.dictionary_lk_path[left_r][source_rr] = []
            self.dictionary_lk_path[left_r][source_rr].append(righ_r)
            self.resources_founded.append(righ_r)
            self.total_file_linkage += 1
    CommonUtilities.writeDictToJson(self.dictionary_lk_path, SOURCES_BASE_LK_DICT)
def __AggregateAttributesFile(self):
    progressCount = 0
    print("")
    CommonUtilities.progressBar(progressCount, len(self.lk_path_dict.keys()),
                                status="Loading ..")
    for objectSpect, filepath in self.lk_path_dict.items():
        progressCount += 1
        CommonUtilities.progressBar(progressCount, len(self.lk_path_dict.keys()),
                                    status=f"Agg: {objectSpect}")
        self.__current_json_data = CommonUtilities.loadJsonFile(
            f"{self.src_dir_name}/{filepath}")
        # As soon as the file is loaded for aggregation, build the dynamic
        # dictionaries for it.
        self.dym_dict_local = SecondIterationDictionary(self.__current_json_data)
        self.dym_dict_local.Load()
        # Expose the dynamic dictionaries on the class.
        self.coll_sim_din = self.dym_dict_local.dyn_col_sim
        self.coll_inv_din = self.dym_dict_local.col_inv
        self.__AggregateAttributes()
        CommonUtilities.writeDictToJson(self.__current_json_data,
                                        f"{self.dst_dir_name}/{filepath}.json")
    self.__dynDict.save()
def __cleanSingleFile(self, source_name, file_path):
    jsnData = CommonUtilities.loadJsonFile(f"{self.src_dir_name}/{file_path}")
    dt_cleaner = DataCleaner(jsnData, self.gtAttrNames)
    dt_cleaner.cleanKeys()
    dt_cleaner.cleanValues()
    jsnDataCl = dt_cleaner.getSignificantData()
    empty_keys_d, empty_value_d, composite_value_d = dt_cleaner.getEmptyDataKeys()
    # Track what was discarded so it can be audited later.
    if len(empty_keys_d) + len(empty_value_d) > 0:
        self.discarded_info_pool[file_path] = {
            "key_empty": empty_keys_d,
            "value_empty": empty_value_d
        }
    if len(composite_value_d) > 0:
        self.composite_value_pool[file_path] = composite_value_d
    CommonUtilities.writeDictToJson(jsnDataCl, f"{self.dst_dir_name}/{file_path}.json")
def Load(self):
    self.__makeCollInv()
    self.__DynColSim()
    CommonUtilities.writeDictToJson(self.col_inv, f"test/coll_{self.fileID}.json")
    CommonUtilities.writeDictToJson(self.dyn_col_sim, f"test/sym_coll_{self.fileID}.json")
def makeJsonPathFile_Common(self):
    print(f"[{type(self).__name__}]Running ---> makeJsonPathFile_Common")
    self.dictionary_cm_path = CommonUtilities.merge_two_dicts(
        self.dictionary_lk_path, self.dictionary_ext_path)
    CommonUtilities.writeDictToJson(self.dictionary_cm_path, SOURCES_BASE_CM_DICT)
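# Hedged usage sketch: the owning class is not shown in this listing, so the
# constructor name below is an assumption. What the methods themselves imply
# is the call order: makeJsonPathFile_LK must run first (it fills
# self.resources_founded), makeJsonPathFile_EXT then registers only the
# resources LK did not link, and makeJsonPathFile_Common merges the two
# dictionaries.
#
#   builder = PathDictionaryBuilder()   # hypothetical class name
#   builder.makeJsonPathFile_LK()
#   builder.makeJsonPathFile_EXT()
#   builder.makeJsonPathFile_Common()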
def __AggregateFileS(self):
    self.progress_bar_count = 0
    print("")
    CommonUtilities.progressBar(self.progress_bar_count, len(self.lk_path_dict.keys()),
                                status="Loading ..")
    new_merged_file = self.__AggregateFiles(self.lk_path_dict.values())
    CommonUtilities.writeDictToJson(new_merged_file,
                                    f"{self.dst_dir_name}/big_cluster.json")
def __makeCollisionSimDictionary(self):
    print(f"[{type(self).__name__}]Running ---> __makeCollisionSimDictionary")
    self.exc_start_time = datetime.datetime.now()
    self.__make_dirty_dict_sim()
    self.__make_clean_dict_sim()
    CommonUtilities.writeDictToJson(self.collision_sim_dict, self.outFile)
    self.exc_end_time = datetime.datetime.now()
def RunInteration(self):
    print(f"[{type(self).__name__}]Running ---> RunInteration")
    self.exc_start_time = datetime.datetime.now()
    self.__findOrCreateDir("")
    self.__AggregateFileSameSourceAndSpect()
    CommonUtilities.writeDictToJson(self.lk_1_path_dict, self.dst_lk_path_dict)
    self.exc_end_time = datetime.datetime.now()
    self.__printStats()
def __AggregateAttributesFile(self):
    for objectSpect, objectSources in self.lk_path_dict.items():
        for sources, filepath in objectSources.items():
            self.__findOrCreateDir(sources)
            self.__current_json_data = CommonUtilities.loadJsonFile(
                f"{self.src_dir_name}/{filepath}")
            self.__AggregateAttributes()
            CommonUtilities.writeDictToJson(self.__current_json_data,
                                            f"{self.dst_dir_name}/{filepath}.json")
def __makeCollisionDictionary(self):
    print(f"[{type(self).__name__}]Running ---> __makeCollisionDictionary")
    self.exc_start_time = datetime.datetime.now()
    for objectSpect, objectValues in self.filePathData.items():
        for fileSource, filesList in objectValues.items():
            for filepath in filesList:
                curr_jsonData = CommonUtilities.loadJsonFile(f"{self.srcDir}/{filepath}")
                self.__mergeFileWithCollisionDict(curr_jsonData)
    CommonUtilities.writeDictToJson(self.collision_dict, self.outFile)
    self.exc_end_time = datetime.datetime.now()
def makeJsonPathFile_EXT(self):
    print(f"[{type(self).__name__}]Running ---> makeJsonPathFile_EXT")
    for dir_name in os.listdir(BASE_SOURCE_DIR):
        for file_name in os.listdir(f"{BASE_SOURCE_DIR}/{dir_name}/"):
            filename = file_name.split(".")[0]
            rs = f"{dir_name}/{filename}"
            # Only register resources that were not already linked.
            if rs not in self.resources_founded:
                self.dictionary_ext_path[rs] = {
                    CommonUtilities.getSourceFromPath(rs): [rs]
                }
                self.total_file_external += 1
    CommonUtilities.writeDictToJson(self.dictionary_ext_path, SOURCES_BASE_EXT_DICT)
def __AggregateFileSameSourceAndSpect(self):
    progressCount = 0
    print("")
    CommonUtilities.progressBar(progressCount, len(self.lk_path_dict.keys()),
                                status="Loading ..")
    for objectSpect, objectSources in self.lk_path_dict.items():
        progressCount += 1
        CommonUtilities.progressBar(progressCount, len(self.lk_path_dict.keys()),
                                    status=f"Agg: {objectSpect}")
        self.lk_1_path_dict[objectSpect] = {}
        for sources, filespath in objectSources.items():
            self.__findOrCreateDir(sources)
            self.lk_1_path_dict[objectSpect][sources] = f"{sources}/{self.newFileNameID}"
            new_merged_file = self.__AggregateFiles(filespath)
            CommonUtilities.writeDictToJson(
                new_merged_file,
                f"{self.dst_dir_name}/{sources}/{self.newFileNameID}.json")
            self.newFileNameID += 1
def __AggregateFileSameSourceAndSpect(self):
    for objectSpect, objectSources in self.lk_path_dict.items():
        self.lk_1_path_dict[objectSpect] = {}
        for sources, filespath in objectSources.items():
            self.__findOrCreateDir(sources)
            self.lk_1_path_dict[objectSpect][sources] = f"{sources}/{self.newFileNameID}"
            new_merged_file = self.__AggregateFiles(filespath)
            dym_dict = FirstIterationDictionary(new_merged_file, self.newFileNameID)
            dym_dict.Load()
            CommonUtilities.writeDictToJson(
                new_merged_file,
                f"{self.dst_dir_name}/{sources}/{self.newFileNameID}.json")
            self.newFileNameID += 1
def cleanDataSet(self):
    print(f"[{type(self).__name__}]Running ---> cleanDataSet")
    self.exc_start_time = datetime.datetime.now()
    for object_spect, object_val in self.cm_path_dict.items():
        for source_name, source_files in object_val.items():
            self.__findOrCreateDir(source_name)
            for file_path in source_files:
                self.__cleanSingleFile(source_name, file_path)
    CommonUtilities.writeDictToJson(self.discarded_info_pool, DROPPED_ATTRIBUTES_FILES)
    CommonUtilities.writeDictToJson(self.composite_value_pool, COMPOSITE_ATTRIBUTES_FILES)
    self.exc_end_time = datetime.datetime.now()
    self.__printStats()
def __makeCollisionInvDictionary(self):
    print(f"[{type(self).__name__}]Running ---> __makeCollisionInvDictionary")
    self.exc_start_time = datetime.datetime.now()
    # Invert the collision dictionary: map each value back to the list of
    # (count, attribute) pairs it appeared under.
    for keyAttribute, valueAttributeList in self.__collision_dict.items():
        for valueAttributeCount, valueAttribute in valueAttributeList["value_list"]:
            if valueAttribute not in self.collision_inv_dict:
                self.collision_inv_dict[valueAttribute] = {'attribute_list': []}
            self.collision_inv_dict[valueAttribute]['attribute_list'].append(
                (valueAttributeCount, keyAttribute))
    CommonUtilities.writeDictToJson(self.collision_inv_dict, self.outFile)
    self.exc_end_time = datetime.datetime.now()
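# A minimal, self-contained sketch of the inversion performed by
# __makeCollisionInvDictionary, on toy data (the real dictionary is built by
# earlier pipeline stages and read from self.__collision_dict):
collision_dict_toy = {"colore": {"value_list": [(3, "rosso"), (1, "blu")]}}
collision_inv_toy = {}
for attr, entry in collision_dict_toy.items():
    for count, value in entry["value_list"]:
        collision_inv_toy.setdefault(value, {"attribute_list": []})
        collision_inv_toy[value]["attribute_list"].append((count, attr))
assert collision_inv_toy == {"rosso": {"attribute_list": [(3, "colore")]},
                             "blu": {"attribute_list": [(1, "colore")]}}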
def __AggregateFileSameSpect(self):
    progressCount = 0
    print("")
    CommonUtilities.progressBar(progressCount, len(self.lk_path_dict.keys()),
                                status="Loading ..")
    for objectSpect, objectSources in self.lk_path_dict.items():
        progressCount += 1
        CommonUtilities.progressBar(progressCount, len(self.lk_path_dict.keys()),
                                    status=f"Agg: {objectSpect}")
        pathToMerge = []
        for sources, filespath in objectSources.items():
            pathToMerge.append(filespath)
        self.lk_2_path_dict[objectSpect] = f"{self.newFileNameID}"
        new_merged_file = self.__AggregateFiles(pathToMerge)
        CommonUtilities.writeDictToJson(new_merged_file,
                                        f"{self.dst_dir_name}/{self.newFileNameID}.json")
        self.newFileNameID += 1
import CommonUtilities
from Constats_App import *

dictBigCluster = CommonUtilities.loadJsonFile(
    f"{PHASE_3_SOURCE_DIR}/big_cluster.json", ext="")
dictColl = {}
dictColl2 = {}
dictCollInv = {}
# Count how many times each value occurs for every attribute.
for key, values in dictBigCluster.items():
    dictColl[key] = {}
    for src, val, *out in values:
        if val not in dictColl[key]:
            dictColl[key][val] = 0
        dictColl[key][val] += 1
CommonUtilities.writeDictToJson(dictColl, f"{PHASE_3_SOURCE_DIR}/big_clusterColl.json")
# Same count, but broken down by the original attribute name (the optional
# third tuple element; the cluster key itself is used when it is absent).
for key, values in dictBigCluster.items():
    dictColl2[key] = {}
    for src, val, *out in values:
        if val not in dictColl2[key]:
            dictColl2[key][val] = {}
        oldAttrName = out[0] if len(out) > 0 else key
        if oldAttrName not in dictColl2[key][val]:
            dictColl2[key][val][oldAttrName] = 0
        dictColl2[key][val][oldAttrName] += 1
CommonUtilities.writeDictToJson(
    dictColl2,
    f"{PHASE_3_SOURCE_DIR}/big_clusterColl2.json")  # NOTE: output path assumed; the original listing is truncated mid-call here
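# A minimal sketch of the counting logic above, on toy data rather than the
# real big_cluster.json: each entry maps an attribute name to
# (source, value[, original_attribute]) tuples, and the first pass counts how
# many times each value occurs per attribute.
toy_cluster = {"name": [("s1", "Alice"), ("s2", "Alice", "fullname")]}
toy_coll = {}
for key, values in toy_cluster.items():
    toy_coll[key] = {}
    for src, val, *out in values:
        toy_coll[key][val] = toy_coll[key].get(val, 0) + 1
assert toy_coll == {"name": {"Alice": 2}}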
def __Load(self):
    # Create an empty dictionary file on first use.
    if not os.path.exists(self.__pathDictionary):
        CommonUtilities.writeDictToJson({}, self.__pathDictionary)
    self.__dyn_dictionary = CommonUtilities.loadJsonFile(self.__pathDictionary, ext="")
def save(self):
    CommonUtilities.writeDictToJson(self.__dyn_dictionary, self.__pathDictionary)
import CommonUtilities
from Constats_App import *

dictBigCluster2 = CommonUtilities.loadJsonFile(
    f"{PHASE_3_SOURCE_DIR}/big_cluster2.json", ext="")
clusterTaDict = CommonUtilities.loadJsonFile(
    f"{PHASE_3_SOURCE_DIR}/big_clusterkey_5.json", ext="")
clusterTaDictinv = {}
bigCluster3 = {}
# Invert the cluster->members mapping so each member points at its root key.
for key, values in clusterTaDict.items():
    for value in values:
        clusterTaDictinv[value] = key
CommonUtilities.writeDictToJson(clusterTaDictinv,
                                f"{PHASE_3_SOURCE_DIR}/big_clusterkey_5_inv.json")
# Regroup every (source, value, original-attribute) tuple under its root key.
for key, values in dictBigCluster2.items():
    if len(values) > 0:
        rootKey = clusterTaDictinv[key]
        if rootKey not in bigCluster3:
            bigCluster3[rootKey] = []
        for src, value, *oldAttrName in values:
            if len(oldAttrName) < 1:
                curr_item = (src, value, key)
            else:
                curr_item = (src, value, oldAttrName[0])
            bigCluster3[rootKey].append(curr_item)
CommonUtilities.writeDictToJson(bigCluster3, f"{PHASE_3_SOURCE_DIR}/big_cluster3.json")
import CommonUtilities
from Constats_App import *

bigcluster = CommonUtilities.loadJsonFile(f"{PHASE_3_SOURCE_DIR}/big_cluster2")
keySimInv = CommonUtilities.loadJsonFile(f"{PHASE_3_SOURCE_DIR}/testInv")
outputData = {}
outputData2 = {}
outputData3 = {}
outputData4 = {}
outputData5 = {}
# Step 1: count the elements for every attribute name.
for key, values in bigcluster.items():
    if len(values) > 0:
        outputData[key] = len(values)
CommonUtilities.writeDictToJson(outputData, f"{PHASE_3_SOURCE_DIR}/big_clusterkey.json")
# Step 2: count the elements of every key, split by original attribute name.
for key, values in bigcluster.items():
    if len(values) > 0:
        outputData2[key] = {}
        for src, val, *oldAttrname in values:
            if len(oldAttrname) > 0:
                if oldAttrname[0] not in outputData2[key]:
                    outputData2[key][oldAttrname[0]] = 0
                outputData2[key][oldAttrname[0]] += 1
            else:
                if key not in outputData2[key]:
                    outputData2[key][key] = 0
                outputData2[key][key] += 1
import CommonUtilities
from Constats_App import *

dictSim = CommonUtilities.loadJsonFile(f"{COLLISION_DICTIONARY_SIM_DICT}", ext="")
dictSimInv = {}
# Invert the similarity dictionary: for every attribute, collect the
# attributes whose similarity lists contain it, with the matching scores.
for key in dictSim:
    dictSimInv[key] = {}
    for key2 in dictSim:
        keysList = dictSim[key2]['attr_sim_list']
        if key in keysList:
            for x in range(len(keysList)):
                if dictSim[key2]['attr_sim_list'][x] == key:
                    dictSimInv[key][key2] = dictSim[key2]['attr_sim_score'][x]
CommonUtilities.writeDictToJson(dictSimInv, f"{PHASE_3_SOURCE_DIR}/testInv.json")
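# A minimal sketch of the inversion above, on toy data rather than the real
# COLLISION_DICTIONARY_SIM_DICT: if "nome" lists "name" as a similar attribute
# with score 0.9, the inverted dictionary records, under "name", every
# attribute pointing at it together with that score.
toy_sim = {"nome": {"attr_sim_list": ["name"], "attr_sim_score": [0.9]},
           "name": {"attr_sim_list": [], "attr_sim_score": []}}
toy_inv = {key: {} for key in toy_sim}
for key in toy_sim:
    for key2 in toy_sim:
        sims = toy_sim[key2]["attr_sim_list"]
        if key in sims:
            toy_inv[key][key2] = toy_sim[key2]["attr_sim_score"][sims.index(key)]
assert toy_inv == {"nome": {}, "name": {"nome": 0.9}}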