def makeJsonPathFile_LK(self):
        """Build the linkage (LK) path dictionary from the linkage spreadsheet.

        Reads the first sheet of self.lkFileName; each data row holds a
        comma-separated pair of resource paths (left, right). The left
        resource becomes a cluster key in self.dictionary_lk_path, grouped
        by source; right resources are appended to that cluster under their
        own source. The final dictionary is written to SOURCES_BASE_LK_DICT.
        """
        print(f"[{type(self).__name__}]Running ---> makeJsonPathFile_LK")

        wb = xlrd.open_workbook(self.lkFileName)
        sheet = wb.sheet_by_index(0)

        # Start at row 1 — row 0 is presumably a header; confirm against the
        # spreadsheet format.
        for x in range(1, sheet.nrows):
            CommonUtilities.progressBar(x, sheet.nrows)
            row = CommonUtilities.splitOnComma(sheet.cell_value(x, 0))
            left_r, righ_r = self.__getLeftRightElment(row)
            source_lr = CommonUtilities.getSourceFromPath(left_r)
            source_rr = CommonUtilities.getSourceFromPath(righ_r)

            # Register the left resource once as a new cluster key.
            if not left_r in self.dictionary_lk_path.keys(
            ) and not left_r in self.resources_founded:
                self.dictionary_lk_path[left_r] = {source_lr: [left_r]}
                self.resources_founded.append(left_r)
                self.total_object_linked += 1
                self.total_file_linkage += 1

            # Attach the right resource to the left resource's cluster,
            # bucketed by the right resource's source.
            # NOTE(review): if left_r was already in resources_founded but was
            # never added as a dictionary_lk_path key (e.g. it appeared earlier
            # as a right-hand resource), the lookup below raises KeyError —
            # verify the input guarantees this cannot happen.
            if not righ_r in self.resources_founded:
                if not source_rr in self.dictionary_lk_path[left_r].keys():
                    self.dictionary_lk_path[left_r][source_rr] = []
                self.dictionary_lk_path[left_r][source_rr].append(righ_r)
                self.resources_founded.append(righ_r)
                self.total_file_linkage += 1

        CommonUtilities.writeDictToJson(self.dictionary_lk_path,
                                        SOURCES_BASE_LK_DICT)
    def __AggregateAttributesFile(self):
        """Aggregate attributes for every file referenced by lk_path_dict.

        For each (object spec -> file path) entry: load the JSON, build the
        per-file dynamic dictionaries via SecondIterationDictionary, run
        __AggregateAttributes over the loaded data, and write the result under
        dst_dir_name. Finally persists the accumulated dynamic dictionary.
        """
        progressCount = 0
        print("")
        CommonUtilities.progressBar(progressCount,
                                    len(self.lk_path_dict.keys()),
                                    status="Loading ..")
        for objectSpect, filepath in self.lk_path_dict.items():
            progressCount += 1
            CommonUtilities.progressBar(progressCount,
                                        len(self.lk_path_dict.keys()),
                                        status=f"Agg: {objectSpect}")

            self.__current_json_data = CommonUtilities.loadJsonFile(
                f"{self.src_dir_name}/{filepath}")
            # As soon as the file for aggregation is loaded, build the dynamic
            # dictionaries for it.
            self.dym_dict_local = SecondIterationDictionary(
                self.__current_json_data)
            self.dym_dict_local.Load()

            # Expose the dynamic dictionaries on the instance so that
            # __AggregateAttributes below can use them.
            self.coll_sim_din = self.dym_dict_local.dyn_col_sim
            self.coll_inv_din = self.dym_dict_local.col_inv

            self.__AggregateAttributes()

            CommonUtilities.writeDictToJson(
                self.__current_json_data,
                f"{self.dst_dir_name}/{filepath}.json")
        self.__dynDict.save()
    def __cleanSingleFile(self, source_name, file_path):
        """Clean one source file and record any discarded/composite attributes.

        Loads src_dir_name/file_path, runs DataCleaner over keys and values,
        stashes empty/composite attribute info into the instance pools, and
        writes the cleaned data to dst_dir_name/file_path.json.
        """
        raw_data = CommonUtilities.loadJsonFile(
            f"{self.src_dir_name}/{file_path}")

        cleaner = DataCleaner(raw_data, self.gtAttrNames)
        cleaner.cleanKeys()
        cleaner.cleanValues()

        significant = cleaner.getSignificantData()
        empty_keys, empty_values, composite_values = cleaner.getEmptyDataKeys()

        # Track what was thrown away so it can be reported later.
        if empty_keys or empty_values:
            self.discarded_info_pool[file_path] = {
                "key_empty": empty_keys,
                "value_empty": empty_values,
            }

        if composite_values:
            self.composite_value_pool[file_path] = composite_values

        CommonUtilities.writeDictToJson(
            significant, f"{self.dst_dir_name}/{file_path}.json")
    def Load(self):
        """Build the inverse and similarity collision dictionaries, then dump both as JSON."""
        self.__makeCollInv()
        self.__DynColSim()

        for data, prefix in ((self.col_inv, "coll"),
                             (self.dyn_col_sim, "sym_coll")):
            CommonUtilities.writeDictToJson(
                data, f"test/{prefix}_{self.fileID}.json")
    def makeJsonPathFile_Common(self):
        """Merge the LK and EXT path dictionaries and persist the combined result."""
        print(f"[{type(self).__name__}]Running ---> makeJsonPathFile_Common")

        merged = CommonUtilities.merge_two_dicts(self.dictionary_lk_path,
                                                 self.dictionary_ext_path)
        self.dictionary_cm_path = merged
        CommonUtilities.writeDictToJson(merged, SOURCES_BASE_CM_DICT)
    def __AggregateFileS(self):
        """Merge every linked file into a single big_cluster.json under dst_dir_name."""
        self.progress_bar_count = 0
        print("")
        CommonUtilities.progressBar(self.progress_bar_count,
                                    len(self.lk_path_dict.keys()),
                                    status="Loading ..")

        merged = self.__AggregateFiles(self.lk_path_dict.values())
        CommonUtilities.writeDictToJson(
            merged, f"{self.dst_dir_name}/big_cluster.json")
    def __makeCollisionSimDictionary(self):
        """Build the similarity collision dictionary (dirty pass, then clean pass) and save it."""
        print(f"[{type(self).__name__}]Running ---> __makeCollisionSimDictionary")

        self.exc_start_time = datetime.datetime.now()

        # Two-phase build: raw similarities first, then the cleaned set.
        self.__make_dirty_dict_sim()
        self.__make_clean_dict_sim()
        CommonUtilities.writeDictToJson(self.collision_sim_dict, self.outFile)

        self.exc_end_time = datetime.datetime.now()
Beispiel #8
0
 def RunInteration(self):
     """Run the iteration: aggregate same-source/same-spec files and save the path dict.

     NOTE(review): "RunInteration" looks like a typo for "RunIteration",
     but renaming would break external callers.
     """
     print(f"[{type(self).__name__}]Running ---> RunInteration")

     self.exc_start_time = datetime.datetime.now()

     # Ensure the destination root exists before aggregation writes into it.
     self.__findOrCreateDir("")
     self.__AggregateFileSameSourceAndSpect()

     CommonUtilities.writeDictToJson(self.lk_1_path_dict, self.dst_lk_path_dict)

     self.exc_end_time = datetime.datetime.now()
     self.__printStats()
    def __AggregateAttributesFile(self):
        """Aggregate attributes for every (object spec, source) file and save each result."""
        for spect, sources_map in self.lk_path_dict.items():
            for source, rel_path in sources_map.items():

                self.__findOrCreateDir(source)

                src_file = f"{self.src_dir_name}/{rel_path}"
                self.__current_json_data = CommonUtilities.loadJsonFile(src_file)
                self.__AggregateAttributes()

                dst_file = f"{self.dst_dir_name}/{rel_path}.json"
                CommonUtilities.writeDictToJson(self.__current_json_data,
                                                dst_file)
    def __makeCollisionDictionary(self):
        """Merge every file referenced by filePathData into the collision dictionary and save it."""
        print(f"[{type(self).__name__}]Running ---> makeCollisionDictionary")

        self.exc_start_time = datetime.datetime.now()

        # filePathData: object spec -> source -> [file paths]
        for object_values in self.filePathData.values():
            for files_list in object_values.values():
                for rel_path in files_list:
                    json_data = CommonUtilities.loadJsonFile(
                        f"{self.srcDir}/{rel_path}")
                    self.__mergeFileWithCollisionDict(json_data)
        CommonUtilities.writeDictToJson(self.collision_dict, self.outFile)

        self.exc_end_time = datetime.datetime.now()
    def makeJsonPathFile_EXT(self):
        """Scan BASE_SOURCE_DIR and register external (non-linked) resources.

        Every file found under BASE_SOURCE_DIR/<source>/ that was not already
        discovered during linkage (self.resources_founded) is added to
        self.dictionary_ext_path keyed by "<source>/<filename>", and the
        resulting dictionary is written to SOURCES_BASE_EXT_DICT.
        """
        print(f"[{type(self).__name__}]Running ---> makeJsonPathFile_EXT")

        # Renamed loop vars: the originals shadowed the builtins dir/file.
        for source_dir in os.listdir(BASE_SOURCE_DIR):
            for entry in os.listdir(f"{BASE_SOURCE_DIR}/{source_dir}/"):
                filename = entry.split(".")[0]
                # Bug fix: the resource id must include the file name.
                # Previously `filename` was computed but unused and the key
                # used a constant placeholder, so every file of a directory
                # collapsed onto a single dictionary entry.
                rs = f"{source_dir}/{filename}"
                if not rs in self.resources_founded:
                    self.dictionary_ext_path[rs] = {
                        CommonUtilities.getSourceFromPath(rs): [rs]
                    }
                    self.total_file_external += 1

        CommonUtilities.writeDictToJson(self.dictionary_ext_path,
                                        SOURCES_BASE_EXT_DICT)
Beispiel #12
0
 def __AggregateFileSameSourceAndSpect(self):
     """Merge files sharing object spec and source; record each merged file's path."""
     total = len(self.lk_path_dict.keys())
     done = 0
     print("")
     CommonUtilities.progressBar(done, total, status="Loading ..")
     for spect, sources_map in self.lk_path_dict.items():
         done += 1
         CommonUtilities.progressBar(done, total, status=f"Agg: {spect}")
         self.lk_1_path_dict[spect] = {}

         for source, files in sources_map.items():
             self.__findOrCreateDir(source)

             # Remember where the merged file for this (spec, source) lands.
             self.lk_1_path_dict[spect][source] = f"{source}/{self.newFileNameID}"
             merged = self.__AggregateFiles(files)
             CommonUtilities.writeDictToJson(
                 merged,
                 f"{self.dst_dir_name}/{source}/{self.newFileNameID}.json")
             self.newFileNameID += 1
    def __AggregateFileSameSourceAndSpect(self):
        """Merge same-spec/same-source files, build a first-iteration dictionary per merge, save each."""
        for spect, sources_map in self.lk_path_dict.items():
            self.lk_1_path_dict[spect] = {}

            for source, files in sources_map.items():
                self.__findOrCreateDir(source)

                file_id = self.newFileNameID
                self.lk_1_path_dict[spect][source] = f"{source}/{file_id}"
                merged = self.__AggregateFiles(files)
                # Build (and load) the per-merge dynamic dictionary.
                dyn_dict = FirstIterationDictionary(merged, file_id)
                dyn_dict.Load()
                CommonUtilities.writeDictToJson(
                    merged, f"{self.dst_dir_name}/{source}/{file_id}.json")
                self.newFileNameID += 1
    def cleanDataSet(self):
        """Clean every file referenced by cm_path_dict and dump the discard reports."""
        print(f"[{type(self).__name__}]Running ---> cleanDataSet")

        self.exc_start_time = datetime.datetime.now()

        # cm_path_dict: object spec -> source -> [file paths]
        for object_val in self.cm_path_dict.values():
            for source_name, source_files in object_val.items():
                self.__findOrCreateDir(source_name)
                for file_path in source_files:
                    self.__cleanSingleFile(source_name, file_path)

        # Reports of attributes dropped / flagged as composite during cleaning.
        CommonUtilities.writeDictToJson(self.discarded_info_pool,
                                        f"{DROPPED_ATTRIBUTES_FILES}")
        CommonUtilities.writeDictToJson(self.composite_value_pool,
                                        f"{COMPOSITE_ATTRIBUTES_FILES}")

        self.exc_end_time = datetime.datetime.now()
        self.__printStats()
Beispiel #15
0
    def __makeCollisionInvDictionary(self):
        """Invert the collision dictionary: value -> {'attribute_list': [(count, attribute), ...]}."""
        print(
            f"[{type(self).__name__}]Running ---> __makeCollisionInvDictionary"
        )

        self.exc_start_time = datetime.datetime.now()

        for attr_key, attr_entry in self.__collision_dict.items():
            # Each entry's "value_list" holds (count, value) pairs.
            for count, value in attr_entry["value_list"]:
                bucket = self.collision_inv_dict.setdefault(
                    value, {'attribute_list': []})
                bucket['attribute_list'].append((count, attr_key))
        CommonUtilities.writeDictToJson(self.collision_inv_dict, self.outFile)

        self.exc_end_time = datetime.datetime.now()
    def __AggregateFileSameSpect(self):
        """Merge all sources of each object spec into one numbered file per spec."""
        done = 0
        total = len(self.lk_path_dict.keys())
        print("")
        CommonUtilities.progressBar(done, total, status="Loading ..")
        for spect, sources_map in self.lk_path_dict.items():
            done += 1
            CommonUtilities.progressBar(done, total, status=f"Agg: {spect}")

            # Collect every source's file list for this spec.
            to_merge = list(sources_map.values())

            self.lk_2_path_dict[spect] = f"{self.newFileNameID}"
            merged = self.__AggregateFiles(to_merge)
            CommonUtilities.writeDictToJson(
                merged, f"{self.dst_dir_name}/{self.newFileNameID}.json")
            self.newFileNameID += 1
dictBigCluster = CommonUtilities.loadJsonFile(
    f"{PHASE_3_SOURCE_DIR}/big_cluster.json", ext="")

dictColl = {}
dictColl2 = {}
dictCollInv = {}

# Count, per cluster key, how many times each value occurs.
# Each cluster item is a tuple (src, value, *extras).
for key, values in dictBigCluster.items():
    counts = {}
    for src, val, *extras in values:
        counts[val] = counts.get(val, 0) + 1
    dictColl[key] = counts
CommonUtilities.writeDictToJson(dictColl,
                                f"{PHASE_3_SOURCE_DIR}/big_clusterColl.json")

# Count values again, sub-bucketed by the original attribute name
# (third tuple element when present, otherwise the cluster key itself).
for key, values in dictBigCluster.items():
    dictColl2[key] = {}

    for src, val, *extras in values:
        per_value = dictColl2[key].setdefault(val, {})
        old_attr = extras[0] if extras else key
        per_value[old_attr] = per_value.get(old_attr, 0) + 1
CommonUtilities.writeDictToJson(dictColl2,
Beispiel #18
0
 def __Load(self):
     """Load the dynamic dictionary from disk, seeding an empty file if absent."""
     path = self.__pathDictionary
     if not os.path.exists(path):
         # First run: create an empty dictionary file so the load succeeds.
         CommonUtilities.writeDictToJson({}, path)
     self.__dyn_dictionary = CommonUtilities.loadJsonFile(path, ext="")
Beispiel #19
0
 def save(self):
     """Persist the in-memory dynamic dictionary to its JSON file."""
     data, path = self.__dyn_dictionary, self.__pathDictionary
     CommonUtilities.writeDictToJson(data, path)
Beispiel #20
0
import CommonUtilities
from Constats_App import *

dictBigCluster2 = CommonUtilities.loadJsonFile(f"{PHASE_3_SOURCE_DIR}/big_cluster2.json", ext="")
clusterTaDict = CommonUtilities.loadJsonFile(f"{PHASE_3_SOURCE_DIR}/big_clusterkey_5.json", ext="")


clusterTaDictinv = {}
bigCluster3 = {}


# Invert cluster key -> members into member -> cluster key
# (the last cluster listing a member wins on duplicates).
for root, members in clusterTaDict.items():
    for member in members:
        clusterTaDictinv[member] = root

CommonUtilities.writeDictToJson(clusterTaDictinv, f"{PHASE_3_SOURCE_DIR}/big_clusterkey_5_inv.json")

# Re-key big_cluster2 by root cluster, normalizing each item to
# (src, value, original_attribute_name).
for attr_key, attr_values in dictBigCluster2.items():
    if len(attr_values) > 0:
        root_key = clusterTaDictinv[attr_key]
        if root_key not in bigCluster3:
            bigCluster3[root_key] = []

        for src, value, *old_name in attr_values:
            # Fall back to the attribute key when no original name is stored.
            item = (src, value, old_name[0] if old_name else attr_key)
            bigCluster3[root_key].append(item)

CommonUtilities.writeDictToJson(bigCluster3, f"{PHASE_3_SOURCE_DIR}/big_cluster3.json")
bigcluster = CommonUtilities.loadJsonFile(f"{PHASE_3_SOURCE_DIR}/big_cluster2")
keySimInv = CommonUtilities.loadJsonFile(f"{PHASE_3_SOURCE_DIR}/testInv")

outputData = {}
outputData2 = {}
outputData3 = {}
outputData4 = {}
outputData5 = {}

# Step 1: count the items under every attribute name (skip empty lists).
outputData = {key: len(values)
              for key, values in bigcluster.items() if len(values) > 0}

CommonUtilities.writeDictToJson(outputData,
                                f"{PHASE_3_SOURCE_DIR}/big_clusterkey.json")

####Passo 2 Conto gli elementi di ogni chiave suddividendoli secondo il nome attributo originale
# Step 2: count each key's items, bucketed by the original attribute name
# (third tuple element when present, otherwise the key itself).
for key, values in bigcluster.items():
    if len(values) > 0:
        buckets = {}
        for src, val, *old_name in values:
            bucket_key = old_name[0] if old_name else key
            buckets[bucket_key] = buckets.get(bucket_key, 0) + 1
        outputData2[key] = buckets
Beispiel #22
0
import CommonUtilities
from Constats_App import *


dictSim = CommonUtilities.loadJsonFile(f"{COLLISION_DICTIONARY_SIM_DICT}", ext="")

dictSimInv = {}

# Invert the similarity dictionary: for every attribute `key`, collect each
# attribute `other` whose attr_sim_list contains `key`, keeping the matching
# similarity score (the last occurrence wins if `key` appears more than once).
for key in dictSim:
    dictSimInv[key] = {}
    for other, entry in dictSim.items():
        sim_list = entry['attr_sim_list']
        for idx, attr in enumerate(sim_list):
            if attr == key:
                dictSimInv[key][other] = entry['attr_sim_score'][idx]

CommonUtilities.writeDictToJson(dictSimInv, f"{PHASE_3_SOURCE_DIR}/testInv.json")