def loadJsonPathFiles(self):
        """Load the three path dictionaries from their JSON files.

        Reads ``SOURCES_BASE_LK_DICT``, ``SOURCES_BASE_EXT_DICT`` and
        ``SOURCES_BASE_CM_DICT`` (each passed to the loader with an empty
        ``ext``) into ``dictionary_lk_path``, ``dictionary_ext_path`` and
        ``dictionary_cm_path`` respectively, then prints a completion
        message tagged with the class name.
        """
        # (attribute to fill, JSON file to read), processed in this order.
        path_sources = (
            ("dictionary_lk_path", SOURCES_BASE_LK_DICT),
            ("dictionary_ext_path", SOURCES_BASE_EXT_DICT),
            ("dictionary_cm_path", SOURCES_BASE_CM_DICT),
        )
        for attribute_name, json_path in path_sources:
            setattr(self, attribute_name,
                    CommonUtilities.loadJsonFile(json_path, ext=""))

        class_name = type(self).__name__
        print(f"[{class_name}]Completed ---> loadJsonPathFiles")
    def __AggregateAttributesFile(self):
        """Aggregate the attributes of every file listed in ``lk_path_dict``.

        For each ``(object spec, file path)`` entry the JSON file is loaded
        from ``src_dir_name``, a ``SecondIterationDictionary`` is built from
        it and loaded, its ``dyn_col_sim`` / ``col_inv`` members are exposed
        on the instance, ``__AggregateAttributes`` is run and the current
        JSON data is written under ``dst_dir_name`` (with ``.json``
        appended). The progress bar is updated once per file and
        ``__dynDict.save()`` is called once after the loop.
        """
        print("")
        total_files = len(self.lk_path_dict.keys())
        CommonUtilities.progressBar(0, total_files, status="Loading ..")

        for position, (objectSpect, filepath) in enumerate(
                self.lk_path_dict.items(), start=1):
            CommonUtilities.progressBar(position,
                                        total_files,
                                        status=f"Agg: {objectSpect}")

            source_file = f"{self.src_dir_name}/{filepath}"
            self.__current_json_data = CommonUtilities.loadJsonFile(
                source_file)

            # As soon as the file to aggregate is loaded, build the dynamic
            # dictionaries from it.
            self.dym_dict_local = SecondIterationDictionary(
                self.__current_json_data)
            self.dym_dict_local.Load()

            # Expose the freshly built dictionaries on the instance.
            self.coll_sim_din = self.dym_dict_local.dyn_col_sim
            self.coll_inv_din = self.dym_dict_local.col_inv

            self.__AggregateAttributes()

            target_file = f"{self.dst_dir_name}/{filepath}.json"
            CommonUtilities.writeDictToJson(self.__current_json_data,
                                            target_file)

        self.__dynDict.save()
    def __cleanSingleFile(self, source_name, file_path):
        """Clean one JSON file and write its significant data out.

        The file at ``src_dir_name/file_path`` is loaded and run through a
        ``DataCleaner`` (keys first, then values). The three dictionaries
        returned by ``getEmptyDataKeys`` are recorded per file: the first two
        in ``discarded_info_pool`` (when at least one is non-empty) and the
        third in ``composite_value_pool`` (when non-empty). The significant
        data is then written to ``dst_dir_name/file_path.json``.

        Args:
            source_name: Not used by this method.
            file_path: File path relative to ``src_dir_name``; also used as
                the key in both pools and as the output path stem.
        """
        raw_data = CommonUtilities.loadJsonFile(
            f"{self.src_dir_name}/{file_path}")

        cleaner = DataCleaner(raw_data, self.gtAttrNames)
        cleaner.cleanKeys()
        cleaner.cleanValues()
        significant_data = cleaner.getSignificantData()

        empty_keys, empty_values, composite_values = (
            cleaner.getEmptyDataKeys())

        # Record discarded information only when there is something to record.
        if empty_keys or empty_values:
            self.discarded_info_pool[file_path] = {
                "key_empty": empty_keys,
                "value_empty": empty_values,
            }

        if composite_values:
            self.composite_value_pool[file_path] = composite_values

        output_path = f"{self.dst_dir_name}/{file_path}.json"
        CommonUtilities.writeDictToJson(significant_data, output_path)
Beispiel #4
0
    def __AggregateFiles(self, filesList):
        """Merge the attributes of several JSON files into one dictionary.

        Args:
            filesList: Iterable of file paths relative to ``src_dir_name``.

        Returns:
            A dict mapping each attribute name to a list of
            ``(filepath, attribute value)`` tuples, one per file containing
            that attribute, in the order the files were visited.
        """
        collected = {}
        for current_path in filesList:
            file_content = CommonUtilities.loadJsonFile(
                f"{self.src_dir_name}/{current_path}")
            for name, value in file_content.items():
                # Create the bucket on first sight, then record the origin.
                collected.setdefault(name, []).append((current_path, value))
        return collected
    def __AggregateAttributesFile(self):
        """Aggregate the attributes of every file referenced by ``lk_path_dict``.

        ``lk_path_dict`` is walked as a two-level mapping whose inner level
        maps a source to a file path. For each inner entry
        ``__findOrCreateDir`` is called with the source, the file is loaded
        from ``src_dir_name``, ``__AggregateAttributes`` is run on it and the
        current JSON data is written to ``dst_dir_name`` with ``.json``
        appended to the path.
        """
        for sources_by_name in self.lk_path_dict.values():
            for source_name, relative_path in sources_by_name.items():
                self.__findOrCreateDir(source_name)

                input_path = f"{self.src_dir_name}/{relative_path}"
                self.__current_json_data = CommonUtilities.loadJsonFile(
                    input_path)
                self.__AggregateAttributes()

                output_path = f"{self.dst_dir_name}/{relative_path}.json"
                CommonUtilities.writeDictToJson(self.__current_json_data,
                                                output_path)
    def __makeCollisionDictionary(self):
        """Build the collision dictionary from every file in ``filePathData``.

        ``filePathData`` is walked as a two-level mapping whose leaves are
        lists of file paths. Each file is loaded from ``srcDir`` and handed
        to ``__mergeFileWithCollisionDict``; afterwards ``collision_dict`` is
        written to ``outFile``. The start and end times of the run are stored
        in ``exc_start_time`` and ``exc_end_time``.
        """
        class_name = type(self).__name__
        print(f"[{class_name}]Running ---> makeCollisionDictionary")

        self.exc_start_time = datetime.datetime.now()

        for files_by_source in self.filePathData.values():
            for source_files in files_by_source.values():
                for relative_path in source_files:
                    file_data = CommonUtilities.loadJsonFile(
                        f"{self.srcDir}/{relative_path}")
                    self.__mergeFileWithCollisionDict(file_data)

        CommonUtilities.writeDictToJson(self.collision_dict, self.outFile)

        self.exc_end_time = datetime.datetime.now()
    def __AggregateFiles(self, filesList):
        """Merge the attribute values of several JSON files into one dictionary.

        Every attribute value is treated as an iterable whose items are
        appended to the list kept for that attribute name. After each file,
        ``progress_bar_count`` is incremented and the progress bar is
        refreshed against the number of keys in ``lk_path_dict``.

        Args:
            filesList: Iterable of file paths relative to ``src_dir_name``.

        Returns:
            A dict mapping each attribute name to the concatenation of its
            values across all files, in visiting order.
        """
        collected = {}
        for current_path in filesList:
            file_content = CommonUtilities.loadJsonFile(
                f"{self.src_dir_name}/{current_path}")
            for name, values in file_content.items():
                # Create the list on first sight, then append every item.
                collected.setdefault(name, []).extend(values)

            self.progress_bar_count += 1
            CommonUtilities.progressBar(self.progress_bar_count,
                                        len(self.lk_path_dict.keys()),
                                        status=f"Agg: {current_path}")
        return collected
import CommonUtilities
from Constats_App import *

dictBigCluster = CommonUtilities.loadJsonFile(
    f"{PHASE_3_SOURCE_DIR}/big_cluster.json", ext="")

dictColl = {}
dictColl2 = {}
dictCollInv = {}

# Count, for every cluster key, how many times each value occurs.
# Every entry must unpack as (source, value, *extra); only the value is
# used for this count.
for key, values in dictBigCluster.items():
    dictColl[key] = {}
    for src, val, *out in values:
        dictColl[key][val] = dictColl[key].get(val, 0) + 1

CommonUtilities.writeDictToJson(
    dictColl, f"{PHASE_3_SOURCE_DIR}/big_clusterColl.json")

for key, values in dictBigCluster.items():
    dictColl2[key] = {}

    for src, val, *out in values:
        if not val in dictColl2[key].keys():
            dictColl2[key][val] = {}
        if len(out) > 0:
            oldAttrName = out[0]
        else:
            oldAttrName = key
        if not oldAttrName in dictColl2[key][val].keys():
 def LoadPath(self):
     """Read the JSON file at ``dst_lk_path_dict`` into ``lk_2_path_dict``."""
     loaded_paths = CommonUtilities.loadJsonFile(self.dst_lk_path_dict,
                                                 ext='')
     self.lk_2_path_dict = loaded_paths
 def __loadCollisionDictionary(self):
     """Read the JSON file at ``outFile`` into ``collision_dict`` and report it."""
     loaded_dictionary = CommonUtilities.loadJsonFile(self.outFile, ext="")
     self.collision_dict = loaded_dictionary

     class_name = type(self).__name__
     print(f"{class_name} CollisionDictionary Loaded!")
Beispiel #11
0
from Constats_App import *
import CommonUtilities

# Compare the key sets of the two dynamic similarity dictionaries.
dimdict_1 = CommonUtilities.loadJsonFile(
    COLLISION_DICTIONARY_SIM_DYN_DICT_01, ext="")
dimdict_2 = CommonUtilities.loadJsonFile(
    COLLISION_DICTIONARY_SIM_DYN_DICT_02, ext="")

set_1 = set(dimdict_1.keys())
set_2 = set(dimdict_2.keys())

# Keys that appear in exactly one of the two dictionaries.
print(set_2 ^ set_1)

# Size of each key set, of their overlap and of their symmetric difference.
print(len(set_1), len(set_2), len(set_2 & set_1), len(set_2 ^ set_1))
Beispiel #12
0
from Constats_App import *
import CommonUtilities

# Rank every key of the inverse collision dictionary by the length of its
# 'attribute_list', largest first, and print the top of the ranking.
collisionInvData = CommonUtilities.loadJsonFile(COLLISION_DICTIONARY_INV_DICT,
                                                ext="")

# One (key, attribute count) pair per dictionary entry.
ordered_value = [(key, len(val['attribute_list']))
                 for key, val in collisionInvData.items()]
ordered_value.sort(key=lambda x: x[1], reverse=True)

# Print at most the first 50 entries. Slicing instead of indexing 0..49
# avoids an IndexError when the dictionary holds fewer than 50 keys.
for entry in ordered_value[:50]:
    print(entry)
Beispiel #13
0
import CommonUtilities
from Constats_App import *

dictBigCluster2 = CommonUtilities.loadJsonFile(
    f"{PHASE_3_SOURCE_DIR}/big_cluster2.json", ext="")
clusterTaDict = CommonUtilities.loadJsonFile(
    f"{PHASE_3_SOURCE_DIR}/big_clusterkey_5.json", ext="")

clusterTaDictinv = {}
bigCluster3 = {}

# Invert the cluster-key dictionary: every listed value points back to the
# key it was listed under (a later key overwrites an earlier one).
for key, values in clusterTaDict.items():
    clusterTaDictinv.update((value, key) for value in values)

CommonUtilities.writeDictToJson(
    clusterTaDictinv, f"{PHASE_3_SOURCE_DIR}/big_clusterkey_5_inv.json")

# Regroup the non-empty clusters under their root key; every item carries
# its original attribute name, falling back to the cluster key when the
# entry has no third field.
for key, values in dictBigCluster2.items():
    if len(values) == 0:
        continue

    rootKey = clusterTaDictinv[key]
    bigCluster3.setdefault(rootKey, [])

    for src, value, *oldAttrName in values:
        curr_item = (src, value, oldAttrName[0] if oldAttrName else key)
        bigCluster3[rootKey].append(curr_item)

CommonUtilities.writeDictToJson(
    bigCluster3, f"{PHASE_3_SOURCE_DIR}/big_cluster3.json")
Beispiel #14
0
 def __loadPathDataFromFile(self):
     """Read the JSON file ``SOURCES_BASE_CM_DICT`` into ``filePathData``."""
     loaded_paths = CommonUtilities.loadJsonFile(SOURCES_BASE_CM_DICT, ext="")
     self.filePathData = loaded_paths
Beispiel #15
0
 def __Load(self):
     """Load the dictionary file, creating it empty first when it is missing.

     If nothing exists at ``__pathDictionary`` an empty dict is written
     there, then the file is read into ``__dyn_dictionary``.
     """
     dictionary_path = self.__pathDictionary

     # Make sure there is a file to read before loading it.
     file_missing = not os.path.exists(dictionary_path)
     if file_missing:
         CommonUtilities.writeDictToJson({}, dictionary_path)

     self.__dyn_dictionary = CommonUtilities.loadJsonFile(dictionary_path,
                                                          ext="")
import CommonUtilities
from Constats_App import *
from fuzzywuzzy import fuzz

bigcluster = CommonUtilities.loadJsonFile(
    f"{PHASE_3_SOURCE_DIR}/big_cluster2")
keySimInv = CommonUtilities.loadJsonFile(
    f"{PHASE_3_SOURCE_DIR}/testInv")

outputData2 = {}
outputData3 = {}
outputData4 = {}
outputData5 = {}

## Step 1: count the elements of every attribute name, skipping empty ones.
outputData = {
    key: len(values)
    for key, values in bigcluster.items()
    if len(values) > 0
}

CommonUtilities.writeDictToJson(
    outputData, f"{PHASE_3_SOURCE_DIR}/big_clusterkey.json")

####Step 2: count the elements of each key, grouped by their original attribute name
for key, values in bigcluster.items():
    if len(values) > 0:
        outputData2[key] = {}
        for src, val, *oldAttrname in values:
            if len(oldAttrname) > 0:
                if not oldAttrname[0] in outputData2[key].keys():
                    outputData2[key][oldAttrname[0]] = 0
                outputData2[key][oldAttrname[0]] += 1
            else:
Beispiel #17
0
import CommonUtilities
from Constats_App import *


dictSim = CommonUtilities.loadJsonFile(f"{COLLISION_DICTIONARY_SIM_DICT}", ext="")

# Inverse similarity dictionary: dictSimInv[attr][key2] is the score that
# key2's 'attr_sim_list' / 'attr_sim_score' pair assigns to attr.
# Every key of dictSim gets an entry, even when no list mentions it.
dictSimInv = {key: {} for key in dictSim}

# Single pass over the similarity lists instead of rescanning the whole
# dictionary once per key. Only attributes that are keys of dictSim are
# recorded, and when an attribute is listed more than once the score at its
# last position wins.
# NOTE(review): assumes the items of 'attr_sim_list' are hashable (e.g.
# attribute-name strings) - confirm against the file contents.
for key2, entry in dictSim.items():
    for x, attr in enumerate(entry['attr_sim_list']):
        if attr in dictSimInv:
            dictSimInv[attr][key2] = entry['attr_sim_score'][x]

CommonUtilities.writeDictToJson(dictSimInv, f"{PHASE_3_SOURCE_DIR}/testInv.json")