def makeJsonPathFile_LK(self):
        """Build the linkage-path dictionary from the LK spreadsheet.

        Reads the first sheet of ``self.lkFileName``, splits each row into
        a left/right resource pair, and records — per left resource — the
        right resources grouped by their source. The resulting dictionary
        is written as JSON to SOURCES_BASE_LK_DICT. Also updates the
        ``total_object_linked`` / ``total_file_linkage`` counters and the
        ``resources_founded`` list of already-seen resources.
        """
        print(f"[{type(self).__name__}]Running ---> makeJsonPathFile_LK")

        workbook = xlrd.open_workbook(self.lkFileName)
        first_sheet = workbook.sheet_by_index(0)

        # Row 0 is the header; data starts at row 1.
        for row_idx in range(1, first_sheet.nrows):
            CommonUtilities.progressBar(row_idx, first_sheet.nrows)
            cells = CommonUtilities.splitOnComma(first_sheet.cell_value(row_idx, 0))
            left_r, righ_r = self.__getLeftRightElment(cells)
            source_lr = CommonUtilities.getSourceFromPath(left_r)
            source_rr = CommonUtilities.getSourceFromPath(righ_r)

            # First time this left resource appears: open a new entry
            # seeded with the resource itself under its own source.
            if (left_r not in self.dictionary_lk_path
                    and left_r not in self.resources_founded):
                self.dictionary_lk_path[left_r] = {source_lr: [left_r]}
                self.resources_founded.append(left_r)
                self.total_object_linked += 1
                self.total_file_linkage += 1

            # NOTE(review): this assumes dictionary_lk_path[left_r] exists
            # at this point; if left_r was first encountered as a
            # right-hand element this would raise KeyError — confirm
            # against the actual input data.
            if righ_r not in self.resources_founded:
                if source_rr not in self.dictionary_lk_path[left_r]:
                    self.dictionary_lk_path[left_r][source_rr] = []
                self.dictionary_lk_path[left_r][source_rr].append(righ_r)
                self.resources_founded.append(righ_r)
                self.total_file_linkage += 1

        CommonUtilities.writeDictToJson(self.dictionary_lk_path,
                                        SOURCES_BASE_LK_DICT)
    def __AggregateAttributesFile(self):
        """Aggregate attributes one linked file at a time.

        For every (objectSpect, filepath) pair in ``self.lk_path_dict``:
        loads the source JSON, builds the per-file dynamic dictionaries
        via SecondIterationDictionary, runs ``__AggregateAttributes`` and
        writes the aggregated data under ``self.dst_dir_name``. Finally
        persists the shared dynamic dictionary.
        """
        total = len(self.lk_path_dict)
        done = 0
        print("")
        CommonUtilities.progressBar(done, total, status="Loading ..")

        for objectSpect, filepath in self.lk_path_dict.items():
            done += 1
            CommonUtilities.progressBar(done, total,
                                        status=f"Agg: {objectSpect}")

            self.__current_json_data = CommonUtilities.loadJsonFile(
                f"{self.src_dir_name}/{filepath}")

            # As soon as the file is loaded for aggregation, build the
            # dynamic dictionaries for this file.
            self.dym_dict_local = SecondIterationDictionary(
                self.__current_json_data)
            self.dym_dict_local.Load()

            # Expose the per-file dynamic dictionaries on the instance.
            self.coll_sim_din = self.dym_dict_local.dyn_col_sim
            self.coll_inv_din = self.dym_dict_local.col_inv

            self.__AggregateAttributes()

            CommonUtilities.writeDictToJson(
                self.__current_json_data,
                f"{self.dst_dir_name}/{filepath}.json")

        self.__dynDict.save()
    def __AggregateFileS(self):
        """Merge every file referenced by ``self.lk_path_dict`` into one
        big cluster written to ``<dst_dir_name>/big_cluster.json``.

        ``self.progress_bar_count`` is reset here and advanced by
        ``__AggregateFiles`` as it processes each file.
        """
        self.progress_bar_count = 0
        print("")
        CommonUtilities.progressBar(self.progress_bar_count,
                                    len(self.lk_path_dict),
                                    status="Loading ..")

        merged = self.__AggregateFiles(self.lk_path_dict.values())
        CommonUtilities.writeDictToJson(
            merged, f"{self.dst_dir_name}/big_cluster.json")
    def __AggregateFiles(self, filesList):
        """Merge the JSON files in *filesList* into a single dictionary.

        Each file maps attribute names to lists of values; lists for the
        same attribute across files are concatenated in file order.
        Advances ``self.progress_bar_count`` once per file as a side
        effect.

        :param filesList: iterable of file paths relative to
            ``self.src_dir_name``
        :return: dict mapping attribute name -> concatenated value list
        """
        mergedData = {}
        # Progress is tracked against the whole lk_path_dict, not just
        # this call's file list; hoist the invariant total out of the loop.
        total = len(self.lk_path_dict)

        for filepath in filesList:
            jsdata = CommonUtilities.loadJsonFile(
                f"{self.src_dir_name}/{filepath}")
            for attrName, attrValue in jsdata.items():
                # setdefault replaces the explicit membership check +
                # manual list creation of the original.
                mergedData.setdefault(attrName, []).extend(attrValue)
            self.progress_bar_count += 1
            CommonUtilities.progressBar(self.progress_bar_count, total,
                                        status=f"Agg: {filepath}")
        return mergedData
# Example #5
 def __AggregateFileSameSourceAndSpect(self):
     
     progressCount = 0
     print("")
     CommonUtilities.progressBar(progressCount, len(self.lk_path_dict.keys()), status="Loading ..")
     for objectSpect, objectSources in self.lk_path_dict.items():
         progressCount +=1 
         CommonUtilities.progressBar(progressCount, len(self.lk_path_dict.keys()), status=f"Agg: {objectSpect}")
         self.lk_1_path_dict[objectSpect] = {}
         
         for sources, filespath in objectSources.items():
             self.__findOrCreateDir(sources)
             
             self.lk_1_path_dict[objectSpect][sources] = f"{sources}/{self.newFileNameID}"
             new_merged_file = self.__AggregateFiles(filespath)
             CommonUtilities.writeDictToJson(new_merged_file, f"{self.dst_dir_name}/{sources}/{self.newFileNameID}.json")
             self.newFileNameID += 1
    def __AggregateFileSameSpect(self):
        """Aggregate, per object spect, the files of every source into a
        single JSON named ``<newFileNameID>.json`` under
        ``self.dst_dir_name``, recording the id (as a string) in
        ``self.lk_2_path_dict``. Increments ``self.newFileNameID`` once
        per spect processed.
        """
        done = 0
        total = len(self.lk_path_dict)
        print("")
        CommonUtilities.progressBar(done, total, status="Loading ..")
        for objectSpect, objectSources in self.lk_path_dict.items():
            done += 1
            CommonUtilities.progressBar(done, total,
                                        status=f"Agg: {objectSpect}")

            # Every source's file list for this spect, in dict order.
            pathToMerge = list(objectSources.values())

            self.lk_2_path_dict[objectSpect] = f"{self.newFileNameID}"
            new_merged_file = self.__AggregateFiles(pathToMerge)
            CommonUtilities.writeDictToJson(
                new_merged_file,
                f"{self.dst_dir_name}/{self.newFileNameID}.json")
            self.newFileNameID += 1
import pandas as pd
from tqdm import tqdm
import json
from collections import defaultdict
import itertools
import CommonUtilities

# Read the ground truth into a pandas DataFrame.
pos_df = pd.read_csv('sources_3/gt_onevalue.csv')

# Collect every attribute name appearing on either side of the GT.
# Using a set from the start deduplicates as we go, instead of
# accumulating a duplicate-laden list and converting at the end.
attrNameSet = set()

for index, row in pos_df.iterrows():
    attrNameSet.add(row['left_instance_attribute'])
    attrNameSet.add(row['right_instance_attribute'])

    CommonUtilities.progressBar(index + 1, pos_df.shape[0],
                                f"{index+1}/{pos_df.shape[0]}")

# Write the names lexicographically sorted, one per line.
with open("attrToAnalizeFull.txt", "w") as attrFile:
    for attr in sorted(attrNameSet):
        attrFile.write(f"{attr}\n")
import CommonUtilities

# Load the previously generated attribute-name list.
with open("attrToAnalizeFull.txt", "r") as AttrNamesF:
    gtAttrNames = AttrNamesF.readlines()

# Strip the trailing newline from each line. rstrip("\n") also handles
# a final line without a newline (the original line[:-1] would have
# eaten a real character there).
gtAttrNames = {line.rstrip("\n") for line in gtAttrNames}

with open("sources_3/big_cluster3_refactor.json", 'r') as f:
    distros_dict = json.load(f)
    arrResult = ["left_instance_attribute,right_instance_attribute\n"]
    for key, value in distros_dict.items():
        # Build the ordered upper-triangular cartesian product of the
        # cluster's attributes, restricted to names present in the GT.
        print(f"Working On {key}")
        slist = sorted(set(value).intersection(gtAttrNames))
        elementCount = len(slist)
        print("List sorted")
        for x in range(elementCount):
            currente = slist[x]
            # Pair the current element with every later one, so each
            # unordered pair is emitted exactly once.
            arrResult += [currente + "," + s + "\n" for s in slist[x + 1:]]
            CommonUtilities.progressBar(x + 1, elementCount,
                                        f"{x}/{elementCount}")
        print(f"Start Writing: {len(arrResult)} lines of {key}")
        # Append mode so pairs from every key accumulate in one CSV.
        with open("sources_3/custom_ground.csv", "a+") as gF:
            gF.writelines(arrResult)
        arrResult = []
        print("File Saved")