def SeeDCS(dcsObj):
    """Print a debugging dump of a DCS object to stdout.

    Emits a banner, then the object's ``sentence`` and raw ``lemmas``
    attributes, then a flat list of every lemma passed through ``rom_slp``,
    and finally the ``cng`` attribute.

    Args:
        dcsObj: object exposing ``sentence``, ``lemmas`` (an iterable of
            iterables of lemmas) and ``cng`` attributes.
    """
    print('DCS ANALYZE', '-' * 15, sep='\n')
    print(dcsObj.sentence)
    print(dcsObj.lemmas)

    # Flatten the nested lemma lists, converting each lemma with rom_slp.
    converted = []
    for chunk in dcsObj.lemmas:
        for lemma in chunk:
            converted.append(rom_slp(lemma))
    print("Lemmas:", converted)

    print(dcsObj.cng)
mat_file.close()
print('Loaded Old Matrix')
print('-' * 15)

# Rebuild the matrix under rom_slp-converted keys and add the grouped cgs to
# the existing entries, i.e. {'lemma~cng': {'12_sg': [filenames]}}
updated_matrix = {}
ith_tup = 1
all_tups = len(matrix)
for tup in list(matrix):
    # To track progress
    print("%d/%d" % (ith_tup, all_tups))
    ith_tup += 1

    lemmacng = tup.split('_')
    new_tup = "%s_%s" % (rom_slp(lemmacng[0]), lemmacng[1])

    # The converted key shares (does not copy) the original per-cng mapping.
    entry = matrix[tup]
    updated_matrix[new_tup] = entry
    # Snapshot the cng keys before group keys are added to the same mapping.
    current_cngs = list(entry)

    for group, members in new_groups.items():
        gathered = entry[group] = []
        for cng in current_cngs:
            cng = str(cng)
            if int(cng) in members:
                gathered += entry[cng]
        # A group that matched none of the entry's cngs is not kept.
        if not gathered:
            del entry[group]

# add cg as keys to the updated matrix i.e. {'lemma~cg': {'12_sg': [filenames]}}
# ith_tup = 1
# all_tups = len(list(matrix.keys()))
# for tup in list(matrix.keys()):
# Accumulate, for every DCS file, lemma_cng co-occurrence counts into oneD.
# NOTE(review): pickleFixLoad presumably unpickles the file; unpickling is
# only safe on trusted data - confirm the DCS dumps come from a trusted source.
total = len(dcsList)
u = 0
for dcsFile in dcsList:
    # Progress indicator: "<current>/<total>"
    u += 1
    print("%d/%d" % (u, total))

    try:
        dcsObj = pickleFixLoad(dcsPath + dcsFile)
    except (pickle.UnpicklingError, EOFError):
        print("Okay some thing wrong with the file %s" % dcsFile)
        # Skip the unreadable file. Falling through would re-count the
        # previous file's dcsObj (or raise NameError on the very first file).
        continue

    # get lemmas
    lemma_list = [rom_slp(lemma) for arr in dcsObj.lemmas for lemma in arr]
    lemmaCount = Counter(lemma_list)

    # get cngs
    cng_list = [c for arr in dcsObj.cng for c in arr]
    cngCount = Counter(cng_list)

    # add tup to the matrix
    # NOTE(review): every lemma of the file is paired with every cng of the
    # file (cross product), weighted by the lemma frequency only - confirm
    # this is the intended statistic.
    for lemma, lemma_freq in lemmaCount.items():
        for cng in cngCount:
            tup = lemma + '_' + cng
            oneD[tup] = oneD.get(tup, 0) + lemma_freq
matrix = json.load(mat_file)
mat_file.close()
print('Loaded Old Matrix')
print('-' * 15)

# Work on a shallow copy so group keys can be added and removed while the
# original matrix is still being read.
updated_matrix = matrix.copy()
print(list(matrix.keys()))
print(new_groups)

for group in list(new_groups.keys()):
    # Reuse an existing entry for the group, otherwise start an empty one.
    bucket = updated_matrix.setdefault(group, {})

    for cng in new_groups[group]:
        cng_key = str(cng)
        if cng_key in matrix:
            print(cng)
            lemma_files = matrix[cng_key]
            for lemma in lemma_files:
                print(rom_slp(lemma), 'THe RoMANov of', lemma)
                if lemma not in bucket:
                    bucket[lemma] = []
                    print("created lemma", lemma, "for cng", group)
                # Merge this cng's list for the lemma into the group's list.
                bucket[lemma] += lemma_files[lemma]

    # Groups that collected nothing are dropped again.
    if not bucket:
        del updated_matrix[group]

for key, value in updated_matrix.items():
    print(key, value, '\n\n\n')

with open('cng2lemma_new.json', 'w') as cng2lemma:
    json.dump(updated_matrix, cng2lemma)
# Count, over all DCS files, how often each cg (group of cngs) occurs.
# NOTE(review): pickleFixLoad presumably unpickles the file; unpickling is
# only safe on trusted data - confirm the DCS dumps come from a trusted source.
print("Loading DCS files")
dcsList = os.listdir(dcsPath)
oneD = {}
for dcsFile in dcsList:
    try:
        dcsObj = pickleFixLoad(dcsPath + dcsFile)
    except (pickle.UnpicklingError, EOFError):
        print("Okay some thing wrong with the file %s" % dcsFile)
        # Skip the unreadable file. Falling through would re-count the
        # previous file's dcsObj (or raise NameError on the very first file).
        continue

    cng_list = [rom_slp(c) for arr in dcsObj.cng for c in arr]
    cngCount = Counter(cng_list)

    for cng, cng_freq in cngCount.items():
        # add cg counts
        CgsforCng = CgsForCng(int(cng))
        for cg in CgsforCng:
            oneD[cg] = oneD.get(cg, 0) + cng_freq

        # add cng counts
        # NOTE(review): only the zero-initialisation is visible here; the
        # increment presumably follows this fragment - confirm.
        if cng not in oneD:
            oneD[cng] = 0