コード例 #1
0
def SeeDCS(dcsObj):
    """Print a quick diagnostic dump of a DCS object.

    Writes, in order: a banner, the object's ``sentence``, its raw
    ``lemmas`` attribute, the same lemmas flattened into one list with each
    item passed through ``rom_slp``, and finally its ``cng`` attribute.

    Args:
        dcsObj: object exposing ``sentence``, ``lemmas`` (an iterable of
            iterables) and ``cng`` attributes.
    """
    banner_rule = '-' * 15
    print('DCS ANALYZE')
    print(banner_rule)
    print(dcsObj.sentence)
    print(dcsObj.lemmas)

    # Flatten the nested lemma groups, converting each entry on the way.
    converted = []
    for lemma_group in dcsObj.lemmas:
        for lemma in lemma_group:
            converted.append(rom_slp(lemma))
    print("Lemmas:", converted)

    print(dcsObj.cng)
コード例 #2
0
mat_file.close()
print('Loaded Old Matrix')
print('-' * 15)

# Build a re-keyed matrix. Each 'lemma_cng' key has its lemma part passed
# through rom_slp, and each entry gains one extra key per group in
# new_groups holding the concatenation of the lists stored under the cngs
# that belong to that group, e.g. {'lemma_cng': {'12_sg': [...]}}.
updated_matrix = {}
all_tups = len(matrix)
for ith_tup, tup in enumerate(matrix, start=1):

    # To track progress
    print("%d/%d" % (ith_tup, all_tups))

    lemmacng = tup.split('_')
    new_tup = "%s_%s" % (rom_slp(lemmacng[0]), lemmacng[1])

    # Copy the inner dict: adding or removing group keys on the new entry
    # must not leak back into the loaded matrix.
    source_entry = matrix[tup]
    entry = dict(source_entry)
    updated_matrix[new_tup] = entry

    for group, members in new_groups.items():
        # Only the original cng keys are scanned, never the group keys
        # added during this pass.
        merged = []
        for cng, values in source_entry.items():
            if int(cng) in members:
                merged += values

        if merged:
            entry[group] = merged
        else:
            # A group with no contributing cng is not kept on the entry.
            entry.pop(group, None)

# add cg as keys to the updated matrix i.e. {'lemma~cg': {'12_sg': [filenames]}}
# ith_tup = 1
# all_tups = len(list(matrix.keys()))
# for tup in list(matrix.keys()):
コード例 #3
0
# For every DCS file, count each (lemma, cng) combination that occurs in it
# and accumulate the totals in oneD under the key '<lemma>_<cng>'.
total = len(dcsList)
u = 0
for u, dcsFile in enumerate(dcsList, start=1):
    print("%d/%d" % (u, total))

    # NOTE(review): pickleFixLoad presumably unpickles the file; it should
    # only be pointed at trusted data.
    try:
        dcsObj = pickleFixLoad(dcsPath + dcsFile)
    except (pickle.UnpicklingError, EOFError):
        print("Okay some thing wrong with the file %s" % dcsFile)
        # Skip unreadable files. Falling through would either fail on an
        # undefined dcsObj or count the previous file a second time.
        continue

    # get lemmas
    lemma_list = [rom_slp(raw) for arr in dcsObj.lemmas for raw in arr]
    lemmaCount = Counter(lemma_list)

    # get cngs
    cng_list = [c for arr in dcsObj.cng for c in arr]
    cngCount = Counter(cng_list)

    # add tup to the matrix: every lemma of the file is paired with every
    # cng of the file, weighted by how often the lemma occurs
    for lemma, lemma_freq in lemmaCount.items():
        for cng in cngCount:
            tup = lemma + '_' + cng
            oneD[tup] = oneD.get(tup, 0) + lemma_freq
コード例 #4
0
matrix = json.load(mat_file)
mat_file.close()
print('Loaded Old Matrix')
print('-' * 15)

# Work on a shallow copy so group keys can be added to (or removed from) the
# result while the loaded matrix keeps its original set of top-level keys.
updated_matrix = dict(matrix)
print(list(matrix))
print(new_groups)

# For each group, gather the per-lemma lists of all its member cngs that are
# present in the loaded matrix.
for group, members in new_groups.items():
    bucket = updated_matrix.setdefault(group, {})
    for cng in members:
        cng_key = str(cng)
        if cng_key in matrix:
            print(cng)
            lemma_map = matrix[cng_key]
            for lemma in lemma_map:
                print(rom_slp(lemma), 'THe RoMANov of', lemma)
                if lemma not in bucket:
                    bucket[lemma] = []
                    print("created lemma", lemma, "for cng", group)
                bucket[lemma] += lemma_map[lemma]
    # Groups that collected nothing are dropped from the result.
    if not bucket:
        del updated_matrix[group]

for key, value in updated_matrix.items():
    print(key, value, '\n\n\n')

with open('cng2lemma_new.json', 'w') as cng2lemma:
    json.dump(updated_matrix, cng2lemma)
コード例 #5
0
print("Loading DCS files")
dcsList = os.listdir(dcsPath)

# Running totals keyed by cg (as returned by CgsForCng) and by cng.
oneD = {}

for dcsFile in dcsList:
    # NOTE(review): pickleFixLoad presumably unpickles the file; it should
    # only be pointed at trusted data.
    try:
        dcsObj = pickleFixLoad(dcsPath + dcsFile)
    except (pickle.UnpicklingError, EOFError):
        print("Okay some thing wrong with the file %s" % dcsFile)
        # Skip unreadable files. Falling through would either fail on an
        # undefined dcsObj or count the previous file a second time.
        continue
    cng_list = [rom_slp(c) for arr in dcsObj.cng for c in arr]
    cngCount = Counter(cng_list)
    for cng in cngCount:

        # add cg counts
        CgsforCng = CgsForCng(int(cng))
        for cg in CgsforCng:
            oneD[cg] = oneD.get(cg, 0) + cngCount[cng]

        # add cng counts
        if cng not in oneD:
            oneD[cng] = 0