Ejemplo n.º 1
0
def check_wrd(index, length, word, f_wrd):
    """Return ``index`` if the phrase starting there equals ``word``.

    The candidate phrase is ``f_wrd`` followed by the entries that
    ``return_key`` finds in ``h_wrd_dict`` for the ids ``index + 1`` up to
    ``index + length - 1``, joined with single spaces.

    Args:
        index: Integer id of the first word of the candidate phrase.
        length: Number of words the phrase should contain.
        word: Space-separated phrase to compare against.
        f_wrd: The first word of the candidate phrase.

    Returns:
        ``index`` when the rebuilt phrase equals ``word``; ``None`` otherwise,
        including when ``return_key`` yields ``None`` for one of the ids.
    """
    parts = [f_wrd]
    for offset in range(1, length):
        nxt = return_key(str(index + offset), h_wrd_dict)
        if nxt is None:
            # Nothing stored for this id, so the phrase cannot match. Bailing
            # out here also avoids the TypeError str.join raises on None.
            return None
        parts.append(nxt)
    if ' '.join(parts) == word:
        return index
    return None
def return_id(word):
    """Return the id at which the space-separated phrase ``word`` starts.

    The first token is looked up among the values of ``h_wrd_dict`` via
    ``return_key``; the following tokens are then read from ``h_wrd_dict`` at
    the consecutive ids ``out + 1``, ``out + 2``, ... and the rebuilt phrase
    is compared with ``word``.

    Args:
        word: Phrase whose tokens are separated by whitespace.

    Returns:
        The id returned by ``return_key`` for the first token when the
        rebuilt phrase equals ``word``; ``None`` otherwise (empty input,
        unknown first token, phrase running past the stored ids, or a
        mismatch).
    """
    tokens = word.split()
    if not tokens:
        # Empty / whitespace-only input has no first token to look up.
        return None
    first = tokens[0]
    if first not in h_wrd_dict.values():
        return None
    out = return_key(first, h_wrd_dict)
    if out is None:
        return None
    phrase = [first]
    for offset in range(1, len(tokens)):
        nxt = h_wrd_dict.get(out + offset)
        if nxt is None:
            # The phrase is longer than the ids available after ``out``.
            return None
        phrase.append(nxt)
    if ' '.join(phrase) == word:
        return out
    return None
Ejemplo n.º 3
0
def check_for_kriyA_mUla(start_id, length, anu_vb_word):
    """Check whether the words from ``start_id`` spell ``anu_vb_word``.

    For each of ``length`` consecutive ids the word is taken from
    ``v_rt_dic`` when the id is a key there, otherwise it is looked up with
    ``return_key`` in ``h_wrd_dict``.  When the word found is the token
    ``'nahIM'`` the entry at the following id is used in its place
    (e.g. ``Bexa nahIM kara``).  The collected words are joined with ``'_'``
    and compared with ``anu_vb_word``.

    Args:
        start_id: Integer id of the first word to inspect.
        length: Number of ids to inspect.
        anu_vb_word: Expected ``'_'``-joined phrase.

    Returns:
        The collected ids as a space-separated string when the phrase
        matches; ``None`` otherwise, including when the entry that should
        replace ``'nahIM'`` does not exist.
    """
    k_m_wrd = []
    ids = []
    for i in range(length):
        cur = start_id + i
        if cur in v_rt_dic:
            root = v_rt_dic[cur]  # e.g. kara
            if root == 'nahIM':
                cur += 1
                root = v_rt_dic.get(cur)
                if root is None:
                    # No verb root after 'nahIM': cannot match (the original
                    # raised KeyError here).
                    return None
        else:
            root = return_key(str(cur), h_wrd_dict)  # e.g. bAwa
            if root is None:
                # An id without a word contributes nothing to the phrase.
                continue
            if root == 'nahIM':
                cur += 1
                root = return_key(str(cur), h_wrd_dict)
                if root is None:
                    # No word after 'nahIM': cannot match (the original
                    # appended None and crashed in str.join).
                    return None
        k_m_wrd.append(root)
        ids.append(str(cur))

    if anu_vb_word == '_'.join(k_m_wrd):
        return ' '.join(ids)
    return None
Ejemplo n.º 4
0
     # NOTE(review): this excerpt is cut at both ends and its indentation is
     # inconsistent (the assignment below is deeper than the `for` after it).
     # Candidate meanings of eng_rt are stored '/'-separated in kriyA_mula_dic.
     mngs = kriyA_mula_dic[eng_rt].split('/')
 for each in mngs:
     # Each candidate is a '_'-joined phrase; its first word anchors the lookup.
     wrd = each.split('_')
     if wrd[0] in h_wrd_dict.keys():
         if '/' not in h_wrd_dict[wrd[0]]:
             # Only one id is stored for the first word: test the phrase there.
             out = check_for_kriyA_mUla(int(h_wrd_dict[wrd[0]]),
                                        len(wrd), each)
             if out != None:
                 #                       print(each, h_wrd_dict[wrd[0]])
                 print('(anu_id-manu_verb_root-ids', key, each, out,
                       ')')
             else:
                 # No phrase match: fall back to the plain verb meanings.
                 if eng_rt in verb_dic.keys():
                     # NOTE(review): this rebinds `mngs` and `each`, the names
                     # driving the enclosing loop -- confirm that is intended.
                     mngs = verb_dic[eng_rt].split('/')
                     for each in mngs:
                         v_id = return_key(each, v_rt_dic)
                         if v_id != None:
                             print('(anu_id-manu_verb_root-ids', key,
                                   each, v_id, ')')  # example: ai1E, 2.50, pA
         else:
             # Several '/'-separated ids are stored for the first word: try each.
             #print('&&', wrd[0], h_wrd_dict[wrd[0]], each)
             l = h_wrd_dict[wrd[0]].split('/')
             for item in l:
                 out = check_for_kriyA_mUla(int(item), len(wrd), each)
                 if out != None:
                     #                       print(each, h_wrd_dict[wrd[0]])
                     print('(anu_id-manu_verb_root-ids', key, each, out,
                           ')')
                 else:
                     if eng_rt in verb_dic.keys():
                         mngs = verb_dic[eng_rt].split('/')
Ejemplo n.º 5
0
#################################
# Creating domain dic:
# Every line of the file named by sys.argv[1] that does not start with '#' is
# split on tabs; the part of column 1 before the first '_' and column 2 are
# passed to add_data_in_dic.
with open(sys.argv[1]) as domain_file:
    for line in domain_file:
        if line[0] == '#':
            continue
        lst = line.strip().split('\t')
        if len(lst) < 2:
            # Blank or single-column line: there is no second column to store.
            continue
        wrd = lst[0].split('_')
        add_data_in_dic(domain_dic, wrd[0], lst[1])

# Checking collected capital words in domain dic:
for key in sorted(domain_dic):
    if key in cap_dic.values():
        key_id = return_key(key, cap_dic)
        # To keep an already stored entry instead of overwriting it, guard
        # the assignment with `if key_id not in cap_dom_dic`.
        cap_dom_dic[key_id] = domain_dic[key]


#################################
# Creating default dic:
# Same layout as above, read from sys.argv[2]; lines starting with '#' or a
# tab are skipped and column 1 is used unchanged.
with open(sys.argv[2]) as default_file:
    for line in default_file:
        if line[0] in ('#', '\t'):
            continue
        lst = line.strip().split('\t')
        if len(lst) < 2:
            # Blank or single-column line: there is no second column to store.
            continue
        add_data_in_dic(default_dic, lst[0], lst[1])

# Checking collected capital words in default dic:
# NOTE(review): this excerpt appears to be spliced from two places -- the body
# of the loop below is missing and the statements that follow read `line` and
# fill man_rt_dic / gnp_dic; the indentation does not line up either.
for key in sorted(default_dic):
    if(key in cap_dic.values()):
            # line[:-2] drops the last two characters before the whitespace split.
            lst = line[:-2].split()
            # Field 1 is the key; fields 2 onwards are stored joined with '+'.
            man_rt_dic[lst[1]] = '+'.join(lst[2:])
        if 'verb_root-tam-gnp-v_id' in line:
            lst = line[:-2].split()
            # Fields 4 onwards (joined with '+') key the value found in field 3.
            key = '+'.join(lst[4:])
            gnp_dic[key] = lst[3]

#############################################
# Align on tam wherever the anu root and the manual root disagree.
for key in sorted(anu_tam_dic):
    if anu_tam_dic[key] not in man_tam_dic:
        continue
    k = anu_tam_dic[key]
    val = man_tam_dic[k]
    print(val, man_rt_dic)
    # Look the manual root up by the full '+'-joined value first and fall back
    # to its first element alone.
    man_rt = return_key('+'.join(val), man_rt_dic)
    if man_rt is None:
        man_rt = return_key(val[0], man_rt_dic)
    if anu_rt_dic[key] == man_rt:
        continue
    # Roots differ: record the manual tam words and the combined
    # "<root>+<tam><gnp>" string for this key.
    k_tam_dic[key] = ' '.join(val)
    k_v_t_gnp_dic[key] = man_rt + '+' + k + gnp_dic[val[0]]

#############################################
# Read word.dat in one go; the context manager closes the handle as soon as
# the lines are in memory.
with open("word.dat", "r") as word_file:
    fw = word_file.readlines()
word_len = len(fw) - 1

# Start both lists with one '-' placeholder per line of word.dat.
for i in range(len(fw)):
    list_K_tam.append('-')
    list_K_tam_v.append('-')
Ejemplo n.º 7
0
                    print('(anu_id-manu_verb_root-ids', key, each, out, ')')

# Get kriyA_mUla info:
# Read the file named by sys.argv[4]; the context manager closes the handle
# once the lines are in memory.
with open(sys.argv[4], 'r') as kriyA_mUla_file:
    f3 = kriyA_mUla_file.readlines()

# Collect the first tab-separated column of every line, without duplicates and
# in order of first appearance.
kriyA_mUla_lst = []
for each in f3:
    lst = each.strip().split('\t')
    if lst[0] not in kriyA_mUla_lst:
        kriyA_mUla_lst.append(lst[0])

#print(kriyA_mUla_lst, len(kriyA_mUla_lst))

# For every verb root, build "<word at id key-1>_<root>" and, when that pair is
# listed in kriyA_mUla_lst, re-key the v_rt_dic and tam_dic entries.
# sorted() iterates over a snapshot of the keys, so the deletions and
# insertions below do not disturb the loop.
for key in sorted(v_rt_dic):
    k = key - 1
    wrd = return_key(str(k), h_wrd_dict)
    # NOTE(review): if return_key yields None here, the concatenation below
    # raises TypeError -- confirm every key has a word at id key-1.
    k_m_w = wrd + '_' + v_rt_dic[key]
    #    print(wrd, k_m_w)
    if k_m_w in kriyA_mUla_lst:
        #        print('&&', k , k_m_w)
        # Replace the id-keyed entry by one keyed on the combined phrase whose
        # value is the id pair "k key"; further pairs are appended after '/'.
        del v_rt_dic[key]
        if k_m_w not in v_rt_dic:
            v_rt_dic[k_m_w] = str(k) + ' ' + str(key)
        else:
            v_rt_dic[k_m_w] = v_rt_dic[k_m_w] + '/' + str(k) + ' ' + str(key)
        # Move the tam entry to the same "k key" id pair.
        new_key = str(k) + ' ' + str(key)
        tam_dic[new_key] = tam_dic[key]
        del tam_dic[key]
    else:
        # NOTE(review): the excerpt ends here; what happens to `val` after the
        # deletion is not visible.
        val = v_rt_dic[key]
        del v_rt_dic[key]
Ejemplo n.º 8
0
        # NOTE(review): the enclosing loop/definition starts outside this
        # excerpt; `expr`, `ids`, `h_wrd_dic` and `k_mwe_dic` come from there.
        #print expr
        expr_lst = expr.split()
        h_ids = []
        for i in range(0, len(expr_lst)):
            item = expr_lst[i]
            # Only the part after the first ':' of each item is looked up.
            if item.split(':')[1] in h_wrd_dic.keys():
                val = item.split(':')[1]
                h_ids.append(
                    h_wrd_dic[val]
                )  # Example: val = 'parimiwa', h_wrd_dic[val] = 4 (2.98, ai2E)
                k_mwe_dic[ids[i]] = h_wrd_dic[val]
                #print h_wrd_dic[val]
            else:
                # Unknown word: take the id right after the last matched one.
                # NOTE(review): h_ids[-1] raises IndexError when no earlier
                # item matched -- confirm the first item always matches.
                last_id = h_ids[-1]
                cur_id = int(last_id) + 1
                o = return_key(str(cur_id), h_wrd_dic)
                if o != None:
                    #print item.split(':')[1] + '<>' + o
                    # Stored wrapped in '<< >>', unlike the ids stored above.
                    k_mwe_dic[ids[i]] = '<< ' + str(cur_id) + ' >>'

#################################
# Print every collected entry as "<key>\t<value>", in sorted key order.
for key, value in sorted(k_mwe_dic.items()):
    print(str(key) + '\t' + value)

# Append len(fw) - 1 '-' placeholders to the exact-word alignment list.
list_K_exact_word_align.extend(['-'] * (len(fw) - 1))

# Store data in list_K_exact_word_align: position i receives the k_mwe_dic
# entry whose key is the string form of i.
# NOTE(review): i runs up to len(fw) - 1, while only len(fw) - 1 placeholders
# are appended just above; if the list was empty before that, the last index
# would be out of range -- confirm the intended size.
for i in range(1, len(fw)):
    if str(i) in k_mwe_dic.keys():
        list_K_exact_word_align[i] = k_mwe_dic[str(i)]