def check_wrd(index, length, word, f_wrd):
    """Return *index* when the words starting at that position spell *word*.

    The candidate phrase is ``f_wrd`` followed by the words found at
    positions ``index + 1`` .. ``index + length - 1``; each of those is
    looked up with ``return_key(str(position), h_wrd_dict)``.

    Args:
        index: Position of the first word; integer offsets are added to it.
        length: Number of words expected in the phrase.
        word: Space-separated phrase to compare against.
        f_wrd: First word of the candidate phrase.

    Returns:
        ``index`` if the space-joined candidate equals ``word``,
        otherwise ``None``.
    """
    words = [f_wrd]
    for offset in range(1, length):
        found = return_key(str(index + offset), h_wrd_dict)
        if found is None:
            # Other callers in this file test return_key's result against
            # None; a missing position means the phrase cannot match, and
            # joining a None below would raise TypeError.
            return None
        words.append(found)
    if ' '.join(words) == word:
        return index
    return None
def return_id(word):
    """Return the id at which the phrase *word* starts, or ``None``.

    The first token of ``word`` must appear among ``h_wrd_dict`` values; its
    id is obtained with ``return_key(first, h_wrd_dict)``.  The following
    tokens are read from ``h_wrd_dict[id + 1]``, ``h_wrd_dict[id + 2]``, ...
    and the space-joined result must equal ``word`` exactly.

    Args:
        word: Space-separated phrase to locate.

    Returns:
        The id returned by ``return_key`` for the first token when the whole
        phrase matches; ``None`` when ``word`` is empty or whitespace-only,
        when the first token is unknown, when a following id is missing from
        ``h_wrd_dict``, or when the joined words differ from ``word``.
    """
    parts = word.split()
    if not parts:
        # Empty / whitespace-only input has no first token to look up.
        return None
    first = parts[0]
    if first not in h_wrd_dict.values():
        return None
    out = return_key(first, h_wrd_dict)
    if out is None:
        return None
    matched = [first]
    for offset in range(1, len(parts)):
        following = h_wrd_dict.get(out + offset)
        if following is None:
            # The phrase would run past the ids present in h_wrd_dict, so it
            # cannot match; report "not found" instead of raising KeyError.
            return None
        matched.append(following)
    if ' '.join(matched) == word:
        return out
    return None
def check_for_kriyA_mUla(start_id, length, anu_vb_word):
    """Collect ``length`` words from ``start_id`` and compare to a root.

    For each offset in ``range(length)`` the position ``start_id + offset``
    is resolved as follows:

    * If the position is a key of ``v_rt_dic``, its value is used.
    * Otherwise the word is looked up with
      ``return_key(str(position), h_wrd_dict)``; positions with no word are
      silently skipped.
    * In either case, when the resolved word is ``'nahIM'`` the word and id
      of the *next* position are used instead (e.g. Bexa_nahIM_kara).

    Args:
        start_id: Integer position of the first word.
        length: Number of words in ``anu_vb_word``.
        anu_vb_word: Underscore-joined root to compare against.

    Returns:
        The space-joined ids (as strings) of the collected words when their
        underscore-joined form equals ``anu_vb_word``; otherwise ``None``.
        ``None`` is also returned when the position after a ``'nahIM'`` has
        no word, since no match is possible then.
    """
    collected = []
    ids = []
    for offset in range(length):
        pos = start_id + offset
        if pos in v_rt_dic:
            piece = v_rt_dic[pos]  # e.g. kara
            if piece == 'nahIM':
                pos += 1
                piece = v_rt_dic.get(pos)
                if piece is None:
                    # Nothing recorded after the negation: cannot match
                    # (the original raised KeyError here).
                    return None
        else:
            piece = return_key(str(pos), h_wrd_dict)  # e.g. bAwa
            if piece is None:
                continue
            if piece == 'nahIM':
                pos += 1
                piece = return_key(str(pos), h_wrd_dict)
                if piece is None:
                    # A None in the list would make '_'.join raise TypeError.
                    return None
        collected.append(piece)
        ids.append(str(pos))
    if anu_vb_word == '_'.join(collected):
        return ' '.join(ids)
    return None
# Fragment (enclosing scope not visible here; `key` and `eng_rt` come from it).
# Splits kriyA_mula_dic[eng_rt] on '/' into candidate meanings and each
# candidate on '_' into words.  When the first word is a key of h_wrd_dict:
#   - if the stored value has no '/', it is converted with int() and passed to
#     check_for_kriyA_mUla(id, len(wrd), each); a non-None result is printed as
#     an '(anu_id-manu_verb_root-ids key each out )' fact;
#   - if the stored value contains '/', each '/'-separated id is tried the
#     same way.
# When check_for_kriyA_mUla returns None, the meanings from verb_dic[eng_rt]
# are used instead and looked up with return_key(each, v_rt_dic).
# NOTE(review): the indentation of this span was lost, so the if/else nesting
# described above is a reading of the statement order -- confirm against the
# original file.
# NOTE(review): the fallback appears to re-bind `mngs` and `each` while the
# outer `for each in mngs` is still running -- confirm this is intended.
mngs = kriyA_mula_dic[eng_rt].split('/') for each in mngs: wrd = each.split('_') if wrd[0] in h_wrd_dict.keys(): if '/' not in h_wrd_dict[wrd[0]]: out = check_for_kriyA_mUla(int(h_wrd_dict[wrd[0]]), len(wrd), each) if out != None: # print(each, h_wrd_dict[wrd[0]]) print('(anu_id-manu_verb_root-ids', key, each, out, ')') else: if eng_rt in verb_dic.keys(): mngs = verb_dic[eng_rt].split('/') for each in mngs: v_id = return_key(each, v_rt_dic) if v_id != None: print('(anu_id-manu_verb_root-ids', key, each, v_id, ')') #ai1E, 2.50, pA else: #print('&&', wrd[0], h_wrd_dict[wrd[0]], each) l = h_wrd_dict[wrd[0]].split('/') for item in l: out = check_for_kriyA_mUla(int(item), len(wrd), each) if out != None: # print(each, h_wrd_dict[wrd[0]]) print('(anu_id-manu_verb_root-ids', key, each, out, ')') else: if eng_rt in verb_dic.keys(): mngs = verb_dic[eng_rt].split('/')
# Fragment: builds the domain and default dictionaries from two input files.
#   1. Reads sys.argv[1] line by line, skipping lines whose first character is
#      '#'.  Each line is stripped and split on tabs; the first column is split
#      on '_' and its first part is passed with the second column to
#      add_data_in_dic(domain_dic, ...).
#   2. For every key of domain_dic (sorted) that also occurs among
#      cap_dic.values(), the matching cap_dic key is fetched with
#      return_key(key, cap_dic) and cap_dom_dic[key_id] is set to
#      domain_dic[key].
#   3. Reads sys.argv[2] the same way into default_dic, additionally skipping
#      lines that start with a tab, and using the whole first column as key.
# The span ends on an `if(key in cap_dic.values()):` whose body is not visible.
# NOTE(review): lst[1] raises IndexError for a non-comment line without a tab,
# and both files are opened without being closed explicitly -- confirm whether
# the inputs guarantee two columns.
################################# #Creating domain dic: for line in open(sys.argv[1]): if(line[0]!= '#'): lst = line.strip().split('\t') wrd = lst[0].split('_') add_data_in_dic(domain_dic, wrd[0], lst[1]) #for key in sorted(domain_dic): # print(str(key) + '\t' + domain_dic[key]) #Checking collected capital words in domain dic: for key in sorted(domain_dic): if(key in cap_dic.values()): key_id = return_key(key, cap_dic) # if(key_id not in cap_dom_dic.keys()): #If neccessary uncomment this loop cap_dom_dic[key_id] = domain_dic[key] # print(str(key_id) + '\t' + domain_dic[key]) ################################# #Creating default dic: for line in open(sys.argv[2]): if(line[0]!= '#' and line[0] != '\t'): lst = line.strip().split('\t') add_data_in_dic(default_dic, lst[0], lst[1]) #Checking collected capital words in domain dic: for key in sorted(default_dic): if(key in cap_dic.values()):
# Fragment (starts inside a loop over `line` that is not visible here).
#   - Drops the last two characters of `line`, splits on whitespace and stores
#     man_rt_dic[lst[1]] = '+'.join(lst[2:]).
#   - For lines containing 'verb_root-tam-gnp-v_id', stores
#     gnp_dic['+'.join(lst[4:])] = lst[3].
#   - "Aligning with tam": for each key of anu_tam_dic (sorted) whose tam is a
#     key of man_tam_dic, takes val = man_tam_dic[tam], prints val and
#     man_rt_dic, and resolves the manual root with
#     return_key('+'.join(val), man_rt_dic), falling back to
#     return_key(val[0], man_rt_dic).  When the anu root differs from the
#     manual root, records k_tam_dic[key] = ' '.join(val) and
#     k_v_t_gnp_dic[key] = man_rt + '+' + tam + gnp_dic[val[0]].
#   - Reads "word.dat" into `fw`, sets word_len = len(fw) - 1, and appends one
#     '-' per line of `fw` to list_K_tam and list_K_tam_v.
# NOTE(review): print(val, man_rt_dic) looks like leftover debug output --
# confirm before removing.
# NOTE(review): if both return_key lookups yield None, the string
# concatenation for k_v_t_gnp_dic raises TypeError -- confirm this cannot
# happen for real inputs.
# NOTE(review): presumably line[:-2] strips a trailing ")" plus newline of a
# fact line -- verify against the input format.
lst = line[:-2].split() man_rt_dic[lst[1]] = '+'.join(lst[2:]) if 'verb_root-tam-gnp-v_id' in line: lst = line[:-2].split() key = '+'.join(lst[4:]) gnp_dic[key] = lst[3] ############################################# #Aligning with tam if anu root and manual root are different: for key in sorted(anu_tam_dic): # print(anu_tam_dic[key], man_tam_dic.values()) if anu_tam_dic[key] in man_tam_dic.keys(): k = anu_tam_dic[key] val = man_tam_dic[k] print(val, man_rt_dic) man_rt = return_key('+'.join(val), man_rt_dic) if man_rt == None: man_rt = return_key(val[0], man_rt_dic) if anu_rt_dic[key] != man_rt: k_tam_dic[key] = ' '.join(val) k_v_t_gnp_dic[key] = man_rt + '+' + k + gnp_dic[val[0]] # print(key, man_rt, man_rt_dic, k) ############################################# fw = open("word.dat", "r").readlines() word_len = len(fw) - 1 for i in range(len(fw)): list_K_tam.append('-') list_K_tam_v.append('-')
# Fragment (opens with the tail print of a statement that starts outside this
# span, and is cut off at the end).
#   - "Get kriyA_mUla info": reads sys.argv[4] and collects the distinct first
#     tab-separated column of each line into kriyA_mUla_lst.
#   - For each key of v_rt_dic (sorted): k = key - 1, the word at position k is
#     fetched with return_key(str(k), h_wrd_dict) and combined with the verb
#     root as k_m_w = wrd + '_' + v_rt_dic[key].
#       * If k_m_w is in kriyA_mUla_lst, v_rt_dic[key] is deleted and the ids
#         "k key" are stored under v_rt_dic[k_m_w] (appended after '/' when
#         that entry already exists); tam_dic[key] is moved to
#         tam_dic["k key"].
#       * Otherwise the value is read into `val` and v_rt_dic[key] is deleted;
#         what follows is outside this span.
# NOTE(review): when return_key yields None for position k, wrd + '_' raises
# TypeError -- confirm every verb root has a preceding word.
print('(anu_id-manu_verb_root-ids', key, each, out, ')') #Get kriyA_mUla info: f3 = open(sys.argv[4], 'r').readlines() kriyA_mUla_lst = [] for each in f3: lst = each.strip().split('\t') if lst[0] not in kriyA_mUla_lst: kriyA_mUla_lst.append(lst[0]) #print(kriyA_mUla_lst, len(kriyA_mUla_lst)) for key in sorted(v_rt_dic): k = key - 1 wrd = return_key(str(k), h_wrd_dict) k_m_w = wrd + '_' + v_rt_dic[key] # print(wrd, k_m_w) if k_m_w in kriyA_mUla_lst: # print('&&', k , k_m_w) del v_rt_dic[key] if k_m_w not in v_rt_dic: v_rt_dic[k_m_w] = str(k) + ' ' + str(key) else: v_rt_dic[k_m_w] = v_rt_dic[k_m_w] + '/' + str(k) + ' ' + str(key) new_key = str(k) + ' ' + str(key) tam_dic[new_key] = tam_dic[key] del tam_dic[key] else: val = v_rt_dic[key] del v_rt_dic[key]
# Fragment (starts inside a scope that is not visible; `expr`, `ids` and `fw`
# come from it).
#   - Splits `expr` on whitespace.  For each item, the part after the first
#     ':' is looked up in h_wrd_dic:
#       * found: its value is appended to h_ids and stored as
#         k_mwe_dic[ids[i]];
#       * not found: the id after the last collected one is computed
#         (int(h_ids[-1]) + 1) and, if return_key(str(cur_id), h_wrd_dic) is
#         not None, k_mwe_dic[ids[i]] is set to '<< cur_id >>'.
#   - Prints every k_mwe_dic entry (sorted by key) as "key<TAB>value".
#   - Appends len(fw) - 1 placeholders '-' to list_K_exact_word_align and then,
#     for i in 1 .. len(fw) - 1, copies k_mwe_dic[str(i)] into position i when
#     that key exists.
# NOTE(review): h_ids[-1] raises IndexError when the first item of `expr` is
# not in h_wrd_dic -- confirm the first word is always known.
# NOTE(review): if list_K_exact_word_align is empty before this span, it ends
# up with len(fw) - 1 entries while the last index written can be
# len(fw) - 1, which would be out of range -- confirm the intended size.
#print expr expr_lst = expr.split() h_ids = [] for i in range(0, len(expr_lst)): item = expr_lst[i] if item.split(':')[1] in h_wrd_dic.keys(): val = item.split(':')[1] h_ids.append( h_wrd_dic[val] ) # Ex: item.split(':')[1] = 'parimiwa' , h_wrd_dic['item.split(':')[1]] = 4 (Ex: 2.98, ai2E) k_mwe_dic[ids[i]] = h_wrd_dic[val] #print h_wrd_dic[val] else: last_id = h_ids[-1] cur_id = int(last_id) + 1 o = return_key(str(cur_id), h_wrd_dic) if o != None: #print item.split(':')[1] + '<>' + o k_mwe_dic[ids[i]] = '<< ' + str(cur_id) + ' >>' ################################# for key in sorted(k_mwe_dic): print(str(key) + '\t' + k_mwe_dic[key]) for i in range(len(fw) - 1): list_K_exact_word_align.append('-') #Store data in list_K_alignment for i in range(1, len(fw)): if str(i) in k_mwe_dic.keys(): list_K_exact_word_align[i] = k_mwe_dic[str(i)]