############################################
# Declarations: module-level lookup tables filled by the loops below.
v_rt_dic = {}
tam_dic = {}
anu_rt_dic = {}
anu_tam_dic = {}
k_v_rt_dic = {}
k_v_rt_par_dic = {}
anu_km_dic_match = {}
k_exact_tam_dic = {}

############################################
# Storing verb root and TAM info in dics.
# Each line of sys.argv[1] is split on whitespace; the first token selects the
# target dictionary, the second token is the key, and the remaining tokens
# (minus the last one) are joined with spaces to form the value.
with open(sys.argv[1]) as facts_file:  # 'with' closes the handle deterministically
    for line in facts_file:
        lst = line.strip().split()
        if not lst:
            # Blank line: nothing to classify (indexing lst[0] would raise IndexError).
            continue
        tag = lst[0]
        if 'verb_root-id' in tag:
            # original example note: vApasa_A 8 9
            add_data_in_dic(v_rt_dic, lst[1], ' '.join(lst[2:-1]))
        if 'tam-id' in tag:
            add_data_in_dic(tam_dic, lst[1], ' '.join(lst[2:-1]))
        if 'anu_id-manu_verb_root-ids' in tag:
            # Key is converted to int here; value starts at the fourth token.
            # original example note: bAwa_kara 7 8
            add_data_in_dic(anu_km_dic_match, int(lst[1]), ' '.join(lst[3:-1]))

##############################################
# Storing K layer root info in dic.
# For matching lines of sys.argv[2]: int(second token) -> third token.
with open(sys.argv[2]) as k_layer_file:
    for line in k_layer_file:
        lst = line.strip().split()
        if not lst:
            continue  # skip blank lines
        if 'id-anu_root' in lst[0]:
            anu_rt_dic[int(lst[1])] = lst[2]
        if 'id-anu_tam' in lst[0]:
            anu_tam_dic[int(lst[1])] = lst[2]
##############################
# Declarations: module-level lookup tables.
h_wrd_dict = {}
v_rt_dic = {}
tam_dic = {}
anu_eng_rt_dic = {}
kriyA_mula_dic = {}
anu_rt_dic = {}
anu_tam_dic = {}
verb_dic = {}
man_tam_dic = {}
gnp_dic = {}

##############################
# Fill h_wrd_dict from the already-open handle `fhw` (opened outside this
# fragment): third token is the key, second token is the value.
for line in fhw:
    # line[:-2] drops the last two characters of the line before splitting
    # (presumably a closing delimiter plus the newline -- TODO confirm).
    lst = line[:-2].split()
    add_data_in_dic(h_wrd_dict, lst[2], lst[1])

##############################
# Storing single-meaning verb dic: tab-separated, column 0 -> column 1.
with open(sys.argv[3]) as verb_file:  # 'with' closes the handle deterministically
    for line in verb_file:
        lst = line.strip().split('\t')
        add_data_in_dic(verb_dic, lst[0], lst[1])

##############################
def check_wrd(index, length, word, f_wrd):
    # Collects f_wrd followed by the result of return_key(...) on h_wrd_dict
    # for each of the positions index+1 .. index+length-1 (as strings).
    # NOTE(review): this definition appears to continue beyond the visible
    # source; only the visible statements are reproduced here, unchanged.
    lst = []
    lst.append(f_wrd)
    for i in range(1, length):
        k = return_key(str(index + i), h_wrd_dict)
        lst.append(k)
from functions import add_data_in_dic
from functions import return_key

############################################
# Declarations: module-level lookup tables.
v_rt_dic = {}
tam_dic = {}
anu_rt_dic = {}
k_v_rt_dic = {}
k_v_rt_par_dic = {}

############################################
# Storing verb root and TAM info in dics.
# First token selects the dictionary, second token is the key, and the
# remaining tokens except the last are joined with spaces as the value.
with open(sys.argv[1]) as facts_file:  # 'with' closes the handle deterministically
    for line in facts_file:
        lst = line.strip().split()
        if not lst:
            # Blank line: indexing lst[0] would raise IndexError.
            continue
        if 'verb_root-id' in lst[0]:
            # original example note: vApasa_A 8 9
            add_data_in_dic(v_rt_dic, lst[1], ' '.join(lst[2:-1]))
        if 'tam-id' in lst[0]:
            add_data_in_dic(tam_dic, lst[1], ' '.join(lst[2:-1]))

##############################################
# Storing K layer root info in dic: int(second token) -> third token.
with open(sys.argv[2]) as k_layer_file:
    for line in k_layer_file:
        lst = line.strip().split()
        if not lst:
            continue  # skip blank lines
        anu_rt_dic[int(lst[1])] = lst[2]

##############################################
# Aligning verb root: print every stored root in ascending key order.
for key in sorted(anu_rt_dic):
    anu_rt = anu_rt_dic[key]
    print(anu_rt)
# Module-level lookup tables and the alignment label list.
domain_dic = {}
default_dic = {}
cap_dom_dic = {}
cap_def_dic = {}
man_mng_dic = {}
k_align_dic = {}
list_K_alignment = ['K_1st_letter_capital_word']

#################################
# Collecting first-letter-capital words from original_word.dat.
# `fo`, `weak_choice` and `cap_dic` are defined outside this fragment.
for line in fo:
    # line[:-2] drops the last two characters of the line before splitting
    # (presumably a closing delimiter plus the newline -- TODO confirm).
    lst = line[:-2].split()
    word = lst[2]
    # ai1E/2.67: condition added by Kishori (9 Nov) to remove
    # 1_In = 3_meM/10_meM in K_1st_letter_capital_word.
    if word.lower() in weak_choice:
        continue
    if word[0].isupper():  # an extra "lst[1] != '1'" test was disabled in the original
        add_data_in_dic(cap_dic, int(lst[1]), word.lower())

# Debug aid kept from the original (disabled):
#for key in sorted(cap_dic):
#    print(str(key) + '\t' + cap_dic[key])

#################################
# Creating domain dic: for every non-'#' line of sys.argv[1] (tab-separated),
# the part of column 0 before the first '_' is the key, column 1 the value.
with open(sys.argv[1]) as domain_file:  # 'with' closes the handle deterministically
    for line in domain_file:
        if line[0] != '#':
            lst = line.strip().split('\t')
            wrd = lst[0].split('_')
            add_data_in_dic(domain_dic, wrd[0], lst[1])
import sys
from functions import unique_val
from functions import add_data_in_dic
from functions import return_key

hrt_dic = {}
hid_dic = {}
kriyA_mUla = []

# Fill both dictionaries from sys.argv[1]:
#   hrt_dic: third token -> second token
#   hid_dic: int(second token) -> third token
with open(sys.argv[1]) as root_file:  # 'with' closes the handle deterministically
    for line in root_file:
        # line[:-2] drops the last two characters of the line before splitting
        # (presumably a closing delimiter plus the newline -- TODO confirm).
        lst = line[:-2].split()
        add_data_in_dic(hrt_dic, lst[2], lst[1])
        add_data_in_dic(hid_dic, int(lst[1]), lst[2])

# Collect the distinct '/'-separated entries found in column 1 of the
# tab-separated file sys.argv[2], preserving first-seen order.
with open(sys.argv[2]) as meaning_file:
    for line in meaning_file:
        lst = line.strip().split('\t')
        mngs = lst[1].split('/')
        for each in mngs:
            if each not in kriyA_mUla:
                kriyA_mUla.append(each)
#print(kriyA_mUla)


def check_substring(lst, sub):
    """Return the items of *lst* that contain *sub*, in their original order."""
    # NOTE(review): the return is placed after the loop (whole list scanned);
    # the flattened source does not show the original indentation -- confirm.
    return [each for each in lst if sub in each]
list_K_exact_word_align = ['K_exact_word_align']

############################################################
# Creating wrd_dic, mwe_dic, h_wrd_dic.
# `fw`, `fr`, `wrd_dic`, `mwe_dic` and `h_wrd_dic` are defined outside this fragment.
for line in fw:
    # line[:-2] drops the last two characters of the line before splitting
    # (presumably a closing delimiter plus the newline -- TODO confirm).
    lst = line[:-2].split()
    wrd_dic[lst[1]] = lst[2]

# Tab-separated file: column 0 -> column 1.
with open(sys.argv[2]) as mwe_file:  # 'with' closes the handle deterministically
    for line in mwe_file:
        lst = line.strip().split('\t')
        mwe_dic[lst[0]] = lst[1]

for line in fr:
    lst = line[:-2].split('\t')
    add_data_in_dic(h_wrd_dic, lst[2], lst[1])

############################################################
# Checking multi_word_expressions.dat.
# For lines whose fourth token contains 'compound', the tokens from the fifth
# onward are looked up in wrd_dic and joined with '_'.
# NOTE(review): the nesting under the 'compound' test is reconstructed from a
# flattened source -- confirm which statements belong inside the `if`.
mwe_list = []
with open(sys.argv[1]) as compound_file:
    for line in compound_file:
        mwe_mng = ''
        lst = line[:-2].split()
        if 'compound' in lst[3]:
            mwe_mng = lst[2]
            ids = lst[4:]
            print(ids, mwe_mng)
            mwe_lst = []
            for each in ids:
                mwe_lst.append(wrd_dic[each])
                #print each, wrd_dic[each], h_wrd_dic.keys()
            mwe = '_'.join(mwe_lst)
            #finite_state_machine