def K_exact_match_Roja():
    """Return the K-layer exact-match ids row, with slot 0 relabeled.

    Reads column 1 of the module-level ``k_layer_ids_file`` via ``anchor``
    and overwrites the first cell with the row title "K_exact_match".
    """
    row = anchor.load_row_from_csv(k_layer_ids_file, 1)
    row = anchor.cleaning_list(row)  # normalize entries; zeros at "-" places
    row[0] = "K_exact_match"
    return row
def A_layer():
    """Return the English word-id row (column 0 of ``k_layer_ids_file``).

    Slot 0 is overwritten with the label "English_word_ids".
    """
    row = anchor.load_row_from_csv(k_layer_ids_file, 0)
    row = anchor.cleaning_list(row)  # cleaning kept for parity with sibling loaders
    row[0] = "English_word_ids"
    return row
def K_exact_mwe_word_align_csv():
    """Load the exact-MWE word-alignment row for the current sentence.

    Falls back to a zero-filled row (labeled in slot 0) and logs the miss
    when the per-sentence CSV does not exist.
    """
    mwe_row = []
    try:
        k_mwe_csv_file = sent_dir + '/K_exact_mwe_word_align.csv'
        mwe_row = anchor.cleaning_list(anchor.load_row_from_csv(k_mwe_csv_file, 0))
        print(mwe_row)
    except FileNotFoundError:
        # CSV absent for this sentence: substitute a zero row and record it.
        mwe_row = ['0'] * (no_of_eng_words + 1)
        mwe_row[0] = 'K_exact_mwe_word_align.csv'
        log.write("FILE MISSING: " + k_mwe_csv_file + "\n")
    return mwe_row
def Kishori_exact_match_WSD_modulo():
    """Load the WSD-modulo row from Exact_match_dict.csv, ids-converted.

    Prepends the label "WSD_modulo"; on a missing file, returns a
    zero-filled labeled row and logs the absence.
    """
    row = []
    try:
        kishori_csv = sent_dir + '/Exact_match_dict.csv'
        row = anchor.load_row_from_csv(kishori_csv, 2)
        row = convert_words_to_ids_in_list(row, h2w)
        row.insert(0, "WSD_modulo")
    except FileNotFoundError:
        row = ['0'] * (no_of_eng_words + 1)
        row[0] = "WSD_modulo"
        log.write("FILE MISSING: " + kishori_csv + "\n")
    return row
def Transliteration_Dict():
    """Load the transliteration row from Transliterate1.csv, ids-converted.

    Prepends the label "Transliteration"; on a missing file, returns a
    zero-filled labeled row and logs the absence.
    """
    # Fix: removed dead local `roja_transliterate_list`, which was assigned
    # but never used — the function accumulates into `dict_new` throughout.
    dict_new = []
    try:
        transl_csv = sent_dir + '/Transliterate1.csv'
        dict_new = anchor.load_row_from_csv(transl_csv, 2)
        dict_new = convert_words_to_ids_in_list(dict_new, h2w)
        dict_new.insert(0, "Transliteration")
    except FileNotFoundError:
        dict_new = ['0'] * (no_of_eng_words + 1)
        dict_new[0] = "Transliteration"
        log.write("FILE MISSING: " + transl_csv + "\n")
    return dict_new
def K_1st_letter_capital_word():
    """Load the capitalized-word row for the current sentence, ids-converted.

    On a missing file, returns a zero-filled row labeled in slot 0 and
    logs the absence.
    """
    # Fix: removed dead local `k_prop_list`, which was assigned but never used.
    try:
        k_prop_csv_file = sent_dir + '/K_1st_letter_capital_word.csv'
        k_prop = anchor.load_row_from_csv(k_prop_csv_file, 0)
        k_prop = anchor.cleaning_list(k_prop)
        k_prop = convert_words_to_ids_in_list(k_prop, h2w)
        # NOTE(review): unlike the fallback branch, the success path never sets
        # k_prop[0] to a label — presumably the CSV's own first cell is the
        # header; confirm against the CSV producer before changing.
    except FileNotFoundError:
        k_prop = ['0'] * (no_of_eng_words + 1)
        k_prop[0] = 'K_1st_letter_capital_word'
        log.write("FILE MISSING: " + k_prop_csv_file + "\n")
    return k_prop
def Domain_Specific_Alignment_Dict():
    """Load the domain-specific alignment row, ids-converted.

    Prepends the label "Preprocessing"; on a missing file, returns a
    zero-filled labeled row and logs the absence.
    """
    row = []
    try:
        nupur_csv = sent_dir + '/Domain_Specific_Align_Dict.csv'
        row = convert_words_to_ids_in_list(
            anchor.load_row_from_csv(nupur_csv, 2), h2w)
        row.insert(0, "Preprocessing")
    except FileNotFoundError:
        row = ['0'] * (no_of_eng_words + 1)
        row[0] = "Preprocessing"
        log.write("FILE MISSING: " + nupur_csv + "\n")
    return row
def K_dict_Roja():
    """Return the K-dict ids row (column 5 of ``k_layer_ids_file``), cleaned.

    Slot 0 is overwritten with the label "K_dict".
    """
    k_dict_ids = anchor.load_row_from_csv(k_layer_ids_file, 5)
    # Fix: the original assigned the cleaned list to a misspelled name
    # (`k_dit_ids`), silently discarding the cleaning and returning the
    # raw row. Assign back to `k_dict_ids` so the cleaned row is returned,
    # matching every sibling loader in this file.
    k_dict_ids = anchor.cleaning_list(k_dict_ids)
    k_dict_ids[0] = "K_dict"
    return k_dict_ids
def K_partial_Roja():
    """Return the K-layer partial-match ids row, labeled in slot 0."""
    row = anchor.load_row_from_csv(k_layer_ids_file, 3)
    row = anchor.cleaning_list(row)
    row[0] = "K_partial"
    return row
def K_exact_without_vib_Roja():
    """Return the K-layer exact-match-without-vibhakti ids row, labeled in slot 0."""
    row = anchor.load_row_from_csv(k_layer_ids_file, 2)
    row = anchor.cleaning_list(row)
    row[0] = "K_exact_without_vib"
    return row
stdout=subprocess.PIPE, shell=True).stdout.read() # x1 = x.decode(encoding="utf-8", errors="strict") x1 = x.decode("utf-8") x2 = x1.split(")") while "" in x2: x2.remove("") print(x2, len(x2), type(x2)) title = ["Resources"] + list(x2[:-1]) #------------------------------------------------------------------------------------ #Prashant's and Apratim's module try: prashant_csv = path_tmp + '/new_N1.csv' # print(open(prashant_csv,'r').read()) N1_layer = a.load_row_from_csv(prashant_csv, 1) print(N1_layer) N1_layer.insert(0, "N1_layer") # print(N1_layer) print(N1_layer) dfs.loc[dfs.index[-1] + 1] = N1_layer potential_anchor = a.extract_row_from_df_as_list_by_column_name_and_cell_value( dfs, 'Resources', 'Potential anchors (exact)') starting_anchor = a.extract_row_from_df_as_list_by_column_name_and_cell_value( dfs, 'Resources', 'Starting anchor') potential_anchor_partial = a.extract_row_from_df_as_list_by_column_name_and_cell_value( dfs, 'Resources', 'Potential anchors (partial)') potential_anchor_guess = a.extract_row_from_df_as_list_by_column_name_and_cell_value( dfs, 'Resources', 'Potential anchors (guess)')
"""Emit (E_id, k_dict, mfs, wsd_modulo) facts for one sentence.

Reads rows 11 (k_dict) and 10 (wsd_modulo) from the per-sentence
All_Resources.csv and writes one tab-separated fact per English word id
to facts.dat, echoing each fact to stdout.
"""
import sys, os, anchor, csv

tmp_path = os.getenv('HOME_anu_tmp') + '/tmp/'
eng_file_name = sys.argv[1]   # source English file name
sent_no = sys.argv[2]         # sentence number (directory name)
sent_dir = tmp_path + eng_file_name + "_tmp/" + sent_no
file_ = sent_dir + "/All_Resources.csv"
facts = sent_dir + "/facts.dat"
print(file_)

k_dict = anchor.load_row_from_csv(file_, 11)
wsd_modulo = anchor.load_row_from_csv(file_, 10)
print(k_dict)
print(wsd_modulo)
print('================')

with open(facts, 'w') as f:
    # Index 0 holds the row label, so facts start at word id 1.
    for i in range(1, len(k_dict)):
        fact = ("(E_id-k_dict-mfs-wsd_modulo\t" + str(i) + "\t"
                + k_dict[i].replace("_", '0') + '\tmfs\t'
                + wsd_modulo[i] + ')')
        print(fact)
        f.write(fact + '\n')