def main(model_path, input_path, doc_list, raw_data_path, preocessed_path,
         output_pred_path, output_format, pred=True, portion=0,
         split_output=False):
    """Run a trained model over pre-built HDF5 feature files and write predictions.

    NOTE(review): this file defines `main` more than once; at import time the
    last definition wins — confirm which variant callers intend to use.

    :param model_path: path of the trained model passed to load_model.
    :param input_path: directory containing the HDF5 feature files
        ("input" or "input<k>" for split output).
    :param doc_list: ordered list of document identifiers.
    :param raw_data_path: unused here; kept for signature compatibility with
        the sibling `main` variants.
    :param preocessed_path: directory with the preprocessed documents
        (spelling kept for caller compatibility).
    :param output_pred_path: directory where predictions are written.
    :param output_format: format abbreviation forwarded as `format_abbre`.
    :param pred: forwarded to generate_output_multiclass.
    :param portion: index of the folder to process when split_output is True.
    :param split_output: when True, process only the folder selected by
        `portion`; otherwise process every document from the single
        combined input file.
    """
    file_n = len(doc_list)
    model = load_model(model_path)
    if split_output:
        # Documents were grouped into folders of ~20 at preprocessing time;
        # recompute the same boundaries to slice doc_list consistently.
        folder_n = int(np.round(np.divide(float(file_n), 20.00)))
        folder = [int(x) for x in np.linspace(0, file_n, folder_n + 1)]
        # The original looped over range(portion, portion + 1) — i.e. exactly
        # one folder; express that directly.
        version = portion
        doc_list_sub = doc_list[folder[version]:folder[version + 1]]
        features = read.load_hdf5(input_path + "/input" + str(version),
                                  ["char", "pos", "unic"])
        generate_output_multiclass(model, features, None, doc_list_sub,
                                   preocessed_path, output_pred_path,
                                   pred=pred, data_folder=str(version),
                                   format_abbre=output_format)
    else:
        features = read.load_hdf5(input_path + "/input",
                                  ["char", "pos", "unic"])
        generate_output_multiclass(model, features, None, doc_list[:],
                                   preocessed_path, output_pred_path,
                                   pred=pred, data_folder="",
                                   format_abbre=output_format)
def main(model_path, input_path, doc_list, raw_data_path, preocessed_path,
         output_pred_path, output_format, pred="true", evaluate="true"):
    """Predict over folder-split HDF5 inputs and optionally evaluate.

    When there are more than 20 documents, the pre-split per-folder input
    files ("input_<k>") are used; otherwise the single combined "input"
    file is read.

    NOTE(review): this file defines `main` more than once; at import time
    the last definition wins — confirm which variant callers intend to use.

    :param model_path: path of the trained model passed to load_model.
    :param input_path: directory containing the HDF5 feature files.
    :param doc_list: ordered list of document identifiers.
    :param raw_data_path: directory with the raw source documents
        (used only by the final evaluation step).
    :param preocessed_path: directory with the preprocessed documents
        (spelling kept for caller compatibility).
    :param output_pred_path: directory where predictions are written.
    :param output_format: format abbreviation forwarded as `format_abbre`.
    :param pred: string flag ("true"/"false") forwarded downstream.
    :param evaluate: when the string "true", run output.evaluate at the end.
    """
    file_n = len(doc_list)
    model = load_model(model_path)
    if file_n > 20:
        # One folder per ~20 documents. int() mirrors the truncation the
        # original np.divide-based code relied on; np.linspace requires an
        # integer sample count.
        folder_n = int(np.divide(file_n, 20))
        # BUG FIX: map() returns an iterator in Python 3 and cannot be
        # indexed (folder[version] raised TypeError); materialise the
        # boundaries as a list, matching the sibling `main` variants.
        folder = [int(x) for x in np.linspace(0, file_n, folder_n + 1)]
        # BUG FIX: the original loop ran for a single hard-coded folder
        # (range(0, 1)) even though evaluation below covers every document;
        # iterate all folders so each document gets a prediction.
        for version in range(len(folder) - 1):
            doc_list_sub = doc_list[folder[version]:folder[version + 1]]
            features = read.load_hdf5(input_path + "/input_" + str(version),
                                      ["char", "pos", "unic"])
            generate_output_multiclass(model, features, None, doc_list_sub,
                                       preocessed_path, output_pred_path,
                                       pred=pred, data_folder=str(version),
                                       format_abbre=output_format)
    else:
        features = read.load_hdf5(input_path + "/input",
                                  ["char", "pos", "unic"])
        # Pass the caller's `pred` flag through (the original hard-coded
        # "true" here, inconsistently with the split branch).
        generate_output_multiclass(model, features, None, doc_list[:],
                                   preocessed_path, output_pred_path,
                                   pred=pred, data_folder="",
                                   format_abbre=output_format)
    if evaluate == "true":
        output.evaluate(preocessed_path, output_pred_path, raw_data_path,
                        doc_list, output_format)
def main(model_path, input_path, doc_list, raw_data_path, preocessed_path,
         output_pred_path, output_format, pred=True, portion=0,
         split_output=False):
    """Run a trained model over char-only HDF5 inputs, with sentence lengths.

    Unlike the sibling variants, this one loads only the "char" feature and
    computes per-sentence lengths via sentence_length() before prediction.

    NOTE(review): this file defines `main` more than once; at import time
    the last definition wins — confirm which variant callers intend to use.

    :param model_path: path of the trained model passed to load_model.
    :param input_path: directory containing the HDF5 feature files.
    :param doc_list: ordered list of document identifiers.
    :param raw_data_path: unused here (the evaluate call is commented out in
        the original); kept for signature compatibility.
    :param preocessed_path: directory with the preprocessed documents
        (spelling kept for caller compatibility).
    :param output_pred_path: directory where predictions are written.
    :param output_format: format abbreviation forwarded as `format_abbre`.
    :param pred: forwarded to generate_output_multiclass.
    :param portion: index of the folder to process when split_output is True.
    :param split_output: when True, process only the folder selected by
        `portion`; otherwise process all documents from the combined input.
    """
    file_n = len(doc_list)
    model = load_model(model_path)
    if split_output:
        # Recompute the preprocessing-time folder boundaries (~20 docs each).
        folder_n = int(np.round(np.divide(float(file_n), 20.00)))
        folder = [int(x) for x in np.linspace(0, file_n, folder_n + 1)]
        version = portion  # the original looped over range(portion, portion+1)
        doc_list_sub = doc_list[folder[version]:folder[version + 1]]
        # load_hdf5 returns one array per requested key; [0] is the char matrix.
        char_feats = read.load_hdf5(input_path + "/input" + str(version),
                                    ["char"])[0]
        sent_len = sentence_length(char_feats)
        generate_output_multiclass(sent_len, model, char_feats, doc_list_sub,
                                   preocessed_path, output_pred_path,
                                   pred=pred, data_folder=str(version),
                                   format_abbre=output_format)
    else:
        # Leftover debug print statements removed.
        char_feats = read.load_hdf5(input_path + "/input", ["char"])[0]
        sent_len = sentence_length(char_feats)
        generate_output_multiclass(sent_len, model, char_feats, doc_list[:],
                                   preocessed_path, output_pred_path,
                                   pred=pred, data_folder="",
                                   format_abbre=output_format)
""" Transforms sentence into a list of indices. Post-Pad with zeroes. """ x = [] for word in sent: if word in word_idx_map: x.append(word_idx_map[word]) else: x.append(0) while len(x) < 356: x.append(4) return x char, pos, unicate = read.load_hdf5("data/cvcolon_train_input", ["char", "pos", "unic"]) char2int = read.readfrom_json("data/char2int") int2char = {index: char for char, index in char2int.items()} # print(char2int) int2char = dict((c, i) for i, c in char2int.items()) sent = list() sent_len = list() for char_x_sent in char: # 2637 8820 12760 ####2637 6183 3940 7140 sent_single = [ int2char[i] if i != 88 and i != 0 else ' ' for i in char_x_sent ] sent.append(sent_single) import torch forward_flairTorch = torch.load("data/lm-news-english-forward-v0.2rc.pt")