def parse_dir(dir_name):
    """Print a score table for every model under ``dir_name`` and return the scores.

    The style labels are read from ``<dir_name>/s_test.txt`` (one integer per
    line).  Every entry returned by ``get_sub_dirnames(dir_name)`` whose name
    does not end in ``"txt"`` is treated as a model directory and scored with
    ``parse_model_class`` and ``parse_model_sem``.

    Args:
        dir_name: Path of the results directory (no trailing slash).

    Returns:
        A tuple ``(res_class, res_sem)`` of two lists holding, in model order,
        the values returned by ``parse_model_class`` and ``parse_model_sem``.

    Raises:
        ValueError: If a line of ``s_test.txt`` is not an integer.
    """
    # ``with`` closes the label file even if a line fails to parse.
    with open(dir_name + "/s_test.txt", "r") as label_file:
        style_type = [int(line) for line in label_file]

    print("dir_name model_type \t transfer_strength content_reservation mixture")
    print("================================================================================")

    res_class, res_sem = [], []
    model_types = [name for name in get_sub_dirnames(dir_name)
                   if not name.endswith("txt")]
    for model_type in model_types:
        model_name = dir_name + "/" + model_type + '/'
        score_class = parse_model_class(model_name, style_type)
        score_sem = parse_model_sem(model_name, style_type)
        # Disabled rescaling kept from the original for reference:
        #score_sem = (score_sem-0.828)/0.172

        # Combined score; guard the denominator so two zero scores report 0.0
        # instead of aborting the whole table with ZeroDivisionError.
        total = score_class + score_sem
        mixture = score_class * score_sem / total if total != 0 else 0.0

        # "memory" gets one extra tab (presumably to keep the columns aligned
        # for the shorter name); everything else in the row is identical.
        gap = "\t\t\t" if model_type == "memory" else "\t\t"
        print(dir_name, "\t", model_type, gap, score_class, "\t\t", score_sem, "\t", mixture)

        res_class.append(score_class)
        res_sem.append(score_sem)

    print("===============================================================================")
    return res_class, res_sem
def gen_score(test_dir_name):
    """Run ``com_file`` on both style outputs of every model under ``test_dir_name``.

    For each entry of ``get_sub_dirnames(test_dir_name)`` whose name does not
    end in ``"txt"``, and for each style index ``"0"`` and ``"1"``, calls
    ``com_file`` with ``<test_dir_name>/q_test.txt``, the model's
    ``style<idx>.txt`` and ``style<idx>_semantics.txt``, plus the dictionary
    returned by ``Embedding(100).get_all_emb()``.

    Args:
        test_dir_name: Path of the results directory (no trailing slash).

    Returns:
        None.
    """
    # NOTE(review): 100 is presumably the embedding dimension - confirm
    # against the Embedding class.
    emb = Embedding(100)
    word_dict = emb.get_all_emb()

    # The reference file is the same for every model, so build its path once.
    q_file = test_dir_name + "/q_test.txt"

    subdir_names = [name for name in get_sub_dirnames(test_dir_name)
                    if not name.endswith("txt")]
    for dir_name in subdir_names:
        model_dir = test_dir_name + "/" + dir_name
        for index_name in ("0", "1"):
            r_file = model_dir + "/style" + index_name + ".txt"
            w_file = model_dir + "/style" + index_name + "_semantics.txt"
            # NOTE(review): com_file presumably reads q_file and r_file and
            # writes its result to w_file - confirm against its definition.
            com_file(q_file, r_file, w_file, word_dict)
    return
def get_sample_data(dir_name):
    """Write a shuffled sample of sentence pairs to ``toscore.xlsx`` for scoring.

    Every row of ``<dir_name>/embedding/style0.txt`` whose label in
    ``<dir_name>/s_test.txt`` is non-zero is paired with the same-numbered row
    of ``<dir_name>/q_test.txt``.  The pairs are shuffled with a fixed seed
    and the first 100 (fewer if not that many exist) are written as groups of
    four rows: a "news" row holding the ``q_test`` sentence, a "paper" row
    holding the ``style0`` sentence, an empty "score" row, and one untouched
    spacer row.  The number of written pairs whose two sentences are
    identical is printed at the end.

    NOTE(review): this file contains a second ``def get_sample_data`` after
    this one; the later definition replaces this one when the module is
    loaded.  Rename one of them if both are meant to be callable.

    Args:
        dir_name: Path of the results directory (no trailing slash).

    Raises:
        IndexError: If ``s_test.txt`` or ``q_test.txt`` has fewer lines than
            ``style0.txt``.
        ValueError: If a label line is not an integer.
    """
    import io  # io.open accepts ``encoding`` on both Python 2 and Python 3

    def read_lines(path):
        # Decode as UTF-8 at the I/O boundary and close the handle promptly,
        # so no per-cell ``.decode`` is needed later.
        with io.open(path, encoding="utf-8") as handle:
            return handle.readlines()

    max_samples = 100

    # The original listed the sub-directories and then immediately replaced
    # the result with this fixed list; only the fixed list was ever used.
    subdir_names = ["embedding"]

    labels = read_lines(dir_name + "/s_test.txt")
    questions = read_lines(dir_name + "/q_test.txt")

    pairs = []
    for sub_dir_name in subdir_names:
        responses = read_lines(dir_name + "/" + sub_dir_name + "/style0.txt")
        for i, response in enumerate(responses):
            if int(labels[i].strip()) == 0:
                continue
            pairs.append((questions[i].strip(), response.strip()))

    # One seeded shuffle of the pairs keeps each question aligned with its
    # response, as the original's two same-seed shuffles of equal-length
    # lists did.
    random.seed(1027)
    random.shuffle(pairs)

    workbook = xlsxwriter.Workbook('toscore.xlsx')
    try:
        worksheet = workbook.add_worksheet()
        cc = 0  # written pairs whose two sentences are identical
        # Slicing caps the sample without raising when fewer pairs exist.
        for count, (question, response) in enumerate(pairs[:max_samples]):
            if question == response:
                cc += 1
            top = count * 4
            worksheet.write(top, 0, "news")
            worksheet.write(top + 1, 0, "paper")
            worksheet.write(top + 2, 0, "score")
            worksheet.write(top, 1, question)
            worksheet.write(top + 1, 1, response)
            worksheet.write(top + 2, 1, "")
        print(cc)
    finally:
        # Always close so the workbook handle is released even on error.
        workbook.close()
def get_sample_data(dir_name):
    """Write a shuffled sample of sentence pairs to ``toscore1.xlsx``.

    For every entry of ``get_sub_dirnames(dir_name)`` whose name does not end
    in ``"txt"``, each row of that sub-directory's ``style0.txt`` whose label
    in ``<dir_name>/s_test.txt`` is not 1 is paired with the same-numbered row
    of ``<dir_name>/q_test.txt``.  The pairs from all sub-directories are
    shuffled together with a fixed seed and the first 100 (fewer if not that
    many exist) are written one pair per row: column 0 holds the ``q_test``
    sentence, column 1 the ``style0`` sentence.  The number of written pairs
    whose two sentences are identical is printed at the end.

    NOTE(review): an earlier function in this file has the same name; this
    later definition is the one that remains bound after the module loads.

    Args:
        dir_name: Path of the results directory (no trailing slash).

    Raises:
        IndexError: If ``s_test.txt`` or ``q_test.txt`` has fewer lines than
            a ``style0.txt`` file.
        ValueError: If a label line is not an integer.
    """
    import io  # io.open accepts ``encoding`` on both Python 2 and Python 3

    def read_lines(path):
        # Decode as UTF-8 at the I/O boundary and close the handle promptly,
        # so no per-cell ``.decode`` is needed later.
        with io.open(path, encoding="utf-8") as handle:
            return handle.readlines()

    max_samples = 100

    subdir_names = [name for name in get_sub_dirnames(dir_name)
                    if not name.endswith("txt")]

    # These two files are shared by every sub-directory, so read them once.
    labels = read_lines(dir_name + "/s_test.txt")
    questions = read_lines(dir_name + "/q_test.txt")

    pairs = []
    for sub_dir_name in subdir_names:
        responses = read_lines(dir_name + "/" + sub_dir_name + "/style0.txt")
        for i, response in enumerate(responses):
            if int(labels[i].strip()) == 1:
                continue
            pairs.append((questions[i].strip(), response.strip()))

    # One seeded shuffle of the pairs keeps each question aligned with its
    # response, as the original's two same-seed shuffles of equal-length
    # lists did.
    random.seed(1027)
    random.shuffle(pairs)

    workbook = xlsxwriter.Workbook('toscore1.xlsx')
    try:
        worksheet = workbook.add_worksheet()
        cc = 0  # written pairs whose two sentences are identical
        # Slicing caps the sample without raising when fewer pairs exist.
        for count, (question, response) in enumerate(pairs[:max_samples]):
            if question == response:
                cc += 1
            worksheet.write(count, 0, question)
            worksheet.write(count, 1, response)
        # Function-call form works on Python 2 and 3 and matches the other
        # print calls in this file.
        print(cc)
    finally:
        # Always close so the workbook handle is released even on error.
        workbook.close()
def test(test_dir_name):
    """Score every generated style file with the saved model and store the results.

    Loads the model from ``./model/model_architecture.json`` and
    ``./model/model_weights.h5``.  For each entry of
    ``get_sub_dirnames(test_dir_name)`` whose name does not end in ``"txt"``,
    and for each style index ``"0"`` and ``"1"``, the sub-directory's
    ``style<idx>.txt`` is loaded with ``get_test_data``, padded to the
    module-level ``maxlen`` and passed to ``model.predict_proba``.  The first
    component of each prediction is written, one per line, to
    ``style<idx>_classification.txt`` in the same sub-directory.

    Args:
        test_dir_name: Path of the results directory (no trailing slash).

    Returns:
        None.
    """
    # Read the architecture through ``with`` so the handle is not leaked.
    with open('./model/model_architecture.json') as arch_file:
        model = model_from_json(arch_file.read())
    model.load_weights('./model/model_weights.h5')

    subdir_names = [name for name in get_sub_dirnames(test_dir_name)
                    if not name.endswith("txt")]
    for dir_name in subdir_names:
        for index_name in ("0", "1"):
            # Shared stem of the input file and its classification output.
            stem = test_dir_name + "/" + dir_name + "/style" + index_name
            test_x = get_test_data(stem + ".txt")
            test_x = sequence.pad_sequences(test_x, maxlen=maxlen)
            print(("test_x shape:"+str(test_x.shape)))
            scores = model.predict_proba(test_x)
            # ``with`` closes the output even if a write raises part-way.
            with open(stem + "_classification.txt", "w") as out_file:
                for score in scores:
                    out_file.write(str(score[0])+"\n")
    return