예제 #1
0
def parse_dir(dir_name):
    """Score every model sub-directory of ``dir_name`` and print a summary table.

    The style labels are read from ``<dir_name>/s_test.txt`` (one integer per
    line).  Every entry returned by ``get_sub_dirnames(dir_name)`` whose name
    does not end in ``"txt"`` is treated as a model directory and scored with
    ``parse_model_class`` and ``parse_model_sem``.  One row per model is
    printed under the header
    ``transfer_strength content_reservation mixture``.

    Args:
        dir_name: Path of the directory holding ``s_test.txt`` and the model
            sub-directories (no trailing slash; paths are built with "/").

    Returns:
        A tuple ``(res_class, res_sem)`` of two lists holding, in iteration
        order, the value returned by ``parse_model_class`` and
        ``parse_model_sem`` for each model directory.

    Raises:
        ValueError: If a line of ``s_test.txt`` is not an integer.
    """
    res_class, res_sem = [], []

    # `with` guarantees the handle is released even if a label fails to parse.
    with open(dir_name + "/s_test.txt", "r") as f_s:
        style_type = [int(x) for x in f_s]

    print("dir_name model_type \t transfer_strength content_reservation mixture")
    print("================================================================================")
    model_types = [i for i in get_sub_dirnames(dir_name) if not i.endswith("txt")]
    for model_type in model_types:
        model_name = dir_name + "/" + model_type + '/'
        score_class = parse_model_class(model_name, style_type)
        score_sem = parse_model_sem(model_name, style_type)

        # Mixture is product / sum; when both scores are zero the sum is zero
        # too, so report 0.0 instead of raising ZeroDivisionError.
        total = score_class + score_sem
        mixture = score_class * score_sem / total if total else 0.0

        # "memory" is a shorter name, so it gets one extra tab to keep the
        # columns lined up with the other model types.
        pad = "\t\t\t" if model_type == "memory" else "\t\t"
        print(dir_name, "\t", model_type, pad, score_class, "\t\t", score_sem, "\t", mixture)

        res_class.append(score_class)
        res_sem.append(score_sem)

    print("===============================================================================")
    return res_class, res_sem
예제 #2
0
def gen_score(test_dir_name):
    """Run ``com_file`` on both style outputs of every model sub-directory.

    An ``Embedding(100)`` instance supplies the word dictionary
    (``get_all_emb()``).  For every entry of ``get_sub_dirnames(test_dir_name)``
    whose name does not end in ``"txt"`` and for style index "0" and "1",
    ``com_file`` is called with:

    * ``<test_dir_name>/q_test.txt``
    * ``<test_dir_name>/<model>/style<index>.txt``
    * ``<test_dir_name>/<model>/style<index>_semantics.txt``
      (presumably the file ``com_file`` writes -- confirm against com_file)
    * the word dictionary

    Args:
        test_dir_name: Directory containing ``q_test.txt`` and the model
            sub-directories (no trailing slash).

    Returns:
        None.
    """
    word_dict = Embedding(100).get_all_emb()

    # The query file is the same for every model and style index.
    q_file = test_dir_name + "/q_test.txt"

    model_dirs = [name for name in get_sub_dirnames(test_dir_name)
                  if not name.endswith("txt")]
    for model_dir in model_dirs:
        style_prefix = test_dir_name + "/" + model_dir + "/style"
        for style_index in ("0", "1"):
            com_file(
                q_file,
                style_prefix + style_index + ".txt",
                style_prefix + style_index + "_semantics.txt",
                word_dict,
            )
    return
예제 #3
0
def get_sample_data(dir_name):
    """Write up to 100 shuffled sentence pairs to ``toscore.xlsx`` for scoring.

    Lines of ``<dir_name>/q_test.txt`` are paired position-by-position with
    lines of ``<dir_name>/embedding/style0.txt``; a pair is kept only when the
    matching line of ``<dir_name>/s_test.txt`` is a non-zero integer.  The
    kept pairs are shuffled with the fixed seed 1027 and the first 100 (or
    fewer, if not enough exist) are written, four spreadsheet rows per pair:

    * row ``4k``     -- ``"news"``  and the ``q_test.txt`` line
    * row ``4k + 1`` -- ``"paper"`` and the ``style0.txt`` line
    * row ``4k + 2`` -- ``"score"`` and an empty cell
    * row ``4k + 3`` -- left blank

    The number of written pairs whose two sentences are identical after
    stripping is printed.

    Args:
        dir_name: Directory containing ``q_test.txt``, ``s_test.txt`` and the
            ``embedding`` sub-directory (no trailing slash).

    Returns:
        None.  Side effects: creates ``toscore.xlsx`` in the current working
        directory and re-seeds the module-level ``random`` generator.

    Raises:
        ValueError: If a line of ``s_test.txt`` is not an integer.
    """
    import io  # function-scope import keeps this block self-contained

    def read_lines(path):
        # io.open yields decoded text on both Python 2 and 3, so no later
        # ``.decode`` is needed; newline="\n" splits on "\n" only, and the
        # ``with`` closes the handle.
        with io.open(path, encoding="utf-8", newline="\n") as handle:
            return handle.readlines()

    # Only the "embedding" model output is sampled.
    subdir_names = ["embedding"]

    f_q = read_lines(dir_name + "/q_test.txt")
    f_s = read_lines(dir_name + "/s_test.txt")

    pairs = []
    for sub_dir_name in subdir_names:
        f0 = read_lines(dir_name + "/" + sub_dir_name + "/style0.txt")
        # zip stops at the shortest file; surplus lines in the others are ignored.
        for source, label, transferred in zip(f_q, f_s, f0):
            if int(label.strip()) == 0:
                continue
            pairs.append((source.strip(), transferred.strip()))

    # Shuffling the pairs once keeps both sentences aligned.  The permutation
    # depends only on the seed and the list length, so the order is the same
    # as shuffling two parallel lists with the same seed.
    random.seed(1027)
    random.shuffle(pairs)

    workbook = xlsxwriter.Workbook('toscore.xlsx')
    worksheet = workbook.add_worksheet()

    cc = 0
    # Slicing caps the sample at 100 and copes with fewer available pairs.
    for count, (source, transferred) in enumerate(pairs[:100]):
        if source == transferred:
            cc += 1

        worksheet.write(count * 4, 0, "news")
        worksheet.write(count * 4 + 1, 0, "paper")
        worksheet.write(count * 4 + 2, 0, "score")
        worksheet.write(count * 4, 1, source)
        worksheet.write(count * 4 + 1, 1, transferred)
        worksheet.write(count * 4 + 2, 1, "")
    print(cc)
    workbook.close()
예제 #4
0
def get_sample_data(dir_name):
    """Write up to 100 shuffled sentence pairs to ``toscore1.xlsx``.

    For every entry of ``get_sub_dirnames(dir_name)`` whose name does not end
    in ``"txt"``, lines of ``<dir_name>/q_test.txt`` are paired
    position-by-position with lines of ``<dir_name>/<sub_dir>/style0.txt``; a
    pair is kept unless the matching line of ``<dir_name>/s_test.txt`` is the
    integer 1.  Pairs from all sub-directories are pooled, shuffled with the
    fixed seed 1027, and the first 100 (or fewer, if not enough exist) are
    written one per row: column 0 holds the ``q_test.txt`` line, column 1 the
    ``style0.txt`` line.

    The number of written pairs whose two sentences are identical after
    stripping is printed.

    Args:
        dir_name: Directory containing ``q_test.txt``, ``s_test.txt`` and the
            model sub-directories (no trailing slash).

    Returns:
        None.  Side effects: creates ``toscore1.xlsx`` in the current working
        directory and re-seeds the module-level ``random`` generator.

    Raises:
        ValueError: If a line of ``s_test.txt`` is not an integer.
    """
    import io  # function-scope import keeps this block self-contained

    def read_lines(path):
        # io.open yields decoded text on both Python 2 and 3, so no later
        # ``.decode`` is needed; newline="\n" splits on "\n" only, and the
        # ``with`` closes the handle.
        with io.open(path, encoding="utf-8", newline="\n") as handle:
            return handle.readlines()

    subdir_names = [i for i in get_sub_dirnames(dir_name) if not i.endswith("txt")]

    # The query and label files are shared by every sub-directory.
    f_q = read_lines(dir_name + "/q_test.txt")
    f_s = read_lines(dir_name + "/s_test.txt")

    pairs = []
    for sub_dir_name in subdir_names:
        f0 = read_lines(dir_name + "/" + sub_dir_name + "/style0.txt")
        # zip stops at the shortest file; surplus lines in the others are ignored.
        for source, label, transferred in zip(f_q, f_s, f0):
            if int(label.strip()) == 1:
                continue
            pairs.append((source.strip(), transferred.strip()))

    # Shuffling the pairs once keeps both sentences aligned.  The permutation
    # depends only on the seed and the list length, so the order is the same
    # as shuffling two parallel lists with the same seed.
    random.seed(1027)
    random.shuffle(pairs)

    workbook = xlsxwriter.Workbook('toscore1.xlsx')
    worksheet = workbook.add_worksheet()

    cc = 0
    # Slicing caps the sample at 100 and copes with fewer available pairs.
    for count, (res0, res1) in enumerate(pairs[:100]):
        if res0 == res1:
            cc += 1
        worksheet.write(count, 0, res0)
        worksheet.write(count, 1, res1)
    # Call form works on both Python 2 and 3 (the statement form is 2-only).
    print(cc)
    workbook.close()
예제 #5
0
def test(test_dir_name):
    """Classify every style output file and save one score per line.

    The model is rebuilt from ``./model/model_architecture.json`` and
    ``./model/model_weights.h5``.  For every entry of
    ``get_sub_dirnames(test_dir_name)`` whose name does not end in ``"txt"``
    and for style index "0" and "1", the data loaded by ``get_test_data`` from
    ``<test_dir_name>/<model>/style<index>.txt`` is padded to the module-level
    ``maxlen``, scored, and the first component of each prediction is written
    to ``<test_dir_name>/<model>/style<index>_classification.txt``.

    Args:
        test_dir_name: Directory containing the model sub-directories
            (no trailing slash).

    Returns:
        None.
    """
    # `with` releases the architecture file as soon as it has been read.
    with open('./model/model_architecture.json') as arch_file:
        model = model_from_json(arch_file.read())
    model.load_weights('./model/model_weights.h5')

    subdir_names = [i for i in get_sub_dirnames(test_dir_name) if not i.endswith("txt")]

    for dir_name in subdir_names:
        for index_name in ["0", "1"]:
            style_prefix = test_dir_name + "/" + dir_name + "/style" + index_name
            test_x = get_test_data(style_prefix + ".txt")
            test_x = sequence.pad_sequences(test_x, maxlen=maxlen)
            print(("test_x shape:"+str(test_x.shape)))
            # NOTE(review): predict_proba is only available on older Keras
            # Sequential models -- confirm the Keras version this runs against.
            scores = model.predict_proba(test_x)
            # `with` closes the score file even if a write fails midway.
            with open(style_prefix + "_classification.txt", "w") as f:
                for score in scores:
                    f.write(str(score[0])+"\n")
    return