Code example #1
File: output.py Project: bethard/timenorm
def main(model_path, input_path, doc_list, raw_data_path, preocessed_path, output_pred_path, output_format, pred=True, portion=0, split_output=False):
    file_n = len(doc_list)
    #################### for document counts from 20 to 40 ####################
    folder_n = int(np.round(np.divide(float(file_n), 20.00)))
    folder = list(map(lambda x: int(x), np.linspace(0, file_n, folder_n + 1)))
    #################### for document counts of 40 or more ####################
    # folder_n = int(np.divide(file_n, 20))
    # folder = list(map(lambda x: int(x), np.linspace(0, file_n, folder_n + 1)))

    model = load_model(model_path)
    if split_output:
        k = portion
        for version in range(k, k + 1):  # process only the chunk selected by `portion`
            start = folder[version]
            end = folder[version + 1]
            doc_list_sub = doc_list[start:end]
            #input = read.load_hdf5(input_path+"/test_split_input"+str(version),["char","pos","unic"])
            #input = read.load_hdf5(input_path + "/split_input" + str(version), ["char", "pos", "unic"])
            input = read.load_hdf5(input_path + "/input" + str(version), ["char", "pos", "unic"])
            #input = read.load_hdf5(input_path + "/train_input" + str(version), ["char",  "unic"])
            gold = None
            generate_output_multiclass(model, input, gold, doc_list_sub, preocessed_path, output_pred_path, pred=pred, data_folder=str(version), format_abbre=output_format)
    else:
        start = 0
        end = file_n
        doc_list_sub = doc_list[start:end]
        input = read.load_hdf5(input_path + "/input", ["char", "pos", "unic"])
        gold = None
        generate_output_multiclass(model, input, gold, doc_list_sub, preocessed_path, output_pred_path, pred=pred, data_folder="", format_abbre=output_format)
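
A minimal sketch of how this variant of main might be invoked. Every value below (paths, document IDs, output format) is a placeholder for illustration, not taken from the bethard/timenorm project, and load_model/read are assumed to be the project's own helpers:

doc_list = ["doc_0001", "doc_0002"]                # assumed document identifiers
main(
    model_path="models/char_model.h5",             # assumed path to a saved Keras model for load_model()
    input_path="data/hdf5",                        # directory holding the "input*" HDF5 files
    doc_list=doc_list,
    raw_data_path="data/raw",
    preocessed_path="data/processed",              # parameter name kept as spelled in the original signature
    output_pred_path="output/pred",
    output_format="anafora",                       # assumed format abbreviation
    pred=True,
    portion=0,
    split_output=False,                            # False: process all documents in one pass
)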
Code example #2
File: output.py Project: dongfang91/timenorm
def main(model_path, input_path, doc_list, raw_data_path, preocessed_path, output_pred_path, output_format, pred="true", evaluate="true"):
    file_n = len(doc_list)
    #################### for document counts from 20 to 40 ####################
    # folder_n = int(np.ceil(np.divide(float(file_n), 20.00)))
    # folder = list(map(lambda x: int(x), np.linspace(0, file_n, folder_n + 1)))
    #################### for document counts of 40 or more ####################
    folder_n = int(np.divide(file_n, 20))  # cast to int so np.linspace receives an integer sample count
    folder = list(map(lambda x: int(x), np.linspace(0, file_n, folder_n + 1)))  # wrap in list() so folder[version] works in Python 3

    model = load_model(model_path)
    if file_n > 20:
        k = 0
        for version in range(k, k + 1):  # process only the first chunk
            start = folder[version]
            end = folder[version + 1]
            doc_list_sub = doc_list[start:end]
            #input = read.load_hdf5(input_path+"/test_split_input"+str(version),["char","pos","unic"])
            #input = read.load_hdf5(input_path + "/split_input" + str(version), ["char", "pos", "unic"])
            input = read.load_hdf5(input_path + "/input_" + str(version), ["char", "pos", "unic"])
            #input = read.load_hdf5(input_path + "/train_input" + str(version), ["char",  "unic"])
            gold = None
            generate_output_multiclass(model, input, gold, doc_list_sub, preocessed_path, output_pred_path, pred=pred, data_folder=str(version), format_abbre=output_format)
    else:
        start = 0
        end = file_n
        doc_list_sub = doc_list[start:end]
        input = read.load_hdf5(input_path + "/input", ["char", "pos", "unic"])
        #input = read.load_hdf5(input_path + "/train_input", ["char", "unic"])
        gold = None
        generate_output_multiclass(model, input, gold, doc_list_sub, preocessed_path, output_pred_path, pred="true", data_folder="", format_abbre=output_format)

    if evaluate == "true":
        output.evaluate(preocessed_path, output_pred_path, raw_data_path, doc_list, output_format)
Code example #3
File: output.py Project: praveenarallabandi/timenorm
def main(model_path, input_path, doc_list, raw_data_path, preocessed_path, output_pred_path, output_format, pred=True, portion=0, split_output=False):
    file_n = len(doc_list)
    #################### for document counts from 20 to 40 ####################
    folder_n = int(np.round(np.divide(float(file_n), 20.00)))
    folder = list(map(lambda x: int(x), np.linspace(0, file_n, folder_n + 1)))
    #################### for document counts of 40 or more ####################
    # folder_n = int(np.divide(file_n, 20))
    # folder = list(map(lambda x: int(x), np.linspace(0, file_n, folder_n + 1)))

    model = load_model(model_path)
    if split_output:
        k = portion
        for version in range(k, k + 1):  # process only the chunk selected by `portion`
            start = folder[version]
            end = folder[version + 1]
            doc_list_sub = doc_list[start:end]
            input = read.load_hdf5(input_path + "/input" + str(version), ["char"])[0]
            sent_len = sentence_length(input)
            generate_output_multiclass(sent_len, model, input, doc_list_sub, preocessed_path, output_pred_path, pred=pred, data_folder=str(version), format_abbre=output_format)
    else:
        start = 0
        end = file_n
        doc_list_sub = doc_list[start:end]
        print('INPUT PATH - ', input_path + "/input", ["char"])
        input = read.load_hdf5(input_path + "/input", ["char"])[0]
        print('INPUT - ', input)
        sent_len = sentence_length(input)
        generate_output_multiclass(sent_len, model, input, doc_list_sub, preocessed_path, output_pred_path, pred=pred, data_folder="", format_abbre=output_format)

    # unlike code example #2, evaluation is always run in this variant
    output.evaluate(preocessed_path, output_pred_path, raw_data_path, doc_list, output_format)
Code example #4
    """
    Transforms sentence into a list of indices. Post-Pad with zeroes.
    """
    x = []
    for word in sent:
        if word in word_idx_map:
            x.append(word_idx_map[word])
        else:
            x.append(0)

    while len(x) < 356:
        x.append(4)
    return x
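
A quick, illustrative check of the padding helper using a toy vocabulary (sent_to_indices is the placeholder name introduced above, and the word-to-index map here is made up):

toy_map = {"the": 1, "cat": 2, "sat": 3}           # made-up word-to-index map
indices = sent_to_indices(["the", "dog", "sat"], toy_map)
print(indices[:5], len(indices))                   # -> [1, 0, 3, 4, 4] 356  ("dog" is unknown, so 0)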


char, pos, unicate = read.load_hdf5("data/cvcolon_train_input",
                                    ["char", "pos", "unic"])

char2int = read.readfrom_json("data/char2int")
int2char = {index: char for char, index in char2int.items()}  # invert the char-to-index mapping
sent = list()
sent_len = list()
for char_x_sent in char:  # 2637    8820     12760     ####2637     6183    3940     7140
    sent_single = [
        int2char[i] if i != 88 and i != 0 else ' ' for i in char_x_sent
    ]
    sent.append(sent_single)

import torch

# load the pickled language model checkpoint (judging by the file name, the Flair
# news-english forward character LM)
forward_flairTorch = torch.load("data/lm-news-english-forward-v0.2rc.pt")
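
As a minimal sanity check (not part of the original script), the unpickled object can be inspected; resolving the pickle requires the flair package to be importable, assuming the checkpoint stores a Flair language model instance:

print(type(forward_flairTorch))                    # inspect the class of the loaded object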