Example #1
def read_data_use(option, sen2id):
    # Read the corpus at option.use_data_path and convert every sentence
    # into a list of word ids via the sen2id callable.
    file_name = option.use_data_path
    max_length = option.num_steps
    dict_size = option.dict_size
    with open(file_name) as f:
        data = []
        for line in f:
            # Lowercase, whitespace-tokenize, and map tokens to ids.
            data.append(sen2id(line.strip().lower().split()))
    # array_data (project helper, not shown on this page) packs the id lists
    # into the fixed-length array format expected by the model.
    data_new = array_data(data, max_length, dict_size)
    return data_new  # sentence
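Neither sen2id nor option is defined in this snippet; the following minimal sketch (hypothetical toy vocabulary and settings, not part of the original project) shows the shape of the inputs read_data_use expects and the token-to-id mapping it applies per line.

from types import SimpleNamespace

# Hypothetical toy vocabulary; the real project loads it from its dictionary file.
vocab = {'<unk>': 0, 'the': 1, 'cat': 2, 'sat': 3}

def sen2id(tokens):
    # Map each token to its id, falling back to <unk> for out-of-vocabulary words.
    return [vocab.get(t, vocab['<unk>']) for t in tokens]

option = SimpleNamespace(
    use_data_path='use_data.txt',  # one sentence per line (assumed format)
    num_steps=15,                  # maximum sentence length
    dict_size=len(vocab),
)

line = 'The cat sat .'
print(sen2id(line.strip().lower().split()))  # [1, 2, 3, 0]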
Example #2
import time

import numpy as np
import RAKE             # keyword extraction (python-rake interface assumed)
from zpar import ZPar   # ZPar part-of-speech tagger (python-zpar wrapper assumed)

# array_data and keyword_pos2sta_vec are project-level helpers not shown on this page.


def read_data_use(option, sen2id):
    # Read the corpus, convert sentences to word ids, and build one binary
    # keyword/POS indicator vector (sta_vec) per sentence.
    file_name = option.use_data_path
    max_length = option.num_steps
    dict_size = option.dict_size
    time1 = time.time()
    Rake = RAKE.Rake(RAKE.SmartStopList())
    z = ZPar(option.pos_path)
    tagger = z.get_tagger()
    time2 = time.time()
    print("read data load time: ", time2 - time1)
    with open(file_name) as f:
        data = []
        sta_vec_list = []
        for line in f:
            # Truncate sentences longer than 15 tokens.
            if len(line.strip().split()) > 15:
                line = ' '.join(line.strip().split()[:15])
            sta_vec = list(np.zeros([option.num_steps - 1]))
            # RAKE keyword phrases and ZPar POS tags for the sentence.
            keyword = Rake.run(line.strip())
            pos_list = tagger.tag_sentence(line.strip()).split()
            pos = list(zip(*[x.split('/') for x in pos_list]))[0]
            if keyword:
                # Keep only the phrase strings, dropping the RAKE scores.
                keyword = list(list(zip(*keyword))[0])
                keyword_new = []
                linewords = line.strip().split()
                # Locate every token position covered by a keyword phrase.
                for i in range(len(linewords)):
                    for item in keyword:
                        length11 = len(item.split())
                        if ' '.join(linewords[i:i + length11]) == item:
                            keyword_new.extend(
                                [i + k for k in range(length11)])
                # Mark those positions in the binary indicator vector.
                for i in range(len(keyword_new)):
                    ind = keyword_new[i]
                    if ind <= option.num_steps - 2:
                        sta_vec[ind] = 1
            if option.keyword_pos:
                sta_vec_list.append(keyword_pos2sta_vec(option, sta_vec, pos))
            else:
                sta_vec_list.append(list(np.zeros([option.num_steps - 1])))
            data.append(sen2id(line.strip().lower().split()))
    data_new = array_data(data, max_length, dict_size)
    return data_new, sta_vec_list  # sentence, keyvector
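The double loop above slides over the sentence tokens and matches each RAKE phrase as a contiguous n-gram, collecting every token position it covers. A self-contained toy run of that matching logic, with hand-written keywords standing in for Rake.run output:

import numpy as np

num_steps = 10
linewords = 'the quick brown fox jumps over the lazy dog'.split()
keyword = ['quick brown fox', 'lazy dog']   # stand-in for RAKE phrases

sta_vec = list(np.zeros([num_steps - 1]))
keyword_new = []
for i in range(len(linewords)):
    for item in keyword:
        length = len(item.split())
        if ' '.join(linewords[i:i + length]) == item:
            keyword_new.extend([i + k for k in range(length)])
for ind in keyword_new:
    if ind <= num_steps - 2:
        sta_vec[ind] = 1

print(keyword_new)  # [1, 2, 3, 7, 8]
print(sta_vec)      # positions 1-3 and 7-8 are set to 1, the rest stay 0.0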
Example #3
import numpy as np
import RAKE             # keyword extraction (python-rake interface assumed)
from zpar import ZPar   # ZPar part-of-speech tagger (python-zpar wrapper assumed)


def read_data_use1(option, sen2id):
    # Variant of read_data_use that locates keyword tokens with list.index()
    # and prints intermediate results for inspection.
    file_name = option.use_data_path
    max_length = option.num_steps
    dict_size = option.dict_size
    Rake = RAKE.Rake(RAKE.SmartStopList())
    z = ZPar(option.pos_path)
    tagger = z.get_tagger()
    with open(file_name) as f:
        data = []
        sta_vec_list = []
        for line in f:
            print('sentence:' + line)
            sta_vec = list(np.zeros([option.num_steps - 1]))
            keyword = Rake.run(line.strip())
            pos_list = tagger.tag_sentence(line.strip()).split()
            pos = list(zip(*[x.split('/') for x in pos_list]))[0]
            print(keyword)
            if keyword:
                # Keep only the phrase strings, dropping the RAKE scores.
                keyword = list(list(zip(*keyword))[0])
                keyword_new = []
                for item in keyword:
                    # Index of each keyword word in the sentence
                    # (list.index returns the first occurrence only).
                    tem1 = [
                        line.strip().split().index(x) for x in item.split()
                        if x in line.strip().split()
                    ]
                    print('id', tem1)
                    keyword_new.extend(tem1)
                print(keyword_new)
                # Mark the keyword positions in the binary indicator vector.
                for i in range(len(keyword_new)):
                    ind = keyword_new[i]
                    if ind <= option.num_steps - 2:
                        sta_vec[ind] = 1
            if option.keyword_pos:
                sta_vec_list.append(keyword_pos2sta_vec(option, sta_vec, pos))
            else:
                sta_vec_list.append(list(np.zeros([option.num_steps - 1])))
            print(keyword_pos2sta_vec(option, sta_vec, pos))
            data.append(sen2id(line.strip().lower().split()))
    data_new = array_data(data, max_length, dict_size)
    return data_new, sta_vec_list  # sentence, keyvector
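The two variants map keywords back to token positions differently: read_data_use (Example #2) matches each phrase as a contiguous window over the sentence, while read_data_use1 looks up each keyword word with list.index(), which returns only the first occurrence of that word. A small self-contained comparison with a toy sentence and phrase (not from the project's data):

words = 'the dog chased the red dog'.split()
phrase = 'red dog'
# read_data_use1 style: first occurrence of each word, regardless of the phrase position.
print([words.index(x) for x in phrase.split() if x in words])       # [4, 1]
# read_data_use (Example #2) style: match the phrase as a contiguous window.
print([i + k
       for i in range(len(words))
       if ' '.join(words[i:i + len(phrase.split())]) == phrase
       for k in range(len(phrase.split()))])                        # [4, 5]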