Exemple #1
0
 def generate_data(self, text):
     """Build a vocabulary and per-sentence binary occurrence vectors.

     Args:
         text: Either a string, which is split with
             ``tools.seperate_sentences``, or an already-split iterable of
             sentences, which is used as is.

     Returns:
         A ``(words, vector)`` tuple. ``words`` lists each kept word once,
         in order of first appearance. ``vector`` holds one row per
         sentence; ``vector[s][i]`` is 1 when ``words[i]`` was kept from
         sentence ``s`` and 0 otherwise.

     A word is kept when the tag paired with it by ``tools.sen_pog``
     contains "n", "v" or "m" (presumably part-of-speech tags -- confirm
     against ``tools``).
     """
     if isinstance(text, str):
         sentences = tools.seperate_sentences(text)
     else:
         sentences = text

     words = []
     seen = set()  # mirrors `words` for O(1) duplicate checks; assumes hashable words
     kept_per_sentence = []
     for sentence in sentences:
         kept = set()
         for word, tag in tools.sen_pog(sentence):
             if "n" in tag or "v" in tag or "m" in tag:
                 kept.add(word)
                 if word not in seen:
                     seen.add(word)
                     words.append(word)
         kept_per_sentence.append(kept)

     # The vocabulary is only complete after every sentence has been read,
     # so the rows are built in a second pass.
     vector = [
         [1 if word in kept else 0 for word in words]
         for kept in kept_per_sentence
     ]
     return words, vector
Exemple #2
0
 def preprocess(self, text):
     """Split text into sentences and separate each sentence's words and tags.

     Args:
         text: Input passed to ``tools.seperate_sentences``.

     Returns:
         A ``(sens, sens_words, sens_tag)`` tuple: the sentences as returned
         by ``tools.seperate_sentences``, then one list of words and one
         list of tags per sentence, taken from the pairs produced by
         ``tools.sen_pog``.
     """
     sentences = tools.seperate_sentences(text)
     words_by_sentence = []
     tags_by_sentence = []
     for sentence in sentences:
         # Unpack each pair once into plain tuples, then split the columns.
         pairs = [(word, tag) for word, tag in tools.sen_pog(sentence)]
         words_by_sentence.append([word for word, _ in pairs])
         tags_by_sentence.append([tag for _, tag in pairs])
     return sentences, words_by_sentence, tags_by_sentence
Exemple #3
0
 def vectorize(self, text):
     """Collect the filtered words of each sentence and run TextRank on them.

     For every sentence from ``tools.seperate_sentences(text)``, keeps the
     words whose tag (from ``tools.sen_pog``) is exactly "n" or contains
     "v", then passes the list of per-sentence word lists to
     ``self.tr.textrank_matrix``.

     Args:
         text: Input passed to ``tools.seperate_sentences``.
     """
     matrix = [
         [word for word, tag in tools.sen_pog(sentence) if tag == "n" or "v" in tag]
         for sentence in tools.seperate_sentences(text)
     ]
     # NOTE(review): the result is bound but not returned in the code visible
     # here -- confirm whether a `return` (or further processing) is missing.
     tr_res = self.tr.textrank_matrix(matrix)
Exemple #4
0
def text2pic(text):
    """Build a word co-occurrence matrix from the sentences of ``text``.

    A word becomes a node when the tag paired with it by ``tools.sen_pog``
    contains "n", "v" or "m" (presumably part-of-speech tags -- confirm
    against ``tools``). For every pair of kept word occurrences in the same
    sentence, the matrix cell for that pair is incremented in both
    directions.

    Args:
        text: Input passed to ``tools.seperate_sentences``.

    Returns:
        A ``(matrix, nodes)`` tuple. ``nodes`` lists each kept word once, in
        order of first appearance; ``matrix`` is a ``len(nodes)`` x
        ``len(nodes)`` list of lists where ``matrix[a][b]`` counts
        co-occurrences of ``nodes[a]`` and ``nodes[b]``.
    """
    nodes = []
    node_index = {}  # word -> position in `nodes`; replaces repeated nodes.index()
    sentence_indices = []
    for sen in tools.seperate_sentences(text):
        kept = []
        for w, p in tools.sen_pog(sen):
            if "n" in p or "v" in p or "m" in p:
                if w not in node_index:
                    node_index[w] = len(nodes)
                    nodes.append(w)
                kept.append(node_index[w])
        sentence_indices.append(kept)

    size = len(nodes)
    matrix = [[0] * size for _ in range(size)]
    for kept in sentence_indices:
        for pos, a in enumerate(kept):
            for b in kept[pos + 1:]:
                # Two separate increments keep the matrix symmetric; a word
                # repeated within a sentence adds 2 to its diagonal cell.
                matrix[a][b] += 1
                matrix[b][a] += 1
    return matrix, nodes
Exemple #5
0
def sum2pic(text, nodes):
    """Build a co-occurrence matrix for ``text`` over a fixed node list.

    Only words that are present in ``nodes`` and whose tag (from
    ``tools.sen_pog``) contains "n", "v" or "m" are counted. For every pair
    of such word occurrences in the same sentence, the matrix cell for that
    pair is incremented in both directions.

    Args:
        text: Input passed to ``tools.seperate_sentences``.
        nodes: Sequence of words defining the matrix rows and columns.

    Returns:
        A ``len(nodes)`` x ``len(nodes)`` list of lists where
        ``matrix[a][b]`` counts co-occurrences of ``nodes[a]`` and
        ``nodes[b]``.
    """
    sens = tools.seperate_sentences(text)

    # First-occurrence index of every node, matching list.index() semantics
    # if `nodes` contains duplicates. Assumes node words are hashable.
    node_index = {}
    for position, node in enumerate(nodes):
        node_index.setdefault(node, position)

    sentence_indices = []
    for sen in sens:
        kept = []
        for w, p in tools.sen_pog(sen):
            if ("n" in p or "v" in p or "m" in p) and w in node_index:
                kept.append(node_index[w])
        sentence_indices.append(kept)

    size = len(nodes)
    matrix = [[0] * size for _ in range(size)]
    for kept in sentence_indices:
        for pos, a in enumerate(kept):
            for b in kept[pos + 1:]:
                # Two separate increments keep the matrix symmetric; a word
                # repeated within a sentence adds 2 to its diagonal cell.
                matrix[a][b] += 1
                matrix[b][a] += 1
    return matrix