Beispiel #1
0
 def analyze(self, text):
     """Split ``text`` into sentences and tokenize every sentence.

     Sentences come from ``tools.seperate_sentences``; each one is passed
     to ``tools.seperate_pog``, which yields a pair (presumably the words
     and their part-of-speech tags -- confirm against ``tools``).

     Returns:
         A 3-tuple ``(sentences, words_per_sentence, tags_per_sentence)``
         where the two lists are aligned with ``sentences``.
     """
     sentences = tools.seperate_sentences(text)
     words_per_sentence = []
     tags_per_sentence = []
     for sentence in sentences:
         words, tags = tools.seperate_pog(sentence)
         words_per_sentence.append(words)
         tags_per_sentence.append(tags)
     return sentences, words_per_sentence, tags_per_sentence
Beispiel #2
0
 def get_sens_words(self,text):
     """Return the word list of every sentence in ``text``.

     As a side effect, ``self.words_tags_dict`` is filled with the tag of
     each word the first time that word is encountered; later occurrences
     never overwrite an existing entry.

     Returns:
         A list with one entry per sentence, each entry being the first
         element returned by ``tools.seperate_pog`` for that sentence.
     """
     words_per_sentence = []
     for sentence in tools.seperate_sentences(text):
         words, tags = tools.seperate_pog(sentence)
         for position, word in enumerate(words):
             # Look the tag up only for unseen words, exactly as before.
             if word not in self.words_tags_dict:
                 self.words_tags_dict[word] = tags[position]
         words_per_sentence.append(words)
     return words_per_sentence
Beispiel #3
0
 def analyze(self, text):
     """Split ``text`` into sentences, drop marker sentences, tokenize the rest.

     Any sentence containing the literal "原标题" is skipped entirely. The
     remaining sentences are passed to ``tools.seperate_pog``.

     Returns:
         A 3-tuple ``(kept_sentences, words_per_sentence,
         tags_per_sentence)``; the three lists are aligned with each other.
     """
     kept_sentences, words_per_sentence, tags_per_sentence = [], [], []
     for sentence in tools.seperate_sentences(text):
         if "原标题" not in sentence:
             kept_sentences.append(sentence)
             words, tags = tools.seperate_pog(sentence)
             words_per_sentence.append(words)
             tags_per_sentence.append(tags)
     return kept_sentences, words_per_sentence, tags_per_sentence
Beispiel #4
0
    def preprocess(self, text):
        """Tokenize every sentence of ``text`` except the first one.

        The sentence at index 0 is never processed, and any later sentence
        containing the literal "原标题" is skipped as well.

        Returns:
            A 3-tuple ``(kept_sentences, words_per_sentence,
            tags_per_sentence)``; the three lists are aligned with each
            other.
        """
        kept_sentences = []
        words_per_sentence = []
        tags_per_sentence = []
        all_sentences = tools.seperate_sentences(text)
        # Start from the second sentence; index 0 is intentionally ignored.
        for sentence in all_sentences[1:]:
            if "原标题" not in sentence:
                kept_sentences.append(sentence)
                words, tags = tools.seperate_pog(sentence)
                words_per_sentence.append(words)
                tags_per_sentence.append(tags)

        return kept_sentences, words_per_sentence, tags_per_sentence
Beispiel #5
0
 def build_graph(self,sentences):
     """Build a bipartite word/sentence index over ``sentences``.

     Each sentence is tokenized with ``tools.seperate_pog``. A word is
     selected when its tag (as a string) is in ``self.targets``, when
     ``"all"`` is in ``self.targets``, or when the tag contains ``"n"`` and
     ``"all_n"`` is in ``self.targets``.

     Returns:
         A pair ``(entry_graph, sent_graph)``: ``entry_graph`` maps each
         selected word to the set of sentence indices containing it, and
         ``sent_graph`` maps every sentence index to the set of selected
         words in that sentence (empty set if none were selected).
     """
     entry_graph = {}
     sent_graph = {}
     for sent_idx, sentence in enumerate(sentences):
         selected_words = set()
         sent_graph[sent_idx] = selected_words
         words, tags = tools.seperate_pog(sentence)
         for position, word in enumerate(words):
             tag_name = str(tags[position])
             is_target = (
                 tag_name in self.targets
                 or "all" in self.targets
                 or ("n" in tag_name and "all_n" in self.targets)
             )
             if not is_target:
                 continue
             selected_words.add(word)
             entry_graph.setdefault(word, set()).add(sent_idx)
     return entry_graph,sent_graph
Beispiel #6
0
    # NOTE(review): this is the body of a function whose header is not shown;
    # `lines` and `abstract_path` are defined outside this excerpt.
    absts = ftools.read_lines(abstract_path)
    # For every abstract entry, record the index of the entry in `lines`
    # with the highest tools.sim score. Ties keep the earliest index, and
    # index 0 is recorded when no score is greater than 0.
    res = []
    for i in range(len(absts)):
        max_v, max_index = 0, 0
        for j in range(len(lines)):
            v = tools.sim(absts[i], lines[j])
            if v > max_v:
                max_v = v
                max_index = j
        res.append(max_index)
    print(res)

    # Tokenize every entry of `lines`; seperate_pog returns a pair
    # (presumably words and POS tags -- confirm against tools).
    sens, tags = [], []
    for line in lines:
        tmp0, tmp1 = tools.seperate_pog(line)
        sens.append(tmp0)
        tags.append(tmp1)
    gv = Graph_Vec()
    sensv, essayv = gv.vectorize(sens, tags)
    dist = tools.Dist()

    # NOTE(review): the fixed indices res[0..2] below require at least three
    # entries in `absts`; fewer would raise IndexError.
    print(sensv[res[0]])
    print(sensv[res[1]])
    print(sensv[res[2]])
    print(essayv)

    # Compare each selected sentence vector with the essay-level vector.
    print(dist.sim(sensv[res[0]], essayv))
    print(dist.sim(sensv[res[1]], essayv))
    print(dist.sim(sensv[res[2]], essayv))
    print("-----")
Beispiel #7
0
                if sens_words[i][j] in other_index_words.keys():
                    other_index = other_index_words[sens_words[i][j]]
                    tmp[labels_other_graph[other_index] + ks[1] + ks[0]] += 1
                    essay_vector[labels_other_graph[other_index]] += 1

            sens_vectors.append(tmp)
        return sens_vectors, essay_vector


if __name__ == "__main__":
    # Manual smoke run: vectorize one training file with MyVector and print
    # the per-sentence vectors followed by the essay vector.
    # (The Sen2Vec / Doc2Vec training runs are disabled here.)
    from src.tools import FileTools as ftools
    from src.tools import Tools as tools
    import Dir

    sens = ftools.read_lines(Dir.res + "/cleandata_604/news/training_4.txt")
    myvec = MyVector()
    # Tokenize each line once, then split the pairs into two aligned lists.
    pairs = [tools.seperate_pog(line) for line in sens]
    sens_words = [words for words, _ in pairs]
    sens_pog = [pogs for _, pogs in pairs]
    # `sens` is rebound from the raw lines to the sentence vectors.
    sens, essay = myvec.vectorize(sens_words, sens_pog)
    print(sens[0])
    for ss in sens:
        print(ss)
    print(essay)