Ejemplo n.º 1
0
        aspect_keywords.append(keywords_vector)

# Replace every ASCII punctuation character with a space in one pass.
# The cleaned text is written back to s['text'] (later code may read it
# from there) and also collected in `sentence`.
_punct_to_space = str.maketrans(string.punctuation,
                                ' ' * len(string.punctuation))

sentence = []
for s in sentences:
    s['text'] = s['text'].translate(_punct_to_space)
    sentence.append(s['text'])

# Split each cleaned sentence into a list of word tokens.
sents = [word_tokenize(sent) for sent in sentence]

# Wrap all tokenized sentences in one collection used for tf-idf statistics.
corpus = TextCollection(sents)

# tf_idf[k] holds one weight per token of sents[k] that, once lower-cased,
# is not a stop word (order of appearance is preserved).
# NOTE(review): the whole collection is passed as the `text` argument, so
# the term-frequency part is presumably measured over all sentences rather
# than over `sen` -- confirm this is intended.
# NOTE(review): terms are lower-cased here but `sents` keeps its original
# casing, so capitalised occurrences may not be matched -- verify.
tf_idf = []
for sen in sents:
    lowered = (token.lower() for token in sen)
    tf_idf.append([
        corpus.tf_idf(token, corpus)
        for token in lowered
        if token not in stop_words
    ])

# For every aspect, print the similarity between its first vector and each
# of its remaining vectors, scaled by the tf-idf weight of the literal
# term 'food'. Each aspect's output is followed by print('\n'), i.e. two
# newline characters.
for aspect in aspect_keywords:
    for vector in aspect[1:]:
        similarity = deal_data.cosine(aspect[0], vector)
        weight = corpus.tf_idf('food', corpus)
        print(similarity * weight)
    print('\n')
Ejemplo n.º 2
0
    # Collect the embedding of every non-stop-word token of sents[i] that can
    # be found in the vocabulary.
    sentences_vector = []
    for w in sents[i]:
        w = w.lower()
        if w not in stop_words:
            try:
                # Reverse lookup: take the key of words_index whose value
                # equals w. list.index raises ValueError when w is absent,
                # in which case the token is skipped.
                # NOTE(review): both lists are rebuilt for every token; a
                # value -> key mapping built once outside the loop would
                # avoid that -- confirm values are unique before changing.
                word_index = list(words_index.keys())[list(
                    words_index.values()).index(w)]
                sentences_vector.append(wordVectors[word_index])
            except ValueError:
                continue
    # count[k] = number of collected tokens whose weighted similarity to the
    # k-th entry of aspect_vector exceeds 0.0025.
    count = []
    for aspect in aspect_vector:
        word_aspect_cosine = []
        ci = 0
        for j in range(len(sentences_vector)):
            # NOTE(review): j indexes sentences_vector, which omits tokens
            # missing from words_index. If tf_idf[i] holds one weight per
            # non-stop-word token (its construction is not visible here),
            # the weight and the vector can belong to different words --
            # confirm.
            data_td = deal_data.cosine(aspect,
                                       sentences_vector[j]) * tf_idf[i][j]
            word_aspect_cosine.append(data_td)
            if data_td > 0.0025:
                ci = ci + 1
        count.append(ci)
        # aspect_cosine is created outside this excerpt.
        aspect_cosine.append(word_aspect_cosine)

    # One row of per-aspect counts per sentence.
    count_s.append(count)
    # # print(aspect_cosine)
    # cosine.append(aspect_cosine)

# Report the number of tokenized sentences, of raw sentence records and of
# per-sentence count rows.
print(len(sents))
print(len(sentences))
print(len(count_s))
#
# Counter initialised for the code that follows this excerpt.
count_a = 0
Ejemplo n.º 3
0
    # Per-aspect lists of word similarities for the current sentence.
    aspect_cosine = []
    # Collect the embedding of every non-stop-word token that can be found
    # in the vocabulary (sentences_vector is created before this excerpt).
    for w in s_w:
        w = w.lower()
        if w not in stop_words:
            try:
                # Reverse lookup of the words_index key whose value is w;
                # list.index raises ValueError for unknown words, which are
                # skipped.
                word_index = list(words_index.keys())[list(
                    words_index.values()).index(w)]
                sentences_vector.append(wordVectors[word_index])
            except ValueError:
                continue
    # Row layout: [sentence id, hits for aspect 0, hits for aspect 1, ...].
    count = [s['id']]
    for aspect in aspect_vector:
        word_aspect_cosine = []
        hits = 0
        for word_vector in sentences_vector:
            # Compute the similarity once and reuse it for both the stored
            # value and the threshold test.
            similarity = deal_data.cosine(aspect, word_vector)
            word_aspect_cosine.append(similarity)
            if similarity > 0.70:
                hits = hits + 1
        count.append(hits)
        aspect_cosine.append(word_aspect_cosine)

    count_s.append(count)
    # print(aspect_cosine)
    # cosine.append(aspect_cosine)
#
# Print one row of counts per line.
for data in count_s:
    print(data)

count_a = 0
for i in range(len(sentences)):
    index = [
Ejemplo n.º 4
0
    # Row layout: [sentence id, mean similarity to aspect 0's keywords, ...].
    average_cosine = [s['id']]
    # Gather the embedding of each non-stop-word token known to the
    # vocabulary; unknown tokens are skipped via the ValueError raised by
    # list.index.
    for raw_word in s_w:
        token = raw_word.lower()
        if token in stop_words:
            continue
        try:
            vocab_keys = list(words_index.keys())
            position = list(words_index.values()).index(token)
            sentences_vector.append(wordVectors[vocab_keys[position]])
        except ValueError:
            continue
    for keyword_vectors in aspect_keywords:
        # For each token: its mean similarity to all keyword vectors of
        # this aspect.
        word_cosine = []
        for word_vector in sentences_vector:
            total = 0
            for keyword_vector in keyword_vectors:
                total = total + deal_data.cosine(keyword_vector, word_vector)
            word_cosine.append(total / len(keyword_vectors))
        # Nothing is appended for a sentence with no usable token, so such
        # rows contain only the sentence id.
        if len(sentences_vector) == 0:
            continue
        average_cosine.append(sum(word_cosine) / len(word_cosine))
    cosine.append(average_cosine)

# print(cosine)

# Computing accuracy: how should sentences with two or more aspects be counted?

# print(len(sentences))
# print(len(cosine))
# for i in range(len(sentences)):
#     print(cosine[i])
#     print(sentences[i])
        w = w.lower()
        if w not in stop_words:
            try:
                word_index = list(words_index.keys())[list(
                    words_index.values()).index(w)]
                sentences_vector.append(wordVectors[word_index])
            except ValueError:
                continue
    # count[k] = number of sentence tokens whose similarity to at least one
    # keyword vector of the k-th aspect exceeds 0.7.
    count = []
    for data in aspect_keywords:
        c = 0
        for word_vector in sentences_vector:
            # any() stops at the first keyword above the threshold; this
            # assumes deal_data.cosine has no side effects.
            if any(deal_data.cosine(keyword, word_vector) > 0.7
                   for keyword in data):
                c = c + 1
        count.append(c)

    count_s.append(count)

# Print one row of per-aspect counts per line.
for count in count_s:
    print(count)

count_a = 0
for i in range(len(sentences)):
    index = [j for j, data in enumerate(count_s[i]) if data == max(count_s[i])]
    label_max = set()
    for data in index:
Ejemplo n.º 6
0
    # Whitespace-split the sentence text and gather the embedding of each
    # non-stop-word token known to the vocabulary.
    s_w = s['text'].split()
    sentences_vector = []
    # Row layout: [sentence id, mean similarity to aspect 0, aspect 1, ...].
    average_cosine = [s['id']]
    for raw_word in s_w:
        token = raw_word.lower()
        if token in stop_words:
            continue
        try:
            # Reverse lookup of the words_index key whose value is the
            # token; list.index raises ValueError for unknown words.
            vocab_keys = list(words_index.keys())
            position = list(words_index.values()).index(token)
            sentences_vector.append(wordVectors[vocab_keys[position]])
        except ValueError:
            continue
    for aspect in aspect_vector:
        total = 0
        for word_vector in sentences_vector:
            total = total + deal_data.cosine(aspect, word_vector)
        # Nothing is appended for a sentence with no usable token, so such
        # rows contain only the sentence id.
        if len(sentences_vector) != 0:
            average_cosine.append(total / len(sentences_vector))
    cosine.append(average_cosine)

# Computing accuracy: how should sentences with two or more aspects be counted?

# print(len(sentences))
# print(len(cosine))
# for i in range(len(sentences)):
#     print(cosine[i])
#     print(sentences[i])

count = 0
count_empty = 0
for i in range(len(sentences)):