Example #1
def movieword(code):
    df1 = movie_start.Getdata([code], 20)
    # Extract nouns from the review text with soynlp
    noun_extractor = LRNounExtractor(verbose=True)
    noun_extractor.train(df1['text'])
    nouns = noun_extractor.extract()
    # Join the nouns and render them as a word cloud for this movie code
    movie_wordcloud.displayWordCloud(str(code), ' '.join(nouns))
    return "ok"
Example #2
def movieword(code):
    df1 = movie_start.Getdata([code])
    # Noun-only extraction step (TODO: write this code later)
    noun_extractor = LRNounExtractor(verbose=True)
    noun_extractor.train(df1['text'])
    nouns = noun_extractor.extract()
    # Join the nouns together and render them as a word cloud
    movie_wordcloud.displayWordCloud(str(code), ' '.join(nouns))
    return "ok"
Example #3
def tag_counting(law_event_type):
    prec = pd.read_csv('law_list_detail.csv', encoding='utf-8')

    noun_extractor = LRNounExtractor(verbose=True)
    noun_extractor.train(prec[prec['law_event_type'] == law_event_type]
                         ['law_content'].astype('str').apply(preprocessing))
    nouns = noun_extractor.extract()

    # extract() returns {noun: NounScore(frequency, score)}; Counter built
    # from a mapping adopts those values as its counts
    count = Counter(nouns)
    # print(count)

    tag_count = []
    stopwords = make_stopword()
    # print(stopwords)

    for n, c in count.most_common(200):
        if n not in stopwords:
            # c is a NounScore tuple, so c[0] is the raw frequency
            dics = {'tag': n, 'count': c[0]}
            tag_count.append(dics)

        if len(tag_count) == 20:
            break

    # print(tag_count)

    for tag in tag_count:
        print("{:<14}".format(tag['tag']), end='\t')
        print("{}".format(tag['count']))

    df = pd.DataFrame.from_dict(tag_count, orient='columns')
    df.set_index(df['tag'], inplace=True)
    # print(df)

    # Set the plot style
    plt.style.use('ggplot')

    ax1 = df.plot(kind='bar',
                  figsize=(20, 10),
                  width=0.7,
                  stacked=False,
                  legend=None)

    ax1.set_ylim(0, 60000)
    ax1.set_xlabel('단어', size=20)   # x-axis label: 'word'
    ax1.set_ylabel('빈도수', size=20)  # y-axis label: 'frequency'

    # Title: 'Frequency of selected words by case type (criminal)'
    plt.title('사건 종류별 특정 단어 빈도수(형사)', size=20)

    plt.show()
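
A quick aside on the c[0] indexing above: because Counter is built from the dict that extract() returns, each "count" produced by most_common() is a NounScore tuple, not an int. A minimal, self-contained sketch using a stand-in namedtuple rather than soynlp's own class:

from collections import Counter, namedtuple

# Stand-in for soynlp's NounScore; only the field order matters here
NounScore = namedtuple('NounScore', ['frequency', 'score'])

count = Counter({'영화': NounScore(120, 0.95), '배우': NounScore(80, 0.90)})
for n, c in count.most_common():
    print(n, c[0])  # c[0] is the frequency field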
Example #4

def displayWordCloud(num, data, width=800, height=600):
    # The top of this snippet was truncated; the signature and the WordCloud
    # arguments other than width/height are assumed
    wordcloud = WordCloud(width=width,
                          height=height).generate(data)
    wordcloud.to_file(os.path.join(currdir, "wc" + num + ".png"))
    #plt.figure(figsize = (15 , 10))
    #plt.imshow(wordcloud)
    #plt.axis("off")
    #plt.show()


# In[51]:

from soynlp.noun import LRNounExtractor

# In[52]:

# Train and extract per corpus (sentences1 through sentences8 are assumed
# to be defined in earlier notebook cells)
noun_extractor = LRNounExtractor(verbose=True)
noun_extractor.train(sentences1)
nouns1 = noun_extractor.extract()
noun_extractor.train(sentences2)
nouns2 = noun_extractor.extract()
noun_extractor.train(sentences3)
nouns3 = noun_extractor.extract()
noun_extractor.train(sentences4)
nouns4 = noun_extractor.extract()
noun_extractor.train(sentences5)
nouns5 = noun_extractor.extract()
noun_extractor.train(sentences6)
nouns6 = noun_extractor.extract()
noun_extractor.train(sentences7)
nouns7 = noun_extractor.extract()
noun_extractor.train(sentences8)
nouns8 = noun_extractor.extract()
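
The eight train/extract pairs are identical except for the corpus, so the same pattern can be collapsed into a loop; a minimal sketch, assuming sentences1 through sentences8 are already defined:

corpora = [sentences1, sentences2, sentences3, sentences4,
           sentences5, sentences6, sentences7, sentences8]
noun_sets = []
for sentences in corpora:
    extractor = LRNounExtractor(verbose=True)  # fresh statistics per corpus
    extractor.train(sentences)
    noun_sets.append(extractor.extract())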
Example #5


df = pd.read_csv('foo1.csv', engine='python', encoding='utf-8')
tokenizer = RegexTokenizer()
stopwords_kr = [
    '하지만', '그리고', '그런데', '저는', '제가', '그럼', '이런', '저런', '합니다', '많은', '많이', '정말',
    '너무', '[', ']', '것으로', '했습니다', '했다'
]

sentences = df['본문'].apply(preprocessing)  # '본문' = article body column
displayWordCloud(' '.join(sentences))

# Extract nouns with soynlp
noun_extractor = LRNounExtractor(verbose=True)
noun_extractor.train(sentences)
nouns = noun_extractor.extract()
displayWordCloud(' '.join(nouns))

# Render the word cloud onto an image mask
img = Image.open('cloud.png')
img_array = np.array(img)

wordcloud = WordCloud(font_path='/Library/Fonts/NanumBarunGothic.ttf',
                      stopwords=stopwords_kr,
                      background_color='white',
                      mask=img_array,
                      width=800,
                      height=600).generate(' '.join(nouns))
plt.figure(figsize=(15, 10))
plt.imshow(wordcloud)
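
Neither displayWordCloud nor preprocessing is defined in this snippet (a preprocessing helper appears in Example #7 below). A minimal sketch of what displayWordCloud presumably looks like, reusing the font path and stopword list from the code above:

def displayWordCloud(data, width=800, height=600):
    wordcloud = WordCloud(font_path='/Library/Fonts/NanumBarunGothic.ttf',
                          stopwords=stopwords_kr,
                          background_color='white',
                          width=width,
                          height=height).generate(data)
    plt.figure(figsize=(15, 10))
    plt.imshow(wordcloud)
    plt.axis('off')
    plt.show()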
Example #6
# law_categoriesMin is presumably prec filtered to 민사 (civil) cases, by
# analogy with the commented-out blocks below
law_categoriesMin1 = law_categoriesMin['law_content'].astype('str').apply(
    preprocessing)
# print(law_categoriesMin1.head())
# displayWordCloud(' '.join(law_categoriesMin1))

# Alternative case types, left commented out: 세무 (tax), 일반행정 (general
# administrative), 특허 (patent), 형사 (criminal)
# law_categoriesSe = prec[prec['law_event_type'] == "세무"]
# law_categoriesSe1 = law_categoriesSe['law_content'].astype('str').apply(preprocessing)
# displayWordCloud(' '.join(law_categoriesSe1))

# law_categoriesH = prec[prec['law_event_type'] == "일반행정"]
# law_categoriesH1 = law_categoriesH['law_content'].astype('str').apply(preprocessing)
# displayWordCloud(' '.join(law_categoriesH1))

# law_categoriesT = prec[prec['law_event_type'] == "특허"]
# law_categoriesT1 = law_categoriesT['law_content'].astype('str').apply(preprocessing)
# # print(law_categoriesT1)
# displayWordCloud(' '.join(law_categoriesT1))

# law_categoriesP = prec[prec['law_event_type'] == "형사"]
# law_categoriesP1 = law_categoriesP['law_content'].astype('str').apply(preprocessing)
# displayWordCloud(' '.join(law_categoriesP1))

noun_extractor = LRNounExtractor(verbose=True)
noun_extractor.train(law_categoriesMin1)
# Extract only the nouns
nouns = noun_extractor.extract()
# print(type(nouns))
# print(nouns)
displayWordCloud(' '.join(nouns))

# displayWordCloud(' '.join(law_categoriesGa1))
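
Each of the blocks above follows the same pandas pattern: boolean-mask filter on the case type, cast to str, apply preprocessing, then extract nouns. A compact sketch of that pattern as a helper, assuming prec and preprocessing from this example:

def category_nouns(prec, event_type):
    # Select one case type, normalize the content column, and preprocess it
    texts = (prec[prec['law_event_type'] == event_type]
             ['law_content'].astype('str').apply(preprocessing))
    extractor = LRNounExtractor(verbose=True)
    extractor.train(texts)
    return extractor.extract()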
Example #7
def detail(m_no, current_movie_title):

    conn = pymysql.connect(host='127.0.0.1',
                           user='******',
                           password='******',
                           db='movie',
                           charset='utf8mb4',
                           cursorclass=pymysql.cursors.DictCursor)
    try:
        with conn.cursor() as cursor:
            sql = 'select * from current_movie c inner join test t on c.current_movie_title = t.title where current_movie_title = %s;'
            cursor.execute(sql, (current_movie_title,))
            result = cursor.fetchone()  # fetch a single row

            sql = 'select * from current_movie where current_movie_title = %s;'
            cursor.execute(sql, (current_movie_title,))
            result1 = cursor.fetchone()  # fetch a single row

            sql = 'select * from board where m_no= %s;'
            cursor.execute(sql, (m_no,))
            board = cursor.fetchall()
    finally:
        conn.close()
    if result is not None:
        tmrvl = []
        movieName = result['codem']

        for page in range(1, 200):
            url = "https://movie.naver.com/movie/bi/mi/review.nhn?code=" + str(
                movieName) + "&page=" + str(page)
            response = urllib.request.urlopen(url)

            soup = BeautifulSoup(response, 'html.parser')
            table = soup.select('ul.rvw_list_area li a')
            for result3 in table:
                mrv = str(result3.string)
                tmrv = tuple([mrv])  # wrap each review in a 1-tuple for the DataFrame
                tmrvl.append(tmrv)
                #tmrv1=str(tmrv)
                #f.write(tmrv1)
        df = pd.DataFrame(tmrvl)

        def preprocessing(text):
            # Remove literal '\n' sequences left over from scraping
            text = re.sub(r'\\n', ' ', text)
            return text

        tokenizer = RegexTokenizer()
        stopwords_kr = [
            '하지만', '그리고', '그런데', '저는', '제가', '그럼', '이런', '저런', '합니다', '많은',
            '많이', '정말', '너무', '[', ']', '것으로', '했습니다', '했다'
        ]

        sentences = df[0].apply(preprocessing)

        # Extract nouns with soynlp
        noun_extractor = LRNounExtractor(verbose=True)
        noun_extractor.train(sentences)
        nouns = noun_extractor.extract()

        # Render the word cloud onto an image mask
        img = Image.open('IT_Bank_Movie/static/img/cloud.png')
        img_array = np.array(img)

        wordcloud = WordCloud(font_path='/Library/Fonts/NanumBarunGothic.ttf',
                              stopwords=stopwords_kr,
                              background_color='white',
                              mask=img_array,
                              width=800,
                              height=600).generate(' '.join(nouns))
        plt.figure(figsize=(15, 10))
        plt.imshow(wordcloud)
        plt.axis("off")
        #plt.show()
        url1 = "IT_Bank_Movie/static/wordcloud/" + current_movie_title + ".png"
        wordcloud.to_file(url1)

    return render_template('movie_detail.html',
                           wordInfo=result,
                           board=board,
                           movieInfo=result1)
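
One detail worth noting from the queries above: with a single %s placeholder, the parameters argument to cursor.execute should be a 1-tuple (note the trailing comma); a bare parenthesized value is just the value itself. A minimal sketch, assuming the same local MySQL setup:

import pymysql

conn = pymysql.connect(host='127.0.0.1', user='user', password='pw',
                       db='movie', charset='utf8mb4',
                       cursorclass=pymysql.cursors.DictCursor)
try:
    with conn.cursor() as cursor:
        cursor.execute('select * from current_movie where current_movie_title = %s;',
                       ('어벤져스',))  # hypothetical title; note the trailing comma
        row = cursor.fetchone()  # a dict (or None) because of DictCursor
finally:
    conn.close()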