def movieword(code):
    """Render a word cloud for the movie identified by *code*.

    Fetches up to 20 review texts via movie_start.Getdata, extracts
    nouns with soynlp's LRNounExtractor, and hands the joined nouns to
    movie_wordcloud.displayWordCloud under the movie code's name.

    Returns the literal string "ok" on completion.
    """
    reviews = movie_start.Getdata([code], 20)
    extractor = LRNounExtractor(verbose=True)
    extractor.train(reviews['text'])
    extracted_nouns = extractor.extract()
    # Joining iterates the extraction result's keys (the noun strings).
    movie_wordcloud.displayWordCloud(str(code), ' '.join(extracted_nouns))
    return "ok"
def movieword(code):
    """Build and display a noun word cloud for the movie *code*.

    Variant that fetches the full review set (no explicit limit), then
    extracts nouns and renders them via movie_wordcloud.
    """
    review_frame = movie_start.Getdata([code])
    # Noun-only extraction (original note: tidy this up later).
    extractor = LRNounExtractor(verbose=True)
    extractor.train(review_frame['text'])
    noun_map = extractor.extract()
    # Concatenate the extracted nouns and feed them to the word-cloud renderer.
    movie_wordcloud.displayWordCloud(str(code), ' '.join(noun_map))
    return "ok"
def tag_counting(law_event_type):
    """Chart the 20 most frequent non-stopword nouns for one case type.

    Reads 'law_list_detail.csv', trains a noun extractor on the
    preprocessed 'law_content' of rows whose 'law_event_type' matches,
    prints a tag/count table, and draws a bar chart of the top 20.
    """
    rulings = pd.read_csv('law_list_detail.csv', encoding='utf-8')
    extractor = LRNounExtractor(verbose=True)
    matching = rulings[rulings['law_event_type'] == law_event_type]
    extractor.train(matching['law_content'].astype('str').apply(preprocessing))
    extracted = extractor.extract()
    frequency = Counter(extracted)
    stopwords = make_stopword()
    tag_count = []
    # Walk the 200 most common nouns and keep the first 20 that are not
    # stopwords.  score[0] is the first field of the per-noun score record
    # returned by the extractor — presumably its raw frequency (TODO confirm
    # against soynlp's NounScore definition).
    for noun, score in frequency.most_common(200):
        if noun in stopwords:
            continue
        tag_count.append({'tag': noun, 'count': score[0]})
        if len(tag_count) == 20:
            break
    for tag in tag_count:
        print("{:<14}".format(tag['tag']), end='\t')
        print("{}".format(tag['count']))
    df = pd.DataFrame.from_dict(tag_count, orient='columns')
    df.set_index(df['tag'], inplace=True)
    # Chart styling.
    plt.style.use('ggplot')
    ax1 = df.plot(kind='bar', figsize=(20, 10), width=0.7,
                  stacked=False, legend=None)
    ax1.set_ylim(0, 60000)
    ax1.set_xlabel('단어', size=20)
    ax1.set_ylabel('빈도수', size=20)
    plt.title('사건 종류별 특정 단어 빈도수(형사)', size=20)
    plt.show()
# NOTE(review): this chunk begins mid-expression — `height=height).generate(data)`
# is the tail of a WordCloud(...) construction whose opening lines are outside
# this view, so the code is left byte-identical.  After saving the generated
# cloud to "wc<num>.png", the chunk re-trains ONE shared LRNounExtractor on
# eight different sentence collections in turn, capturing each extraction as
# nouns1..nouns8.  NOTE(review): whether train() resets or accumulates state
# between calls is not visible here — confirm against soynlp before assuming
# nouns2..nouns8 are independent of earlier training calls.
height=height).generate(data) wordcloud.to_file(os.path.join(currdir, "wc" + num + ".png")) #plt.figure(figsize = (15 , 10)) #plt.imshow(wordcloud) #plt.axis("off") #plt.show() # In[51]: from soynlp.noun import LRNounExtractor # In[52]: noun_extractor = LRNounExtractor(verbose=True) noun_extractor.train(sentences1) nouns1 = noun_extractor.extract() noun_extractor.train(sentences2) nouns2 = noun_extractor.extract() noun_extractor.train(sentences3) nouns3 = noun_extractor.extract() noun_extractor.train(sentences4) nouns4 = noun_extractor.extract() noun_extractor.train(sentences5) nouns5 = noun_extractor.extract() noun_extractor.train(sentences6) nouns6 = noun_extractor.extract() noun_extractor.train(sentences7) nouns7 = noun_extractor.extract() noun_extractor.train(sentences8) nouns8 = noun_extractor.extract()
# Script chunk: loads 'foo1.csv', preprocesses the '본문' (body-text) column,
# shows a word cloud of the raw sentences, then a second one of soynlp-extracted
# nouns, and finally renders the nouns onto the 'cloud.png' image mask.
# NOTE(review): the leading plt.show() flushes a figure built in an earlier,
# unseen chunk; `preprocessing` and `displayWordCloud` are defined elsewhere in
# the file.  Original line structure was lost in extraction, so the code is
# kept byte-identical.
plt.show() df = pd.read_csv('foo1.csv', engine='python', encoding='utf-8') tokenizer = RegexTokenizer() stopwords_kr = [ '하지만', '그리고', '그런데', '저는', '제가', '그럼', '이런', '저런', '합니다', '많은', '많이', '정말', '너무', '[', ']', '것으로', '했습니다', '했다' ] sentences = df['본문'].apply(preprocessing) displayWordCloud(' '.join(sentences)) # soynlp로 명사 추출하기 noun_extractor = LRNounExtractor(verbose=True) noun_extractor.train(sentences) nouns = noun_extractor.extract() displayWordCloud(' '.join(nouns)) # 이미지 파일위에 출력하기 img = Image.open('cloud.png') img_array = np.array(img) wordcloud = WordCloud(font_path='/Library/Fonts/NanumBarunGothic.ttf', stopwords=stopwords_kr, background_color='white', mask=img_array, width=800, height=600).generate(' '.join(nouns)) plt.figure(figsize=(15, 10)) plt.imshow(wordcloud)
# Script chunk: preprocesses the 'law_content' column of the (unseen, earlier
# defined) `law_categoriesMin` frame, extracts nouns with soynlp, and shows a
# word cloud of them.  The commented-out blocks are parallel experiments for
# other case categories (세무/tax, 일반행정/general administration, 특허/patent,
# 형사/criminal) filtered from `prec` — deliberately disabled, kept for
# reference.  Code kept byte-identical; `prec`, `preprocessing` and
# `displayWordCloud` come from outside this view.
law_categoriesMin1 = law_categoriesMin['law_content'].astype('str').apply( preprocessing) # print(law_categoriesMin1.head()) # displayWordCloud(' '.join(law_categoriesMin1)) # law_categoriesSe = prec[prec['law_event_type'] == "세무"] # law_categoriesSe1 = law_categoriesSe['law_content'].astype('str').apply(preprocessing) # displayWordCloud(' '.join(law_categoriesSe1)) # law_categoriesH = prec[prec['law_event_type'] == "일반행정"] # law_categoriesH1 = law_categoriesH['law_content'].astype('str').apply(preprocessing) # displayWordCloud(' '.join(law_categoriesH1)) # law_categoriesT = prec[prec['law_event_type'] == "특허"] # law_categoriesT1 = law_categoriesT['law_content'].astype('str').apply(preprocessing) # # print(law_categoriesT1) # displayWordCloud(' '.join(law_categoriesT1)) # law_categoriesP = prec[prec['law_event_type'] == "형사"] # law_categoriesP1 = law_categoriesP['law_content'].astype('str').apply(preprocessing) # displayWordCloud(' '.join(law_categoriesP1)) noun_extractor = LRNounExtractor(verbose=True) noun_extractor.train(law_categoriesMin1) # 명사만 추출 nouns = noun_extractor.extract() # print(type(nouns)) # print(nouns) displayWordCloud(' '.join(nouns)) # displayWordCloud(' '.join(law_categoriesGa1))
# detail(m_no, current_movie_title): Flask-style view.  Joins current_movie
# with test on the movie title, fetches the movie row and its board posts from
# MySQL (connection always closed in the finally), then — when a joined row was
# found — scrapes up to 199 pages of Naver movie reviews for result['codem'],
# extracts nouns with soynlp, paints them over the cloud.png mask, saves the
# word cloud under static/wordcloud/<title>.png, and renders movie_detail.html.
# NOTE(review): credentials are redacted placeholders ('******'); move real
# ones to config.  NOTE(review): cursor.execute(sql, (current_movie_title))
# passes a bare string, not a 1-tuple — pymysql appears to tolerate a scalar,
# but (x,) would be the safe form; same for (m_no).  NOTE(review): the exact
# indentation of the scraping/word-cloud section relative to the
# `if result is not None:` guard was lost when this file was flattened — code
# kept byte-identical; if `result` is None the later use of `result['codem']`
# and the final render_template would need re-checking against the original.
def detail(m_no, current_movie_title): conn = pymysql.connect(host='127.0.0.1', user='******', password='******', db='movie', charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor) try: with conn.cursor() as cursor: sql = 'select * from current_movie c inner join test t on c.current_movie_title = t.title where current_movie_title = %s;' cursor.execute(sql, (current_movie_title)) result = cursor.fetchone() #하나만 가져올떄 sql = 'select * from current_movie where current_movie_title = %s;' cursor.execute(sql, (current_movie_title)) result1 = cursor.fetchone() #하나만 가져올떄 sql = 'select * from board where m_no= %s;' cursor.execute(sql, (m_no)) board = cursor.fetchall() finally: conn.close() if result is not None: tmrvl = [] movieName = result['codem'] for page in range(1, 200): url = "https://movie.naver.com/movie/bi/mi/review.nhn?code=" + str( movieName) + "&page=" + str(page) response = urllib.request.urlopen(url) soup = BeautifulSoup(response, 'html.parser') table = soup.select('ul.rvw_list_area li a') for result3 in table: mrv = str(result3.string) tmrv = tuple([mrv]) tmrvl.append(tmrv) #tmrv1=str(tmrv) #f.write(tmrv1) df = pd.DataFrame(tmrvl) def preprocessing(text): # 개행문자 제거 text = re.sub('\\\\n', ' ', text) return text tokenizer = RegexTokenizer() stopwords_kr = [ '하지만', '그리고', '그런데', '저는', '제가', '그럼', '이런', '저런', '합니다', '많은', '많이', '정말', '너무', '[', ']', '것으로', '했습니다', '했다' ] sentences = df[0].apply(preprocessing) # soynlp로 명사 추출하기 noun_extractor = LRNounExtractor(verbose=True) noun_extractor.train(sentences) nouns = noun_extractor.extract() # 이미지 파일위에 출력하기 img = Image.open('IT_Bank_Movie/static/img/cloud.png') img_array = np.array(img) wordcloud = WordCloud(font_path='/Library/Fonts/NanumBarunGothic.ttf', stopwords=stopwords_kr, background_color='white', mask=img_array, width=800, height=600).generate(' '.join(nouns)) plt.figure(figsize=(15, 10)) plt.imshow(wordcloud) plt.axis("off") #plt.show() url1 = "IT_Bank_Movie/static/wordcloud/" + current_movie_title + 
".png" wordcloud.to_file(url1) return render_template('movie_detail.html', wordInfo=result, board=board, movieInfo=result1)