Example #1
import time

from ckonlpy.tag import Twitter  # customized KoNLPy Twitter tagger


def run_twitter(news):
    twitter = Twitter()
    start_time = time.time()
    print('twitter start')
    #     twitter_morphs = twitter.morphs(news)
    twitter_nouns = twitter.nouns(news)
    #     twitter_pos = twitter.pos(news)
    end_time = time.time()
    #     print(twitter_pos)
    print('twitter done - %s s' % str(end_time - start_time))

    with open('twitter_noun.txt', 'w', encoding='utf-8') as fstream:
        #         fstream.write('twitter time : %s s\n' % str(end_time - start_time) )
        #         fstream.write('twitter_morphs\n')
        #         write_list(twitter_morphs, fstream)
        #         fstream.write('\n\n')
        #
        fstream.write('twitter_nouns\n')
        write_list(twitter_nouns, fstream)
        fstream.write('\n\n')
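The function above calls a write_list helper that is not part of the snippet; a minimal sketch of such a helper and of invoking run_twitter (the sample text and helper name are only illustrative) could look like this:

def write_list(items, fstream):
    # write one token per line; assumes items is an iterable of strings
    for item in items:
        fstream.write(str(item) + '\n')

if __name__ == '__main__':
    sample_news = '천안시는 오늘 코로나19 관련 브리핑을 열었다.'  # any Korean text
    run_twitter(sample_news)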
Example #2
import time
import string
import datetime
import csv
from ckonlpy.tag import Twitter
from selenium import webdriver
from bs4 import BeautifulSoup

driver = webdriver.Chrome("c:/Users/yooat/Downloads/chromedriver/chromedriver")

driver.get('http://www.cheonan.go.kr/covid19/sub02_01.do')
time.sleep(1)
twitter = Twitter()
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
name = soup.find_all("dl",class_="item")
f1 = open('corona.txt', 'w+t', encoding='utf-8')

for test in name:
    if "14일이" in test.get_text():  # skip entries mentioning "14 days"
        continue
    f1.write(test.get_text() + "\n")
f1.close()

f1 = open('corona.txt', 'r', encoding='utf-8')
nowDate = datetime.datetime.now()
# newline='' avoids blank rows when the csv module writes on Windows
c = csv.writer(open(nowDate.strftime("result_%Y-%m-%d_%H-%M-%S") + ".csv", "w", encoding="cp949", newline=''))
for l in f1:
    c.writerow(twitter.nouns(l))
f1.close()
time.sleep(3)
driver.quit()
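Each row written to the CSV is simply the list of nouns ckonlpy's Twitter tagger extracts from one line of corona.txt. A quick check of that output on a sample sentence (the sentence is illustrative):

from ckonlpy.tag import Twitter

twitter = Twitter()
sample = '천안시 확진자 1명이 추가로 발생했습니다.'
print(twitter.nouns(sample))  # a list of noun strings, e.g. ['천안시', '확진자', ...]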
Example #3
import re

import pandas as pd
from ckonlpy.tag import Twitter


class PreprocessingText:
    def help(self):
        print("******PreprocessingText******")
        print("1) make_content_re(df['column'] (Series)) : preprocess the given column and return it as a Series")
        print("2) add_noun_dict('list') : add words to the noun dictionary")
        print("3) add_stopwords('list') : add words to the stopword list")
        print("4) tokenize(df['column'] (Series)) : tokenize the given column and return it as a Series")
        print(
            "5) change_similar_words(tokenized docs (Series), synonym dict (dictionary)) : replace synonyms in each document with their representative word, based on the synonym dictionary, and return the converted documents as a Series."
        )
        print("*****************************")

    def __init__(self):
        self.reg_reporter = re.compile(r'[가-힣]+\s[가-힣]*기자')  # reporter bylines
        self.reg_email = re.compile(
            r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$')  # e-mail addresses
        self.reg_eng = re.compile(r'[a-z]+')  # lowercase letters (e-mail leftovers); uppercase is kept
        self.reg_chi = re.compile(r"[\u4e00-\u9fff]+")  # Chinese characters
        self.reg_sc = re.compile(
            r"·|…|◆+|◇+|▶+|●+|▲+|“|”|‘|’|\"|\'|\(|\)|\W+")  # special characters
        self.reg_date = re.compile(
            r'\d+일|\d+월|\d+년|\d+시|\d+분|\(현지시간\)|\(현지시각\)|\d+')  # dates, times, numbers

        self.twitter_obj = Twitter()
        self.stopwords = []
        self.noun_list = []

    def preprocessing(self, doc):
        tmp = re.sub(self.reg_reporter, '', doc)
        tmp = re.sub(self.reg_email, '', tmp)
        tmp = re.sub(self.reg_eng, '', tmp)
        tmp = re.sub(self.reg_chi, '', tmp)
        tmp = re.sub(self.reg_sc, ' ', tmp)
        tmp = re.sub(self.reg_date, '', tmp)
        return tmp

    def make_content_re(self, data):
        pp_data = data.apply(self.preprocessing)
        return pp_data

    def add_noun_dict(self, noun_list):
        self.twitter_obj.add_dictionary(noun_list, 'Noun')
        self.noun_list.extend(noun_list)
        print("추가한 명사")
        print(noun_list)

    def add_stopwords(self, stopword_list):
        self.stopwords.extend(stopword_list)
        print("추가한 불용어")
        print(stopword_list)

    def change_similar_words(self, tokenized_docs, similar_words_dict):
        changed_docs = []
        for doc in tokenized_docs:
            changed_doc = []
            for word in doc:
                if word in similar_words_dict.keys():
                    changed_doc.append(similar_words_dict[word])
                else:
                    changed_doc.append(word)
            changed_docs.append(changed_doc)
        return changed_docs

    def tokenize(self, data):
        print('added nouns:', self.noun_list)
        print('stopwords: ', self.stopwords)
        tokenized_doc = data.apply(lambda x: self.twitter_obj.nouns(x))
        tokenized_doc_without_stopwords = tokenized_doc.apply(
            lambda x:
            [item.lower() for item in x if item not in self.stopwords])
        tokenized_data = tokenized_doc_without_stopwords
        return pd.Series(tokenized_data)
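A minimal usage sketch of the class above, assuming a pandas DataFrame with a text column named 'content' (the column name, sample sentences, and synonym dictionary are only illustrative):

import pandas as pd

pp = PreprocessingText()
pp.help()

df = pd.DataFrame({'content': ['홍길동 기자 test@example.com 삼성전자 3일 실적 발표',
                               '코로나19 확진자 발생']})
pp.add_noun_dict(['코로나19'])
pp.add_stopwords(['발생'])

cleaned = pp.make_content_re(df['content'])   # regex-based cleanup
tokens = pp.tokenize(cleaned)                 # noun tokens per document, stopwords removed
tokens = pp.change_similar_words(tokens, {'삼성': '삼성전자'})  # map synonyms to representative words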
Example #4
def naver():
    from selenium import webdriver
    import re
    from selenium.webdriver.common.keys import Keys
    import time
    cr_name = 'naver'
    # make sure the image output directory exists
    save_path = os.path.join(Main.img_path, cr_name)
    if os.path.isdir(save_path):
        print(cr_name + ' image path OK')
    elif os.path.isdir(Main.img_path):
        os.mkdir(save_path)
    else:
        os.mkdir(Main.img_path)
        os.mkdir(save_path)

    text_save_path = os.path.join(Main.text_path, cr_name)
    if os.path.isdir(text_save_path):
        print(cr_name + ' text path OK')
    elif os.path.isdir(Main.text_path):
        os.mkdir(text_save_path)
    else:
        os.mkdir(Main.text_path)
        os.mkdir(text_save_path)

    # fetch the Naver headline ranking

    date = time.strftime('%Y%m%d', time.localtime(time.time()))
    date2 = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))

    result = []
    res = []

    # browser setup
    chrome = chromedriver.generate_chrome(driver_path=Main.driver_path,
                                          headless=Main.headless,
                                          download_path=Main.DOWNLOAD_DIR)

    # open the Naver ranking page
    print("Connecting to Naver")
    # driver = webdriver.Chrome(executable_path="./chromedriver.exe")
    # driver.implicitly_wait(30)

    url = 'https://news.naver.com/main/ranking/popularDay.nhn?rankingType=popular_day&date={}'.format(
        date)
    chrome.get(url)
    time.sleep(2)

    # scroll(3)
    for sun in range(4, 10):
        pr = chrome.find_elements_by_xpath(
            '//*[@id="wrap"]/table/tbody/tr/td[2]/div/div[{}]'.format(sun))
        for p in pr:
            result.append(p.find_elements_by_tag_name('a'))
        # print(result)

        for i, q in enumerate(result):
            for e in q:
                res.append(e.get_attribute('href'))
    http = list(set(res))
    len(http)
    https = []

    for idx in range(len(http)):
        if http[idx].find('popularDay') >= 0:
            continue
        else:
            https.append(http[idx])

    files = pd.DataFrame()

    for i in range(len(https)):
        res = requests.get(https[i])
        soup = BeautifulSoup(res.content, 'html.parser')
        body = soup.select('._article_body_contents')
        files = files.append(
            pd.DataFrame(
                {
                    'Title':
                    soup.find('div', attrs={
                        'class': 'article_info'
                    }).h3.text,
                    'Contents':
                    re.sub(
                        '   ', '',
                        re.sub(
                            '    ', '',
                            re.sub(
                                '\t', '',
                                cleanText(body[0].text)
                                [(cleanText(body[0].text)).find('{}') + 2:]))),
                    'link':
                    https[i]
                },
                index=[i]))

    text2 = files.Contents
    # save the articles as a csv file
    files.to_csv(text_save_path + '/네이버종합뉴스_{}.csv'.format(date2),
                 index=False,
                 encoding='utf-8')

    # -------------------------------------

    # build the custom noun dictionary
    from ckonlpy.tag import Twitter
    t = Twitter()
    t.add_dictionary(Main.sajun(), 'Noun')

    import nltk
    tokens_ko = []

    for i in range(len(text2)):
        tokens_ko.append(t.nouns(text2[i]))

    final = []
    for _, q in enumerate(tokens_ko):
        for i in range(len(q)):
            final.insert(-1, q[i])

    ko = nltk.Text(final, name="첫번째")
    data = ko.vocab().most_common(1000)

    data_1 = []
    for i in range(len(data)):
        for q in range(0, 1, 1):
            if len(data[i][0]) >= 2:
                data_1.append(data[i])

    from wordcloud import WordCloud
    import matplotlib.pyplot as plt

    import time
    date = time.strftime('%Y%m%d', time.localtime(time.time()))
    date2 = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))

    tmp_data = dict(data_1)

    wordcloud = WordCloud(font_path='/Library/Fonts/NanumMyeongjo.ttf',
                          background_color='white',
                          max_words=230).generate_from_frequencies(tmp_data)
    plt.figure(figsize=(10, 8))
    plt.imshow(wordcloud)
    plt.axis('off'), plt.xticks([]), plt.yticks([])
    plt.tight_layout()
    plt.subplots_adjust(left=0, bottom=0, right=1, top=1, hspace=0, wspace=0)
    plt.savefig(save_path + "/naver_{}.png".format(date),
                bbox_inches='tight',
                dpi=400,
                pad_inches=0)
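The directory checks at the top of these crawlers can be collapsed. A shorter, behaviour-equivalent sketch using os.makedirs (the helper name is an assumption; Main.img_path / Main.text_path are the settings used in the examples):

import os

def ensure_dirs(parent_path, cr_name):
    # equivalent to the repeated isdir/mkdir checks above:
    # creates the parent and per-crawler folder in one call, no error if they already exist
    save_path = os.path.join(parent_path, cr_name)
    os.makedirs(save_path, exist_ok=True)
    return save_path

# usage with the example's settings:
# save_path = ensure_dirs(Main.img_path, 'naver')
# text_save_path = ensure_dirs(Main.text_path, 'naver')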
Example #5
def comm_date(comm_name, dates_array):
    for dates in dates_array:
        client = MongoClient('mongodb://*****:*****@...')  # credentials and host are elided in the source
        # ... elided in the source: the query that builds idate_with_all, the
        # initialisation of tokened_texts, and the loop over the documents ...
            # strip special characters and extra whitespace
            text = re.sub('[\·\"\"\%\,\(\)\&]+', ' ', text)
            text = re.sub('[\n\xa0\r]+', ' ', text)

            # tokenization
            token = twitter.nouns(text)  # nouns only

            if token != []:
                tokened_texts.extend(token)

            print(dates, i, '/', len(idate_with_all))

        pickle_name = str(comm_name) + str(dates)
        with open(pickle_name, "wb") as fw:
            pickle.dump(tokened_texts, fw)
        print('saved')
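For reference, the pickled token list can be read back like this (a minimal sketch; the file name shown is illustrative and follows the comm_name + dates convention the function writes):

import pickle

pickle_name = 'dcinside2020-03-01'  # illustrative: comm_name + dates
with open(pickle_name, "rb") as fr:
    tokened_texts = pickle.load(fr)
print(len(tokened_texts), 'tokens loaded')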
Example #6
def twitter():
    cr_name = 'twitter'
    # make sure the image output directory exists
    save_path = os.path.join(Main.img_path, cr_name)
    if os.path.isdir(save_path):
        print(cr_name + ' image path OK')
    elif os.path.isdir(Main.img_path):
        os.mkdir(save_path)
    else:
        os.mkdir(Main.img_path)
        os.mkdir(save_path)

    text_save_path = os.path.join(Main.text_path, cr_name)
    if os.path.isdir(text_save_path):
        print(cr_name + ' text path OK')
    elif os.path.isdir(Main.text_path):
        os.mkdir(text_save_path)
    else:
        os.mkdir(Main.text_path)
        os.mkdir(text_save_path)


    import time
    import nltk
    import re
    from selenium.webdriver.common.keys import Keys
    from tqdm import tqdm
    from ckonlpy.tag import Twitter
    from wordcloud import WordCloud
    import matplotlib.pyplot as plt

    keyword = Main.text()

    # browser setup
    chrome = chromedriver.generate_chrome(
        driver_path=Main.driver_path,
        headless=Main.headless,
        download_path=Main.DOWNLOAD_DIR)

    # open the Twitter search page
    print("Connecting to Twitter")
    # driver = webdriver.Chrome(executable_path="./chromedriver.exe")
    # driver.implicitly_wait(30)

    url = 'https://twitter.com/search?q={}&src=typed_query'.format(keyword)
    chrome.get(url)
    time.sleep(3)


    # text2 = chrome.find_elements_by_css_selector('#react-root > div > div > div > main > div > div > div > div > div > div:nth-child(2) > div')


    # for i in range(15):
    #     for q in range(3):
    #         body = chrome.find_element_by_css_selector('body')
    #         body.send_keys(Keys.PAGE_DOWN)
    #         time.sleep(1)
    #     for ttt in tqdm(text2):
    #         result.append(ttt.text)
    #     time.sleep(1)
    #
    #
    # result2 = []
    # for i in range(len(result)):
    #     if i % 2 == 0:
    #         result2.append(result[i])
    # print(len(result2))
    #
    # result3 = []
    # for i in range(len(result2)):
    #     result3.append(cleanText(result2[i]))

    body = chrome.find_element_by_css_selector('body')
    text2 = chrome.find_elements_by_css_selector('#react-root > div > div > div.css-1dbjc4n.r-18u37iz.r-13qz1uu.r-417010 > main > div > div > div > div > div > div:nth-child(2) > div > div > section > div')
    result = []  # collected tweet texts (missing in the original snippet)

    for i in range(10):
        for q in range(3):
            body.send_keys(Keys.PAGE_DOWN)
            time.sleep(1)
        for ttt in tqdm(text2):
            result.append(re.sub('\n', '', ttt.text))

    t = Twitter()
    t.add_dictionary(Main.sajun(), 'Noun')

    tokens_ko = []

    for i in range(len(result)):
        tokens_ko.append(t.nouns(result[i]))
    final = []
    for _, q in enumerate(tokens_ko):
        for i in range(len(q)):
            final.insert(-1, q[i])

    ko = nltk.Text(final, name="첫번째")
    data = ko.vocab().most_common(1000)
    date = time.strftime('%Y%m%d', time.localtime(time.time()))
    date2 = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))


    # save the collected tweets to a text file
    file = open(text_save_path+'/twitter{}.txt'.format(date2), 'w', encoding='utf-8')

    for review in result:
        file.write(review + '\n')

    file.close()

    tmp_data = dict(data)

    wordcloud = WordCloud(font_path='/Library/Fonts/NanumMyeongjo.ttf',
                          background_color='white', max_words=230).generate_from_frequencies(tmp_data)
    plt.figure(figsize=(10, 8))
    plt.imshow(wordcloud)
    plt.axis('off'), plt.xticks([]), plt.yticks([])
    plt.tight_layout()
    plt.subplots_adjust(left=0, bottom=0, right=1, top=1, hspace=0, wspace=0)
    plt.savefig(save_path+"/twitter_{}.png".format(date), bbox_inches='tight', dpi=400, pad_inches=0)
Example #7
    def Daum(self):
        cr_name = 'daum'
        # make sure the image output directory exists
        save_path = os.path.join(self.img_path, cr_name)
        if os.path.isdir(save_path):
            print(cr_name + ' image path OK')
        elif os.path.isdir(self.img_path):
            os.mkdir(save_path)
        else:
            os.mkdir(self.img_path)
            os.mkdir(save_path)

        text_save_path = os.path.join(self.text_path, cr_name)
        if os.path.isdir(text_save_path):
            print(cr_name + ' text path OK')
        elif os.path.isdir(self.text_path):
            os.mkdir(text_save_path)
        else:
            os.mkdir(self.text_path)
            os.mkdir(text_save_path)

        # scrape the Daum popular-news ranking page
        http = []
        print('Connecting to Daum')
        httz = 'https://media.daum.net/ranking/popular/?regDate={}'.format(
            self.date)
        res = requests.get(httz)
        soup = BeautifulSoup(res.content, 'html.parser')
        body = soup.select('#mArticle > div.rank_news > ul.list_news2')
        body = body[0].find_all('a')

        for i in range(len(body)):
            t = body[i].get('href')
            http.append(t)

        # remove duplicates
        http = list(set(http))

        files = pd.DataFrame()
        for i in range(len(http)):
            res = requests.get(http[i])
            soup = BeautifulSoup(res.content, 'html.parser')
            body = soup.select('.article_view')[0]

            files = files.append(
                pd.DataFrame(
                    {
                        'Title':
                        soup.find('div', attrs={
                            'class': 'head_view'
                        }).h3.text,
                        'Contents':
                        " ".join(p.get_text() for p in body.find_all('p')),
                        'link':
                        http[i]
                    },
                    index=[i]))
        text2 = files.Contents

        # save the articles as a csv file
        files.to_csv(text_save_path + '/다음뉴스종합_{}.csv'.format(self.date),
                     index=False,
                     encoding='utf-8')
        print('Daum articles saved!')

        t = Twitter()
        t.add_dictionary(self.sajun(), 'Noun')
        print('custom noun dictionary loaded!!')

        tokens_ko = []

        for i in range(len(text2)):
            tokens_ko.append(t.nouns(text2[i]))

        final = []
        for _, q in enumerate(tokens_ko):
            for i in range(len(q)):
                final.insert(-1, q[i])

        ko = nltk.Text(final, name="첫번째")
        data = ko.vocab().most_common(1000)
        print('nltk frequency count done')

        # Daum news is scraped across ~50 pages, so single-character tokens are dropped here.
        # Review the output, add any needed words to the noun dictionary, and re-tag.
        data_1 = []
        for i in range(len(data)):
            for q in range(0, 1, 1):
                if len(data[i][0]) >= 2:
                    data_1.append(data[i])

        tmp_data = dict(data_1)
        print('generating wordcloud')
        wordcloud = WordCloud(
            font_path=self.fontPath, background_color='white',
            max_words=230).generate_from_frequencies(tmp_data)
        print('wordcloud generated')
        plt.figure(figsize=(10, 8))
        plt.imshow(wordcloud)
        plt.axis('off'), plt.xticks([]), plt.yticks([])
        plt.tight_layout()
        plt.subplots_adjust(left=0,
                            bottom=0,
                            right=1,
                            top=1,
                            hspace=0,
                            wspace=0)
        plt.savefig(save_path + "/daum_{}.png".format(self.date),
                    bbox_inches='tight',
                    dpi=400,
                    pad_inches=0)
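These examples build their DataFrames with DataFrame.append, which was removed in pandas 2.0. A behaviour-equivalent sketch that collects per-article frames and concatenates them once (the loop body here is a placeholder; the column names match the examples above):

import pandas as pd

rows = []
for i, link in enumerate(['https://example.com/a', 'https://example.com/b']):  # illustrative links
    title = 'title %d' % i        # in the examples above: soup.find(...).h3.text
    contents = 'contents %d' % i  # in the examples above: the cleaned article body
    rows.append(pd.DataFrame({'Title': title, 'Contents': contents, 'link': link}, index=[i]))

files = pd.concat(rows)  # replaces the repeated files.append(...) calls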
Example #8
    def Naver(self):
        cr_name = 'naver'
        # make sure the image output directory exists
        save_path = os.path.join(self.img_path, cr_name)
        if os.path.isdir(save_path):
            print(cr_name + ' image path OK')
        elif os.path.isdir(self.img_path):
            os.mkdir(save_path)
        else:
            os.mkdir(self.img_path)
            os.mkdir(save_path)

        text_save_path = os.path.join(self.text_path, cr_name)
        if os.path.isdir(text_save_path):
            print(cr_name + ' text path OK')
        elif os.path.isdir(self.text_path):
            os.mkdir(text_save_path)
        else:
            os.mkdir(self.text_path)
            os.mkdir(text_save_path)

        # fetch the Naver headline ranking

        result = []
        res = []

        # browser setup
        if self.platform == 'linux':

            display = Display(visible=0, size=(800, 600))
            display.start()

            options = Options()
            options.binary_location = "/usr/bin/google-chrome"

            # chrome_options = webdriver.ChromeOptions()
            options.headless = True
            options.add_argument('--headless')
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-gpu')
            options.add_argument('--disable-dev-shm-usage')

            chrome = webdriver.Chrome(executable_path=self.driver_path,
                                      options=options)
        else:
            chrome = self.generate_chrome(driver_path=self.driver_path,
                                          headless=self.headless,
                                          download_path=self.DOWNLOAD_DIR)

        # open the Naver ranking page
        print("Connecting to Naver")
        # driver = webdriver.Chrome(executable_path="./chromedriver.exe")
        # driver.implicitly_wait(30)

        url = 'https://news.naver.com/main/ranking/popularDay.nhn?rankingType=popular_day&date={}'.format(
            self.date)
        chrome.get(url)
        chrome.implicitly_wait(30)

        # scroll(3)
        for sun in range(4, 10):
            pr = chrome.find_elements_by_xpath(
                '//*[@id="wrap"]/table/tbody/tr/td[2]/div/div[{}]'.format(sun))
            for p in pr:
                result.append(p.find_elements_by_tag_name('a'))
            # print(result)

            for i, q in enumerate(result):
                for e in q:
                    res.append(e.get_attribute('href'))
        http = list(set(res))
        len(http)
        https = []

        for idx in range(len(http)):
            if http[idx].find('popularDay') >= 0:
                continue
            else:
                https.append(http[idx])

        files = pd.DataFrame()

        if self.platform == 'linux':
            chrome.close()
            display.stop()

        for i in range(len(https)):
            res = requests.get(https[i])
            soup = BeautifulSoup(res.content, 'html.parser')
            body = soup.select('._article_body_contents')
            files = files.append(
                pd.DataFrame(
                    {
                        'Title':
                        soup.find('div', attrs={
                            'class': 'article_info'
                        }).h3.text,
                        'Contents':
                        re.sub(
                            '   ', '',
                            re.sub(
                                '    ', '',
                                re.sub(
                                    '\t', '',
                                    self.cleanText(body[0].text)[
                                        (self.cleanText(body[0].text)
                                         ).find('{}') + 2:]))),
                        'link':
                        https[i]
                    },
                    index=[i]))

        text2 = files.Contents
        # save the articles as a csv file
        files.to_csv(text_save_path + '/네이버종합뉴스_{}.csv'.format(self.date),
                     index=False,
                     encoding='utf-8')

        # -------------------------------------

        # build the custom noun dictionary
        t = Twitter()
        t.add_dictionary(self.sajun(), 'Noun')

        tokens_ko = []

        for i in range(len(text2)):
            tokens_ko.append(t.nouns(text2[i]))

        final = []
        for _, q in enumerate(tokens_ko):
            for i in range(len(q)):
                final.insert(-1, q[i])

        ko = nltk.Text(final, name="첫번째")
        data = ko.vocab().most_common(1000)

        data_1 = []
        for i in range(len(data)):
            for q in range(0, 1, 1):
                if len(data[i][0]) >= 2:
                    data_1.append(data[i])

        tmp_data = dict(data_1)

        wordcloud = WordCloud(
            font_path=self.fontPath, background_color='white',
            max_words=230).generate_from_frequencies(tmp_data)
        plt.figure(figsize=(10, 8))
        plt.imshow(wordcloud)
        plt.axis('off'), plt.xticks([]), plt.yticks([])
        plt.tight_layout()
        plt.subplots_adjust(left=0,
                            bottom=0,
                            right=1,
                            top=1,
                            hspace=0,
                            wspace=0)
        plt.savefig(save_path + "/naver_{}.png".format(self.date),
                    bbox_inches='tight',
                    dpi=400,
                    pad_inches=0)
Example #9
    def twitter(self):
        cr_name = 'twitter'
        # make sure the image output directory exists
        save_path = os.path.join(self.img_path, cr_name)
        if os.path.isdir(save_path):
            print(cr_name + ' image path OK')
        elif os.path.isdir(self.img_path):
            os.mkdir(save_path)
        else:
            os.mkdir(self.img_path)
            os.mkdir(save_path)

        text_save_path = os.path.join(self.text_path, cr_name)
        if os.path.isdir(text_save_path):
            print(cr_name + ' text path OK')
        elif os.path.isdir(self.text_path):
            os.mkdir(text_save_path)
        else:
            os.mkdir(self.text_path)
            os.mkdir(text_save_path)
        keyword = self.scan_name

        # if self.platform == 'linux':
        #     print('System platform : Linux')
        #     self.driver_path = './static/lib/webDriver/chromedriver_lnx'
        #     from pyvirtualdisplay import Display
        #     self.display = Display(visible=0, size=(800, 600))
        #     self.display.start()
        # browser setup
        if self.platform == 'linux':

            display = Display(visible=0, size=(1024, 768))
            display.start()

            options = Options()
            options.binary_location = "/usr/bin/google-chrome"

            # chrome_options = webdriver.ChromeOptions()
            options.headless = True
            options.add_argument('--headless')
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-gpu')
            options.add_argument('--disable-dev-shm-usage')

            chrome = webdriver.Chrome(executable_path=self.driver_path,
                                      options=options)
        else:
            chrome = self.generate_chrome(driver_path=self.driver_path,
                                          headless=self.headless,
                                          download_path=self.DOWNLOAD_DIR)

        # open the Twitter search page
        print("Connecting to Twitter")
        # driver = webdriver.Chrome(executable_path="./chromedriver.exe")
        # driver.implicitly_wait(30)

        url = 'https://twitter.com/search?q={}&src=typed_query'.format(keyword)
        chrome.get(url)
        chrome.implicitly_wait(30)

        body = chrome.find_element_by_css_selector('body')
        text2 = chrome.find_elements_by_css_selector(
            '#react-root > div > div > div.css-1dbjc4n.r-18u37iz.r-13qz1uu.r-417010 > main > div > div > div > div > div > div:nth-child(2) > div > div > section > div'
        )
        result = []

        for i in range(10):
            for q in range(3):
                body.send_keys(Keys.PAGE_DOWN)
                time.sleep(1)
            for ttt in text2:
                result.append(re.sub('\n', '', ttt.text))
        print(result)

        time.sleep(1)
        if self.platform == 'linux':
            chrome.close()
            display.stop()

        t = Twitter()

        t.add_dictionary(self.sajun(), 'Noun')
        print('custom noun dictionary loaded')
        tokens_ko = []

        for i in range(len(result)):
            tokens_ko.append(t.nouns(result[i]))

        final = []
        for _, q in enumerate(tokens_ko):
            for i in range(len(q)):
                final.insert(-1, q[i])
        print('morphological analysis done!')
        ko = nltk.Text(final, name="첫번째")
        data = ko.vocab().most_common(1000)

        # save the collected tweets to a text file
        file = open(text_save_path + '/twitter{}.txt'.format(self.date),
                    'w',
                    encoding='utf-8')

        for review in result:
            file.write(review + '\n')

        file.close()

        tmp_data = dict(data)

        wordcloud = WordCloud(
            font_path=self.fontPath, background_color='white',
            max_words=230).generate_from_frequencies(tmp_data)
        plt.figure(figsize=(10, 8))
        plt.imshow(wordcloud)
        plt.axis('off'), plt.xticks([]), plt.yticks([])
        plt.tight_layout()
        plt.subplots_adjust(left=0,
                            bottom=0,
                            right=1,
                            top=1,
                            hspace=0,
                            wspace=0)
        plt.savefig(save_path + "/twitter_{}.png".format(self.date),
                    bbox_inches='tight',
                    dpi=400,
                    pad_inches=0)
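The WordCloud calls above hard-code a macOS font path ('/Library/Fonts/NanumMyeongjo.ttf'); on other systems font_path has to point at any installed Korean font. A small sketch for picking one (the Windows and Linux paths are common defaults and only assumptions):

import os

candidate_fonts = [
    '/Library/Fonts/NanumMyeongjo.ttf',                  # macOS, as used above
    'C:/Windows/Fonts/malgun.ttf',                       # Windows (Malgun Gothic), assumed location
    '/usr/share/fonts/truetype/nanum/NanumGothic.ttf',   # Debian/Ubuntu nanum package, assumed location
]
font_path = next((p for p in candidate_fonts if os.path.exists(p)), None)
print('using font:', font_path)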