Example #1
0
    def get_df_news_word(self, filePath, encoding):
        """Build a word-frequency table from an economy-news CSV.

        Loads the CSV at *filePath*, POS-tags every row of the 'contents'
        column with Okt, keeps nouns longer than one character that are not
        stop words, saves the 10 most common to
        ./csv/result_economy_news_word.csv, and returns them as a DataFrame
        with columns ['word', 'count'].
        """
        df = handler.load_to_csv(filePath, encoding)
        contents = df['contents'].tolist()

        # Domain-specific stop words to exclude from the count.
        stopWords = {
            '크게', '여기', '서울', '정부', '위원회', '사업', '한국', '옵티머스', '의원', '금융감독원',
            '국회', '지난'
        }

        okt = Okt()
        total = []

        for content in contents:
            # Some cells may be non-string (e.g. NaN); coerce before tagging.
            for word, tag in okt.pos(str(content)):
                if tag == 'Noun' and len(word) > 1 and word not in stopWords:
                    total.append(word)

        noun_list = Counter(total).most_common(10)

        colnames = ['word', 'count']

        if not Checker.check_folder_path('./csv'):
            handler.crete_folder('./csv')

        handler.save_to_csv('./csv/result_economy_news_word.csv', noun_list,
                            colnames, 'utf-8-sig')

        return pd.DataFrame(noun_list, columns=colnames)
    def frequency_naver_news(self, filePath, encoding):
        """Return the 100 most common nouns from a Naver-news CSV.

        Loads the CSV at *filePath*, extracts nouns from the 'content'
        column with Okt, drops single-character nouns, prints the counts,
        and returns a DataFrame with columns ['word', 'count'].
        """
        df = handler.load_to_csv(filePath, encoding)
        contents = df['content'].tolist()

        print('list load end')

        total = []

        okt = Okt()
        for content in contents:
            content = str(content).replace("\n", "")
            noun = okt.nouns(content)

            # BUG FIX: the original called noun.pop(i) while enumerating
            # `noun`, which skips the element right after each removal and
            # leaves some single-character nouns in the result. Filter into
            # a new list instead.
            total.extend(word for word in noun if len(word) >= 2)

        count = Counter(total)
        noun_list = count.most_common(100)

        print('noun_list load end')

        for value in noun_list:
            print(value)

        colnames = ['word', 'count']

        return pd.DataFrame(noun_list, columns=colnames)
Example #3
0
    def get_df_news_word(self, filePath, encoding):
        """Build a word-frequency table from a COVID-news CSV.

        Loads the CSV at *filePath*, POS-tags the 'contents' column with
        Okt, keeps nouns longer than one character that are not stop words,
        saves the 30 most common to ./csv/result_covid_news_word.csv, and
        returns them as a DataFrame with columns ['word', 'count'].
        """
        df = handler.load_to_csv(filePath, encoding)
        contents = df['contents'].tolist()

        print('list load end')

        total = []
        # BUG FIX: a comma was missing between '대상' and '단계', so the two
        # literals were implicitly concatenated into '대상단계' and neither
        # word was actually filtered out.
        stopWords = ['지난', '진자', '판정', '대통령', '위해', '지역', '사람', '관련', '이후', '대해', '개발', '올해', '당국',
                     '경우', '국내', '때문', '조사', '최근', '이번', '확인', '증가', '진행', '통해', '신종', '지난달', '대상',
                     '단계', '우리', '상황', '현재', '조치']

        okt = Okt()

        for content in contents:
            # Some cells may be non-string (e.g. NaN); coerce before tagging.
            content = str(content)
            morph = okt.pos(content)

            for word, tag in morph:
                if tag in ['Noun'] and len(word) > 1 and word not in stopWords:
                    total.append(word)

        count = Counter(total)
        noun_list = count.most_common(30)

        if not Checker.check_folder_path('./csv'):
            handler.crete_folder('./csv')

        handler.save_to_csv('./csv/result_covid_news_word.csv', noun_list, ['word', 'count'], 'utf-8-sig')

        colnames = ['word', 'count']

        return pd.DataFrame(noun_list, columns=colnames)
Example #4
0
    def get(self):
        """Serve economy-news word counts, crawling and persisting on first run.

        On an empty news table: crawls economy-news URLs, fetches article
        contents across 6 threads, saves raw CSVs, and bulk-inserts the news
        DataFrame. On an empty word table: computes and stores word
        frequencies. Always returns all stored word rows as JSON.
        """
        econmoy_news_count = self.news_dao.count()

        if econmoy_news_count == 0:
            kdd = EconomyNewsKdd()
            urls = kdd.get_economy_news_urls()

            if not Checker.check_folder_path('./csv'):
                handler.crete_folder('./csv')

            handler.save_to_csv('./csv/result_economy_urls.csv', urls, ['urls'], 'utf-8-sig')

            result_list = []
            thread_count = 6
            thread_list = []
            div_count = len(urls) // thread_count

            for idx in range(thread_count):
                start_idx = idx * div_count
                # BUG FIX: the original stopped at thread_count * div_count,
                # silently dropping len(urls) % thread_count trailing URLs;
                # the last thread now takes the remainder as well.
                end_idx = len(urls) if idx == thread_count - 1 else start_idx + div_count

                div_url = urls[start_idx:end_idx]

                thread = threading.Thread(target=kdd.get_contents_from_economy_urls, args=(div_url, result_list))
                thread_list.append(thread)
                thread.start()

            for thread in thread_list:
                thread.join()

            if not Checker.check_folder_path('./csv'):
                handler.crete_folder('./csv')

            handler.save_to_csv('./csv/result_economy_news.csv', result_list, ['time','contents'], 'utf-8-sig')
            df = self.df.get_df_news(result_list)
            self.news_dao.save_data_bulk(df)

        econmoy_word_count = self.word_dao.count()
        if econmoy_word_count == 0:
            df = self.df.get_df_news_word('./csv/result_economy_news.csv', 'utf-8-sig')
            self.word_dao.save_data_bulk(df)

        result = self.word_dao.find_all()
        return jsonify([item.json for item in result])
Example #5
0
 def __init__(self):
     """Log construction and create the file handler used for CSV I/O."""
     print('-----------FinanceDfo--------------')
     self.fileHandler = FileHandler()
Example #6
0
 def __init__(self):
     """Create the file handler used for CSV I/O."""
     self.fileHandler = FileHandler()
Example #7
0
 def predict_hana(self, csv_path):
     """Run the Hana model over the stock CSV at *csv_path* and return its prediction."""
     # CONSISTENCY FIX: was 'utf-8 sig'. Python's codec lookup normalizes
     # both spellings to utf_8_sig, so behavior is unchanged, but
     # 'utf-8-sig' matches the rest of the codebase.
     datas = FileHandler.load_to_csv(csv_path, 'utf-8-sig')
     return self.predict_data(self.hana_model, datas)
Example #8
0
    def predict_hana_date(self, csv_path, date, repeat_count):
        """Predict *repeat_count* steps of Hana stock values around *date*.

        Returns (pred_values, real_values, dates) as produced by
        predict_data_with_time on the Hana model.
        """
        # CONSISTENCY FIX: was 'utf-8 sig'. Codec lookup normalizes both
        # spellings to utf_8_sig, but 'utf-8-sig' matches the rest of the
        # codebase.
        data = FileHandler.load_to_csv(csv_path, 'utf-8-sig')
        pred_values, real_values, dates = self.predict_data_with_time(
            self.hana_model, data, date, repeat_count)

        return pred_values, real_values, dates
Example #9
0
 def predict_celltrion(self, csv_path):
     """Run the Celltrion model over the stock CSV at *csv_path* and return its prediction."""
     # CONSISTENCY FIX: was 'utf-8 sig'. Codec lookup normalizes both
     # spellings to utf_8_sig, but 'utf-8-sig' matches the rest of the
     # codebase.
     datas = FileHandler.load_to_csv(csv_path, 'utf-8-sig')
     return self.predict_data(self.celltrion_model, datas)
Example #10
0
    def get(self):
        """Serve COVID-news word counts for a requested keyword.

        Expects a JSON body with a 'keyword' field. On first run (empty news
        table) it crawls Naver news URLs for the keyword, fetches contents
        across 5 threads, saves raw CSVs, and bulk-inserts the news rows; on
        an empty word table it computes and stores word frequencies. Returns
        all stored word rows as JSON, or an error dict when the keyword is
        missing.
        """
        params = request.get_json()
        keyword = None

        if params is not None:
            keyword = params['keyword']

        # Guard clause instead of the original deep if/else nesting; the
        # key was a pointless f-string (f'message') — plain literal now.
        if keyword is None:
            return {'message': 'keyword is Empty. check keyword'}

        count = self.news_dao.count()

        if count == 0:
            crawer = CovidNewsKDD()
            print('get urls start')
            start_time = time.time()
            urls = crawer.get_naver_news_urls(keyword)
            print(
                f'get urls end. processing time : {time.time() - start_time}s'
            )

            if not Checker.check_folder_path('./csv'):
                handler.crete_folder('./csv')

            handler.save_to_csv('./csv/result_Covid19_urls.csv', urls,
                                ['urls'], 'utf-8-sig')

            print('get contents from urls start')

            start_time = time.time()

            result_list = []
            thread_count = 5
            thread_list = []
            div_count = len(urls) // thread_count

            for idx in range(thread_count):
                start_idx = idx * div_count
                # BUG FIX: the original stopped at thread_count * div_count,
                # silently dropping len(urls) % thread_count trailing URLs;
                # the last thread now takes the remainder as well.
                end_idx = len(urls) if idx == thread_count - 1 else start_idx + div_count

                div_url = urls[start_idx:end_idx]

                thread = threading.Thread(
                    target=crawer.get_contents_from_naver_urls,
                    args=(div_url, result_list))
                thread_list.append(thread)
                thread.start()

            for thread in thread_list:
                thread.join()

            print(
                f'get contents from urls end. processing time : {time.time() - start_time}s'
            )

            if not Checker.check_folder_path('./csv'):
                handler.crete_folder('./csv')

            handler.save_to_csv('./csv/result_covid19_news.csv',
                                result_list, ['time', 'contents'],
                                'utf-8-sig')

            df = self.df.get_df_news(result_list)
            self.news_dao.save_data_bulk(df)

        wordcount = self.word_dao.count()

        if wordcount == 0:
            df = self.df.get_df_news_word('./csv/result_covid19_news.csv',
                                          'utf-8-sig')
            self.word_dao.save_data_bulk(df)

        result = self.word_dao.find_all()
        return jsonify([item.json for item in result])
Example #11
0
 def __init__(self):
     """Log construction and create the file handler used for CSV I/O."""
     print('-----------ExchangeDfo--------------')
     self.fileHandler = FileHandler()
Example #12
0
    def get(self, stockName: str, date: str):
        """Predict 5 days of values for *stockName* ending at *date* (YYYYMMDD).

        Validates the date, dispatches to the per-stock prediction method,
        saves a pred-vs-real plot under ./plt, and returns a dict with the
        plot's absolute path and per-day {real, pred, date} rows — or a
        {'message': ...} error dict for bad dates, unknown stock names, or
        dates absent from the data.
        """
        now = datetime.datetime.now()

        try:
            date_obj = datetime.datetime.strptime(date, '%Y%m%d')
        except ValueError:
            # BUG FIX: was a bare `except:`, which swallowed every error
            # (including KeyboardInterrupt); only a parse failure is expected.
            return {'message': f'This date is not valid. date : {date}'}

        if date_obj > now:
            return {'message': f'This date is not valid. date : {date}'}

        date = date_obj.strftime('%Y-%m-%d')
        pred_values = []
        real_values = []
        dates = []

        if stockName == "삼성전자":
            pred_values, real_values, dates = self.ai.predict_samsung_date(
                './csv/samsung.csv', date, 5)
        elif stockName == "셀트리온":
            # NOTE(review): looks like a copy-paste of the samsung branch —
            # a predict_celltrion_date method probably belongs here; confirm
            # it exists on the AI class before switching.
            pred_values, real_values, dates = self.ai.predict_samsung_date(
                './csv/celltrion.csv', date, 5)
        elif stockName == "하나투어":
            # BUG FIX: was predict_samsung_date, which ran the Samsung model
            # over the Hana CSV; predict_hana_date uses the Hana model.
            pred_values, real_values, dates = self.ai.predict_hana_date(
                './csv/hana.csv', date, 5)
        else:
            return {
                'message': f'unknown Stock Name. Stock Name : {stockName}'
            }

        pred_values.reverse()
        real_values.reverse()
        dates.reverse()

        # Millisecond timestamp makes each saved plot filename unique.
        millis = int(round(time.time() * 1000))

        if not Checker.check_folder_path('./plt'):
            FileHandler.crete_folder('./plt')

        if len(pred_values) != 0 and len(real_values) != 0 and len(
                dates) != 0:

            plt.figure(facecolor='white', figsize=(20, 10))
            plt.plot(dates, pred_values)
            plt.plot(dates, real_values)

            plt.xlabel('date')
            plt.ylabel('value')

            plt.legend(['Pred value', 'True_value'])
            # 'fredictDate' typo kept deliberately — it is used consistently
            # here and in the returned path, and consumers may rely on it.
            plt.savefig(f'./plt/fredictDate_{millis}.png', dpi=600)
            plt.close()

            return_data = []

            for idx in range(0, len(pred_values)):
                return_data.append({
                    'real': real_values[idx],
                    'pred': pred_values[idx],
                    'date': dates[idx]
                })

            return {
                'img':
                Checker.get_abs_path(f'./plt/fredictDate_{millis}.png'),
                'datas': return_data
            }
        else:
            return {
                'message':
                f'Not Include Date In Stock data. Stock Name : {stockName}, date : {date}'
            }
Example #13
0
 def __init__(self):
     """Log construction and create the file reader used for CSV I/O."""
     print('-----------emotionDfo--------------')
     # NOTE(review): `keyword` is not defined in this method — presumably a
     # module-level/global name; confirm it exists where this class lives.
     print(keyword)
     self.fileReader = FileHandler()