def get_df_news_word(self, filePath, encoding):
    df = handler.load_to_csv(filePath, encoding)
    contents = df['contents'].tolist()
    # times = df['time'].tolist()
    total = []
    stopWords = [
        '크게', '여기', '서울', '정부', '위원회', '사업', '한국',
        '옵티머스', '의원', '금융감독원', '국회', '지난'
    ]
    okt = Okt()
    for content in contents:
        content = str(content)
        # Keep only nouns longer than one character that are not stop words.
        morph = okt.pos(content)
        for word, tag in morph:
            if tag == 'Noun' and len(word) > 1 and word not in stopWords:
                total.append(word)
    count = Counter(total)
    noun_list = count.most_common(10)
    # wc = WordCloud(font_path='./font/NanumBarunGothic.ttf', background_color='white',
    #                width=1000, height=1000, max_words=50, max_font_size=300)
    # wc.generate_from_frequencies(dict(noun_list))
    # wc.to_file('wordCloud.png')
    colnames = ['word', 'count']
    if not Checker.check_folder_path('./csv'):
        handler.crete_folder('./csv')
    handler.save_to_csv('./csv/result_economy_news_word.csv', noun_list,
                        colnames, 'utf-8-sig')
    return pd.DataFrame(noun_list, columns=colnames)
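# Usage sketch, assuming `EconomyNewsDfo` (a hypothetical name) is the class
# that owns get_df_news_word, and that the crawler endpoint below has already
# written the news CSV.
if __name__ == '__main__':
    dfo = EconomyNewsDfo()
    word_df = dfo.get_df_news_word('./csv/result_economy_news.csv', 'utf-8-sig')
    print(word_df.head())  # ten most common nouns with their counts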
def frequency_naver_news(self, filePath, encoding):
    df = handler.load_to_csv(filePath, encoding)
    contents = df['content'].tolist()
    print('list load end')
    total = []
    okt = Okt()
    for content in contents:
        content = str(content).replace('\n', '')
        noun = okt.nouns(content)
        # Filter short tokens with a comprehension; popping from the list
        # while iterating over it skips the element after each removal.
        noun = [word for word in noun if len(word) > 1]
        total.extend(noun)
    count = Counter(total)
    noun_list = count.most_common(100)
    print('noun_list load end')
    for value in noun_list:
        print(value)
    colnames = ['word', 'count']
    return pd.DataFrame(noun_list, columns=colnames)
def get_df_news_word(self, filePath, encoding):
    df = handler.load_to_csv(filePath, encoding)
    contents = df['contents'].tolist()
    # times = df['time'].tolist()
    print('list load end')
    total = []
    stopWords = [
        '지난', '진자', '판정', '대통령', '위해', '지역', '사람', '관련',
        '이후', '대해', '개발', '올해', '당국', '경우', '국내', '때문',
        '조사', '최근', '이번', '확인', '증가', '진행', '통해', '신종',
        '지난달', '대상', '단계', '우리', '상황', '현재', '조치'
    ]
    okt = Okt()
    for content in contents:
        content = str(content)
        # Keep only nouns longer than one character that are not stop words.
        morph = okt.pos(content)
        for word, tag in morph:
            if tag == 'Noun' and len(word) > 1 and word not in stopWords:
                total.append(word)
    count = Counter(total)
    noun_list = count.most_common(30)
    colnames = ['word', 'count']
    if not Checker.check_folder_path('./csv'):
        handler.crete_folder('./csv')
    handler.save_to_csv('./csv/result_covid_news_word.csv', noun_list,
                        colnames, 'utf-8-sig')
    return pd.DataFrame(noun_list, columns=colnames)
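# The word-cloud step commented out in get_df_news_word can be run on the
# returned frequencies. A minimal sketch, assuming the `wordcloud` package is
# installed and the NanumBarunGothic font file exists under ./font:
from wordcloud import WordCloud

def save_word_cloud(noun_list, out_path='wordCloud.png'):
    # noun_list is Counter.most_common() output: a list of (word, count) pairs.
    wc = WordCloud(font_path='./font/NanumBarunGothic.ttf',
                   background_color='white',
                   width=1000, height=1000,
                   max_words=50, max_font_size=300)
    wc.generate_from_frequencies(dict(noun_list))
    wc.to_file(out_path)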
def get(self):
    economy_news_count = self.news_dao.count()
    if economy_news_count == 0:
        kdd = EconomyNewsKdd()
        urls = kdd.get_economy_news_urls()
        if not Checker.check_folder_path('./csv'):
            handler.crete_folder('./csv')
        handler.save_to_csv('./csv/result_economy_urls.csv', urls, ['urls'], 'utf-8-sig')
        result_list = []
        thread_count = 6
        thread_list = []
        div_count = len(urls) // thread_count
        for idx in range(thread_count):
            start_idx = idx * div_count
            # The last thread takes any remainder so no URL is dropped.
            end_idx = len(urls) if idx == thread_count - 1 else start_idx + div_count
            div_url = urls[start_idx:end_idx]
            thread = threading.Thread(target=kdd.get_contents_from_economy_urls,
                                      args=(div_url, result_list))
            thread_list.append(thread)
            thread.start()
        for thread in thread_list:
            thread.join()
        if not Checker.check_folder_path('./csv'):
            handler.crete_folder('./csv')
        handler.save_to_csv('./csv/result_economy_news.csv', result_list,
                            ['time', 'contents'], 'utf-8-sig')
        df = self.df.get_df_news(result_list)
        self.news_dao.save_data_bulk(df)
    economy_word_count = self.word_dao.count()
    if economy_word_count == 0:
        df = self.df.get_df_news_word('./csv/result_economy_news.csv', 'utf-8-sig')
        self.word_dao.save_data_bulk(df)
    result = self.word_dao.find_all()
    return jsonify([item.json for item in result])
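# Both crawler endpoints split the URL list across worker threads the same way.
# A reusable sketch of that split (the helper name `split_chunks` is ours, not
# from the repo); the final chunk absorbs the remainder so nothing is dropped.
# Threads help here despite the GIL because the crawl is network-bound.
def split_chunks(items, chunk_count):
    size = len(items) // chunk_count
    chunks = []
    for idx in range(chunk_count):
        start = idx * size
        end = len(items) if idx == chunk_count - 1 else start + size
        chunks.append(items[start:end])
    return chunks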
def __init__(self):
    print('-----------FinanceDfo--------------')
    self.fileHandler = FileHandler()
def __init__(self):
    self.fileHandler = FileHandler()
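# FileHandler and Checker are used throughout this section but defined
# elsewhere. A minimal sketch of the assumed interface, with signatures
# inferred from the call sites (the `crete_folder` spelling is the repo's
# own); the module-level `handler` is assumed to be a FileHandler instance.
import os
import pandas as pd

class FileHandler:
    @staticmethod
    def load_to_csv(file_path, encoding):
        # Read a CSV into a DataFrame with the given encoding.
        return pd.read_csv(file_path, encoding=encoding)

    @staticmethod
    def save_to_csv(file_path, rows, colnames, encoding):
        # Persist a list of rows under the given column names.
        pd.DataFrame(rows, columns=colnames).to_csv(file_path, index=False,
                                                    encoding=encoding)

    @staticmethod
    def crete_folder(folder_path):
        os.makedirs(folder_path, exist_ok=True)

class Checker:
    @staticmethod
    def check_folder_path(folder_path):
        return os.path.isdir(folder_path)

    @staticmethod
    def get_abs_path(path):
        return os.path.abspath(path)

handler = FileHandler()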
def predict_hana(self, csv_path):
    datas = FileHandler.load_to_csv(csv_path, 'utf-8-sig')
    return self.predict_data(self.hana_model, datas)
def predict_hana_date(self, csv_path, date, repeat_count):
    data = FileHandler.load_to_csv(csv_path, 'utf-8-sig')
    pred_values, real_values, dates = self.predict_data_with_time(
        self.hana_model, data, date, repeat_count)
    return pred_values, real_values, dates
def predict_celltrion(self, csv_path):
    datas = FileHandler.load_to_csv(csv_path, 'utf-8-sig')
    return self.predict_data(self.celltrion_model, datas)
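# Usage sketch for the predictors above; `AiTrader` is a hypothetical name for
# the class that owns the predict_* methods and the per-stock models.
if __name__ == '__main__':
    ai = AiTrader()
    hana_preds = ai.predict_hana('./csv/hana.csv')
    pred_values, real_values, dates = ai.predict_hana_date('./csv/hana.csv',
                                                           '2020-09-01', 5)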
def get(self):
    params = request.get_json()
    keyword = None
    if params is not None:
        keyword = params['keyword']
    if keyword is None:
        return {'message': 'keyword is empty. Check the keyword.'}

    count = self.news_dao.count()
    if count == 0:
        crawler = CovidNewsKDD()
        print('get urls start')
        start_time = time.time()
        urls = crawler.get_naver_news_urls(keyword)
        print(f'get urls end. processing time : {time.time() - start_time}s')
        if not Checker.check_folder_path('./csv'):
            handler.crete_folder('./csv')
        handler.save_to_csv('./csv/result_Covid19_urls.csv', urls, ['urls'], 'utf-8-sig')

        print('get contents from urls start')
        start_time = time.time()
        result_list = []
        thread_count = 5
        thread_list = []
        div_count = len(urls) // thread_count
        for idx in range(thread_count):
            start_idx = idx * div_count
            # The last thread takes any remainder so no URL is dropped.
            end_idx = len(urls) if idx == thread_count - 1 else start_idx + div_count
            div_url = urls[start_idx:end_idx]
            thread = threading.Thread(target=crawler.get_contents_from_naver_urls,
                                      args=(div_url, result_list))
            thread_list.append(thread)
            thread.start()
        for thread in thread_list:
            thread.join()
        print(f'get contents from urls end. processing time : {time.time() - start_time}s')

        if not Checker.check_folder_path('./csv'):
            handler.crete_folder('./csv')
        handler.save_to_csv('./csv/result_covid19_news.csv', result_list,
                            ['time', 'contents'], 'utf-8-sig')
        df = self.df.get_df_news(result_list)
        self.news_dao.save_data_bulk(df)

    word_count = self.word_dao.count()
    if word_count == 0:
        df = self.df.get_df_news_word('./csv/result_covid19_news.csv', 'utf-8-sig')
        self.word_dao.save_data_bulk(df)
    result = self.word_dao.find_all()
    return jsonify([item.json for item in result])
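# Client-side sketch for the endpoint above. Note that this GET handler reads
# the keyword from a JSON body (request.get_json()), so the client must send
# one. The route /covid/news is a guess; the resource registration is not
# shown in this section.
import requests

resp = requests.get('http://localhost:5000/covid/news',
                    json={'keyword': '코로나'})
print(resp.json())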
def __init__(self):
    print('-----------ExchangeDfo--------------')
    self.fileHandler = FileHandler()
def get(self, stockName: str, date: str):
    now = datetime.datetime.now()
    try:
        date_obj = datetime.datetime.strptime(date, '%Y%m%d')
    except ValueError:
        return {'message': f'This date is not valid. date : {date}'}
    if date_obj > now:
        return {'message': f'This date is not valid. date : {date}'}
    date = date_obj.strftime('%Y-%m-%d')

    if stockName == '삼성전자':
        pred_values, real_values, dates = self.ai.predict_samsung_date(
            './csv/samsung.csv', date, 5)
    elif stockName == '셀트리온':
        # predict_celltrion_date is assumed to exist alongside predict_celltrion.
        pred_values, real_values, dates = self.ai.predict_celltrion_date(
            './csv/celltrion.csv', date, 5)
    elif stockName == '하나투어':
        pred_values, real_values, dates = self.ai.predict_hana_date(
            './csv/hana.csv', date, 5)
    else:
        return {'message': f'unknown Stock Name. Stock Name : {stockName}'}

    pred_values.reverse()
    real_values.reverse()
    dates.reverse()
    millis = int(round(time.time() * 1000))
    if not Checker.check_folder_path('./plt'):
        FileHandler.crete_folder('./plt')
    if len(pred_values) != 0 and len(real_values) != 0 and len(dates) != 0:
        plt.figure(facecolor='white', figsize=(20, 10))
        plt.plot(dates, pred_values)
        plt.plot(dates, real_values)
        plt.xlabel('date')
        plt.ylabel('value')
        plt.legend(['Pred value', 'True value'])
        plt.savefig(f'./plt/predictDate_{millis}.png', dpi=600)
        plt.close()
        return_data = []
        for idx in range(len(pred_values)):
            return_data.append({
                'real': real_values[idx],
                'pred': pred_values[idx],
                'date': dates[idx]
            })
        return {
            'img': Checker.get_abs_path(f'./plt/predictDate_{millis}.png'),
            'datas': return_data
        }
    return {
        'message': f'The requested date is not in the stock data. Stock Name : {stockName}, date : {date}'
    }
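# Client-side sketch of calling the prediction endpoint above; the route
# /stock/<stockName>/<date> is an assumption, since the resource registration
# is not shown in this section.
import requests

resp = requests.get('http://localhost:5000/stock/삼성전자/20200901')
body = resp.json()
print(body.get('img'))  # absolute path of the saved plot image
for row in body.get('datas', []):
    print(row['date'], row['real'], row['pred'])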
def __init__(self):
    print('-----------emotionDfo--------------')
    self.fileReader = FileHandler()