def k10FromDate():
    prices = dict()
    top10 = topK(10)
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    for code in top10:
        crawler = NaverStockCrawler.create(code)
        data = crawler.crawling(date)
        prices[code] = {
            NaverDate.formatDate(item.date): item.close
            for item in data
        }
    df = pd.DataFrame(prices)
    df = df.fillna(method='ffill')
    if df.isnull().values.any():  # if holes still remain after the forward fill
        df = df.fillna(method='bfill')
    return df
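# A minimal toy sketch (not part of the crawler code) of the ffill-then-bfill gap
# filling used in k10FromDate: the forward fill closes holes left by non-trading
# days, and the backward fill catches NaNs that remain at the start of the frame.
import numpy as np
import pandas as pd

toy = pd.DataFrame({'A': [np.nan, 2.0, np.nan, 4.0], 'B': [1.0, np.nan, 3.0, np.nan]})
toy = toy.fillna(method='ffill')  # fills B's internal and trailing holes
if toy.isnull().values.any():     # A's leading NaN survives the forward fill
    toy = toy.fillna(method='bfill')
print(toy)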
def loadDomesticIndex(self, name, beforeStr, endStr):
    crawler = NaverCrawler.create(targetName=name)
    date = NaverDate.create(startDate=beforeStr, endDate=endStr)
    data = crawler.crawling(dateData=date)
    df = pd.DataFrame(columns=['종가', '전일비', '등락률', '거래량', '거래대금'])
    for v in data:
        df.loc[v.index()] = v.value()
    return df
def loadDomesticIndex(self, name, beforeStr, endStr):
    if not os.path.isfile(name):
        print(name, 'collect...')
        crawler = NaverCrawler.create(targetName=name.split('_')[0])
        date = NaverDate.create(startDate=beforeStr, endDate=endStr)
        data = crawler.crawling(dateData=date)
        df = pd.DataFrame(columns=['종가', '전일비', '등락률', '거래량', '거래대금'])
        for v in data:
            df.loc[v.index()] = v.value()
        df.to_hdf(name, key='df', mode='w')
    else:
        print(name, 'read...')
        df = pd.read_hdf(name, key='df')
    return df
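# The method above implements a read-through HDF5 cache. A minimal standalone
# sketch of the same pattern (the function name and key below are illustrative,
# not from the original code):
import os.path
import pandas as pd

def cached_frame(path, build):
    """Return build() on a cache miss, persisting it as HDF5; else read the cache."""
    if not os.path.isfile(path):
        df = build()
        df.to_hdf(path, key='df', mode='w')
    else:
        df = pd.read_hdf(path, key='df')
    return df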
def showGraphK10KOSPI200():
    k10_price = k10FromDate()
    k10_info = makeK10()
    k10_historical_mc = k10_price * k10_info['Outstanding'] * k10_info['Floating']
    k10 = pd.DataFrame()
    k10['k10 Market Cap'] = k10_historical_mc.sum(axis=1)
    k10['k10'] = k10['k10 Market Cap'] / k10['k10 Market Cap'].iloc[0] * 100
    crawler = NaverCrawler.create(targetName='KPI200')
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    kospi200 = crawler.crawling(dateData=date)
    kospi200Close = {
        NaverDate.formatDate(item.date): item.close
        for item in kospi200
    }
    k200 = pd.DataFrame({'K200': kospi200Close})
    plt.figure(figsize=(10, 5))
    plt.plot(k10['k10'], label='K10')  # labels are needed for the legend to render
    plt.plot(k200['K200'] / k200['K200'].iloc[0] * 100, label='KOSPI200')
    plt.legend(loc=0)
    plt.grid(True, color='0.7', linestyle=':', linewidth=1)
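# The rebasing above divides each series by its first value and scales to 100,
# so both indices start at 100 and stay directly comparable. Toy values, purely
# for illustration:
import pandas as pd

s = pd.Series([270.0, 275.4, 268.9])
print(s / s.iloc[0] * 100)  # 100.0, 102.0, 99.59...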
def loadStockFromArr(self, name, targets, beforeStr, endStr):
    prices = dict()
    if not os.path.isfile(name):
        date = NaverDate.create(startDate=beforeStr, endDate=endStr)
        for target in targets:
            print(target['Name'], 'collect...')
            crawler = NaverStockCrawler.create(target['Code'])
            data = crawler.crawling(date)
            prices[target['Name']] = {
                pd.to_datetime(item.date, format='%Y-%m-%d'): item.close
                for item in data
            }
        bonddf = pd.DataFrame(prices)
        bonddf.to_hdf(name, key='df', mode='w')
    else:
        print(name, 'read...')
        bonddf = pd.read_hdf(name, key='df')
    return bonddf
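# Usage sketch for loadStockFromArr (the file name, code, and name below are
# hypothetical; only the 'Code'/'Name' keys are required by the loop above):
# bonddf = self.loadStockFromArr(
#     'BOND2019-01-02-2019-05-04.h5',
#     [{'Code': '114260', 'Name': 'KODEX 국고채3년'}],
#     '2019-01-02', '2019-05-04')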
def makeDataFrame():
    crawler = NaverCrawler.create(targetName='KPI200')
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    kospi200 = crawler.crawling(dateData=date)
    kospi200Close = {
        NaverDate.formatDate(item.date): item.close
        for item in kospi200
    }
    worldDate = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    worldCrawler = NaverWorldCrawler.create(targetName='SPI@SPX')
    sp500 = worldCrawler.crawling(dateData=worldDate)
    sp500Close = {
        NaverDate.formatDate(item.date): item.close
        for item in sp500
    }
    data = {'S&P500': sp500Close, 'KOSPI200': kospi200Close}
    df = pd.DataFrame(data)
    df = df.fillna(method='ffill')
    if df.isnull().values.any():  # backfill any leading holes the forward fill left
        df = df.fillna(method='bfill')
    return df
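# Why makeDataFrame needs those fills: the DataFrame constructor takes the union
# of the two date-keyed dicts, leaving NaN wherever only one market traded. Toy
# illustration with hypothetical dates and values:
import pandas as pd

a = {'2019-01-02': 1.0, '2019-01-03': 2.0}
b = {'2019-01-02': 10.0, '2019-01-04': 30.0}
print(pd.DataFrame({'A': a, 'B': b}))  # NaN on dates missing from one series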
def loadStockFromDict(self, name, targets, beforeStr, endStr):
    prices = dict()
    if not os.path.isfile(name):
        date = NaverDate.create(startDate=beforeStr, endDate=endStr)
        progress = 0
        completeLen = len(targets.keys())
        for key in targets:
            print(targets[key], 'collect...', str(progress), '/',
                  str(completeLen), str(progress / completeLen * 100) + '%')
            crawler = NaverStockCrawler.create(key)
            data = crawler.crawling(date)
            prices[targets[key]] = {
                pd.to_datetime(item.date, format='%Y-%m-%d'): item.close
                for item in data
            }
            progress += 1
        topdf = pd.DataFrame(prices)
        topdf.to_hdf(name, key='df', mode='w')
    else:
        print(name, 'read...')
        topdf = pd.read_hdf(name, key='df')
    return topdf
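# The progress line above, written as an f-string (toy values, purely illustrative);
# note the * 100 so the ratio actually reads as a percentage:
progress, completeLen = 3, 12
print(f'{progress}/{completeLen} {progress / completeLen * 100:.1f}%')  # 3/12 25.0%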
prices = dict()

# In[12]: read
topcap = pd.read_hdf('ZIPTOPCAP2007-01-01-2019-12-31.h5')  # single-object file, so no key needed

# In[12]
targets = {}
for index, row in topcap.iterrows():
    targets[row['Code']] = row['Name']

# In[12]: closing prices per stock
import os.path

name = 'STOCKZIPTOPCAP2007-01-01-2019-12-31.h5'
if not os.path.isfile(name):
    date = NaverDate.create(startDate=beforeStr, endDate=endStr)
    progress = 0
    completeLen = len(targets.keys())
    for key in targets:
        print(targets[key], 'collect...', str(progress), '/',
              str(completeLen), str(progress / completeLen * 100) + '%')
        crawler = NaverStockCrawler.create(key)
        data = crawler.crawling(date)
        prices[targets[key]] = {
            pd.to_datetime(item.date, format='%Y-%m-%d'): item.close
            for item in data
        }
        progress += 1
    topdf = pd.DataFrame(prices)
    topdf.to_hdf(name, key='df', mode='w')
def crawling(self, dateData=''):
    driver = webdriver.PhantomJS(
        'C:/Users/lsj/Downloads/phantomjs-2.1.1-windows/bin/phantomjs.exe')
    driver.get(self.makeWorldUrl())
    data = []
    pageNo = '1'
    isRunning = True
    while isRunning:
        # find_element_by_link_text raises instead of returning None, so probe
        # with the plural form and bail out when the page link is missing
        pageLinks = driver.find_elements_by_link_text(pageNo)
        if not pageLinks:
            break
        elePage = pageLinks[0]
        elePage.click()
        pageNo = elePage.text
        text = driver.page_source
        soup = bs4.BeautifulSoup(text, 'lxml')
        table = soup.find(class_='tb_status2 tb_status2_t2').find('tbody')
        # keep only the tag children of the tbody
        rows = filter(lambda val: type(val) == bs4.element.Tag, table.children)
        # collect the td tags of each row
        tds = map(lambda row: row.find_all('td'), list(rows))
        # flatten to one dimension
        flattenTds = list(itertools.chain(*tds))
        # drop anything that is not a tag
        tdsf = filter(lambda td: type(td) == bs4.element.Tag, flattenTds)
        # extract the cell text
        values = map(lambda value: value.stripped_strings, tdsf)
        # flatten to one dimension
        strings = list(itertools.chain(*values))
        # chunk into groups of six fields
        splitData = [strings[i:i + 6] for i in range(0, len(strings), 6)]
        for one in splitData:
            date = NaverDate.formatDate(date=one[0])
            print(date)
            if dateData.startDate <= date <= dateData.endDate:
                resultData = NaverWorldResultData.create(date=one[0],
                                                         close=one[1],
                                                         diff=one[2],
                                                         open=one[3],
                                                         high=one[4],
                                                         low=one[5])
                data.append(resultData)
            elif dateData.startDate > date:
                isRunning = False
                break
        eleNext = driver.find_elements_by_css_selector('#dayPaging .next')
        nextPageNo = str(int(pageNo) + 1)
        print(nextPageNo)
        if len(eleNext) > 0 and int(pageNo) % 10 == 0:
            eleNext[0].click()
            wait = WebDriverWait(driver, 10)
            wait.until(
                EC.presence_of_element_located((By.LINK_TEXT, nextPageNo)))
            driver.implicitly_wait(1)
        pageNo = nextPageNo
        if len(driver.find_elements_by_link_text(pageNo)) == 0:
            break
    driver.close()
    return data
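# A toy run (synthetic strings, not real quotes) of the flatten-and-chunk step
# above: the scraped cell texts arrive as one flat list, and slicing in strides
# of six rebuilds one [date, close, diff, open, high, low] record per row:
import itertools

cells = [['2019-05-03', '2945.64'], ['5.20', '2940.44'], ['2951.12', '2938.02'],
         ['2019-05-02', '2917.52'], ['13.84', '2903.68'], ['2922.10', '2900.01']]
flat = list(itertools.chain(*cells))
print([flat[i:i + 6] for i in range(0, len(flat), 6)])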
# In[1]: listed companies...
import pandas as pd

kospiCompanyDf = pd.read_excel('fin/시가총액_2019_01_02.xlsx',
                               sheet_name='시가총액',
                               skiprows=3,
                               converters={'종목코드': str})
kospiCompanyDf = kospiCompanyDf.iloc[1:]
codeName = {}
for index, row in kospiCompanyDf.iterrows():
    codeName[row['종목코드']] = row['종목명']
codeName

# In[2]: crawl current prices...
from crawler.NaverStockCrawler import NaverStockCrawler
from crawler.data.NaverDate import NaverDate

prices = dict()
date = NaverDate.create(startDate=before, endDate=now)
progress = 0
completeLen = len(codeName.keys())
for key in codeName:
    print(codeName[key], 'collect...', str(progress), '/',
          str(completeLen), str(progress / completeLen * 100) + '%')
    crawler = NaverStockCrawler.create(key)
    data = crawler.crawling(date)
    prices[key] = {
        pd.to_datetime(item.date, format='%Y-%m-%d'): item.close
        for item in data
    }
    progress += 1
topDf = pd.DataFrame(prices)
topDf

# In[3]: financial statements
upCodes = ['제조업', '은행업', '증권업', '보험업', '종합금융업', '여신전문금융업', '신용금고']
factors = ['per', 'pcr', 'pbr', 'roe', '당기순이익', '영업활동으로인한현금흐름',
           '투자활동으로인한현금흐름', '재무활동으로인한현금흐름']
factorDf = {}
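# Why converters={'종목코드': str} above matters: Excel stores the 6-digit stock
# codes as numbers, so a numeric read drops the leading zeros the codes need.
# Toy illustration:
code_as_number = 5930           # what a numeric read would yield for a code like '005930'
print(f'{code_as_number:06d}')  # '005930' — the zero-padded form the converter preserves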
import numpy as np
from sklearn.linear_model import LinearRegression

# In[2]: font setup
import matplotlib.font_manager as fm
import matplotlib as mpl
import matplotlib.pyplot as plt

path = '/Library/Fonts/NanumBarunGothicLight.otf'
font_name = fm.FontProperties(fname=path, size=18).get_name()
print(font_name)
mpl.rc('font', family=font_name)

# In[3]: test
prices = dict()
date = NaverDate.create(startDate='1997-06-01')
crawler = NaverStockCrawler.create('035720', logging=True)
data = crawler.crawling(date)
prices['카카오'] = {
    pd.to_datetime(item.date, format='%Y-%m-%d'): item.close
    for item in data
}
topdf = pd.DataFrame(prices)
topdf

# crawler = NavarSearchCodeCrawler.create('KODEX')
# data = crawler.crawling()
# data

# In[4]: test2
print('collect...')
crawler = NaverCrawler.create(targetName='KOSPI')
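# One setting worth pairing with a Korean font (a standard matplotlib rcParam,
# added here as a suggestion rather than taken from the original code): without
# it, minus signs can render as missing glyphs under CJK fonts.
mpl.rcParams['axes.unicode_minus'] = False  # draw '-' as a plain hyphen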
kospi200Close = {
    NaverDate.formatDate(item.date): item.close
    for item in kospi200
}
k200 = pd.DataFrame({'K200': kospi200Close})
plt.figure(figsize=(10, 5))
plt.plot(k10['k10'], label='K10')  # labels are needed for the legend to render
plt.plot(k200['K200'] / k200['K200'].iloc[0] * 100, label='KOSPI200')
plt.legend(loc=0)
plt.grid(True, color='0.7', linestyle=':', linewidth=1)

# In[3]: fetch the KOSPI 200
crawler = NaverCrawler.create(targetName='KPI200')
date = NaverDate.create(startDate='2018-06-01')
kospi200 = crawler.crawling(dateData=date)
df = pd.DataFrame(columns=['종가', '전일비', '등락률', '거래량', '거래대금'])
for v in kospi200:
    df.loc[v.index()] = v.value()
df

# In[4]: monthly mean
monthly_df = df.resample('M').agg({'종가': 'mean'})  # resample(..., how=...) is long deprecated
monthly_df

# In[5]: compute momentum
monthly_df['종가'].iloc[-1] - monthly_df['종가']

# In[6]: closing prices for 30 stocks — a bug is known here
prices = dict()
targets = topK(30) + [{'code': "114800", 'name': 'KODEX 인버스'}]
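# The monthly resample from In[4] above, on a standalone toy frame (dates and
# closes are hypothetical); resample needs a DatetimeIndex, which df gets from
# its date keys:
import pandas as pd

idx = pd.to_datetime(['2019-01-02', '2019-01-15', '2019-02-01'])
toy = pd.DataFrame({'종가': [100.0, 110.0, 120.0]}, index=idx)
print(toy.resample('M').agg({'종가': 'mean'}))  # 2019-01-31: 105.0, 2019-02-28: 120.0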