# imports used across the snippets below; NaverCrawler, NaverWorldCrawler,
# NaverStockCrawler, NaverDate, NaverWorldResultData, topK and makeK10 come
# from the project's own modules
import itertools
import bs4
import pandas as pd
import matplotlib.pyplot as plt
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def makeDataFrame():
    # crawl KOSPI200 closes for the date range
    crawler = NaverCrawler.create(targetName='KPI200')
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    kospi200 = crawler.crawling(dateData=date)
    kospi200Close = {
        NaverDate.formatDate(item.date): item.close for item in kospi200
    }
    # crawl S&P500 closes for the same range
    worldDate = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    worldCrawler = NaverWorldCrawler.create(targetName='SPI@SPX')
    sp500 = worldCrawler.crawling(dateData=worldDate)
    sp500Close = {
        NaverDate.formatDate(item.date): item.close for item in sp500
    }
    # align both series on date; the two markets have different holidays,
    # so forward-fill first, then back-fill any leading gaps
    data = {'S&P500': sp500Close, 'KOSPI200': kospi200Close}
    df = pd.DataFrame(data)
    df = df.ffill()
    if df.isnull().values.any():
        df = df.bfill()
    return df
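# Usage sketch (hypothetical driver code; assumes pandas is imported as pd,
# makeDataFrame() above, and numeric close columns): rebase both series to
# 100 on the first row so S&P500 and KOSPI200 sit on one scale, then check
# how closely they co-move over the range.
def compareIndices():
    df = makeDataFrame()
    rebased = df / df.iloc[0] * 100  # both series start at 100
    correlation = df['S&P500'].corr(df['KOSPI200'])
    print(rebased.tail())
    print('correlation:', correlation)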
def k10FromDate():
    prices = dict()
    top10 = topK(10)
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    for code in top10:
        crawler = NaverStockCrawler.create(code)
        data = crawler.crawling(date)
        prices[code] = {
            NaverDate.formatDate(item.date): item.close for item in data
        }
    df = pd.DataFrame(prices)
    df = df.ffill()
    if df.isnull().values.any():  # if gaps still remain, back-fill them
        df = df.bfill()
    return df
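# Why ffill then bfill: the ten constituents do not all have a close for
# every date in the range, so the merged frame can have holes. A minimal
# sketch with toy data (hypothetical values; assumes pandas as pd):
def gapFillDemo():
    raw = pd.DataFrame({'A': [None, 101.0, None, 103.0],
                        'B': [200.0, None, 202.0, None]})
    filled = raw.ffill()          # carry the last known close forward
    if filled.isnull().values.any():
        filled = filled.bfill()   # only leading gaps remain; fill backward
    print(filled)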
def showGraphK10KOSPI200():
    k10_price = k10FromDate()
    k10_info = makeK10()
    # historical market cap per constituent: price * shares * floating ratio
    k10_historical_mc = k10_price * k10_info['Outstanding'] * k10_info['Floating']
    k10 = pd.DataFrame()
    k10['k10 Market Cap'] = k10_historical_mc.sum(axis=1)
    # rebase the index to 100 on the first trading day
    k10['k10'] = k10['k10 Market Cap'] / k10['k10 Market Cap'].iloc[0] * 100
    crawler = NaverCrawler.create(targetName='KPI200')
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    kospi200 = crawler.crawling(dateData=date)
    kospi200Close = {
        NaverDate.formatDate(item.date): item.close for item in kospi200
    }
    k200 = pd.DataFrame({'K200': kospi200Close})
    plt.figure(figsize=(10, 5))
    plt.plot(k10['k10'], label='k10')
    plt.plot(k200['K200'] / k200['K200'].iloc[0] * 100, label='K200')
    plt.legend(loc=0)  # labels are required, otherwise the legend is empty
    plt.grid(True, color='0.7', linestyle=':', linewidth=1)
    plt.show()
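# Follow-up sketch (hypothetical helper): since both indices are rebased to
# 100 at the start date, their ratio reads as the relative performance of
# k10 against KOSPI200 (values above 100 mean k10 is outperforming); the
# division aligns the two series on their date index.
def k10RelativeStrength(k10, k200):
    rebasedK200 = k200['K200'] / k200['K200'].iloc[0] * 100
    return k10['k10'] / rebasedK200 * 100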
def crawling(self, dateData=''):
    driver = webdriver.PhantomJS(
        'C:/Users/lsj/Downloads/phantomjs-2.1.1-windows/bin/phantomjs.exe')
    driver.get(self.makeWorldUrl())
    data = []
    pageNo = '1'
    isRunning = True
    while isRunning:
        # find_element_by_link_text raises when the link is missing rather
        # than returning None, so probe with the plural variant instead
        elePages = driver.find_elements_by_link_text(pageNo)
        if not elePages:
            break
        elePage = elePages[0]
        elePage.click()
        pageNo = elePage.text
        text = driver.page_source
        soup = bs4.BeautifulSoup(text, 'lxml')
        table = soup.find(class_='tb_status2 tb_status2_t2').find('tbody')
        # keep only the tag children of the table body
        rows = filter(lambda val: type(val) == bs4.element.Tag, table.children)
        # collect the td tags of each row
        tds = map(lambda row: row.find_all('td'), list(rows))
        # flatten to one dimension
        flattenTds = list(itertools.chain(*tds))
        # drop anything that is not a tag
        tdsf = filter(lambda td: type(td) == bs4.element.Tag, flattenTds)
        # extract the text
        values = map(lambda value: value.stripped_strings, tdsf)
        # flatten to one dimension
        strings = list(itertools.chain(*values))
        # chunk into groups of six: date, close, diff, open, high, low
        splitData = [strings[i:i + 6] for i in range(0, len(strings), 6)]
        for one in splitData:
            date = NaverDate.formatDate(date=one[0])
            if dateData.startDate <= date <= dateData.endDate:
                resultData = NaverWorldResultData.create(date=one[0],
                                                         close=one[1],
                                                         diff=one[2],
                                                         open=one[3],
                                                         high=one[4],
                                                         low=one[5])
                data.append(resultData)
            elif dateData.startDate > date:
                # past the start of the requested range; stop crawling
                isRunning = False
                break
        # the pager shows links in groups of ten; after the tenth link,
        # click 'next' and wait for the next group of links to render
        eleNext = driver.find_elements_by_css_selector('#dayPaging .next')
        nextPageNo = str(int(pageNo) + 1)
        if len(eleNext) > 0 and int(pageNo) % 10 == 0:
            eleNext[0].click()
            wait = WebDriverWait(driver, 10)
            wait.until(
                EC.presence_of_element_located((By.LINK_TEXT, nextPageNo)))
            driver.implicitly_wait(1)
        pageNo = nextPageNo
        if len(driver.find_elements_by_link_text(pageNo)) == 0:
            break
    driver.close()
    return data
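# PhantomJS is unmaintained and its support was removed in Selenium 4. A
# minimal replacement sketch using headless Chrome instead (assumes
# selenium 4.x with a chromedriver on PATH; inside crawling(), the
# find_element_by_* calls then become find_element(By.LINK_TEXT, ...) and
# find_elements(By.CSS_SELECTOR, ...)):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

def makeHeadlessDriver():
    options = Options()
    options.add_argument('--headless')
    return webdriver.Chrome(options=options)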