def k10FromDate():
    prices = dict()
    top10 = topK(10)
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    for code in top10:
        crawler = NaverStockCrawler.create(code)
        data = crawler.crawling(date)
        prices[code] = {
            NaverDate.formatDate(item.date): item.close
            for item in data
        }
    df = pd.DataFrame(prices)
    df = df.ffill()
    if df.isnull().values.any():  # if gaps still remain
        df = df.bfill()
    return df
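topK is not defined in this snippet; below is a minimal sketch under the assumption that it returns the codes of the K largest-cap stocks, reusing the ZIPTOPCAP table loaded further down in this file (hypothetical helper; assumes the table is already sorted by market cap, descending):

def topK(k):
    # hypothetical: codes of the k largest-cap stocks, assuming the
    # ZIPTOPCAP table is sorted by market cap in descending order
    topcap = pd.read_hdf('ZIPTOPCAP2007-01-01-2019-12-31.h5')
    return list(topcap['Code'].head(k))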
Example #2
def loadDomesticIndex(self, name, beforeStr, endStr):
    crawler = NaverCrawler.create(targetName=name)
    date = NaverDate.create(startDate=beforeStr, endDate=endStr)
    data = crawler.crawling(dateData=date)
    # columns: close, day-over-day change, change rate, volume, trading value
    df = pd.DataFrame(columns=['종가', '전일비', '등락률', '거래량', '거래대금'])
    for v in data:
        df.loc[v.index()] = v.value()
    return df
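Presumable usage (a sketch; loader stands in for the unshown class that defines this method):

# loader: an instance of the class that defines loadDomesticIndex
indexDf = loader.loadDomesticIndex('KPI200', '2019-01-02', '2019-05-04')
print(indexDf.head())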
Example #3
def loadDomesticIndex(self, name, beforeStr, endStr):
    if not os.path.isfile(name):
        print(name, 'collect...')
        crawler = NaverCrawler.create(targetName=name.split('_')[0])
        date = NaverDate.create(startDate=beforeStr, endDate=endStr)
        data = crawler.crawling(dateData=date)
        df = pd.DataFrame(columns=['종가', '전일비', '등락률', '거래량', '거래대금'])
        for v in data:
            df.loc[v.index()] = v.value()
        df.to_hdf(name, key='df', mode='w')
    else:
        print(name, 'read...')
        df = pd.read_hdf(name, key='df')
    return df
def showGraphK10KOSPI200():
    k10_price = k10FromDate()
    k10_info = makeK10()
    k10_historical_mc = k10_price * k10_info['Outstanding'] * k10_info[
        'Floating']
    k10 = pd.DataFrame()
    k10['k10 Market Cap'] = k10_historical_mc.sum(axis=1)
    k10['k10'] = k10['k10 Market Cap'] / k10['k10 Market Cap'].iloc[0] * 100

    crawler = NaverCrawler.create(targetName='KPI200')
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    kospi200 = crawler.crawling(dateData=date)
    kospi200Close = {
        NaverDate.formatDate(item.date): item.close
        for item in kospi200
    }
    k200 = pd.DataFrame({'K200': kospi200Close})

    plt.figure(figsize=(10, 5))
    plt.plot(k10['k10'], label='K10')
    plt.plot(k200['K200'] / k200['K200'].iloc[0] * 100, label='K200')
    plt.legend(loc=0)
    plt.grid(True, color='0.7', linestyle=':', linewidth=1)
    plt.show()
Example #5
def loadStockFromArr(self, name, targets, beforeStr, endStr):
    prices = dict()
    if not os.path.isfile(name):
        date = NaverDate.create(startDate=beforeStr, endDate=endStr)
        for target in targets:
            print(target['Name'], 'collect...')
            crawler = NaverStockCrawler.create(target['Code'])
            data = crawler.crawling(date)
            prices[target['Name']] = {
                pd.to_datetime(item.date, format='%Y-%m-%d'): item.close
                for item in data
            }
        bonddf = pd.DataFrame(prices)
        bonddf.to_hdf(name, key='df', mode='w')
    else:
        print(name, 'read...')
        bonddf = pd.read_hdf(name, key='df')
    return bonddf
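Presumable usage, with targets shaped as the loop above expects ({'Name', 'Code'} dicts); the cache filename 'BONDS.h5' and the loader object are hypothetical:

targets = [{'Name': 'KODEX 인버스', 'Code': '114800'}]
bonddf = loader.loadStockFromArr('BONDS.h5', targets,
                                 '2019-01-02', '2019-05-04')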
def makeDataFrame():
    crawler = NaverCrawler.create(targetName='KPI200')
    date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    kospi200 = crawler.crawling(dateData=date)
    kospi200Close = {
        NaverDate.formatDate(item.date): item.close
        for item in kospi200
    }

    worldDate = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
    worldCrawler = NaverWorldCrawler.create(targetName='SPI@SPX')
    sp500 = worldCrawler.crawling(dateData=worldDate)

    sp500Close = {
        NaverDate.formatDate(item.date): item.close
        for item in sp500
    }

    data = {'S&P500': sp500Close, 'KOSPI200': kospi200Close}
    df = pd.DataFrame(data)
    df = df.ffill()  # the two markets trade on different days, so forward-fill
    if df.isnull().values.any():
        df = df.bfill()
    return df
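A typical follow-up (a sketch, not in the source): rebase both series to 100 at the first date so the two indices can be compared directly:

df = makeDataFrame()
rebased = df / df.iloc[0] * 100  # both indices start at 100
print(rebased.tail())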
Example #7
def loadStockFromDict(self, name, targets, beforeStr, endStr):
    prices = dict()
    if not os.path.isfile(name):
        date = NaverDate.create(startDate=beforeStr, endDate=endStr)
        progress = 0
        completeLen = len(targets.keys())
        for key in targets:
            print(targets[key], 'collect...', progress, '/', completeLen,
                  str(progress / completeLen * 100) + '%')
            crawler = NaverStockCrawler.create(key)
            data = crawler.crawling(date)
            prices[targets[key]] = {
                pd.to_datetime(item.date, format='%Y-%m-%d'): item.close
                for item in data
            }
            progress += 1

        topdf = pd.DataFrame(prices)
        topdf.to_hdf(name, key='df', mode='w')
    else:
        print(name, 'read...')
        topdf = pd.read_hdf(name, key='df')
    return topdf
prices = dict()

# In[12]: read

topcap = pd.read_hdf('ZIPTOPCAP2007-01-01-2019-12-31.h5')
# In[12]
targets = {}
for index, row in topcap.iterrows():
    targets[row['Code']] = row['Name']
# In[12]: closing prices by stock
import os.path

name = 'STOCKZIPTOPCAP2007-01-01-2019-12-31.h5'

if not os.path.isfile(name):
    # beforeStr / endStr are assumed to be date strings defined in an earlier cell
    date = NaverDate.create(startDate=beforeStr, endDate=endStr)
    progress = 0
    completeLen = len(targets.keys())
    for key in targets:
        print(targets[key], 'collect...', progress, '/', completeLen,
              str(progress / completeLen * 100) + '%')
        crawler = NaverStockCrawler.create(key)
        data = crawler.crawling(date)
        prices[targets[key]] = {
            pd.to_datetime(item.date, format='%Y-%m-%d'): item.close
            for item in data
        }
        progress += 1

    topdf = pd.DataFrame(prices)
    topdf.to_hdf(name, key='df', mode='w')
Example #9
# requires: from selenium import webdriver
#           from selenium.webdriver.support.ui import WebDriverWait
#           from selenium.webdriver.support import expected_conditions as EC
#           from selenium.webdriver.common.by import By
#           import bs4, itertools
def crawling(self, dateData=''):
    # PhantomJS is deprecated in current Selenium; a headless Chrome or
    # Firefox driver is the modern replacement (the path is machine-specific)
    driver = webdriver.PhantomJS(
        'C:/Users/lsj/Downloads/phantomjs-2.1.1-windows/bin/phantomjs.exe')
    driver.get(self.makeWorldUrl())
    data = []
    pageNo = '1'
    isRunning = True
    while isRunning:
        # find_element_by_link_text raises NoSuchElementException rather than
        # returning None, so use the plural form and test for an empty list
        elePages = driver.find_elements_by_link_text(pageNo)
        if not elePages:
            break
        elePage = elePages[0]
        elePage.click()
        pageNo = elePage.text

        text = driver.page_source
        soup = bs4.BeautifulSoup(text, 'lxml')
        table = soup.find(class_='tb_status2 tb_status2_t2').find('tbody')
        rows = filter(lambda val: type(val) == bs4.element.Tag,
                      table.children)
        # collect the td tags from each row
        tds = map(lambda row: row.find_all('td'), list(rows))
        # flatten to one dimension
        flattenTds = list(itertools.chain(*tds))
        # drop anything that is not a Tag
        tdsf = filter(lambda td: type(td) == bs4.element.Tag, flattenTds)
        # extract the text
        values = map(lambda value: value.stripped_strings, tdsf)
        # flatten to one dimension
        strings = list(itertools.chain(*values))
        # chunk into rows of six cells: date, close, diff, open, high, low
        splitData = [strings[i:i + 6] for i in range(0, len(strings), 6)]
        for one in splitData:
            date = NaverDate.formatDate(date=one[0])
            if dateData.startDate <= date <= dateData.endDate:
                resultData = NaverWorldResultData.create(date=one[0],
                                                         close=one[1],
                                                         diff=one[2],
                                                         open=one[3],
                                                         high=one[4],
                                                         low=one[5])
                data.append(resultData)
            elif dateData.startDate > date:
                # rows come newest-first, so a date older than startDate
                # means the remaining pages are out of range
                isRunning = False
                break
        eleNext = driver.find_elements_by_css_selector('#dayPaging .next')
        nextPageNo = str(int(pageNo) + 1)
        # the pager shows ten page links at a time; click 'next' after every tenth page
        if len(eleNext) > 0 and int(pageNo) % 10 == 0:
            eleNext[0].click()
            wait = WebDriverWait(driver, 10)
            wait.until(
                EC.presence_of_element_located((By.LINK_TEXT, nextPageNo)))
            driver.implicitly_wait(1)
        pageNo = nextPageNo
        if len(driver.find_elements_by_link_text(pageNo)) == 0:
            break
    driver.close()
    return data
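Assuming this method belongs to NaverWorldCrawler (matching its use in makeDataFrame above), a call looks like:

worldDate = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
worldCrawler = NaverWorldCrawler.create(targetName='SPI@SPX')
for row in worldCrawler.crawling(dateData=worldDate)[:3]:
    print(row.date, row.close)  # fields set by NaverWorldResultData.create above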

# In[1]: listed companies...
import pandas as pd
# market-cap sheet (시가총액); 종목코드 = ticker code, kept as a string
kospiCompanyDf = pd.read_excel('fin/시가총액_2019_01_02.xlsx', sheet_name='시가총액',
                               skiprows=3, converters={'종목코드': str})
kospiCompanyDf = kospiCompanyDf.iloc[1:]
codeName = {}
for index, row in kospiCompanyDf.iterrows():
    codeName[row['종목코드']] = row['종목명']
codeName
# In[2]: crawling current prices...
from crawler.NaverStockCrawler import NaverStockCrawler
from crawler.data.NaverDate import NaverDate

prices = dict()
# before / now are assumed to be date strings defined in an earlier cell
date = NaverDate.create(startDate=before, endDate=now)
progress = 0
completeLen = len(codeName.keys())
for key in codeName:
    print(codeName[key], 'collect...', progress, '/', completeLen,
          str(progress / completeLen * 100) + '%')
    crawler = NaverStockCrawler.create(key)
    data = crawler.crawling(date)
    prices[key] = {
        pd.to_datetime(item.date, format='%Y-%m-%d'): item.close
        for item in data
    }
    progress += 1

topDf = pd.DataFrame(prices)
topDf
# In[3]: financial statements
# industry codes: manufacturing, banking, securities, insurance,
# merchant banking, specialty credit finance, savings banks
upCodes = ['제조업', '은행업', '증권업', '보험업', '종합금융업', '여신전문금융업', '신용금고']
# Korean factor keys: net income, then cash flow from operating /
# investing / financing activities
factors = ['per', 'pcr', 'pbr', 'roe', '당기순이익', '영업활동으로인한현금흐름',
           '투자활동으로인한현금흐름', '재무활동으로인한현금흐름']
factorDf = {}
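# the cell stops here in the source; a hedged sketch of one shape factorDf
# could take -- an empty frame per industry code, one column per factor
# (the actual loader is not shown):
for up in upCodes:
    factorDf[up] = pd.DataFrame(index=list(codeName.keys()), columns=factors)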
Example #11
import numpy as np
from sklearn.linear_model import LinearRegression

# In[2]: font setup
import matplotlib.font_manager as fm
import matplotlib as mpl
import matplotlib.pyplot as plt

path = '/Library/Fonts/NanumBarunGothicLight.otf'
font_name = fm.FontProperties(fname=path, size=18).get_name()
print(font_name)
mpl.rc('font', family=font_name)

# In[3]: test
prices = dict()
date = NaverDate.create(startDate='1997-06-01')
crawler = NaverStockCrawler.create('035720', logging=True)  # 035720 = Kakao
data = crawler.crawling(date)
prices['카카오'] = {
    pd.to_datetime(item.date, format='%Y-%m-%d'): item.close
    for item in data
}
topdf = pd.DataFrame(prices)
topdf
# crawler = NaverSearchCodeCrawler.create('KODEX')
# data = crawler.crawling()
# data

# In[4]: test2
print('collect...')
crawler = NaverCrawler.create(targetName='KOSPI')
# date range assumed, matching the other cells in this file
date = NaverDate.create(startDate='2019-01-02', endDate='2019-05-04')
kospi = crawler.crawling(dateData=date)
kospi[:3]


# In[3]: fetch the KOSPI 200
crawler = NaverCrawler.create(targetName='KPI200')
date = NaverDate.create(startDate='2018-06-01')
kospi200 = crawler.crawling(dateData=date)
# columns: close, day-over-day change, change rate, volume, trading value
df = pd.DataFrame(columns=['종가', '전일비', '등락률', '거래량', '거래대금'])
for v in kospi200:
    df.loc[v.index()] = v.value()
df

# In[4]: monthly mean close
# resample needs a DatetimeIndex (convert with pd.to_datetime if necessary);
# the old how= keyword was removed from pandas, so use .agg instead
monthly_df = df.resample('M').agg({'종가': np.mean})
monthly_df

# In[5]: compute momentum
# distance of each monthly close below the latest monthly close
monthly_df['종가'].iloc[-1] - monthly_df['종가']
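# a sketch (not from the source) of the more usual 12-month momentum,
# expressed as a percentage change of the monthly close
momentum_12m = monthly_df['종가'].pct_change(12) * 100
momentum_12m.tail()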
# In[6]: closing prices for 30 stocks -- the source notes this has a bug
prices = dict()
# topK(10) above yields bare codes, so appending a dict produces mixed entry
# types -- likely the bug the cell title refers to
targets = topK(30) + [{'code': "114800", 'name': 'KODEX 인버스'}]