# NOTE(review): this chunk arrived with every statement collapsed onto one
# physical line; it is reformatted here without changing any code tokens.
from tqdm import tqdm
os.chdir(path_code)  # assumes `os`, `path_code`, `path_data`, `pd` are defined earlier in the file -- TODO confirm
import util
os.chdir(path_data)

##############################################################################
# 0. Data load & cleansing
##############################################################################
'''
Financial data : QuantiWise
Price data     : Quant DB
'''
# Financial-statement data (quarterly sheets from fin.xlsx)
ocf = util.data_cleansing(pd.read_excel('fin.xlsx', sheet_name='ocf_Q'))
cfTTM = util.data_cleansing(pd.read_excel('fin.xlsx', sheet_name='cfTTM_Q'))
ocfTTM = util.data_cleansing(pd.read_excel('fin.xlsx', sheet_name='ocfTTM_Q'))
opmTTM = util.data_cleansing(pd.read_excel('fin.xlsx', sheet_name='opm_Q'))

# Flow / liquidity data (20-day trading value, 20-day cumulative institutional
# net buys, market cap, number of listed shares) -- monthly sheets
vol_20MA = util.data_cleansing_ts(
    pd.read_excel('liq.xlsx', sheet_name='vol_20MA_M'))
netbuy20 = util.data_cleansing_ts(
    pd.read_excel('liq.xlsx', sheet_name='netbuy20_M'))
mktcap = util.data_cleansing_ts(
    pd.read_excel('liq.xlsx', sheet_name='mktcap_M'))
numStock = util.data_cleansing_ts(
    pd.read_excel('liq.xlsx', sheet_name='numStock_M'))

# Basic info: listed market, sector, trading-halt status, etc.
# (the statements for this section lie beyond this chunk)
- Crash / Jackpot -
'''
# NOTE(review): the opening triple-quote of the module docstring above lies
# before this chunk; statements were collapsed onto one physical line and are
# reformatted here without changing any code tokens.
import os
import pandas as pd
import numpy as np
import datetime

path = 'C:/Woojin/###. Git/Project_Q/V. Small Cap strategy'
os.chdir(path)
import util as ut

# Single workbook holding every input sheet for this strategy
rdata = pd.ExcelFile(path + '/Data/data.xlsx')
mkt_info = ut.data_cleansing(pd.read_excel(rdata, 'market_info'))
mktcap = ut.data_cleansing(pd.read_excel(rdata, 'mktcap'))
net_income = ut.data_cleansing_as(pd.read_excel(rdata, 'net_income'))
vol = ut.data_cleansing(pd.read_excel(rdata, 'volume'))
# Korean sheet names (must stay byte-identical): 거래정지 = trading halt,
# 투자유의 = investment caution, 관리종목 = administrative issue.
caution_1 = ut.data_cleansing(pd.read_excel(rdata, '거래정지'))
caution_2 = ut.data_cleansing(pd.read_excel(rdata, '투자유의'))
caution_3 = ut.data_cleansing(pd.read_excel(rdata, '관리종목'))
per = ut.data_cleansing(pd.read_excel(rdata, 'PER'))
psr = ut.data_cleansing(pd.read_excel(rdata, 'PSR'))
pbr = ut.data_cleansing(pd.read_excel(rdata, 'PBR'))


def get_universe(rebalDate, df_info, df_mktcap, df_netincome, df_vol,
                 df_caution_1, df_caution_2, df_caution_3):
    # KOSPI small-cap universe: take the most recent market-info row at or
    # before rebalDate, then keep tickers whose value equals 3 (presumably the
    # small-cap bucket code -- TODO confirm against the market_info sheet).
    # NOTE(review): the remainder of this function lies beyond this chunk.
    lookback_info = df_info.loc[:rebalDate, :].tail(1).transpose()
    lookback_info = lookback_info[lookback_info == 3].dropna().index.values
# NOTE(review): this chunk arrived with every statement collapsed onto one
# physical line; reformatted and fixed here.
#
# FIX: `pd.datetime` was deprecated in pandas 0.25 and removed in pandas 1.0.
# `pd.Timestamp(year, month, day)` is the supported replacement and is a
# `datetime.datetime` subclass, so `pd.date_range` and all downstream
# arithmetic behave identically.
start_invest = pd.Timestamp(start_year, start_month, start_day)
end_invest = pd.Timestamp(end_year, end_month, end_day)
# Month-end rebalancing schedule spanning the backtest window
rebal_sche = pd.date_range(start_invest, end_invest, freq='M')

# Load market info data (time-series sheets)
marketInfo = util.data_cleansing_ts(
    pd.read_excel('factorData.xlsx', sheet_name='market'))
mktcap = util.data_cleansing_ts(
    pd.read_excel('factorData.xlsx', sheet_name='mktcap'))
risk_1 = util.data_cleansing_ts(
    pd.read_excel('factorData.xlsx', sheet_name='risk_1'))
risk_2 = util.data_cleansing_ts(
    pd.read_excel('factorData.xlsx', sheet_name='risk_2'))

# Load factor data (cross-sectional sheets)
factor_book = util.data_cleansing(
    pd.read_excel('factorData.xlsx', sheet_name='book'))
factor_size = util.data_cleansing(
    pd.read_excel('factorData.xlsx', sheet_name='size'))
factor_yield = util.data_cleansing(
    pd.read_excel('factorData.xlsx', sheet_name='dividend'))
current_asset = util.data_cleansing(
    pd.read_excel('factorData.xlsx', sheet_name='CA'))
current_liability = util.data_cleansing(
    pd.read_excel('factorData.xlsx', sheet_name='CL'))
total_liability = util.data_cleansing(
    pd.read_excel('factorData.xlsx', sheet_name='TL'))
# Period-over-period percent change of net working capital (CA - CL).
# NOTE(review): keeps the original misspelling "accural" (accrual) because the
# variable may be referenced later in the file, beyond this chunk.
accural_ratio = (current_asset - current_liability).pct_change()
earnings = util.data_cleansing(
    pd.read_excel('factorData.xlsx', sheet_name='earnings'))
# NOTE(review): this chunk arrived with every statement collapsed onto one
# physical line; reformatted and fixed here.
#
# FIX: `pd.datetime` was deprecated in pandas 0.25 and removed in pandas 1.0.
# `pd.Timestamp(year, month, day)` is the supported replacement and is a
# `datetime.datetime` subclass, so `pd.date_range` and all downstream
# arithmetic behave identically.
start_invest = pd.Timestamp(start_year, start_month, start_day)
end_invest = pd.Timestamp(end_year, end_month, end_day)
# Month-end rebalancing schedule spanning the backtest window
rebal_sche = pd.date_range(start_invest, end_invest, freq='M')

# Load market info data (time-series sheets)
marketInfo = util.data_cleansing_ts(
    pd.read_excel('testData.xlsx', sheet_name='market'))
mktcap = util.data_cleansing_ts(
    pd.read_excel('testData.xlsx', sheet_name='mktcap'))
risk_1 = util.data_cleansing_ts(
    pd.read_excel('testData.xlsx', sheet_name='risk_1'))
risk_2 = util.data_cleansing_ts(
    pd.read_excel('testData.xlsx', sheet_name='risk_2'))

# Load factor data (price-ratio inputs)
factor_PSR = util.data_cleansing(
    pd.read_excel('testData.xlsx', sheet_name='sales'))
factor_PBR = util.data_cleansing(
    pd.read_excel('testData.xlsx', sheet_name='book'))
factor_PER = util.data_cleansing(
    pd.read_excel('testData.xlsx', sheet_name='earnings'))

# I. Cross-sectional
'''
ex. Value & Momentum (Sales to Price, Book to Price, Momentum)
'''


def get_priceRatio_multi(factorData, mktcapData):
    # Tickers present in both the factor table (index) and the market-cap
    # table (columns).
    # NOTE(review): the remainder of this function lies beyond this chunk;
    # the visible statements are reproduced unchanged.
    code_f = factorData.index.values
    code_p = mktcapData.columns.values
    code = list(set(code_f).intersection(code_p))
# NOTE(review): this chunk arrived with every statement collapsed onto one
# physical line; it is reformatted here without changing any code tokens.
import matplotlib.pylab as plt
from datetime import datetime
import statsmodels.api as sm
from pykalman import KalmanFilter
from math import sqrt

path = 'C:/Woojin/###. Git/Project_Q/III. Factor Exposed Pairs Trading'
os.chdir(path)  # assumes `os`, `pd`, `np` are imported earlier in the file -- TODO confirm
#import backtest_pipeline_ver2 as bt
import util

# Firm universe, daily prices over the backtest window, and size data
codes = pd.read_excel('firmCode.xlsx')['Code'].values
price = util.get_stock_price(codes, pd.to_datetime('2010-01-01'),
                             pd.to_datetime('2019-06-30'))
size = util.data_cleansing(pd.read_excel('firmSize.xlsx'))


#NOTE CRITICAL LEVEL HAS BEEN SET TO 5% FOR COINTEGRATION TEST
def find_cointegrated_pairs(dataframe, critial_level=0.05):
    """Scan all column pairs of `dataframe` for cointegration.

    Runs the Engle-Granger test (`sm.tsa.stattools.coint`) on every ordered
    pair of columns, collecting p-values. The parameter name keeps the
    original spelling `critial_level` (sic) to preserve the call interface.

    NOTE(review): the remainder of this function -- filling the p-value
    matrix and returning results -- lies beyond this chunk.
    """
    n = dataframe.shape[1]  # number of columns (stocks) in the dataframe
    pvalue_matrix = np.ones((n, n))  # p-value matrix, initialized to 1
    keys = dataframe.columns  # column names (stock identifiers)
    pairs = []  # accumulator for cointegrated pairs
    for i in range(n):
        for j in range(i + 1, n):  # only j > i: each unordered pair once
            stock1 = dataframe[keys[i]]  # price series of "stock1"
            stock2 = dataframe[keys[j]]  # price series of "stock2"
            result = sm.tsa.stattools.coint(stock1, stock2)  # Engle-Granger cointegration test
            pvalue = result[1]  # p-value of the test