def __init__(self, date, saveLoc=saveLoc): desc = df.dbLite(saveLoc, 'descriptor') self.industry = desc.getList('descriptor', 'industry') data = desc.getDateBulk('descriptor', date).copy() self.info_columns = [ 'date', 'code', 'name', 'mkt', 'sector', 'industry', 'subIndustry', 'volume', 'tmv' ] self.info_desc = [ l for l in data.columns if l not in self.info_columns ] data[['opGrowth', 'copGrowth', 'ebitdaGrowth']] add = data[['opGrowth', 'copGrowth', 'ebitdaGrowth']].replace(['흑전', '적지▼', '적지▲', '적전'], [200, -100, -20, -200], regex=True).apply(pd.to_numeric) raw = data[['opGrowth', 'copGrowth', 'ebitdaGrowth']].applymap( lambda x: float((re.findall('\((.*?)\)', str(x)) + ['0'])[0])) data[['opGrowth', 'copGrowth', 'ebitdaGrowth']] = add + raw # remove too many same Values for desc in self.info_desc: if data.groupby(desc)[desc].count().max( ) > len(data) * 0.05 and desc not in [ 'divPrp_spot', 'divRate_spot' ]: data[desc] = np.nan self.data = data self.normalized_data = pd.DataFrame() del desc
def get_stocks_ts(self, dateStart): dateStart = dateStart data = pd.DataFrame() for db in self.dbsTs: print('-----'+'ts_' + str(db) +'------') dbInstance = df.dbLite(self.dbLoc,db) if self.dbsTs[db] =='': columns = dbInstance.getColumnNames(db) else: columns = self.dbsTs[db] instance = dbInstance.getTimeSeries(db, columns, self.stocksCode, dateStart, self.date) instance = instance.drop_duplicates() if data.empty: data = instance else: data = data.merge(instance, left_on = ['date', 'code'], right_on = ['date','code'], how = 'left') self.stockTsBulk = data
def get_desc_ret(dbLoc, name): desc = df.dbLite(dbLoc,name) descRet = desc.getBulk(name) descRet = descRet.sort_values('date').reset_index(drop=True) descRet.index = descRet.date descRet = descRet.drop(columns=['date']) return descRet
def __init__(self, excelName, worksheetName, excelLoc, dbLoc, dataLoc, dbName, tableName, source='qtw', totalUpdate=False, econ=False, crss=True): # self.excelName = excelName self.worksheetName = worksheetName self.excelLoc = excelLoc self.dbLoc = dbLoc self.dataLoc = dataLoc self.dbName = dbName self.tableName = tableName self.source = source tradingDateDb = df.dbLite(dbLoc, 'tradingDate') self.tradingDate = tradingDateDb.getBulk('tradingDate') self.totalUpdate = totalUpdate self.econ = econ self.crss = crss if self.crss == True: self.excelName = excelName + '_crss.xlsm' else: self.excelName = excelName + '.xlsm'
def longShortSimulator_basketDb_eom(basket_dir, basket_db, basket_table, simulation_dir, simulation_db, ret_all, base, descriptor_list, db_option=True, period=1, fee=0, idx=72): ''' retAll : all stock return base : longShortSimSetting weight : db.getBulk() ==> full weight change descList : descriptro List eom : end of month idx : number ''' eom = base.eom_trading_date basket = df.dbLite(basket_dir,basket_db) if idx + period > len(eom)-1: end_date = len(eom) -1 else : end_date = idx + period weight_buy_all = basket.getDateBulk(basket_table,eom[idx]) weight_rebal_all = basket.getDateBulk(basket_table,eom[end_date]) baseDate = base.shortProductRet[(base.shortProductRet.index > eom[idx]) & (base.shortProductRet.index <= eom[end_date])] result = pd.DataFrame(index=baseDate.index) for desc in descriptor_list: proc_name = multiprocessing.current_process().name print(desc, str(idx), proc_name) weight_buy = weight_buy_all[weight_buy_all.strategy == desc].sort_values('weight', ascending=False) weight_rebal = weight_rebal_all[weight_rebal_all.strategy == desc].sort_values('weight', ascending=False) ret = get_ret_adj(ret_all, weight_buy, eom[idx], eom[end_date]) cum = simulation(weight_buy[['code', 'weight']], ret) simul = cum.portfolioSimul() cost = cum.getCost(weight_rebal[['code', 'weight']], fee=fee) simul = simul.merge(base.shortProductRet, left_index=True, right_index=True) simul['코스피 200'] = (1 + simul['코스피 200']).cumprod() simul_pct = simul.pct_change() simul_pct.iloc[0, 0] = simul.iloc[0, 0] - 1 simul_pct.iloc[0, 1] = simul.iloc[0, 1] - 1 simul_pct[desc] = simul_pct['simulation'] - simul_pct['코스피 200'] simul_pct.iloc[-1, 2] = simul_pct.iloc[-1, 2] - cost if simul_pct[desc].isnull().values.any(): simul_pct[desc]=np.nan if result.empty: result = pd.DataFrame(simul_pct[desc], columns=[desc]) else: result = result.merge(simul_pct[desc], left_index=True, right_index=True) result = result.reset_index() if db_option: df.insertDBLite(simulation_dir, simulation_db, simulation_db, result) else: return result
def strategyMix_BulkGetter(): saveLocation = saveLoc + 'strategyMix/result/' db = df.dbLite(saveLocation, 'strategyMix_summary') data = db.getBulk('strategyMix_summary') data = data.sort_values(by=['riskAdj'], ascending=False).reset_index(drop=True) data = data.drop_duplicates(['riskAdj']) data = data.reset_index(drop=True) return data
def __init__(self,dbName, dbLoc = dbLoc): self.dbLoc = dbLoc self.dbName = dbName[:dbName.find('.db')] self.db = df.dbLite(self.dbLoc, self.dbName) self.tableName = list(self.db.getTableName()[0]) tableDict = {} keys = self.tableName for k in keys: tableDict[k] = self.db.getColumnNames(k) self.tableColumnNames = tableDict
def __init__(self, basket_dir, basket_db, basket_table, stock_return_matrix, sim_setting ): basket = df.dbLite(basket_dir,basket_db) basket_dataframe = basket.getBulk(basket_table) self.stock_return_matrix = stock_return_matrix self.sim_setting = sim_setting self.basket_dataframe = basket_dataframe.copy() del basket del basket_dataframe
def get_stocks_cs(self, date): if self.date != date: print('cs_' + str(date)) dbInstance = df.dbLite(self.dbLoc, self.infoDb) data = dbInstance.getCrossSectional(self.infoDb,self.infoCol,list(self.stocksCode), date) df_sortedCode = pd.DataFrame(self.stocksCode, columns = ['code']) data = df_sortedCode.merge(data, left_on = 'code', right_on = 'code', how = 'left') collist = list(data.columns) collist = [collist[1]]+[collist[0]]+collist[2:] data = data[collist] else: print('cs_' + str(self.date)) data = self.tradables.copy() for db in self.dbs: print('cs_' + str(db)) dbInstance = df.dbLite(self.dbLoc, db) instance = dbInstance.getCrossSectional(db,self.dbs[db], list(self.stocksCode), date) columns = list(instance.columns) columns = [value for value in columns if value not in ['date', 'name']] data = data.merge(instance[columns], left_on = 'code', right_on = 'code', how = 'left') # data = data.sort_values(by=['tmv'], ascending = False).reset_index(drop = True) self.stockCsBulk = data
def __init__(self, dbLoc=dbLoc, saveLoc=saveLoc, shortProduct='코스피 200'): self.trdDate = dh.getTradingDate() short = dh.getBenchmark(dbLoc) short.getBmTs(self.trdDate[0], self.trdDate[len(self.trdDate) - 1], [shortProduct]) shortBp = short.pivoted().copy() self.shortProductRet = shortBp.pct_change() self.eom_trading_date = utils.getEomTrdDate(self.trdDate) k2db = df.dbLite(dbLoc, 'k200') self.info_k2 = k2db.getBulk('k200').copy() self.eom_info_k2 = list(sorted(set(self.info_k2.date))) del k2db del short
def __init__(self, date, dbs, dbsTs, dbLoc = dbLoc, volume = 0, tmv = 0, trdYN = 0, mgt = 0): self.date = date self.infoDb = 'stock_information' self.dbLoc = dbLoc self.dbs = dbs self.dbsTs = dbsTs dbInstance = df.dbLite(self.dbLoc, self.infoDb) self.infoCol = dbInstance.getColumnNames(self.infoDb) data = dbInstance.getDateBulk(self.infoDb, self.date) data = data.drop_duplicates() data = data[(data['volume'] > volume) & (data['tmv'] > tmv) & (data['trdYN'] == trdYN) & (data['mgt'] == mgt) & (data['mkt'] != 'K-OTC') & (data['mkt'] != 'EX')] # dbInstance.close() data = data.sort_values(by=['tmv'], ascending = False).reset_index(drop = True) self.tradables = data self.stocksCode = self.tradables.code self.stockCsBulk = pd.DataFrame() self.stockTsBulk = pd.DataFrame()
bmData = bmData.sort_values(by=['weight'], ascending = False).reset_index(drop = True) clst = bmData.groupby(clstName).sum() clst = clst.sort_values(by=['weight'],ascending = False).reset_index() self.bmBasket = bmData self.clstDf = clst self.clstName = clstName #%% if __name__ == '__main__': bm = df.dbLite(dbLoc,'k200') bmBasket = bm.getBulk('k200') # get benchmark data dateList = list(sorted(set(bmBasket.date))) dbFile = dh.getAlldatabase(saveLoc) trd = dh.getTradingDate() eom = utils.getEomTrdDate(trd) # get descriptor and adjust data raw = df.dbLite(saveLoc,'descriptor') rawData = raw.getDateBulk('descriptor', eom[72]) sFactor = dh.descriptorAdjustment(rawData) sFactor.toDataFrame() sFactor.conHandling() sFactor.winsorization()
def __init__(self, dbLoc = dbLoc) : self.dbInstance = df.dbLite(dbLoc,'economyData') self.item = self.dbInstance.getList('economyData','code') self.econCs = pd.DataFrame() self.econTs = pd.DataFrame()
def __init__(self, dbLoc = dbLoc) : self.dbInstance = df.dbLite(dbLoc,'benchmark') self.item = self.dbInstance.getList('benchmark','name') self.bmCs = pd.DataFrame() self.bmTs = pd.DataFrame()
def __init__(self,dbLoc): self.k2WeightB = df.dbLite(dbLoc,'k200').getBulk('k200') self.k2Date = list(sorted(set(self.k2WeightB.date))) self.k2Weight = pd.DataFrame()
iterable = tuple(zip(desc['strategyFirst'], desc['strategySecond']))[:iterNumb] return iterable, desc if __name__ == '__main__': # strategyMix_summarySetter() data = strategyMix_BulkGetter() data['strategyName'] = data['desc'].str.split('/').str[0] data = data.iloc[:100,:] strategyList = list(data.strategyName) strategyList = list(dict.fromkeys(strategyList)) final = pd.DataFrame() for strategy in strategyList: print(strategy) colList = list(data.desc[data['strategyName'] == strategy]) db = df.dbLite(saveLoc+'strategyMix/', 'strategyMix_'+strategy) spot = db.getBulk('strategyMix_'+strategy)[['date']+colList] if strategy == strategyList[0]: final = final.append(spot, ignore_index=True) else: final = final.merge(spot, left_on='date', right_on='date', how='left') final.index = final.date final = final.drop(columns=['date']) desc = st.descTest(final) desc.make([st.annualized_ret, st.stdev, st.mdd, st.hit_ratio]) desc.ratio('annRet', 'stdev', 'riskAdj') desc.rank('riskAdj') summaryView = desc.descriptor
def upload(self, thresh=4): files = utils.findFiles(dbLoc) if self.dbName + '.db' in files: # 부분업데이트 db = df.dbLite(dbLoc, self.dbName) lastUpdatedDate = db.getList(self.tableName, 'date')[-1] updateStart = self.tradingDate.index[ self.tradingDate['date'] == lastUpdatedDate].tolist()[0] + 1 start = updateStart end = len(self.tradingDate) if start < end: wb = xw.Book(self.excelLoc + self.excelName) sht = wb.sheets(self.worksheetName) sht.cells(7, 2).value = self.tradingDate.date[start] # if crss is false then update before looping if self.crss == False: if self.totalUpdate == True: wb.macro('QntWS_RefresAll')() for i in range(start, end): sht.cells(7, 2).value = self.tradingDate.date[i] if (self.econ == True) and (i > 0): sht.cells(10, 2).value = self.tradingDate.date[i - 1] if self.crss == True: wb.macro('QntWS_RefresAll')() data = pd.DataFrame(sht.range(self.dataLoc).value) header = data.iloc[0] data = pd.DataFrame(data.values[1:], columns=header) data.replace('', np.nan, inplace=True) data = data.dropna(thresh=thresh) data['date'] = data['date'].apply( lambda x: dt.datetime.strftime(x, '%Y-%m-%d')) df.insertDBLite(self.dbLoc, self.dbName, self.tableName, data) print('----------', self.dbName, str(i), len(self.tradingDate), '---------') wb.save() wb.close() else: print(self.dbName, 'do not need to update') else: #전체 업데이트 wb = xw.Book(self.excelLoc + self.excelName) sht = wb.sheets(self.worksheetName) sht.cells(7, 2).value = self.tradingDate.date[0] if (self.totalUpdate == True) and (self.crss == False): wb.macro('QntWS_RefreshAll')() for i in range(len(self.tradingDate)): sht.cells(7, 2).value = self.tradingDate.date[i] if (self.econ == True) and (i > 0): sht.cells(10, 2).value = self.tradingDate.date[i - 1] if self.crss == True: wb.macro('QntWS_RefreshAll')() data = pd.DataFrame(sht.range(self.dataLoc).value) header = data.iloc[0] data = pd.DataFrame(data.values[1:], columns=header) data.replace('', np.nan, inplace=True) data = data.dropna(thresh=thresh) data['date'] = data['date'].apply( lambda x: dt.datetime.strftime(x, '%Y-%m-%d')) df.insertDBLite(self.dbLoc, self.dbName, self.tableName, data) print('----------', self.dbName, str(i), len(self.tradingDate), '---------') # wb.macro('QntWS_RefreshAll')() wb.save() wb.close()
def getTradingDate(): dbTd = df.dbLite(dbLoc, 'tradingDate') tradingDate = list(dbTd.getBulk('tradingDate').date) return tradingDate
def get_ret_bulk(): ret = df.dbLite(dbLoc,'stock_information') data = ret.getBulk('stock_information') ret_data = data.pivot(index = 'date', columns = 'code', values = 'ret') return ret_data
def get_ret(data, starting, ending): ret = df.dbLite(dbLoc,'stock_information') ret_data = ret.getTimeSeries('stock_information', ['date','code', 'ret'], list(data.code),starting, ending) ret_data = ret_data.pivot(index = 'date', columns = 'code', values = 'ret').iloc[1:,:].copy() return ret_data
wb.save() wb.close() #%% database deletion # print('hi') if __name__ == '__main__': db = ['stock_tech_ohlc', 'previousAdjPrice'] db = utils.findFiles('C:/Users/nine/dbLocKgh/db_ml_v1') db = [d[:-3] for d in db] dateList = ['2021-05-21'] for d in db: for date in dateList: print(str(d) + '_' + str(date)) dbM = df.dbLite(dbLoc, d) dbM.deleteDB2(d, date) # dbM = df.dbLite(dbLoc,'stock_con_fwd_fcf_roe_pe_pb_ps_pc') # date = dbM.getList('stock_con_fwd_fcf_roe_pe_pb_ps_pc','date') # data = dbM.getDateBulk('stock_con_fwd_fcf_roe_pe_pb_ps_pc','2021-03-30') # dbM.deleteDB2('stock_con_fwd_fcf_roe_pe_pb_ps_pc','2021-03-30') #%% # xw.Book(excelLoc+'stock.xlsx') # xw.Book(excelLoc+'sample/stock_information_crss.xlsm') #%%
pass print('-----------' + ret.index[j] + '-------------') self.factorRet = finalRet self.factorScore = finalScore #%% desc return bulk update if __name__ == '__main__': database1 = dh.getAlldatabase(saveLoc) database2 = dh.getAlldatabase(dbLoc) tradingDate = dh.getTradingDate() eom = utils.getEomTrdDate(tradingDate) # dbs = dh.stockDatabaseOrder(dbLoc) descriptor = df.dbLite(saveLoc, 'descriptor') dbsTech = ['stock_tech_ohlc'] dummyList = descriptor.getList('descriptor', 'industry') starting = 126 ending = len(eom) # get timeSeries for i in range(starting, ending - 1): # get timeSeries timeSeries = dh.getStockTimeSeries(eom[i], eom[i + 1], dbs=dbsTech) ret = timeSeries.getPivotedTimeSeries('ret') # co = timeSeries.getPivotedTimeSeries('co') # opc = timeSeries.getPivotedTimeSeries('opc')
how = 'left') #%% if __name__ == '__main__': allDbs = getAlldatabase(dbLoc) trdDate = getTradingDate() eom = utils.getEomTrdDate(trdDate) des = df.dbLite(saveLoc, 'descriptor') industry = des.getList('descriptor', 'industry') data = des.getDateBulk('descriptor', eom[243]) # info_column and rank column info_columns = ['date', 'code', 'name', 'mkt', 'sector', 'industry', 'subIndustry', 'volume', 'tmv'] info_desc = [l for l in data.columns if l not in info_columns] # string change of growth factor data[['opGrowth', 'copGrowth','ebitdaGrowth']] add = data[['opGrowth', 'copGrowth','ebitdaGrowth']].replace(['흑전','적지▼', '적지▲', '적전'], [200,-100,-20,-200],regex = True).apply(pd.to_numeric) raw = data[['opGrowth', 'copGrowth','ebitdaGrowth']].applymap(lambda x : float((re.findall('\((.*?)\)', str(x))+['0'])[0])) data[['opGrowth', 'copGrowth','ebitdaGrowth']] = add + raw