def getStocksCs(self, date): if self.date != date: print('cs_' + str(date)) dbInstance = df.dbLite(self.dbLoc, self.infoDb) data = dbInstance.getCrossSectional(self.infoDb, self.infoCol, list(self.stocksCode), date) df_sortedCode = pd.DataFrame(self.stocksCode, columns=['code']) data = df_sortedCode.merge(data, left_on='code', right_on='code', how='left') collist = list(data.columns) collist = [collist[1]] + [collist[0]] + collist[2:] data = data[collist] else: print('cs_' + str(self.date)) data = self.tradables.copy() for db in self.dbs: print('cs_' + str(db)) dbInstance = df.dbLite(self.dbLoc, db) instance = dbInstance.getCrossSectional(db, self.dbs[db], list(self.stocksCode), date) columns = list(instance.columns) columns = [ value for value in columns if value not in ['date', 'name'] ] data = data.merge(instance[columns], left_on='code', right_on='code', how='left') # data = data.sort_values(by=['tmv'], ascending = False).reset_index(drop = True) self.stockCsBulk = data
def __init__(self, date, dbs, dbsTs, dbLoc=dbLoc, volume=0, tmv=0, trdYN=0, mgt=0): self.date = date self.infoDb = 'stock_information' self.dbLoc = dbLoc self.dbs = dbs self.dbsTs = dbsTs dbInstance = df.dbLite(self.dbLoc, self.infoDb) self.infoCol = dbInstance.getColumnNames(self.infoDb) data = dbInstance.getDateBulk(self.infoDb, self.date) data = data.drop_duplicates() data = data[(data['volume'] > volume) & (data['tmv'] > tmv) & (data['trdYN'] == trdYN) & (data['mgt'] == mgt) & (data['mkt'] != 'K-OTC') & (data['mkt'] != 'EX')] # dbInstance.close() data = data.sort_values(by=['tmv'], ascending=False).reset_index(drop=True) self.tradables = data self.stocksCode = self.tradables.code self.stockCsBulk = pd.DataFrame() self.stockTsBulk = pd.DataFrame()
def __init__(self, excelName, worksheetName, excelLoc, dbLoc, dataLoc, dbName, tableName, source='qtw', totalUpdate=False, econ=False, crss=True): # self.excelName = excelName self.worksheetName = worksheetName self.excelLoc = excelLoc self.dbLoc = dbLoc self.dataLoc = dataLoc self.dbName = dbName self.tableName = tableName self.source = source tradingDateDb = df.dbLite(dbLoc, 'tradingDate') self.tradingDate = tradingDateDb.getBulk('tradingDate') self.totalUpdate = totalUpdate self.econ = econ self.crss = crss if self.crss == True: self.excelName = excelName + '_crss.xlsm' else: self.excelName = excelName + '.xlsm'
def __init__(self, dbName, dbLoc=dbLoc): self.dbLoc = dbLoc self.dbName = dbName[:dbName.find('.db')] self.db = df.dbLite(self.dbLoc, self.dbName) self.tableName = list(self.db.getTableName()[0]) tableDict = {} keys = self.tableName for k in keys: tableDict[k] = self.db.getColumnNames(k) self.tableColumnNames = tableDict
def __init__(self, dbLoc = dbLoc, saveLoc = saveLoc, shortProduct = '코스피 200'): self.trdDate = dh.getTradingDate() short = dh.getBenchmark(dbLoc) short.getBmTs(self.trdDate[0], self.trdDate[len(self.trdDate)-1], [shortProduct]) shortBp = short.pivoted().copy() self.shortProductRet = shortBp.pct_change() self.eom_trdDate = utils.getEomTrdDate(self.trdDate) k2db = df.dbLite(dbLoc,'k200') self.info_k2 = k2db.getBulk('k200').copy() self.eom_info_k2 = list(sorted(set(self.info_k2.date))) del k2db del short
def __init__(self, date, saveLoc = saveLoc): desc = df.dbLite(saveLoc,'descriptor') self.industry = desc.getList('descriptor', 'industry') data = desc.getDateBulk('descriptor', date).copy() self.info_columns = ['date', 'code', 'name', 'mkt', 'sector', 'industry', 'subIndustry', 'volume', 'tmv'] self.info_desc = [l for l in data.columns if l not in self.info_columns] data[['opGrowth', 'copGrowth','ebitdaGrowth']] add = data[['opGrowth', 'copGrowth','ebitdaGrowth']].replace(['흑전','적지▼', '적지▲', '적전'], [200,-100,-20,-200],regex = True).apply(pd.to_numeric) raw = data[['opGrowth', 'copGrowth','ebitdaGrowth']].applymap(lambda x : float((re.findall('\((.*?)\)', str(x))+['0'])[0])) data[['opGrowth', 'copGrowth','ebitdaGrowth']] = add + raw # remove too many same Values for desc in self.info_desc: if data.groupby(desc)[desc].count().max() > len(data)*0.05 and desc not in ['divPrp_spot', 'divRate_spot'] : data[desc] = np.nan self.data = data del desc
def getStocksTs(self, dateStart): dateStart = dateStart data = pd.DataFrame() for db in self.dbsTs: print('-----' + 'ts_' + str(db) + '------') dbInstance = df.dbLite(self.dbLoc, db) if self.dbsTs[db] == '': columns = dbInstance.getColumnNames(db) else: columns = self.dbsTs[db] instance = dbInstance.getTimeSeries(db, columns, self.stocksCode, dateStart, self.date) instance = instance.drop_duplicates() if data.empty: data = instance else: data = data.merge(instance, left_on=['date', 'code'], right_on=['date', 'code'], how='left') self.stockTsBulk = data
# short product base setting short = dh.getBenchmark(dbLoc) short.getBmTs(trdDate[0], trdDate[len(trdDate)-1], ['코스피 200']) shortBp = short.pivoted().copy() shortBpR = shortBp.pct_change() del short # benchmark basket for neutralization k2 = dh.settingk2(dbLoc) k2D = k2.k2Date k2.setk2(k2D[0]) neut = k2.k2Weight # starting 72 # descripotr desc = df.dbLite(saveLoc,'descriptor') industry = desc.getList('descriptor', 'industry') data = desc.getDateBulk('descriptor', eom[72]) info_columns = ['date', 'code', 'name', 'mkt', 'sector', 'industry', 'subIndustry', 'volume', 'tmv'] info_desc = [l for l in data.columns if l not in info_columns] data[['opGrowth', 'copGrowth','ebitdaGrowth']] add = data[['opGrowth', 'copGrowth','ebitdaGrowth']].replace(['흑전','적지▼', '적지▲', '적전'], [200,-100,-20,-200],regex = True).apply(pd.to_numeric) raw = data[['opGrowth', 'copGrowth','ebitdaGrowth']].applymap(lambda x : float((re.findall('\((.*?)\)', str(x))+['0'])[0])) data[['opGrowth', 'copGrowth','ebitdaGrowth']] = add + raw
def getTradingDate(): dbTd = df.dbLite(dbLoc, 'tradingDate') tradingDate = list(dbTd.getBulk('tradingDate').date) return tradingDate
def __init__(self, dbLoc): self.k2WeightB = df.dbLite(dbLoc, 'k200').getBulk('k200') self.k2Date = list(sorted(set(self.k2WeightB.date))) self.k2Weight = pd.DataFrame()
def __init__(self, dbLoc=dbLoc): self.dbInstance = df.dbLite(dbLoc, 'economyData') self.item = self.dbInstance.getList('economyData', 'code') self.econCs = pd.DataFrame() self.econTs = pd.DataFrame()
bulkC = bulkC.iloc[-1:, :].transpose().reset_index() bulkC.columns.values[1] = name + '_' + str(d) self.stocks.tradables = self.stocks.tradables.merge( bulkC, left_on='code', right_on='code', how='left') #%% if __name__ == '__main__': allDbs = getAlldatabase(dbLoc) trdDate = getTradingDate() eom = utils.getEomTrdDate(trdDate) des = df.dbLite(saveLoc, 'descriptor') industry = des.getList('descriptor', 'industry') data = des.getDateBulk('descriptor', eom[243]) # info_column and rank column info_columns = [ 'date', 'code', 'name', 'mkt', 'sector', 'industry', 'subIndustry', 'volume', 'tmv' ] info_desc = [l for l in data.columns if l not in info_columns] # string change of growth factor data[['opGrowth', 'copGrowth', 'ebitdaGrowth']] add = data[['opGrowth', 'copGrowth', 'ebitdaGrowth']].replace(['흑전', '적지▼', '적지▲', '적전'], [200, -100, -20, -200],
bmData = bmData.sort_values(by=['weight'], ascending = False).reset_index(drop = True) clst = bmData.groupby(clstName).sum() clst = clst.sort_values(by=['weight'],ascending = False).reset_index() self.bmBasket = bmData self.clstDf = clst self.clstName = clstName #%% if __name__ == '__main__': bm = df.dbLite(dbLoc,'k200') bmBasket = bm.getBulk('k200') # get benchmark data dateList = list(sorted(set(bmBasket.date))) dbFile = dh.getAlldatabase(saveLoc) trd = dh.getTradingDate() eom = utils.getEomTrdDate(trd) # get descriptor and adjust data raw = df.dbLite(saveLoc,'descriptor') rawData = raw.getDateBulk('descriptor', eom[72]) sFactor = dh.descriptorAdjustment(rawData) sFactor.toDataFrame() sFactor.conHandling() sFactor.winsorization()
pass print('-----------' + ret.index[j] + '-------------') self.factorRet = finalRet self.factorScore = finalScore #%% desc return bulk update if __name__ == '__main__': database1 = dh.getAlldatabase(saveLoc) database2 = dh.getAlldatabase(dbLoc) tradingDate = dh.getTradingDate() eom = utils.getEomTrdDate(tradingDate) # dbs = dh.stockDatabaseOrder(dbLoc) descriptor = df.dbLite(saveLoc, 'descriptor') dbsTech = ['stock_tech_ohlc'] dummyList = descriptor.getList('descriptor', 'industry') starting = 126 ending = len(eom) # get timeSeries for i in range(starting, ending - 1): # get timeSeries timeSeries = dh.getStockTimeSeries(eom[i], eom[i + 1], dbs=dbsTech) ret = timeSeries.getPivotedTimeSeries('ret') # co = timeSeries.getPivotedTimeSeries('co') # opc = timeSeries.getPivotedTimeSeries('opc')
def upload(self, thresh=4): files = utils.findFiles(dbLoc) if self.dbName + '.db' in files: # 부분업데이트 db = df.dbLite(dbLoc, self.dbName) lastUpdatedDate = db.getList(self.tableName, 'date')[-1] updateStart = self.tradingDate.index[ self.tradingDate['date'] == lastUpdatedDate].tolist()[0] + 1 start = updateStart end = len(self.tradingDate) if start < end: wb = xw.Book(self.excelLoc + self.excelName) sht = wb.sheets(self.worksheetName) sht.cells(7, 2).value = self.tradingDate.date[start] # if crss is false then update before looping if self.crss == False: if self.totalUpdate == True: wb.macro('QntWS_RefresAll')() for i in range(start, end): sht.cells(7, 2).value = self.tradingDate.date[i] if (self.econ == True) and (i > 0): sht.cells(10, 2).value = self.tradingDate.date[i - 1] if self.crss == True: wb.macro('QntWS_RefresAll')() data = pd.DataFrame(sht.range(self.dataLoc).value) header = data.iloc[0] data = pd.DataFrame(data.values[1:], columns=header) data.replace('', np.nan, inplace=True) data = data.dropna(thresh=thresh) data['date'] = data['date'].apply( lambda x: dt.datetime.strftime(x, '%Y-%m-%d')) df.insertDBLite(self.dbLoc, self.dbName, self.tableName, data) print('----------', self.dbName, str(i), len(self.tradingDate), '---------') wb.save() wb.close() else: print(self.dbName, 'do not need to update') else: #전체 업데이트 wb = xw.Book(self.excelLoc + self.excelName) sht = wb.sheets(self.worksheetName) sht.cells(7, 2).value = self.tradingDate.date[0] if (self.totalUpdate == True) and (self.crss == False): wb.macro('QntWS_RefreshAll')() for i in range(len(self.tradingDate)): sht.cells(7, 2).value = self.tradingDate.date[i] if (self.econ == True) and (i > 0): sht.cells(10, 2).value = self.tradingDate.date[i - 1] if self.crss == True: wb.macro('QntWS_RefreshAll')() data = pd.DataFrame(sht.range(self.dataLoc).value) header = data.iloc[0] data = pd.DataFrame(data.values[1:], columns=header) data.replace('', np.nan, inplace=True) data = data.dropna(thresh=thresh) data['date'] = data['date'].apply( lambda x: dt.datetime.strftime(x, '%Y-%m-%d')) df.insertDBLite(self.dbLoc, self.dbName, self.tableName, data) print('----------', self.dbName, str(i), len(self.tradingDate), '---------') # wb.macro('QntWS_RefreshAll')() wb.save() wb.close()
def getRet(data, starting, ending): ret = df.dbLite(dbLoc,'stock_information') retData = ret.getTimeSeries('stock_information', ['date','code', 'ret'], list(data.code),starting, ending) retData = retData.pivot(index = 'date', columns = 'code', values = 'ret').iloc[1:,:].copy() return retData
def __init__(self, dbLoc=dbLoc): self.dbInstance = df.dbLite(dbLoc, 'benchmark') self.item = self.dbInstance.getList('benchmark', 'name') self.bmCs = pd.DataFrame() self.bmTs = pd.DataFrame()
'---------') # wb.macro('QntWS_RefreshAll')() wb.save() wb.close() #%% database deletion if __name__ == '__main__': db = ['stock_tech_ohlc', 'previousAdjPrice'] dateList = ['2021-04-16'] for d in db: for date in dateList: print(str(d) + '_' + str(date)) dbM = df.dbLite(dbLoc, d) dbM.deleteDB2(d, date) # dbM = df.dbLite(dbLoc,'stock_con_fwd_fcf_roe_pe_pb_ps_pc') # date = dbM.getList('stock_con_fwd_fcf_roe_pe_pb_ps_pc','date') # data = dbM.getDateBulk('stock_con_fwd_fcf_roe_pe_pb_ps_pc','2021-03-30') # dbM.deleteDB2('stock_con_fwd_fcf_roe_pe_pb_ps_pc','2021-03-30') #%% # xw.Book(excelLoc+'stock.xlsx') # xw.Book(excelLoc+'sample/stock_information_crss.xlsm') #%%