def calcAllDay(residPath, stdPath, tradingDates, keyCols, includeCols, excludeCols, createDate, settleDate):
    """
    Compute and store the residual data for every trading day.

    For each day the standardized data is loaded from stdPath, handed to
    calcResidual, and the result is pickled under residPath. When
    fileutil.loadPickle returns None for a day, a failure message is
    printed and that day is not processed. The elapsed time is printed for
    every day. Nothing is returned.

    residPath    - directory where the residual data is saved
    stdPath      - directory holding the standardized data
    tradingDates - list of trading days (datetime.date items)
    keyCols      - columns that must be present in the result, list
    includeCols  - names of the columns to process, list
    excludeCols  - names of the columns to leave unprocessed, list
    createDate   - name of the start-date column
    settleDate   - name of the end-date column
    """
    for td in tradingDates:
        began = datetime.datetime.now()
        stdFrame = fileutil.loadPickle(stdPath, td)
        dayLabel = td.strftime('%Y%m%d')

        if stdFrame is None:
            print('Fail to load data: {0}, {1}'.format(dayLabel, stdPath))
        else:
            print('process: {0}'.format(dayLabel))
            # Key columns for this day: createDate, then settleDate (each
            # only if the frame has it), followed by the caller's keyCols.
            dayKeyCols = list(keyCols)
            dayKeyCols[:0] = [col for col in (createDate, settleDate)
                              if col in stdFrame.columns]
            residFrame = calcResidual(td, stdFrame, dayKeyCols, includeCols, excludeCols)
            fileutil.savePickle(residPath, td, residFrame)

        elapsed = datetime.datetime.now() - began
        print('cost: {0} on: {1}'.format(elapsed, dayLabel))
def handleAllDay(filepath, tradingDays, dtype, removeCols, keyCols, includeCols, excludeCols, createDate, settleDate):
    """
    Fetch, process and pickle the factor data of every trading day.

    For each day the data is obtained from dataapi.getFactorData, passed
    through handleOneDay, and the result is saved under filepath with
    fileutil.savePickle. The elapsed time is printed per day. The function
    itself returns nothing.

    filepath    - output directory
    tradingDays - list of trading days
    dtype       - string naming the kind of trading day (daily/weekly/monthly)
    removeCols  - names of useless columns to drop, list
    keyCols     - names of the key columns, list
    includeCols - names of the columns to process, list
    excludeCols - names of the columns to leave unprocessed, list
    createDate  - name of the start-date column
    settleDate  - name of the end-date column
    """
    for td in tradingDays:
        dayLabel = td.strftime('%Y%m%d')
        print('handle {0}'.format(dayLabel))
        began = datetime.datetime.now()

        rawFrame = dataapi.getFactorData(dtype, td)

        # Key columns for this day: createDate, then settleDate (each only
        # if the frame has it), followed by the caller's keyCols.
        dayKeyCols = list(keyCols)
        dayKeyCols[:0] = [col for col in (createDate, settleDate)
                          if col in rawFrame.columns]

        handled = handleOneDay(rawFrame, removeCols, dayKeyCols, includeCols, excludeCols)
        fileutil.savePickle(filepath, td, handled)

        elapsed = datetime.datetime.now() - began
        print('Cost: {0} on: {1}'.format(elapsed, dayLabel))
def handleOneDay(stdpath, residpath, dtype, td, removeCols, keyCols, includCols, excludeCols, createDate, settleDate):
    """
    Run the standardize -> residual -> database steps for one trading day.

    The factor data for td is fetched with dataapi.getFactorData,
    standardized by std.handleOneDay and pickled under stdpath; the
    residuals are then computed by residual.calcResidual, pickled under
    residpath and inserted through residdb.insertData. The time spent in
    each stage is printed. Nothing is returned.

    stdpath     - directory where the standardized data is pickled
    residpath   - directory where the residual data is pickled
    dtype       - data type selector forwarded to dataapi.getFactorData and
                  residdb.insertData
    td          - trading day; must provide strftime
    removeCols  - column names forwarded to std.handleOneDay for removal, list
    keyCols     - names of the key columns, list
    includCols  - names of the columns to process, list (the spelling of
                  this parameter is kept so keyword callers keep working)
    excludeCols - names of the columns to leave unprocessed, list
    createDate  - name of the start-date column
    settleDate  - name of the end-date column
    """
    # The signature spells this parameter `includCols`; bind it to the name
    # the rest of the body uses so the caller's value is actually consumed
    # instead of raising NameError (or picking up an unrelated global).
    includeCols = includCols

    df = dataapi.getFactorData(dtype, td)

    # Key columns for standardization: createDate, then settleDate (each
    # only if the frame has it), followed by the caller's keyCols.
    actualKeyCols = list(keyCols)
    if settleDate in df.columns:
        actualKeyCols.insert(0, settleDate)
    if createDate in df.columns:
        actualKeyCols.insert(0, createDate)

    t1 = datetime.datetime.now()
    stddf = std.handleOneDay(df, removeCols, actualKeyCols, includeCols, excludeCols)
    t2 = datetime.datetime.now()
    dayLabel = td.strftime('%Y%m%d')
    print('Cost std: {0} on {1}'.format(t2 - t1, dayLabel))

    # Persist the standardized data before computing residuals.
    fileutil.savePickle(stdpath, td, stddf)

    # NOTE(review): the residual step receives keyCols, not actualKeyCols,
    # so the date columns are not part of its key list -- confirm intended.
    residdf = residual.calcResidual(td, stddf, keyCols, includeCols, excludeCols)
    t3 = datetime.datetime.now()
    # This interval also covers pickling the standardized data above.
    print('Cost resid: {0} on {1}'.format(t3 - t2, dayLabel))

    # Persist the residual data, then push it to the database.
    fileutil.savePickle(residpath, td, residdf)
    # NOTE(review): connection settings (including what looks like a
    # password) are hard-coded in source; they should be moved to
    # configuration or the environment.
    engine = dbaccessor.getdb('localhost', 'zhenggq', 'Yuzhong0931', 'advancedb', 1433)
    residdb.insertData(dtype, engine, residdf)
    t4 = datetime.datetime.now()
    # This interval covers the residual pickle, the connection and the insert.
    print('Cost insertdb: {0} on {1}'.format(t4 - t3, dayLabel))
def calcAllDay(deltaPath, stdPath, tradingDays, keyCols, includeCols, excludeCols, createDate, settleDate):
    """
    Compute and store the delta data for every trading day.

    For each day the standardized data is loaded from stdPath, handed to
    calcDelta, and the result is pickled under deltaPath. When
    fileutil.loadPickle returns None for a day, a failure message is
    printed and that day is not processed. The elapsed time is printed for
    every day. Nothing is returned.

    deltaPath   - directory where the difference between the mean of the
                  top 10% and the mean of the bottom 10% is saved
    stdPath     - directory holding the standardized data
    tradingDays - list of trading days (datetime.date items)
    keyCols     - columns that must be present in the result, list
    includeCols - names of the columns to process, list
    excludeCols - names of the columns to leave unprocessed, list
    createDate  - name of the start-date column
    settleDate  - name of the end-date column
    """
    for td in tradingDays:
        began = datetime.datetime.now()
        stdFrame = fileutil.loadPickle(stdPath, td)
        dayLabel = td.strftime('%Y%m%d')

        if stdFrame is None:
            print('Fail to load data: {0}, {1}'.format(dayLabel, stdPath))
        else:
            print('process: {0}'.format(dayLabel))
            # Key columns for this day: createDate, then settleDate (each
            # only if the frame has it), followed by the caller's keyCols.
            dayKeyCols = list(keyCols)
            dayKeyCols[:0] = [col for col in (createDate, settleDate)
                              if col in stdFrame.columns]
            deltaFrame = calcDelta(td, stdFrame, dayKeyCols, includeCols, excludeCols)
            # Only the pickle is written; a CSV export that used to follow
            # here is disabled.
            fileutil.savePickle(deltaPath, td, deltaFrame)

        elapsed = datetime.datetime.now() - began
        print('cost: {0} on: {1}'.format(elapsed, dayLabel))