def calcAllDay(residPath, stdPath, tradingDates, keyCols, includeCols, excludeCols, createDate, settleDate):
    """
    Compute the residual frame for every trading day and pickle the result.

    residPath    - directory where the residual data is saved
    stdPath      - directory holding the standardized data
    tradingDates - list of trading days, items are datetime.date
    keyCols      - columns that must appear in the result, list
    includeCols  - names of the columns to process, list
    excludeCols  - names of the columns to leave out, list
    createDate   - name of the start-time column
    settleDate   - name of the end-time column
    """
    # print() is always given one parenthesised expression, so the emitted
    # text is the same as with the print statement.
    for td in tradingDates:
        startedAt = datetime.datetime.now()
        stdFrame = fileutil.loadPickle(stdPath, td)
        if stdFrame is None:
            print('Fail to load data: {0}, {1}'.format(td.strftime('%Y%m%d'), stdPath))
        else:
            print('process: {0}'.format(td.strftime('%Y%m%d')))
            # Date columns that exist in the frame lead the key list,
            # createDate first and settleDate second.
            dateCols = [col for col in (createDate, settleDate) if col in stdFrame.columns]
            actualKeyCols = dateCols + list(keyCols)
            residFrame = calcResidual(td, stdFrame, actualKeyCols, includeCols, excludeCols)
            fileutil.savePickle(residPath, td, residFrame)
        # Timing is reported for every day, loaded or not.
        finishedAt = datetime.datetime.now()
        print('cost: {0} on: {1}'.format(finishedAt - startedAt, td.strftime('%Y%m%d')))
def _redecodeAsGbk(value):
    """Re-interpret a mis-decoded text value as GBK; pass non-text values through."""
    # Missing cells (None / NaN) have no encode(); leave them untouched
    # instead of aborting the whole insert with an AttributeError.
    if not hasattr(value, 'encode'):
        return value
    return value.encode('raw-unicode-escape').decode('gbk')


def insertAllData(filepath, tradingDays, dtype):
    """
    Load the pickled frame of every trading day and insert it into the database.

    filepath    - path handed to fileutil.loadPickle together with each day
    tradingDays - iterable of trading days; each item must provide strftime
    dtype       - forwarded unchanged to insertData
    """
    # NOTE(review): host, user and password are hard-coded here; consider
    # moving them to configuration that is kept out of the source tree.
    engine = dbaccessor.getdb('localhost', 'zhenggq', 'Yuzhong0931', 'advancedb', 1433)
    for td in tradingDays:
        dayLabel = td.strftime('%Y%m%d')
        print('insert: {0}'.format(dayLabel))
        start = datetime.datetime.now()
        df = fileutil.loadPickle(filepath, td)
        if df is not None:
            # The Chinese abbreviations were wrongly treated as unicode code
            # points although the underlying bytes are GBK; undo that here.
            df['SecuAbbr'] = df['SecuAbbr'].apply(_redecodeAsGbk)
            insertData(dtype, engine, df)
        end = datetime.datetime.now()
        print('Cost: {0} on {1}'.format(end - start, dayLabel))
def testWeeklyData(td):
    """
    Manual check: load one day's weekly pickle and insert it, printing timings.

    td - the day to load; handed to fileutil.loadPickle

    Does nothing when the pickle cannot be loaded.
    """
    filepath = 'D:/workspace/python/residual/resid/weekly/'
    df = fileutil.loadPickle(filepath, td)
    if df is None:
        return

    t1 = datetime.datetime.now()
    engine = dbaccessor.getdb('localhost', 'zhenggq', 'Yuzhong0931', 'advancedb', 1433)
    conn = engine.connect()
    try:
        t2 = datetime.datetime.now()
        print('cost connect db: {0}'.format(t2 - t1))
        insertWeekly(conn, df)
        t3 = datetime.datetime.now()
        print('cost insert db: {0}'.format(t3 - t2))
    finally:
        # Release the connection even when the insert fails.
        # NOTE(review): assumes the object returned by engine.connect()
        # exposes close(), as SQLAlchemy and DB-API connections do - confirm.
        conn.close()
def test():
    """
    Manual smoke test: run calcResidual on the monthly data of one fixed day.

    Uses the column lists published by dataapi. Does nothing when the pickle
    for the day cannot be loaded.
    """
    td = datetime.date(2017, 3, 16)
    stdPath = 'D:/workspace/python/residual/result/monthly/'
    df = fileutil.loadPickle(stdPath, td)
    if df is None:
        return
    # Dropping rows with an empty industry code (IndustrySecuCode_I) and the
    # preprocessData step used to run here; both are currently disabled.
    #
    # calcResidual is called with the trading day as first argument, matching
    # the call made from calcAllDay in this file.
    calcResidual(td, df, dataapi.keyCols, dataapi.includeCols, dataapi.excludeCols)
def calcAllDay(deltaPath, stdPath, tradingDays, keyCols, includeCols, excludeCols, createDate, settleDate):
    """
    Compute the delta frame for every trading day and pickle the result.

    deltaPath   - directory where the difference between the means of the
                  top 10% and the bottom 10% is saved
    stdPath     - directory holding the standardized data
    tradingDays - list of trading days, items are datetime.date
    keyCols     - columns that must appear in the result, list
    includeCols - names of the columns to process, list
    excludeCols - names of the columns to leave out, list
    createDate  - name of the start-time column
    settleDate  - name of the end-time column
    """
    # print() is always given one parenthesised expression, so the emitted
    # text is the same as with the print statement.
    for td in tradingDays:
        startedAt = datetime.datetime.now()
        stdFrame = fileutil.loadPickle(stdPath, td)
        if stdFrame is None:
            print('Fail to load data: {0}, {1}'.format(td.strftime('%Y%m%d'), stdPath))
        else:
            print('process: {0}'.format(td.strftime('%Y%m%d')))
            # Date columns that exist in the frame lead the key list,
            # createDate first and settleDate second.
            dateCols = [col for col in (createDate, settleDate) if col in stdFrame.columns]
            actualKeyCols = dateCols + list(keyCols)
            deltaFrame = calcDelta(td, stdFrame, actualKeyCols, includeCols, excludeCols)
            # Only the pickle is written; a CSV export (fileutil.saveCSV /
            # DataFrame.to_csv) existed here earlier and is disabled.
            fileutil.savePickle(deltaPath, td, deltaFrame)
        # Timing is reported for every day, loaded or not.
        finishedAt = datetime.datetime.now()
        print('cost: {0} on: {1}'.format(finishedAt - startedAt, td.strftime('%Y%m%d')))