Example #1
0
def calcAllDay(residPath, stdPath, tradingDates, keyCols, includeCols, excludeCols, createDate, settleDate):
    """Compute and store residual data for every trading day given.

    For each day the standardized frame is loaded from ``stdPath`` through
    ``fileutil.loadPickle``; when a frame is returned it is passed to
    ``calcResidual`` and the result is written under ``residPath`` through
    ``fileutil.savePickle``.  Days whose frame comes back as ``None`` are
    reported and skipped.  The elapsed time is printed for every day.

    residPath    - directory where the residual data is saved
    stdPath      - directory holding the standardized data
    tradingDates - list of trading days (datetime.date objects)
    keyCols      - columns that must appear in the result, a list
    includeCols  - names of the columns to process, a list
    excludeCols  - names of the columns to leave unprocessed, a list
    createDate   - name of the start-date column
    settleDate   - name of the end-date column
    """
    for tradeDay in tradingDates:
        began = datetime.datetime.now()

        stdFrame = fileutil.loadPickle(stdPath, tradeDay)
        dayLabel = tradeDay.strftime('%Y%m%d')

        if stdFrame is None:
            print('Fail to load data: {0}, {1}'.format(dayLabel, stdPath))
        else:
            print('process: {0}'.format(dayLabel))

            # Date columns present in the frame lead the key columns:
            # createDate first, then settleDate, then the caller's keys.
            dateCols = [col for col in (createDate, settleDate)
                        if col in stdFrame.columns]
            resultKeys = dateCols + list(keyCols)

            residFrame = calcResidual(tradeDay, stdFrame, resultKeys,
                                      includeCols, excludeCols)
            fileutil.savePickle(residPath, tradeDay, residFrame)

        elapsed = datetime.datetime.now() - began
        print('cost: {0} on: {1}'.format(elapsed, dayLabel))
Example #2
0
def handleAllDay(filepath, tradingDays, dtype, removeCols, keyCols,
                 includeCols, excludeCols, createDate, settleDate):
    """Process the factor data of every trading day and pickle the results.

    For each day the factor data is fetched with ``dataapi.getFactorData``,
    handed to ``handleOneDay`` and the returned frame is written under
    ``filepath`` through ``fileutil.savePickle``.  Progress and elapsed time
    are printed per day.  Nothing is returned.

    filepath    - output directory for the processed files
    tradingDays - list of trading days
    dtype       - string naming the kind of trading day (daily/weekly/monthly)
    removeCols  - names of useless columns to drop, a list
    keyCols     - names of the key columns, a list
    includeCols - names of the columns to process, a list
    excludeCols - names of the columns to leave unprocessed, a list
    createDate  - name of the start-date column
    settleDate  - name of the end-date column
    """
    for tradeDay in tradingDays:
        dayLabel = tradeDay.strftime('%Y%m%d')
        print('handle {0}'.format(dayLabel))
        began = datetime.datetime.now()

        factorFrame = dataapi.getFactorData(dtype, tradeDay)

        # Date columns present in the frame lead the key columns:
        # createDate first, then settleDate, then the caller's keys.
        dateCols = [col for col in (createDate, settleDate)
                    if col in factorFrame.columns]
        resultKeys = dateCols + list(keyCols)

        handledFrame = handleOneDay(factorFrame, removeCols, resultKeys,
                                    includeCols, excludeCols)
        fileutil.savePickle(filepath, tradeDay, handledFrame)

        elapsed = datetime.datetime.now() - began
        print('Cost: {0} on: {1}'.format(elapsed, dayLabel))
Example #3
0
def handleOneDay(stdpath, residpath, dtype, td, removeCols, keyCols,
                 includCols, excludeCols, createDate, settleDate):
    """Run the standardize -> residual -> database pipeline for one day.

    The factor data for ``td`` is fetched with ``dataapi.getFactorData``,
    passed through ``std.handleOneDay`` and pickled under ``stdpath``; the
    result is then passed through ``residual.calcResidual``, pickled under
    ``residpath`` and inserted into the database through
    ``residdb.insertData``.  The time spent in each stage is printed.

    stdpath     - directory where the standardized data is saved
    residpath   - directory where the residual data is saved
    dtype       - kind of trading day, forwarded to dataapi/residdb
    td          - the trading day to process (must support ``strftime``)
    removeCols  - names of columns to drop, a list
    keyCols     - names of the key columns, a list
    includCols  - names of the columns to process, a list (the spelling is
                  kept because it is part of the public signature)
    excludeCols - names of the columns to leave unprocessed, a list
    createDate  - name of the start-date column
    settleDate  - name of the end-date column
    """
    df = dataapi.getFactorData(dtype, td)
    dayLabel = td.strftime('%Y%m%d')

    # Date columns present in the frame lead the key columns:
    # createDate first, then settleDate, then the caller's keys.
    actualKeyCols = list(keyCols)
    if settleDate in df.columns:
        actualKeyCols.insert(0, settleDate)
    if createDate in df.columns:
        actualKeyCols.insert(0, createDate)

    t1 = datetime.datetime.now()

    # The original body referenced ``includeCols``, which is not a name in
    # this function's scope (the parameter is spelled ``includCols``), so
    # the call raised NameError.  Use the declared parameter.
    stddf = std.handleOneDay(df, removeCols, actualKeyCols, includCols,
                             excludeCols)

    t2 = datetime.datetime.now()
    print('Cost std: {0} on {1}'.format(t2 - t1, dayLabel))
    fileutil.savePickle(stdpath, td, stddf)

    # NOTE(review): the plain ``keyCols`` (without the date columns) is
    # passed here, whereas the standardize step receives ``actualKeyCols``.
    # Left unchanged -- confirm whether this is intentional.
    residdf = residual.calcResidual(td, stddf, keyCols, includCols,
                                    excludeCols)

    t3 = datetime.datetime.now()
    print('Cost resid: {0} on {1}'.format(t3 - t2, dayLabel))
    fileutil.savePickle(residpath, td, residdf)

    # SECURITY(review): database host, user and password are hard-coded in
    # source.  They should be moved to configuration or environment
    # variables and the password rotated; kept here to preserve behaviour.
    engine = dbaccessor.getdb('localhost', 'zhenggq', 'Yuzhong0931',
                              'advancedb', 1433)
    residdb.insertData(dtype, engine, residdf)

    t4 = datetime.datetime.now()
    print('Cost insertdb: {0} on {1}'.format(t4 - t3, dayLabel))
Example #4
0
def calcAllDay(deltaPath, stdPath, tradingDays, keyCols, includeCols,
               excludeCols, createDate, settleDate):
    """Compute and store delta data for every trading day given.

    For each day the standardized frame is loaded from ``stdPath`` through
    ``fileutil.loadPickle``; when a frame is returned it is passed to
    ``calcDelta`` and the result is written under ``deltaPath`` through
    ``fileutil.savePickle``.  Days whose frame comes back as ``None`` are
    reported and skipped.  The elapsed time is printed for every day.

    deltaPath   - directory for saving the difference between the mean of
                  the top 10% and the mean of the bottom 10%
    stdPath     - directory holding the standardized data
    tradingDays - list of trading days (datetime.date objects)
    keyCols     - columns that must appear in the result, a list
    includeCols - names of the columns to process, a list
    excludeCols - names of the columns to leave unprocessed, a list
    createDate  - name of the start-date column
    settleDate  - name of the end-date column
    """
    for tradeDay in tradingDays:
        began = datetime.datetime.now()

        stdFrame = fileutil.loadPickle(stdPath, tradeDay)
        dayLabel = tradeDay.strftime('%Y%m%d')

        if stdFrame is None:
            print('Fail to load data: {0}, {1}'.format(dayLabel, stdPath))
        else:
            print('process: {0}'.format(dayLabel))

            # Date columns present in the frame lead the key columns:
            # createDate first, then settleDate, then the caller's keys.
            dateCols = [col for col in (createDate, settleDate)
                        if col in stdFrame.columns]
            resultKeys = dateCols + list(keyCols)

            deltaFrame = calcDelta(tradeDay, stdFrame, resultKeys,
                                   includeCols, excludeCols)
            fileutil.savePickle(deltaPath, tradeDay, deltaFrame)

        elapsed = datetime.datetime.now() - began
        print('cost: {0} on: {1}'.format(elapsed, dayLabel))