Example #1
import math

import numpy

import dsConvert as dsc  # assumed name for the project-local data-conversion module


def ewstats(RetSeries, DecayFactor, WindowLength=None, **kwargs):
  converter = dsc.dsConvert()
  dimInfo = kwargs['input_dim']  # (NumObs, NumSeries) of the input return matrix

  # obtain dimension information of input return matrix
  NumObs = dimInfo[0] # long type
  NumSeries = dimInfo[1] # long type

  # validate input parameters
  if WindowLength is None:
    WindowLength = NumObs
  if DecayFactor <= 0 or DecayFactor > 1:
    print("finance:ewstats:invalidDecayFactor, " +
          "must have 0 < decay factor <= 1")
    return -1
  if WindowLength > NumObs:
    print("finance:ewstats:invalidWindowLength, Window Length must " +
          "be <= number of observations")
    return -1

  '''
  obtain the input data within the window only, CONVERTING from a
  DISTRIBUTED spark RDD to a LOCAL python list type
  '''
  # top() returns the WindowLength rows with the largest row index
  # (the most recent observations) in descending order; reverse()
  # restores chronological order before the list is converted to numpy
  RetSeries = RetSeries.top(WindowLength, key=lambda row: row[0])
  RetSeries.reverse()
  RetSeries = converter.indexRowList_2_numpy(RetSeries)

  # calculate decay coefficients
  DecayPowers = numpy.arange(WindowLength - 1, -1, -1).reshape(
      WindowLength, 1)
  VarWts = numpy.power(math.sqrt(DecayFactor), DecayPowers)
  RetWts = numpy.power(DecayFactor, DecayPowers)
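  # note: VarWts uses sqrt(DecayFactor) because the weights are applied
  # to CenteredSeries BEFORE it is squared in the covariance step below,
  # so each squared term ends up weighted by DecayFactor**power as intended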
 
  # RETURN - effective number of observations (sum of the weights), a scalar
  NEff = numpy.sum(RetWts)

  # compute the exponentially weighted mean return
  WtSeries = numpy.multiply(numpy.repeat(RetWts, NumSeries, axis=1),
                            RetSeries)
  
  # RETURN - estimated expected return (forward looking in time)
  ERet = WtSeries.sum(axis=0) / NEff

  # Subtract the weighted mean from the original Series
  CenteredSeries = RetSeries - ERet 

  # compute the weighted variance
  WtSeries = numpy.multiply(numpy.repeat(VarWts, NumSeries, axis=1),
                            CenteredSeries)

  # numpy.dot() performs 2D matrix multiplication (the dot-product
  # equivalent), whereas numpy.multiply() above is element-wise
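  # ECov is the NumSeries x NumSeries exponentially weighted covariance matrix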
  ECov = numpy.dot(WtSeries.transpose(), WtSeries)/NEff

  # returning:
  # - estimated expected return - ERet
  # - estimated expected covariance - ECov
  # - effective number of observations - NEff
  return [ERet, ECov, NEff]
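
A minimal usage sketch (not part of the original example), assuming a local
SparkContext, that the RDD holds (row_index, row_values) pairs as the key
lambda above implies, and that input_dim carries the (NumObs, NumSeries)
shape of the return matrix; the toy data and app name are hypothetical:

from pyspark import SparkContext
import numpy

sc = SparkContext("local[2]", "ewstats-demo")

# hypothetical toy input: 5 observations of 2 return series
returns = numpy.random.randn(5, 2) * 0.01
rdd = sc.parallelize([(i, returns[i]) for i in range(5)])

ERet, ECov, NEff = ewstats(rdd, DecayFactor=0.94, WindowLength=5,
                           input_dim=returns.shape)
sc.stop()
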
Example #2
from datetime import datetime

# assumed names for the project-local modules: data conversion (dsConvert),
# the Spark context factory, and the covariance computation kernel
import dsConvert as ds
import sparkContextFactory as scf
import computeCovHistory as comp


def go():
    startTime = datetime.now()

    converter = ds.dsConvert()
    # converter.setInputFile("/home/howard_xie/risk/MrT/data/dev7.mat")
    converter.setInputFile("/home/howard_xie/risk/MrT/data/RiskTestData.mat")

    # using h5py.File() - MATLAB -v7.3 MAT files are HDF5 containers,
    # so this branch only works for -v7.3 files
    if ds.h5py_available:
        # reading data elements from MAT file
        ret_m = converter.MATreader("FactorRtns")  # factor return AND/OR alpha matrix
        # ret_m = converter.MATreader("Alphas")
        date_m = converter.MATreader("DatesModel")
        lambda_m = converter.MATreader("Lambda")
        startD_m = converter.MATreader("StartDate")
        endD_m = converter.MATreader("EndDate")
        roll_m = converter.MATreader("RollingPeriod")
        startS_m = converter.MATreader("StartStress")
        endS_m = converter.MATreader("EndStress")
        qualcov_m = converter.MATreader("QualCov")

        # convert MAT data objects to numpy arrays; the second argument
        # appears to select h5py_2_numpy's conversion mode
        ret = converter.h5py_2_numpy(ret_m, 1)
        date = converter.h5py_2_numpy(date_m, 4)
        lambd = converter.h5py_2_numpy(lambda_m, 2)
        startD = converter.h5py_2_numpy(startD_m, 3)
        endD = converter.h5py_2_numpy(endD_m, 3)
        roll = converter.h5py_2_numpy(roll_m, 2)
        startS = converter.h5py_2_numpy(startS_m, 3)
        endS = converter.h5py_2_numpy(endS_m, 3)
        qualcov = converter.h5py_2_numpy(qualcov_m, 1)

        converter.closeMAT()
    # using scipy.io.loadmat() - only works for pre-v7.3 (non-HDF5) MAT files
    else:
        ret = converter.matFile["FactorRtns"]  # FactorRtns AND/OR Alphas matrix
        date = converter.matFile["DatesModel"].tolist()
        lambd = converter.matFile["Lambda"][0][0]
        startD = converter.matFile["StartDate"][0]
        endD = converter.matFile["EndDate"][0]
        roll = converter.matFile["RollingPeriod"][0][0]
        startS = converter.matFile["StartStress"][0]
        endS = converter.matFile["EndStress"][0]
        qualcov = converter.matFile["QualCov"]

    # instantiate Spark Context and SparkSQL context objs
    sparkContextLaunchTime = datetime.now()
    scFactory = scf.SparkContextFactory()
    sparkContextUpTime = datetime.now()

    # call main function with data from MAT file
    startComputeTime = datetime.now()
    result = comp.ComputeCovHistory(scFactory.sc, ret, date, startD, endD, lambd, roll, startS, endS, qualcov)
    endComputeTime = datetime.now()

    # shut down the Spark Context and SparkSQL Context
    scFactory.disconnect()

    # write the result back out to a MAT file
    converter.numpy_2_mat(result, "out.mat")

    endTime = datetime.now()
    print("Spark Context Launch took in seconds: " + str((sparkContextUpTime - sparkContextLaunchTime).total_seconds()))
    print("Computation Time took in seconds: " + str((endComputeTime - startComputeTime).total_seconds()))
    print("Overall time took in seconds: " + str((endTime - startTime).total_seconds()))