def test_singular(self):
    """A perfectly collinear (rank-deficient) design matrix must make ols raise."""
    n_obs, n_feat = 1000, 10
    response = np.random.random((n_obs, 1))
    design = np.ones((n_obs, n_feat))  # every column identical -> singular X'X
    with self.assertRaises(Exception):
        ols(response, design)
    def testinner(maxlen):
      """Exhaustively check chunked overlap-save against the known-good result.

      Closure over the enclosing scope's `x`, `h` and `gold` (the reference
      convolution).  For every chunk size 1..maxlen in each dimension it
      verifies that (a) manually tiling olsStep over the input, (b) ols with
      an explicit nfft, (c) ols with the default nfft, and (d) ols writing
      into a memory-mapped output file all reproduce `gold`.
      """
      # FFT sizes: smallest 2,3-smooth numbers covering each padded dimension.
      nfft = [nextprod([2, 3], x) for x in np.array(h.shape) + maxlen - 1]
      hpre = prepareh(h, nfft)

      for xlen0 in range(maxlen):
        for ylen0 in range(maxlen):
          ylen, xlen = 1 + ylen0, 1 + xlen0
          # Rebuild the full output by stitching together per-chunk olsStep
          # results.
          # NOTE(review): xstart iterates over x.shape[0] and ystart over
          # x.shape[1] -- confirm the axis convention matches olsStep's.
          dirt = np.vstack([
              np.hstack([
                  olsStep(x, hpre, [ystart, xstart], [ylen, xlen], nfft, h.shape)
                  for xstart in range(0, x.shape[0], xlen)
              ])
              for ystart in range(0, x.shape[1], ylen)
          ])
          assert np.allclose(dirt, gold)

          dirt2 = ols(x, h, [ylen, xlen], nfft)
          assert np.allclose(dirt2, gold)
          dirt3 = ols(x, h, [ylen, xlen])
          assert np.allclose(dirt3, gold)

          # Memory-mapped input and output must behave like in-memory arrays;
          # assumes 'x.npy' already exists on disk (written by the caller).
          memx = np.lib.format.open_memmap('x.npy')
          memout = np.lib.format.open_memmap('out.npy', mode='w+', dtype=x.dtype, shape=x.shape)
          dirtmem = ols(memx, h, [ylen, xlen], out=memout)
          assert np.allclose(dirtmem, gold)
          del memout
          del memx

          # Re-reading the file proves the result was flushed to disk.
          dirtmem2 = np.load('out.npy')
          assert np.allclose(dirtmem2, gold)
def testPyFFTW_complex():
  """Check that ols accepts drop-in FFT backends on complex 2-D input.

  Compares against scipy's fftconvolve: first with NumPy's complex FFT pair,
  then with PyFFTW's, both single- and two-threaded.
  """
  nx, nh = 21, 7
  xr = np.random.randint(-30, 30, size=(nx, nx))
  xi = np.random.randint(-30, 30, size=(nx, nx))
  x = xr + 1j * xi
  hr = np.random.randint(-20, 20, size=(nh, nh))
  hi = np.random.randint(-20, 20, size=(nh, nh))
  h = hr + 1j * hi
  gold = fftconvolve(x, h, mode='same')

  # Baseline: numpy's complex FFTs in place of the default real pair.
  baseline = ols(x, h, rfftn=np.fft.fftn, irfftn=np.fft.ifftn)
  assert np.allclose(gold, baseline)

  # Now PyFFTW's drop-in replacements.
  import pyfftw.interfaces.numpy_fft as fftw

  # Standard, single-threaded.
  single = ols(x, h, rfftn=fftw.fftn, irfftn=fftw.ifftn)
  assert np.allclose(gold, single)

  # Two-threaded wrappers.
  def fftThreaded(*args, **kwargs):
    return fftw.fftn(*args, threads=2, **kwargs)

  def ifftThreaded(*args, **kwargs):
    return fftw.ifftn(*args, threads=2, **kwargs)

  threaded = ols(x, h, rfftn=fftThreaded, irfftn=ifftThreaded)
  assert np.allclose(gold, threaded)
def pickBreakpointV2(response, x1, predictor):
    """Scan candidate breakpoints for the piecewise model
    y ~ F1F2 + F1F2star + dist + diststar and report the best fit by R2.

    Both `x1` and `predictor` get hinge ("star") companions that are zero
    below the candidate breakpoint and linear above it.  Returns the
    (index, R2) results array over all candidates.

    NOTE(review): Python 2 code (print statements, list-returning zip).
    Relies on a module-level `bpChoices`; the commented-out geneBpChoices
    call suggests it was once built here -- confirm it is defined.
    """
    #print int(min(predictor))*10, int(max(predictor)+1)*10, int(max(predictor) - min(predictor) + 1)/2
    #bpChoices = geneBpChoices(min(predictor), max(predictor), 20)
    results = np.zeros((len(bpChoices)-1, 2))
    print bpChoices
    
    for i in range(len(bpChoices)-1):
        print i
        # Hinge terms: zero below the candidate breakpoint, linear above it.
        x2star = (predictor - bpChoices[i]) * np.greater(predictor, bpChoices[i])
        x1star = x1 * np.greater(predictor, bpChoices[i]) 
        tempPredictor = np.array(zip(x1, x1star, predictor, x2star))
        #fileLoc = filePath + 'temp.csv'
        #np.savetxt(fileLoc, tempPredictor, delimiter=',', fmt = '%s')
        #print tempPredictor
        tempmodel = ols.ols(response, tempPredictor,'y',['F1F2', 'F1F2star', 'dist', 'diststar'])
        results[i,0] = i
        #results[i,1] = tempmodel.sse
        results[i,1] = tempmodel.R2

    # Winner: the candidate index with the highest R2.
    optBP = int(results[np.argmax(results, axis = 0)[1],0])
    print 'Optimal Index:', optBP
    print 'Optimal changepoint: ', bpChoices[optBP], ' exp value: ', np.exp(bpChoices[optBP]), ' with R2 = ', results[optBP, 1]

    #x2star = (predictor - bpChoices[optBP]) * np.greater(predictor, bpChoices[optBP])
    #optPredictor = np.array(zip(predictor, x2star))
    #optmodel = ols.ols(response, optPredictor,'y',['x1', 'x2'])
    # Refit at the winning breakpoint for the final report.
    x1star = x1 * np.greater(predictor, bpChoices[optBP])
    x2star = (predictor - bpChoices[optBP]) * np.greater(predictor, bpChoices[optBP])
    optPredictor = np.array(zip(x1, x1star, predictor, x2star))
    optmodel = ols.ols(response, optPredictor,'y',['F1F2', 'F1F2star', 'dist', 'diststar'])
    
    #return bpChoices[optBP], results, optmodel, optmodel.b[0]+optmodel.b[1]*predictor+optmodel.b[2]*x2star
    print results, optmodel.b
    print optmodel.summary()
    return results
def separateRegression(response, predictor, sepData, bpChoices, predictorName):
    """Fit two independent OLS models on the data split at each candidate
    breakpoint of `sepData`, pick the split with the best combined R2, and
    rebuild the full fitted-value vector from the two halves.

    Returns (results, yhat) where results holds per-candidate
    (index, left R2adj, right R2adj, combined R2) and yhat is np.exp of the
    stitched fitted values (the response is evidently modeled in log space).

    NOTE(review): Python 2 code.  `separateData`, `calculateR2` and the
    module-level `filepath` are defined elsewhere.
    """
    results = np.zeros((len(bpChoices)-1, 4))

    for bpid in range(len(bpChoices)-1):
        print bpid
        # Split observations at this candidate; also get the original row
        # indices of each half for reassembling predictions later.
        responseLeft, responseRight, predictorLeft, predictorRight, dataleftIdx, datarightIdx = separateData(response, predictor, sepData, bpChoices[bpid])
        #print np.mean(responseLeft), np.mean(responseRight)
        #print predictorLeft, predictorRight
        leftmodel = ols.ols(responseLeft, predictorLeft,'y',predictorName)
        rightmodel = ols.ols(responseRight, predictorRight,'y',predictorName)
        results[bpid,0] = bpid
        results[bpid,1] = leftmodel.R2adj
        results[bpid,2] = rightmodel.R2adj
        #results[bpid,3] = 1 - (leftmodel.e.var() + rightmodel.e.var())/(leftmodel.y.var() + rightmodel.y.var())
        results[bpid,3] = calculateR2(leftmodel, rightmodel, np.mean(response))
        #results[bpid,3] = (leftmodel.R2 + rightmodel.R2)/2

    print results
        
    # Winner: highest combined R2 (last column).
    optBP = int(results[np.argmax(results, axis = 0)[-1],0])

    # Refit both halves at the winning breakpoint.
    responseLeft, responseRight, predictorLeft, predictorRight, dataleftIdx, datarightIdx = separateData(response, predictor, sepData, bpChoices[optBP])
    leftmodel = ols.ols(responseLeft, predictorLeft,'y',predictorName)
    rightmodel = ols.ols(responseRight, predictorRight,'y',predictorName)
    #print calculateR2(leftmodel)
    #optmodel = ols.ols(response, optPredictor,'y',['F1F2', 'F1F2star', 'dist', 'diststar'])
    yhatL = np.dot(leftmodel.x, leftmodel.b)
    yhatR = np.dot(rightmodel.x, rightmodel.b)
    yhat = np.zeros(len(response))

    # Scatter each half's fitted values back into original row order.
    for i in range(len(yhatL)):
        yhat[dataleftIdx[i]] = yhatL[i]

    for i in range(len(yhatR)):
        yhat[datarightIdx[i]] = yhatR[i]

    yhat = np.exp(yhat)
    fileLoc = filepath + 'separateR_model2_y_hat.csv'
    #np.savetxt(fileLoc, yhat, delimiter=',', fmt = '%s')
    print 'Optimal Index:', optBP
    print 'Optimal changepoint: ', bpChoices[optBP], ' exp value: ', np.exp(bpChoices[optBP]), ' with R2 = ', results[optBP, -1]
   
    #return bpChoices[optBP], results, optmodel, optmodel.b[0]+optmodel.b[1]*predictor+optmodel.b[2]*x2star
    #print results, optmodel.b
    print '----------------------------- left model -----------------------------'
    print leftmodel.summary()
    print '----------------------------- right model -----------------------------'
    print rightmodel.summary()

    print 'Optimal Index:', optBP
    print 'Optimal changepoint: ', bpChoices[optBP], ' exp value: ', np.exp(bpChoices[optBP]), ' with R2 = ', results[optBP, -1]
   

    print 'before bp, b0 =',leftmodel.b[0], ', b1 =', leftmodel.b[1], ', bd =', leftmodel.b[2], ', MSE =', leftmodel.sse
    print 'after bp, b0 =', rightmodel.b[0], ', b1 =', rightmodel.b[1], ', bd =', rightmodel.b[2], ', MSE =', rightmodel.sse

    #calpredictedvalue(predictor, bpChoices[optBP], zip(leftmodel.b, rightmodel.b), 'exp_inoutflow_model2B.csv')
    #calconfidenceinterval(predictorLeft, predictorRight, [leftmodel.sse, rightmodel.sse], response, predictor, bpChoices[optBP], zip(leftmodel.b, rightmodel.b), 'ci_model2B.csv')
    return results, yhat
    def evaluateScore(self, chromosome):
        """Score a chromosome by the R^2 of an OLS fit of suspected scan time
        on the chromosome's property combination (self.mlrPropCombo).

        Metabolites without a SUSPECTED_SCANTIME, or missing any property in
        the combo, are excluded from the fit.  Returns 0 when the regression
        cannot be computed at all.
        """
        xMatrix = []
        yVector = []

        for metabolite in chromosome:
            knownProps = metabolite.props

            # Blank/filler metabolites carry no suspected scan time; skip them.
            if 'SUSPECTED_SCANTIME' not in knownProps:
                continue
            scanTime = knownProps['SUSPECTED_SCANTIME']

            xMatrixRow = []
            countThisMetabolite = True  # dropped if any predictor is missing
            for prop in self.mlrPropCombo:
                if prop in knownProps:
                    xMatrixRow.append(knownProps[prop])
                else:
                    countThisMetabolite = False
                    break  # BUGFIX: no point collecting the rest of a discarded row

            # All properties present: include this metabolite in the fit.
            if countThisMetabolite:
                xMatrix.append(xMatrixRow)
                yVector.append(scanTime)

        try:
            m = ols(array(yVector), array(xMatrix))
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            return 0

        return m.R2
Example #7
0
def forward_stepwise(X, Y, feat_sel_num):
    """Greedy forward feature selection by training MSE.

    In each of `feat_sel_num` rounds, the unused column of X whose addition
    yields the lowest training mean squared error is appended to the model
    (which always includes an intercept column of ones).

    Returns the list of selected column indices, in selection order.
    """
    train_size, feat_size = X.shape
    # Design matrix starts with just the intercept column.
    tX = np.mat(np.ones(train_size)).T
    feat_set = []

    for _ in range(feat_sel_num):
        tmp_idx = -1
        min_mse = 1e10
        # Append a placeholder column that each candidate is swapped into.
        tX = np.concatenate((tX, np.mat(np.zeros(train_size)).T), axis=1)
        for idx in range(feat_size):
            if idx in feat_set:
                continue
            tX[:, -1] = X[:, idx]
            Y_hat = np.matmul(tX, ols.ols(tX, Y))
            error = Y - Y_hat
            # BUGFIX: was `1/test_size`, but test_size is undefined in this
            # function (NameError); the MSE is over the training sample.
            mse = 1 / train_size * np.dot(error.T, error)[0, 0]
            if mse < min_mse:
                tmp_idx = idx
                min_mse = mse

        # Lock in the best candidate for this round.
        tX[:, -1] = X[:, tmp_idx]
        feat_set.append(tmp_idx)

    return feat_set
def main(events):
    """Fetch Mixpanel data for `events`, regress the first event on the rest,
    and print the fitted regression equation.

    NOTE(review): Python 2 code.  `get_event_data`, `event_data_to_matrix`
    and `ols` come from elsewhere in the project.  Mutates `events` in place
    via pop(0).
    """

    data = get_event_data(events)

    if len(data) == 0:
        sys.exit("ERROR: Unable to retrieve data from Mixpanel")

    matrix = event_data_to_matrix(data, events)

    # Column 0 / first event is the response; the rest are predictors.
    response_name = events.pop(0)
    response_data = matrix[:,0]

    predictors_data = matrix[:,1:]
    predictors_names = events

    model = ols(response_data, predictors_data, response_name, predictors_names)

    model.summary()

    ## Generate Equation

    # Gather list of coefficients so we can build our formula
    coeff_dict = dict(zip(model.x_varnm, model.b))

    equation = "%s = %.5f" % (response_name, coeff_dict['const']) # Start with constant coefficient

    for name in predictors_names:
        val = coeff_dict[name]
        equation += " + %.5f(%s)" % (val, name)

    print "Regression equation for response variable '%s'" % response_name
    print
    print equation
Example #9
0
def mlr(data, dependent_key, independent_keys=None) :
    """Run a multiple linear regression of `dependent_key` on
    `independent_keys` over the rows of `data` and print the summary.

    With no independent keys given, a default set of language-property
    columns is used.  On a linear-algebra failure the offending y/x values
    are dumped before re-raising.

    NOTE(review): Python 2 code.  `extract_vals` is defined elsewhere.
    """
    import ols
    import numpy as np

    if independent_keys == None:
        independent_keys = ['Age', # 'CapersJones', 'Popularity',
                'C-based', 'OO', 'Compiled', 'DynamicTyping']

    # Column 0 is the dependent variable, the rest are regressors.
    all_keys = [dependent_key]
    all_keys.extend(independent_keys)

    values = extract_vals(data, all_keys)
    y = values[:,0]
    x = values[:,1:]

    try :
        model = ols.ols(y, x, dependent_key, independent_keys)
    except Exception :
        # Dump the inputs for diagnosis, then propagate the failure.
        print 'Encountered a LinAlgError!'
        print 'dumping values...'
        print '\ny values'
        print y
        print '\nx values'
        print x
        raise

    model.summary()
def evaluateScore(chromosome):
    """Score a property-set chromosome by the R^2 of an OLS fit of measured
    scan time on those properties, over the module-level `metabolites` dict.

    Metabolites missing any property in the chromosome are excluded.  The
    score is forced to 0 when the regression fails, when R^2 is negative, or
    when fewer than 40%% of metabolites could be used.

    NOTE(review): Python 2 code (iteritems).  `metabolites`, `ols` and
    `array` come from module scope.
    """
    xMatrix = []
    yVector = []

    for keggID, properties in metabolites.iteritems():
        addThisItem = True
        xMatrixRow = []
        for prop in chromosome:
            if prop in properties:
                xMatrixRow.append(properties[prop])
            else:
                # Missing predictor: drop this metabolite entirely.
                addThisItem = False
                break

        if addThisItem:
            measuredScanTime = properties["MEASURED_SCANTIME"]
            yVector.append(measuredScanTime)
            xMatrix.append(xMatrixRow)

    try:
        m = ols(array(yVector), array(xMatrix))
    except:
        return 0

    rSquared = m.R2
    if rSquared < 0:
        #dunno why this is happening, but when it does just disregard the result
        rSquared = 0

    if float(len(yVector)) / float(len(metabolites.values())) < 0.4:
        #disregard this combination if less than half the metabolites contain the needed properties
        rSquared = 0
    return rSquared
Example #11
0
def best_subset(trainX, trainY, idx, comb, label):
    """Recursively enumerate increasing feature combinations and keep the one
    with the lowest prediction error.

    comb[0..idx-1] holds the columns chosen so far and `label` marks columns
    currently in use.  At full depth (idx == comb.size) the subset is fitted
    with an intercept and compared against the running best.

    NOTE(review): communicates results through module-level globals
    (min_mpe, best_set, beta, train_size, ...); nothing is returned.
    """
    global min_mpe, best_set
#    global trainX, trainY
    global testX, testY
    global train_size, test_size
    global beta, test_error
    
    if idx == comb.size:
#        print(comb)
        # Full combination chosen: prepend an intercept column and evaluate.
        sub_trainX = np.concatenate((np.mat(np.ones(train_size)).T, trainX[:, comb]), axis=1)
        beta_hat = ols.ols(sub_trainX, trainY)
        mpe = ols.eval_mpe(sub_trainX, trainY, beta_hat)

        if mpe < min_mpe:
            min_mpe = mpe
            best_set = copy.deepcopy(comb)
            beta = copy.deepcopy(beta_hat)
#            testY_hat = np.matmul(np.concatenate((np.mat(np.ones(test_size)).T, testX[:, comb]), axis=1), beta_hat)
#            error = testY - testY_hat
#            test_error = 1/test_size * np.dot(error.T, error)[0, 0] 
    else:
        # Only try unused columns at or beyond the previous pick so each
        # combination is generated exactly once, in increasing order.
        for i in range(0 if idx==0 else comb[idx-1], label.size):
            if label[i] == 0:
                comb[idx] = i
                label[i] = 1
                best_subset(trainX, trainY, idx+1, comb, label)
                label[i] = 0
 def testInner(nx, nh):
     """1-D correctness check: chunked ols must match fftconvolve(mode='same')."""
     x = np.random.randint(-30, 30, size=nx) + 1.0
     h = np.random.randint(-20, 20, size=nh) + 1.0
     gold = fftconvolve(x, h, mode='same')
     for chunk in [2, 3]:
         assert np.allclose(gold, ols(x, h, [chunk]))
Example #13
0
    def test_strong_regression(self):
        """Exactly linear data must be recovered with tiny p-values and huge
        t-statistics, across several random slopes and intercepts."""
        seed = 1234567890
        np.random.seed(seed)

        # Due to the strong linearity of the regressions,
        # RuntimeWarnings may be raised when computing some
        # of the regression statistics. We can ignore these.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            for _ in range(10):
                # BUGFIX: np.random.random_integers was deprecated and then
                # removed from NumPy; randint(1, 101) draws the same values
                # from the same seeded RandomState stream.
                coeff = np.random.randint(1, 101)
                inter = np.random.randint(1, 101)

                x = np.random.rand(10)
                y = np.array([coeff * xv + inter for xv in x])

                reg = ols(x, y)

                # Fitted coefficients are [intercept, slope].
                expected = np.array([inter, coeff])
                diff = abs(expected - reg.b)

                self.assertTrue(np.all(diff < EPSILON))
                self.assertTrue(np.all(reg.p < EPSILON))
                self.assertTrue(np.all(abs(reg.t) > MAXINT))
Example #14
0
    def getCurrentRSquared(self):
        """Return the R^2 of an OLS fit of self.yVector on self.xMatrix,
        or 0 if the regression fails (e.g. singular design matrix)."""
        try:
            m = ols(array(self.yVector), array(self.xMatrix))
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.
            return 0

        return m.R2
def regression():
    """Fit the cache-hit OLS model on the prepared variables and print its
    summary."""
    predictors, hits = variables()
    cachemodel = ols.ols(hits, predictors, "Hit", ["timeper", "advance"])
    _ = cachemodel.p  # touch the p-values, exactly as the original did
    cachemodel.summary()
def simpleRegression(response, predictor, predictorName):
    """Fit a plain OLS model of response on predictor and print diagnostics.

    Returns the in-sample fitted values X @ b.

    NOTE(review): Python 2 code; `ols` is the project's regression module.
    """
    model = ols.ols(response, predictor,'y',predictorName)
    #model = ols.ols(response, predictor)
    print model.summary()
    print model.b
    print predictor
    #print 'b0 =',model.b[0], ', b1 =', model.b[1], ', bd =', model.b[2], ', MSE =', model.sse
    return np.dot(model.x, model.b)
Example #17
0
 def tryMLR(self):
     """Attempt the multiple linear regression of self.yVector on
     self.xMatrix, storing the model on self.m.

     Returns True on success, False if the fit raises.
     """
     try:
         self.m = ols(array(self.yVector), array(self.xMatrix))
         return True
     except Exception:
         # BUGFIX: was a bare `except:`, which also swallowed
         # KeyboardInterrupt/SystemExit.
         return False
Example #18
0
def getTrend(t, y, mask = None):
    """Find the linear trend in a time-series y with times vector t.

    If `mask` is given, both t and y are filtered through applyMask1D first.
    Returns the fitted OLS coefficients.
    """
    # BUGFIX: `if mask:` raises "truth value is ambiguous" for ndarray masks
    # (and would silently skip masking for an empty mask); compare to None.
    if mask is not None:
        t = applyMask1D(t, mask)
        y = applyMask1D(y, mask)
    assert(t.shape[0] == y.shape[0])
    mymodel = ols.ols(y,t,'y',['t'])
    return mymodel.b  # return coefficients
Example #19
0
File: stat601.py Project: di/school
def multiple_regression(y_array, x_array, round_to=5):
    """Regress y on several predictor series and return rounded coefficients.

    `x_array` is a list of predictor series (one per coefficient); it is
    transposed so rows become observations.  Coefficients are printed as
    b0..bn and returned rounded to `round_to` digits.

    NOTE(review): Python 2 code.
    """
    # Where the x-values is a list of multiple x-coefficients
    y = np.array(y_array)
    x = np.transpose(np.array(x_array))
    # Regressors are auto-named x1..xn.
    model = ols.ols(y, x, 'y', ['x' + str(i+1) for i,x in enumerate(x_array)])
    model.summary()
    for i,b in enumerate(model.b):
        print "b" + str(i) + " = " + str(round(b,round_to))
    return [round(b, round_to) for b in model.b]
def calccoeff(grndata, reddata) :
    """Fit third-order polynomial coordinate transforms between the green and
    red channels, in both directions.

    Columns 1 and 2 of each input are x and y positions.  Returns
    (coeffGtoR, coeffRtoG, errGtoR, errRtoG), where the err arrays hold the
    per-point Euclidean residual of each fitted transform.

    NOTE(review): c_, hstack, repeat, dot and sqrt appear to come from a star
    import (numpy/pylab) -- confirm.  The y_varnm labels passed to ols look
    copy-pasted ('xRed' for the yRed fit, 'Grn' for both reverse fits); they
    only affect the printed summaries, not the coefficients.
    """
    xGrn=grndata[:,1]
    yGrn=grndata[:,2]
    xRed=reddata[:,1]
    yRed=reddata[:,2]

    # Part I: Calculating Grn to Red transformation coefficients      
    # The function chosen is a third order polynomial function

    # Design matrix: cubic polynomial terms of the green coordinates.
    Grn=c_[xGrn, xGrn**2, xGrn**3, yGrn, yGrn**2, yGrn**3]

    mymodel = ols.ols(xRed,Grn,y_varnm='xRed',x_varnm=['x','x^2','x^3','y','y^2','y^3'])
    coeffGtoR_x = mymodel.b
    mymodel.summary()

    mymodel = ols.ols(yRed,Grn,y_varnm='xRed',x_varnm=['x','x^2','x^3','y','y^2','y^3'])
    coeffGtoR_y = mymodel.b
    mymodel.summary()

    coeffGtoR = hstack((coeffGtoR_x,coeffGtoR_y))

    # Residual of the fitted transform; the leading 1-column matches the
    # intercept in the coefficient vectors.
    GrnPlus=c_[repeat(1,Grn.shape[0]), Grn]
    errGtoR=sqrt((dot(GrnPlus,coeffGtoR_x)-xRed)**2+(dot(GrnPlus,coeffGtoR_y)-yRed)**2)

    # Part II: Calculating Red to Grn transformation coefficients
    # The function chosen is a third order polynomial function

    Red=c_[xRed, xRed**2, xRed**3, yRed, yRed**2, yRed**3]

    mymodel = ols.ols(xGrn, Red, y_varnm='Grn',x_varnm=['x','x^2','x^3','y','y^2','y^3'])
    coeffRtoG_x = mymodel.b
    mymodel.summary()

    mymodel = ols.ols(yGrn, Red, y_varnm='Grn',x_varnm=['x','x^2','x^3','y','y^2','y^3'])
    coeffRtoG_y = mymodel.b
    mymodel.summary()

    coeffRtoG = hstack((coeffRtoG_x,coeffRtoG_y))

    RedPlus=c_[repeat(1,Red.shape[0]), Red]
    errRtoG=sqrt((dot(RedPlus,coeffRtoG_x)-xGrn)**2+(dot(RedPlus,coeffRtoG_y)-yGrn)**2)
    
    return (coeffGtoR, coeffRtoG, errGtoR, errRtoG)
def pickBreakpoint(response, predictor, bpvarible, bpChoices, predictorName):
    """Search candidate breakpoints of `bpvarible` for the best piecewise OLS
    model and return the winner's diagnostics.

    For each candidate a hinge column (zero below the breakpoint, linear
    above) is appended to the predictors; the candidate with the highest R2
    wins.  Returns (results, y_hat, optmodel, breakpoint value).

    NOTE(review): Python 2 code (print statements).
    """
    results = np.zeros((len(bpChoices) - 1, 2))
    print bpChoices
    bpvarible = np.array(bpvarible)
    predictor = np.array(predictor)
    predictor_t = np.transpose(predictor)
    predictorName = np.append(predictorName, "bpvarible")

    for i in range(len(bpChoices) - 1):
        # Hinge term for this candidate breakpoint.
        x2star = (bpvarible - bpChoices[i]) * np.greater(bpvarible, bpChoices[i])
        # Append the hinge column, then reshape back to (n_obs, n_predictors).
        tempPredictor = np.append(predictor_t, x2star)
        tempPredictor.shape = (len(predictorName), -1)
        tempPredictor = np.transpose(tempPredictor)
        # print tempPredictor
        tempmodel = ols.ols(response, tempPredictor, "y", predictorName)
        results[i, 0] = i
        results[i, 1] = tempmodel.R2
    # print results

    # Winner: candidate with the highest R2.
    optBP = int(results[np.argmax(results, axis=0)[1], 0])
    print "Optimal Index:", optBP
    print "Optimal changepoint: ", bpChoices[optBP], " exp value: ", np.exp(bpChoices[optBP]), " with R2 = ", results[
        optBP, 1
    ]

    # Refit at the winning breakpoint for reporting and fitted values.
    x2star = (bpvarible - bpChoices[optBP]) * np.greater(bpvarible, bpChoices[optBP])
    tempPredictor = np.append(predictor_t, x2star)
    tempPredictor.shape = (len(predictorName), -1)
    optPredictor = np.transpose(tempPredictor)
    optmodel = ols.ols(response, optPredictor, "y", predictorName)
    # optmodel = ols.ols(response, optPredictor,'y',predictorName)

    y_hat = np.dot(optmodel.x, optmodel.b)
    # fileLoc = filepath + 'pieceR_model2_y_hat.csv'
    # np.savetxt(fileLoc, y_hat, delimiter=',', fmt = '%s')

    print optmodel.summary()
    print "MSE =", optmodel.sse
    # print 'before bp, b0 =',optmodel.b[0], ', b1 =', optmodel.b[1], ', bd =', optmodel.b[2]
    # print 'after bp, b0 =', optmodel.b[0] - optmodel.b[3] * bpChoices[optBP], ', b1 =', optmodel.b[1], ', bd =', optmodel.b[2] + optmodel.b[3]

    # calpredictedvalue(zip(x1, predictor), bpChoices[optBP], zip(optmodel.b, [optmodel.b[0] - optmodel.b[3] * bpChoices[optBP], optmodel.b[1], optmodel.b[2] + optmodel.b[3]]), 'exp_inoutflow_model2A.csv')
    return results, y_hat, optmodel, bpChoices[optBP]
Example #22
0
    def test_str_object(self):
        """str(reg) must report the observation count in the expected format."""
        np.random.seed(1234567890)

        x = np.random.rand(10)
        y = np.random.rand(10)
        reg = ols(x, y)

        expected = "OLS Regression on 10 Observations"
        self.assertTrue(str(reg) == expected, "Strings don't match")
def pickBreakpoint(response, x1, predictor):
    """Search candidate breakpoints for the piecewise model
    y ~ F1F2 + dist + diststar and report the best fit by R2.

    Returns (results, y_hat): the per-candidate (index, R2) array and the
    winning model's fitted values.

    NOTE(review): Python 2 code.  Relies on module-level `bpChoices`,
    `filepath` and `calpredictedvalue` defined elsewhere (see the
    commented-out geneBpChoices call).
    """
    #print int(min(predictor))*10, int(max(predictor)+1)*10, int(max(predictor) - min(predictor) + 1)/2
    #bpChoices = geneBpChoices(min(predictor), max(predictor), 20)
    results = np.zeros((len(bpChoices)-1, 2))
    print bpChoices
    predictor = np.array(predictor)
    
    for i in range(len(bpChoices)-1):
        #print i
        #print type((predictor - bpChoices[i]))
        # Hinge term: zero below the candidate breakpoint, linear above it.
        x2star = (predictor - bpChoices[i]) * np.greater(predictor, bpChoices[i])
        tempPredictor = np.array(zip(x1, predictor, x2star))
        #fileLoc = filePath + 'temp.csv'
        #np.savetxt(fileLoc, tempPredictor, delimiter=',', fmt = '%s')
        tempmodel = ols.ols(response, tempPredictor,'y',['F1F2', 'dist', 'diststar'])
        results[i,0] = i
        #results[i,1] = tempmodel.sse
        results[i,1] = tempmodel.R2
    print results

    # Winner: candidate with the highest R2.
    optBP = int(results[np.argmax(results, axis = 0)[1],0])
    print 'Optimal Index:', optBP
    print 'Optimal changepoint: ', bpChoices[optBP], ' exp value: ', np.exp(bpChoices[optBP]), ' with R2 = ', results[optBP, 1]

    #x2star = (predictor - bpChoices[optBP]) * np.greater(predictor, bpChoices[optBP])
    #optPredictor = np.array(zip(predictor, x2star))
    #optmodel = ols.ols(response, optPredictor,'y',['x1', 'x2'])
    # Refit at the winning breakpoint.
    x2star = (predictor - bpChoices[optBP]) * np.greater(predictor, bpChoices[optBP])
    optPredictor = np.array(zip(x1, predictor, x2star))
    optmodel = ols.ols(response, optPredictor,'y',['F1F2', 'dist', 'diststar'])
    #print optmodel.b
    #return bpChoices[optBP], results, optmodel, optmodel.b[0]+optmodel.b[1]*predictor+optmodel.b[2]*x2star
    y_hat = np.dot(optmodel.x, optmodel.b)
    fileLoc = filepath + 'pieceR_model2_y_hat.csv'
    #np.savetxt(fileLoc, y_hat, delimiter=',', fmt = '%s')
    
    print optmodel.summary()
    print 'MSE =', optmodel.sse
    print 'before bp, b0 =',optmodel.b[0], ', b1 =', optmodel.b[1], ', bd =', optmodel.b[2]
    print 'after bp, b0 =', optmodel.b[0] - optmodel.b[3] * bpChoices[optBP], ', b1 =', optmodel.b[1], ', bd =', optmodel.b[2] + optmodel.b[3]

    calpredictedvalue(zip(x1, predictor), bpChoices[optBP], zip(optmodel.b, [optmodel.b[0] - optmodel.b[3] * bpChoices[optBP], optmodel.b[1], optmodel.b[2] + optmodel.b[3]]), 'exp_inoutflow_model2A.csv')
    return results, y_hat
def pickBreakpoint(response, predictor):
    """Try every observed predictor value as a breakpoint for a two-segment
    OLS model and pick the one with the lowest SSE.

    Returns (breakpoint value, results array, fitted model, fitted values).

    NOTE(review): Python 2 code (print statement, list-returning zip).
    """
    # Every sorted predictor value is a candidate breakpoint.
    bpChoices = np.sort(predictor)
    results = np.zeros((len(predictor)-1, 2))
    
    for i in range(len(predictor)-1):
        # Hinge term: zero below the candidate breakpoint, linear above it.
        x2star = (predictor - bpChoices[i]) * np.greater(predictor, bpChoices[i])   
        tempPredictor = np.array(zip(predictor, x2star))
        tempmodel = ols.ols(response, tempPredictor,'y',['x1', 'x2'])
        results[i,0] = i
        results[i,1] = tempmodel.sse

    # Winner: candidate with the lowest SSE.
    optBP = int(results[np.argmin(results, axis = 0)[1],0])
    print optBP, 'Optimal changepoint: ', bpChoices[optBP], ' with SSE = ', results[optBP, 1]

    # Refit at the winning breakpoint.
    x2star = (predictor - bpChoices[optBP]) * np.greater(predictor, bpChoices[optBP])
    optPredictor = np.array(zip(predictor, x2star))
    optmodel = ols.ols(response, optPredictor,'y',['x1', 'x2'])
    
    return bpChoices[optBP], results, optmodel, optmodel.b[0]+optmodel.b[1]*predictor+optmodel.b[2]*x2star
def regression():
    """Fit an OLS model of cache hits on the advance period and print the
    inputs, p-values and summary.

    NOTE(review): Python 2 code; `variables` and `ols` come from elsewhere
    in the module.
    """
    x, y = variables()
    print y
    print x
    cachemodel = ols.ols(y,x,'Hit',['advanceperiod'])
    print cachemodel.p
    cachemodel.summary()
def regress_basic(db, tech_metric, pad_zeros=True, max_year=2012):
  """Regress GDP (1980-2006) on the basic country statistics for
  `tech_metric` and print the model summary.

  NOTE(review): Python 2 code; `get_basic_stats`, `get_names`, `ols` and
  `scipy` come from module scope.
  """
  # Dataset 0 is the response (GDP); the rest are predictors, one per row,
  # transposed so rows become observations.
  datasets = get_basic_stats(db, tech_metric, pad_zeros=pad_zeros, max_year=max_year)
  GDP_8006 = scipy.array(datasets[0])
  independent_variables = scipy.array(datasets[1:])
  independent_variables = scipy.transpose(independent_variables)

  names = get_names(tech_metric)
  model = ols.ols(GDP_8006,independent_variables,
                  'GDP_8006',names)
  print "# Countries: %d" % len(db.select_countries_to_use())
  model.summary()
def testReflect():
  """ols with mode='reflect' must equal plain ols run on a reflect-padded
  input, for a range of chunk sizes."""
  nx, nh = 21, 7
  x = np.random.randint(-30, 30, size=(nx, nx)) + 1.0
  h = np.random.randint(-20, 20, size=(nh, nh)) + 1.0

  # Baseline: full-array OLS matches direct FFT convolution.
  full = ols(x, h)
  assert np.allclose(fftconvolve(x, h, mode='same'), full)

  padx, pady = 24, 28
  crop = (slice(pady, pady + nx), slice(padx, padx + nx))

  # Zero padding must not disturb the interior of the result.
  zeroPadded = ols(np.pad(x, [(pady, pady), (padx, padx)], mode='constant'), h)
  assert np.allclose(full, zeroPadded[crop])

  # Reflect-padded input, computed explicitly, is the reference for
  # mode='reflect'.
  reflectPadded = ols(np.pad(x, [(pady, pady), (padx, padx)], mode='reflect'), h)
  for chunkx in [2, 3, 4, 7, 8, 9, 10]:
    for chunky in [2, 3, 4, 7, 8, 9, 10]:
      chunked = ols(x, h, [chunky, chunkx], mode='reflect')
      assert np.allclose(chunked, reflectPadded[crop])
Example #28
0
    def test_no_x_names_multi_x(self):
        """With no names supplied, regressors default to const, x1, x2, ..."""
        np.random.seed(1234567890)

        x = np.random.rand(10, 2)
        y = np.random.rand(10)

        reg = ols(x, y)
        self.assertEqual(reg.x_varnm, ['const', 'x1', 'x2'])
Example #29
0
def regressionAnalysis(percentRedds,
                       varArray,
                       nameArray,
                       siteRange=range(0, 3)):
    '''
        Regress per-site redd-construction percentages on the site variables
        and print coefficient p-values and the model summary for each site.

        * percentRedds -> 2D array of sites vs. percent of redds constructed at
                          site.
                          Ex: [ [0.2,0.4,0.1,...], [0.1,0.6,0.2], ... ]
        * varArray -> 2D array of types of variables vs. 2D array of sites vs.
                      data of variable at site.
                      Ex: [ [ [13,26,...], ... ], .... ]
        * nameArray -> Array of names of varaibles defiend in `varArray`
                       Ex:
        * siteRange -> Range of what sites to use
                       Ex: range(0,2) or [2,3]

        NOTE(review): Python 2 code (print statements, list-returning zip);
        `asarray`, `empty` and `ols` come from module scope.
    '''
    # constructed redd percentage ([]->asarray)
    y = [asarray(arr) for arr in percentRedds]
    # empty list of "n-dimensions" [var1,var2,var3,...]
    nDim = len(varArray)
    x = [empty([len(arr), nDim]) for arr in y]

    # #use the total range of sites
    # if siteRange ="all":
    #     siteRange = range(0,len(percentRedds))
    # #use up to max site number
    # else:
    #     siteRange = range(0,siteRange)

    #perform the regression for all sites available
    for i in siteRange:
        j = 0
        # get just the variables for site `i` from `varArray`
        tempVarArray = []
        for vars in varArray:
            # save variable array for specific site `i`
            tempVarArray += [vars[i]]
        # create zipped array of variables in site `i`
        zipVarArray = zip(*tempVarArray)
        # iterate over each year in site
        for varTuple in zipVarArray:  #zipVarArray -> zip(var1[i],var2[i],...)
            # convert tuple of vars to an array of vars
            xTemp = [var for var in varTuple]
            x[i][j] = xTemp
            j += 1
        # use `ols` build for linear regression
        # possibly better to do different regression, like logistic?
        model = ols.ols(y[i], x[i], 'y:Precent of Redds', nameArray)
        # return coefficient p-values
        names = '[coeff,' + ','.join(nameArray) + ']'
        print names + ':\n', model.p
        # print results
        print model.summary()
def fix_badsnow(snow,mask):
    """
    find regions marked "bad" and fill them in using a linear regression

    `snow` is (time, station); `mask` is the same shape with 0 marking bad
    samples.  For each bad sample, an OLS fit of the current station on the
    other (good) stations over nearby times is used to predict a
    replacement; when too few good points exist, the previous time step's
    value is carried forward.  Modifies `snow` in place.

    NOTE(review): Python 2 code (the `10l` long literal below); `where`
    appears to come from a star import (numpy/pylab) -- confirm.
    """
    # from numpy.linalg import lstsq
    from ols import ols
    
    # find times when all of the snow data are "good"
    # allgood=where(np.min(snow,axis=1) >= 0)[0]
    
    # loop over all snow columns fixing bad data
    for i in range(len(snow[0,:])):
        # find bad data
        bad=where(mask[:,i] == 0)[0]
        # if there is any bad data fix it
        if len(bad) >0:
            # cursnow is data to be fixed
            cursnow=snow[:,i]
            # othersnow is data to use to fix it
            othersnow=np.delete(snow,i,1)
            othermask=np.delete(mask,i,1)
            
            for j in range(len(bad)):
                # Drop any other station that is itself bad at this time.
                otherbadsnows=where(othermask[bad[j],:]==0)[0]
                if len(otherbadsnows)<len(othersnow[0,:]):
                    if len(otherbadsnows)>0:
                        useothersnow=np.delete(othersnow,otherbadsnows,1)
                        useothermask=np.delete(othermask,otherbadsnows,1)
                    else: 
                        useothersnow=othersnow
                        useothermask=othermask
                    
                    # print("available data="+str(useothersnow.shape))
                    # find times when all of the other snow data are "good"
                    allgood=where((np.min(useothermask,axis=1) > 0) & (cursnow>0))[0]
                    # print("good data points="+str(len(allgood)))
                    # perform linear regression on the remaining data using the last and next 10 days of data
                    usepoints= (24*4*10l)
                    if len(allgood)>24:
                        # Restrict the fit to samples near the bad time step.
                        nearestpoints=where(np.abs(allgood-bad[j])<usepoints)[0]
                        if len(nearestpoints)<400:
                            nearestpoints=where(np.abs(allgood-bad[j])<usepoints*5)[0]
                            # print("using more points")
                        if len(nearestpoints)>=500:
                            # Predict the missing value from the regression.
                            line=ols(cursnow[allgood[nearestpoints]],useothersnow[allgood[nearestpoints],:])
                            snow[bad[j],i]=line.b[0]+np.sum(useothersnow[bad[j],:]*line.b[1:])
                            # print("Enough points: Station "+str(i)+"  time: "+str(bad[j]))
                        else:
                            # print("Station "+str(i)+"  time: "+str(bad[j])+"  oldval:"+str(snow[bad[j],i])+"  Newval:"+str(snow[bad[j]-1,i]))
                            snow[bad[j],i]=snow[bad[j]-1,i]
                    else:
                        snow[bad[j],i]=snow[bad[j]-1,i]
def regressionAnalysis( percentRedds, varArray, nameArray, siteRange=range(0,3) ):
    '''
        Regress per-site redd-construction percentages on the site variables
        and print coefficient p-values and the model summary for each site.

        * percentRedds -> 2D array of sites vs. percent of redds constructed at
                          site.
                          Ex: [ [0.2,0.4,0.1,...], [0.1,0.6,0.2], ... ]
        * varArray -> 2D array of types of variables vs. 2D array of sites vs.
                      data of variable at site.
                      Ex: [ [ [13,26,...], ... ], .... ]
        * nameArray -> Array of names of varaibles defiend in `varArray`
                       Ex:
        * siteRange -> Range of what sites to use
                       Ex: range(0,2) or [2,3]

        NOTE(review): Python 2 code; duplicate of the other regressionAnalysis
        in this file.  `asarray`, `empty` and `ols` come from module scope.
    '''
    # constructed redd percentage ([]->asarray)
    y = [ asarray(arr) for arr in percentRedds ]
    # empty list of "n-dimensions" [var1,var2,var3,...]
    nDim = len(varArray)
    x = [ empty( [len(arr),nDim] ) for arr in y ]

    # #use the total range of sites
    # if siteRange ="all":
    #     siteRange = range(0,len(percentRedds))
    # #use up to max site number
    # else:
    #     siteRange = range(0,siteRange)


    #perform the regression for all sites available
    for i in siteRange:
        j = 0
        # get just the variables for site `i` from `varArray`
        tempVarArray = []
        for vars in varArray:
            # save variable array for specific site `i`
            tempVarArray += [vars[i]]
        # create zipped array of variables in site `i`
        zipVarArray = zip(*tempVarArray)
        # iterate over each year in site
        for varTuple in zipVarArray: #zipVarArray -> zip(var1[i],var2[i],...)
            # convert tuple of vars to an array of vars
            xTemp = [ var for var in varTuple ]
            x[i][j] = xTemp
            j += 1
        # use `ols` build for linear regression
        # possibly better to do different regression, like logistic?
        model = ols.ols(y[i],x[i],'y:Precent of Redds',nameArray)
        # return coefficient p-values
        names = '[coeff,' + ','.join(nameArray) + ']'
        print names+':\n', model.p
        # print results
        print model.summary()
def calc(smc, ts, plot=None, force=None):
    """Linearly detrend smc and ts against sample index, then regress
    detrended smc on detrended ts.

    If `plot` is given, the detrended series and the correction (using the
    fitted slope, or `force` if supplied) are plotted and R^2 printed.
    Returns the regression coefficients smc_ts.b.
    """
    n = smc.size
    dummy = np.arange(n)

    # Remove the linear-in-time trend from smc.
    smc_init = ols(smc, dummy)
    smctest = smc - (smc_init.b[0] + smc_init.b[1] * dummy)

    # Remove the linear-in-time trend from ts.
    ts_init = ols(ts, dummy)
    tstest = ts - (ts_init.b[0] + ts_init.b[1] * dummy)

    smc_ts = ols(smctest, tstest)

    # BUGFIX: the `!= None` / `== None` comparisons are ambiguous (and raise)
    # for array-like arguments; identity tests against None are the correct form.
    if plot is not None:
        plt.clf()
        plt.plot(tstest / 1000)
        plt.plot(smctest)
        if force is None:
            plt.plot(smctest - (tstest * smc_ts.b[1]))
        else:
            plt.plot(smctest - (tstest * force))
        print(smc_ts.R2)
    return smc_ts.b
def build_model(y_gen, x_gen, results):
	"""Assemble the regression series, fit the returns OLS model, and
	print its summary. Returns the fitted model."""
	# build the combined series, then split into dependent / independent parts
	y, x = make_x_y(make_regression_series(y_gen, x_gen, results))

	feature_names = [
		"Days out", "Days out * VIX", "Days out * Vix^2",
		"Days out * Prev Period", "Days out^2",
		"Wingspan", "Wingspan^2",
		"Body Spread", "Body Spread^2", "Body Spread * Lag",
		"Days out * Body Spread", "Wingspan * Days Out",
		"VIX", "VIX^2", "VIX 12-1 Month Growth", "VIX 36-12 month growth",
		"Is Earnings?", "Is December?", "Expiration Year (CONTROL)",
	]
	model = ols.ols(y, x, "returns", feature_names)
	model.summary()
	return model
Example #34
0
def chow(X,Y, X1, Y1, X2, Y2, alpha = 0.05):
	"""
	Performs a Chow test for a structural break.

	  X, Y   - full-sample independent matrix / dependent vector
	  X1, Y1 - first sub-sample (before the breakpoint)
	  X2, Y2 - second sub-sample (after the breakpoint)
	  alpha  - significance level for the critical F value

	Returns (chow_ratio, p, Fcrit, df1, df2, rss, rss1, rss2).
	"""
	# number of regressors, +1 for the intercept the ols fit adds
	if isinstance(X[0],int) or isinstance(X[0], float):
		k = 1
	else:
		k = len(X[0])
	k = k + 1
	n = len(X)

	# Perform separate three least squares.
	allfit   = ols.ols(Y,X)
	lowerfit = ols.ols(Y1, X1)
	upperfit = ols.ols(Y2, X2)

	rss = allfit.rss
	rss1 = lowerfit.rss
	rss2 = upperfit.rss

	df1 = k
	df2 = n - 2 *k

	# Chow statistic: restricted vs. unrestricted residual sum of squares
	rss_u = (rss1 + rss2)
	num = (rss - rss_u) /float(df1)
	den = rss_u / df2

	chow_ratio = num/den
	# was: f.ppf(1 - 0.05, ...) -- the alpha parameter was silently ignored
	Fcrit = scipy.stats.f.ppf(1 - alpha, df1, df2)
	# was: f.pdf(...) -- the p-value is the upper-tail probability of the
	# F distribution (survival function), not the density at the statistic
	p = scipy.stats.f.sf(chow_ratio, df1, df2)
	return (chow_ratio, p, Fcrit, df1, df2, rss, rss1, rss2)
Example #35
0
def backward_stepwise(X, Y, feat_sel_num):
    """Backward stepwise feature selection: repeatedly drop the feature with
    the smallest Z-score until only ``feat_sel_num`` features remain.

    X            -- (sample_size, feat_size) design matrix
    Y            -- dependent variable
    feat_sel_num -- number of features to keep

    Returns the list of surviving column indices into X.
    """
    sample_size, feat_size = X.shape
    # candidate feature indices; one is removed per iteration below
    idx_set = [i for i in range(feat_size)]
    
    while len(idx_set) > feat_sel_num:
        # prepend an intercept column of ones to the currently selected features
        tX = np.concatenate((np.mat(np.ones(sample_size)).T, X[:, idx_set]), axis=1)
        beta_hat = ols.ols(tX, Y)
        beta_std_dev = ols.std_dev(tX, 1)
        # elementwise Z-scores: beta / std_dev
        Z_score = np.true_divide(np.array(beta_hat), np.array(beta_std_dev)).tolist()
        print(Z_score)
        print()
        # the -1 skips the intercept entry at position 0 of Z_score.
        # NOTE(review): if the intercept itself has the minimum Z-score this
        # yields idx == -1 and deletes the *last* feature; and dropping by
        # signed minimum (rather than minimum |Z|) would discard a strongly
        # significant negative coefficient -- confirm against the ols module.
        idx = Z_score.index(min(Z_score)) - 1
        del idx_set[idx]

    return idx_set
Example #36
0
    def test_weak_regression(self):
        """A regression on sign-alternating noise should be statistically
        weak: all p-values above alpha, all |t| statistics below tStatMax."""
        seed = 1234567890
        np.random.seed(seed)

        alpha = 0.1
        tStatMax = 1

        for i in range(10):
            # was: np.random.random_integers(100, 200, 10) -- removed from
            # NumPy (1.26); randint's upper bound is exclusive, hence 201
            x = np.random.randint(100, 201, 10)
            y = np.array([i * (-1)**index for index, i in enumerate(x)])

            reg = ols(x, y)

            self.assertTrue(np.all(reg.p > alpha))
            self.assertTrue(np.all(abs(reg.t) < tStatMax))
Example #37
0
def build_model(y_gen, x_gen, results):
    """Build the returns OLS model from the given generators, print its
    summary, and return the fitted model."""
    # construct the regression series, then split into y (dependent) and
    # x (independent) arrays
    y, x = make_x_y(make_regression_series(y_gen, x_gen, results))

    feature_names = [
        "Days out", "Days out * VIX", "Days out * Vix^2",
        "Days out * Prev Period", "Days out^2", "Wingspan", "Wingspan^2",
        "Body Spread", "Body Spread^2", "Body Spread * Lag",
        "Days out * Body Spread", "Wingspan * Days Out", "VIX", "VIX^2",
        "VIX 12-1 Month Growth", "VIX 36-12 month growth", "Is Earnings?",
        "Is December?", "Expiration Year (CONTROL)",
    ]
    model = ols.ols(y, x, "returns", feature_names)
    model.summary()
    return model
Example #38
0
    def test_beta(self):
        """With N >> K and well-conditioned Gaussian regressors, the OLS
        estimate should recover the true beta to within `tol`."""
        # Generate fake data: y = X @ beta + standard-normal noise
        K = 10
        N = 100000
        mu = 5
        sigma = 5
        beta = np.random.randint(0, 5, (K, 1))
        X = np.random.normal(mu, sigma, (N, K))
        e = np.random.normal(0, 1, (N, 1))
        y = X @ beta + e

        # was: the comparison hard-coded .01 instead of using `tol`
        tol = .01
        beta_hat, sigma_hat = ols(y, X)
        abs_diff = np.all(abs(beta - beta_hat) < tol)
        self.assertTrue(abs_diff)
Example #39
0
	def generate_profile(self,games):
		"""Fit a play-call OLS model from this team's plays across ``games``
		and return the four coefficients as strings."""
		features = []
		targets = []
		for game in games:
			for drive in game.drives:
				if drive.team != self.team_name:
					continue
				for play in drive.plays:
					if play.down == 0:
						continue
					# evaluate in the original order: down, togo, field
					# position, then the classified play type
					down = play.down
					togo = play.yards_togo
					distance = 50-int(str(play.yardline))
					play_type = self.determine_play_type(play.desc)
					if play_type:
						features.append([down, togo, distance])
						targets.append(play_type)
		mymodel = ols.ols(numpy.array(targets), numpy.array(features),
		                  'Play Call', ['down','togo','distance'])
		return map(str,[mymodel.b[0],mymodel.b[1],mymodel.b[2],mymodel.b[3]])
Example #40
0
 def __init__(self, calcres, *names):
     """Fit an OLS regression of the last variable in ``names`` on the
     preceding ones, pulling data out of ``calcres`` (an object exposing
     .universe, .dates, .names_index and a 3-D value array .V).

     ``names`` may be passed as separate arguments, one space-separated
     string, or a single list; the final entry is the Y variable name.
     """
     # normalise the *names argument into a flat list of strings
     if len(names) == 1:
         if isinstance(names[0], str):
             names = names[0].split(' ')
         elif isinstance(names[0], list):
             names = names[0]
         else:
             assert False, 'Unknown argument type'
     assert len(names) >= 2, 'Must have at least one X variable and one Y variable'
     Xnames = names[0:-1]
     Yname = names[-1]
     print 'FITTING' + str(names)
     # grab X and Y from calcres; variables missing from names_index are
     # left fully masked (X) or fully masked placeholder (Y)
     X = numpy.ma.masked_all((len(calcres.universe), len(calcres.dates), len(Xnames)))
     for i in range(len(Xnames)):
         try:
             X[:,:,i] = calcres.V[:,:,calcres.names_index[Xnames[i]]].copy()
         except KeyError:
             pass
     Y = numpy.ma.masked_all((len(calcres.universe), len(calcres.dates)))
     try:
         Y = calcres.V[:, :, calcres.names_index[Yname]].copy()
     except KeyError:
         pass
     # print unmasked-observation counts per variable
     for i in range(len(Xnames)):
         print 'X' + str(i), Xnames[i], 'count:', X[:, :, i].count()
     print 'Y ', Yname, 'count:', Y.count()
     # combine masks: an observation is dropped if Y or any X is masked
     mask = Y.mask
     for i in range(len(Xnames)):
         mask = mask | X[:, :, i].squeeze().mask
     Y.mask = mask
     # compress surviving observations into dense arrays for the fit
     Xc = numpy.zeros([numpy.sum(mask==False), len(Xnames)])
     #X.mask = numpy.tile(mask, (1, len(Xnames)))
     for i in range(len(Xnames)):
         X[:, :, i].mask = mask
         Xc[:, i] = X[:, :, i].compressed().squeeze()
     Yc = Y.compressed()
     # NOTE(review): the bare except silently swallows any fitting failure,
     # leaving self.m unset -- confirm callers tolerate a missing .m
     try:
         self.m = ols(Yc, Xc, Yname, Xnames)
         self.summary()
     except:
         pass
     self.calcres = calcres
     self.X = X
     self.Y = Y
     self.Xnames = Xnames
Example #41
0
    def printSummary(self):
        """Refit the MLR on the current x/y data and pretty-print, grouped by
        ambiguity, each candidate metabolite's suspected scan time, the
        model's predicted scan time, and the relative prediction error.
        """
        # fit ordinary least squares on the accumulated design matrix
        self.m = ols(array(self.yVector), array(self.xMatrix))

        b = self.m.b
        summary = {}

        print("")
        print("summary of all ambiguous metabolites:")
        for ambiguityID, ambiguityProps in self.ambiguities.iteritems():
            metabolites = ambiguityProps["candidates"]

            for keggID, knownProps in metabolites.iteritems():
                addToSummary = True
                scanTime = knownProps['SUSPECTED_SCANTIME']
                # collect predictor values in self.mlrPropCombo order;
                # a metabolite missing any predictor is skipped entirely
                lookedUpPropArr = []
                for prop in self.mlrPropCombo:
                    if prop in knownProps:
                        val = knownProps[prop]
                        lookedUpPropArr.append(val)
                    else:
                        addToSummary = False
                        break
                if addToSummary:
                    # linear prediction: intercept b[0] plus coefficient*value
                    yPred = b[0]
                    for propIndex in range(0, len(lookedUpPropArr)):
                        propVal = lookedUpPropArr[propIndex]
                        propCoefficient = b[propIndex + 1]
                        yPred += propCoefficient * propVal

                    metaboliteSummary = {
                        "scan": scanTime,
                        "prediction": yPred,
                        "error": (math.fabs(scanTime - yPred) / scanTime)
                    }
                    # NOTE(review): chosenKeggIDs is not defined in this
                    # method -- presumably a module-level mapping of chosen
                    # candidates; confirm it is in scope at call time
                    if keggID in chosenKeggIDs and chosenKeggIDs[keggID] == scanTime:
                        metaboliteSummary["chosen"] = True

                    summaryKey = "ambiguity" + str(ambiguityID)
                    if summaryKey not in summary:
                        summary[summaryKey] = []

                    summary[summaryKey].append(metaboliteSummary)


        pp = pprint.PrettyPrinter()
        pp.pprint(summary)
def fix_badsmc(smc):
    """Fill bad (negative) values in each column of ``smc`` in place.

    Each bad column is regressed on the remaining columns over rows where
    every retained probe reads good data; the fitted line supplies the
    replacement values. Probes that are chronically bad are dropped from
    the reference set until enough jointly-good rows exist.
    """
    smc_standard=smc.copy()
    # rows (times) where every probe currently reads good data
    allgood=np.where(np.min(smc,axis=1) >= 0)[0]
    # require roughly 15 days of 15-minute samples that are good everywhere;
    # otherwise iteratively drop the worst probe and retry
    while len(allgood)<(24*4*15):
        sumall=np.sum(smc_standard,axis=0)
        # was: worse=np.min(sumall) -- that used the minimum *value* as a
        # column index; argmin yields the index of the worst probe
        worse=np.argmin(sumall)
        # was indexed with the raw np.where tuple, which produces a
        # mis-shaped array; take the integer index array instead
        betterprobes=np.where(sumall>sumall[worse])[0]
        if len(betterprobes)<2:
            return
        smc_standard=smc_standard[:,betterprobes]
        # was recomputed from the unmodified `smc`, so the loop could never
        # make progress; recompute over the reduced probe set (row indices
        # remain valid for `smc`)
        allgood=np.where(np.min(smc_standard,axis=1) >= 0)[0]

    # loop over all smc columns fixing bad data
    for i in range(len(smc[0,:])):
        # find rows where this column is bad
        bad=np.where(smc[:,i] < 0)[0]
        # if there is any bad data fix it
        if len(bad) >0:
            print(len(allgood))
            # cursmc is the column to be fixed
            cursmc=smc[:,i]
            # othersmc holds the reference columns used to fix it
            othersmc=np.delete(smc,i,1)
            # drop reference columns that are themselves bad during the
            # period we need to fill
            otherbadsmcs=np.where(np.min(othersmc[bad,:], axis=0) < 0)[0]
            if len(otherbadsmcs)>0:
                othersmc=np.delete(othersmc,otherbadsmcs,1)

            # regress the column on the references over the jointly-good rows
            # usepoints= (-1*24*4*15)  # (optionally restrict to last ~15 days)
            usepoints=0
            if len(allgood)>50:
                print(len(allgood))
                # imported lazily so the no-bad-data path has no dependency
                from ols import ols
                line=ols(cursmc[allgood[usepoints:]],othersmc[allgood[usepoints:],:])
                # start with the intercept, then add each slope term
                smc[bad,i]=line.b[0]
                for thatsmc in range(len(othersmc[0,:])):
                    smc[bad,i]+=othersmc[bad,thatsmc]*line.b[thatsmc+1]
Example #43
0
def asian_path_control(K):
    """Monte-Carlo price of an arithmetic-mean Asian call with strike ``K``,
    using the discounted European call payoff as a control variate.

    Returns (mean price, std of price, confidence interval tuple).
    """
    # model parameters; NOTE(review): mu is defined but unused -- the paths
    # below use the risk-free drift r (risk-neutral pricing), confirm intent
    T=1.; J=252; dt=T/J
    mu=0.1; sigma=0.3; r=0.05
    s0=100.
    sim = 500
    level = 0.05
    # pilot run: geometric Brownian motion paths sampled at J steps
    path = [s0 * np.exp(np.linspace(dt,T,J)*(r-sigma**2/2) +
                   sigma*brownian_path(T,J)) for i in range(sim)]
    pricey = np.zeros(sim)
    pricex = np.zeros(sim)

    ii = 0
    for s in path:
        # discounted Asian payoff (y) and European payoff (x, the control)
        pricey[ii]=np.exp(-r*T)*np.maximum(0,np.mean(s)-K)
        pricex[ii]=np.exp(-r*T)*np.maximum(0,s[-1]-K)
        ii += 1
    # regress y on x to estimate the control-variate coefficient
    m = ols(pricey,pricex)
    print m.b
    print m.R2
    # main run with the remaining simulation budget
    sim = 9500
    path = [s0 * np.exp(np.linspace(dt,T,J)*(r-sigma**2/2) +
                   sigma*brownian_path(T,J)) for i in range(sim)]
    pricey = np.zeros(sim)
    pricex = np.zeros(sim)
    price = np.zeros(sim)    
    ii = 0
    # closed-form Black-Scholes expectation of the European payoff
    # (the known mean of the control variate)
    z1 = (np.log(s0/K)+(r+0.5*sigma**2)*T)/(sigma*np.sqrt(T))
    z2 = z1-sigma*np.sqrt(T)
    x_mean = s0*sp.stats.norm.cdf(z1) - np.exp(-r*T)*K*sp.stats.norm.cdf(z2)
    for s in path:
        pricey[ii]=np.exp(-r*T)*np.maximum(0,np.mean(s)-K)
        pricex[ii]=np.exp(-r*T)*np.maximum(0,s[-1]-K)
        # control-variate adjustment using the fitted slope m.b[-1]
        price[ii] = pricey[ii]-m.b[-1]*(pricex[ii]-x_mean)
        ii += 1
    
    plt.figure(2)
    plt.hist(price)
    # empirical confidence interval from the sorted adjusted prices
    results = np.sort(price)
    ci = (results[int(sim * (1 - level))],
                results[int(sim * level)])
    return np.mean(price),np.std(price),ci
def differentiated_regression(db, tech_metric, pad_zeros=True, max_year=2012):
  # We also divided the sample into developed and develop-
  # ing economies (the latter including both middle-income and low-income
  # countries according to the World Bank country classifications),
  # created dummy variables, and generated the new variables TELEPENH and
  # TELEPENL (the product of the dummy variables and the telecommunications
  # penetration variables)
  datasets = get_basic_stats(db, tech_metric, pad_zeros=pad_zeros, max_year=max_year)
  high_income = select_classification(db, "high")
  datasets.append(high_income)
  # TODO(cs): tried adding in low income too, but that totally throws off the
  #           results
  GDP_8006 = scipy.array(datasets[0])
  independent_variables = scipy.array(datasets[1:])
  independent_variables = scipy.transpose(independent_variables)

  names = get_names(tech_metric)
  model = ols.ols(GDP_8006,independent_variables,
                  'GDP_8006',names)
  print "# Countries: %d" % len(db.select_countries_to_use())
  model.summary()
Example #45
0
    def removeHighErrorCandidates(self):
        """Fit the MLR on the current data, then drop (in place, from
        inUseCandidates/yVector/xMatrix) every candidate that lacks a
        required property or whose relative scan-time prediction error
        exceeds ``self.maxScanIDPredictionError``.
        """
        self.m = ols(array(self.yVector), array(self.xMatrix))
        quantity = len(self.inUseCandidates)
        i = 0
        while i < quantity:
            candidate = self.inUseCandidates[i]
            # NOTE(review): the model is never refit after deletions, so all
            # candidates are judged by the original fit -- confirm intended
            b = self.m.b
            validCandidate = True
            props = candidate.props
            scanID = props["SUSPECTED_SCANTIME"]
            # gather predictor values; a candidate missing any required
            # property is removed immediately
            lookedUpPropArr = []
            for prop in self.mlrPropCombo:
                if prop in props:
                    val = props[prop]
                    lookedUpPropArr.append(val)
                else:
                    validCandidate = False
                    # remove the candidate and its regression row; step the
                    # index back so the element shifted into slot i is
                    # examined on the next pass
                    del self.inUseCandidates[i]
                    del self.yVector[i]
                    del self.xMatrix[i]
                    quantity -= 1
                    i -= 1
                    break

            if validCandidate:
                # linear prediction: intercept b[0] plus coefficient*value
                predScanID = b[0]
                for propIndex in range(0, len(lookedUpPropArr)):
                    propVal = lookedUpPropArr[propIndex]
                    propCoefficient = b[propIndex + 1]
                    predScanID += propCoefficient * propVal
            
                # relative error of the prediction vs. the suspected scan time
                errorPct = fabs(predScanID - scanID) / float(scanID)
                if errorPct > self.maxScanIDPredictionError:
                    del self.inUseCandidates[i]
                    del self.yVector[i]
                    del self.xMatrix[i]
                    quantity -= 1
                    i -= 1

            i += 1
Example #46
0
def main():
    """Read whitespace-separated numeric rows from stdin (first column is the
    dependent variable) and print an OLS summary of y on the remaining
    columns, labelled x1..xN.

    Raises ValueError if a row's column count disagrees with the first row.
    """
    r = re.compile(r'\s{1,}')
    data  = []
    label = []
    for line in sys.stdin:
        # list(...) so the row is a real sequence under Python 3 as well
        # (map() returns a lazy iterator there, breaking len() below)
        d = list(map(float, r.split(line.rstrip())))
        if not label:
            for i in range(len(d) - 1):
                label.append('x%d' % (i + 1))
        elif 1 < len(d) and len(label) != len(d) - 1:
            # was: raise BaseException -- raise a descriptive, narrower error
            # (still caught by any existing `except BaseException` handler)
            raise ValueError('inconsistent number of columns in input row')
        data.append(d)

    ary = numpy.array(data)
    y = ary[:,0]
    x = ary[:,1:]
    model = ols.ols(y, x, 'y', label)
    model.summary()
Example #47
0
    def findSol(self, windowSize):
        """Fit price on EMA/RSI/MACD and return a dict mapping each
        coefficient label ('constant', 'EMA', 'RSI', 'MACD') to its fitted
        value. ``windowSize`` is unused here (kept for interface parity with
        the windowed variant).
        """
        y = np.array(self.prices)
        x = np.vstack([self.ema, self.rsi, self.macd]).T

        mymodel = ols.ols(y, x, 'price', ['EMA', 'RSI', 'MACD'])

        labels = ['constant', 'EMA', 'RSI', 'MACD']
        # was: an `equation` display string was also built here but never
        # returned or printed (and its separator trim was off by one);
        # the dead code has been removed -- only the stats dict was used
        stats = {}
        for i in range(len(mymodel.b)):
            stats[labels[i]] = mymodel.b[i]

        return stats
    def findSol(self, windowSize):
        """Fit price on EMA over the latest ``windowSize`` samples and return
        a dict mapping each coefficient label ('constant', 'EMA') to its
        fitted value.
        """
        prices, ema, rsi, macd = self.tf.getTimewindow(windowSize)
        y = np.array(prices)
        x = np.vstack([ema]).T

        mymodel = ols.ols(y, x, 'price', ['EMA'])

        labels = ['constant', 'EMA']
        # was: an `equation` display string was also built here but never
        # returned or printed (and its separator trim was off by one);
        # the dead code has been removed -- only the stats dict was used
        stats = {}
        for i in range(len(mymodel.b)):
            stats[labels[i]] = mymodel.b[i]

        return stats
    Cl1 = numpy.append(Cl1, DiffLnAnnMeansCl1)
    Cl4 = numpy.append(Cl4, DiffLnAnnMeansCl4)
    Cl5 = numpy.append(Cl5, DiffLnAnnMeansCl5)
    Cl1prev = numpy.append(Cl1prev, DiffLnAnnMeansCl1prev)
    Cl4prev = numpy.append(Cl4prev, DiffLnAnnMeansCl4prev)
    Cl5prev = numpy.append(Cl5prev, DiffLnAnnMeansCl5prev)

UnitsVec = numpy.repeat(1, numpy.size(BAprev))

import statsmodels.api as sm
import ols

# Assemble the regression frame from the per-class series built above.
X = pandas.DataFrame({
    'BA': BA,
    'BAprev': BAprev,
    'BAprev2': BAprev2,
    'BAprev3': BAprev3,
    'BAnext': BAnext,
    'Cl1prev': Cl1prev,
    'Cl4prev': Cl4prev,
    'Cl5prev': Cl5prev,
    'Cl1': Cl1,
    'Cl4': Cl4,
    'Cl5': Cl5,
})

# NOTE(review): elsewhere in this file ols.ols takes positional (y, x, ...)
# arguments; the formula/data keyword style used here matches
# statsmodels.formula.api.ols instead -- confirm which `ols` is intended.
reg = ols.ols(formula='BAnext ~ BAprev', data=X).fit()
print(reg.summary())

plt.scatter(BAnext, BAprev)
Example #50
0
# Monte-Carlo check of the OLS sampling distribution.
# NOTE(review): nsim, nParams, nObs and the design matrix XX are defined
# earlier in the file (not shown here); beta_list/sigma_list/mu_list are
# created but never appended to in this loop -- confirm they are used later.
beta_list = []
sigma_list = []
mu_list = []
proportion_list = []

for s in range(nsim):

    mu_draw = 0
    sigma_draw = 1
    beta_draw = np.random.random((nParams, 1))

    # Generate different Ys: Y = XX @ beta + standard-normal noise
    E = np.random.normal(mu_draw, sigma_draw, nObs).reshape(nObs, 1)
    Y = (XX @ beta_draw).reshape(nObs, 1) + E

    # Estimate betas and sigmas (beta, standard errors, covariance matrix)
    beta, se, vcv = ols.ols(Y, XX)

    # draw simulated beta-hats from the estimated sampling distribution
    beta_hat_draw = np.random.multivariate_normal(beta.reshape(nParams), vcv,
                                                  (nsim, 1))

    # per-parameter fraction of draws falling below the true beta
    proportion = np.mean(
        beta_draw.reshape(1, nParams) > beta_hat_draw.reshape(nsim, nParams),
        0)

    proportion_list.append(proportion)

stuff = np.array(proportion_list)

plt.hist(stuff[:, 0])
Example #51
0
# <codecell>


#!python
import ols

# <markdowncell>

# After importing the class you can estimate a model by passing data to it
# as follows
# 
# <codecell>


#!python
# illustrative fit: y = dependent data, x = independent data, with the
# variable labels described in the surrounding text
mymodel = ols.ols(y,x,y_varnm,x_varnm)

# <markdowncell>

# where y is an array with data for the dependent variable, x contains the
# independent variables, y\_varnm, is a string with the variable label for
# the dependent variable, and x\_varnm is a list of variable labels for
# the independent variables. Note: An intercept term and variable label is
# automatically added to the model.
# 
# Example Usage
# -------------
# 
# <codecell>

Example #52
0
# Fetch the dependent (YPLL) and independent (health-measure) arrays.
ypll_arr, measures_arr = get_arrs(dependent_cols, independent_cols)
print ypll_arr.shape
print measures_arr[:, 1].shape

import matplotlib.pyplot as plt

# Four exploratory scatter plots of YPLL against selected measure columns.
fig = plt.figure(figsize=(6, 10))

subplot = fig.add_subplot(411)
subplot.scatter(measures_arr[:, 6], ypll_arr)
subplot.set_title("ypll vs. % of population with diabetes")

subplot = fig.add_subplot(412)
subplot.scatter(measures_arr[:, 1], ypll_arr, color="#1f77b4")  # 1 = age
subplot.set_title("ypll vs. % population less than 18 years of age")

subplot = fig.add_subplot(413)
subplot.scatter(measures_arr[:, 10], ypll_arr, color="#1f77b4")  # 10 = income
subplot.set_title("ypll vs. median household income")

subplot = fig.add_subplot(414)
subplot.scatter(measures_arr[:, 12], ypll_arr, color="#1f77b4")  # 12 = free lunch (comment previously said income)
subplot.set_title("ypll vs. Free lunch")

plt.savefig('four-scatters.png', format='png')

import ols

# Simple single-regressor OLS: YPLL rate on % diabetes.
model = ols.ols(ypll_arr, measures_arr[:, 6], "YPLL RATE", ["% Diabetes"])
model.summary()
Example #53
0
        RH.append(float(RH1[t]))
        WS.append(float(WS1[t]))
        Sin.append(float(Sin1[t]))

# Mask the -999.99 missing-value sentinel in the air-temperature series.
Ta_m = np.ma.masked_values(Ta, -999.99, atol=0.09)
#X = ts_inter.interp_masked1d(Ta_m, 'cubic')  # {'constant', 'linear', 'cubic', 'quintic'}
#plt.plot(datenum,X)
#plt.plot(datenum,Ta_m, 'o')
#plt.show()

# Design matrix: columns are Ta, RH, WS, Sin.
# was: dtype=np.float -- removed in NumPy 1.20+; the builtin float is the
# documented replacement
x = np.zeros((len(datenum), 4), dtype=float)
x[:, 0] = Ta_m
x[:, 1] = RH
x[:, 2] = WS
x[:, 3] = Sin
# Regress Ta, then RH, on wind speed and incoming solar radiation.
mymodel = ols.ols(x[:, 0], x[:, 2:], y_varnm='Ta', x_varnm=['WS', 'Sin'])
mymodel.p  # return coefficient p-values
mymodel.summary()  # print results
print
mymodel = ols.ols(x[:, 1], x[:, 2:], y_varnm='RH', x_varnm=['WS', 'Sin'])
mymodel.p  # return coefficient p-values
mymodel.summary()  # print results

##X = np.fft.fft(Ta_m)
##Y = np.zeros(len(Ta))
###Y[important frequencies] = X[important frequencies]
##plt.plot(datenum,X)
##plt.show()
##
##X1 = np.fft.fft(WS)
##Y1 = np.zeros(len(WS))
Example #54
0
	def printOlsSummary(self):
		"""Fit price against time and print the OLS model summary."""
		model = ols.ols(self.getPrices(), self.getTimes(), "Price", ["Time"])
		model.summary()