def test_singular(self):
    """ols must raise when the design matrix is singular (identical columns)."""
    n_obs, n_vars = 1000, 10
    y = np.random.random((n_obs, 1))
    # Every column of X is the all-ones vector, so X'X is rank deficient.
    X = np.ones((n_obs, n_vars))
    with self.assertRaises(Exception):
        ols(y, X)
def testinner(maxlen):
    # Closure over x, h, gold from the enclosing test scope: verifies that
    # overlap-save convolution matches `gold` for every chunk size up to
    # `maxlen`, exercising manual olsStep tiling, ols() with and without an
    # explicit nfft, and a memory-mapped output array.
    # FFT sizes: smallest 2,3-smooth number covering each padded dimension.
    nfft = [nextprod([2, 3], x) for x in np.array(h.shape) + maxlen - 1]
    hpre = prepareh(h, nfft)
    for xlen0 in range(maxlen):
        for ylen0 in range(maxlen):
            ylen, xlen = 1 + ylen0, 1 + xlen0
            # Tile the output by calling olsStep on each [ystart, xstart] patch
            # and reassembling with hstack/vstack.
            dirt = np.vstack([
                np.hstack([
                    olsStep(x, hpre, [ystart, xstart], [ylen, xlen], nfft, h.shape)
                    for xstart in range(0, x.shape[0], xlen)
                ])
                for ystart in range(0, x.shape[1], ylen)
            ])
            assert np.allclose(dirt, gold)
            # Same computation through the high-level entry point.
            dirt2 = ols(x, h, [ylen, xlen], nfft)
            assert np.allclose(dirt2, gold)
            # And with nfft chosen automatically.
            dirt3 = ols(x, h, [ylen, xlen])
            assert np.allclose(dirt3, gold)
            # Same computation with numpy memmaps (on-disk input and output);
            # assumes 'x.npy' was written by the enclosing test -- TODO confirm.
            memx = np.lib.format.open_memmap('x.npy')
            memout = np.lib.format.open_memmap('out.npy', mode='w+', dtype=x.dtype, shape=x.shape)
            dirtmem = ols(memx, h, [ylen, xlen], out=memout)
            assert np.allclose(dirtmem, gold)
            # Close the memmaps before re-reading the result from disk.
            del memout
            del memx
            dirtmem2 = np.load('out.npy')
            assert np.allclose(dirtmem2, gold)
def testPyFFTW_complex():
    """ols with complex inputs: numpy-FFT baseline versus PyFFTW backends."""
    nx, nh = 21, 7
    x = np.random.randint(-30, 30, size=(nx, nx)) + 1j * np.random.randint(-30, 30, size=(nx, nx))
    h = np.random.randint(-20, 20, size=(nh, nh)) + 1j * np.random.randint(-20, 20, size=(nh, nh))
    gold = fftconvolve(x, h, mode='same')

    # Baseline: plain numpy complex FFTs must reproduce fftconvolve.
    baseline = ols(x, h, rfftn=np.fft.fftn, irfftn=np.fft.ifftn)
    assert np.allclose(gold, baseline)

    # PyFFTW, default single-threaded interface.
    import pyfftw.interfaces.numpy_fft as fftw
    single = ols(x, h, rfftn=fftw.fftn, irfftn=fftw.ifftn)
    assert np.allclose(gold, single)

    # PyFFTW with two threads.
    def fft(*args, **kwargs):
        return fftw.fftn(*args, threads=2, **kwargs)

    def ifft(*args, **kwargs):
        return fftw.ifftn(*args, threads=2, **kwargs)

    threaded = ols(x, h, rfftn=fft, irfftn=ifft)
    assert np.allclose(gold, threaded)
def pickBreakpointV2(response, x1, predictor):
    # Segmented (piecewise) regression: scan candidate breakpoints, fit a model
    # with slope and intercept shifts past each candidate, and keep the one
    # maximizing R^2. Returns the scan table.
    # NOTE(review): `bpChoices` is never defined here (the geneBpChoices call
    # is commented out) -- presumably a module-level global; verify.
    #print int(min(predictor))*10, int(max(predictor)+1)*10, int(max(predictor) - min(predictor) + 1)/2
    #bpChoices = geneBpChoices(min(predictor), max(predictor), 20)
    results = np.zeros((len(bpChoices)-1, 2))
    print bpChoices
    for i in range(len(bpChoices)-1):
        print i
        # Hinge terms: nonzero only where predictor exceeds the candidate
        # breakpoint, allowing slope (x2star) and x1-effect (x1star) shifts.
        x2star = (predictor - bpChoices[i]) * np.greater(predictor, bpChoices[i])
        x1star = x1 * np.greater(predictor, bpChoices[i])
        tempPredictor = np.array(zip(x1, x1star, predictor, x2star))
        #fileLoc = filePath + 'temp.csv'
        #np.savetxt(fileLoc, tempPredictor, delimiter=',', fmt = '%s')
        #print tempPredictor
        tempmodel = ols.ols(response, tempPredictor, 'y', ['F1F2', 'F1F2star', 'dist', 'diststar'])
        results[i,0] = i
        #results[i,1] = tempmodel.sse
        results[i,1] = tempmodel.R2
    # Candidate with the highest R^2 wins.
    optBP = int(results[np.argmax(results, axis = 0)[1],0])
    print 'Optimal Index:', optBP
    print 'Optimal changepoint: ', bpChoices[optBP], ' exp value: ', np.exp(bpChoices[optBP]), ' with R2 = ', results[optBP, 1]
    #x2star = (predictor - bpChoices[optBP]) * np.greater(predictor, bpChoices[optBP])
    #optPredictor = np.array(zip(predictor, x2star))
    #optmodel = ols.ols(response, optPredictor,'y',['x1', 'x2'])
    # Refit at the optimal breakpoint to report the final model.
    x1star = x1 * np.greater(predictor, bpChoices[optBP])
    x2star = (predictor - bpChoices[optBP]) * np.greater(predictor, bpChoices[optBP])
    optPredictor = np.array(zip(x1, x1star, predictor, x2star))
    optmodel = ols.ols(response, optPredictor, 'y', ['F1F2', 'F1F2star', 'dist', 'diststar'])
    #return bpChoices[optBP], results, optmodel, optmodel.b[0]+optmodel.b[1]*predictor+optmodel.b[2]*x2star
    print results, optmodel.b
    print optmodel.summary()
    return results
def separateRegression(response, predictor, sepData, bpChoices, predictorName):
    # Two-regime regression: for each candidate breakpoint, split the data via
    # separateData() and fit independent OLS models on each side; keep the
    # breakpoint that maximizes the combined R^2 from calculateR2().
    # Returns (scan table, fitted values reassembled in original row order).
    results = np.zeros((len(bpChoices)-1, 4))
    for bpid in range(len(bpChoices)-1):
        print bpid
        responseLeft, responseRight, predictorLeft, predictorRight, dataleftIdx, datarightIdx = separateData(response, predictor, sepData, bpChoices[bpid])
        #print np.mean(responseLeft), np.mean(responseRight)
        #print predictorLeft, predictorRight
        leftmodel = ols.ols(responseLeft, predictorLeft, 'y', predictorName)
        rightmodel = ols.ols(responseRight, predictorRight, 'y', predictorName)
        results[bpid,0] = bpid
        results[bpid,1] = leftmodel.R2adj
        results[bpid,2] = rightmodel.R2adj
        #results[bpid,3] = 1 - (leftmodel.e.var() + rightmodel.e.var())/(leftmodel.y.var() + rightmodel.y.var())
        # Combined goodness of fit across both regimes.
        results[bpid,3] = calculateR2(leftmodel, rightmodel, np.mean(response))
        #results[bpid,3] = (leftmodel.R2 + rightmodel.R2)/2
    print results
    # Breakpoint with the best combined R^2 (last column).
    optBP = int(results[np.argmax(results, axis = 0)[-1],0])
    responseLeft, responseRight, predictorLeft, predictorRight, dataleftIdx, datarightIdx = separateData(response, predictor, sepData, bpChoices[optBP])
    leftmodel = ols.ols(responseLeft, predictorLeft, 'y', predictorName)
    rightmodel = ols.ols(responseRight, predictorRight, 'y', predictorName)
    #print calculateR2(leftmodel)
    #optmodel = ols.ols(response, optPredictor,'y',['F1F2', 'F1F2star', 'dist', 'diststar'])
    # Reassemble fitted values into the original row order using the index
    # arrays returned by separateData.
    yhatL = np.dot(leftmodel.x, leftmodel.b)
    yhatR = np.dot(rightmodel.x, rightmodel.b)
    yhat = np.zeros(len(response))
    for i in range(len(yhatL)):
        yhat[dataleftIdx[i]] = yhatL[i]
    for i in range(len(yhatR)):
        yhat[datarightIdx[i]] = yhatR[i]
    # Undo the log transform -- assumes the response was modeled on a log
    # scale; TODO confirm against the caller.
    yhat = np.exp(yhat)
    fileLoc = filepath + 'separateR_model2_y_hat.csv'
    #np.savetxt(fileLoc, yhat, delimiter=',', fmt = '%s')
    print 'Optimal Index:', optBP
    print 'Optimal changepoint: ', bpChoices[optBP], ' exp value: ', np.exp(bpChoices[optBP]), ' with R2 = ', results[optBP, -1]
    #return bpChoices[optBP], results, optmodel, optmodel.b[0]+optmodel.b[1]*predictor+optmodel.b[2]*x2star
    #print results, optmodel.b
    print '----------------------------- left model -----------------------------'
    print leftmodel.summary()
    print '----------------------------- right model -----------------------------'
    print rightmodel.summary()
    print 'Optimal Index:', optBP
    print 'Optimal changepoint: ', bpChoices[optBP], ' exp value: ', np.exp(bpChoices[optBP]), ' with R2 = ', results[optBP, -1]
    print 'before bp, b0 =', leftmodel.b[0], ', b1 =', leftmodel.b[1], ', bd =', leftmodel.b[2], ', MSE =', leftmodel.sse
    print 'after bp, b0 =', rightmodel.b[0], ', b1 =', rightmodel.b[1], ', bd =', rightmodel.b[2], ', MSE =', rightmodel.sse
    #calpredictedvalue(predictor, bpChoices[optBP], zip(leftmodel.b, rightmodel.b), 'exp_inoutflow_model2B.csv')
    #calconfidenceinterval(predictorLeft, predictorRight, [leftmodel.sse, rightmodel.sse], response, predictor, bpChoices[optBP], zip(leftmodel.b, rightmodel.b), 'ci_model2B.csv')
    return results, yhat
def evaluateScore(self, chromosome):
    """Score a chromosome (list of metabolites) by the fit of an OLS model.

    Regresses each metabolite's SUSPECTED_SCANTIME on the properties listed in
    self.mlrPropCombo, using only metabolites that carry every property.
    Returns the model's R^2, or 0 if the regression cannot be fit.
    """
    xMatrix = []
    yVector = []
    for metabolite in chromosome:
        knownProps = metabolite.props
        # Blank/filler metabolites won't have a suspected scantime.
        if 'SUSPECTED_SCANTIME' not in knownProps:
            continue
        scanTime = knownProps['SUSPECTED_SCANTIME']
        xMatrixRow = []
        # Metabolites lacking any property in the combo are not counted; the
        # same mechanism can deliberately cancel out metabolites.
        countThisMetabolite = True
        for prop in self.mlrPropCombo:
            if prop in knownProps:
                xMatrixRow.append(knownProps[prop])
            else:
                countThisMetabolite = False
        # All properties extant and added to the matrix row.
        if countThisMetabolite:
            xMatrix.append(xMatrixRow)
            yVector.append(scanTime)
    try:
        m = ols(array(yVector), array(xMatrix))
    except Exception:
        # BUG FIX: was a bare `except:` -- keep the best-effort "score 0 on a
        # degenerate fit" behavior without swallowing KeyboardInterrupt etc.
        return 0
    rSquared = m.R2
    return rSquared
def forward_stepwise(X, Y, feat_sel_num):
    """Greedy forward feature selection by OLS mean squared error.

    At each of feat_sel_num rounds, adds the not-yet-selected column of X that
    minimizes the MSE of an OLS fit (with intercept) on the training rows.
    Returns the list of selected column indices, in selection order.
    """
    train_size, feat_size = X.shape
    # Design matrix starts as the intercept column only.
    tX = np.mat(np.ones(train_size)).T
    feat_set = []
    for _ in range(feat_sel_num):
        tmp_idx = -1
        min_mse = 1e10
        # Append a placeholder column that each candidate feature overwrites.
        tX = np.concatenate((tX, np.mat(np.zeros(train_size)).T), axis=1)
        for idx in range(feat_size):
            if idx in feat_set:
                continue
            tX[:, -1] = X[:, idx]
            Y_hat = np.matmul(tX, ols.ols(tX, Y))
            error = Y - Y_hat
            # BUG FIX: the original divided by `test_size`, a name not defined
            # in this function; the residuals here are over the training rows.
            # (The divisor is constant across candidates, so the argmin -- and
            # hence the selected feature set -- is unchanged.)
            mse = 1 / train_size * np.dot(error.T, error)[0, 0]
            if mse < min_mse:
                tmp_idx = idx
                min_mse = mse
        # Freeze the winning feature into the design matrix.
        tX[:, -1] = X[:, tmp_idx]
        feat_set.append(tmp_idx)
    return feat_set
def main(events):
    # Pull event data from Mixpanel, regress the first event (response) on the
    # remaining events (predictors), and print the fitted regression equation.
    data = get_event_data(events)
    if len(data) == 0:
        sys.exit("ERROR: Unable to retrieve data from Mixpanel")
    matrix = event_data_to_matrix(data, events)
    # First column of the matrix is the response; the rest are predictors.
    # NOTE: pop(0) mutates the caller's `events` list.
    response_name = events.pop(0)
    response_data = matrix[:,0]
    predictors_data = matrix[:,1:]
    predictors_names = events
    model = ols(response_data, predictors_data, response_name, predictors_names)
    model.summary()
    ## Generate Equation
    # Gather list of coefficients so we can build our formula
    coeff_dict = dict(zip(model.x_varnm, model.b))
    # Start with constant coefficient
    equation = "%s = %.5f" % (response_name, coeff_dict['const'])
    for name in predictors_names:
        val = coeff_dict[name]
        equation += " + %.5f(%s)" % (val, name)
    print "Regression equation for response variable '%s'" % response_name
    print
    print equation
def mlr(data, dependent_key, independent_keys=None) :
    # Multiple linear regression of `dependent_key` on `independent_keys`
    # (a default set of language-feature columns when not given); prints the
    # model summary. On a fitting failure the inputs are dumped and the
    # exception is re-raised.
    import ols
    import numpy as np
    if independent_keys == None:
        independent_keys = ['Age',
                            # 'CapersJones',
                            'Popularity',
                            'C-based',
                            'OO',
                            'Compiled',
                            'DynamicTyping']
    # Response column first, then the predictors.
    all_keys = [dependent_key]
    all_keys.extend(independent_keys)
    values = extract_vals(data, all_keys)
    y = values[:,0]
    x = values[:,1:]
    try :
        model = ols.ols(y, x, dependent_key, independent_keys)
    except Exception :
        # Dump the inputs to aid debugging singular-matrix failures, then
        # propagate the error.
        print 'Encountered a LinAlgError!'
        print 'dumping values...'
        print '\ny values'
        print y
        print '\nx values'
        print x
        raise
    model.summary()
def evaluateScore(chromosome):
    # Fitness of a property combination (chromosome): R^2 of an OLS fit of
    # measured scan time on those properties, across all metabolites that have
    # every property in the chromosome. Returns 0 for degenerate cases.
    xMatrix = []
    yVector = []
    for keggID, properties in metabolites.iteritems():
        addThisItem = True
        xMatrixRow = []
        for prop in chromosome:
            if prop in properties:
                xMatrixRow.append(properties[prop])
            else:
                # Missing any required property disqualifies the metabolite.
                addThisItem = False
                break
        if addThisItem:
            measuredScanTime = properties["MEASURED_SCANTIME"]
            yVector.append(measuredScanTime)
            xMatrix.append(xMatrixRow)
    try:
        m = ols(array(yVector), array(xMatrix))
    except:
        # Degenerate/singular fit: treat as a worthless combination.
        return 0
    rSquared = m.R2
    if rSquared < 0:
        #dunno why this is happening, but when it does just disregard the result
        rSquared = 0
    if float(len(yVector)) / float(len(metabolites.values())) < 0.4:
        #disregard this combination if less than half the metabolites contain the needed properties
        # (the threshold actually used is 40%)
        rSquared = 0
    return rSquared
def best_subset(trainX, trainY, idx, comb, label): global min_mpe, best_set # global trainX, trainY global testX, testY global train_size, test_size global beta, test_error if idx == comb.size: # print(comb) sub_trainX = np.concatenate((np.mat(np.ones(train_size)).T, trainX[:, comb]), axis=1) beta_hat = ols.ols(sub_trainX, trainY) mpe = ols.eval_mpe(sub_trainX, trainY, beta_hat) if mpe < min_mpe: min_mpe = mpe best_set = copy.deepcopy(comb) beta = copy.deepcopy(beta_hat) # testY_hat = np.matmul(np.concatenate((np.mat(np.ones(test_size)).T, testX[:, comb]), axis=1), beta_hat) # error = testY - testY_hat # test_error = 1/test_size * np.dot(error.T, error)[0, 0] else: for i in range(0 if idx==0 else comb[idx-1], label.size): if label[i] == 0: comb[idx] = i label[i] = 1 best_subset(trainX, trainY, idx+1, comb, label) label[i] = 0
def testInner(nx, nh):
    """1-D overlap-save ols must match scipy's fftconvolve for tiny chunk sizes."""
    x = 1.0 + np.random.randint(-30, 30, size=nx)
    h = 1.0 + np.random.randint(-20, 20, size=nh)
    gold = fftconvolve(x, h, mode='same')
    for chunk in (2, 3):
        assert np.allclose(gold, ols(x, h, [chunk]))
def test_strong_regression(self):
    """Perfectly linear data yields near-exact coefficients, ~0 p-values, huge t-stats."""
    seed = 1234567890
    np.random.seed(seed)
    # Due to the strong linearity of the regressions,
    # RuntimeWarnings may be raised when computing some
    # of the regression statistics. We can ignore these.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for i in range(10):
            # BUG FIX: np.random.random_integers was removed in NumPy 1.25.
            # randint's upper bound is exclusive, so 101 keeps the [1, 100]
            # range (the draws differ from the old stream, but any random
            # linear relation satisfies the assertions below).
            coeff = np.random.randint(1, 101)
            inter = np.random.randint(1, 101)
            x = np.random.rand(10)
            y = np.array([coeff * i + inter for i in x])
            reg = ols(x, y)
            expected = np.array([inter, coeff])
            diff = abs(expected - reg.b)
            self.assertTrue(np.all(diff < EPSILON))
            self.assertTrue(np.all(reg.p < EPSILON))
            self.assertTrue(np.all(abs(reg.t) > MAXINT))
def getCurrentRSquared(self):
    """Return R^2 of an OLS fit of the accumulated yVector on xMatrix, or 0 on failure."""
    try:
        m = ols(array(self.yVector), array(self.xMatrix))
    except Exception:
        # BUG FIX: was a bare `except:`; keep the "0 on a degenerate fit"
        # behavior without swallowing KeyboardInterrupt/SystemExit.
        return 0
    rSquared = m.R2
    return rSquared
def regression():
    """Run a multiple-variable OLS regression of cache hits on timeper and advance."""
    x, y = variables()
    model = ols.ols(y, x, "Hit", ["timeper", "advance"])
    # Attribute access kept from the original (triggers p-value computation if lazy).
    model.p
    model.summary()
def simpleRegression(response, predictor, predictorName): model = ols.ols(response, predictor,'y',predictorName) #model = ols.ols(response, predictor) print model.summary() print model.b print predictor #print 'b0 =',model.b[0], ', b1 =', model.b[1], ', bd =', model.b[2], ', MSE =', model.sse return np.dot(model.x, model.b)
def tryMLR(self):
    """Attempt the multiple linear regression; return True on success, False otherwise.

    On success the fitted model is stored on self.m.
    """
    try:
        self.m = ols(array(self.yVector), array(self.xMatrix))
        #, y_varnm = 'y', x_varnm = ['x1','x2','x3','x4','x5','x6','x7'])
        #self.m.summary()
        return True
    except Exception:
        # BUG FIX: was a bare `except:`; keep the best-effort False result
        # without swallowing KeyboardInterrupt/SystemExit.
        #print("error")
        return False
def getTrend(t, y, mask = None):
    """
    Find the trend in a time-series y
    With times vector t

    If mask is given, both t and y are filtered through applyMask1D first.
    Returns the OLS coefficient vector (intercept, slope).
    """
    # BUG FIX: `if mask:` raises "truth value of an array is ambiguous" when
    # mask is a numpy array, and wrongly skips an all-zero mask; testing
    # identity against None is the intended check.
    if mask is not None:
        t = applyMask1D(t, mask)
        y = applyMask1D(y, mask)
    assert(t.shape[0] == y.shape[0])
    mymodel = ols.ols(y, t, 'y', ['t'])
    return mymodel.b  # return coefficients
def multiple_regression(y_array, x_array, round_to=5):
    # Fit y on multiple predictors and return the rounded coefficients.
    # x_array is a list of predictor series (one list per predictor); it is
    # transposed so rows become observations.
    y = np.array(y_array)
    x = np.transpose(np.array(x_array))
    # Predictors are auto-named x1, x2, ...
    model = ols.ols(y, x, 'y', ['x' + str(i+1) for i,x in enumerate(x_array)])
    model.summary()
    # Print each fitted coefficient (b0 is the intercept).
    for i,b in enumerate(model.b):
        print "b" + str(i) + " = " + str(round(b,round_to))
    return [round(b, round_to) for b in model.b]
def calccoeff(grndata, reddata) :
    # Compute third-order polynomial coordinate-transformation coefficients
    # between the "Grn" and "Red" channels in both directions, plus per-point
    # Euclidean residual errors. Columns 1 and 2 of each input hold x and y.
    xGrn=grndata[:,1]
    yGrn=grndata[:,2]
    xRed=reddata[:,1]
    yRed=reddata[:,2]
    # Part I: Calculating Grn to Red transformation coefficients
    # The function chosen is a third order polynomial function
    Grn=c_[xGrn, xGrn**2, xGrn**3, yGrn, yGrn**2, yGrn**3]
    mymodel = ols.ols(xRed,Grn,y_varnm='xRed',x_varnm=['x','x^2','x^3','y','y^2','y^3'])
    coeffGtoR_x = mymodel.b
    mymodel.summary()
    # NOTE(review): y_varnm reads 'xRed' here although the response is yRed --
    # label only, it does not affect the fit.
    mymodel = ols.ols(yRed,Grn,y_varnm='xRed',x_varnm=['x','x^2','x^3','y','y^2','y^3'])
    coeffGtoR_y = mymodel.b
    mymodel.summary()
    coeffGtoR = hstack((coeffGtoR_x,coeffGtoR_y))
    # Prepend the constant column and measure the per-point mapping error.
    GrnPlus=c_[repeat(1,Grn.shape[0]), Grn]
    errGtoR=sqrt((dot(GrnPlus,coeffGtoR_x)-xRed)**2+(dot(GrnPlus,coeffGtoR_y)-yRed)**2)
    # Part II: Calculating Red to Grn transformation coefficients
    # The function chosen is a third order polynomial function
    Red=c_[xRed, xRed**2, xRed**3, yRed, yRed**2, yRed**3]
    mymodel = ols.ols(xGrn, Red, y_varnm='Grn',x_varnm=['x','x^2','x^3','y','y^2','y^3'])
    coeffRtoG_x = mymodel.b
    mymodel.summary()
    mymodel = ols.ols(yGrn, Red, y_varnm='Grn',x_varnm=['x','x^2','x^3','y','y^2','y^3'])
    coeffRtoG_y = mymodel.b
    mymodel.summary()
    coeffRtoG = hstack((coeffRtoG_x,coeffRtoG_y))
    RedPlus=c_[repeat(1,Red.shape[0]), Red]
    errRtoG=sqrt((dot(RedPlus,coeffRtoG_x)-xGrn)**2+(dot(RedPlus,coeffRtoG_y)-yGrn)**2)
    return (coeffGtoR, coeffRtoG, errGtoR, errRtoG)
def pickBreakpoint(response, predictor, bpvarible, bpChoices, predictorName):
    # Segmented regression over candidate breakpoints in `bpvarible`: each
    # candidate adds a hinge column to the predictors, and the breakpoint with
    # the highest R^2 wins. Returns the scan table, fitted values, the final
    # model, and the chosen breakpoint value.
    results = np.zeros((len(bpChoices) - 1, 2))
    print bpChoices
    bpvarible = np.array(bpvarible)
    predictor = np.array(predictor)
    predictor_t = np.transpose(predictor)
    predictorName = np.append(predictorName, "bpvarible")
    for i in range(len(bpChoices) - 1):
        # Hinge term: nonzero only past the candidate breakpoint.
        x2star = (bpvarible - bpChoices[i]) * np.greater(bpvarible, bpChoices[i])
        # Flatten-append the hinge, reshape to (n_vars, n_obs), transpose back
        # to observations-by-variables.
        tempPredictor = np.append(predictor_t, x2star)
        tempPredictor.shape = (len(predictorName), -1)
        tempPredictor = np.transpose(tempPredictor)
        # print tempPredictor
        tempmodel = ols.ols(response, tempPredictor, "y", predictorName)
        results[i, 0] = i
        results[i, 1] = tempmodel.R2
        # print results
    # Candidate with the highest R^2.
    optBP = int(results[np.argmax(results, axis=0)[1], 0])
    print "Optimal Index:", optBP
    print "Optimal changepoint: ", bpChoices[optBP], " exp value: ", np.exp(bpChoices[optBP]), " with R2 = ", results[optBP, 1]
    # Refit at the winning breakpoint.
    x2star = (bpvarible - bpChoices[optBP]) * np.greater(bpvarible, bpChoices[optBP])
    tempPredictor = np.append(predictor_t, x2star)
    tempPredictor.shape = (len(predictorName), -1)
    optPredictor = np.transpose(tempPredictor)
    optmodel = ols.ols(response, optPredictor, "y", predictorName)
    # optmodel = ols.ols(response, optPredictor,'y',predictorName)
    # Fitted values: design matrix (with constant) times coefficients.
    y_hat = np.dot(optmodel.x, optmodel.b)
    # fileLoc = filepath + 'pieceR_model2_y_hat.csv'
    # np.savetxt(fileLoc, y_hat, delimiter=',', fmt = '%s')
    print optmodel.summary()
    print "MSE =", optmodel.sse
    # print 'before bp, b0 =',optmodel.b[0], ', b1 =', optmodel.b[1], ', bd =', optmodel.b[2]
    # print 'after bp, b0 =', optmodel.b[0] - optmodel.b[3] * bpChoices[optBP], ', b1 =', optmodel.b[1], ', bd =', optmodel.b[2] + optmodel.b[3]
    # calpredictedvalue(zip(x1, predictor), bpChoices[optBP], zip(optmodel.b, [optmodel.b[0] - optmodel.b[3] * bpChoices[optBP], optmodel.b[1], optmodel.b[2] + optmodel.b[3]]), 'exp_inoutflow_model2A.csv')
    return results, y_hat, optmodel, bpChoices[optBP]
def test_str_object(self):
    """str(ols) reports the number of observations."""
    np.random.seed(1234567890)
    xs = np.random.rand(10)
    ys = np.random.rand(10)
    model = ols(xs, ys)
    expected = "OLS Regression on 10 Observations"
    self.assertTrue(str(model) == expected, "Strings don't match")
def pickBreakpoint(response, x1, predictor):
    # Segmented regression on `predictor`: scans candidate breakpoints for the
    # one maximizing R^2, refits there, prints diagnostics and per-segment
    # coefficients, writes predicted values, and returns the scan table plus
    # fitted values.
    # NOTE(review): `bpChoices` and `filepath` are not defined locally (their
    # setup lines are commented out) -- presumably module globals; verify.
    #print int(min(predictor))*10, int(max(predictor)+1)*10, int(max(predictor) - min(predictor) + 1)/2
    #bpChoices = geneBpChoices(min(predictor), max(predictor), 20)
    results = np.zeros((len(bpChoices)-1, 2))
    print bpChoices
    predictor = np.array(predictor)
    for i in range(len(bpChoices)-1):
        #print i
        #print type((predictor - bpChoices[i]))
        # Hinge term: nonzero only past the candidate breakpoint.
        x2star = (predictor - bpChoices[i]) * np.greater(predictor, bpChoices[i])
        tempPredictor = np.array(zip(x1, predictor, x2star))
        #fileLoc = filePath + 'temp.csv'
        #np.savetxt(fileLoc, tempPredictor, delimiter=',', fmt = '%s')
        tempmodel = ols.ols(response, tempPredictor, 'y', ['F1F2', 'dist', 'diststar'])
        results[i,0] = i
        #results[i,1] = tempmodel.sse
        results[i,1] = tempmodel.R2
    print results
    # Candidate with the highest R^2.
    optBP = int(results[np.argmax(results, axis = 0)[1],0])
    print 'Optimal Index:', optBP
    print 'Optimal changepoint: ', bpChoices[optBP], ' exp value: ', np.exp(bpChoices[optBP]), ' with R2 = ', results[optBP, 1]
    #x2star = (predictor - bpChoices[optBP]) * np.greater(predictor, bpChoices[optBP])
    #optPredictor = np.array(zip(predictor, x2star))
    #optmodel = ols.ols(response, optPredictor,'y',['x1', 'x2'])
    # Refit with the hinge at the optimal breakpoint.
    x2star = (predictor - bpChoices[optBP]) * np.greater(predictor, bpChoices[optBP])
    optPredictor = np.array(zip(x1, predictor, x2star))
    optmodel = ols.ols(response, optPredictor, 'y', ['F1F2', 'dist', 'diststar'])
    #print optmodel.b
    #return bpChoices[optBP], results, optmodel, optmodel.b[0]+optmodel.b[1]*predictor+optmodel.b[2]*x2star
    # Fitted values: design matrix (with constant) times coefficients.
    y_hat = np.dot(optmodel.x, optmodel.b)
    fileLoc = filepath + 'pieceR_model2_y_hat.csv'
    #np.savetxt(fileLoc, y_hat, delimiter=',', fmt = '%s')
    print optmodel.summary()
    print 'MSE =', optmodel.sse
    # Per-segment coefficients implied by the hinge parameterization.
    print 'before bp, b0 =',optmodel.b[0], ', b1 =', optmodel.b[1], ', bd =', optmodel.b[2]
    print 'after bp, b0 =', optmodel.b[0] - optmodel.b[3] * bpChoices[optBP], ', b1 =', optmodel.b[1], ', bd =', optmodel.b[2] + optmodel.b[3]
    calpredictedvalue(zip(x1, predictor), bpChoices[optBP], zip(optmodel.b, [optmodel.b[0] - optmodel.b[3] * bpChoices[optBP], optmodel.b[1], optmodel.b[2] + optmodel.b[3]]), 'exp_inoutflow_model2A.csv')
    return results, y_hat
def pickBreakpoint(response, predictor):
    # Simplest segmented-regression scan: every observed predictor value is a
    # candidate breakpoint; the two-piece linear model minimizing SSE wins.
    # Returns (breakpoint value, scan table, fitted model, fitted values).
    bpChoices = np.sort(predictor)
    results = np.zeros((len(predictor)-1, 2))
    for i in range(len(predictor)-1):
        # Hinge term: nonzero only past the candidate breakpoint.
        x2star = (predictor - bpChoices[i]) * np.greater(predictor, bpChoices[i])
        tempPredictor = np.array(zip(predictor, x2star))
        tempmodel = ols.ols(response, tempPredictor, 'y', ['x1', 'x2'])
        results[i,0] = i
        results[i,1] = tempmodel.sse
    # Candidate with the smallest SSE.
    optBP = int(results[np.argmin(results, axis = 0)[1],0])
    print optBP, 'Optimal changepoint: ', bpChoices[optBP], ' with SSE = ', results[optBP, 1]
    # Refit at the winning breakpoint.
    x2star = (predictor - bpChoices[optBP]) * np.greater(predictor, bpChoices[optBP])
    optPredictor = np.array(zip(predictor, x2star))
    optmodel = ols.ols(response, optPredictor, 'y', ['x1', 'x2'])
    return bpChoices[optBP], results, optmodel, optmodel.b[0]+optmodel.b[1]*predictor+optmodel.b[2]*x2star
def regression():
    """Multiple variable regression takes place."""
    # Regress cache hits on the advance-period predictor; prints the inputs,
    # the p-values, and the model summary.
    x, y = variables()
    print y
    print x
    cachemodel = ols.ols(y, x, 'Hit', ['advanceperiod'])
    print cachemodel.p
    cachemodel.summary()
def regress_basic(db, tech_metric, pad_zeros=True, max_year=2012):
    # Regress GDP (the first dataset returned by get_basic_stats) on the
    # technology-metric series and print the model summary.
    datasets = get_basic_stats(db, tech_metric, pad_zeros=pad_zeros, max_year=max_year)
    GDP_8006 = scipy.array(datasets[0])
    # Remaining datasets are the predictors; transpose to observations x vars.
    independent_variables = scipy.array(datasets[1:])
    independent_variables = scipy.transpose(independent_variables)
    names = get_names(tech_metric)
    model = ols.ols(GDP_8006, independent_variables, 'GDP_8006', names)
    print "# Countries: %d" % len(db.select_countries_to_use())
    model.summary()
def testReflect():
    """'reflect' mode must match convolving an explicitly reflection-padded input."""
    nx, nh = 21, 7
    x = np.random.randint(-30, 30, size=(nx, nx)) + 1.0
    h = np.random.randint(-20, 20, size=(nh, nh)) + 1.0
    y = ols(x, h)
    gold = fftconvolve(x, h, mode='same')
    assert np.allclose(gold, y)

    px, py = 24, 28
    # Zero padding must leave the central region of the result untouched.
    zeroPadded = np.pad(x, [(py, py), (px, px)], mode='constant')
    yZero = ols(zeroPadded, h)
    assert np.allclose(y, yZero[py:py + nx, px:px + nx])

    # Reference result: convolve an explicitly reflected input, then crop.
    reflected = np.pad(x, [(py, py), (px, px)], mode='reflect')
    yReflected = ols(reflected, h)
    for sizex in [2, 3, 4, 7, 8, 9, 10]:
        for sizey in [2, 3, 4, 7, 8, 9, 10]:
            out = ols(x, h, [sizey, sizex], mode='reflect')
            assert np.allclose(out, yReflected[py:py + nx, px:px + nx])
def test_no_x_names_multi_x(self):
    """Default variable names are const, x1, x2, ... when none are supplied."""
    np.random.seed(1234567890)
    x = np.random.rand(10, 2)
    y = np.random.rand(10)
    reg = ols(x, y)
    self.assertEqual(reg.x_varnm, ['const', 'x1', 'x2'])
def regressionAnalysis(percentRedds, varArray, nameArray, siteRange=range(0, 3)): ''' * percentRedds -> 2D array of sites vs. percent of redds constructed at site. Ex: [ [0.2,0.4,0.1,...], [0.1,0.6,0.2], ... ] * varArray -> 2D array of types of variables vs. 2D array of sites vs. data of variable at site. Ex: [ [ [13,26,...], ... ], .... ] * nameArray -> Array of names of varaibles defiend in `varArray` Ex: * siteRange -> Range of what sites to use Ex: range(0,2) or [2,3] ''' # constructed redd percentage ([]->asarray) y = [asarray(arr) for arr in percentRedds] # empty list of "n-dimensions" [var1,var2,var3,...] nDim = len(varArray) x = [empty([len(arr), nDim]) for arr in y] # #use the total range of sites # if siteRange ="all": # siteRange = range(0,len(percentRedds)) # #use up to max site number # else: # siteRange = range(0,siteRange) #perform the regression for all sites available for i in siteRange: j = 0 # get just the variables for site `i` from `varArray` tempVarArray = [] for vars in varArray: # save variable array for specific site `i` tempVarArray += [vars[i]] # create zipped array of variables in site `i` zipVarArray = zip(*tempVarArray) # iterate over each year in site for varTuple in zipVarArray: #zipVarArray -> zip(var1[i],var2[i],...) # convert tuple of vars to an array of vars xTemp = [var for var in varTuple] x[i][j] = xTemp j += 1 # use `ols` build for linear regression # possibly better to do different regression, like logistic? model = ols.ols(y[i], x[i], 'y:Precent of Redds', nameArray) # return coefficient p-values names = '[coeff,' + ','.join(nameArray) + ']' print names + ':\n', model.p # print results print model.summary()
def fix_badsnow(snow,mask): """ find regions marked "bad" and fill them in using a linear regression """ # from numpy.linalg import lstsq from ols import ols # find times when all of the snow data are "good" # allgood=where(np.min(snow,axis=1) >= 0)[0] # loop over all snow columns fixing bad data for i in range(len(snow[0,:])): # find bad data bad=where(mask[:,i] == 0)[0] # if there is any bad data fix it if len(bad) >0: # cursnow is data to be fixed cursnow=snow[:,i] # othersnow is data to use to fix it othersnow=np.delete(snow,i,1) othermask=np.delete(mask,i,1) for j in range(len(bad)): otherbadsnows=where(othermask[bad[j],:]==0)[0] if len(otherbadsnows)<len(othersnow[0,:]): if len(otherbadsnows)>0: useothersnow=np.delete(othersnow,otherbadsnows,1) useothermask=np.delete(othermask,otherbadsnows,1) else: useothersnow=othersnow useothermask=othermask # print("available data="+str(useothersnow.shape)) # find times when all of the other snow data are "good" allgood=where((np.min(useothermask,axis=1) > 0) & (cursnow>0))[0] # print("good data points="+str(len(allgood))) # perform linear regression on the remaining data using the last and next 10 days of data usepoints= (24*4*10l) if len(allgood)>24: nearestpoints=where(np.abs(allgood-bad[j])<usepoints)[0] if len(nearestpoints)<400: nearestpoints=where(np.abs(allgood-bad[j])<usepoints*5)[0] # print("using more points") if len(nearestpoints)>=500: line=ols(cursnow[allgood[nearestpoints]],useothersnow[allgood[nearestpoints],:]) snow[bad[j],i]=line.b[0]+np.sum(useothersnow[bad[j],:]*line.b[1:]) # print("Enough points: Station "+str(i)+" time: "+str(bad[j])) else: # print("Station "+str(i)+" time: "+str(bad[j])+" oldval:"+str(snow[bad[j],i])+" Newval:"+str(snow[bad[j]-1,i])) snow[bad[j],i]=snow[bad[j]-1,i] else: snow[bad[j],i]=snow[bad[j]-1,i]
def regressionAnalysis( percentRedds, varArray, nameArray, siteRange=range(0,3) ): ''' * percentRedds -> 2D array of sites vs. percent of redds constructed at site. Ex: [ [0.2,0.4,0.1,...], [0.1,0.6,0.2], ... ] * varArray -> 2D array of types of variables vs. 2D array of sites vs. data of variable at site. Ex: [ [ [13,26,...], ... ], .... ] * nameArray -> Array of names of varaibles defiend in `varArray` Ex: * siteRange -> Range of what sites to use Ex: range(0,2) or [2,3] ''' # constructed redd percentage ([]->asarray) y = [ asarray(arr) for arr in percentRedds ] # empty list of "n-dimensions" [var1,var2,var3,...] nDim = len(varArray) x = [ empty( [len(arr),nDim] ) for arr in y ] # #use the total range of sites # if siteRange ="all": # siteRange = range(0,len(percentRedds)) # #use up to max site number # else: # siteRange = range(0,siteRange) #perform the regression for all sites available for i in siteRange: j = 0 # get just the variables for site `i` from `varArray` tempVarArray = [] for vars in varArray: # save variable array for specific site `i` tempVarArray += [vars[i]] # create zipped array of variables in site `i` zipVarArray = zip(*tempVarArray) # iterate over each year in site for varTuple in zipVarArray: #zipVarArray -> zip(var1[i],var2[i],...) # convert tuple of vars to an array of vars xTemp = [ var for var in varTuple ] x[i][j] = xTemp j += 1 # use `ols` build for linear regression # possibly better to do different regression, like logistic? model = ols.ols(y[i],x[i],'y:Precent of Redds',nameArray) # return coefficient p-values names = '[coeff,' + ','.join(nameArray) + ']' print names+':\n', model.p # print results print model.summary()
def calc(smc, ts, plot=None, force=None):
    """Regress detrended soil moisture on detrended surface temperature.

    Both series are linearly detrended (OLS against the sample index) before
    the final regression. Returns the coefficient vector of the final fit.

    plot  -- if not None, draw diagnostic plots of the detrended series and
             the corrected soil moisture.
    force -- if not None, use this slope instead of the fitted one when
             plotting the corrected series.
    """
    n = smc.size
    dummy = np.arange(n)
    # Remove each series' own linear trend.
    smc_init = ols(smc, dummy)
    smctest = smc - (smc_init.b[0] + smc_init.b[1] * dummy)
    ts_init = ols(ts, dummy)
    tstest = ts - (ts_init.b[0] + ts_init.b[1] * dummy)
    # Regress detrended soil moisture on detrended temperature.
    smc_ts = ols(smctest, tstest)
    # IDIOM FIX: compare against None with `is`, not !=/== (which would also
    # misbehave if an array were ever passed as the flag).
    if plot is not None:
        plt.clf()
        plt.plot(tstest / 1000)
        plt.plot(smctest)
        if force is None:
            plt.plot(smctest - (tstest * smc_ts.b[1]))
        else:
            plt.plot(smctest - (tstest * force))
    print(smc_ts.R2)
    return smc_ts.b
def build_model(y_gen, x_gen, results):
    """Fit the returns regression over the series produced by the generators."""
    series = make_regression_series(y_gen, x_gen, results)
    y, x = make_x_y(series)
    factor_names = [
        "Days out", "Days out * VIX", "Days out * Vix^2", "Days out * Prev Period",
        "Days out^2", "Wingspan", "Wingspan^2", "Body Spread", "Body Spread^2",
        "Body Spread * Lag", "Days out * Body Spread", "Wingspan * Days Out",
        "VIX", "VIX^2", "VIX 12-1 Month Growth", "VIX 36-12 month growth",
        "Is Earnings?", "Is December?", "Expiration Year (CONTROL)",
    ]
    model = ols.ols(y, x, "returns", factor_names)
    model.summary()
    return model
def chow(X, Y, X1, Y1, X2, Y2, alpha = 0.05):
    """
    Performs a chow test for a structural break.
    Split input matrix and output vector into two using specified breakpoint.
    X - independent variables matrix (pooled)
    Y - dependent variable vector (pooled)
    (X1, Y1) and (X2, Y2) are the two sub-samples.
    alpha is significance level for the critical F value.

    Returns (chow_ratio, p, Fcrit, df1, df2, rss, rss1, rss2).
    """
    # Number of regressors including the intercept.
    if isinstance(X[0], int) or isinstance(X[0], float):
        k = 1
    else:
        k = len(X[0])
    k = k + 1
    n = len(X)
    # Perform separate three least squares: pooled, lower, upper.
    allfit = ols.ols(Y, X)
    lowerfit = ols.ols(Y1, X1)
    upperfit = ols.ols(Y2, X2)
    rss = allfit.rss
    rss1 = lowerfit.rss
    rss2 = upperfit.rss
    df1 = k
    df2 = n - 2 * k
    rss_u = (rss1 + rss2)
    num = (rss - rss_u) / float(df1)
    den = rss_u / df2
    chow_ratio = num / den
    # BUG FIX: the critical value previously hard-coded 0.05 and ignored the
    # `alpha` parameter.
    Fcrit = scipy.stats.f.ppf(1 - alpha, df1, df2)
    # BUG FIX: the p-value is the upper-tail probability of the F distribution
    # (survival function), not the density at the statistic.
    p = scipy.stats.f.sf(chow_ratio, df1, df2)
    return (chow_ratio, p, Fcrit, df1, df2, rss, rss1, rss2)
def backward_stepwise(X, Y, feat_sel_num):
    # Backward feature elimination: repeatedly drop the feature whose OLS
    # coefficient has the smallest Z-score until feat_sel_num features remain.
    # Returns the surviving column indices of X.
    sample_size, feat_size = X.shape
    idx_set = [i for i in range(feat_size)]
    while len(idx_set) > feat_sel_num:
        # Design matrix: intercept column followed by the surviving features.
        tX = np.concatenate((np.mat(np.ones(sample_size)).T, X[:, idx_set]), axis=1)
        beta_hat = ols.ols(tX, Y)
        beta_std_dev = ols.std_dev(tX, 1)
        # Z-score = coefficient / its standard deviation, elementwise.
        Z_score = np.true_divide(np.array(beta_hat), np.array(beta_std_dev)).tolist()
        print(Z_score)
        print()
        # NOTE(review): uses the raw minimum rather than min |Z|, so strongly
        # negative coefficients are eliminated first -- confirm intended.
        # The -1 compensates for the intercept entry at position 0; if the
        # intercept itself has the minimum Z-score this yields idx == -1 and
        # drops the LAST surviving feature -- confirm intended.
        idx = Z_score.index(min(Z_score)) - 1
        del idx_set[idx]
    return idx_set
def test_weak_regression(self):
    """Alternating-sign targets should fit poorly: large p-values, small t-stats."""
    seed = 1234567890
    np.random.seed(seed)
    alpha = 0.1
    tStatMax = 1
    for i in range(10):
        # BUG FIX: np.random.random_integers was removed in NumPy 1.25.
        # randint's upper bound is exclusive, so 201 keeps the [100, 200]
        # range (draws differ from the old stream, but any such data keeps
        # the regression weak).
        x = np.random.randint(100, 201, 10)
        # y flips sign on every observation, destroying any linear relation.
        y = np.array([i * (-1)**index for index, i in enumerate(x)])
        reg = ols(x, y)
        self.assertTrue(np.all(reg.p > alpha))
        self.assertTrue(np.all(abs(reg.t) < tStatMax))
def build_model(y_gen, x_gen, results):
    """Build and summarize the returns regression for the given series generators."""
    regression_series = make_regression_series(y_gen, x_gen, results)
    y, x = make_x_y(regression_series)
    names = ["Days out", "Days out * VIX", "Days out * Vix^2",
             "Days out * Prev Period", "Days out^2", "Wingspan", "Wingspan^2",
             "Body Spread", "Body Spread^2", "Body Spread * Lag",
             "Days out * Body Spread", "Wingspan * Days Out", "VIX", "VIX^2",
             "VIX 12-1 Month Growth", "VIX 36-12 month growth", "Is Earnings?",
             "Is December?", "Expiration Year (CONTROL)"]
    fitted = ols.ols(y, x, "returns", names)
    fitted.summary()
    return fitted
def test_beta(self):
    """OLS coefficient estimates must be close to the true betas on synthetic data."""
    # Generate fake data: y = X @ beta + standard-normal noise.
    K = 10
    N = 100000
    mu = 5
    sigma = 5
    beta = np.random.randint(0, 5, (K, 1))
    X = np.random.normal(mu, sigma, (N, K))
    e = np.random.normal(0, 1, (N, 1))
    y = X @ beta + e

    # Test that beta_hat is close to beta.
    # BUG FIX: `tol` was defined but the literal .01 was used in the check;
    # use the named tolerance so the two cannot drift apart.
    tol = .01
    beta_hat, sigma_hat = ols(y, X)
    abs_diff = np.all(abs(beta - beta_hat) < tol)
    self.assertTrue(abs_diff)
def generate_profile(self,games):
    # Build a play-calling profile for this team: regress play type on
    # (down, yards to go, distance from midfield) over all of the team's
    # plays in `games`. Returns the intercept and three coefficients as strings.
    x = []
    y = []
    for g in games:
        for p in g.drives:
            if p.team == self.team_name:
                for play in p.plays:
                    # Skip plays with no down recorded.
                    if play.down != 0:
                        decision = [play.down, play.yards_togo, 50-int(str(play.yardline)), self.determine_play_type(play.desc)]
                        # Only keep plays whose type could be classified
                        # (determine_play_type returned a truthy value).
                        if decision[3]:
                            x.append(decision[:3])
                            y.append(decision[3])
    y = numpy.array(y)
    x = numpy.array(x)
    mymodel = ols.ols(y, x, 'Play Call', ['down','togo','distance'])
    return map(str, [mymodel.b[0], mymodel.b[1], mymodel.b[2], mymodel.b[3]])
def __init__(self, calcres, *names):
    # Fit an OLS regression across a calcres panel (universe x dates).
    # `names` may be given as separate strings, one space-separated string,
    # or a single list; the LAST name is the Y variable, the rest are X.
    if len(names) == 1:
        if isinstance(names[0], str):
            names = names[0].split(' ')
        elif isinstance(names[0], list):
            names = names[0]
        else:
            assert False, 'Unknown argument type'
    assert len(names) >= 2, 'Must have at least one X variable and one Y variable'
    Xnames = names[0:-1]
    Yname = names[-1]
    print 'FITTING' + str(names)
    # grab X and Y from calcres; names missing from names_index are silently
    # left fully masked.
    X = numpy.ma.masked_all((len(calcres.universe), len(calcres.dates), len(Xnames)))
    for i in range(len(Xnames)):
        try:
            X[:,:,i] = calcres.V[:,:,calcres.names_index[Xnames[i]]].copy()
        except KeyError:
            pass
    Y = numpy.ma.masked_all((len(calcres.universe), len(calcres.dates)))
    try:
        Y = calcres.V[:, :, calcres.names_index[Yname]].copy()
    except KeyError:
        pass
    # print counts of unmasked observations per variable
    for i in range(len(Xnames)):
        print 'X' + str(i), Xnames[i], 'count:', X[:, :, i].count()
    print 'Y ', Yname, 'count:', Y.count()
    # Combined mask: a cell is usable only when Y and every X are present.
    mask = Y.mask
    for i in range(len(Xnames)):
        mask = mask | X[:, :, i].squeeze().mask
    Y.mask = mask
    # Compress each masked variable into a dense column of Xc.
    Xc = numpy.zeros([numpy.sum(mask==False), len(Xnames)])
    #X.mask = numpy.tile(mask, (1, len(Xnames)))
    for i in range(len(Xnames)):
        X[:, :, i].mask = mask
        Xc[:, i] = X[:, :, i].compressed().squeeze()
    Yc = Y.compressed()
    try:
        self.m = ols(Yc, Xc, Yname, Xnames)
        self.summary()
    except:
        # NOTE(review): bare except silently hides failed fits, and self.m is
        # left unset afterwards -- downstream code must tolerate that.
        pass
    self.calcres = calcres
    self.X = X
    self.Y = Y
    self.Xnames = Xnames
def printSummary(self):
    # Refit the MLR on the accumulated data, then print, for every candidate
    # metabolite in every ambiguity group, its measured scan time, the model's
    # predicted scan time, and the relative error; flags the chosen candidate.
    self.m = ols(array(self.yVector), array(self.xMatrix))
    b = self.m.b
    summary = {}
    print("")
    print("summary of all ambiguous metabolites:")
    for ambiguityID, ambiguityProps in self.ambiguities.iteritems():
        metabolites = ambiguityProps["candidates"]
        for keggID, knownProps in metabolites.iteritems():
            addToSummary = True
            scanTime = knownProps['SUSPECTED_SCANTIME']
            lookedUpPropArr = []
            for prop in self.mlrPropCombo:
                if prop in knownProps:
                    val = knownProps[prop]
                    lookedUpPropArr.append(val)
                else:
                    # Candidates missing any model property cannot be predicted.
                    addToSummary = False
                    break
            if addToSummary:
                # Manual dot product: intercept + sum(coefficient * value).
                yPred = b[0]
                for propIndex in range(0, len(lookedUpPropArr)):
                    propVal = lookedUpPropArr[propIndex]
                    propCoefficient = b[propIndex + 1]
                    yPred += propCoefficient * propVal
                metaboliteSummary = {
                    "scan": scanTime,
                    "prediction": yPred,
                    # Relative prediction error.
                    "error": (math.fabs(scanTime - yPred) / scanTime)
                }
                # Mark the candidate that was actually chosen for this scan time.
                if keggID in chosenKeggIDs and chosenKeggIDs[keggID] == scanTime:
                    metaboliteSummary["chosen"] = True
                summaryKey = "ambiguity" + str(ambiguityID)
                if summaryKey not in summary:
                    summary[summaryKey] = []
                summary[summaryKey].append(metaboliteSummary)
    pp = pprint.PrettyPrinter()
    pp.pprint(summary)
def fix_badsmc(smc):
    """Fill bad (negative) soil-moisture readings in-place.

    For each column of ``smc`` (time x probes) containing negative values,
    regress that probe on the other probes over the times where all probes
    are good, and replace the bad entries with the regression prediction.

    Modifies ``smc`` in place; returns None.  Returns early (leaving the
    data untouched) if fewer than two usable probe columns remain.
    """
    # from numpy.linalg import lstsq
    from ols import ols
    smc_standard = smc.copy()
    # Times at which every probe reports good (non-negative) data.
    allgood = np.where(np.min(smc_standard, axis=1) >= 0)[0]
    # Need at least ~15 days of 15-minute samples of all-good data; if not,
    # iteratively drop the worst probe (smallest column sum) and retry.
    while len(allgood) < (24 * 4 * 15):
        sumall = np.sum(smc_standard, axis=0)
        # BUG FIX: original used np.min(sumall) — a *value* — as an index.
        worst = np.argmin(sumall)
        betterprobes = np.where(sumall > sumall[worst])[0]
        if len(betterprobes) < 2:
            return
        smc_standard = smc_standard[:, betterprobes]
        # BUG FIX: original recomputed from the unchanged `smc`, so the loop
        # condition could never change (infinite loop); use smc_standard.
        allgood = np.where(np.min(smc_standard, axis=1) >= 0)[0]
    # Loop over all smc columns fixing bad data.
    for i in range(len(smc[0, :])):
        bad = np.where(smc[:, i] < 0)[0]
        if len(bad) > 0:
            print(len(allgood))
            cursmc = smc[:, i]          # data to be fixed
            othersmc = np.delete(smc, i, 1)  # predictors
            # Drop predictor columns that are themselves bad during the bad
            # period of the current column.
            otherbadsmcs = np.where(np.min(othersmc[bad, :], axis=0) < 0)[0]
            if len(otherbadsmcs) > 0:
                othersmc = np.delete(othersmc, otherbadsmcs, 1)
            # usepoints = (-1*24*4*15)  # optionally restrict to last ~15 days
            usepoints = 0
            if len(allgood) > 50:
                print(len(allgood))
                line = ols(cursmc[allgood[usepoints:]],
                           othersmc[allgood[usepoints:], :])
                # Prediction = intercept + sum(coef * predictor column).
                smc[bad, i] = line.b[0]
                for thatsmc in range(len(othersmc[0, :])):
                    smc[bad, i] += othersmc[bad, thatsmc] * line.b[thatsmc + 1]
def asian_path_control(K):
    # Monte-Carlo price of an arithmetic Asian call with strike K, using the
    # terminal European call payoff as a control variate.
    # Returns (mean price, std of price samples, confidence-interval tuple).
    # NOTE: Python 2 code (print statements).
    T=1.; J=252; dt=T/J          # 1 year, 252 daily steps
    mu=0.1; sigma=0.3; r=0.05    # NOTE(review): mu is set but never used
    s0=100.
    sim = 500                    # pilot sample, used only to estimate beta
    level = 0.05
    # GBM paths under the risk-neutral drift r.
    path = [s0 * np.exp(np.linspace(dt,T,J)*(r-sigma**2/2) + sigma*brownian_path(T,J)) for i in range(sim)]
    pricey = np.zeros(sim)  # discounted Asian payoff
    pricex = np.zeros(sim)  # discounted European payoff (control)
    ii = 0
    for s in path:
        pricey[ii]=np.exp(-r*T)*np.maximum(0,np.mean(s)-K)
        pricex[ii]=np.exp(-r*T)*np.maximum(0,s[-1]-K)
        ii += 1
    # Pilot regression of Y on X; m.b[-1] is the control-variate coefficient.
    m = ols(pricey,pricex)
    print m.b
    print m.R2
    sim = 9500  # main run
    path = [s0 * np.exp(np.linspace(dt,T,J)*(r-sigma**2/2) + sigma*brownian_path(T,J)) for i in range(sim)]
    pricey = np.zeros(sim)
    pricex = np.zeros(sim)
    price = np.zeros(sim)
    ii = 0
    # Exact (Black-Scholes) expectation of the discounted European payoff,
    # used as the known control mean E[X].
    z1 = (np.log(s0/K)+(r+0.5*sigma**2)*T)/(sigma*np.sqrt(T))
    z2 = z1-sigma*np.sqrt(T)
    x_mean = s0*sp.stats.norm.cdf(z1) - np.exp(-r*T)*K*sp.stats.norm.cdf(z2)
    for s in path:
        pricey[ii]=np.exp(-r*T)*np.maximum(0,np.mean(s)-K)
        pricex[ii]=np.exp(-r*T)*np.maximum(0,s[-1]-K)
        # Control-variate estimator: Y - beta * (X - E[X]).
        price[ii] = pricey[ii]-m.b[-1]*(pricex[ii]-x_mean)
        ii += 1
    plt.figure(2)
    plt.hist(price)
    results = np.sort(price)
    # NOTE(review): this tuple is (upper quantile, lower quantile) — the
    # reverse of the conventional (low, high) order; confirm intent.
    ci = (results[int(sim * (1 - level))], results[int(sim * level)])
    return np.mean(price),np.std(price),ci
def differentiated_regression(db, tech_metric, pad_zeros=True, max_year=2012):
    # Regress GDP growth on the technology-penetration series plus a
    # high-income-classification dummy, and print the OLS summary.
    # NOTE: Python 2 code (print statement).
    #
    # We also divided the sample into developed and develop-
    # ing economies (the latter including both middle-income and low-income
    # countries according to the World Bank country classifications),
    # created dummy variables, and generated the new variables TELEPENH and
    # TELEPENL (the product of the dummy variables and the telecommunications
    # penetration variables)
    datasets = get_basic_stats(db, tech_metric, pad_zeros=pad_zeros, max_year=max_year)
    high_income = select_classification(db, "high")
    datasets.append(high_income)
    # TODO(cs): tried adding in low income too, but that totally throws off the
    # results
    # datasets[0] is the dependent variable; the rest are regressors
    # (transposed so rows = observations, columns = variables).
    GDP_8006 = scipy.array(datasets[0])
    independent_variables = scipy.array(datasets[1:])
    independent_variables = scipy.transpose(independent_variables)
    names = get_names(tech_metric)
    model = ols.ols(GDP_8006,independent_variables, 'GDP_8006',names)
    print "# Countries: %d" % len(db.select_countries_to_use())
    model.summary()
def removeHighErrorCandidates(self):
    # Fit an OLS model on the current candidate data, then walk the candidate
    # list and delete (in lock-step from inUseCandidates, yVector and xMatrix)
    # any candidate that is missing a required property or whose relative
    # prediction error exceeds self.maxScanIDPredictionError.
    #
    # The loop mutates the lists while iterating, so `quantity` and `i` are
    # adjusted by hand after each deletion — the ordering of these updates is
    # load-bearing; do not reorder.
    # NOTE(review): the model is fitted once up front and NOT refitted after
    # deletions — presumably intentional; confirm.
    self.m = ols(array(self.yVector), array(self.xMatrix))
    quantity = len(self.inUseCandidates)
    i = 0
    while i < quantity:
        candidate = self.inUseCandidates[i]
        b = self.m.b  # b[0] intercept, b[1:] aligned with self.mlrPropCombo
        validCandidate = True
        props = candidate.props
        scanID = props["SUSPECTED_SCANTIME"]
        lookedUpPropArr = []
        for prop in self.mlrPropCombo:
            if prop in props:
                val = props[prop]
                lookedUpPropArr.append(val)
            else:
                # Missing property: drop this candidate from all three
                # parallel lists and re-examine the element now at index i
                # (i -= 1 here is undone by the i += 1 at the loop bottom).
                validCandidate = False
                del self.inUseCandidates[i]
                del self.yVector[i]
                del self.xMatrix[i]
                quantity -= 1
                i -= 1
                break
        if validCandidate:
            # Predicted scan time = intercept + sum(coef * property value).
            predScanID = b[0]
            for propIndex in range(0, len(lookedUpPropArr)):
                propVal = lookedUpPropArr[propIndex]
                propCoefficient = b[propIndex + 1]
                predScanID += propCoefficient * propVal
            errorPct = fabs(predScanID - scanID) / float(scanID)
            if errorPct > self.maxScanIDPredictionError:
                # Too inaccurate: same lock-step deletion as above.
                del self.inUseCandidates[i]
                del self.yVector[i]
                del self.xMatrix[i]
                quantity -= 1
                i -= 1
        i += 1
def main():
    """Read whitespace-separated numeric rows from stdin and print an OLS fit.

    The first column of each row is the dependent variable y; the remaining
    columns are regressors x1..xN (labels generated from the first row).

    Raises:
        ValueError: if a later row has a different number of columns than
            the first row.  (ValueError is a subclass of the BaseException
            the original code raised, so existing handlers still match.)
    """
    r = re.compile(r'\s{1,}')
    data = []
    label = []
    for line in sys.stdin:
        # BUG FIX: wrap map() in list() — on Python 3 map() returns an
        # iterator, so the len(d) checks below would raise TypeError.
        d = list(map(float, r.split(line.rstrip())))
        if not label:
            # First row defines the column count and the x labels.
            for i in range(len(d) - 1):
                label.append('x%d' % (i + 1))
        elif 1 < len(d) and len(label) != len(d) - 1:
            # BUG FIX: raise a specific, informative exception instead of a
            # bare `raise BaseException`.
            raise ValueError('inconsistent column count: got %d columns, '
                             'expected %d' % (len(d), len(label) + 1))
        data.append(d)
    ary = numpy.array(data)
    y = ary[:, 0]
    x = ary[:, 1:]
    model = ols.ols(y, x, 'y', label)
    model.summary()
def findSol(self, windowSize):
    """Fit price on EMA/RSI/MACD via OLS and return {label: coefficient}.

    NOTE(review): windowSize is currently unused here (unlike the sibling
    implementation that slices a time window) — confirm whether intended.
    """
    response = np.array(self.prices)
    design = np.vstack([self.ema, self.rsi, self.macd]).T
    model = ols.ols(response, design, 'price', ['EMA', 'RSI', 'MACD'])
    labels = ['constant', 'EMA', 'RSI', 'MACD']
    stats = {}
    pieces = ["price(t+1) = "]
    for idx, coeff in enumerate(model.b):
        name = labels[idx]
        stats[name] = coeff
        # Build the human-readable equation string term by term.
        if name == 'constant':
            pieces.append(str(coeff) + " + ")
        else:
            pieces.append(str(coeff) + "*" + name + "(t) + ")
    equation = "".join(pieces)[:-2]  # trim the trailing "+ "
    return stats
def findSol(self, windowSize):
    """Fit price on EMA alone over the last windowSize samples.

    Returns a dict mapping coefficient labels to fitted values.
    """
    prices, ema, rsi, macd = self.tf.getTimewindow(windowSize)
    response = np.array(prices)
    design = np.vstack([ema]).T
    model = ols.ols(response, design, 'price', ['EMA'])
    labels = ['constant', 'EMA']
    stats = {}
    pieces = ["price(t+1) = "]
    for idx, coeff in enumerate(model.b):
        name = labels[idx]
        stats[name] = coeff
        # Constant term has no "*label(t)" factor in the equation string.
        if name == 'constant':
            pieces.append(str(coeff) + " + ")
        else:
            pieces.append(str(coeff) + "*" + name + "(t) + ")
    equation = "".join(pieces)[:-2]  # drop trailing "+ "
    return stats
# Append this iteration's differenced log annual means to the accumulators.
Cl1 = numpy.append(Cl1, DiffLnAnnMeansCl1)
Cl4 = numpy.append(Cl4, DiffLnAnnMeansCl4)
Cl5 = numpy.append(Cl5, DiffLnAnnMeansCl5)
Cl1prev = numpy.append(Cl1prev, DiffLnAnnMeansCl1prev)
Cl4prev = numpy.append(Cl4prev, DiffLnAnnMeansCl4prev)
Cl5prev = numpy.append(Cl5prev, DiffLnAnnMeansCl5prev)
# Column of ones sized like BAprev (not used in the regression below).
UnitsVec = numpy.repeat(1, numpy.size(BAprev))

import statsmodels.api as sm
import ols

X = pandas.DataFrame({
    'BA': BA,
    'BAprev': BAprev,
    'BAprev2': BAprev2,
    'BAprev3': BAprev3,
    'BAnext': BAnext,
    'Cl1prev': Cl1prev,
    'Cl4prev': Cl4prev,
    'Cl5prev': Cl5prev,
    'Cl1': Cl1,
    'Cl4': Cl4,
    'Cl5': Cl5,
})
# BUG FIX: the legacy `ols` module takes (y, x, y_varnm, x_varnm) and its
# result has no .fit(); the formula interface belongs to statsmodels, which
# is already imported as `sm`.  Use the statsmodels formula entry point.
reg = sm.OLS.from_formula('BAnext ~ BAprev', data=X).fit()
print(reg.summary())
plt.scatter(BAnext, BAprev)
# Monte-Carlo check of OLS sampling behavior: for each simulation, draw true
# betas, generate Y = XX @ beta + N(0,1) noise, fit OLS, sample from the
# estimated coefficient distribution, and record the fraction of sampled
# betas below the true betas.
# NOTE(review): beta_list, sigma_list and mu_list are created but never
# appended to — presumably leftovers; confirm before removing.
beta_list = []
sigma_list = []
mu_list = []
proportion_list = []
for s in range(nsim):
    mu_draw = 0
    sigma_draw = 1
    beta_draw = np.random.random((nParams, 1))  # true coefficients for this run
    #Generate different Ys
    E = np.random.normal(mu_draw, sigma_draw, nObs).reshape(nObs, 1)
    Y = (XX @ beta_draw).reshape(nObs, 1) + E
    #Estimate betas and sigmas:
    beta, se, vcv = ols.ols(Y, XX)
    # Draw nsim samples from N(beta_hat, vcv), the estimated sampling
    # distribution of the coefficients.
    beta_hat_draw = np.random.multivariate_normal(beta.reshape(nParams), vcv, (nsim, 1))
    # Fraction of draws (per coefficient) that fall below the true beta.
    proportion = np.mean(
        beta_draw.reshape(1, nParams) > beta_hat_draw.reshape(nsim, nParams),
        0)
    proportion_list.append(proportion)
# Histogram of the first coefficient's coverage proportions across runs.
stuff = np.array(proportion_list)
plt.hist(stuff[:, 0])
# <codecell> #!python import ols # <markdowncell> # After importing the class you can estimate a model by passing data to it # as follows # # <codecell> #!python mymodel = ols.ols(y,x,y_varnm,x_varnm) # <markdowncell> # where y is an array with data for the dependent variable, x contains the # independent variables, y\_varnm, is a string with the variable label for # the dependent variable, and x\_varnm is a list of variable labels for # the independent variables. Note: An intercept term and variable label is # automatically added to the model. # # Example Usage # ------------- # # <codecell>
# Scatter-plot YPLL (years of potential life lost) against four county
# health/demographic measures, save the figure, then fit a univariate OLS of
# YPLL on the diabetes measure.  NOTE: Python 2 code (print statements).
ypll_arr, measures_arr = get_arrs(dependent_cols, independent_cols)
print ypll_arr.shape
print measures_arr[:, 1].shape
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(6, 10))
subplot = fig.add_subplot(411)
subplot.scatter(measures_arr[:, 6], ypll_arr)  # 6 = diabetes
subplot.set_title("ypll vs. % of population with diabetes")
subplot = fig.add_subplot(412)
subplot.scatter(measures_arr[:, 1], ypll_arr, color="#1f77b4")  # 1 = age
subplot.set_title("ypll vs. % population less than 18 years of age")
subplot = fig.add_subplot(413)
subplot.scatter(measures_arr[:, 10], ypll_arr, color="#1f77b4")  # 10 = income
subplot.set_title("ypll vs. median household income")
subplot = fig.add_subplot(414)
subplot.scatter(measures_arr[:, 12], ypll_arr, color="#1f77b4")  # 12 = free lunch
subplot.set_title("ypll vs. Free lunch")
plt.savefig('four-scatters.png', format='png')
import ols
model = ols.ols(ypll_arr, measures_arr[:, 6], "YPLL RATE", ["% Diabetes"])
model.summary()
# NOTE(review): the three appends below reference a loop variable `t` whose
# `for` header is not visible in this chunk — they appear to be the tail of a
# loop over time indices that accumulates the RH/WS/Sin series; confirm
# placement against the full file.
RH.append(float(RH1[t]))
WS.append(float(WS1[t]))
Sin.append(float(Sin1[t]))
# Mask the -999.99 missing-value sentinel in the air-temperature series.
Ta_m = np.ma.masked_values(Ta, -999.99, atol=0.09)
#X = ts_inter.interp_masked1d(Ta_m, 'cubic') #{'constant', 'linear', 'cubic', quintic'}
#plt.plot(datenum,X)
#plt.plot(datenum,Ta_m, 'o')
#plt.show()
# Assemble the design matrix: columns are Ta, RH, WS, Sin.
# NOTE(review): np.float is deprecated in modern NumPy (use float).
x = np.zeros((len(datenum), 4), dtype=np.float)
x[:, 0] = Ta_m
x[:, 1] = RH
x[:, 2] = WS
x[:, 3] = Sin
# Regress Ta on WS and Sin; NOTE: Python 2 code (bare print statement below).
mymodel = ols.ols(x[:, 0], x[:, 2:], y_varnm='Ta', x_varnm=['WS', 'Sin'])
mymodel.p  # return coefficient p-values
mymodel.summary()  # print results
print
# Regress RH on WS and Sin.
mymodel = ols.ols(x[:, 1], x[:, 2:], y_varnm='RH', x_varnm=['WS', 'Sin'])
mymodel.p  # return coefficient p-values
mymodel.summary()  # print results
##X = np.fft.fft(Ta_m)
##Y = np.zeros(len(Ta))
###Y[important frequencies] = X[important frequencies]
##plt.plot(datenum,X)
##plt.show()
##
##X1 = np.fft.fft(WS)
##Y1 = np.zeros(len(WS))
def printOlsSummary(self):
    """Regress price on time and print the fitted OLS model's summary."""
    prices = self.getPrices()
    times = self.getTimes()
    fitted = ols.ols(prices, times, "Price", ["Time"])
    fitted.summary()