def matrix_regression(input_vals, output_vals, funcs, categorical = False):
    '''Linear regression of output values on functions of the input values.

    Each entry of ``funcs`` contributes one column to the design matrix:
    column j holds ``funcs[j][1](v)`` for every ``v`` in ``input_vals``.
    The fit itself is delegated to R's ``lsfit`` through ``r``.

    Parameters:
        input_vals:  sequence of input values, one per observation.
        output_vals: sequence of observed outputs; must be the same size
                     as ``input_vals``.
        funcs:       list of ``(title, func)`` tuples. If no entry is
                     titled 'Intercept', a constant term
                     ``('Intercept', lambda x: 1)`` is APPENDED TO THIS
                     LIST IN PLACE, so after the call ``funcs`` lines up
                     index-for-index with the returned coefficients.
        categorical: when true, every truthy matrix entry is replaced by 1.

    Returns a 5-tuple:
        coefficients          -- list of fitted coefficients, ordered like
                                 ``funcs``.
        regression_func       -- callable ``f(x, c=coefficients)`` that
                                 evaluates the fitted model at ``x``.
        model_vals            -- list of ``regression_func(v)`` for each
                                 input value.
        least_squares_quality -- ``least_squares_error(model_vals,
                                 output_vals)``.
        linear_quality        -- ``linear_error(model_vals, output_vals)``.

    Raises AssertionError if the two value sequences differ in length.
    '''
    assert len(input_vals) == len(output_vals), \
        'input_vals and output_vals must be the same size'

    # Add a constant function for the intercept (mutates the caller's list
    # on purpose: the coefficients returned below include this term).
    if 'Intercept' not in [entry[0] for entry in funcs]:
        funcs.append(('Intercept', lambda x: 1))

    # Snapshot the callables so the returned regression function keeps
    # using exactly the terms that were fitted.
    terms = tuple(entry[1] for entry in funcs)

    # Build up a matrix of each input function on each input val
    m = r.matrix(1.0, nrow=len(input_vals), ncol=len(terms))
    for i, val in enumerate(input_vals):
        for j, term in enumerate(terms):
            # Evaluate on the input VALUE, not on its index: the model is
            # later evaluated on the values, so the fit must see the same.
            m[i][j] = term(val)
            if categorical and m[i][j]:
                m[i][j] = 1

    # Regression, done in R. The intercept is already an explicit column
    # of the matrix, so lsfit must not add its own.
    fit = r.lsfit(m, output_vals, intercept=False)

    # Extract values. Coefficient names are ordered by the integer that
    # follows their first character (presumably 'X1', 'X2', ... as named
    # by lsfit -- TODO confirm for the single-column case).
    named = sorted(fit['coefficients'].items(),
                   key=lambda item: int(item[0][1:]))
    coefficients = [item[1] for item in named]

    def regression_func(x, c=coefficients):
        return sum([c[j] * term(x) for j, term in enumerate(terms)])

    model_vals = [regression_func(val) for val in input_vals]
    least_squares_quality = least_squares_error(model_vals, output_vals)
    linear_quality = linear_error(model_vals, output_vals)
    return (coefficients, regression_func, model_vals,
            least_squares_quality, linear_quality)
def fit_line(x1, y1):
    '''Return a ``line`` through the points given by ``x1`` and ``y1``.

    - exactly two points: the line through both of them, obtained from
      ``line.get_line_from_points``;
    - more than two points: the least-squares line computed by R's
      ``lsfit``;
    - fewer than two points: ``None``.

    Only ``len(x1)`` is inspected; ``y1`` is assumed to hold a matching
    y value for every x.
    '''
    count = len(x1)

    if count == 2:
        # The (0, 0) line only serves as a receiver for the method call.
        seed = line(0, 0)
        first = point(x1[0], y1[0], 0)
        second = point(x1[1], y1[1], 0)
        return seed.get_line_from_points(first, second)

    if count > 2:
        coeffs = r.lsfit(x1, y1)['coefficients']
        slope = coeffs['X']
        intercept = coeffs['Intercept']
        return line(slope, intercept)

    # Not enough points to define a line.
    return None
from rpy import r

# Example: scatter plot of four points with their least-squares line,
# written to scatter_regression.png.
my_x = [5.05, 6.75, 3.21, 2.66]
my_y = [1.65, 26.5, -5.93, 7.96]

# Fit y against x in R and pull slope / intercept out of the result.
ls_fit = r.lsfit(my_x, my_y)
fit_coefficients = ls_fit['coefficients']
gradient = fit_coefficients['X']
yintercept = fit_coefficients['Intercept']

# Open the PNG device, draw the points, then overlay the fitted line.
r.png("scatter_regression.png", width=400, height=350)
r.plot(
    x=my_x,
    y=my_y,
    xlab="x",
    ylab="y",
    xlim=(0, 7),
    ylim=(-16, 27),
    main="Example Scatter with regression",
)
r.abline(a=yintercept, b=gradient, col="red")

# Closing the device is what finishes the image file.
r.dev_off()
# Income figures plotted against education_years, which (like r) is
# defined before this part of the script.
income = [125000, 100000, 40000, 35000, 41000,
          29000, 35000, 24000, 50000, 60000]

# Tell the user where the graph goes and how to look at it.
print("Writing output graph to file: scatterplot.png")
print("To view it in Linux, you can use the File Browser and double-click.")
print("For command-line viewing, use the gthumb image viewer as follows:")
print("% gthumb scatterplot.png")

# Output picture format and size.
r.png("scatterplot.png", width=400, height=350)

# Scatter plot; no axis limits are passed, so R chooses the ranges.
r.plot(
    x=education_years,
    y=income,
    xlab="Education Years",
    ylab="Income",
    main="Scatter Plot with Least Squares Fit",
)

# Least-squares fit of income against education_years.
# (Print leastsquarefit here when debugging.)
leastsquarefit = r.lsfit(education_years, income)

# Slope and y-intercept of the fitted line.
fit_coefficients = leastsquarefit['coefficients']
gradient = fit_coefficients['X']
yintercept = fit_coefficients['Intercept']

# Overlay the fitted line on the scatter plot.
r.abline(a=yintercept, b=gradient, col="red")

# Turn off the graphics device -- the R way of "closing" the graph.
r.dev_off()