Beispiel #1
0
 def test_regress(self):
     """regress should reproduce the slope and intercept on p 459 of Sokal and Rohlf"""
     x_vals = [0, 12, 29.5, 43, 53, 62.5, 75.5, 85, 93]
     y_vals = [8.98, 8.14, 6.67, 6.08, 5.90, 5.83, 4.68, 4.20, 3.72]

     # rounded (slope, intercept) pair; the extra 0.001 argument is
     # presumably the allowed tolerance -- confirm against assertFloatEqual
     rounded_expected = (-0.05322, 8.7038)
     self.assertFloatEqual(regress(x_vals, y_vals), rounded_expected, 0.001)

     # same fit with more digits, taken from OpenOffice; compared using
     # whatever tolerance assertFloatEqual applies by default
     precise_expected = (-0.05322215, 8.70402730)
     self.assertFloatEqual(regress(x_vals, y_vals), precise_expected)
Beispiel #2
0
 def test_regress(self):
     """regress should reproduce the slope and intercept on p 459 of Sokal and Rohlf"""
     x_vals = [0, 12, 29.5, 43, 53, 62.5, 75.5, 85, 93]
     y_vals = [8.98, 8.14, 6.67, 6.08, 5.90, 5.83, 4.68, 4.20, 3.72]

     # rounded (slope, intercept) pair; the extra 0.001 argument is
     # presumably the allowed tolerance -- confirm against assertFloatEqual
     rounded_expected = (-0.05322, 8.7038)
     self.assertFloatEqual(regress(x_vals, y_vals), rounded_expected, 0.001)

     # same fit with more digits, taken from OpenOffice; compared using
     # whatever tolerance assertFloatEqual applies by default
     precise_expected = (-0.05322215, 8.70402730)
     self.assertFloatEqual(regress(x_vals, y_vals), precise_expected)
Beispiel #3
0
def plot_regression_line(x,y,line_color='r', axes=None, prob_axes=False, \
    axis_range=None):
    """Plots the regression line of y on x, and returns its equation.

    x and y are the x and y data for a single series.
    line_color is a matplotlib color, will be used for the line.
    axes is the object whose plot() method draws the line; when None,
        the result of gca() is used.
    prob_axes, if true, clips the line to the window 0..1 on both axes.
        It takes precedence over axis_range when both are given.
    axis_range, if not None, is (xmin, xmax, ymin, ymax); the line is
        clipped to that window.

    When neither prob_axes nor axis_range is set, the line is drawn from
    min(x) to max(x).

    Returns the tuple (equation, line_color), where equation is a two-line
    string of the form 'y= <m>x + <b>' followed by 'r$^2$=<r squared>',
    with every number formatted to three significant digits.

    A horizontal fit (slope 0) that lies outside the vertical window is
    simply not drawn; it never crosses the window, so there is nothing to
    clip it to.
    """
    if axes is None:
        axes = gca()
    m, b = regress(x, y)
    #only the first value returned by correlation is used here
    r, _significance = correlation(x, y)
    #format the slope (m), the intercept (b) and r squared for the label
    r_str = '%0.3g' % (r**2)
    m_str = '%0.3g' % m
    b_str = '%0.3g' % b

    #want to clip the line so it's contained entirely within the graph
    #coordinates. Basically, we need to find the values of y where x
    #is at x_min and x_max, and the values of x where y is at y_min and
    #y_max.

    #if we didn't set prob_axes or axis_range, just use the empirical x
    #extent and the fitted y at each end
    if (not prob_axes) and (axis_range is None):
        x_min, x_max = min(x), max(x)
        endpoints = [(x_min, m*x_min + b), (x_max, m*x_max + b)]
    else:
        if prob_axes:
            x_min, x_max = 0, 1
            y_min, y_max = 0, 1
        else: #axis range must have been set
            x_min, x_max, y_min, y_max = axis_range

        def clip_endpoint(x_edge):
            """Returns the (x, y) end of the line at x_edge, moved onto
            the y bound it overshoots, or None if it cannot be moved."""
            y_edge = m*x_edge + b
            if y_edge < y_min:      #too low: find x at y_min
                y_bound = y_min
            elif y_edge > y_max:    #too high: find x at y_max
                y_bound = y_max
            else:                   #just right
                return x_edge, y_edge
            if m == 0:
                #a flat line outside the window never reaches y_bound;
                #solving for x would divide by zero
                return None
            return (y_bound - b)/m, y_bound

        endpoints = [clip_endpoint(x_min), clip_endpoint(x_max)]

    if all(point is not None for point in endpoints):
        (x1, y1), (x2, y2) = endpoints
        #skip the plot when clipping pushed an endpoint outside the x
        #window, i.e. the line misses the window altogether
        if (x_min <= x1 <= x_max) and (x_min <= x2 <= x_max):
            axes.plot([x1,x2],[y1,y2], color=line_color, linewidth=0.5)

    #a negative intercept already carries its own minus sign in b_str
    if b >= 0:
        sign_str = ' + '
    else:
        sign_str = ' '

    equation=''.join(['y= ',m_str,'x',sign_str,b_str,'\nr$^2$=',r_str])
    return equation, line_color
Beispiel #4
0
def plot_regression_line(x,y,line_color='r', axes=None, prob_axes=False, \
    axis_range=None):
    """Plots the regression line of y on x, and returns its equation.

    x and y are the x and y data for a single series.
    line_color is a matplotlib color, will be used for the line.
    axes is the object whose plot() method draws the line; when None,
        the result of gca() is used.
    prob_axes, if true, clips the line to the window 0..1 on both axes.
        It takes precedence over axis_range when both are given.
    axis_range, if not None, is (xmin, xmax, ymin, ymax); the line is
        clipped to that window.

    When neither prob_axes nor axis_range is set, the line is drawn from
    min(x) to max(x).

    Returns the tuple (equation, line_color), where equation is a two-line
    string of the form 'y= <m>x + <b>' followed by 'r$^2$=<r squared>',
    with every number formatted to three significant digits.

    A horizontal fit (slope 0) that lies outside the vertical window is
    simply not drawn; it never crosses the window, so there is nothing to
    clip it to.
    """
    if axes is None:
        axes = gca()
    m, b = regress(x, y)
    #only the first value returned by correlation is used here
    r, _significance = correlation(x, y)
    #format the slope (m), the intercept (b) and r squared for the label
    r_str = '%0.3g' % (r**2)
    m_str = '%0.3g' % m
    b_str = '%0.3g' % b

    #want to clip the line so it's contained entirely within the graph
    #coordinates. Basically, we need to find the values of y where x
    #is at x_min and x_max, and the values of x where y is at y_min and
    #y_max.

    #if we didn't set prob_axes or axis_range, just use the empirical x
    #extent and the fitted y at each end
    if (not prob_axes) and (axis_range is None):
        x_min, x_max = min(x), max(x)
        endpoints = [(x_min, m*x_min + b), (x_max, m*x_max + b)]
    else:
        if prob_axes:
            x_min, x_max = 0, 1
            y_min, y_max = 0, 1
        else: #axis range must have been set
            x_min, x_max, y_min, y_max = axis_range

        def clip_endpoint(x_edge):
            """Returns the (x, y) end of the line at x_edge, moved onto
            the y bound it overshoots, or None if it cannot be moved."""
            y_edge = m*x_edge + b
            if y_edge < y_min:      #too low: find x at y_min
                y_bound = y_min
            elif y_edge > y_max:    #too high: find x at y_max
                y_bound = y_max
            else:                   #just right
                return x_edge, y_edge
            if m == 0:
                #a flat line outside the window never reaches y_bound;
                #solving for x would divide by zero
                return None
            return (y_bound - b)/m, y_bound

        endpoints = [clip_endpoint(x_min), clip_endpoint(x_max)]

    if all(point is not None for point in endpoints):
        (x1, y1), (x2, y2) = endpoints
        #skip the plot when clipping pushed an endpoint outside the x
        #window, i.e. the line misses the window altogether
        if (x_min <= x1 <= x_max) and (x_min <= x2 <= x_max):
            axes.plot([x1,x2],[y1,y2], color=line_color, linewidth=0.5)

    #a negative intercept already carries its own minus sign in b_str
    if b >= 0:
        sign_str = ' + '
    else:
        sign_str = ' '

    equation=''.join(['y= ',m_str,'x',sign_str,b_str,'\nr$^2$=',r_str])
    return equation, line_color