def construct_inverse_graph(graph):
    """Build a TGraphErrors whose y values are the reciprocals of the input's.

    The x values are passed through unchanged and every x/y error is set
    to 0. A point whose y value is 0 is mapped to 0 instead of dividing
    by zero.
    """
    n_points = graph.GetN()
    x_vals, y_vals = cu.get_xy(graph)

    inverted = []
    for value in y_vals:
        # guard against division by zero: such points are simply pinned to 0
        inverted.append(1 / value if value != 0 else 0)

    zero_ex = array('d', [0] * n_points)
    zero_ey = array('d', [0] * n_points)
    return ROOT.TGraphErrors(n_points, x_vals, array('d', inverted),
                             zero_ex, zero_ey)
def construct_graph_func_ratio_graph(graph, func):
    """Construct a graph of function / graph for each (x, y) point in graph.

    For every point, ``func`` is evaluated at the point's x value and divided
    by the point's y value. Points whose y value is exactly 0 get a ratio
    of 0 rather than raising ZeroDivisionError; this is the same convention
    construct_inverse_graph() uses for zero-valued points.

    The returned TGraphErrors shares the x values of ``graph`` and has all
    x/y errors set to 0.
    """
    x, y = cu.get_xy(graph)
    n = len(x)
    ratio = array('d',
                  [func.Eval(this_x) / this_y if this_y != 0 else 0
                   for this_x, this_y in zip(x, y)])
    ex = array('d', [0] * n)
    ey = array('d', [0] * n)
    return ROOT.TGraphErrors(n, x, ratio, ex, ey)
# Example no. 3
# 0
def construct_difference_graph(graph, other_graph):
    """Construct a graph of (graph - other_graph) y values, point by point.

    If the two graphs have a different number of points, the longer one is
    truncated to the length of the shorter one. After truncation the x
    values must match exactly, otherwise a RuntimeError is raised.

    All x/y errors of the returned TGraphErrors are set to 0.
    """
    x, y = cu.get_xy(graph)
    x_other, y_other = cu.get_xy(other_graph)
    n_points = graph.GetN()

    if len(y) != len(y_other):
        # Different number of points: keep only the leading points that
        # both graphs have
        n_x, n_x_other = len(x), len(x_other)
        if n_x < n_x_other:
            n_points = n_x
            x_other, y_other = x_other[:n_x], y_other[:len(y)]
        elif n_x > n_x_other:
            n_points = n_x_other
            x, y = x[:n_x_other], y[:len(y_other)]

    if x != x_other:
        raise RuntimeError("x values different")

    differences = []
    for y_here, y_there in zip(y, y_other):
        differences.append(y_here - y_there)

    zero_ex = array('d', [0] * n_points)
    zero_ey = array('d', [0] * n_points)
    return ROOT.TGraphErrors(n_points, array('d', x), array('d', differences),
                             zero_ex, zero_ey)
def process_file(filename, eta_bins=binning.eta_bins_forward):
    """Process a ROOT file with graphs, print a mean & mean histogram for each.

    Parameters
    ----------
    filename : str
        Name of ROOT file to process (from runCalibration.py)
    eta_bins : list[[float, float]]
        Eta bin edges.
    """
    f = cu.open_root_file(filename)

    for eta_min, eta_max in binning.pairwise(eta_bins):
        gr = cu.get_from_file(f, generate_eta_graph_name(eta_min, eta_max))
        if not gr:
            raise RuntimeError("Can't get graph")

        xarr, yarr = cu.get_xy(gr)
        xarr, yarr = np.array(xarr), np.array(
            yarr)  # use numpy array for easy slicing

        # Loop over all possible subgraphs, and calculate a mean for each
        end = len(yarr)
        means = []
        while end > 0:
            start = 0
            while start < end:
                means.append(yarr[start:end].mean())
                start += 1
            end -= 1

        # Jackknife means
        jack_means = [np.delete(yarr, i).mean() for i in range(len(yarr))]

        # Do plotting & peak finding in both ROOT and MPL...not sure which is better?
        # peak = plot_find_peak_mpl(means, eta_min, eta_max, os.path.dirname(os.path.realpath(filename)))
        peak = plot_find_peak_root(means, eta_min, eta_max,
                                   os.path.dirname(os.path.realpath(filename)))
        jackpeak = plot_jacknife_root(
            jack_means, eta_min, eta_max,
            os.path.dirname(os.path.realpath(filename)))
        print 'Eta bin:', eta_min, '-', eta_max
        print peak
        print 'jackknife mean:'
        print np.array(jack_means).mean()

    f.Close()
def process_file(filename, eta_bins=binning.eta_bins_forward):
    """Process a ROOT file with graphs, print a mean & mean histogram for each.

    Parameters
    ----------
    filename : str
        Name of ROOT file to process (from runCalibration.py)
    eta_bins : list[[float, float]]
        Eta bin edges.
    """
    f = cu.open_root_file(filename)

    for eta_min, eta_max in binning.pairwise(eta_bins):
        gr = cu.get_from_file(f, generate_eta_graph_name(eta_min, eta_max))
        if not gr:
            raise RuntimeError("Can't get graph")

        xarr, yarr = cu.get_xy(gr)
        xarr, yarr = np.array(xarr), np.array(yarr)  # use numpy array for easy slicing

        # Loop over all possible subgraphs, and calculate a mean for each
        end = len(yarr)
        means = []
        while end > 0:
            start = 0
            while start < end:
                means.append(yarr[start:end].mean())
                start += 1
            end -= 1

        # Jackknife means
        jack_means = [np.delete(yarr, i).mean() for i in range(len(yarr))]

        # Do plotting & peak finding in both ROOT and MPL...not sure which is better?
        # peak = plot_find_peak_mpl(means, eta_min, eta_max, os.path.dirname(os.path.realpath(filename)))
        peak = plot_find_peak_root(means, eta_min, eta_max, os.path.dirname(os.path.realpath(filename)))
        jackpeak = plot_jacknife_root(jack_means, eta_min, eta_max, os.path.dirname(os.path.realpath(filename)))
        print 'Eta bin:', eta_min, '-', eta_max
        print peak
        print 'jackknife mean:'
        print np.array(jack_means).mean()

    f.Close()
def choose_file():
    """Get graph & fit fcn from ROOT file via tkFileDialog, plot them on the canvas.

    Opens a file dialog, then looks up the graph and function named in the
    `graph_entry` and `fn_entry` widgets in the chosen file. If both exist,
    the graph's points and errors are stored in the module-level
    `graph_x`, `graph_y`, `graph_errx`, `graph_erry`, and the sliders /
    multiplier boxes are set from the function's parameters. Otherwise a
    warning is shown for each object that is missing.
    """
    global graph_x, graph_errx, graph_y, graph_erry

    ftypes = [('ROOT files', '*.root'), ('All files', '*')]
    dlg = tkFileDialog.Open(filetypes=ftypes)
    fl = dlg.show()
    if not fl:
        # Dialog cancelled. Testing truthiness rather than `!= ''` also
        # covers an empty tuple, which Tk may hand back instead of ''.
        return

    graph_name = graph_entry.get()
    fn_name = fn_entry.get()

    root_file = ROOT.TFile(fl, "READ")
    gr = root_file.Get(graph_name)
    fn = root_file.Get(fn_name)

    if gr and fn:
        tkMessageBox.showinfo("Got graph & fit", "Got graph %s and function %s" % (graph_name, fn_name))
        # store xy points so properly drawn when canvas updated
        graph_x, graph_y = cu.get_xy(gr)
        graph_errx, graph_erry = cu.get_exey(gr)
        new_params = [fn.GetParameter(i) for i in xrange(fn.GetNumberFreeParameters())]
        for slider, box, param in zip(sliders, multiplier_boxes, new_params):
            set_slider_box_values(slider, box, param)
        return

    if not gr:
        tkMessageBox.showwarning("No graph", "Graph with name %s does not exist" % graph_name)
    if not fn:
        # report the *function* name here (was mistakenly the graph name)
        tkMessageBox.showwarning("No function", "Function with name %s does not exist" % fn_name)
# Example no. 7
# 0
def do_low_pt_plateau_fit(fit, graph, condition=0.1, look_ahead=4):
    """Make low pt plateau fit, by checking for deviations between graph and fit at low pT.
    Then below the pT where they differ, just use the last good correction
    factor as a constant correction factor.

    This decision can also take lower pT point into account to avoid breaking
    early due to fluctuations (see `look_ahead` arg)

    This generates a new set of correction functions, represented by MultiFunc objects.

    Parameters
    ----------
    fits : list[TF1]
        List of fit functions, one per eta bin.
    graphs : list[TGraph]
        List of graphs, one per eta bin.
    condition : float
        Absolute difference between graph & curve to determine where curve
        becomes a constant value.
    look_ahead : int, optional
        Number of lower points to also consider when calculating
        where plateau should occur

    Returns
    -------
    MultiFunc

    """
    print "Making fancy fit, using condition %f with look-ahead %d" % (condition, look_ahead)

    x_arr, y_arr = cu.get_xy(graph)

    pt_merge, corr_merge = 0, 0

    for j, (pt, corr) in enumerate(izip(x_arr[::-1], y_arr[::-1])):
        # Loop through each point of the graph in reverse,
        # only considering points with pt < 70.
        # Determine where the function and graph separate by
        # looking at the difference.
        if pt > 70:
            continue

        def get_nth_lower_point(n):
            """Return the nth lower point (x, y).
            eg n=1 returns the next lowest graph x,y"""
            return x_arr[len(x_arr) - 1 - j - n], y_arr[len(y_arr) - 1 - j - n]

        # Test the next N lowest point(s) to see if they also fulfills condition.
        # This stops a random fluctation from making the plateau too low
        # We require that all the Nth lower points also fail the condition.
        lower_points = [get_nth_lower_point(x) for x in range(1, 1 + look_ahead)]
        lower_fit_vals = [fit.Eval(x[0]) for x in lower_points]
        lower_conditions = [abs(x[1] - y) > condition for x, y in zip(lower_points, lower_fit_vals)]
        if all(lower_conditions):
            break
        else:
            pt_merge = pt
            corr_merge = fit.Eval(pt)

    print "pt_merge:", pt_merge, "corr fn value:", fit.Eval(pt_merge)

    # Make our new 'frankenstein' function: constant for pt < pt_merge,
    # then the original function for pt > pt_merge
    constant = ROOT.TF1("constant", "[0]", 0, pt_merge)
    constant.SetParameter(0, corr_merge)

    function_str = "[0]+[1]/(pow(log10(x),2)+[2])+[3]*exp(-[4]*(log10(x)-[5])*(log10(x)-[5]))"
    fit_new = ROOT.TF1("fitfcn", function_str, pt_merge * 0.75, 1024)
    for p in xrange(fit.GetNumberFreeParameters()):
        fit_new.SetParameter(p, fit.GetParameter(p))
    # set lower range below pt_merge just for drawing purposes - MultiFunc ignores it

    # add a constant above 1023.5 as truncated there
    constant_highpT = ROOT.TF1("constant_highpT", "[0]", 1023.5, ((2**16) - 1) * 0.5)
    constant_highpT.SetParameter(0, fit_new.Eval(1023.5))

    # Make a MultiFunc object to handle the different functions operating
    # over different ranges since TF1 can't do this.
    # Maybe ROOFIT can?
    functions_dict = {(0, pt_merge): constant,
                      (pt_merge, 1023.5): fit_new,
                      (1023.4, np.inf): constant_highpT}
    total_fit = MultiFunc(functions_dict)
    return total_fit
# Example no. 8
# 0
def do_constant_fit(graph, eta_min, eta_max, output_dir):
    """Do constant-value fit to graph and plot the jackknife procedure.

    We derive the constant fit value by jack-knifing. There are 2 forms here:
    - "my jackknifing": where we loop over all possible subgraphs, and calculate
    the mean for each.
    - "proper jackknifing": where we loop over all N-1 subgraphs, and calulate
    the mean for each.

    Using these, we can then find the peak mean, or the average mean.
    By default, we use the peak of "my jackknife" as it ignores the
    high-correction tail better, and gives the better-sampled low pT
    end more importance.

    Parameters
    ----------
    graph : TGraph
        Graph to fit
    eta_min, eta_max : float
        Eta bin boundaries, purely for the plots
    output_dir : str
        Output directory for plots.

    Returns
    -------
    MultiFunc
        MultiFunc object with a const-value function for the whole pT range.
    """
    print 'Doing constant-value fit'

    xarr, yarr = cu.get_xy(graph)
    xarr, yarr = np.array(xarr), np.array(yarr)  # use numpy array for easy slicing

    # "my jackknifing": Loop over all possible subgraphs, and calculate a mean for each
    end = len(yarr)
    means = []
    while end > 0:
        start = 0
        while start < end:
            means.append(yarr[start:end].mean())
            start += 1
        end -= 1

    # "proper" Jackknife means
    jack_means = [np.delete(yarr, i).mean() for i in range(len(yarr))]

    # Do plotting & peak finding, for both methods
    plot_name = os.path.join(output_dir, 'means_hist_%g_%g_myjackknife.pdf' % (eta_min, eta_max))
    peak, mean = find_peak_and_average_plot(means, eta_min, eta_max, plot_name, 'My jackknife')

    plot_name = os.path.join(output_dir, 'means_hist_%g_%g_root_jackknife.pdf' % (eta_min, eta_max))
    jackpeak, jackmean = find_peak_and_average_plot(jack_means, eta_min, eta_max, plot_name, 'Proper jackknife')

    print 'my jackknife peak:', peak
    print 'my jackknife mean:', mean
    print 'jackknife peak:', jackpeak
    print 'jackknfe mean:', jackmean
    const_fn = ROOT.TF1("constant", '[0]', 0, 1024)
    const_fn.SetParameter(0, peak)
    const_multifn = MultiFunc({(0, np.inf): const_fn})
    return const_multifn
def fit_correction(graph, function, fit_min=-1, fit_max=-1):
    """
    Fit response curve with given correction function, within given bounds.
    If fit_min and fit_max are < 0, then use the range of the function supplied.

    Note that sometime the fit fails - if so, we try raising the lower
    bound of the fit until it suceeds (sometimes it works at e.g. 45, but not 40).
    If that fails, then we lower the upper bound and try fitting, raising
    the lower bound again if necessary. Iterative process, so fairly slow.

    Note that the 'stepping' is done in terms of the graph points, so non-uniform.

    We stop when the upper bound of the fit approaches the original lower bound.

    Returns graph (with fitted function) and parameters of successful fit if
    successful (otherwise an empty list).
    """
    # Get the min and max of the fit function if the user didn't define it
    if fit_min < 0 and fit_max < 0:
        fit_min, fit_max = ROOT.Double(), ROOT.Double()
        function.GetRange(fit_min, fit_max)

    print "Fitting", fit_min, fit_max

    # Now do the fitting, incrementing the fit min if failure
    fit_result = -1

    xarr, yarr = cu.get_xy(graph)

    # Keep the points in the graph closest to the min/max values
    # (and the index of the point in the graph array) for reference
    orig_fit_min_ind, orig_fit_min = closest_element(xarr, fit_min)
    orig_fit_max_ind, orig_fit_max = closest_element(xarr, fit_max)
    fit_min_ind, fit_max_ind = orig_fit_min_ind, orig_fit_max_ind
    print 'Starting with fit range:', orig_fit_min, orig_fit_max

    while fit_max_ind - orig_fit_min_ind >= 5:
        fit_min_ind = orig_fit_min_ind
        while fit_min_ind + 5 < fit_max_ind:
            fit_min = xarr[fit_min_ind]
            fit_max = xarr[fit_max_ind]
            function.SetRange(fit_min, fit_max)

            mode = "QR"
            if str(function.GetExpFormula()).startswith("pol"):
                mode += "F"
            fit_result = int(graph.Fit(function.GetName(), mode, "", fit_min, fit_max))
            if fit_result != 0:
                fit_min_ind += 1
                continue

            # sanity check - sometimes will have status = 0 even though rubbish,
            if not check_sensible_function(function):
                fit_result = -1

            if fit_result == 0:
                print "Fit result:", fit_result, "for fit min", fit_min, "to max", fit_max
                break
            else:
                fit_min_ind += 1

        if fit_result == 0:
            break

        fit_max_ind -= 1
        print 'Trying with lowered fit_max:', xarr[fit_max_ind]

    params = []

    if fit_result != 0:
        print "Couldn't fit"
    else:
        for i in range(function.GetNumberFreeParameters()):
            params.append(function.GetParameter(i))

    return graph, params
def setup_fit(graph, function, absetamin, absetamax, outputfile):
    """Setup for fitting (auto-calculate sensible range).

    Returns a sub-graph of only sensible points (chop off turnover at low pT,
    and any high pT tail), along with a corresponding fit function
    whose range has been set to match the sub graph.
    """
    print 'Setting up fit'
    xarr, yarr = cu.get_xy(graph)
    exarr, eyarr = cu.get_exey(graph)
    # first test out graph isn't empty
    if len(xarr) == 0:
        raise RuntimeError("graph in setup_fit() is empty")

    fit_max = max(xarr)  # Maxmimum pt for upper bound of fit
    # fit_min = 10 if absetamin > 2.9 else 10
    fit_min = min(xarr) # Minimum pt for lower bound of fit

    # For lower bound of fit, use either fit_min or the pt
    # of the maximum correction value, whichever has the larger pT.
    # Check to make sure it's not the last point on the graph
    # (e.g. if no turnover), in which case just use the default fit_min
    # Then find the index of the closest corresponding value in xarr (since
    # fit_min could correspond to a pT that isn't in the list of x-points)
    # Note that we want the maximum in the first half of the graph to avoid
    # the 'flick' at high pT in HF
    # JOE EDIT: (un)comment the next 4 lines to (not) have the flick
    max_corr = max(yarr[:len(yarr) / 2])
    max_corr_ind = yarr.index(max_corr)
    max_corr_pt = xarr[max_corr_ind]
    fit_min = max(fit_min, max_corr_pt) if (max_corr_pt != xarr[-1]) and (max_corr_pt != xarr[-1]) else fit_min
    min_ind = next(i for i, x in enumerate(xarr) if x >= fit_min)

    # To find upper limit of fit, we need to detect if & where there is a turnover
    # (i.e. where the gradient goes from -ve to +ve)
    # This is made difficult by the fact the graph may be 'noisy' and simply
    # taking the gradient may not be enough (and give multiple points where
    # the gradient changes). To counter this, we smooth the gradient by
    # averaging over several points
    def moving_average(arr, n):
        """Returns a np.array of moving-averages of array arr, where each
        point in the rerurned array is the average of the consecutive n points
        in the original array.

        By definition, this will return an array of length len(arr) + 1 - n
        """
        return np.array([np.mean(arr[i:i+n]) for i in range(0, len(arr) - n + 1)])

    def calc_crossing(arr):
        """Calculate value at which value the array crosses 0.

        Looks at points in groups of 4, and finds the smallest group
        where the first 2 points < 0, and the next 2 points > 0.

        This ignores values which peak above 0 for 1 point.

        Returns the array (index, value) of the point closest to 0.
        """
        for i in range(2, len(arr)):
            group = np.concatenate((-1 * arr[i-2: i], arr[i: i+2]))
            if np.all(group > 0):
                return i - 2 + list(group).index(np.min(group)), np.min(group)
        return None, None

    grad = np.gradient(yarr, 1)
    n_sample = 5
    intercept_ind, intercept = None, None
    # keep incrementing the smooting value until we get a clean intercept
    while not intercept_ind and not intercept:
        x_ave = moving_average(xarr, n_sample)
        grad_ave = moving_average(grad, n_sample)
        intercept_ind, intercept = calc_crossing(grad_ave)
        n_sample += 1
        # quit if we've got stuck
        if n_sample == 12:
            break

    if intercept and intercept_ind:
        print 'Found minima'
        print 'Smoothing param:', n_sample
        # find closest x value to intercept
        max_ind, fit_max = closest_element(xarr, x_ave[intercept_ind])
    else:
        print '! Could not find minima, falling back to just using min()'
        # Here we assume a failure to find any minima, so fallback and use
        # the smallest point.
        max_ind = list(yarr).index(min(yarr))
        fit_max = xarr[max_ind]
    if fit_min > fit_max:
        raise RuntimeError('fit_min > fit_max! (%f > %f)' % (fit_min, fit_max))

    print "Correction fn fit range:", fit_min, fit_max

    # Generate a correction function with suitable range
    this_fit = function.Clone(function.GetName() + 'eta_%g_%g' % (absetamin, absetamax))
    this_fit.SetRange(fit_min, fit_max)

    # Make a sub-graph with only the points used for fitting
    # Do not user graph.RemovePoint()! It doesn't work, and only removes every other point
    # Instead make a graph with the bit of array we want
    fit_graph = ROOT.TGraphErrors(max_ind + 1 - min_ind,
                                  np.array(xarr[min_ind:max_ind + 1]),
                                  np.array(yarr[min_ind:max_ind + 1]),
                                  np.array(exarr[min_ind:max_ind + 1]),
                                  np.array(eyarr[min_ind:max_ind + 1]))
    fit_graph.SetName(graph.GetName() + "_fit")

    return fit_graph, this_fit
def do_constant_fit(graph, eta_min, eta_max, output_dir):
    """Do constant-value fit to graph and plot the jackknife procedure.

    We derive the constant fit value by jack-knifing. There are 2 forms here:
    - "my jackknifing": where we loop over all possible subgraphs, and calculate
    the mean for each.
    - "proper jackknifing": where we loop over all N-1 subgraphs, and calulate
    the mean for each.

    Using these, we can then find the peak mean, or the average mean.
    By default, we use the peak of "my jackknife" as it ignores the
    high-correction tail better, and gives the better-sampled low pT
    end more importance.

    Parameters
    ----------
    graph : TGraph
        Graph to fit
    eta_min, eta_max : float
        Eta bin boundaries, purely for the plots
    output_dir : str
        Output directory for plots.

    Returns
    -------
    MultiFunc
        MultiFunc object with a const-value function for the whole pT range.
    """
    print 'Doing constant-value fit'

    xarr, yarr = cu.get_xy(graph)
    xarr, yarr = np.array(xarr), np.array(yarr)  # use numpy array for easy slicing

    # "my jackknifing": Loop over all possible subgraphs, and calculate a mean for each
    end = len(yarr)
    means = []
    while end > 0:
        start = 0
        while start < end:
            means.append(yarr[start:end].mean())
            start += 1
        end -= 1

    # "proper" Jackknife means
    jack_means = [np.delete(yarr, i).mean() for i in range(len(yarr))]

    # Do plotting & peak finding, for both methods
    plot_name = os.path.join(output_dir, 'means_hist_%g_%g_myjackknife.pdf' % (eta_min, eta_max))
    peak, mean = find_peak_and_average_plot(means, eta_min, eta_max, plot_name, 'My jackknife')

    plot_name = os.path.join(output_dir, 'means_hist_%g_%g_root_jackknife.pdf' % (eta_min, eta_max))
    jackpeak, jackmean = find_peak_and_average_plot(jack_means, eta_min, eta_max, plot_name, 'Proper jackknife')

    print 'my jackknife peak:', peak
    print 'my jackknife mean:', mean
    print 'jackknife peak:', jackpeak
    print 'jackknfe mean:', jackmean
    const_fn = ROOT.TF1("constant", '[0]', 0, 1024)
    const_fn.SetParameter(0, peak)
    const_multifn = MultiFunc({(0, np.inf): const_fn})
    return const_multifn
def fit_correction(graph, function, fit_min=-1, fit_max=-1):
    """
    Fit response curve with given correction function, within given bounds.
    If fit_min and fit_max are < 0, then use the range of the function supplied.

    Note that sometime the fit fails - if so, we try raising the lower
    bound of the fit until it suceeds (sometimes it works at e.g. 45, but not 40).
    If that fails, then we lower the upper bound and try fitting, raising
    the lower bound again if necessary. Iterative process, so fairly slow.

    Note that the 'stepping' is done in terms of the graph points, so non-uniform.

    We stop when the upper bound of the fit approaches the original lower bound.

    Returns graph (with fitted function) and parameters of successful fit if
    successful (otherwise an empty list).
    """
    # Get the min and max of the fit function if the user didn't define it
    if fit_min < 0 and fit_max < 0:
        fit_min, fit_max = ROOT.Double(), ROOT.Double()
        function.GetRange(fit_min, fit_max)

    print "Fitting", fit_min, fit_max

    # Now do the fitting, incrementing the fit min if failure
    fit_result = -1

    xarr, yarr = cu.get_xy(graph)

    # Keep the points in the graph closest to the min/max values
    # (and the index of the point in the graph array) for reference
    orig_fit_min_ind, orig_fit_min = closest_element(xarr, fit_min)
    orig_fit_max_ind, orig_fit_max = closest_element(xarr, fit_max)
    fit_min_ind, fit_max_ind = orig_fit_min_ind, orig_fit_max_ind
    print 'Starting with fit range:', orig_fit_min, orig_fit_max

    while fit_max_ind - orig_fit_min_ind >= 5:
        fit_min_ind = orig_fit_min_ind
        while fit_min_ind + 5 < fit_max_ind:
            fit_min = xarr[fit_min_ind]
            fit_max = xarr[fit_max_ind]
            function.SetRange(fit_min, fit_max)

            mode = "QR"
            if str(function.GetExpFormula()).startswith("pol"):
                mode += "F"
            fit_result = int(
                graph.Fit(function.GetName(), mode, "", fit_min, fit_max))
            if fit_result != 0:
                fit_min_ind += 1
                continue

            # sanity check - sometimes will have status = 0 even though rubbish,
            if not check_sensible_function(function):
                fit_result = -1

            if fit_result == 0:
                print "Fit result:", fit_result, "for fit min", fit_min, "to max", fit_max
                break
            else:
                fit_min_ind += 1

        if fit_result == 0:
            break

        fit_max_ind -= 1
        print 'Trying with lowered fit_max:', xarr[fit_max_ind]

    params = []

    if fit_result != 0:
        print "Couldn't fit"
    else:
        for i in range(function.GetNumberFreeParameters()):
            params.append(function.GetParameter(i))

    return graph, params
def setup_fit(graph, function, absetamin, absetamax, outputfile):
    """Setup for fitting (auto-calculate sensible range).

    Returns a sub-graph of only sensible points (chop off turnover at low pT,
    and any high pT tail), along with a corresponding fit function
    whose range has been set to match the sub graph.

    The lower bound is the pT of the largest correction in the first half of
    the graph (unless that is the last graph point). The upper bound is where
    the smoothed gradient goes from -ve to +ve, falling back to the point
    with the smallest correction if no such crossing is found.

    For absetamin equal to exactly 2.964, 3.489 or 4.191 the calculated range
    is then overridden with hard-coded values (see the JOE_HACKs section).

    Parameters
    ----------
    graph : TGraphErrors
        Graph to derive the fit range from.
    function : TF1
        Fit function; a clone of it is returned, the original is untouched.
    absetamin, absetamax : float
        Eta bin edges. Used to name the cloned function; absetamin also
        selects the hard-coded overrides.
    outputfile
        Not used in this function.

    Returns
    -------
    (TGraphErrors, TF1)
        Sub-graph named "<graph name>_fit" and the cloned function with its
        range set to (fit_min, fit_max).

    Raises
    ------
    RuntimeError
        If the graph is empty, or if fit_min ends up larger than fit_max.
    """
    print 'Setting up fit'
    xarr, yarr = cu.get_xy(graph)
    exarr, eyarr = cu.get_exey(graph)
    # first test out graph isn't empty
    if len(xarr) == 0:
        raise RuntimeError("graph in setup_fit() is empty")

    fit_max = max(xarr)  # Maximum pt for upper bound of fit (always overwritten below)

    # fit_min = 10 if absetamin > 2.9 else 10
    fit_min = min(xarr)  # Minimum pt for lower bound of fit

    # For lower bound of fit, use either fit_min or the pt
    # of the maximum correction value, whichever has the larger pT.
    # Check to make sure it's not the last point on the graph
    # (e.g. if no turnover), in which case just use the default fit_min
    # Then find the index of the closest corresponding value in xarr (since
    # fit_min could correspond to a pT that isn't in the list of x-points)
    # Note that we want the maximum in the first half of the graph to avoid
    # the 'flick' at high pT in HF
    # JOE EDIT: (un)comment the next 4 lines to (not) have the flick
    # NOTE(review): `/` on the length relies on Python 2 integer division.
    max_corr = max(yarr[:len(yarr) / 2])
    max_corr_ind = yarr.index(max_corr)
    max_corr_pt = xarr[max_corr_ind]
    # NOTE(review): both halves of the `and` below are the same comparison;
    # possibly a copy-paste slip - confirm what the second test should be.
    fit_min = max(fit_min, max_corr_pt) if (max_corr_pt != xarr[-1]) and (
        max_corr_pt != xarr[-1]) else fit_min
    # index of the first graph point at or above fit_min
    min_ind = next(i for i, x in enumerate(xarr) if x >= fit_min)

    # To find upper limit of fit, we need to detect if & where there is a turnover
    # (i.e. where the gradient goes from -ve to +ve)
    # This is made difficult by the fact the graph may be 'noisy' and simply
    # taking the gradient may not be enough (and give multiple points where
    # the gradient changes). To counter this, we smooth the gradient by
    # averaging over several points
    def moving_average(arr, n):
        """Returns a np.array of moving-averages of array arr, where each
        point in the returned array is the average of the consecutive n points
        in the original array.

        By definition, this will return an array of length len(arr) + 1 - n
        """
        return np.array(
            [np.mean(arr[i:i + n]) for i in range(0,
                                                  len(arr) - n + 1)])

    def calc_crossing(arr):
        """Calculate where the array crosses 0 going from -ve to +ve.

        Looks at points in groups of 4, and finds the first group
        where the first 2 points < 0, and the next 2 points > 0.

        This ignores values which peak above 0 for 1 point.

        Returns (index, absolute value) of the point in that group closest
        to 0, or (None, None) if there is no such group.
        """
        for i in range(2, len(arr)):
            # negate the leading pair, so "all > 0" means the pattern -,-,+,+
            group = np.concatenate((-1 * arr[i - 2:i], arr[i:i + 2]))
            if np.all(group > 0):
                return i - 2 + list(group).index(np.min(group)), np.min(group)
        return None, None

    grad = np.gradient(yarr, 1)
    n_sample = 5
    intercept_ind, intercept = None, None
    # keep incrementing the smoothing value until we get a clean intercept
    while not intercept_ind and not intercept:
        x_ave = moving_average(xarr, n_sample)
        grad_ave = moving_average(grad, n_sample)
        intercept_ind, intercept = calc_crossing(grad_ave)
        n_sample += 1
        # quit if we've got stuck (windows of 5..11 points have been tried)
        if n_sample == 12:
            break

    # NOTE(review): an intercept at index 0 is falsy, so it ends up in the
    # fallback branch below - confirm whether that is intended.
    if intercept and intercept_ind:
        print 'Found minima'
        # NOTE: n_sample has already been incremented past the window that
        # produced the intercept, so the value printed is one too large.
        print 'Smoothing param:', n_sample
        # find closest x value to intercept
        max_ind, fit_max = closest_element(xarr, x_ave[intercept_ind])
    else:
        print '! Could not find minima, falling back to just using min()'
        # Here we assume a failure to find any minima, so fallback and use
        # the smallest point.
        max_ind = list(yarr).index(min(yarr))
        fit_max = xarr[max_ind]

    #############
    # JOE_HACKs #
    # Hard-coded overrides of the automatically calculated fit range.
    # 1. to fix the maximum pt for all fits (high pt saturation issue)
    # helps ease things into the right solution space
    # if fit_max > 650.0:
    #     print "*** WARNING: about to apply a JOE_HACK ***"
    #     print "*** lowers the upper limit of the fits to 650GeV ***"
    #     fit_max = 650.0
    # 2. to change the auto settings for a troublesome eta bin
    #if absetamin == 2.5:
    #    print "*** WARNING: about to apply a JOE_HACK ***"
    #    print "*** messing with fit limits for 2.500<|eta|<2.964 ***"
    #    max_ind = 80
    #    fit_max = xarr[max_ind]
    #    print max_ind
    #    print fit_max
    #    fit_min = 40.0
    #    min_ind = 0
    # The three active overrides below are identical apart from the eta bin:
    # the upper bound is pinned to graph point 17, fit_min is set to 40.0, and
    # the sub-graph starts from point 0 (min_ind is not derived from fit_min).
    # They only trigger on an exact float match of absetamin.
    if absetamin == 2.964:
        print "* WARNING: about to apply a JOE_HACK *"
        print "* messing with fit limits for 2.964<|eta|<3.489 *"
        max_ind = 17
        fit_max = xarr[max_ind]
        print max_ind
        print fit_max
        fit_min = 40.0
        min_ind = 0
    if absetamin == 3.489:
        print "* WARNING: about to apply a JOE_HACK *"
        print "* messing with fit limits for 3.489<|eta|<4.191 *"
        max_ind = 17
        fit_max = xarr[max_ind]
        print max_ind
        print fit_max
        fit_min = 40.0
        min_ind = 0
    if absetamin == 4.191:
        print "* WARNING: about to apply a JOE_HACK *"
        print "* messing with fit limits for 4.191<|eta|<5.191 *"
        max_ind = 17
        fit_max = xarr[max_ind]
        print max_ind
        print fit_max
        fit_min = 40.0
        min_ind = 0
    ###############
    ###############

    if fit_min > fit_max:
        raise RuntimeError('fit_min > fit_max! (%f > %f)' % (fit_min, fit_max))

    print "Correction fn fit range:", fit_min, fit_max

    # Generate a correction function with suitable range
    this_fit = function.Clone(function.GetName() + 'eta_%g_%g' %
                              (absetamin, absetamax))
    this_fit.SetRange(fit_min, fit_max)

    # Make a sub-graph with only the points used for fitting
    # Do not use graph.RemovePoint()! It doesn't work, and only removes every other point
    # Instead make a graph with the bit of array we want
    fit_graph = ROOT.TGraphErrors(max_ind + 1 - min_ind,
                                  np.array(xarr[min_ind:max_ind + 1]),
                                  np.array(yarr[min_ind:max_ind + 1]),
                                  np.array(exarr[min_ind:max_ind + 1]),
                                  np.array(eyarr[min_ind:max_ind + 1]))
    fit_graph.SetName(graph.GetName() + "_fit")

    return fit_graph, this_fit