Beispiel #1
0
    def execute(self):
        """Print a linear-regression projection report for a CSV data set.

        Reads the CSVFILE and ESTVAL positional command-line arguments, loads
        the CSV rows, asks the user which columns hold the X and Y series and
        then prints, in order: the X and Y data, the regression parameters
        beta0 and beta1, the standard deviation around the regression, the
        projection ``beta0 + beta1 * ESTVAL``, the t values used for the 70
        and 90 percent intervals, and for each interval the prediction range
        together with the upper (UPI = projection + range) and lower
        (LPI = projection - range) prediction-interval bounds.

        Every report line is written with a single-argument ``print(...)``
        call so the emitted text is the same under Python 2 and Python 3.

        Exits with status 1 when the CSV file yields no rows.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            'CSVFILE', help='path to csv file with historical data.')
        # type=float makes argparse reject a non-numeric ESTVAL with a usage
        # message instead of letting float() raise an uncaught ValueError.
        parser.add_argument(
            'ESTVAL', type=float, help='an estimated value')
        args = parser.parse_args()
        estimated_value = args.ESTVAL
        csv_data = io.read_csv_file(args.CSVFILE)

        if not csv_data:
            print('ERROR: No data')
            sys.exit(1)

        columns = csv_data[0].keys()
        x_column = io.choose_from_list('X Column:', columns)
        y_column = io.choose_from_list('Y Column:', columns)
        # Empty cells are dropped independently per column; the two series
        # are then cut to a common length below.
        # NOTE(review): a blank cell in the middle of one column shifts the
        # X/Y pairing of the following rows -- confirm this is intended.
        x_data = [float(each[x_column]) for each in csv_data if each[x_column]]
        y_data = [float(each[y_column]) for each in csv_data if each[y_column]]
        print('')

        x_data, y_data = probe.trim_to_equal_length(x_data, y_data)
        print('X DATA: {}'.format(x_data))
        print('Y DATA: {}'.format(y_data))
        print('')

        beta_0 = statistics.beta_0(x_data, y_data)
        print(u'\u03B20: {}'.format(beta_0))

        beta_1 = statistics.beta_1(x_data, y_data)
        print(u'\u03B21: {}'.format(beta_1))

        # Integral of the t distribution (n - 2 passed as its parameter) from
        # minus infinity to x; inverted below to obtain the t values.
        integ = integration.Integrator(20, 0.000001)
        tdist = statistics.make_t_distribution(len(x_data) - 2)
        itdist = lambda x: integ.integrate_minus_infinity_to(tdist, x)

        std_dev = (
            statistics.standard_deviation_around_regression(x_data, y_data)
        )
        print('StdDev:  {}'.format(std_dev))

        projection = beta_0 + beta_1 * estimated_value
        print('Projection:  {}'.format(projection))

        # A two-sided 70 (90) percent interval leaves 15 (5) percent in each
        # tail, hence the cumulative probabilities 0.85 and 0.95.
        print('t(70 percent):  {}'.format(
            integration.approximate_inverse(itdist, 0.85)))
        print('t(90 percent):  {}'.format(
            integration.approximate_inverse(itdist, 0.95)))

        range70 = statistics.prediction_range(
            estimated_value, 0.85, x_data, y_data
        )
        range90 = statistics.prediction_range(
            estimated_value, 0.95, x_data, y_data
        )
        # Each label is paired with the value it names: the range itself,
        # then the upper bound, then the lower bound.
        print('Range(70 percent) = {} UPI = {} LPI = {}'.format(
            range70, projection + range70, projection - range70))
        print('Range(90 percent) = {} UPI = {} LPI = {}'.format(
            range90, projection + range90, projection - range90))
Beispiel #2
0
def prediction_range(x_k, alpha, xvalues, yvalues):
    """Return the prediction range around a regression estimate at x_k.

    The value returned is

        t * sigma * sqrt(1 + 1/n + (x_k - x_avg)^2 / sum((x_i - x_avg)^2))

    where n is the number of x values, sigma is the standard deviation
    around the regression of the two series, x_avg is the mean of the x
    values and t is the point at which the integral of the t distribution
    built from n - 2 (taken from minus infinity) equals alpha.

    Arguments:
        x_k(float): An estimated value
        alpha(float): The t-distribution alpha value.
        xvalues(list): A list of values
        yvalues(list): A list of values

    Returns:
        float: The prediction range

    Raises:
        RuntimeError: If either list holds fewer than three values.
    """
    if min(len(xvalues), len(yvalues)) < 3:
        raise RuntimeError('Too few values to compute prediction interval')

    count = len(xvalues)
    t_density = make_t_distribution(count - 2)
    integrator = integration.Integrator(20, 0.00001)

    def cumulative(upper_limit):
        # Integral of the t density from minus infinity to upper_limit.
        return integrator.integrate_minus_infinity_to(t_density, upper_limit)

    sigma = standard_deviation_around_regression(xvalues, yvalues)
    t_value = integration.approximate_inverse(cumulative, alpha)
    x_mean = mean(xvalues)

    squared_spread = sum((value - x_mean) ** 2 for value in xvalues)
    radicand = 1 + 1.0 / count + (x_k - x_mean) ** 2 / squared_spread

    return t_value * sigma * math.sqrt(radicand)
Beispiel #3
0
    def get_normal_distribution_buckets(self, num_segments):
        """Build the table of buckets the normal distribution is split into.

        Every bucket is given the same share (1 / num_segments) of the
        cumulative probability. The upper bound of each bucket but the last
        is found by inverting the integral of the normal distribution; the
        first bucket has no lower bound and the last one no upper bound
        (both are passed to SegmentRange as None).

        Arguments:
            num_segments(float): The number of buckets required.

        Returns:
            dict: A hash map keyed by the segment range of each bucket, with
                every value initialised to 0.
        """
        bucket_probability = 1.0 / num_segments
        integ = integration.Integrator(20, 1E-10)

        def cumulative(upper_limit):
            # Integral of the normal distribution up to upper_limit.
            return integ.integrate_minus_infinity_to(
                statistics.normal_distribution, upper_limit)

        buckets = {}
        lower_bound = None
        index = 1
        last_index = int(num_segments)
        while index < last_index:
            upper_bound = integration.approximate_inverse(
                cumulative, index * bucket_probability)
            buckets[SegmentRange(lower_bound, upper_bound)] = 0
            lower_bound = upper_bound
            index += 1

        # The final bucket is open-ended on the right.
        buckets[SegmentRange(lower_bound, None)] = 0
        return buckets
Beispiel #4
0
    def get_normal_distribution_buckets(self, num_items, num_segments):
        """Divide the normal distribution into the given number of segments.

        The share of the distribution given to each segment is the number of
        items allocated to it (as reported by ``get_segment_allocation``)
        divided by ``num_items``. The upper bound of every segment except the
        last is found by inverting the integral of the normal distribution at
        the running cumulative probability; the first segment has no lower
        bound and the last has no upper bound (both passed as None).

        Arguments:
            num_items(int): The number of items to be tested.
            num_segments(int): The number of segments to divide the normal
                distribution into.

        Returns:
            list: A list of SegmentRange objects for each segment.

        Raises:
            ValueError: If num_segments is less than 1.
        """
        # Validate explicitly: the parenthesised form ``assert(cond, msg)``
        # tests a non-empty tuple, which is always true, and assert
        # statements are removed entirely when Python runs with -O.
        if num_segments <= 0:
            raise ValueError(
                "number of segments is less than 1: {}".format(num_segments))

        integrator = integration.Integrator(20, 1E-10)
        func = lambda x: integrator.integrate_minus_infinity_to(
            statistics.normal_distribution, x)

        results = []
        cumulative_probability = 0
        previous_upper_bound = None
        segment_allocation = self.get_segment_allocation(
            num_items, num_segments)

        # The last segment is handled after the loop because it is
        # open-ended and needs no inverse lookup.
        for items_in_segment in segment_allocation[:-1]:
            cumulative_probability += items_in_segment / float(num_items)
            upper_bound = integration.approximate_inverse(
                func, cumulative_probability)
            results.append(SegmentRange(previous_upper_bound, upper_bound))
            previous_upper_bound = upper_bound

        results.append(SegmentRange(previous_upper_bound, None))
        return results