Example #1
0
    def satisfies_preconditions(cls, historical_data, proxy_value):
        """Decide whether this method may be applied to ``proxy_value``.

        The historical proxy and actual sizes are trimmed to equal length
        and then checked, in order, against the following conditions. The
        first one that fails makes the method unusable:

        * at least three data points are available;
        * the regression intercept (beta0) is no more than 25% of the size
          estimated for ``proxy_value``;
        * the regression slope (beta1) lies within [0.5, 2.0];
        * the squared correlation of the data is at least 0.5;
        * the significance of the data is at most 0.05.

        Arguments:
            historical_data(HistoricalData): The historical estimation data
            proxy_value(float): The proxy size estimate

        Returns:
            bool: True if this method can be used, False otherwise.
        """
        xs, ys = trim_to_equal_length(
            historical_data.proxy_sizes, historical_data.actual_sizes)

        # Fewer than three points is too little data to work with.
        if len(ys) < 3:
            return False

        model = cls(historical_data).get_regression()
        projected_size = model.estimate(proxy_value)

        # The intercept must stay small relative to the projected size.
        intercept_limit = 0.25 * projected_size
        if model.beta0 > intercept_limit:
            return False

        # The slope must fall inside the accepted band.
        slope = model.beta1
        if slope < 0.5 or slope > 2.0:
            return False

        # The data must be at least moderately correlated (R^2 >= 0.5).
        r_squared = statistics.correlation(xs, ys) ** 2
        if r_squared < 0.5:
            return False

        # Finally, the relationship must be statistically significant.
        return not statistics.significance(xs, ys) > 0.05
Example #2
0
    def satisfies_preconditions(cls, historical_data, proxy_value):
        """Indicates whether or not the historical data allows this method to
        be used for the given proxy value.

        The historical proxy sizes and actual times are trimmed to equal
        length and must pass every check below, in order:

        * at least three data points are available;
        * the regression intercept (beta0) is no more than 25% of the time
          estimated for ``proxy_value``;
        * the regression slope (beta1) is within 50% of the historical
          ratio of total actual time to total proxy size;
        * the squared correlation of the data is at least 0.5;
        * the significance of the data is at most 0.05.

        Arguments:
            historical_data(HistoricalData): The historical estimation data
            proxy_value(float): The proxy size estimate

        Returns:
            bool: True if this method can be used, False otherwise.
        """
        planned_sizes, actual_times = trim_to_equal_length(
            historical_data.proxy_sizes, historical_data.actual_times)
        # Too few data points
        if len(planned_sizes) < 3:
            return False
        regression = cls(historical_data).get_regression()
        expected_time = regression.estimate(proxy_value)
        # Beta0 is not close to zero
        if regression.beta0 > 0.25 * expected_time:
            return False
        total_size = sum(planned_sizes)
        total_time = sum(actual_times)
        # Without a non-zero total size and total time there is no usable
        # productivity figure, so the preconditions cannot be met (the ratio
        # below would otherwise divide by zero).
        if not total_size or not total_time:
            return False
        # Time spent per unit of size. Forcing a float keeps Python 2 from
        # truncating the ratio when both totals are integers.
        productivity = float(total_time) / total_size
        beta1_range = 0.5 * productivity
        # Beta1 is not within 50% of the historical productivity
        if (regression.beta1 < (productivity - beta1_range)
                or regression.beta1 > (productivity + beta1_range)):
            return False
        # Weakly correlated
        if statistics.correlation(planned_sizes, actual_times) ** 2 < 0.5:
            return False
        # Weak statistical significance
        if statistics.significance(planned_sizes, actual_times) > 0.05:
            return False
        return True
Example #3
0
    def get_correlation(self):
        """Returns the correlation between estimation data values.

        Returns:
            float: The correlation (R^2) value.
        """
        return statistics.correlation(self.x_values, self.y_values) ** 2
Example #4
0
File: 7A.py Project: etscrivner/dse
    def execute(self):
        """Run the program.

        Parses the CSV file path from the command line, reads the file,
        asks which columns to use as X and Y, and prints the correlation,
        t-value and significance of the selected data. Exits with status 1
        when the CSV file yields no data.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('CSVFILE', help='path to csv file with data.')
        args = parser.parse_args()
        csv_data = io.read_csv_file(args.CSVFILE)

        if not csv_data:
            print('ERROR: Invalid csv data file.')
            sys.exit(1)

        columns = csv_data[0].keys()
        x_column = io.choose_from_list('X Column:', columns)
        y_column = io.choose_from_list('Y Column:', columns)

        # Use only rows where BOTH selected cells are filled in. Filtering on
        # the X cell alone would crash on float('') for a row with a blank Y
        # cell; filtering each list independently would break the pairing of
        # x and y values.
        complete_rows = [
            each for each in csv_data if each[x_column] and each[y_column]
        ]
        x_data = [float(each[x_column]) for each in complete_rows]
        y_data = [float(each[y_column]) for each in complete_rows]

        # Single-argument print(...) produces the same text under Python 2's
        # print statement and Python 3's print function.
        print('R: {0}'.format(statistics.correlation(x_data, y_data)))
        print('T: {0}'.format(statistics.t_value(x_data, y_data)))
        print('Significance: {0}'.format(
            statistics.significance(x_data, y_data)))
Example #5
0
 def matrix_entry(i, j):
     """Return the correlation between columns ``i`` and ``j`` of ``data``.

     ``data`` is not a parameter; it is taken from the surrounding scope.
     """
     column_i = get_column(data, i)
     column_j = get_column(data, j)
     return correlation(column_i, column_j)
Example #6
0
    for _ in range(num_components):
        component = first_principal_component(X)
        components.append(component)
        X = remove_projection(X, component)

    return components

def transform_vector(v, components):
    """Return the dot product of ``v`` with each entry of ``components``.

    The result is a list with one value per component, in the same order
    as ``components``.
    """
    coordinates = []
    for direction in components:
        coordinates.append(dot(v, direction))
    return coordinates

def transform(X, components):
    """Apply ``transform_vector`` to every row of ``X``.

    Returns a list holding the transformed version of each row, in the
    original row order.
    """
    transformed_rows = []
    for row in X:
        transformed_rows.append(transform_vector(row, components))
    return transformed_rows

if __name__ == "__main__":

    # Report the correlation of xs against each of the two y series.
    print("correlation(xs, ys1)", correlation(xs, ys1))
    print("correlation(xs, ys2)", correlation(xs, ys2))

    # Safe parsing: load the stock-price CSV through parse_rows_with, which
    # is handed one parser per column (None for the middle column).
    column_parsers = [dateutil.parser.parse, None, float]
    data = []

    with open("data\\comma_delimited_stock_prices.csv", "r", encoding='utf8', newline='') as f:
        reader = csv.reader(f)
        data.extend(parse_rows_with(reader, column_parsers))

    # Print every row that contains a None field.
    for row in data:
        has_missing_field = any(field is None for field in row)
        if has_missing_field:
            print(row)
def least_squares_fit(x, y):
    """Given training values x and y, find the least-squares values of
    alpha and beta.

    beta is the correlation of x and y scaled by the ratio of their
    standard deviations (y over x); alpha is mean(y) - beta * mean(x).
    Returns the pair (alpha, beta).
    """
    r = correlation(x, y)
    spread_y = standard_deviation(y)
    spread_x = standard_deviation(x)
    beta = r * spread_y / spread_x
    alpha = mean(y) - beta * mean(x)
    return alpha, beta
def least_squares_fit(x, y):
    """Fit y = alpha + beta * x to the training values by least squares.

    Returns:
        tuple: ``(alpha, beta)``, where beta is
            correlation(x, y) * standard_deviation(y) / standard_deviation(x)
            and alpha is mean(y) - beta * mean(x).
    """
    slope = correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    # The intercept is the offset left between the two means once the slope
    # has been applied.
    intercept = mean(y) - slope * mean(x)
    return (intercept, slope)
Example #9
0
 def test_should_correctly_compute_correlation(self):
     # Correlation of the fixture's x and y data.
     r_value = statistics.correlation(self.x_data, self.y_data)
     # Compared at assertAlmostEqual's default precision.
     self.assertAlmostEqual(r_value, 0.9543158)
     # The squared correlation is compared to four decimal places.
     r_squared = r_value**2
     self.assertAlmostEqual(r_squared, 0.9107, 4)