def satisfies_preconditions(cls, historical_data, proxy_value):
    """Indicates whether or not the historical data allows this method to be
    used for the given proxy value.

    The method is rejected as soon as one of these checks fails, in order:
    fewer than 3 data points; beta0 larger than 25% of the estimated time;
    beta1 further than 50% away from the historical productivity; squared
    correlation below 0.5; significance above 0.05.

    Arguments:
        historical_data(HistoricalData): The historical estimation data
        proxy_value(float): The proxy size estimate

    Returns:
        bool: True if this method can be used, False otherwise.
    """
    planned_sizes, actual_times = trim_to_equal_length(
        historical_data.proxy_sizes, historical_data.actual_times)

    # Too few data points
    if len(planned_sizes) < 3:
        return False

    regression = cls(historical_data).get_regression()
    expected_time = regression.estimate(proxy_value)

    # Beta0 is not close to zero relative to the estimate
    if regression.beta0 > 0.25 * expected_time:
        return False

    # Productivity here is total actual time divided by total planned size.
    # NOTE(review): with integer inputs under Python 2 the inner division
    # floors -- confirm the historical data are floats.
    productivity = 1.0 / (sum(planned_sizes) / sum(actual_times))
    beta1_range = 0.5 * productivity
    lower_bound = productivity - beta1_range
    upper_bound = productivity + beta1_range

    # Beta1 is out of bounds. Both comparisons must read ``beta1``; the
    # upper-bound check previously read ``regression.beta``.
    if regression.beta1 < lower_bound or regression.beta1 > upper_bound:
        return False

    # Weakly correlated
    if statistics.correlation(planned_sizes, actual_times) ** 2 < 0.5:
        return False

    # Weak statistical significance
    if statistics.significance(planned_sizes, actual_times) > 0.05:
        return False

    return True
def satisfies_preconditions(cls, historical_data, proxy_value):
    """Decide whether this estimation method is usable for a proxy value.

    The checks run in a fixed order and the first failing one rejects the
    method: at least 3 data points, beta0 no larger than 25% of the
    estimated size, beta1 neither below 0.5 nor above 2.0, squared
    correlation not below 0.5, and significance not above 0.05.

    Arguments:
        historical_data(HistoricalData): The historical estimation data
        proxy_value(float): The proxy size estimate

    Returns:
        bool: True if this method can be used, False otherwise.
    """
    xs, ys = trim_to_equal_length(
        historical_data.proxy_sizes,
        historical_data.actual_sizes,
    )

    # Not enough history to fit a meaningful regression.
    if len(ys) < 3:
        return False

    model = cls(historical_data).get_regression()
    projected_size = model.estimate(proxy_value)

    # The intercept must be small compared with the estimate itself.
    intercept_limit = 0.25 * projected_size
    if model.beta0 > intercept_limit:
        return False

    # The slope must not fall outside the 0.5 .. 2.0 band.
    slope = model.beta1
    if slope < 0.5 or slope > 2.0:
        return False

    # The squared correlation must not be below 0.5.
    r_squared = statistics.correlation(xs, ys) ** 2
    if r_squared < 0.5:
        return False

    # Usable unless the significance is above 0.05.
    return not statistics.significance(xs, ys) > 0.05
def get_significance(self):
    """Compute the correlation significance of the stored x and y values.

    Returns:
        float: The value produced by ``statistics.significance`` for
            ``self.x_values`` and ``self.y_values``, i.e. the chance that
            the values were generated randomly.
    """
    xs, ys = self.x_values, self.y_values
    return statistics.significance(xs, ys)
def execute(self):
    """Run the program.

    Reads the CSV file named by the ``CSVFILE`` command-line argument, asks
    the user to choose an X and a Y column, and prints the correlation,
    t-value and significance of the two columns.

    Exits with status 1 after printing an error when the CSV file yields
    no data.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('CSVFILE', help='path to csv file with data.')
    args = parser.parse_args()

    csv_data = io.read_csv_file(args.CSVFILE)
    if not csv_data:
        # Single-argument parenthesised print emits the same bytes under
        # Python 2 and also parses under Python 3.
        print('ERROR: Invalid csv data file.')
        sys.exit(1)

    columns = csv_data[0].keys()
    x_column = io.choose_from_list('X Column:', columns)
    y_column = io.choose_from_list('Y Column:', columns)

    # Keep only rows where BOTH cells are filled. Filtering on the X cell
    # alone let a row with a blank Y cell reach float('') and raise
    # ValueError. Filtering once also keeps X and Y values paired.
    rows = [each for each in csv_data if each[x_column] and each[y_column]]
    x_data = [float(each[x_column]) for each in rows]
    y_data = [float(each[y_column]) for each in rows]

    print('R: %s' % (statistics.correlation(x_data, y_data),))
    print('T: %s' % (statistics.t_value(x_data, y_data),))
    print('Significance: %s' % (statistics.significance(x_data, y_data),))
def test_should_correctly_compute_significance(self):
    """Significance of the fixture data matches the expected value to 4 places."""
    expected = 2 * (1 - 0.99999)
    actual = statistics.significance(self.x_data, self.y_data)
    self.assertAlmostEqual(actual, expected, places=4)