예제 #1
0
파일: probe.py 프로젝트: etscrivner/dse
    def satisfies_preconditions(cls, historical_data, proxy_value):
        """Indicates whether or not the historical data allows this method to
        be used for the given proxy value.

        The checks are applied in order and the first failure returns False:
        at least three paired data points, an intercept (beta0) no larger
        than 25% of the estimated time, a slope (beta1) within +/-50% of the
        historical time-per-size ratio, r squared of at least 0.5 and a
        significance of at most 0.05.

        Arguments:
            historical_data(HistoricalData): The historical estimation data
            proxy_value(float): The proxy size estimate

        Returns:
            bool: True if this method can be used, False otherwise.
        """
        planned_sizes, actual_times = trim_to_equal_length(
            historical_data.proxy_sizes, historical_data.actual_times)
        # Too few data points
        if len(planned_sizes) < 3:
            return False
        regression = cls(historical_data).get_regression()
        expected_time = regression.estimate(proxy_value)
        # Beta0 is not close to zero
        if regression.beta0 > 0.25 * expected_time:
            return False
        # Work in floats so integer inputs are not truncated by Python 2
        # integer division.
        total_size = float(sum(planned_sizes))
        total_time = float(sum(actual_times))
        # Without a non-zero total on both sides there is no usable
        # time-per-size ratio to compare the slope against.
        if total_size == 0 or total_time == 0:
            return False
        # Equivalent to 1.0 / (total_size / total_time).
        productivity = total_time / total_size
        beta1_range = 0.5 * productivity
        # Beta1 is out of bounds. Both bounds must test beta1; the previous
        # upper-bound check referenced a non-existent ``beta`` attribute.
        if (regression.beta1 < (productivity - beta1_range)
                or regression.beta1 > (productivity + beta1_range)):
            return False
        # Weakly correlated
        if statistics.correlation(planned_sizes, actual_times) ** 2 < 0.5:
            return False
        # Weak statistical significance
        if statistics.significance(planned_sizes, actual_times) > 0.05:
            return False
        return True
예제 #2
0
파일: probe.py 프로젝트: etscrivner/dse
    def satisfies_preconditions(cls, historical_data, proxy_value):
        """Report whether the historical data qualifies this method for use
        with the given proxy value.

        Each criterion is evaluated in turn; the first one that fails makes
        the method return False.

        Arguments:
            historical_data(HistoricalData): The historical estimation data
            proxy_value(float): The proxy size estimate

        Returns:
            bool: True if this method can be used, False otherwise.
        """
        x_sizes, y_sizes = trim_to_equal_length(
            historical_data.proxy_sizes, historical_data.actual_sizes)

        # Criterion 1: at least three paired data points.
        if len(y_sizes) < 3:
            return False

        model = cls(historical_data).get_regression()

        # Criterion 2: the intercept may be at most a quarter of the
        # estimate produced for this proxy value.
        intercept_ceiling = 0.25 * model.estimate(proxy_value)
        if model.beta0 > intercept_ceiling:
            return False

        # Criterion 3: the slope must lie between 0.5 and 2.0.
        if model.beta1 < 0.5 or model.beta1 > 2.0:
            return False

        # Criterion 4: r squared must be at least 0.5.
        r_squared = statistics.correlation(x_sizes, y_sizes) ** 2
        if r_squared < 0.5:
            return False

        # Criterion 5: significance must not exceed 0.05.
        tail_significance = statistics.significance(x_sizes, y_sizes)
        return not (tail_significance > 0.05)
예제 #3
0
파일: probe.py 프로젝트: etscrivner/dse
    def get_significance(self):
        """Compute the correlation significance of the stored x/y values.

        Delegates to ``statistics.significance`` using this object's
        ``x_values`` and ``y_values``.

        Returns:
            float: The percent chance that values were generated randomly.
        """
        xs, ys = self.x_values, self.y_values
        return statistics.significance(xs, ys)
예제 #4
0
파일: 7A.py 프로젝트: etscrivner/dse
    def execute(self):
        """Run the program.

        Reads the CSV file named by the ``CSVFILE`` command-line argument,
        asks which columns to use as X and Y, then prints the correlation,
        t-value and significance for those two columns.

        Rows where either chosen column is empty are skipped, so the X and Y
        series always stay paired row for row.

        Exits with status 1 if the CSV file yields no data.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('CSVFILE', help='path to csv file with data.')
        args = parser.parse_args()
        csv_data = io.read_csv_file(args.CSVFILE)

        if not csv_data:
            print 'ERROR: Invalid csv data file.'
            sys.exit(1)

        columns = csv_data[0].keys()
        x_column = io.choose_from_list('X Column:', columns)
        y_column = io.choose_from_list('Y Column:', columns)
        # Require BOTH cells to be non-empty. Filtering only on the X column
        # made float('') blow up when a row had X but a blank Y.
        paired_rows = [
            each for each in csv_data if each[x_column] and each[y_column]
        ]
        x_data = [float(each[x_column]) for each in paired_rows]
        y_data = [float(each[y_column]) for each in paired_rows]

        print 'R:', statistics.correlation(x_data, y_data)
        print 'T:', statistics.t_value(x_data, y_data)
        print 'Significance:', statistics.significance(x_data, y_data)
예제 #5
0
 def test_should_correctly_compute_significance(self):
     """Significance of the fixture data matches 2 * (1 - 0.99999) to
     four decimal places."""
     expected = 2 * (1 - 0.99999)
     actual = statistics.significance(self.x_data, self.y_data)
     self.assertAlmostEqual(actual, expected, 4)