def test_sorts_points(self):
        """Check that assigning percentiles leaves the points ordered by value.

        The points are created in descending order (10 down to 2), so the list
        starts out unsorted; it must be sorted once
        ``assign_percentiles_to_points`` has run.
        """
        def by_value(point):
            return point.value

        points = [MockPoint(number) for number in range(10, 1, -1)]
        cutoffs = get_percentiles_for_points(points)

        # The list must still be unsorted after the percentiles were computed.
        sorted_before = is_sorted(points, key=by_value)
        self.assertFalse(sorted_before)

        assign_percentiles_to_points(points, cutoffs)

        sorted_after = is_sorted(points, key=by_value)
        self.assertTrue(sorted_after)
    def test_adds_percentiles(self):
        """Check that every point gains a rank when percentiles are assigned.

        Before the assignment every point's ``rank`` must be ``None``;
        afterwards none of them may be.
        """
        pts = [MockPoint(n) for n in range(10, 1, -1)]
        pvs = get_percentiles_for_points(pts)

        # all() inspects every point. A reduce() without an initial value would
        # seed its accumulator with the first point object itself and never
        # look at that point's rank.
        none_have_percentile = all(pt.rank is None for pt in pts)
        self.assertTrue(none_have_percentile)

        assign_percentiles_to_points(pts, pvs)

        all_have_percentiles = all(pt.rank is not None for pt in pts)
        self.assertTrue(all_have_percentiles)
    def test_uses_given_percentiles(self):
        """Check that every assigned rank is one of the expected percentiles.

        The accepted ranks are the fractions ``n / 1000`` for ``n`` in 1..999;
        a rank of ``None`` is accepted as well.
        """
        # NOTE(review): these candidate percentiles are only used for the
        # membership check below; they are never passed to
        # get_percentiles_for_points -- confirm this matches the test's intent.
        ps_check = {n / 1000 for n in range(1, 1000)}

        def checkset(v):
            return v is None or v in ps_check

        pts = [MockPoint(n) for n in range(100, 3, -4)]
        pvs = get_percentiles_for_points(pts)

        assign_percentiles_to_points(pts, pvs)

        # all() checks every point, including the first one, which a reduce()
        # without an initial value would use as its accumulator and skip.
        all_in_set = all(checkset(pt.rank) for pt in pts)
        self.assertTrue(all_in_set)
    def values_less_than_their_percentile(self):
        """Check that no point's value exceeds the cut-off of its percentile.

        Points without a rank (``rank is None``) are skipped.

        NOTE(review): the name lacks the ``test_`` prefix used by the sibling
        tests, so a unittest-style runner would presumably not collect this
        method automatically -- confirm whether that is intentional.
        """
        # create points for our values (1-100)
        pts = [MockPoint(v) for v in range(1, 101)]
        pvs = get_percentiles_for_points(pts)

        # turn these into a dictionary (percentile -> cut-off value) for fast
        # lookups
        percentile_values = dict(pvs)

        # pass the computed percentiles explicitly, as the sibling tests do, so
        # the ranks assigned here are keys of percentile_values
        assign_percentiles_to_points(pts, pvs)

        # now, for each point, we can look up the cut-off for the percentile it
        # was assigned, and check that this cut-off is at least as large as the
        # value of the point (i.e. confirm that the point fits inside the
        # bucket it was assigned to)
        for pt in pts:
            with self.subTest(value=pt.value):
                value = pt.value
                rank = pt.rank
                if rank is not None:  # because this can happen...
                    percentile_value = percentile_values[rank]
                    self.assertLessEqual(value, percentile_value)
# Example #5
# 0
    def handle(self, *args, **options):
        """Create random data points for one indicator and year, then save them.

        Reads ``indicator``, ``year``, ``count``, ``mean`` and ``sigma`` from
        *options*. A falsy ``count`` (including 0) falls back to the number of
        ``US_County`` rows, and a falsy ``sigma`` falls back to ``mean / 5``.
        Percentiles are computed for the new points, assigned to them, and
        both the points and the percentile rows are bulk-inserted. Progress
        messages go to ``self.stdout``.
        """
        indicator_name = options['indicator']
        year = options['year']
        point_total = options['count'] or US_County.objects.count()
        mean = options['mean']
        spread = options['sigma'] or mean / 5

        self.stdout.write(f"Creating random data for {indicator_name}, {year}")

        # https://docs.djangoproject.com/en/2.1/ref/models/querysets/#get-or-create
        indicator, _created = Health_Indicator.objects.get_or_create(
            name=indicator_name,
        )
        data_set = _create_data_set(indicator, year)

        self.stdout.write(
            f"Using indicator w/ id {indicator.id}, data set w/ id {data_set.id}"
        )

        # No argument: seed from the system's entropy source or the clock.
        random.seed()

        points = _create_data_points(data_set, point_total, mean, spread)
        self.stdout.write(f"Created {len(points)} new data points")

        self.stdout.write("Adding percentiles...")
        cutoffs = get_percentiles_for_points(points)
        assign_percentiles_to_points(points, cutoffs)

        # One Percentile row per (rank, cut-off value) pair of this data set.
        percentile_rows = [
            Percentile(rank=rank, value=cutoff, data_set=data_set)
            for rank, cutoff in cutoffs
        ]

        # https://docs.djangoproject.com/en/2.1/ref/models/querysets/#bulk-create
        # it mentions several caveats, but it seems sufficient for this use case
        self.stdout.write("Saving data points")
        Data_Point.objects.bulk_create(points)
        Percentile.objects.bulk_create(percentile_rows)