# Example #1
# 0
def test_fred(x=2, y=2, z=50, ndv=-999):
    """Print the 75th percentile of a saved array computed in several ways.

    Loads ``/tmp/random_{x}_{y}_{z}.npy`` and prints, for visual comparison:

    * a per-pixel ``numpy.percentile`` (nearest interpolation) with every
      value equal to ``ndv`` removed first,
    * the result of ``calculate_stack_statistic_percentile(data, 75)``,
    * the series ``data[:, 0, 0]``,
    * ``do_percentile_1d`` applied along axis 0 of the array.

    :param x: number of positions visited on array axis 1; also part of the
        file name
    :param y: number of positions visited on array axis 2; also part of the
        file name
    :param z: only used to build the file name
    :param ndv: no-data value excluded from the per-pixel percentile
    """
    print("Testing...")

    data = numpy.load("/tmp/random_{x}_{y}_{z}.npy".format(x=x, y=y, z=z))

    # Calculate the percentiles stripping out no data values.
    # The result is written as p[xi, yi] (mirroring data[:, xi, yi]), so it
    # has to be allocated as (x, y); a (y, x) allocation is only large enough
    # when x == y.
    p = numpy.empty((x, y), dtype=numpy.int16)

    from itertools import product

    # Dedicated index names keep the x / y parameters from being rebound.
    for yi, xi in product(range(y), range(x)):
        d = data[:, xi, yi]
        p[xi, yi] = numpy.percentile(d[d != ndv], 75, axis=0, interpolation="nearest")

    # Header and value are printed separately so the output (and the order in
    # which things are evaluated) is the same on Python 2 and Python 3.
    print("percentiles")
    print(p)

    from datacube.api.utils import calculate_stack_statistic_percentile

    print("calculated percentiles")
    print(calculate_stack_statistic_percentile(data, 75))

    print(data[:, 0, 0])

    print(numpy.apply_along_axis(do_percentile_1d, axis=0, arr=data))
    def run(self):
        """Compute every requested statistic for this chunk and save it.

        The tiles from ``self.get_tiles()`` are stacked for ``self.band``.
        When ``self.tidal_workflow`` is set, only tiles whose end date
        (formatted ``%Y-%m-%d``) is found in ``self.load_filterfile()`` are
        stacked. Each statistic present in ``self.statistics`` is written
        with ``numpy.save`` to ``self.get_statistic_filename(statistic)``.
        Nothing is written when the stack is empty.
        """
        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)
        data_type = get_dataset_type_data_type(self.dataset_type)
        tiles = self.get_tiles()

        if self.tidal_workflow:
            selected = []
            wanted_dates = self.load_filterfile()
            _log.info("\tlength of original tiles is %d", len(tiles))
            for tile in tiles:
                # The original performs this lookup without using the result;
                # it is kept so a failing lookup still surfaces here.
                tile.datasets[self.dataset_type]
                end_date = str(tile.end_datetime.strftime("%Y-%m-%d"))
                if end_date in wanted_dates:
                    selected.append(tile)

            _log.info("\tlength of new filtered tiles is %d", len(selected))
        else:
            selected = tiles

        stack = get_dataset_data_stack(
            selected, self.dataset_type, self.band.name,
            ndv=ndv,
            x=self.x_offset, y=self.y_offset,
            x_size=self.x_chunk_size, y_size=self.y_chunk_size,
            mask_pqa_apply=self.mask_pqa_apply,
            mask_pqa_mask=self.mask_pqa_mask)

        if len(stack) == 0:
            return

        def _write(statistic, values):
            # The computed array is only referenced for the duration of this
            # call, so it is released before the next statistic is calculated.
            numpy.save(self.get_statistic_filename(statistic), values)

        # TODO get statistics to be generated from command line argument

        if Statistic.COUNT in self.statistics:
            _write(Statistic.COUNT,
                   calculate_stack_statistic_count(stack=stack, ndv=ndv))

        if Statistic.MIN in self.statistics:
            log_mem("Before MIN")
            _write(Statistic.MIN,
                   calculate_stack_statistic_min(
                       stack=stack, ndv=ndv, dtype=data_type))

        if Statistic.MAX in self.statistics:
            log_mem("Before MAX")
            _write(Statistic.MAX,
                   calculate_stack_statistic_max(
                       stack=stack, ndv=ndv, dtype=data_type))

        if Statistic.MEAN in self.statistics:
            log_mem("Before MEAN")
            _write(Statistic.MEAN,
                   calculate_stack_statistic_mean(
                       stack=stack, ndv=ndv, dtype=data_type))

        if Statistic.MEDIAN in self.statistics:
            _write(Statistic.MEDIAN,
                   calculate_stack_statistic_median(
                       stack=stack, ndv=ndv, dtype=data_type))

        if Statistic.VARIANCE in self.statistics:
            log_mem("Before VARIANCE")
            _write(Statistic.VARIANCE,
                   calculate_stack_statistic_variance(
                       stack=stack, ndv=ndv, dtype=data_type))

        if Statistic.STANDARD_DEVIATION in self.statistics:
            _write(Statistic.STANDARD_DEVIATION,
                   calculate_stack_statistic_standard_deviation(
                       stack=stack, ndv=ndv, dtype=data_type))

        for percentile in PERCENTILE:
            if percentile not in self.statistics:
                continue
            log_mem("Before {p}".format(p=percentile.name))
            _write(percentile,
                   calculate_stack_statistic_percentile(
                       stack=stack,
                       percentile=PERCENTILE[percentile],
                       ndv=ndv,
                       interpolation=self.interpolation))

        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        if Statistic.COUNT_OBSERVED in self.statistics:
            _write(Statistic.COUNT_OBSERVED,
                   calculate_stack_statistic_count_observed(stack=stack,
                                                            ndv=ndv))
    def run(self):
        """Calculate the requested statistics for this chunk and save each one.

        A data stack is built for ``self.band`` from ``self.get_tiles()``,
        passing the chunk offsets and sizes held on ``self``. When
        ``self.tidal_workflow`` is set, the stack only includes tiles whose
        end date, formatted ``%Y-%m-%d``, is found in
        ``self.load_filterfile()``.

        Every statistic present in ``self.statistics`` is computed and written
        with ``numpy.save`` to ``self.get_statistic_filename(statistic)``.

        Returns ``None``; nothing is written when the stack is empty.
        """
        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)
        data_type = get_dataset_type_data_type(self.dataset_type)
        tiles = self.get_tiles()
        filtile = tiles
        if self.tidal_workflow:
            filtile = []
            lines = self.load_filterfile()
            _log.info("\tlength of original tiles is %d", len(tiles))
            for tile in tiles:
                # Result is unused; the lookup is kept so a tile without an
                # entry for this dataset type still fails here.
                # NOTE(review): presumably intentional - confirm, else remove.
                tile.datasets[self.dataset_type]
                tdate = str(tile.end_datetime.strftime("%Y-%m-%d"))
                if tdate in lines:
                    filtile.append(tile)

            _log.info("\tlength of new filtered tiles is %d", len(filtile))

        stack = get_dataset_data_stack(filtile, self.dataset_type, self.band.name, ndv=ndv,
                                       x=self.x_offset, y=self.y_offset,
                                       x_size=self.x_chunk_size, y_size=self.y_chunk_size,
                                       mask_pqa_apply=self.mask_pqa_apply, mask_pqa_mask=self.mask_pqa_mask)

        # Nothing to compute for an empty stack.
        if len(stack) == 0:
            return

        # TODO get statistics to be generated from command line argument

        # Each result is deleted right after it is saved so that only one
        # statistic array is held alongside the stack at any time.

        if Statistic.COUNT in self.statistics:
            # COUNT
            stack_stat = calculate_stack_statistic_count(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT), stack_stat)
            del stack_stat

        if Statistic.MIN in self.statistics:
            log_mem("Before MIN")

            # MIN
            stack_stat = calculate_stack_statistic_min(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MIN), stack_stat)
            del stack_stat

        if Statistic.MAX in self.statistics:
            log_mem("Before MAX")

            # MAX
            stack_stat = calculate_stack_statistic_max(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MAX), stack_stat)
            del stack_stat

        if Statistic.MEAN in self.statistics:
            log_mem("Before MEAN")

            # MEAN
            stack_stat = calculate_stack_statistic_mean(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MEAN), stack_stat)
            del stack_stat

        if Statistic.MEDIAN in self.statistics:
            # MEDIAN
            stack_stat = calculate_stack_statistic_median(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.MEDIAN), stack_stat)
            del stack_stat

        if Statistic.VARIANCE in self.statistics:
            log_mem("Before VARIANCE")

            # VARIANCE
            stack_stat = calculate_stack_statistic_variance(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.VARIANCE), stack_stat)
            del stack_stat

        if Statistic.STANDARD_DEVIATION in self.statistics:
            # STANDARD_DEVIATION
            stack_stat = calculate_stack_statistic_standard_deviation(stack=stack, ndv=ndv, dtype=data_type)
            numpy.save(self.get_statistic_filename(Statistic.STANDARD_DEVIATION), stack_stat)
            del stack_stat

        for percentile in PERCENTILE:

            if percentile in self.statistics:
                log_mem("Before {p}".format(p=percentile.name))
                stack_stat = calculate_stack_statistic_percentile(stack=stack, percentile=PERCENTILE[percentile],
                                                                  ndv=ndv, interpolation=self.interpolation)
                numpy.save(self.get_statistic_filename(percentile), stack_stat)
                del stack_stat

        if Statistic.COUNT_OBSERVED in self.statistics:
            # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
            stack_stat = calculate_stack_statistic_count_observed(stack=stack, ndv=ndv)
            numpy.save(self.get_statistic_filename(Statistic.COUNT_OBSERVED), stack_stat)
            del stack_stat
# Example #4
# 0
def existing_nan_percentile(data):
    """Print what ``calculate_stack_statistic_percentile`` returns for ``data``.

    The helper is called with ``[25, 50, 75]`` as its second argument and the
    result is printed beneath a header line.
    """
    print("existing nan percentile...")

    # Imported only when the function runs, after the first message is shown.
    from datacube.api.utils import calculate_stack_statistic_percentile

    # Header first, then the computation, matching the order in which the
    # two items of a single print statement are evaluated and written.
    print("calculated percentiles")
    print(calculate_stack_statistic_percentile(data, [25, 50, 75]))
    def run(self):
        """Compute the requested statistics for one chunk and save each one.

        Tiles are listed for ``self.x`` / ``self.y`` and ``self.satellites``
        using the date range and include-criteria returned by
        ``build_season_date_criteria``; ``DatasetType.PQ25`` is requested as
        well when ``self.mask_pqa_apply`` is set. Each statistic present in
        ``self.statistics`` is written with ``numpy.save``. Returns early,
        before any statistic is computed, when the stack is empty.
        """
        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)

        acq_min, acq_max, criteria = build_season_date_criteria(
            self.acq_min, self.acq_max, self.season, seasons=SEASONS, extend=True)

        _log.info("\tcriteria is %s", criteria)

        dataset_types = [self.dataset_type] + (
            [DatasetType.PQ25] if self.mask_pqa_apply else [])

        tiles = list_tiles_as_generator(
            x=[self.x], y=[self.y], satellites=self.satellites,
            acq_min=acq_min, acq_max=acq_max,
            dataset_types=dataset_types, include=criteria)

        stack = get_dataset_data_stack(
            tiles, self.dataset_type, self.band.name, ndv=ndv,
            x=self.x_offset, y=self.y_offset,
            x_size=self.x_chunk_size, y_size=self.y_chunk_size,
            mask_pqa_apply=self.mask_pqa_apply, mask_pqa_mask=self.mask_pqa_mask)

        if len(stack) == 0:
            return

        def _write(statistic, values):
            # The computed array is only referenced while this call runs, so
            # it is released before the next statistic is calculated.
            numpy.save(self.get_statistic_filename(statistic), values)

        # TODO get statistics to be generated from command line argument

        if Statistic.COUNT in self.statistics:
            log_mem("Before COUNT")
            print("COUNT")
            _write(Statistic.COUNT,
                   calculate_stack_statistic_count(stack=stack, ndv=ndv))

        if Statistic.MIN in self.statistics:
            log_mem("Before MIN")
            print("MIN")
            _write(Statistic.MIN,
                   calculate_stack_statistic_min(stack=stack, ndv=ndv))

        if Statistic.MAX in self.statistics:
            log_mem("Before MAX")
            print("MAX")
            _write(Statistic.MAX,
                   calculate_stack_statistic_max(stack=stack, ndv=ndv))

        if Statistic.MEAN in self.statistics:
            log_mem("Before MEAN")
            print("MEAN")
            _write(Statistic.MEAN,
                   calculate_stack_statistic_mean(stack=stack, ndv=ndv))

        percentile_statistics = (
            Statistic.PERCENTILE_25,
            Statistic.PERCENTILE_50,
            Statistic.PERCENTILE_75,
            Statistic.PERCENTILE_90,
            Statistic.PERCENTILE_95,
        )
        for percentile in percentile_statistics:
            if percentile not in self.statistics:
                continue
            log_mem("Before {p}".format(p=percentile.name))
            print("Before {p}".format(p=percentile.name))
            _write(percentile,
                   calculate_stack_statistic_percentile(
                       stack=stack, percentile=PERCENTILE[percentile], ndv=ndv))

        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        if Statistic.COUNT_OBSERVED in self.statistics:
            log_mem("Before OBSERVED COUNT")
            print("COUNT OBSERVED")
            _write(Statistic.COUNT_OBSERVED,
                   calculate_stack_statistic_count_observed(stack=stack, ndv=ndv))

        log_mem("DONE")
    def run(self):
        """Compute the requested statistics for this chunk and save each one.

        The tiles from ``self.get_tiles()`` are stacked for ``self.band``;
        every statistic present in ``self.statistics`` is then computed and
        written with ``numpy.save`` to ``self.get_statistic_filename(...)``.
        Returns early, before any statistic is computed, when the stack is
        empty.
        """
        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)
        data_type = get_dataset_type_data_type(self.dataset_type)
        tiles = self.get_tiles()

        stack = get_dataset_data_stack(
            tiles, self.dataset_type, self.band.name, ndv=ndv,
            x=self.x_offset, y=self.y_offset,
            x_size=self.x_chunk_size, y_size=self.y_chunk_size,
            mask_pqa_apply=self.mask_pqa_apply, mask_pqa_mask=self.mask_pqa_mask)

        if len(stack) == 0:
            return

        def _write(statistic, values):
            # The computed array is only referenced while this call runs, so
            # it is released before the next statistic is calculated.
            numpy.save(self.get_statistic_filename(statistic), values)

        # TODO get statistics to be generated from command line argument

        if Statistic.COUNT in self.statistics:
            log_mem("Before COUNT")
            _write(Statistic.COUNT,
                   calculate_stack_statistic_count(stack=stack, ndv=ndv))

        # Statistics that share the (stack, ndv, dtype) call shape, listed in
        # the order they have to be produced.
        dtype_statistics = (
            (Statistic.MIN, "Before MIN",
             calculate_stack_statistic_min),
            (Statistic.MAX, "Before MAX",
             calculate_stack_statistic_max),
            (Statistic.MEAN, "Before MEAN",
             calculate_stack_statistic_mean),
            (Statistic.VARIANCE, "Before VARIANCE",
             calculate_stack_statistic_variance),
            (Statistic.STANDARD_DEVIATION, "Before STANDARD_DEVIATION",
             calculate_stack_statistic_standard_deviation),
        )
        for statistic, message, compute in dtype_statistics:
            if statistic in self.statistics:
                log_mem(message)
                _write(statistic,
                       compute(stack=stack, ndv=ndv, dtype=data_type))

        for percentile in PERCENTILE:
            if percentile not in self.statistics:
                continue
            log_mem("Before {p}".format(p=percentile.name))
            _write(percentile,
                   calculate_stack_statistic_percentile(
                       stack=stack, percentile=PERCENTILE[percentile],
                       ndv=ndv, interpolation=self.interpolation))

        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        if Statistic.COUNT_OBSERVED in self.statistics:
            log_mem("Before OBSERVED COUNT")
            _write(Statistic.COUNT_OBSERVED,
                   calculate_stack_statistic_count_observed(stack=stack, ndv=ndv))

        log_mem("DONE")
    def run(self):
        """Produce and save every statistic requested for this chunk.

        Stacks the tiles from ``self.get_tiles()`` for ``self.band`` and, for
        each statistic present in ``self.statistics``, logs memory use,
        computes the statistic and writes it with ``numpy.save`` to
        ``self.get_statistic_filename(statistic)``. An empty stack ends the
        method before anything is computed or saved.
        """
        _log.info("Calculating statistics for chunk")

        ndv = get_dataset_type_ndv(self.dataset_type)
        data_type = get_dataset_type_data_type(self.dataset_type)
        tiles = self.get_tiles()

        stack = get_dataset_data_stack(
            tiles,
            self.dataset_type,
            self.band.name,
            ndv=ndv,
            x=self.x_offset,
            y=self.y_offset,
            x_size=self.x_chunk_size,
            y_size=self.y_chunk_size,
            mask_pqa_apply=self.mask_pqa_apply,
            mask_pqa_mask=self.mask_pqa_mask)

        if len(stack) == 0:
            return

        def produce(statistic, message, compute, **extra):
            # Skip statistics that were not requested; otherwise log, compute
            # and save. The result goes out of scope when this call returns.
            if statistic not in self.statistics:
                return
            log_mem(message)
            values = compute(stack=stack, ndv=ndv, **extra)
            numpy.save(self.get_statistic_filename(statistic), values)

        # TODO get statistics to be generated from command line argument

        produce(Statistic.COUNT, "Before COUNT",
                calculate_stack_statistic_count)
        produce(Statistic.MIN, "Before MIN",
                calculate_stack_statistic_min, dtype=data_type)
        produce(Statistic.MAX, "Before MAX",
                calculate_stack_statistic_max, dtype=data_type)
        produce(Statistic.MEAN, "Before MEAN",
                calculate_stack_statistic_mean, dtype=data_type)
        produce(Statistic.VARIANCE, "Before VARIANCE",
                calculate_stack_statistic_variance, dtype=data_type)
        produce(Statistic.STANDARD_DEVIATION, "Before STANDARD_DEVIATION",
                calculate_stack_statistic_standard_deviation, dtype=data_type)

        # Kept as an explicit loop so the log label and the PERCENTILE lookup
        # are only evaluated for percentiles that were actually requested.
        for percentile in PERCENTILE:
            if percentile not in self.statistics:
                continue
            log_mem("Before {p}".format(p=percentile.name))
            values = calculate_stack_statistic_percentile(
                stack=stack, percentile=PERCENTILE[percentile],
                ndv=ndv, interpolation=self.interpolation)
            numpy.save(self.get_statistic_filename(percentile), values)
            del values

        # COUNT OBSERVED - note the copy=False is modifying the array so this is done last
        produce(Statistic.COUNT_OBSERVED, "Before OBSERVED COUNT",
                calculate_stack_statistic_count_observed)

        log_mem("DONE")