Ejemplo n.º 1
0
    def construct_from_xarray(self, xa, sub_sketch_keys=None):
        """
        Initialize this sketch's basic state from ``xa``.

        Stores the dtype reported by ``xa.dtype()``, a cached RDD holding
        only the elements for which ``is_missing`` is false, the number of
        those elements, and a sketch-type label ('numeric', 'date' or
        'non-numeric') chosen from the dtype.  The unfiltered RDD is kept
        in ``self._rdd``.

        Parameters
        ----------
        xa : object
            Source array; must provide ``to_rdd()`` and ``dtype()``.
        sub_sketch_keys : optional
            Must be None; any other value is rejected.

        Raises
        ------
        NotImplementedError
            If ``sub_sketch_keys`` is not None.
        """
        self._entry(sub_sketch_keys=sub_sketch_keys)
        if sub_sketch_keys is not None:
            raise NotImplementedError('sub_sketch_keys mode not implemented')

        # these are not going through the xrdd layer -- should they?
        non_missing = xa.to_rdd().filter(lambda value: not is_missing(value))
        # Cache before counting: the filtered RDD is kept on the instance
        # (self.defined) after the count() below.
        non_missing.cache()

        self.dtype = xa.dtype()
        self.count = non_missing.count()

        # Numeric is tested first, then date; everything else is non-numeric.
        self.sketch_type = (
            'numeric' if util.is_numeric_type(self.dtype)
            else 'date' if util.is_date_type(self.dtype)
            else 'non-numeric'
        )

        # compute others later if needed
        self._rdd = xa.to_rdd()
        self.defined = non_missing
Ejemplo n.º 2
0
 def _create_stats(self):
     """
     Compute basic statistics over ``self.defined`` and store them on self.

     Does nothing when ``self.stats`` is already set.

     For date dtypes only ``min_val`` and ``max_val`` are computed; if the
     underlying job raises ``Py4JJavaError`` both are set to None and a
     warning is logged.  For every other dtype, ``min_val``, ``max_val``,
     ``mean_val``, ``sum_val``, ``variance_val`` and ``stdev_val`` are taken
     from ``self.defined.stats()`` and the stats object is kept in
     ``self.stats``.
     """
     if self.stats is not None:
         return

     if util.is_date_type(self.dtype):
         try:
             self.min_val = normalize_number(self.defined.min())
             self.max_val = normalize_number(self.defined.max())
         except py4j.protocol.Py4JJavaError:
             # Best effort: leave both bounds undefined rather than failing.
             self.min_val = None
             self.max_val = None
             # logging.warn is a deprecated alias; logging.warning is the
             # documented function and emits the same record.
             logging.warning('Datetime max or min did not compute.  ' +
                             'Possible mixture of offset-native and offset-aware times.')
         # NOTE: self.stats is not assigned on this branch, so for date
         # dtypes the min/max are recomputed on every call.
     else:
         stats = self.defined.stats()
         self.min_val = normalize_number(stats.min())
         self.max_val = normalize_number(stats.max())
         self.mean_val = normalize_number(stats.mean())
         self.sum_val = normalize_number(stats.sum())
         self.variance_val = normalize_number(stats.variance())
         self.stdev_val = normalize_number(stats.stdev())
         self.stats = stats