Beispiel #1
0
def _s_compress_size(self, aggresiveness=0, sparsify=False):
  '''
  Return a copy of this Series downcast to a smaller numeric dtype.

  A new Series is always returned because the dtype cannot be changed
  in place.

  Parameters
  ----------
  aggresiveness : default 0
    Forwarded unchanged to ``utils.get_optimal_numeric_type`` when the
    Series holds floats; it is not used on the integer/index path.
  sparsify : bool, default False
    Only honoured on the integer/index path: the downcast result is
    converted to a sparse representation whose fill value is the most
    frequent value.

  Returns
  -------
  The converted copy. A Series that is neither numerical nor an index, or
  that is empty, is returned as an unmodified copy.

  Raises
  ------
  Exception
    If the Series is reported as numerical but its dtype name starts with
    neither "int" nor "float".
  '''
  c = self.copy()
  if not (c.is_numerical() or c.is_index()):
    # str() because a Series name may be None or any non-string label.
    misc.dbg(str(c.name) + ' is not supported, ignored during compression')
    return c

  dtype_name = str(c.dtype)
  as_int = c.is_index() or dtype_name.startswith('int')
  if not as_int and not dtype_name.startswith('float'):
    # Build ONE message string; passing the dtype as a second positional
    # argument would turn the exception text into a tuple repr.
    raise Exception(str(c.name) + ' expected "int" or "float" type got: ' + dtype_name)

  # Nothing to measure on an empty Series, so there is no range to fit.
  if len(c) == 0:
    return c

  # Vectorised reductions: they skip NaN and avoid iterating element by
  # element in Python the way the builtin min()/max() would.
  lowest, highest = c.min(), c.max()

  if not as_int:
    return c.astype(utils.get_optimal_numeric_type(c.dtype, lowest, highest, aggresiveness=aggresiveness))

  c = c.astype(utils.get_optimal_numeric_type('int', lowest, highest))
  if not sparsify:
    return c

  # mode() yields every tied value; take the first one so multi-modal data
  # does not break the int() conversion.
  fill_value = int(c.mode().iloc[0])
  if hasattr(c, 'to_sparse'):
    return c.to_sparse(fill_value=fill_value)
  # Series.to_sparse is gone from recent pandas; a sparse dtype is the
  # replacement there.
  import pandas as pd
  return c.astype(pd.SparseDtype(c.dtype, fill_value))
Beispiel #2
0
def _s_compress_size(self, aggresiveness=0, sparsify=False):
  '''
  Return a copy of this Series downcast to a smaller numeric dtype.

  A new Series is always returned because the dtype cannot be changed
  in place.

  Parameters
  ----------
  aggresiveness : default 0
    Forwarded unchanged to ``utils.get_optimal_numeric_type`` when the
    Series holds floats; it is not used on the integer/index path.
  sparsify : bool, default False
    Only honoured on the integer/index path: the downcast result is
    converted to a sparse representation whose fill value is the most
    frequent value.

  Returns
  -------
  The converted copy. A Series that is neither numerical nor an index, or
  that is empty, is returned as an unmodified copy.

  Raises
  ------
  Exception
    If the Series is reported as numerical but its dtype name starts with
    neither "int" nor "float".
  '''
  c = self.copy()
  if not (c.is_numerical() or c.is_index()):
    # str() because a Series name may be None or any non-string label.
    misc.dbg(str(c.name) + ' is not supported, ignored during compression')
    return c

  dtype_name = str(c.dtype)
  as_int = c.is_index() or dtype_name.startswith('int')
  if not as_int and not dtype_name.startswith('float'):
    # Build ONE message string; passing the dtype as a second positional
    # argument would turn the exception text into a tuple repr.
    raise Exception(str(c.name) + ' expected "int" or "float" type got: ' + dtype_name)

  # Nothing to measure on an empty Series, so there is no range to fit.
  if len(c) == 0:
    return c

  # Vectorised reductions: they skip NaN and avoid iterating element by
  # element in Python the way the builtin min()/max() would.
  lowest, highest = c.min(), c.max()

  if not as_int:
    return c.astype(utils.get_optimal_numeric_type(c.dtype, lowest, highest, aggresiveness=aggresiveness))

  c = c.astype(utils.get_optimal_numeric_type('int', lowest, highest))
  if not sparsify:
    return c

  # mode() yields every tied value; take the first one so multi-modal data
  # does not break the int() conversion.
  fill_value = int(c.mode().iloc[0])
  if hasattr(c, 'to_sparse'):
    return c.to_sparse(fill_value=fill_value)
  # Series.to_sparse is gone from recent pandas; a sparse dtype is the
  # replacement there.
  import pandas as pd
  return c.astype(pd.SparseDtype(c.dtype, fill_value))
Beispiel #3
0
def _s_to_indexes(self):
    """Return the integer category codes of this Series as a new Series.

    The values are turned into a ``pd.Categorical`` and its integer codes
    are returned on the same index as ``self``. The dtype is whatever
    ``utils.get_optimal_numeric_type`` picks for the range passed to it.
    """
    # Categorical.from_array only exists on old pandas releases; where it is
    # missing, the plain constructor is used instead.
    build_categorical = getattr(pd.Categorical, 'from_array', pd.Categorical)
    cat = build_categorical(self)
    # Very old pandas exposed the codes under the name "labels".
    lbls = cat.codes if hasattr(cat, 'codes') else cat.labels
    # pandas encodes missing values as -1, so the requested range has to
    # start below zero whenever a negative code is present.
    lowest = -1 if (lbls < 0).any() else 0
    return pd.Series(lbls,
                     index=self.index,
                     dtype=utils.get_optimal_numeric_type(
                         'int', lowest,
                         len(lbls) + 1))
Beispiel #4
0
def _s_to_indexes(self):
  """Return the integer category codes of this Series as a new Series.

  The values are turned into a ``pd.Categorical`` and its integer codes
  are returned on the same index as ``self``. The dtype is whatever
  ``utils.get_optimal_numeric_type`` picks for the range passed to it.
  """
  # Categorical.from_array only exists on old pandas releases; where it is
  # missing, the plain constructor is used instead.
  build_categorical = getattr(pd.Categorical, 'from_array', pd.Categorical)
  cat = build_categorical(self)
  # Very old pandas exposed the codes under the name "labels".
  lbls = cat.codes if hasattr(cat, 'codes') else cat.labels
  # pandas encodes missing values as -1, so the requested range has to
  # start below zero whenever a negative code is present.
  lowest = -1 if (lbls < 0).any() else 0
  return pd.Series(lbls, index=self.index, dtype=utils.get_optimal_numeric_type('int', lowest, len(lbls) + 1))