def __new__(cls, values, missing_value, categories=None, sort=True):
    """Build an instance by factorizing ``values`` into integer codes.

    Parameters
    ----------
    values
        Array of labels. If ``is_object(values)`` is false, it is first
        converted with ``values.astype(object)``.
    missing_value
        Forwarded to the factorizer and to ``_from_codes_and_metadata``.
    categories
        Optional known categories. When ``None``, ``factorize_strings``
        derives them from ``values``; otherwise
        ``factorize_strings_known_categories`` is used with this argument.
    sort
        Forwarded unchanged to the factorizer.

    Returns
    -------
    Whatever ``cls._from_codes_and_metadata`` returns for the computed
    codes (reshaped to ``values.shape``), categories, reverse categories
    and ``missing_value``.
    """
    # Numpy's fixed-width string types aren't very efficient; object arrays
    # are faster to work with than bytes or unicode arrays in almost all
    # cases.
    if not is_object(values):
        values = values.astype(object)

    flat_values = values.ravel()
    if categories is not None:
        factorized = factorize_strings_known_categories(
            flat_values,
            categories=categories,
            missing_value=missing_value,
            sort=sort,
        )
    else:
        factorized = factorize_strings(
            flat_values,
            missing_value=missing_value,
            sort=sort,
        )
    flat_codes, unique_categories, reverse_lookup = factorized

    # The categories array is made read-only before it is handed on.
    unique_categories.setflags(write=False)

    return cls._from_codes_and_metadata(
        codes=flat_codes.reshape(values.shape),
        categories=unique_categories,
        reverse_categories=reverse_lookup,
        missing_value=missing_value,
    )
def __new__(cls, values, missing_value, categories=None, sort=True):
    """Construct an instance from an array of labels.

    The labels are factorized into integer codes plus a categories array
    and a reverse-categories mapping, which are then passed to
    ``cls._from_codes_and_metadata``.

    Parameters
    ----------
    values
        Array of labels; converted via ``astype(object)`` when
        ``is_object(values)`` is false.
    missing_value
        Passed through to the factorizer and to the final constructor.
    categories
        If ``None`` the categories are computed by ``factorize_strings``;
        otherwise they are supplied to
        ``factorize_strings_known_categories``.
    sort
        Passed through to the factorizer.

    Returns
    -------
    The object produced by ``cls._from_codes_and_metadata``.
    """
    # Object arrays are used because Numpy's fixed-width string types
    # aren't very efficient: working with object arrays is faster than
    # bytes or unicode arrays in almost all cases.
    if not is_object(values):
        values = values.astype(object)

    # Select the factorizer and its keyword arguments up front so that the
    # call itself is written only once.
    if categories is None:
        factorizer = factorize_strings
        factorizer_kwargs = dict(
            missing_value=missing_value,
            sort=sort,
        )
    else:
        factorizer = factorize_strings_known_categories
        factorizer_kwargs = dict(
            categories=categories,
            missing_value=missing_value,
            sort=sort,
        )

    codes, found_categories, reverse_categories = factorizer(
        values.ravel(), **factorizer_kwargs
    )

    # Freeze the categories array before passing it on.
    found_categories.setflags(write=False)

    shaped_codes = codes.reshape(values.shape)
    return cls._from_codes_and_metadata(
        codes=shaped_codes,
        categories=found_categories,
        reverse_categories=reverse_categories,
        missing_value=missing_value,
    )
def __new__(cls, values, missing_value, categories=None, sort=True):
    """Build an instance by factorizing ``values``, preserving memory order.

    The input is flattened in ``'F'`` order when
    ``values.flags.f_contiguous`` is true and in ``'C'`` order otherwise;
    the resulting codes are reshaped back to ``values.shape`` using the
    same order.

    Parameters
    ----------
    values
        Array of labels; converted via ``astype(object)`` when
        ``is_object(values)`` is false.
    missing_value
        Forwarded to the factorizer and to ``from_codes_and_metadata``.
    categories
        Optional known categories. ``None`` selects ``factorize_strings``;
        anything else selects ``factorize_strings_known_categories``.
    sort
        Forwarded unchanged to the factorizer.

    Returns
    -------
    Whatever ``cls.from_codes_and_metadata`` returns.
    """
    # Numpy's fixed-width string types aren't very efficient; object arrays
    # are faster to work with than bytes or unicode arrays in almost all
    # cases.
    if not is_object(values):
        values = values.astype(object)

    # Flatten and later reshape with one and the same order.
    ravel_order = 'F' if values.flags.f_contiguous else 'C'
    flat_values = values.ravel(ravel_order)

    if categories is not None:
        factorized = factorize_strings_known_categories(
            flat_values,
            categories=categories,
            missing_value=missing_value,
            sort=sort,
        )
    else:
        factorized = factorize_strings(
            flat_values,
            missing_value=missing_value,
            sort=sort,
        )
    flat_codes, unique_categories, reverse_lookup = factorized

    # Disabled post-processing step, kept for reference. Its stated intent
    # was to ensure the code of missing_value is -1, i.e. codes lie in
    # [-1, len(categories) - 1]:
    # codes, reverse_categories = _to_pandas_code(
    #     reverse_categories, codes, missing_value)
    # categories = np.delete(categories, categories == None, None)

    # The categories array is made read-only before it is handed on.
    unique_categories.setflags(write=False)

    return cls.from_codes_and_metadata(
        codes=flat_codes.reshape(values.shape, order=ravel_order),
        categories=unique_categories,
        reverse_categories=reverse_lookup,
        missing_value=missing_value,
    )