def pd_multi_index_from_product_impl(cls, iterables, sortorder=None, names=None):
    """Implementation of MultiIndex.from_product: build levels and codes
    from the cartesian product of *iterables*.

    ``sortorder`` and ``names`` are accepted for pandas API compatibility;
    level names are taken from the input indexes themselves.
    """
    # TO-DO: support indexes.unique() method and use it here
    factorized = sdc_tuple_map(factorize_level, iterables)
    level_names = sdc_tuple_map(sdc_indexes_get_name, iterables)
    levels = sdc_tuple_map(lambda lv: fix_df_index(list(lv[0])), factorized)

    # running product of level sizes; next_codes_info appends to it, so the
    # last element ends up as the total size of the resulting index
    cumprod_sizes = [1, ]
    codes_meta = sdc_tuple_map(next_codes_info, factorized, cumprod_sizes)
    total_size = cumprod_sizes[-1]
    codes = sdc_tuple_map(next_codes_array, codes_meta, total_size)

    return sdc_pandas_multi_index_ctor(levels, codes, name=level_names)
def sdc_unify_index_types_impl(left, right):
    """Return index data converted to a dtype common to *left* and *right*.

    All ``*_match`` / ``is_*`` conditions below are compile-time constants
    captured from the enclosing overload, so numba resolves them during
    typing and compiles only the surviving branch (hence the ``== True``
    literal comparisons and the ``# noqa`` markers).
    """
    if index_dtypes_match == True:  # noqa
        # dtypes already agree — nothing to convert
        return left
    else:
        if is_left_index_cached == True:  # noqa
            # NOTE(review): the cached branches return the stored values
            # without an astype — presumably the cached representation
            # already carries the common dtype; confirm against callers
            index_data = left.values if is_left_index_array == False else left  # noqa
        elif is_right_index_cached == True:  # noqa
            index_data = right.values if is_right_index_array == False else right  # noqa
        else:
            # using numpy_like.astype but not index.astype since latter works differently
            index_data = numpy_like.astype(left, numba_index_common_dtype)
        return fix_df_index(index_data)
def _multi_index_append_level_impl(A, codes_A, B, codes_B):
    """Append one MultiIndex level of B onto the matching level of A.

    Returns a tuple of (combined level values, combined codes array),
    where codes for the B part are remapped into positions within the
    combined level.
    """
    # maps each distinct level value to its position in the combined level
    value_positions = _appender_build_map(A, B)
    combined_level = fix_df_index(list(value_positions.keys()))

    left_size = len(codes_A)
    total_size = left_size + len(codes_B)
    combined_codes = np.empty(total_size, dtype=np.int64)

    # parallel fill: first the A codes verbatim, then B codes remapped
    # through the combined-level position map
    for pos in prange(total_size):
        if pos < left_size:
            combined_codes[pos] = codes_A[pos]
        else:
            combined_codes[pos] = value_positions[B[codes_B[pos - left_size]]]

    return (combined_level, combined_codes)
def pd_int64_index_ctor_impl(data, dtype=None, copy=False, name=None):
    """Constructor implementation for an Int64Index-like object.

    ``dtype`` must be omitted, a numpy signed-integer dtype, or one of the
    listed string aliases; otherwise a ValueError is raised. The ``is_*``
    and ``*_is_*`` flags below are compile-time constants from the
    enclosing overload, so numba keeps only the matching branch.
    """
    if not (dtype is None or dtype_is_numpy_signed_int or dtype_is_unicode_str and dtype in ('int8', 'int16', 'int32', 'int64')):
        raise ValueError("Incorrect `dtype` passed: expected signed integer")

    # extract the underlying array from whatever form `data` came in
    if is_data_array == True:  # noqa
        _data = data
    elif is_data_index == True:  # noqa
        _data = data.values
    else:
        _data = fix_df_index(data)._data

    if data_dtype_is_int64 == False:  # noqa
        # conversion produces a new array, so `copy` is presumably
        # redundant on this path and honored only in the int64 branch
        _data = numpy_like.astype(_data, dtype=types.int64)
    else:
        if copy:
            _data = np.copy(_data)
    return init_int64_index(_data, name)
def _multi_index_create_levels_and_codes_impl(level_data, codes_data, name):
    """Normalize one (level, codes) pair for a MultiIndex and verify it.

    Raises ValueError when codes reference positions outside the level,
    contain values below -1, or when level values are not unique.
    """
    level = sdc_indexes_rename(fix_df_index(level_data), name)
    codes = fix_df_array(codes_data)

    # to avoid additional overload make data verification checks inplace
    # these checks repeat those in MultiIndex::_verify_integrity
    has_codes = len(codes) != 0
    if has_codes and np.max(codes) >= len(level):
        raise ValueError(
            "On one of the levels code max >= length of level. "
            "NOTE: this index is in an inconsistent state")
    if has_codes and np.min(codes) < -1:
        raise ValueError("On one of the levels code value < -1")

    # TO-DO: support is_unique for all indexes and use it here
    positions = sdc_indexes_build_map_positions(level)
    if len(level) != len(positions):
        raise ValueError("Level values must be unique")

    return (level, codes)
def _multi_index_create_level_impl(index_data, name):
    """Coerce *index_data* into an index object renamed to *name*."""
    return sdc_indexes_rename(fix_df_index(index_data), name)