def fillna():
    """Declare the argument generators used for a DataFrame ``fillna`` call.

    The nested generators feed the ``_self``, ``_limit`` and ``_value``
    declarations at the bottom of the body.

    NOTE(review): ``RExt``, ``Ext``, ``Chain``, ``Default``, ``DType``,
    ``FType``, ``Select``, ``AnnotatedVal``, ``coin_flip``, ``defaultRandDf``
    and ``_spec`` are defined outside this function; presumably the
    surrounding generator framework supplies them and resolves ``_self`` to a
    DataFrame before the helpers run -- confirm.
    """

    def arg_value():
        """Yield each distinct non-null cell of ``_spec.output`` with cost 2."""
        # Active only when the spec's depth equals its maximum depth.
        if _spec.depth == _spec.max_depth:
            output: pd.DataFrame = _spec.output
            candidates = {
                cell
                for column in output
                for cell in output[column]
                if not pd.isnull(cell)
            }
            yield from (AnnotatedVal(cell, cost=2) for cell in Select(candidates))

    def arg_limit(v_self: pd.DataFrame):
        """Yield 1..max(rows, columns) of ``v_self``, each with cost 5."""
        largest_dim = max(v_self.shape)
        yield from (
            AnnotatedVal(limit, cost=5)
            for limit in Select(range(1, largest_dim + 1))
        )

    def rarg_limit(v_self: pd.DataFrame):
        """Yield one random limit bounded by a randomly chosen dimension."""
        row_count, col_count = v_self.shape
        dim = row_count if coin_flip() == 0 else col_count
        # max(dim, 2) keeps the range non-empty for frames with < 2 entries
        # along the chosen dimension.
        yield random.choice(range(1, max(dim, 2)))

    def rarg_value():
        """Yield one random float drawn uniformly from [-1000, 1000]."""
        yield random.uniform(-1000, 1000)

    _self = RExt(DType(pd.DataFrame), defaultRandDf(nan_prob=0.5))
    _limit = Chain(Default(None), RExt(DType(int), rarg_limit(_self)), arg_limit(_self))
    _value = Chain(Default(None), RExt(FType(np.isscalar), rarg_value()),
                   Ext(DType([dict, pd.Series, pd.DataFrame])), arg_value())
def diff():
    """Declare the argument generators used for a DataFrame ``diff`` call.

    NOTE(review): ``RExt``, ``Chain``, ``Default``, ``DType`` and
    ``defaultRandDf`` are defined outside this function; presumably the
    surrounding generator framework supplies them and resolves ``_self`` to a
    DataFrame before ``arg_periods`` runs -- confirm.
    """

    def arg_periods(v_self: pd.DataFrame):
        """Yield one random period in ``[-(rows - 1), rows - 1]``."""
        row_count = v_self.shape[0]
        # NOTE(review): a frame with zero rows gives an empty range here and
        # random.choice would raise -- assumes the frame is never empty.
        yield random.choice(range(1 - row_count, row_count))

    _self = RExt(DType(pd.DataFrame), defaultRandDf(nan_prob=0.1))
    _periods = Chain(Default(1), RExt(DType(int), arg_periods(_self)))
def astype():
    """Declare the argument generators used for a DataFrame ``astype`` call.

    NOTE(review): ``RExt``, ``Chain``, ``DType``, ``defaultRandDf`` and
    ``_spec`` are defined outside this function; presumably the surrounding
    generator framework supplies them and resolves ``_self`` to a DataFrame
    before the helpers run -- confirm.
    """

    def arg_astype_partial(v_self: pd.DataFrame):
        """Yield the dtypes of ``_spec.output`` as a ``{column: dtype}`` dict.

        Yields nothing unless ``_spec.depth == _spec.max_depth`` and every
        output column also exists in ``v_self``.
        """
        if _spec.depth != _spec.max_depth:
            return
        output: pd.DataFrame = _spec.output
        try:
            if set(output.columns).issubset(set(v_self.columns)):
                yield dict(output.dtypes)
        except Exception:
            # Best-effort: an unusable output simply contributes no candidate.
            # Catching Exception (not a bare except) lets GeneratorExit and
            # KeyboardInterrupt propagate.
            pass

    def arg_dtype(v_self: pd.DataFrame):
        """Yield one random ``{column: dtype}`` mapping for ``v_self``.

        Each numeric column is either left out of the mapping (unchanged) or
        assigned a different dtype from the pool. Columns whose dtype is not
        in the pool (e.g. ``object``) are always left unchanged.
        """
        pool = ['int32', 'uint32', 'float64', 'float32', 'int64', 'uint64']
        keep_only = [None]
        # For every pool dtype: None (keep as is) or any *other* pool dtype.
        mapping = {
            name: [None] + pool[:i] + pool[i + 1:]
            for i, name in enumerate(pool)
        }
        res = {}
        for col in v_self.columns:
            # Fall back to "keep" for dtypes outside the pool instead of
            # raising KeyError; random.choice is still called once per column.
            options = mapping.get(str(v_self.dtypes[col]), keep_only)
            chosen = random.choice(options)
            if chosen is not None:
                res[col] = chosen
        yield res

    _self = RExt(DType(pd.DataFrame), defaultRandDf())
    _dtype = Chain(RExt(DType(dict), arg_dtype(_self)), arg_astype_partial(_self))
def add():
    """Declare the argument generators used for a DataFrame ``add`` call.

    NOTE(review): ``RExt``, ``Chain``, ``Default``, ``DType``, ``coin_flip``,
    ``defaultRandDf``, ``ints_bags`` and ``floats_bags`` are defined outside
    this function; presumably the surrounding generator framework supplies
    them and resolves ``_self`` to a DataFrame before the helpers run --
    confirm.
    """

    def arg_other(v_self: pd.DataFrame):
        """Yield one random numeric frame to use as the ``other`` operand.

        The frame shares ``v_self``'s index and has the same, one fewer or
        one more column. Its column labels are either exactly ``v_self``'s or
        a random pick from the union of both frames' labels.
        """
        nr, nc = v_self.shape
        # NOTE(review): nc - 1 can be 0 (or negative) for very narrow frames;
        # assumes defaultRandDf copes with that -- confirm.
        v_nc = random.choice([nc, nc - 1, nc + 1])
        val = next(
            defaultRandDf(num_rows=nr, num_columns=v_nc,
                          column_levels=v_self.columns.nlevels,
                          col_prefix='i1_',
                          value_bags=[*ints_bags, *floats_bags]))
        val.index = v_self.index

        if coin_flip() == 0 and len(val.columns) == nc:
            val.columns = v_self.columns
        else:
            # random.sample() requires a sequence: passing a set is deprecated
            # since Python 3.9 and raises TypeError from 3.11 on, so the
            # de-duplicated labels are materialised as a list first.
            label_pool = list(set(list(v_self.columns) + list(val.columns)))
            picked = random.sample(label_pool, len(val.columns))
            if v_self.columns.nlevels == 1:
                val.columns = pd.Index(picked)
            else:
                val.columns = pd.MultiIndex.from_tuples(picked)

        yield val

    def arg_fill_value():
        """Yield one random float drawn uniformly from [-100, 100]."""
        yield random.uniform(-100, 100)

    _self = RExt(DType(pd.DataFrame), defaultRandDf(value_bags=[*ints_bags, *floats_bags]))
    _other = RExt(DType(pd.DataFrame), arg_other(_self))
    _fill_value = Chain(Default(None), RExt(DType(float), arg_fill_value()))
def round():
    """Declare the argument generators used for a DataFrame ``round`` call.

    NOTE(review): ``RExt``, ``Chain``, ``Default``, ``DType`` and
    ``defaultRandDf`` are defined outside this function; presumably the
    surrounding generator framework supplies them -- confirm.
    """

    def arg_decimals():
        """Yield one random number of decimals between 1 and 5 inclusive."""
        decimal_choices = [1, 2, 3, 4, 5]
        yield random.choice(decimal_choices)

    _self = RExt(DType(pd.DataFrame), defaultRandDf(nan_prob=0.1))
    _decimals = Chain(Default(0), RExt(DType(int), arg_decimals()))
def clip():
    """Declare the argument generators used for a DataFrame ``clip`` call.

    NOTE(review): ``RExt``, ``Chain``, ``Default``, ``DType``, ``is_int``,
    ``is_float``, ``defaultRandDf``, ``ints_bags`` and ``floats_bags`` are
    defined outside this function; presumably the surrounding generator
    framework supplies them and resolves ``_self`` / ``_lower`` to concrete
    values before the helpers run -- confirm.
    """

    def arg_lower(v_self: pd.DataFrame):
        """Yield one random lower bound between the min and max numeric cell.

        Yields nothing when no cell passes ``is_int`` / ``is_float``.
        """
        numeric_cells = [
            cell for cell in list(v_self.values.flatten())
            if is_int(cell) or is_float(cell)
        ]
        if not numeric_cells:
            return
        yield random.uniform(min(numeric_cells), max(numeric_cells))

    def arg_upper(v_self: pd.DataFrame, v_lower):
        """Yield one random upper bound between ``v_lower`` and the max cell.

        When ``v_lower`` is None the smallest numeric cell is used as the
        floor. Yields nothing when no cell passes ``is_int`` / ``is_float``.
        """
        numeric_cells = [
            cell for cell in list(v_self.values.flatten())
            if is_int(cell) or is_float(cell)
        ]
        if not numeric_cells:
            return
        floor = min(numeric_cells) if v_lower is None else v_lower
        yield random.uniform(floor, max(numeric_cells))

    _self = RExt(DType(pd.DataFrame), defaultRandDf(value_bags=[*ints_bags, *floats_bags]))
    _lower = Chain(Default(None), RExt(DType(float), arg_lower(_self)))
    _upper = Chain(Default(None), RExt(DType(float), arg_upper(_self, _lower)))
def head():
    """Declare the argument generators used for a DataFrame ``head`` call.

    NOTE(review): ``RExt``, ``Chain``, ``Default``, ``DType``, ``Select``,
    ``AnnotatedVal``, ``defaultRandDf`` and ``_spec`` are defined outside this
    function; presumably the surrounding generator framework supplies them
    and resolves ``_self`` to a DataFrame before the helpers run -- confirm.
    """

    def arg_head_partial(v_self: pd.DataFrame):
        """Yield candidate row counts for ``n``.

        When ``_spec.depth == _spec.max_depth`` the row count of
        ``_spec.output`` is offered first with cost 0; afterwards every count
        from 1 to ``len(v_self)`` is offered.
        """
        if _spec.depth == _spec.max_depth:
            output: pd.DataFrame = _spec.output
            yield AnnotatedVal(output.shape[0], cost=0)

        # NOTE(review): the original layout did not make clear whether this
        # enumeration was nested under the depth check above; it is kept
        # unconditional here -- confirm against the intended behaviour.
        total_rows = v_self.shape[0]
        yield from Select(list(range(1, total_rows + 1)))

    def arg_n(v_self: pd.DataFrame):
        """Yield one random count from 5 and 1..len(v_self) - 1."""
        distinct_counts = set([5, *range(1, len(v_self))])
        yield random.choice(list(distinct_counts))

    _self = RExt(DType(pd.DataFrame), defaultRandDf())
    _n = Chain(Default(5), RExt(DType(int), arg_n(_self)), arg_head_partial(_self))
def reindex():
    """Declare the argument generators used for a DataFrame ``reindex`` call.

    NOTE(review): ``RExt``, ``Chain``, ``Default``, ``DType``, ``coin_flip``,
    ``StrColGen`` and ``defaultRandDf`` are defined outside this function;
    presumably the surrounding generator framework supplies them and resolves
    ``_self`` to a DataFrame before the helpers run -- confirm.
    """

    def arg_labels(v_self: pd.DataFrame):
        """Yield one random label list mixing existing and new labels.

        A coin flip picks the row index or the columns. The result has as
        many labels as that axis, sampled without replacement from the
        existing labels plus ``size // 2`` values produced by ``StrColGen``.
        """
        nr, nc = v_self.shape
        if coin_flip() == 0:
            existing, size = list(v_self.index), nr
        else:
            existing, size = list(v_self.columns), nc
        # generate(...)[1] is used as a mapping whose values are the labels.
        fresh = list(StrColGen(all_distinct=True).generate(size // 2)[1].values())
        # random.sample already returns a new list.
        yield random.sample(existing + fresh, size)

    def arg_fill_value():
        """Yield one random float drawn uniformly from [-100, 100]."""
        yield random.uniform(-100, 100)

    _self = RExt(DType(pd.DataFrame), defaultRandDf())
    _labels = RExt(DType([list, dict]), arg_labels(_self))
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    _fill_value = Chain(Default(np.nan), RExt(DType(float), arg_fill_value()))