Beispiel #1
0
    def fillna():
        # Argument spec for `fillna`: declares how the receiver frame
        # (`_self`) and the `limit` / `value` arguments are produced.

        def arg_value():
            # Candidates are only offered once the spec has reached its
            # maximum depth: every distinct non-null cell of the target
            # output, each wrapped with cost 2.
            if _spec.depth == _spec.max_depth:
                output: pd.DataFrame = _spec.output
                all_values = {
                    cell
                    for column in output
                    for cell in output[column]
                    if not pd.isnull(cell)
                }
                yield from map(lambda cell: AnnotatedVal(cell, cost=2),
                               Select(all_values))

        def arg_limit(v_self: pd.DataFrame):
            # Offer every limit from 1 up to the larger of the frame's two
            # dimensions, each wrapped with cost 5.
            largest_dim = max(v_self.shape)
            yield from map(lambda limit: AnnotatedVal(limit, cost=5),
                           Select(range(1, largest_dim + 1)))

        def rarg_limit(v_self: pd.DataFrame):
            # A coin flip picks the row count (on 0) or the column count as
            # the exclusive upper bound. Clamping it to at least 2 keeps the
            # range non-empty for tiny frames.
            nr, nc = v_self.shape
            extent = nr if coin_flip() == 0 else nc
            yield random.choice(range(1, max(extent, 2)))

        def rarg_value():
            # One random float drawn uniformly from [-1000, 1000].
            yield random.uniform(-1000, 1000)

        _self = RExt(DType(pd.DataFrame), defaultRandDf(nan_prob=0.5))
        # `limit`: a None default, then a random int, then the enumerated
        # candidates.
        _limit = Chain(
            Default(None),
            RExt(DType(int), rarg_limit(_self)),
            arg_limit(_self),
        )
        # `value`: a None default, a random scalar, an external
        # dict/Series/DataFrame, then values taken from the target output.
        _value = Chain(
            Default(None),
            RExt(FType(np.isscalar), rarg_value()),
            Ext(DType([dict, pd.Series, pd.DataFrame])),
            arg_value(),
        )
Beispiel #2
0
    def diff():
        # Argument spec for `diff`: declares how the receiver frame (`_self`)
        # and the `periods` argument are produced.

        def arg_periods(v_self: pd.DataFrame):
            # Draw one random shift from the inclusive interval
            # [-(nr - 1), nr - 1], where nr is the number of rows.
            nr = v_self.shape[0]
            if nr == 0:
                # For an empty frame the range below would be range(1, 0),
                # and random.choice raises IndexError on an empty sequence.
                # Offer no candidate instead of crashing.
                return
            yield random.choice(range(-(nr - 1), nr))

        _self = RExt(DType(pd.DataFrame), defaultRandDf(nan_prob=0.1))
        # `periods`: a default of 1, then a random int from arg_periods.
        _periods = Chain(Default(1), RExt(DType(int), arg_periods(_self)))
Beispiel #3
0
    def astype():
        # Argument spec for `astype`: declares how the receiver frame
        # (`_self`) and the `dtype` mapping argument are produced.

        def arg_astype_partial(v_self):
            # Only at maximum depth: if every column of the target output
            # also exists in `v_self`, offer the output's column -> dtype
            # mapping as a candidate.
            if _spec.depth != _spec.max_depth:
                return

            output = _spec.output
            try:
                is_subset = set(output.columns).issubset(set(v_self.columns))
                candidate = dict(output.dtypes) if is_subset else None
            except Exception:
                # Best effort: the output may not be frame-like (no
                # `.columns` / `.dtypes`) or may have unhashable labels.
                # `Exception` rather than a bare except lets GeneratorExit,
                # KeyboardInterrupt and SystemExit propagate.
                return

            # The yield sits outside the try block so that exceptions raised
            # at the suspension point are never swallowed.
            if candidate is not None:
                yield candidate

        def arg_dtype(v_self: pd.DataFrame):
            # Build a random column -> dtype mapping: each column is either
            # left alone (None chosen) or cast to a different numeric dtype
            # from the pool.
            pool = ['int32', 'uint32', 'float64', 'float32', 'int64', 'uint64']
            # For each dtype the options are "no cast" plus every *other*
            # pool dtype, in pool order.
            mapping = {
                dtype: [None] + [other for other in pool if other != dtype]
                for dtype in pool
            }
            mapping['object'] = [None]
            res = {}
            for col in v_self.columns:
                # Dtypes outside the pool (e.g. bool, datetime) are treated
                # like 'object': they are left unchanged instead of raising
                # KeyError.
                options = mapping.get(str(v_self.dtypes[col]), [None])
                chosen = random.choice(options)
                if chosen is not None:
                    res[col] = chosen
            yield res

        _self = RExt(DType(pd.DataFrame), defaultRandDf())
        # `dtype`: a random dict from arg_dtype, then the output-derived
        # mapping.
        _dtype = Chain(RExt(DType(dict), arg_dtype(_self)),
                       arg_astype_partial(_self))
Beispiel #4
0
    def add():
        # Argument spec for `add`: declares how the receiver frame (`_self`),
        # the `other` operand and the `fill_value` argument are produced.

        def arg_other(v_self: pd.DataFrame):
            # Build a second frame with the same row count and index as
            # `v_self`, and with nc, nc - 1 or nc + 1 columns.
            nr, nc = v_self.shape
            v_nc = random.choice([nc, nc - 1, nc + 1])
            # Take the first frame produced by defaultRandDf for this shape.
            val = next(
                defaultRandDf(num_rows=nr,
                              num_columns=v_nc,
                              column_levels=v_self.columns.nlevels,
                              col_prefix='i1_',
                              value_bags=[*ints_bags, *floats_bags]))
            val.index = v_self.index

            if coin_flip() == 0 and len(val.columns) == nc:
                # Same width and the coin came up 0: reuse the exact labels.
                val.columns = v_self.columns
            else:
                # Otherwise mix labels from both frames. random.sample needs
                # a sequence: sets were deprecated in Python 3.9 and raise
                # TypeError from 3.11. dict.fromkeys de-duplicates like a set
                # while keeping a deterministic order.
                label_pool = list(
                    dict.fromkeys([*v_self.columns, *val.columns]))
                picked = random.sample(label_pool, len(val.columns))
                if v_self.columns.nlevels == 1:
                    val.columns = pd.Index(picked)
                else:
                    val.columns = pd.MultiIndex.from_tuples(picked)
            yield val

        def arg_fill_value():
            # One random float drawn uniformly from [-100, 100].
            yield random.uniform(-100, 100)

        _self = RExt(DType(pd.DataFrame),
                     defaultRandDf(value_bags=[*ints_bags, *floats_bags]))
        _other = RExt(DType(pd.DataFrame), arg_other(_self))
        # `fill_value`: a None default, then a random float.
        _fill_value = Chain(Default(None), RExt(DType(float),
                                                arg_fill_value()))
Beispiel #5
0
    def round():
        # Argument spec for `round`: declares how the receiver frame
        # (`_self`) and the `decimals` argument are produced.

        def arg_decimals():
            # Pick one precision uniformly from 1..5.
            yield random.choice(range(1, 6))

        _self = RExt(DType(pd.DataFrame), defaultRandDf(nan_prob=0.1))
        # `decimals`: a default of 0, then a random int.
        _decimals = Chain(
            Default(0),
            RExt(DType(int), arg_decimals()),
        )
Beispiel #6
0
    def clip():
        # Argument spec for `clip`: declares how the receiver frame (`_self`)
        # and the `lower` / `upper` bounds are produced.

        def arg_lower(v_self: pd.DataFrame):
            # Keep only the cells accepted by is_int / is_float, then draw a
            # bound uniformly between their minimum and maximum.
            numeric = [
                cell for cell in v_self.values.flatten()
                if is_int(cell) or is_float(cell)
            ]
            if not numeric:
                # No numeric cells: nothing to offer.
                return
            yield random.uniform(min(numeric), max(numeric))

        def arg_upper(v_self: pd.DataFrame, v_lower):
            # Same as arg_lower, but the draw starts at the chosen lower
            # bound, or at the minimum cell when no lower bound was chosen.
            numeric = [
                cell for cell in v_self.values.flatten()
                if is_int(cell) or is_float(cell)
            ]
            if not numeric:
                return
            floor = min(numeric) if v_lower is None else v_lower
            yield random.uniform(floor, max(numeric))

        _self = RExt(
            DType(pd.DataFrame),
            defaultRandDf(value_bags=[*ints_bags, *floats_bags]),
        )
        # Both bounds: a None default, then a random float.
        _lower = Chain(Default(None), RExt(DType(float), arg_lower(_self)))
        _upper = Chain(
            Default(None),
            RExt(DType(float), arg_upper(_self, _lower)),
        )
Beispiel #7
0
    def head():
        # Argument spec for `head`: declares how the receiver frame (`_self`)
        # and the row count `n` are produced.

        def arg_head_partial(v_self: pd.DataFrame):
            # At maximum depth, first offer the target output's row count at
            # cost 0.
            if _spec.depth == _spec.max_depth:
                target: pd.DataFrame = _spec.output
                yield AnnotatedVal(target.shape[0], cost=0)

            # Then every count from 1 up to the number of rows in `v_self`.
            yield from Select(list(range(1, v_self.shape[0] + 1)))

        def arg_n(v_self: pd.DataFrame):
            # Random pick among 5 and every count below the frame length.
            candidates = {5, *range(1, len(v_self))}
            yield random.choice(list(candidates))

        _self = RExt(DType(pd.DataFrame), defaultRandDf())
        # `n`: a default of 5, then a random int, then the enumerated
        # candidates.
        _n = Chain(
            Default(5),
            RExt(DType(int), arg_n(_self)),
            arg_head_partial(_self),
        )
Beispiel #8
0
    def reindex():
        # Argument spec for `reindex`: declares how the receiver frame
        # (`_self`), the `labels` list and the `fill_value` argument are
        # produced.

        def arg_labels(v_self: pd.DataFrame):
            # A coin flip chooses the row labels (on 0) or the column labels.
            # The existing labels are mixed with half as many freshly
            # generated ones, and a list of the original axis length is
            # sampled from that mix.
            nr, nc = v_self.shape
            if coin_flip() == 0:
                existing, size = list(v_self.index), nr
            else:
                existing, size = list(v_self.columns), nc
            fresh = list(
                StrColGen(all_distinct=True).generate(size // 2)[1].values())
            # random.sample already returns a new list.
            yield random.sample(existing + fresh, size)

        def arg_fill_value():
            # One random float drawn uniformly from [-100, 100].
            yield random.uniform(-100, 100)

        _self = RExt(DType(pd.DataFrame), defaultRandDf())
        _labels = RExt(DType([list, dict]), arg_labels(_self))
        # `np.nan` is the canonical spelling; the `np.NaN` alias was removed
        # in NumPy 2.0.
        _fill_value = Chain(Default(np.nan), RExt(DType(float), arg_fill_value()))