Ejemplo n.º 1
0
def to_concat(xs, dim=0):
    "Concat the element in `xs` (recursively if they are tuples/lists of tensors)"
    if not xs: return xs
    # Tuples/lists: concat element-wise, rebuilding the same container type.
    if is_listy(xs[0]):
        return type(xs[0])(
            [to_concat([x[i] for x in xs], dim=dim) for i in range_of(xs[0])])
    # Dicts: concat value-wise, keyed by the first element's keys.
    if isinstance(xs[0], dict):
        return {
            k: to_concat([x[k] for x in xs], dim=dim)
            for k in xs[0].keys()
        }
    #We may receive xs that are not concatenable (inputs of a text classifier for instance),
    #   in this case we return a big list
    try:
        return retain_type(torch.cat(xs, dim=dim), xs[0])
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
    # propagate; torch.cat failures (e.g. ragged shapes) raise Exception subclasses.
    except Exception:
        # Fallback: slice every element of each tensor along `dim` and collect
        # all slices into one flat `L`.
        return sum([
            L(
                retain_type(
                    o_.index_select(dim, tensor(i)).squeeze(dim), xs[0])
                for i in range_of(o_)) for o_ in xs
        ], L())
Ejemplo n.º 2
0
    def _preprocess_train(self, X, y, X_val, y_val):
        """Preprocess features/targets and build the fastai `TabularPandas` for training.

        Fits the feature preprocessor on `X` (transform-only on `X_val`), optionally
        scales regression/quantile targets via `self.y_scaler`, then wraps the merged
        frame into a `TabularPandas` split on the generated validation indices.
        """
        from fastai.tabular.core import TabularPandas
        from fastai.data.block import RegressionBlock, CategoryBlock
        from fastai.data.transforms import IndexSplitter
        from fastcore.basics import range_of

        X = self.preprocess(X, fit=True)
        if X_val is not None:
            X_val = self.preprocess(X_val)

        from fastai.tabular.core import Categorify
        self.procs = [Categorify]

        is_regression_like = self.problem_type in [REGRESSION, QUANTILE]
        if is_regression_like and self.y_scaler is not None:
            # Normalize targets for training; note the warning below that the NN
            # training metric is reported on the scaled values.
            y_norm = pd.Series(
                self.y_scaler.fit_transform(y.values.reshape(-1, 1)).reshape(-1))
            if y_val is not None:
                y_val_norm = pd.Series(
                    self.y_scaler.transform(y_val.values.reshape(-1, 1)).reshape(-1))
            else:
                y_val_norm = None
            logger.log(
                0,
                f'Training with scaled targets: {self.y_scaler} - !!! NN training metric will be different from the final results !!!'
            )
        else:
            y_norm, y_val_norm = y, y_val

        logger.log(15, f'Using {len(self.cont_columns)} cont features')
        df_train, train_idx, val_idx = self._generate_datasets(
            X, y_norm, X_val, y_val_norm)
        if is_regression_like:
            y_block = RegressionBlock()
        else:
            y_block = CategoryBlock()

        # Copy cat_columns and cont_columns because TabularList is mutating the list
        return TabularPandas(
            df_train,
            cat_names=self.cat_columns.copy(),
            cont_names=self.cont_columns.copy(),
            procs=self.procs,
            y_block=y_block,
            y_names=LABEL,
            splits=IndexSplitter(val_idx)(range_of(df_train)),
        )
Ejemplo n.º 3
0
def set_item_pg(pg, k, v):
    """Set hyper-parameter `k` to `v` in param group `pg` (in place) and return it.

    A key of the form `name__i` targets position `i` of the tuple stored at
    `pg[name]`; any other key is assigned directly.
    """
    if '__' in k:
        name, idx = k.split('__')
        pos = int(idx)
        current = pg[name]
        pg[name] = tuple(v if j == pos else item for j, item in enumerate(current))
    else:
        pg[k] = v
    return pg