def transform(self, X, y=None):
    """Densify ``X``, wrap it in a DataFrame and run ``df_optimized`` on it.

    Args:
        X: Object exposing ``toarray()`` (presumably a scipy sparse
            matrix -- TODO confirm against the upstream pipeline step).
        y: Ignored by this method.

    Returns:
        The value returned by ``df_optimized`` for the dense DataFrame.
    """
    dense_frame = pd.DataFrame(X.toarray())
    assert isinstance(dense_frame, pd.DataFrame)
    optimized = df_optimized(dense_frame)
    # Show the first rows of the result only when verbose output is enabled.
    if self.verbose:
        print(optimized.head())
    return optimized
def transform(self, df, y=None):
    """Run ``df_optimized`` on ``df`` and return its result.

    Args:
        df: Input forwarded unchanged as the first argument of
            ``df_optimized``.
        y: Ignored by this method.

    Returns:
        Whatever ``df_optimized(df, self.verbose)`` returns.
    """
    # Operate on the ``df`` argument itself: no name ``X`` is bound in this
    # scope, so referencing it would raise NameError (or silently pick up an
    # unrelated module-level global).
    return df_optimized(df, self.verbose)
def transform(self, X, y=None):
    """Densify ``X``, wrap it in a DataFrame and run ``df_optimized`` on it.

    Args:
        X: Object exposing ``toarray()`` (presumably a scipy sparse
            matrix -- TODO confirm against the upstream pipeline step).
        y: Ignored by this method.

    Returns:
        The value returned by ``df_optimized`` for the dense DataFrame.
    """
    # No debugger breakpoints here: an interactive ``pdb.set_trace()`` would
    # stop every transform call and hang non-interactive runs.
    X_dataframe = pd.DataFrame(X.toarray())
    return df_optimized(X_dataframe)
mlflow=True, # set to True to log params to mlflow experiment_name="[Fed-up!]-Phi-TaxiFare", pipeline_memory=None, model_upload=False, # for automatic upload to gcp distance_type="manhattan", feateng=["distance_to_center", "direction", "distance", "time_features", "geohash"], ) #################### # Get and clean data #################### if __name__ == "__main__": print("############ Loading Data ############") df = get_data(**params) df = clean_df(df) df = df_optimized(df) y_train = df["fare_amount"] X_train = df.drop("fare_amount", axis=1) del df print("shape: {}".format(X_train.shape)) print("size: {} Mb".format(X_train.memory_usage().sum() / 1e6)) #################### # single model kaggle transmission #################### if params["is_4_kaggle"] == True: print("Auto-Kaggle-submit is challenge is active") t = Trainer(X=X_train, y=y_train, **params) del X_train, y_train t.train()
def size_optimize(self, df):
    """Delegate to ``df_optimized`` and hand back its result.

    Args:
        df: Input forwarded unchanged to ``df_optimized``.

    Returns:
        Whatever ``df_optimized(df)`` returns (presumably a
        memory-reduced copy of ``df`` -- confirm against the helper).
    """
    optimized = df_optimized(df)
    return optimized