Example #1
0
 def transform(self, X, y=None):
     """Build a DataFrame from ``X`` and pass it through ``df_optimized``.

     Args:
         X: Input data. Objects exposing ``toarray()`` (e.g. scipy sparse
             matrices) are densified first; anything else is handed to
             ``pd.DataFrame`` unchanged.
         y: Unused; part of the signature only.

     Returns:
         The value returned by ``df_optimized`` for the DataFrame.
     """
     # Only densify when the input actually offers toarray(); previously a
     # dense array or DataFrame raised AttributeError here.
     dense = X.toarray() if hasattr(X, "toarray") else X
     # No isinstance assert: pd.DataFrame(...) always yields a DataFrame.
     X = df_optimized(pd.DataFrame(dense))
     if self.verbose:
         print(X.head())
     return X
Example #2
0
 def transform(self, df, y=None):
     """Pass ``df`` through ``df_optimized`` and return the result.

     Args:
         df: Data handed to ``df_optimized``.
         y: Unused; part of the signature only.

     Returns:
         The value returned by ``df_optimized(df, self.verbose)``.
     """
     # The original passed the undefined name ``X`` instead of the ``df``
     # parameter, which raises NameError (or picks up an unrelated global).
     return df_optimized(df, self.verbose)
Example #3
0
 def transform(self, X, y=None):
     """Densify ``X`` into a DataFrame and pass it through ``df_optimized``.

     Args:
         X: Object exposing ``toarray()`` (e.g. a scipy sparse matrix).
         y: Unused; part of the signature only.

     Returns:
         The value returned by ``df_optimized`` for the dense DataFrame.
     """
     # Leftover pdb.set_trace() breakpoints were removed: they halted every
     # call and would hang any non-interactive run.
     X_dataframe = pd.DataFrame(X.toarray())
     X_optimized = df_optimized(X_dataframe)
     return X_optimized
Example #4
0
    mlflow=True,  # set to True to log params to mlflow
    experiment_name="[Fed-up!]-Phi-TaxiFare",
    pipeline_memory=None,
    model_upload=False,  # for automatic upload to gcp
    distance_type="manhattan",
    feateng=["distance_to_center", "direction", "distance", "time_features", "geohash"],
)

####################
# Get and clean data
####################
if __name__ == "__main__":
    # Script entry point: fetch the data, pass it through clean_df and
    # df_optimized, then split it into target and features.
    print("############   Loading Data   ############")
    df = get_data(**params)
    df = clean_df(df)
    df = df_optimized(df)
    # "fare_amount" is the target column; everything else stays as features.
    y_train = df["fare_amount"]
    X_train = df.drop("fare_amount", axis=1)
    # Drop the name for the combined frame now that it has been split.
    del df
    print("shape: {}".format(X_train.shape))
    # Assuming X_train is a pandas DataFrame, memory_usage() is in bytes, so
    # the sum divided by 1e6 is megabytes.
    print("size: {} Mb".format(X_train.memory_usage().sum() / 1e6))

    ####################
    # single model kaggle transmission
    ####################

    # NOTE(review): `== True` matches only True/1, not every truthy value;
    # confirm that strictness is intended before simplifying.
    if params["is_4_kaggle"] == True:
        print("Auto-Kaggle-submit is challenge is active")
        t = Trainer(X=X_train, y=y_train, **params)
        # Trainer has been handed X and y; drop the script-level names
        # before training starts.
        del X_train, y_train
        t.train()
 def size_optimize(self, df):
     """Run ``df`` through ``df_optimized`` and hand back its result."""
     optimized = df_optimized(df)
     return optimized