def format_save(writer: DataFrameWriter, data_source: DataSource) -> None:
    """Write through ``writer`` using the format declared by ``data_source``.

    The writer method is chosen from ``data_source.data_format``:

    * ``DataFormat.CSV``     -> ``writer.option("header", True).csv(path)``
    * ``DataFormat.PARQUET`` -> ``writer.parquet(path)``
    * ``DataFormat.JSON``    -> ``writer.json(path)``

    where ``path`` is ``data_source.source_path``. Any other format is
    ignored and nothing is written.

    Args:
        writer: The writer whose output should be persisted.
        data_source: Describes the target; must expose ``data_format`` and,
            for the file-based formats, ``source_path``.

    Raises:
        NotImplementedError: If the format is ``DataFormat.JDBC`` and
            ``data_source`` is a ``JDBCDataSource``; JDBC output is not
            implemented yet.
    """
    data_format = data_source.data_format

    # The formats are mutually exclusive, so a single elif chain is enough.
    # ``source_path`` is only read in the file-based branches on purpose:
    # a JDBC source is not assumed to have one.
    if data_format is DataFormat.CSV:
        writer.option("header", True).csv(data_source.source_path)
    elif data_format is DataFormat.PARQUET:
        writer.parquet(data_source.source_path)
    elif data_format is DataFormat.JSON:
        writer.json(data_source.source_path)
    elif data_format is DataFormat.JDBC and isinstance(data_source, JDBCDataSource):
        # TODO: implement JDBC output. ``writer.jdbc()`` cannot be called
        # without arguments, so fail with an explicit message instead of an
        # opaque TypeError from the writer.
        raise NotImplementedError(
            "Saving to a JDBC data source is not implemented yet"
        )
def save_df(
    df,
    day_of_week,
    passenger_count,
    *,
    storage_dir='/hadoop/cms/store/user/jgran/taxi/saved_dataframes/',
):
    """Save ``df`` as Parquet under ``storage_dir``.

    The output location is ``<storage_dir>/df_<day_of_week>_<passenger_count>``.

    Args:
        df: The DataFrame to persist.
        day_of_week: First component of the output name. Any value that can
            be formatted as text is accepted (e.g. ``str`` or ``int``).
        passenger_count: Second component of the output name. Any value that
            can be formatted as text is accepted.
        storage_dir: Directory that receives the output. A trailing slash is
            optional. Defaults to the original hard-coded location.
    """
    # str.format() instead of ``+`` so non-string values (e.g. an int
    # passenger count) do not raise TypeError; for strings the resulting
    # name is identical.
    name = 'df_{}_{}'.format(day_of_week, passenger_count)

    # Normalise the separator so callers may pass the directory with or
    # without a trailing slash.
    target = '{}/{}'.format(storage_dir.rstrip('/'), name)

    df_writer = DataFrameWriter(df)
    df_writer.parquet(target)
# NOTE(review): a function with this same name and signature is defined
# earlier in this file; this later definition is the one that stays bound.
# Consider removing one of the two.
def save_df(
    df,
    day_of_week,
    passenger_count,
    *,
    storage_dir='/hadoop/cms/store/user/jgran/taxi/saved_dataframes/',
):
    """Save ``df`` as Parquet under ``storage_dir``.

    The output location is ``<storage_dir>/df_<day_of_week>_<passenger_count>``.

    Args:
        df: The DataFrame to persist.
        day_of_week: First component of the output name. Any value that can
            be formatted as text is accepted (e.g. ``str`` or ``int``).
        passenger_count: Second component of the output name. Any value that
            can be formatted as text is accepted.
        storage_dir: Directory that receives the output. A trailing slash is
            optional. Defaults to the original hard-coded location.
    """
    # str.format() instead of ``+`` so non-string values (e.g. an int
    # passenger count) do not raise TypeError; for strings the resulting
    # name is identical.
    name = 'df_{}_{}'.format(day_of_week, passenger_count)

    # Normalise the separator so callers may pass the directory with or
    # without a trailing slash.
    target = '{}/{}'.format(storage_dir.rstrip('/'), name)

    df_writer = DataFrameWriter(df)
    df_writer.parquet(target)