Exemple #1
0
def format_save(writer: DataFrameWriter, data_source: DataSource) -> None:
    """Write through ``writer`` using the method that matches the source format.

    Args:
        writer: Writer whose ``csv`` / ``parquet`` / ``json`` / ``jdbc``
            method is invoked.
        data_source: Supplies ``data_format`` (compared by identity against
            the ``DataFormat`` members) and, for the file-based formats,
            ``source_path`` as the output location.

    A format that matches none of the branches below results in no write.
    """
    data_format = data_source.data_format

    # The formats are mutually exclusive, so at most one branch runs.
    if data_format is DataFormat.CSV:
        # CSV output is written with the "header" option enabled.
        writer.option("header", True).csv(data_source.source_path)
    elif data_format is DataFormat.PARQUET:
        writer.parquet(data_source.source_path)
    elif data_format is DataFormat.JSON:
        writer.json(data_source.source_path)
    elif data_format is DataFormat.JDBC and isinstance(data_source, JDBCDataSource):
        # isinstance (not an exact type match) so JDBCDataSource subclasses
        # are handled as well.
        # TODO: supply the connection arguments. This call passes none; if
        # ``writer`` is PySpark's DataFrameWriter, ``jdbc`` needs at least a
        # url and a table name -- confirm against JDBCDataSource's fields.
        writer.jdbc()
Exemple #2
0
def save_df(df, day_of_week, passenger_count):
    """Write ``df`` as Parquet under the saved-dataframes storage directory.

    The output location is ``<storage_dir>df_<day_of_week>_<passenger_count>``.

    Args:
        df: Object handed to ``DataFrameWriter``.
        day_of_week: Value embedded in the output name.
        passenger_count: Value embedded in the output name.

    Both name components are formatted into the name rather than
    concatenated, so non-string values (e.g. an integer count) are accepted
    instead of raising ``TypeError``; string inputs give the same name as
    before.
    """
    storage_dir = '/hadoop/cms/store/user/jgran/taxi/saved_dataframes/'
    name = f'df_{day_of_week}_{passenger_count}'
    # NOTE: the frame is written as-is; an earlier variant wrapped it in
    # df.coalesce(50) before writing, which is not done here.
    df_writer = DataFrameWriter(df)
    df_writer.parquet(storage_dir + name)
Exemple #3
0
def save_df(df, day_of_week, passenger_count):
    """Save ``df`` in Parquet format to the saved-dataframes directory.

    The target path is the fixed storage directory followed by
    ``df_<day_of_week>_<passenger_count>``.

    Args:
        df: Object passed to ``DataFrameWriter``.
        day_of_week: Component of the output name.
        passenger_count: Component of the output name.

    The name is built with string formatting, so either component may be a
    non-string (such as an int) without triggering the ``TypeError`` that
    ``str + int`` concatenation would raise. For string arguments the
    resulting path is unchanged.
    """
    storage_dir = '/hadoop/cms/store/user/jgran/taxi/saved_dataframes/'
    name = f'df_{day_of_week}_{passenger_count}'
    # NOTE: no coalescing is applied before the write; a previous variant
    # used df.coalesce(50) here.
    df_writer = DataFrameWriter(df)
    df_writer.parquet(storage_dir + name)