def create_time_features(df_input):
    """
        Add calendar-based time features to a copy of the input dataframe.

        The values of DATE_COL are backed up as strings in the column 'Datum_dt_bckp'. Afterwards the columns
        'day_in_month', 'calendar_week', 'weekday', 'dayofyear' and 'month' are read from the index of the frame
        returned by utils.set_datecol_as_index_if_needed(), each of them is passed through
        make_time_feature_cyclical() and the result is handed to add_number_of_days_since_last_surbey().

        :param df_input: Dataframe that provides DATE_COL. It is copied first; presumably the date may be given as
            a column or as the index (see the utils helpers) - TODO confirm.
        :return: pandas.DataFrame: The result of add_number_of_days_since_last_surbey() for the enriched frame.
    """
    logger.info("Start create_time_features()")

    # work on a copy and keep a string backup of the date column before it is moved into the index
    df_dates_as_col = utils.unset_datecol_as_index_if_needed(df_input.copy())
    df_dates_as_col['Datum_dt_bckp'] = df_dates_as_col[DATE_COL].astype(str)
    df_features = utils.set_datecol_as_index_if_needed(df_dates_as_col)

    # simple time features, read from the (datetime-like) index
    date_index = df_features.index
    df_features['day_in_month'] = date_index.day
    df_features['calendar_week'] = date_index.isocalendar().week
    df_features['weekday'] = date_index.weekday
    df_features['dayofyear'] = date_index.dayofyear
    df_features['month'] = date_index.month

    # cyclical time features: (column, period) pairs, processed in this order
    # NOTE(review): months with 31 days, leap years (366 days) and ISO week 53 exceed the periods 30/365/52 -
    #   confirm that these period lengths are intended
    cyclical_periods = (
        ('day_in_month', 30),
        ('dayofyear', 365),
        ('weekday', 7),
        ('calendar_week', 52),
        ('month', 12),
    )
    for col_name, period in cyclical_periods:
        df_features = make_time_feature_cyclical(df_features, col_name, period)

    # number of days since the last survey
    return add_number_of_days_since_last_surbey(df_features)
Example #2
0
def export_results(df_input):
    """
        This function writes the generated predictions to the Azure SQL DB so that the preds can be consumed by
            other applications.

        The export only takes place if WRITE_TO_AZURE is truthy. In that case the database connection is opened
        right before the write; if the flag is off, no connection is opened at all.

        :param df_input: The dataframe with predictions. It has to provide DATE_COL, the columns named by
            get_pred_col_names() and the column 'estimator'.
    """
    logger.info("Start export_results()")

    df_working = utils.unset_datecol_as_index_if_needed(df_input.copy())
    output_col_names = [DATE_COL] + get_pred_col_names() + ['estimator']
    target_table_name = 'sonntagsfrage.predictions_questionaire'

    # select the output columns up front so that a missing column is reported even if the export is disabled
    df_output = df_working[output_col_names]

    if not WRITE_TO_AZURE:
        return

    # open connection only when it is actually used
    conn, cursor = connect_to_azure_sql_db()

    # write to Azure SQL DB
    # NOTE(review): the connection is not closed here - confirm that write_df_to_sql_db() takes care of it
    write_df_to_sql_db(df_output,
                       conn,
                       cursor,
                       target_table_name,
                       header=False,
                       delete_dates=False)
def generate_features(df_input):
    """
        Build the feature dataframe from the input data.

        The time features of create_time_features() are added to a copy of the input, the columns
        FEATURE_COLS_USED, DATE_COL and TARGET_COLS are selected and the selection is written out via
        utils.write_df_to_file() under the name 'generate_features_all_features'.

        :param df_input: Dataframe with the base data; it is copied and not modified by this function itself.
        :return: pandas.DataFrame: The dataframe reduced to the chosen feature, date and target columns.
    """
    logger.info("Start generate_features()")

    # enrich a copy of the input with the time features
    df_with_time_features = create_time_features(df_input.copy())
    df_dates_as_col = utils.unset_datecol_as_index_if_needed(df_with_time_features)

    # keep only the columns that are used later on
    selected_cols = FEATURE_COLS_USED + [DATE_COL] + TARGET_COLS
    df_selected = df_dates_as_col[selected_cols]

    utils.write_df_to_file(df_selected, 'generate_features_all_features')
    return df_selected
Example #4
0
# --- calc predictions
# (progress message fixed: it used to announce the feature step instead of the prediction step)
print("Calculate predictions ...")
df_with_preds, df_metrics = model.combined_restults_from_all_algorithms(
    df_with_features)

# --- define output parameters
output_fname = 'df_with_preds'
mode = 'parquet'

# --- get ws from run
# the workspace of the current run provides the default datastore used for the registrations below
run = Run.get_context()
ws = run.experiment.workspace
datastore = Datastore.get_default(ws)

# --- register preds
# registered under the name 'sonntagsfrage_preds' with target path 'azure-ml-datasets'
df_for_register = utils.unset_datecol_as_index_if_needed(df_with_preds)
Dataset.Tabular.register_pandas_dataframe(df_for_register,
                                          (datastore, 'azure-ml-datasets'),
                                          'sonntagsfrage_preds')

# --- register metrics
# registered under the name 'sonntagsfrage_metrics' with target path 'azure-ml-datasets'
df_for_register = utils.unset_datecol_as_index_if_needed(df_metrics)
Dataset.Tabular.register_pandas_dataframe(df_for_register,
                                          (datastore, 'azure-ml-datasets'),
                                          'sonntagsfrage_metrics')

# --- write output to Azure SQL DB
print("Writing file to Azure SQL DB ...")
output.export_results(df_with_preds)

# --- write output to file
Example #5
0
output_path = os.path.join(output_dir)

# --- load input
# read the dataframe 'df_all_data' (parquet) from the input path
print("Load file from last step ...")
df_all_data = utils.load_df_from_file('df_all_data', input_path, 'parquet')

# --- add features
print("Add features to survey data ...")
df_with_features = feat.generate_features(df_all_data)

# --- define output parameters
output_fname = 'df_with_features'
mode = 'parquet'

# --- register dataset
# registered under the name 'survey_data_with_all_features' with target path 'azure-ml-datasets'
df_for_register = utils.unset_datecol_as_index_if_needed(df_with_features)
Dataset.Tabular.register_pandas_dataframe(
    df_for_register,
    (datastore, 'azure-ml-datasets'),
    'survey_data_with_all_features',
)

# --- write output
print(f"Writing file {output_fname}.{mode} to path {output_path} ...")
utils.write_df_to_file(
    df_with_features,
    output_fname,
    output_path,
    mode,
    force_write=True,
)

# --- Done
print("Done.")