Beispiel #1
0
def combined_restults_from_all_algorithms(df_with_features):
    """Run every algorithm in LIST_WITH_ALGOS on the feature set and
    combine their outputs.

    NOTE(review): the typo in the function name ("restults") is kept
    deliberately because callers elsewhere depend on it.

    Args:
        df_with_features (pandas.DataFrame): Dataframe with the features
            that each prediction algorithm consumes.

    Returns:
        tuple: ``(df_with_preds, df_metrics)`` — the concatenated
        predictions and the concatenated metrics of all algorithms.
    """
    preds_frames = []
    metrics_frames = []

    # Collect one result pair per algorithm and concatenate ONCE at the
    # end: repeated pd.concat inside the loop copies all previously
    # accumulated rows on every iteration (quadratic behavior).
    for algo in LIST_WITH_ALGOS:
        df_preds_tmp, df_metrics_tmp = generate_predictions(
            df_with_features, algo)
        preds_frames.append(df_preds_tmp)
        metrics_frames.append(df_metrics_tmp)

    df_with_preds = pd.concat(preds_frames)
    df_metrics = pd.concat(metrics_frames)

    utils.write_df_to_file(df_with_preds, 'generate_predictions_finish_preds')
    utils.write_df_to_file(df_metrics, 'generate_predictions_finish_metrics')
    return df_with_preds, df_metrics
def load_survey_data():
    """Load the survey data from the Azure SQL DB.

    A dummy row for the forecast day is appended via UNION ALL so the
    date to be predicted is present in the result set before cleaning.

    :return: pandas.DataFrame: Dataframe with the cleaned survey data.
    """
    logger.info("Started load_survey_data()")

    # open connection to Azure SQL DB
    conn, cursor = general_utils.connect_to_azure_sql_db()

    # get forecast date
    fcast_date = get_forecast_day()
    logger.info("fcast_date")
    logger.info(fcast_date)

    # Extract data from Azure SQL DB plus one dummy line for the day that
    # will be predicted here.
    # NOTE(review): fcast_date is interpolated into the SQL text. It comes
    # from the internal get_forecast_day() helper, so injection risk looks
    # low, but a parameterized query would be safer — confirm the driver's
    # placeholder style before changing.
    sql_stmt = f"""
        select * from sonntagsfrage.results_questionaire_clean
        union all
        select '{fcast_date}', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '0', '0', '0'
        """
    df_survey_results = pd.read_sql(sql_stmt, conn)

    df_survey_results_final = clean_survey_data(df_survey_results)

    utils.write_df_to_file(df_survey_results_final, 'load_survey_data')
    return df_survey_results_final
def load_data():
    """Load all source data and merge it into a single dataframe.

    :return: pandas.DataFrame: Combined data from every input source.
    """
    logger.info("Started load_data()")

    # The survey results are currently the only input source, so the
    # combined frame is simply the survey frame.
    df_all_data_combined = load_survey_data()

    utils.write_df_to_file(df_all_data_combined,
                           'load_data_df_all_data_combined')
    return df_all_data_combined
def generate_features(df_input):
    """Generate all model features from the base data.

    :param df_input: Dataframe whose columns are enriched with time
        features (cyclical representation).
    :return: pandas.DataFrame: Dataframe restricted to the chosen
        feature columns, the date column and the target columns.
    """
    logger.info("Start generate_features()")

    # Work on a copy so the caller's dataframe is never mutated, then
    # push it through the feature pipeline step by step.
    df_working = df_input.copy()
    df_working = create_time_features(df_working)
    df_working = utils.unset_datecol_as_index_if_needed(df_working)

    # Keep only the columns the downstream models actually use.
    selected_cols = FEATURE_COLS_USED + [DATE_COL] + TARGET_COLS
    df_chosen_features = df_working[selected_cols]

    utils.write_df_to_file(df_chosen_features, 'generate_features_all_features')

    return df_chosen_features
Beispiel #5
0
#                                  "-images-idx3-ubyte.gz",
#     "t10k-labels-idx1-ubyte.gz": "https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/t10k"
#                                  "-labels-idx1-ubyte.gz",
#     "train-images-idx3-ubyte.gz": "https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/train"
#                                   "-images-idx3-ubyte.gz",
#     "train-labels-idx1-ubyte.gz": "https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/train"
#                                   "-labels-idx1-ubyte.gz",
# }

# for file_to_download_name in files_to_download:
#     print(f"Downloading file '{file_to_download_name}...")
#     file_to_download_path = os.path.join(output_dir, file_to_download_name)
#     file_to_download_url = files_to_download[file_to_download_name]
#     urllib.request.urlretrieve(file_to_download_url, file_to_download_path)

# --- load the combined input data from the database
print("Loading data from Azure SQL DB ...")
df_all_data = prep.load_data()
output_path = os.path.join(output_dir)

# --- persist the dataframe for the next pipeline step
output_fname = 'df_all_data'
mode = 'parquet'
print(f"Writing file {output_fname}.{mode} to path {output_path} ...")
utils.write_df_to_file(df_all_data, output_fname, output_path, mode,
                       force_write=True)

# --- Done
print("Done.")
Beispiel #6
0
mode = 'parquet'

# --- resolve workspace and default datastore from the current run context
run = Run.get_context()
ws = run.experiment.workspace
datastore = Datastore.get_default(ws)

# --- register predictions and metrics as tabular datasets
# Both registrations follow the same recipe, so iterate over the pairs.
for df_source, dataset_name in ((df_with_preds, 'sonntagsfrage_preds'),
                                (df_metrics, 'sonntagsfrage_metrics')):
    df_for_register = utils.unset_datecol_as_index_if_needed(df_source)
    Dataset.Tabular.register_pandas_dataframe(df_for_register,
                                              (datastore, 'azure-ml-datasets'),
                                              dataset_name)

# --- write output to Azure SQL DB
print("Writing file to Azure SQL DB ...")
output.export_results(df_with_preds)

# --- write output to file
print(f"Writing file {output_fname}.{mode} to path {output_path} ...")
utils.write_df_to_file(df_with_preds, output_fname, output_path, mode)

# --- Done
print("Done.")
Beispiel #7
0
output_path = os.path.join(output_dir)

# --- load input produced by the previous pipeline step
print("Load file from last step ...")
df_all_data = utils.load_df_from_file('df_all_data', input_path, 'parquet')

# --- add features
print("Add features to survey data ...")
df_with_features = feat.generate_features(df_all_data)

# --- define output parameters
output_fname = 'df_with_features'
mode = 'parquet'

# --- register the enriched dataframe as a tabular dataset
df_for_register = utils.unset_datecol_as_index_if_needed(df_with_features)
Dataset.Tabular.register_pandas_dataframe(df_for_register,
                                          (datastore, 'azure-ml-datasets'),
                                          'survey_data_with_all_features')

# --- write output
print(f"Writing file {output_fname}.{mode} to path {output_path} ...")
utils.write_df_to_file(df_with_features, output_fname, output_path, mode,
                       force_write=True)

# --- Done
print("Done.")