def combined_results_from_all_algorithms(df_with_features):
    """
    This function runs every algorithm in LIST_WITH_ALGOS on the feature data
    and concatenates the individual prediction and metric results.

    :param df_with_features: Dataframe with all generated features.
    :return: pandas.DataFrame, pandas.DataFrame: Returns one dataframe with the
        combined predictions and one with the combined metrics of all algorithms.
    """
    init_algo = LIST_WITH_ALGOS[0]
    df_with_preds, df_metrics = generate_predictions(df_with_features, init_algo)

    for current_algo in LIST_WITH_ALGOS[1:]:
        df_with_preds_tmp, df_metrics_tmp = generate_predictions(
            df_with_features, current_algo)

        df_with_preds = pd.concat([df_with_preds, df_with_preds_tmp])
        df_metrics = pd.concat([df_metrics, df_metrics_tmp])

    utils.write_df_to_file(df_with_preds, 'generate_predictions_finish_preds')
    utils.write_df_to_file(df_metrics, 'generate_predictions_finish_metrics')

    return df_with_preds, df_metrics
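
# combined_results_from_all_algorithms() relies on the module-level
# LIST_WITH_ALGOS constant, which is defined elsewhere in this repo. The value
# below is only a hypothetical example of its assumed shape (a non-empty list
# of algorithm identifiers accepted by generate_predictions()), not the
# project's actual configuration:
#
# LIST_WITH_ALGOS = ['decision_tree', 'random_forest', 'xgboost']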
def load_survey_data():
    """
    This function loads the survey data from the Azure SQL DB.

    :return: pandas.DataFrame: Returns a dataframe with the survey data.
    """
    logger.info("Started load_survey_data()")

    # open connection to Azure SQL DB
    conn, cursor = general_utils.connect_to_azure_sql_db()

    # get forecast date
    fcast_date = get_forecast_day()
    logger.info("fcast_date: %s", fcast_date)

    # extract data from Azure SQL DB, plus one dummy row for the day that will be predicted
    sql_stmt = """
        select * from sonntagsfrage.results_questionaire_clean
        union all
        select '""" + fcast_date + """', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '0', '0', '0'
        """
    df_survey_results = pd.read_sql(sql_stmt, conn)

    df_survey_results_clean = clean_survey_data(df_survey_results)
    df_survey_results_final = df_survey_results_clean

    utils.write_df_to_file(df_survey_results_final, 'load_survey_data')

    return df_survey_results_final
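
# A minimal sketch of the get_forecast_day() helper used above. The real
# helper lives elsewhere in this repo; the sketch assumes the forecast day is
# the next upcoming Sunday (the "Sonntagsfrage") and that dates are exchanged
# as 'YYYY-MM-DD' strings. Both assumptions are illustrative, not confirmed by
# the source.
import datetime


def get_forecast_day_sketch():
    """Return the date of the next Sunday as a 'YYYY-MM-DD' string (sketch)."""
    today = datetime.date.today()
    days_until_sunday = (6 - today.weekday()) % 7  # Monday == 0 ... Sunday == 6
    return (today + datetime.timedelta(days=days_until_sunday)).isoformat()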
def load_data():
    """
    This function loads all data from the sources and combines them into one
    dataframe for further processing.

    :return: pandas.DataFrame: Returns one dataframe with the combined data
        from all input sources.
    """
    logger.info("Started load_data()")

    df_survey = load_survey_data()
    df_all_data_combined = df_survey

    utils.write_df_to_file(df_all_data_combined, 'load_data_df_all_data_combined')

    return df_all_data_combined
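
# load_data() currently combines only the single survey source. The helper
# below is a hypothetical sketch (not part of this repo) of how a second
# source would be folded in, assuming both frames carry the shared date column
# DATE_COL.
def combine_sources_sketch(df_survey, df_extra, date_col):
    """Left-join an additional input source onto the survey data (illustrative only)."""
    return df_survey.merge(df_extra, on=date_col, how='left')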
def generate_features(df_input):
    """
    This function generates all features from the base data.

    :param df_input: Dataframe with cols that need to be transformed into a
        cyclical representation.
    :return: pandas.DataFrame: Returns a dataframe with all generated features.
    """
    logger.info("Started generate_features()")

    df_no_features = df_input.copy()
    df_added_time_features = create_time_features(df_no_features)
    df_all_features = utils.unset_datecol_as_index_if_needed(df_added_time_features)

    all_cols = FEATURE_COLS_USED + [DATE_COL] + TARGET_COLS
    df_chosen_features = df_all_features[all_cols]

    utils.write_df_to_file(df_chosen_features, 'generate_features_all_features')

    return df_chosen_features
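
# A minimal sketch of the cyclical encoding that create_time_features() is
# documented to apply. The actual feature set and column names used by this
# repo may differ; this only illustrates the technique.
import numpy as np


def cyclical_encode_sketch(df, col, period):
    """Map an integer time feature onto the unit circle so that, e.g.,
    week 52 and week 1 end up close together (sketch, not the repo's helper)."""
    df[col + '_sin'] = np.sin(2 * np.pi * df[col] / period)
    df[col + '_cos'] = np.cos(2 * np.pi * df[col] / period)
    return df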
# "-images-idx3-ubyte.gz", # "t10k-labels-idx1-ubyte.gz": "https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/t10k" # "-labels-idx1-ubyte.gz", # "train-images-idx3-ubyte.gz": "https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/train" # "-images-idx3-ubyte.gz", # "train-labels-idx1-ubyte.gz": "https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/train" # "-labels-idx1-ubyte.gz", # } # for file_to_download_name in files_to_download: # print(f"Downloading file '{file_to_download_name}...") # file_to_download_path = os.path.join(output_dir, file_to_download_name) # file_to_download_url = files_to_download[file_to_download_name] # urllib.request.urlretrieve(file_to_download_url, file_to_download_path) print("Loading data from Azure SQL DB ...") df_all_data = prep.load_data() output_path = os.path.join(output_dir) output_fname = 'df_all_data' mode = 'parquet' print("Writing file " + output_fname + "." + mode + " to path " + output_path + " ...") utils.write_df_to_file(df_all_data, output_fname, output_path, mode, force_write=True) # --- Done print("Done.")
mode = 'parquet'

# --- get ws from run
run = Run.get_context()
ws = run.experiment.workspace
datastore = Datastore.get_default(ws)

# --- register preds
df_for_register = utils.unset_datecol_as_index_if_needed(df_with_preds)
Dataset.Tabular.register_pandas_dataframe(
    df_for_register, (datastore, 'azure-ml-datasets'), 'sonntagsfrage_preds')

# --- register metrics
df_for_register = utils.unset_datecol_as_index_if_needed(df_metrics)
Dataset.Tabular.register_pandas_dataframe(
    df_for_register, (datastore, 'azure-ml-datasets'), 'sonntagsfrage_metrics')

# --- write output to Azure SQL DB
print("Writing results to Azure SQL DB ...")
output.export_results(df_with_preds)

# --- write output to file
print("Writing file " + output_fname + "." + mode + " to path " + output_path + " ...")
utils.write_df_to_file(df_with_preds, output_fname, output_path, mode)

# --- Done
print("Done.")
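
# output.export_results() is this repo's writer for the Azure SQL DB; the
# sketch below shows one plausible implementation using SQLAlchemy and
# pandas.DataFrame.to_sql. The connection string, schema, and table name are
# assumptions for illustration only.
import sqlalchemy


def export_results_sketch(df_with_preds, conn_str):
    """Append the prediction rows to a results table in Azure SQL (sketch)."""
    engine = sqlalchemy.create_engine(conn_str)
    df_with_preds.to_sql('predictions_questionaire', engine,
                         schema='sonntagsfrage', if_exists='append', index=False)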
output_path = os.path.join(output_dir)

# --- load input
print("Loading file from last step ...")
df_all_data = utils.load_df_from_file('df_all_data', input_path, 'parquet')

# --- add features
print("Adding features to survey data ...")
df_with_features = feat.generate_features(df_all_data)

# --- define output parameters
output_fname = 'df_with_features'
mode = 'parquet'

# --- get ws from run
run = Run.get_context()
ws = run.experiment.workspace
datastore = Datastore.get_default(ws)

# --- register dataset
df_for_register = utils.unset_datecol_as_index_if_needed(df_with_features)
Dataset.Tabular.register_pandas_dataframe(
    df_for_register, (datastore, 'azure-ml-datasets'), 'survey_data_with_all_features')

# --- write output
print("Writing file " + output_fname + "." + mode + " to path " + output_path + " ...")
utils.write_df_to_file(df_with_features, output_fname, output_path, mode, force_write=True)

# --- Done
print("Done.")
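
# utils.load_df_from_file() mirrors write_df_to_file(); the sketch below shows
# the read side under the same parquet assumption. The real helper may accept
# further modes.
import os
import pandas as pd


def load_df_from_file_sketch(fname, path, mode='parquet'):
    """Read <path>/<fname>.<mode> back into a dataframe (sketch)."""
    full_path = os.path.join(path, fname + '.' + mode)
    if mode == 'parquet':
        return pd.read_parquet(full_path)
    raise ValueError("unsupported mode: " + mode)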