report = ProfileReport(arbis_matched, title='ArbIS Matched Dataset Report')
report.to_file(work_path + file_prefix + '_report.html')

###################
### Correlation ###
###################

# define column types
nominal_columns = ['Strasse', 'Month']
dichotomous_columns = ['Richtung']
ordinal_columns = ['AnzGesperrtFs', 'Einzug']

# Encode non numerical columns
arbis_encoded, arbis_encoded_dict = numerical_encoding(
    arbis_matched, ["Strasse", 'Month'],
    drop_single_label=False,
    drop_fact_dict=False)
arbis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')
with open(csv_path + 'encoded_dict.csv', 'w') as tf:
    for key in arbis_encoded_dict.keys():
        tf.write("%s, %s\n" % (key, arbis_encoded_dict[key]))

# Calculate with Cramér's V
results = None  # To make sure that no old data is reused
results = compute_correlations(arbis_encoded,
                               columns_nominal=nominal_columns,
                               columns_dichotomous=dichotomous_columns,
                               columns_ordinal=ordinal_columns,
                               bias_correction=False)
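# --- Illustrative sketch, not part of the original script ---
# Assuming compute_correlations returns the pairwise association matrix as a
# pandas DataFrame (the file names below are placeholders), the Cramér's V
# results could be persisted and rendered as a heatmap roughly like this:
import matplotlib.pyplot as plt
import seaborn as sns

results.to_csv(csv_path + 'correlations.csv', sep=';')
sns.heatmap(results, annot=True, fmt='.2f', cmap='coolwarm', square=True)
plt.savefig(work_path + file_prefix + '_correlations.png', dpi=300,
            bbox_inches='tight')
plt.close()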
###################
### Encoding ###
###################

# define column types
nominal_columns = [
    "Str", "Kat", "Typ", "UArt1", "UArt2", "AUrs1", "AUrs2", "AufHi",
    "Char1", "Char2", "Bes1", "Bes2", "Lich1", "Lich2", "Zust1", "Zust2",
    "WoTag", 'Month'
]
dichotomous_columns = ["Alkoh"]
ordinal_columns = ["Betei", "Fstf", "FeiTag"]

# Encode non numerical columns
baysis_encoded, baysis_encoded_dict = numerical_encoding(
    baysis_selected, ["Strasse", "Fstf", 'Month'],
    drop_single_label=False,
    drop_fact_dict=False)
baysis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')
with open(csv_path + 'encoded_dict.csv', 'w') as tf:
    for key in baysis_encoded_dict.keys():
        tf.write("%s, %s\n" % (key, baysis_encoded_dict[key]))

###################
### Correlation ###
###################

baysis_encoded = baysis_encoded.rename(
    columns={
        "TempMax": "TMax",
        "TempAvg": "TAvg",
###################
### Correlation ###
###################

# define column types
nominal_columns = [
    'Str', 'Kat', 'Typ', 'UArt1', 'UArt2', 'AUrs1', 'AUrs2', 'AufHi',
    'Char1', 'Char2', 'Lich1', 'Lich2', 'Zust1', 'Zust2', 'WoTag', 'Month'
]
dichotomous_columns = ['Alkoh']
ordinal_columns = ['Betei', 'Fstf', 'FeiTag']

# Encode non numerical columns
baysis_encoded, baysis_encoded_dict = numerical_encoding(
    baysis_matched, ['Strasse', 'Fstf', 'WoTag', 'Month'],
    drop_single_label=False,
    drop_fact_dict=False)
baysis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')
with open(csv_path + 'encoded_dict.csv', 'w') as tf:
    for key in baysis_encoded_dict.keys():
        tf.write("%s, %s\n" % (key, baysis_encoded_dict[key]))

baysis_encoded = baysis_encoded.rename(
    columns={
        "TempMax": "TMax",
        "TempAvg": "TAvg",
        "SpatMax": "SMax",
        "SpatAvg": "SAvg",
        "Coverage": "Cov",
        "TempDist": "TDist",
"Fstf", "WoTag", "FeiTag", 'Month' ] dichotomous_columns = [] ordinal_columns = [] # Encode non numerical columns baysis_encoded, baysis_encoded_dict = numerical_encoding(baysis_selected, ["Strasse", "Fstf", 'Month', "TempMax", "TempAvg", "SpatMax", "SpatAvg", "Coverage", "TLCar", "TLHGV" ], drop_single_label=False, drop_fact_dict=False) baysis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';') with open(csv_path + 'encoded_dict.csv', 'w') as tf: for key in baysis_encoded_dict.keys(): tf.write("%s, %s\n" % (key, baysis_encoded_dict[key])) ################### ### Correlation ### ###################
"Str", "AGF", "Einzug", "Richtung", "Length", "Duration", 'Month' ] dichotomous_columns = [] ordinal_columns = [] # Encode non numerical columns arbis_encoded, arbis_encoded_dict = numerical_encoding( arbis_selected, [ "Strasse", 'Month', "Length", "Duration", "TempMax", "TempAvg", "SpatMax", "SpatAvg", "Coverage", "TLCar", "TLHGV" ], drop_single_label=False, drop_fact_dict=False) arbis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';') with open(csv_path + 'encoded_dict.csv', 'w') as tf: for key in arbis_encoded_dict.keys(): tf.write("%s, %s\n" % (key, arbis_encoded_dict[key])) arbis_encoded = arbis_encoded.rename( columns={ "TempMax": "TMax", "TempAvg": "TAvg", "SpatMax": "SMax", "SpatAvg": "SAvg",
"AufHi", "Char1", "Char2", "Lich1", "Lich2", "Zust1", "Zust2", "WoTag", "FeiTag", 'Month'] dichotomous_columns = ["Alkoh"] ordinal_columns = ["Betei", "Fstf"] # Encode non numerical columns baysis_encoded, baysis_encoded_dict = numerical_encoding(baysis_original, ["Strasse", "Kat", "Typ", "UArt1", "UArt2", "AUrs1", "AUrs2", "AufHi", "Char1", "Char2", "Lich1", "Lich2", "Zust1", "Zust2", "Fstf" "WoTag", "FeiTag", 'Month'], drop_single_label=False, drop_fact_dict=False) baysis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';') with open(csv_path + 'encoded_dict.csv', 'w') as tf: for key in baysis_encoded_dict.keys(): tf.write("%s, %s\n" % (key, baysis_encoded_dict[key])) ################### ### Correlation ### ###################
report = ProfileReport(arbis_original, title='ArbIS Original Dataset Report')
report.to_file(work_path + file_prefix + '_report.html')

###################
### Correlation ###
###################

# define column types
nominal_columns = ['Strasse', 'StreckeID', 'Month']
dichotomous_columns = ['Richtung']
ordinal_columns = ['AnzGesperrtFs', 'Einzug']

# Encode non numerical columns
arbis_encoded, arbis_encoded_dict = numerical_encoding(
    arbis_original, ['Strasse', 'StreckeID', 'Month'],
    drop_single_label=False,
    drop_fact_dict=False)
arbis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')
with open(csv_path + 'encoded_dict.csv', 'w') as tf:
    for key in arbis_encoded_dict.keys():
        tf.write("%s, %s\n" % (key, arbis_encoded_dict[key]))

# Calculate with Cramér's V
results = None  # To make sure that no old data is reused
results = compute_correlations(arbis_encoded,
                               columns_nominal=nominal_columns,
                               columns_dichotomous=dichotomous_columns,
                               columns_ordinal=ordinal_columns,
                               bias_correction=False)
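# --- Illustrative sketch, not part of the original script ---
# Assuming each entry of arbis_encoded_dict is the label index produced by
# pd.factorize (i.e. encoded value i corresponds to labels[i], as in dython's
# numerical_encoding), the integer codes could be mapped back to the original
# labels, e.g. for the 'Strasse' column:
strasse_labels = arbis_encoded_dict['Strasse']
strasse_decoded = arbis_encoded['Strasse'].map(lambda code: strasse_labels[code])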