Example #1
        report = ProfileReport(arbis_matched,
                               title='ArbIS Matched Dataset Report')
        report.to_file(work_path + file_prefix + '_report.html')
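
These excerpts omit their import lines. ProfileReport here presumably comes from pandas_profiling (distributed as ydata_profiling in newer releases); the following is only a minimal sketch of the assumed import and a lightweight report, with input and output paths chosen purely for illustration:

import pandas as pd
from pandas_profiling import ProfileReport  # newer releases: from ydata_profiling import ProfileReport

arbis_matched = pd.read_csv('arbis_matched.csv', sep=';')  # hypothetical input file
report = ProfileReport(arbis_matched,
                       title='ArbIS Matched Dataset Report',
                       minimal=True)  # minimal mode skips the most expensive analyses
report.to_file('arbis_matched_report.html')  # hypothetical output path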

    ###################
    ### Correlation ###
    ###################

    # define column types
    nominal_columns = ['Strasse', 'Month']
    dichotomous_columns = ['Richtung']
    ordinal_columns = ['AnzGesperrtFs', 'Einzug']

    # Encode non-numerical columns
    arbis_encoded, arbis_encoded_dict = numerical_encoding(
        arbis_matched, ["Strasse", 'Month'],
        drop_single_label=False,
        drop_fact_dict=False)
    arbis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')

    with open(csv_path + 'encoded_dict.csv', 'w') as tf:
        for key in arbis_encoded_dict.keys():
            tf.write("%s, %s\n" % (key, arbis_encoded_dict[key]))

    # Calculate with Cramér's V
    results = None  # To make sure that no old data is reused
    results = compute_correlations(arbis_encoded,
                                   columns_nominal=nominal_columns,
                                   columns_dichotomous=dichotomous_columns,
                                   columns_ordinal=ordinal_columns,
                                   bias_correction=False)
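
compute_correlations does not appear to be part of dython's public API, so it is presumably a project-specific helper that these excerpts do not show. The following is only a sketch of what such a helper could look like, built on dython's cramers_v and correlation_ratio; the function body, the Pearson fallback for numerical pairs, and the simplified handling of ordinal columns are assumptions, not the original implementation:

import pandas as pd
from dython.nominal import cramers_v, correlation_ratio


def compute_correlations(df, columns_nominal=(), columns_dichotomous=(),
                         columns_ordinal=(), bias_correction=True):
    # Hypothetical helper: build a symmetric association matrix over all
    # columns, using Cramér's V for categorical pairs, the correlation ratio
    # for categorical/numerical pairs, and Pearson for numerical pairs.
    # Ordinal columns are treated like nominal ones here for brevity.
    categorical = set(columns_nominal) | set(columns_dichotomous) | set(columns_ordinal)
    cols = list(df.columns)
    matrix = pd.DataFrame(index=cols, columns=cols, dtype=float)
    for a in cols:
        for b in cols:
            if a in categorical and b in categorical:
                matrix.loc[a, b] = cramers_v(df[a], df[b],
                                             bias_correction=bias_correction)
            elif a in categorical:
                matrix.loc[a, b] = correlation_ratio(df[a], df[b])
            elif b in categorical:
                matrix.loc[a, b] = correlation_ratio(df[b], df[a])
            else:
                matrix.loc[a, b] = df[a].corr(df[b])
    return matrix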
Example #2
    ################
    ### Encoding ###
    ################

    # define column types
    nominal_columns = [
        "Str", "Kat", "Typ", "UArt1", "UArt2", "AUrs1", "AUrs2", "AufHi",
        "Char1", "Char2", "Bes1", "Bes2", "Lich1", "Lich2", "Zust1", "Zust2",
        "WoTag", 'Month'
    ]
    dichotomous_columns = ["Alkoh"]
    ordinal_columns = ["Betei", "Fstf", "FeiTag"]

    # Encode non-numerical columns
    baysis_encoded, baysis_encoded_dict = numerical_encoding(
        baysis_selected, ["Strasse", "Fstf", 'Month'],
        drop_single_label=False,
        drop_fact_dict=False)
    baysis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')

    with open(csv_path + 'encoded_dict.csv', 'w') as tf:
        for key in baysis_encoded_dict.keys():
            tf.write("%s, %s\n" % (key, baysis_encoded_dict[key]))

    ###################
    ### Correlation ###
    ###################

    baysis_encoded = baysis_encoded.rename(
        columns={
            "TempMax": "TMax",
            "TempAvg": "TAvg",
Example #3
    ###################
    ### Correlation ###
    ###################

    # define column types
    nominal_columns = [
        'Str', 'Kat', 'Typ', 'UArt1', 'UArt2', 'AUrs1', 'AUrs2', 'AufHi',
        'Char1', 'Char2', 'Lich1', 'Lich2', 'Zust1', 'Zust2', 'WoTag', 'Month'
    ]
    dichotomous_columns = ['Alkoh']
    ordinal_columns = ['Betei', 'Fstf', 'FeiTag']

    # Encode non-numerical columns
    baysis_encoded, baysis_encoded_dict = numerical_encoding(
        baysis_matched, ['Strasse', 'Fstf', 'WoTag', 'Month'],
        drop_single_label=False,
        drop_fact_dict=False)
    baysis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')

    with open(csv_path + 'encoded_dict.csv', 'w') as tf:
        for key in baysis_encoded_dict.keys():
            tf.write("%s, %s\n" % (key, baysis_encoded_dict[key]))

    baysis_encoded = baysis_encoded.rename(
        columns={
            "TempMax": "TMax",
            "TempAvg": "TAvg",
            "SpatMax": "SMax",
            "SpatAvg": "SAvg",
            "Coverage": "Cov",
            "TempDist": "TDist",
Example #4
        "Fstf",
        "WoTag",
        "FeiTag",
        'Month'
    ]
    dichotomous_columns = []
    ordinal_columns = []

    # Encode non-numerical columns
    baysis_encoded, baysis_encoded_dict = numerical_encoding(baysis_selected,
                                                             ["Strasse",
                                                              "Fstf",
                                                              'Month',
                                                              "TempMax",
                                                              "TempAvg",
                                                              "SpatMax",
                                                              "SpatAvg",
                                                              "Coverage",
                                                              "TLCar",
                                                              "TLHGV"
                                                              ],
                                                             drop_single_label=False,
                                                             drop_fact_dict=False)
    baysis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')

    with open(csv_path + 'encoded_dict.csv', 'w') as tf:
        for key in baysis_encoded_dict.keys():
            tf.write("%s, %s\n" % (key, baysis_encoded_dict[key]))

    ###################
    ### Correlation ###
    ###################
Example #5
        "Str",
        "AGF",
        "Einzug",
        "Richtung",
        "Length",
        "Duration",
        'Month'
    ]
    dichotomous_columns = []
    ordinal_columns = []

    # Encode non-numerical columns
    arbis_encoded, arbis_encoded_dict = numerical_encoding(
        arbis_selected, [
            "Strasse", 'Month', "Length", "Duration", "TempMax", "TempAvg",
            "SpatMax", "SpatAvg", "Coverage", "TLCar", "TLHGV"
        ],
        drop_single_label=False,
        drop_fact_dict=False)
    arbis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')

    with open(csv_path + 'encoded_dict.csv', 'w') as tf:
        for key in arbis_encoded_dict.keys():
            tf.write("%s, %s\n" % (key, arbis_encoded_dict[key]))

    arbis_encoded = arbis_encoded.rename(
        columns={
            "TempMax": "TMax",
            "TempAvg": "TAvg",
            "SpatMax": "SMax",
            "SpatAvg": "SAvg",
Example #6
                       "AufHi",
                       "Char1", "Char2",
                       "Lich1", "Lich2",
                       "Zust1", "Zust2",
                       "WoTag",
                       "FeiTag", 'Month']
    dichotomous_columns = ["Alkoh"]
    ordinal_columns = ["Betei", "Fstf"]

    # Encode non-numerical columns
    baysis_encoded, baysis_encoded_dict = numerical_encoding(
        baysis_original, [
            "Strasse", "Kat", "Typ", "UArt1", "UArt2", "AUrs1", "AUrs2",
            "AufHi", "Char1", "Char2", "Lich1", "Lich2", "Zust1", "Zust2",
            "Fstf", "WoTag", "FeiTag", 'Month'
        ],
        drop_single_label=False,
        drop_fact_dict=False)
    baysis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')

    with open(csv_path + 'encoded_dict.csv', 'w') as tf:
        for key in baysis_encoded_dict.keys():
            tf.write("%s, %s\n" % (key, baysis_encoded_dict[key]))

    ###################
    ### Correlation ###
    ###################
Example #7
        report = ProfileReport(arbis_original,
                               title='ArbIS Original Dataset Report')
        report.to_file(work_path + file_prefix + '_report.html')

    ###################
    ### Correlation ###
    ###################

    # define column types
    nominal_columns = ['Strasse', 'StreckeID', 'Month']
    dichotomous_columns = ['Richtung']
    ordinal_columns = ['AnzGesperrtFs', 'Einzug']

    # Encode non numerical columns
    arbis_encoded, arbis_encoded_dict = numerical_encoding(
        arbis_original, ['Strasse', 'StreckeID', 'Month'],
        drop_single_label=False,
        drop_fact_dict=False)
    arbis_encoded.to_csv(csv_path + 'encoded.csv', index=False, sep=';')

    with open(csv_path + 'encoded_dict.csv', 'w') as tf:
        for key in arbis_encoded_dict.keys():
            tf.write("%s, %s\n" % (key, arbis_encoded_dict[key]))

    # Calculate with Cramér's V
    results = None  # To make sure that no old data is reused
    results = compute_correlations(arbis_encoded,
                                   columns_nominal=nominal_columns,
                                   columns_dichotomous=dichotomous_columns,
                                   columns_ordinal=ordinal_columns,
                                   bias_correction=False)
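
For comparison, dython also offers a single-call helper that builds a mixed nominal/numerical association matrix directly from the encoded frame. Below is a minimal sketch using the variables from the last example; the exact return type of associations differs between dython versions, so the 'corr' key (and the output file name) are assumptions:

from dython.nominal import associations

assoc = associations(arbis_encoded,
                     nominal_columns=nominal_columns + dichotomous_columns + ordinal_columns,
                     plot=False)
corr_matrix = assoc['corr']  # recent dython versions return a dict; older ones return the DataFrame itself
corr_matrix.to_csv(csv_path + 'correlations.csv', sep=';')  # hypothetical output file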