# Example #1
# 0
def write_feature_importance_files(n_est=10, max_depth=None):
    """Compute per-label feature-importance rankings and write them to CSV.

    Loads the aggregated user-feature data set and the profile table, scores
    every feature against each label in ``info.LABEL_LIST`` with randomized
    logistic regression, and writes each (optionally truncated) ranking to
    ``param.IMPORTANCE_PATH``.

    :param n_est: number of estimators -- unused in this body
                  (NOTE(review): presumably consumed by a removed/alternate
                  scorer; kept for interface compatibility).
    :param max_depth: tree depth limit -- unused, same note as ``n_est``.
    """
    # sorted() materializes the map iterator, so this works on both
    # Python 2 (map -> list) and Python 3 (map -> iterator), where the
    # former map(...).sort() idiom raises AttributeError.
    profile_ids = sorted(int(x.split("_")[1]) for x in os.listdir(param.FEATURE_AGG_PATH))
    user_feature_df = loader.read_csv_dataset(profile_ids=profile_ids)
    print("user feature data set is loaded")
    p_df = loader.load_profile_info()
    print("profile information is loaded")

    target_labels = info.LABEL_LIST

    # A negative limit means "keep the full ranking" (encoded as "all" in
    # the output file name).
    ranking_limits = [-1]

    if not os.path.isdir(param.IMPORTANCE_PATH):
        os.makedirs(param.IMPORTANCE_PATH)

    for target_label in target_labels:
        features_powers_lr = fimp.compute_randomized_lr_score(user_feature_df, p_df, target_label)

        for r_l in ranking_limits:
            # BUGFIX: the old code applied iloc[:r_l] unconditionally, so a
            # limit of -1 ("all") silently dropped the last ranked row.
            ranked = features_powers_lr if r_l < 0 else features_powers_lr.iloc[:r_l]
            ranked.to_csv(
                    "%s/%s_%s_%s-%s.csv" % (param.IMPORTANCE_PATH, target_label, "withAppFeat",
                                            "LR", r_l if r_l > -1 else "all"))
# Example #2
# 0
    # Score either per modality or over the whole frame: when split_modal is
    # set, each modality's sub-frame is scored separately after dropping rows
    # that are entirely NaN; otherwise the full filtered frame is scored once.
    if split_modal is True:
        for modal in modalities:
            # .loc[modal] assumes the outer index level is the modality key
            # -- TODO confirm against the (unseen) frame construction above.
            x = df_filtered.loc[modal].dropna(how='all')
            compute(x)
    else:
        x = df_filtered.dropna(how='all')
        compute(x)

    # NOTE(review): compute() presumably accumulates rows into
    # feature_importances (defined outside this view); sort in place,
    # highest importance first, NaN scores pushed to the bottom.
    feature_importances.sort_values('importance', ascending=False, inplace=True, na_position='last')
    return feature_importances


if __name__ == '__main__':
    # A comprehension keeps the Python-2 "map returns a list" behavior on
    # Python 3, where map(...)[:20] raises TypeError (map is an iterator).
    # Only the first 20 profiles are loaded -- appears to be a dev-sized run.
    profile_ids = [int(x.split("_")[1]) for x in os.listdir(param.FEATURE_AGG_PATH)][:20]
    user_feature_df = data_loader.read_csv_dataset(profile_ids=profile_ids)
    print("user feature data set is loaded")
    profile_df = data_loader.load_profile_info()
    print("profile information is loaded")

    # Restricted label set for this run (full set lives in info.LABEL_LIST).
    # NOTE(review): target_labels is not passed to the call below -- confirm
    # whether compute_randomized_lr_score reads it from module scope.
    target_labels = [info.LABEL_GEN, info.LABEL_AGE]

    compute_randomized_lr_score(user_feature_df, profile_df)