def create_feature_from_diff_bins_with_dim(diff: pd.DataFrame,
                                           removed_nodes: [int],
                                           original_graph: gc.Graph,
                                           num_of_bins,
                                           save_info: sl.MemoryAccess,
                                           save: bool = True,
                                           output_feature: bool = False,
                                           check_for_existing: bool = True):
    """Build bin features for ``diff`` and append a degree column.

    When ``check_for_existing`` is set and ``save_info`` already holds
    training data for this combination of ``removed_nodes``,
    ``FeatureType.DIFF_BIN_WITH_DIM`` and ``num_of_bins``, nothing is
    recomputed: the stored data is loaded and returned if
    ``output_feature`` is set, otherwise the function returns ``None``.

    Otherwise the features come from ``create_feature_from_diff_bins``
    (called with ``save=False``) and are extended by the column returned
    from ``compute_degrees`` for the row labels of ``diff``.

    Args:
        diff: Data frame whose row labels identify the nodes to featurize.
        removed_nodes: Nodes removed from the graph; used as storage key.
        original_graph: Graph handed to the target and degree computation.
        num_of_bins: Number of bins used for the bin features.
        save_info: Storage access used to look up, load and save data.
        save: Whether freshly computed data is written via ``save_info``.
        output_feature: Whether a ``(features, labels)`` pair is returned.
        check_for_existing: Whether stored training data may be reused.

    Returns:
        ``(features, labels)`` if ``output_feature`` is true, else ``None``.
        For stored data the pair is the loaded frame without its ``"y"``
        column and the ``"y"`` column itself; for fresh data it is the
        computed feature frame and target.
    """
    feature_type = ft.FeatureType.DIFF_BIN_WITH_DIM

    already_stored = check_for_existing and save_info.has_training_data(
        removed_nodes=removed_nodes,
        feature_type=feature_type,
        num_of_bins=num_of_bins)

    if already_stored:
        if not output_feature:
            return None
        stored = save_info.load_training_data(
            removed_nodes=removed_nodes,
            feature_type=feature_type,
            num_of_bins=num_of_bins)
        # Stored frames keep the labels in the "y" column.
        return stored.drop(["y"], axis=1), stored["y"]

    # The inner call must not save: the degree column is still missing.
    bin_features, target = create_feature_from_diff_bins(
        diff=diff,
        removed_nodes=removed_nodes,
        original_graph=original_graph,
        num_of_bins=num_of_bins,
        save_info=save_info,
        save=False)

    node_labels = utils.get_row_labels(diff)
    degrees = compute_degrees(graph=original_graph, labels=node_labels)
    features = utils.pd_append_column(bin_features, degrees)

    if save:
        training_data = utils.pd_append_column(features, target)
        save_info.save_training_data(
            removed_nodes,
            feature_type=feature_type,
            num_of_bins=num_of_bins,
            training_data=training_data)

    # output format is different for data loaded from file!
    return (features, target) if output_feature else None
def create_features(diff: pd.DataFrame,
                    removed_nodes: [int],
                    original_graph: gc.Graph,
                    num_of_bins,
                    feature_type: ft.FeatureType,
                    save_info: sl.MemoryAccess,
                    save: bool = True,
                    output_feature: bool = False,
                    check_for_existing: bool = True):
    """Create training features of the requested ``feature_type``.

    Dispatches on ``feature_type``:

    * ``DIFF_BIN_WITH_DIM``: delegates to
      ``create_feature_from_diff_bins_with_dim`` with all arguments
      forwarded unchanged.
    * ``DIFF_BIN_WITH_DIM_2_HOP``: obtains the ``DIFF_BIN_WITH_DIM``
      features (without saving them) and replaces the target with the
      result of ``two_hop_neighbours`` for the last removed node.
    * ``EVEN_DIST``: not implemented.

    Args:
        diff: Data frame the features are derived from.
        removed_nodes: Nodes removed from the graph; the last entry is the
            node passed to ``two_hop_neighbours`` as ``node_to_predict``.
        original_graph: Graph used for target and degree computation.
        num_of_bins: Number of bins used for the bin features.
        feature_type: Which kind of feature to create.
        save_info: Storage access used to look up, load and save data.
        save: Whether the resulting training data is written to storage.
        output_feature: Whether the ``DIFF_BIN_WITH_DIM`` branch returns
            its result. The 2-hop branch always returns its result.
        check_for_existing: Whether stored ``DIFF_BIN_WITH_DIM`` training
            data may be reused instead of being recomputed.

    Returns:
        For ``DIFF_BIN_WITH_DIM`` whatever
        ``create_feature_from_diff_bins_with_dim`` returns; for
        ``DIFF_BIN_WITH_DIM_2_HOP`` a ``(features, target)`` pair.

    Raises:
        NotImplementedError: If ``feature_type`` is ``EVEN_DIST``.
        ValueError: If ``feature_type`` is none of the handled values.
    """
    if feature_type == ft.FeatureType.DIFF_BIN_WITH_DIM:
        return create_feature_from_diff_bins_with_dim(
            diff=diff,
            removed_nodes=removed_nodes,
            original_graph=original_graph,
            num_of_bins=num_of_bins,
            save_info=save_info,
            save=save,
            output_feature=output_feature,
            check_for_existing=check_for_existing)
    elif feature_type == ft.FeatureType.DIFF_BIN_WITH_DIM_2_HOP:
        # Only the feature frame of the base type is needed; its target is
        # discarded. The caller's check_for_existing is forwarded so that
        # check_for_existing=False really forces a recomputation instead of
        # silently falling back to the callee's default of True.
        features, _ = create_feature_from_diff_bins_with_dim(
            diff=diff,
            removed_nodes=removed_nodes,
            original_graph=original_graph,
            num_of_bins=num_of_bins,
            save_info=save_info,
            save=False,
            output_feature=True,
            check_for_existing=check_for_existing)
        target = two_hop_neighbours(nodes=utils.get_row_labels(features),
                                    graph=original_graph,
                                    node_to_predict=removed_nodes[-1])
        if save:
            save_info.save_training_data(
                removed_nodes=removed_nodes,
                feature_type=ft.FeatureType.DIFF_BIN_WITH_DIM_2_HOP,
                num_of_bins=num_of_bins,
                training_data=utils.pd_append_column(features, target))

        # NOTE(review): unlike the DIFF_BIN_WITH_DIM branch this returns the
        # pair even when output_feature is False; kept for compatibility.
        return features, target
    elif feature_type == ft.FeatureType.EVEN_DIST:
        raise NotImplementedError()

    else:
        raise ValueError(f"Feature type {feature_type} is not known!")
def create_feature_from_diff_bins(diff: pd.DataFrame,
                                  removed_nodes: [int],
                                  original_graph: gc.Graph,
                                  num_of_bins: int,
                                  save_info: sl.MemoryAccess,
                                  save: bool = True):
    """Compute bin features and the target vector for ``diff``.

    The target comes from ``create_target_vector`` for the row labels of
    ``diff``, the original graph and the last entry of ``removed_nodes``.
    The features come from ``get_features_from_bins``.

    Args:
        diff: Data frame the bin features are derived from.
        removed_nodes: Nodes removed from the graph; the last entry is
            passed to ``create_target_vector``.
        original_graph: Graph passed to ``create_target_vector``.
        num_of_bins: Number of bins used for the bin features.
        save_info: Storage access used when ``save`` is true.
        save: Whether features and target are written as training data.

    Returns:
        The ``(features, target)`` pair.
    """
    row_labels = utils.get_row_labels(diff)
    last_removed = removed_nodes[-1]
    target = create_target_vector(row_labels, original_graph, last_removed)

    # Bin distribution for all labels.
    features = get_features_from_bins(diff=diff, num_of_bins=num_of_bins)

    if not save:
        return features, target

    # NOTE(review): the data is stored under DIFF_BIN_WITH_DIM although no
    # degree column is appended in this function - confirm this key is
    # intended, since create_feature_from_diff_bins_with_dim looks up the
    # same feature type.
    save_info.save_training_data(
        removed_nodes,
        feature_type=ft.FeatureType.DIFF_BIN_WITH_DIM,
        num_of_bins=num_of_bins,
        training_data=utils.pd_append_column(features, target))
    return features, target