def get_features_array(self,
                       prop_graphs,
                       micro_features,
                       macro_features,
                       news_source=None,
                       label=None,
                       file_dir="data/features",
                       use_cache=False):
    """Build the feature array for ``prop_graphs``, optionally using a pickle cache.

    The dump file name is obtained from ``self.get_dump_file_name`` using
    ``news_source``, ``micro_features``, ``macro_features``, ``label`` and
    ``file_dir``.

    Args:
        prop_graphs: Samples handed unchanged to ``get_sample_feature_value``.
        micro_features: When truthy, the callables returned by
            ``self.get_micro_feature_method_references()`` are evaluated.
        macro_features: Only forwarded to ``get_dump_file_name`` in this
            implementation; it does not select any feature functions here.
        news_source: Forwarded to ``get_dump_file_name``.
        label: Forwarded to ``get_dump_file_name``.
        file_dir: Forwarded to ``get_dump_file_name``.
        use_cache: When truthy and the dump file exists, its unpickled
            content is returned without recomputing anything.

    Returns:
        The cached object, ``None`` when no feature function was selected,
        or the transposed result of ``get_numpy_array`` over the collected
        feature sets (which is also pickled to the dump file).
    """
    file_name = self.get_dump_file_name(news_source, micro_features,
                                        macro_features, label, file_dir)

    if use_cache and Path(file_name).is_file():
        # NOTE(review): pickle must only be used on trusted dump files.
        # The context manager closes the handle the original left open.
        with open(file_name, "rb") as cache_file:
            return pickle.load(cache_file)

    function_refs = []
    if micro_features:
        function_refs.extend(self.get_micro_feature_method_references())

    # Nothing selected: return early without touching the dump file.
    if not function_refs:
        return None

    all_features = [
        get_sample_feature_value(prop_graphs, function_ref)
        for function_ref in function_refs
    ]

    feature_array = np.transpose(get_numpy_array(all_features))

    # Close (and thereby flush) the dump file deterministically.
    with open(file_name, "wb") as cache_file:
        pickle.dump(feature_array, cache_file)

    return feature_array
def get_all_linguistic_features(news_graphs, micro_features, macro_features):
    """Gather the linguistic feature sets for ``news_graphs`` as one array.

    Each selected function is passed to ``get_stats_for_features`` together
    with ``news_graphs``; the resulting feature sets are combined with
    ``get_numpy_array`` and transposed.

    Args:
        news_graphs: Graph samples forwarded to ``get_stats_for_features``.
        micro_features: When truthy, the four reply-sentiment functions are
            evaluated (with ``print=True``).
        macro_features: When truthy, the macro function list is evaluated;
            that list is currently empty, so nothing is added.

    Returns:
        ``np.transpose(get_numpy_array(...))`` over the collected sets.
    """
    collected = []

    if macro_features:
        # No macro-level functions are registered, so this adds nothing.
        macro_functions = []
        collected.extend(
            get_stats_for_features(news_graphs,
                                   macro_function,
                                   print=False,
                                   feature_name=None)
            for macro_function in macro_functions)

    if micro_features:
        micro_functions = (
            get_reply_nodes_average_sentiment,
            get_first_reply_nodes_average_sentiment,
            get_deepest_cascade_reply_nodes_avg_sentiment,
            get_deepest_cascade_first_level_reply_sentiment,
        )
        collected.extend(
            get_stats_for_features(news_graphs,
                                   micro_function,
                                   print=True,
                                   feature_name=None)
            for micro_function in micro_functions)

    stacked = get_numpy_array(collected)
    return np.transpose(stacked)
예제 #3
0
    def get_features_array(self, prop_graphs, micro_features, macro_features, news_source=None, label=None,
                           file_dir="data/features", use_cache=False):
        """Compute the feature array for ``prop_graphs``, optionally using a pickle cache.

        The dump file name comes from ``self.get_dump_file_name``. When
        ``use_cache`` is truthy and that file exists, its unpickled content
        is returned directly. Otherwise the selected feature functions are
        called as ``function_ref(prop_graphs, edge_type)``, the results are
        combined with ``get_numpy_array``, transposed, pickled to the dump
        file and returned.

        Args:
            prop_graphs: Samples passed to every feature function.
            micro_features: When truthy, evaluates the callables from
                ``self.get_micro_feature_method_references()`` with ``REPLY_EDGE``.
            macro_features: When truthy, evaluates the callables from
                ``self.get_macro_feature_method_references()`` with ``RETWEET_EDGE``.
            news_source: Forwarded to ``get_dump_file_name``.
            label: Forwarded to ``get_dump_file_name``.
            file_dir: Forwarded to ``get_dump_file_name``.
            use_cache: Whether an existing dump file may be returned as-is.

        Returns:
            The cached object or the freshly computed, transposed feature array.
        """
        file_name = self.get_dump_file_name(news_source, micro_features, macro_features, label, file_dir)

        if use_cache and Path(file_name).is_file():
            # NOTE(review): pickle must only be used on trusted dump files.
            # The context manager closes the handle the original left open.
            with open(file_name, "rb") as cache_file:
                return pickle.load(cache_file)

        all_features = []

        if micro_features:
            target_edge_type = REPLY_EDGE
            for function_ref in self.get_micro_feature_method_references():
                all_features.append(function_ref(prop_graphs, target_edge_type))

        if macro_features:
            target_edge_type = RETWEET_EDGE
            for function_ref in self.get_macro_feature_method_references():
                all_features.append(function_ref(prop_graphs, target_edge_type))

        feature_array = np.transpose(get_numpy_array(all_features))

        # Close (and thereby flush) the dump file deterministically.
        with open(file_name, "wb") as cache_file:
            pickle.dump(feature_array, cache_file)

        return feature_array
예제 #4
0
def get_all_temporal_features(prop_graphs, micro_features, macro_features):
    """Gather the temporal feature sets for ``prop_graphs`` as one array.

    Macro functions (post/retweet timing) are evaluated first when
    ``macro_features`` is truthy, followed by the micro functions (reply
    timing) when ``micro_features`` is truthy. Each function is passed to
    ``get_stats_for_features`` with ``print=False``.

    Args:
        prop_graphs: Graph samples forwarded to ``get_stats_for_features``.
        micro_features: Whether to include the reply-timing functions.
        macro_features: Whether to include the post/retweet-timing functions.

    Returns:
        ``np.transpose(get_numpy_array(...))`` over the collected sets.
    """
    retweet_timing_functions = (
        get_average_time_between_post_tweets,
        get_time_diff_first_last_post_tweet,
        get_time_diff_first_post_last_retweet,
        get_time_diff_first_post_first_retweet,
        get_avg_time_between_retweets,
        get_avg_retweet_time_deepest_cascade,
        get_time_diff_post_time_last_retweet_time_deepest_cascade,
    )
    reply_timing_functions = (
        get_avg_time_between_replies,
        get_time_diff_first_post_last_reply,
        get_time_diff_post_time_last_reply_time_deepest_cascade,
    )

    # Macro functions always precede micro functions in the output order.
    selected_functions = [
        *(retweet_timing_functions if macro_features else ()),
        *(reply_timing_functions if micro_features else ()),
    ]

    feature_sets = [
        get_stats_for_features(prop_graphs,
                               selected_function,
                               print=False,
                               feature_name=None)
        for selected_function in selected_functions
    ]

    stacked = get_numpy_array(feature_sets)
    return np.transpose(stacked)
예제 #5
0
def get_all_structural_features(news_graphs, micro_features, macro_features):
    """Gather the structural feature sets for ``news_graphs`` as one array.

    Every feature function is called as ``function(news_graphs, edge_type)``.
    Macro functions use ``RETWEET_EDGE`` and run first; micro functions use
    ``REPLY_EDGE`` and run second.

    Args:
        news_graphs: Graph samples passed to every feature function.
        micro_features: Whether to include the reply-edge functions.
        macro_features: Whether to include the retweet-edge functions.

    Returns:
        ``np.transpose(get_numpy_array(...))`` over the collected features.
    """
    # Bound up front, as the original does, regardless of the flags.
    retweet_edge = RETWEET_EDGE
    collected = []

    if macro_features:
        macro_functions = (
            get_tree_heights,
            get_prop_graphs_node_counts,
            get_prop_graps_cascade_num,
            get_max_outdegrees,
            get_num_of_cascades_with_retweets,
            get_fraction_of_cascades_with_retweets,
        )
        for macro_function in macro_functions:
            collected.append(macro_function(news_graphs, retweet_edge))

    if micro_features:
        reply_edge = REPLY_EDGE
        micro_functions = (
            get_tree_heights,
            get_prop_graphs_node_counts,
            get_max_outdegrees,
        )
        for micro_function in micro_functions:
            collected.append(micro_function(news_graphs, reply_edge))

    stacked = get_numpy_array(collected)
    return np.transpose(stacked)