def get_features_array(self, prop_graphs, micro_features, macro_features, news_source=None, label=None,
                       file_dir="data/features", use_cache=False):
    """Compute the feature array for ``prop_graphs``, or load it from a pickle dump.

    The dump file name is obtained from ``self.get_dump_file_name`` using
    ``news_source``, ``micro_features``, ``macro_features``, ``label`` and
    ``file_dir``.

    Args:
        prop_graphs: Samples handed unchanged to ``get_sample_feature_value``.
        micro_features: When truthy, the callables returned by
            ``self.get_micro_feature_method_references()`` are evaluated.
        macro_features: Only used to build the dump file name in this method.
        news_source: Forwarded to ``get_dump_file_name``.
        label: Forwarded to ``get_dump_file_name``.
        file_dir: Forwarded to ``get_dump_file_name``.
        use_cache: When truthy and the dump file exists, its unpickled
            content is returned without recomputing anything.

    Returns:
        The unpickled cache content on a cache hit; ``None`` when no feature
        functions are selected; otherwise the transposed array built from the
        per-function feature values (which is also pickled to the dump file).
    """
    file_name = self.get_dump_file_name(news_source, micro_features, macro_features, label, file_dir)
    data_file = Path(file_name)

    if use_cache and data_file.is_file():
        # NOTE: unpickling can execute arbitrary code; only load dump files
        # that were produced by this project.
        with open(file_name, "rb") as cache_file:
            return pickle.load(cache_file)

    function_refs = []
    if micro_features:
        function_refs.extend(self.get_micro_feature_method_references())

    if not function_refs:
        return None

    # One entry per feature function; transposed after conversion below.
    all_features = [get_sample_feature_value(prop_graphs, function_ref) for function_ref in function_refs]
    feature_array = np.transpose(get_numpy_array(all_features))

    # The dump is written regardless of ``use_cache``; the context manager
    # guarantees the file is flushed and closed.
    with open(file_name, "wb") as cache_file:
        pickle.dump(feature_array, cache_file)

    return feature_array
def get_prop_graphs_min_time_to_reach_level_2(news_graphs: list, edge_type=None):
    """Apply ``get_min_time_to_reach_level_2`` to ``news_graphs`` via ``get_sample_feature_value``.

    Args:
        news_graphs: Samples forwarded to ``get_sample_feature_value``.
        edge_type: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``get_sample_feature_value`` returns for this feature function.
    """
    feature_function = get_min_time_to_reach_level_2
    return get_sample_feature_value(news_graphs, feature_function)
def get_prop_graphs_num_unique_user_under_level_4(prop_graphs, edge_type=RETWEET_EDGE):
    """Apply ``get_num_unique_users_under_level_4`` to ``prop_graphs`` via ``get_sample_feature_value``.

    Args:
        prop_graphs: Samples forwarded to ``get_sample_feature_value``.
        edge_type: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``get_sample_feature_value`` returns for this feature function.
    """
    feature_function = get_num_unique_users_under_level_4
    feature_values = get_sample_feature_value(prop_graphs, feature_function)
    return feature_values
def get_prop_graphs_max_breadth(prop_graphs, edge_type=RETWEET_EDGE):
    """Apply ``get_breadth_at_each_level`` to ``prop_graphs`` via ``get_sample_feature_value``.

    NOTE(review): the function name says "max breadth" but the per-sample
    callable is ``get_breadth_at_each_level`` — confirm this is intended.

    Args:
        prop_graphs: Samples forwarded to ``get_sample_feature_value``.
        edge_type: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``get_sample_feature_value`` returns for this feature function.
    """
    feature_function = get_breadth_at_each_level
    return get_sample_feature_value(prop_graphs, feature_function)
def get_prop_graphs_fraction_of_cascades_with_replies(prop_graphs, edge_type=RETWEET_EDGE):
    """Apply ``get_fraction_of_cascades_with_replies`` to ``prop_graphs`` via ``get_sample_feature_value``.

    Args:
        prop_graphs: Samples forwarded to ``get_sample_feature_value``.
        edge_type: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``get_sample_feature_value`` returns for this feature function.
    """
    feature_values = get_sample_feature_value(
        prop_graphs,
        get_fraction_of_cascades_with_replies,
    )
    return feature_values
def get_prop_graphs_fraction_of_bot_users_retweeting(prop_graphs: tweet_node, edge_type=None):
    """Apply ``get_fraction_of_bot_users_retweeting`` to ``prop_graphs`` via ``get_sample_feature_value``.

    NOTE(review): ``prop_graphs`` is annotated as ``tweet_node`` although the
    parameter name is plural — confirm whether a collection is expected.

    Args:
        prop_graphs: Forwarded to ``get_sample_feature_value``.
        edge_type: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``get_sample_feature_value`` returns for this feature function.
    """
    feature_function = get_fraction_of_bot_users_retweeting
    return get_sample_feature_value(prop_graphs, feature_function)
def get_prop_graphs_num_bot_users_retweeting(prop_graphs: tweet_node, edge_type=None):
    """Apply ``get_num_bot_users`` to ``prop_graphs`` via ``get_sample_feature_value``.

    The former ``global user_id_bot_score_dict`` declaration was removed: the
    name was never read or assigned in this function, so it had no effect.

    NOTE(review): ``prop_graphs`` is annotated as ``tweet_node`` although the
    parameter name is plural — confirm whether a collection is expected.

    Args:
        prop_graphs: Forwarded to ``get_sample_feature_value``.
        edge_type: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``get_sample_feature_value`` returns for this feature function.
    """
    return get_sample_feature_value(prop_graphs, get_num_bot_users)
def get_prop_graphs_ratio_of_retweet_to_reply(prop_graphs, edge_type=None):
    """Apply ``get_ratio_of_retweet_to_reply`` to ``prop_graphs`` via ``get_sample_feature_value``.

    Args:
        prop_graphs: Samples forwarded to ``get_sample_feature_value``.
        edge_type: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``get_sample_feature_value`` returns for this feature function.
    """
    feature_function = get_ratio_of_retweet_to_reply
    feature_values = get_sample_feature_value(prop_graphs, feature_function)
    return feature_values
def get_prop_graphs_num_user_retweet_and_reply(prop_graphs, edge_type=None):
    """Apply ``get_num_user_retweet_and_reply`` to ``prop_graphs`` via ``get_sample_feature_value``.

    Args:
        prop_graphs: Samples forwarded to ``get_sample_feature_value``.
        edge_type: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``get_sample_feature_value`` returns for this feature function.
    """
    feature_function = get_num_user_retweet_and_reply
    return get_sample_feature_value(prop_graphs, feature_function)
def get_prop_graphs_num_of_cascades_with_retweets(prop_graphs, edge_type=RETWEET_EDGE):
    """Apply ``get_num_of_cascades_with_retweets`` to ``prop_graphs`` via ``get_sample_feature_value``.

    Args:
        prop_graphs: Samples forwarded to ``get_sample_feature_value``.
        edge_type: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``get_sample_feature_value`` returns for this feature function.
    """
    feature_values = get_sample_feature_value(
        prop_graphs,
        get_num_of_cascades_with_retweets,
    )
    return feature_values