def get_features_array(self, prop_graphs, micro_features, macro_features, news_source=None,
                       label=None, file_dir="data/features", use_cache=False):
    """Build the feature array for ``prop_graphs``, or load it from the pickle cache.

    Args:
        prop_graphs: Propagation graphs handed to ``get_sample_feature_value``.
        micro_features: When truthy, the functions returned by
            ``self.get_micro_feature_method_references()`` are evaluated.
        macro_features: Only forwarded to ``get_dump_file_name``; this method
            evaluates no macro feature functions.
        news_source: Forwarded to ``get_dump_file_name``.
        label: Forwarded to ``get_dump_file_name``.
        file_dir: Forwarded to ``get_dump_file_name`` (cache directory).
        use_cache: When truthy and the dump file exists, return its unpickled
            content instead of recomputing.

    Returns:
        The unpickled cache content, ``None`` when no feature function is
        selected, or the transposed array built from the per-function results
        (presumably one column per feature function -- confirm against
        ``get_numpy_array``).
    """
    file_name = self.get_dump_file_name(news_source, micro_features, macro_features, label, file_dir)

    if use_cache and Path(file_name).is_file():
        # NOTE(review): pickle can execute arbitrary code while loading; only
        # point this at cache files produced by this pipeline.
        with open(file_name, "rb") as cache_file:
            return pickle.load(cache_file)

    function_refs = []
    if micro_features:
        function_refs.extend(self.get_micro_feature_method_references())

    # Nothing to compute: report that with None and leave the cache untouched.
    if not function_refs:
        return None

    all_features = [
        get_sample_feature_value(prop_graphs, function_ref)
        for function_ref in function_refs
    ]
    feature_array = np.transpose(get_numpy_array(all_features))

    # Context manager guarantees the dump is flushed and the handle is closed.
    with open(file_name, "wb") as cache_file:
        pickle.dump(feature_array, cache_file)

    return feature_array
def get_all_linguistic_features(news_graphs, micro_features, macro_features):
    """Collect the linguistic (reply sentiment) feature statistics for ``news_graphs``.

    Each selected feature function is evaluated through
    ``get_stats_for_features``; the collected results are converted with
    ``get_numpy_array`` and returned transposed.

    Args:
        news_graphs: Graphs handed to ``get_stats_for_features``.
        micro_features: When truthy, the four reply sentiment functions are evaluated.
        macro_features: When truthy, the (currently empty) list of retweet
            functions is evaluated, which contributes nothing.

    Returns:
        ``np.transpose`` of the array built from the collected feature sets.
    """
    collected = []

    if macro_features:
        # No retweet-based linguistic features are registered at the moment.
        retweet_functions = []
        collected.extend(
            [get_stats_for_features(news_graphs, feature_fn, print=False, feature_name=None)
             for feature_fn in retweet_functions]
        )

    if micro_features:
        reply_functions = [
            get_reply_nodes_average_sentiment,
            get_first_reply_nodes_average_sentiment,
            get_deepest_cascade_reply_nodes_avg_sentiment,
            get_deepest_cascade_first_level_reply_sentiment,
        ]
        collected.extend(
            [get_stats_for_features(news_graphs, feature_fn, print=True, feature_name=None)
             for feature_fn in reply_functions]
        )

    stacked = get_numpy_array(collected)
    return np.transpose(stacked)
def get_features_array(self, prop_graphs, micro_features, macro_features, news_source=None,
                       label=None, file_dir="data/features", use_cache=False):
    """Build the feature array for ``prop_graphs``, or load it from the pickle cache.

    Micro feature functions are called with ``REPLY_EDGE`` and macro feature
    functions with ``RETWEET_EDGE``; micro results come first in the output.

    Args:
        prop_graphs: Propagation graphs passed to every feature function.
        micro_features: When truthy, evaluate the functions returned by
            ``self.get_micro_feature_method_references()``.
        macro_features: When truthy, evaluate the functions returned by
            ``self.get_macro_feature_method_references()``.
        news_source: Forwarded to ``get_dump_file_name``.
        label: Forwarded to ``get_dump_file_name``.
        file_dir: Forwarded to ``get_dump_file_name`` (cache directory).
        use_cache: When truthy and the dump file exists, return its unpickled
            content instead of recomputing.

    Returns:
        The unpickled cache content, or the transposed array built from the
        per-function results (presumably one column per feature function --
        confirm against ``get_numpy_array``).
    """
    file_name = self.get_dump_file_name(news_source, micro_features, macro_features, label, file_dir)

    if use_cache and Path(file_name).is_file():
        # NOTE(review): pickle can execute arbitrary code while loading; only
        # point this at cache files produced by this pipeline.
        with open(file_name, "rb") as cache_file:
            return pickle.load(cache_file)

    all_features = []

    if micro_features:
        all_features.extend(
            [function_ref(prop_graphs, REPLY_EDGE)
             for function_ref in self.get_micro_feature_method_references()]
        )

    if macro_features:
        all_features.extend(
            [function_ref(prop_graphs, RETWEET_EDGE)
             for function_ref in self.get_macro_feature_method_references()]
        )

    feature_array = np.transpose(get_numpy_array(all_features))

    # Context manager guarantees the dump is flushed and the handle is closed.
    with open(file_name, "wb") as cache_file:
        pickle.dump(feature_array, cache_file)

    return feature_array
def get_all_temporal_features(prop_graphs, micro_features, macro_features):
    """Collect the temporal feature statistics for ``prop_graphs``.

    Macro (post/retweet timing) functions are evaluated before micro (reply
    timing) functions. Every selected function goes through
    ``get_stats_for_features`` with printing disabled.

    Args:
        prop_graphs: Graphs handed to ``get_stats_for_features``.
        micro_features: When truthy, include the reply timing functions.
        macro_features: When truthy, include the post/retweet timing functions.

    Returns:
        ``np.transpose`` of the array built from the collected feature sets.
    """
    retweet_timing_functions = [
        get_average_time_between_post_tweets,
        get_time_diff_first_last_post_tweet,
        get_time_diff_first_post_last_retweet,
        get_time_diff_first_post_first_retweet,
        get_avg_time_between_retweets,
        get_avg_retweet_time_deepest_cascade,
        get_time_diff_post_time_last_retweet_time_deepest_cascade,
    ]
    reply_timing_functions = [
        get_avg_time_between_replies,
        get_time_diff_first_post_last_reply,
        get_time_diff_post_time_last_reply_time_deepest_cascade,
    ]

    selected = []
    if macro_features:
        selected += retweet_timing_functions
    if micro_features:
        selected += reply_timing_functions

    collected = [
        get_stats_for_features(prop_graphs, feature_fn, print=False, feature_name=None)
        for feature_fn in selected
    ]
    return np.transpose(get_numpy_array(collected))
def get_all_structural_features(news_graphs, micro_features, macro_features):
    """Collect the structural features of ``news_graphs``.

    Macro features call each retweet metric with ``RETWEET_EDGE``; micro
    features call each reply metric with ``REPLY_EDGE``. Macro results come
    first in the output.

    Args:
        news_graphs: Graphs passed to every metric function.
        micro_features: When truthy, evaluate the reply-edge metrics.
        macro_features: When truthy, evaluate the retweet-edge metrics.

    Returns:
        ``np.transpose`` of the array built from the collected metric results.
    """
    retweet_edge = RETWEET_EDGE
    collected = []

    if macro_features:
        retweet_metrics = [
            get_tree_heights,
            get_prop_graphs_node_counts,
            get_prop_graps_cascade_num,
            get_max_outdegrees,
            get_num_of_cascades_with_retweets,
            get_fraction_of_cascades_with_retweets,
        ]
        collected.extend([metric(news_graphs, retweet_edge) for metric in retweet_metrics])

    if micro_features:
        reply_edge = REPLY_EDGE
        reply_metrics = [
            get_tree_heights,
            get_prop_graphs_node_counts,
            get_max_outdegrees,
        ]
        collected.extend([metric(news_graphs, reply_edge) for metric in reply_metrics])

    stacked = get_numpy_array(collected)
    return np.transpose(stacked)