def predict_link(self, graph, graph_name, links, measure_name, negative_edge_type):
    """Score every candidate link with a named measure and record the results.

    For each link, the measure called ``measure_name`` is looked up on
    ``LinkPredictionStaticFunctions`` and invoked with the graph and the
    link's two endpoints. Two ``AuthorFeatures`` rows are then appended to
    ``self._author_features``:

    * the score, under ``<graph_name>_<measure_name>_<negative_edge_type>``;
    * the link identifier itself, under ``author_screen_name``.

    The link identifier is ``<graph_name>#<source>#<dest>``.

    :param graph: passed through unchanged to the measure function.
    :param graph_name: prefix of both the link identifier and attribute name.
    :param links: iterable of indexable items; element 0 is the source node
        and element 1 the destination node (both concatenated with strings).
    :param measure_name: attribute name of the measure function.
    :param negative_edge_type: suffix of the score's attribute name.
    """
    for link in links:
        source_node, dest_node = link[0], link[1]

        # Resolved per link, so an empty ``links`` never touches the lookup.
        measure = getattr(LinkPredictionStaticFunctions, measure_name)
        score = measure(graph, source_node, dest_node)

        link_id = str(graph_name + "#" + source_node + "#" + dest_node).encode('utf-8')
        score_attribute = str(
            graph_name + "_" + measure_name + "_" + negative_edge_type
        ).encode('utf-8')

        # Fields shared by both rows written for this link.
        common = dict(
            _author_guid=link_id,
            _window_start=self._window_start,
            _window_end=self._window_end,
        )

        self._author_features.append(
            AuthorFeatures(_attribute_name=score_attribute,
                           _attribute_value=score,
                           **common)
        )
        self._author_features.append(
            AuthorFeatures(_attribute_name="author_screen_name",
                           _attribute_value=link_id,
                           **common)
        )
def create_features_from_calculated_distances(self, authors_features):
    """Turn the stored per-author distance series into ``AuthorFeatures`` rows.

    Iterates ``self._author_distances`` (author guid -> {sub type -> series})
    and, for every sub type in
    ``self._author_types_and_sub_types_in_dictionary``, appends up to two
    rows to ``authors_features``:

    * ``mean_dist_to_<sub_type>`` when ``DistancesFromTargetedClass.MEAN`` is
      in ``self._distances_statistics``;
    * ``min_dist_to_<sub_type>`` when ``DistancesFromTargetedClass.MIN`` is.

    A NaN statistic is stored as ``None``.

    :param authors_features: list that receives the new rows (mutated in
        place); nothing is returned.
    """
    for author_guid, series_by_sub_type in self._author_distances.items():
        # Element 3 of the author record is what identifies the row
        # (presumably the author guid -- TODO confirm against the query).
        author = self._author_dict[author_guid]

        for author_sub_type in self._author_types_and_sub_types_in_dictionary:
            series = series_by_sub_type[author_sub_type]

            if DistancesFromTargetedClass.MEAN in self._distances_statistics:
                raw_mean = series.mean()
                mean_value = None if math.isnan(raw_mean) else raw_mean
                # TODO add parameter attenuation function, that should be applied to the distance
                # TODO think of several types of attenuation functions, exponentially decreasing,
                #      logarithmically decreasing, etc..
                authors_features.append(
                    AuthorFeatures(author[3],
                                   self._window_start,
                                   self._window_end,
                                   unicode("mean_dist_to_" + author_sub_type),
                                   mean_value)
                )

            if DistancesFromTargetedClass.MIN in self._distances_statistics:
                raw_min = series.min()
                min_value = None if math.isnan(raw_min) else raw_min
                authors_features.append(
                    AuthorFeatures(author[3],
                                   self._window_start,
                                   self._window_end,
                                   unicode("min_dist_to_" + author_sub_type),
                                   min_value)
                )
def _create_tf_idf_author_feature(self, tweet_id, aggregation_functions_name, aggregated_tf_idf_score):
    """Build an ``AuthorFeatures`` row holding an aggregated TF-IDF score.

    Only the guid, attribute name and attribute value are set here; the
    window fields are left untouched.

    :param tweet_id: stored as the row's ``author_guid``.
    :param aggregation_functions_name: appended to the class name (joined by
        ``_``) to form the attribute name.
    :param aggregated_tf_idf_score: stored as the attribute value.
    :return: the populated ``AuthorFeatures`` object.
    """
    tf_idf_feature = AuthorFeatures()
    tf_idf_feature.author_guid = tweet_id

    owner_name = self.__class__.__name__
    tf_idf_feature.attribute_name = owner_name + "_" + aggregation_functions_name
    tf_idf_feature.attribute_value = aggregated_tf_idf_score

    return tf_idf_feature
def _add_ground_truth_to_author_features(self, graph_name, links, label):
    """Append a ``has_link`` ground-truth row for every given link.

    Each row is identified by ``<graph_name>#<link[0]>#<link[1]>`` and is
    appended to ``self._author_features``.

    :param graph_name: prefix of the link identifier.
    :param links: iterable of indexable items whose first two elements are
        the link endpoints.
    :param label: stored as the attribute value; per the original note,
        1.0 means "link" and 0.0 means "no link".
    """
    for link in links:
        first_node = link[0]
        second_node = link[1]
        link_id = str(graph_name + "#" + first_node + "#" + second_node).encode('utf-8')

        ground_truth = AuthorFeatures(
            _author_guid=link_id,
            _window_start=self._window_start,
            _window_end=self._window_end,
            _attribute_name='has_link',
            _attribute_value=label,
        )
        self._author_features.append(ground_truth)
def create_author_feature(feature_name, author_guid, attribute_value, window_start, window_end):
    """Create an ``AuthorFeatures`` object populated from the arguments.

    :param feature_name: stored as ``attribute_name``.
    :param author_guid: stored as ``author_guid``.
    :param attribute_value: stored as ``attribute_value``.
    :param window_start: stored as ``window_start``.
    :param window_end: stored as ``window_end``.
    :return: the populated ``AuthorFeatures`` object.
    """
    author_feature = AuthorFeatures()

    # Assigned in the same order as the original field-by-field assignments.
    field_values = (
        ("author_guid", author_guid),
        ("attribute_name", feature_name),
        ("attribute_value", attribute_value),
        ("window_start", window_start),
        ("window_end", window_end),
    )
    for field_name, field_value in field_values:
        setattr(author_feature, field_name, field_value)

    return author_feature
def _create_feature(self, feature, id_val, result, suffix=u''):
    """Wrap a computed value in an ``AuthorFeatures`` row for this generator.

    The attribute name is ``<ClassName>_<feature><suffix>`` converted with
    ``unicode``; the window comes from ``self._window_start`` and
    ``self._window_end``.

    :param feature: feature name placed after the class name.
    :param id_val: stored as the row's ``author_guid``.
    :param result: stored unchanged as the attribute value.
    :param suffix: optional text appended to the attribute name.
    :return: the populated ``AuthorFeatures`` object.
    """
    row = AuthorFeatures()
    row.author_guid = id_val
    row.window_start = self._window_start
    row.window_end = self._window_end

    owner_name = self.__class__.__name__
    row.attribute_name = unicode(owner_name + "_" + feature + suffix)
    row.attribute_value = result
    return row
def _create_feature(self, feature, id_val, result, suffix=''):
    """Wrap a computed value in an ``AuthorFeatures`` row for this generator.

    The attribute name is ``<ClassName>_<feature><suffix>`` and the value is
    stored as ``str(result)``; the window comes from ``self._window_start``
    and ``self._window_end``.

    :param feature: feature name placed after the class name.
    :param id_val: stored as the row's ``author_guid``.
    :param result: value to store; it is converted with ``str``.
    :param suffix: optional text appended to the attribute name.
    :return: the populated ``AuthorFeatures`` object.
    """
    row = AuthorFeatures()
    row.author_guid = id_val
    row.window_start = self._window_start
    row.window_end = self._window_end

    name_parts = (self.__class__.__name__, feature, suffix)
    row.attribute_name = '{}_{}{}'.format(*name_parts)
    row.attribute_value = str(result)
    return row
def _create_author_feature_with_name(self, author_guid, value, feature_name):
    """Create, cache and persist a single named feature for an author.

    The new row is appended to
    ``self._authors_to_author_features_dict[author_guid]``, handed to
    ``self._db.update_author_features`` and committed immediately through
    ``self._db.session``. Nothing is returned.

    The window is fixed to 2010-01-01 00:00:00 .. 2020-01-01 23:59:59
    rather than taken from the instance.

    :param author_guid: stored on the row and used as the cache key.
    :param value: stored as the attribute value.
    :param feature_name: stored as the attribute name.
    """
    named_feature = AuthorFeatures()
    named_feature.author_guid = author_guid
    # NOTE(review): ``date`` is called with a timestamp string, so it is
    # presumably a project parsing helper, not ``datetime.date`` -- confirm.
    named_feature.window_start = date('2010-01-01 00:00:00')
    named_feature.window_end = date('2020-01-01 23:59:59')
    named_feature.attribute_name = feature_name
    named_feature.attribute_value = value

    cached_features = self._authors_to_author_features_dict[author_guid]
    cached_features.append(named_feature)

    database = self._db
    database.update_author_features(named_feature)
    database.session.commit()
def run_and_create_author_feature(self, author, feature, posts, id_val, feature_name):
    """Compute one feature for an author and wrap it in an ``AuthorFeatures`` row.

    The method named ``feature`` is looked up on ``self`` and called with
    ``posts=posts`` and ``author=author``; its return value becomes the
    attribute value of the new row.

    :param author: forwarded to the feature method as ``author``.
    :param feature: name of the method on ``self`` that computes the value.
    :param posts: forwarded to the feature method as ``posts``.
    :param id_val: stored as the row's ``author_guid``.
    :param feature_name: stored (via ``unicode``) as the attribute name.
    :return: the populated ``AuthorFeatures`` object, or ``None`` when any
        step raised. Failures are logged and deliberately not propagated.
    """
    try:
        result = getattr(self, feature)(posts=posts, author=author)

        author_feature = AuthorFeatures()
        author_feature.author_guid = id_val
        author_feature.window_start = self._window_start
        author_feature.window_end = self._window_end
        author_feature.attribute_name = unicode(feature_name)
        author_feature.attribute_value = result
        return author_feature
    except Exception as e:
        # Format the exception itself rather than reading ``e.message``:
        # that attribute is deprecated since Python 2.6, absent on Python 3,
        # and may hold a non-string (e.g. ``KeyError(5)``), in which case the
        # old ``e.message + feature`` raised inside this handler and masked
        # the real error. Lazy %-args accept any object.
        logging.error("%s%s", e, feature)
        return None
def run_and_create_author_feature_with_given_value(self, author, value, feature_name):
    """Wrap an already-computed value in an ``AuthorFeatures`` row.

    :param author: stored as the row's ``author_guid``.
    :param value: stored unchanged as the attribute value.
    :param feature_name: stored (via ``unicode``) as the attribute name.
    :return: the populated ``AuthorFeatures`` object, or ``None`` when
        building the row raised. Failures are logged and deliberately not
        propagated.
    """
    try:
        author_feature = AuthorFeatures()
        author_feature.author_guid = author
        author_feature.window_start = self._window_start
        author_feature.window_end = self._window_end
        author_feature.attribute_name = unicode(feature_name)
        author_feature.attribute_value = value
        return author_feature
    except Exception as e:
        # Format the exception itself rather than reading ``e.message``:
        # that attribute is deprecated since Python 2.6, absent on Python 3,
        # and may hold a non-string, which made the old
        # ``e.message + str(value)`` raise inside this handler. Lazy %-args
        # accept any object, so ``value`` needs no explicit ``str`` either.
        logging.error("%s%s", e, value)
        return None
def run_and_create_author_feature(self, kwargs, id_val, feature):
    """Compute one feature by name and wrap it in an ``AuthorFeatures`` row.

    The method named ``feature`` is looked up on ``self`` and called with
    ``**kwargs``; its return value becomes the attribute value. The
    attribute name is ``<ClassName>_<feature>`` converted with ``unicode``.

    :param kwargs: mapping expanded into keyword arguments for the feature
        method.
    :param id_val: stored as the row's ``author_guid``.
    :param feature: name of the method on ``self`` that computes the value.
    :return: the populated ``AuthorFeatures`` object, or ``None`` when any
        step raised. Failures are logged and deliberately not propagated.
    """
    try:
        result = getattr(self, feature)(**kwargs)

        author_feature = AuthorFeatures()
        author_feature.author_guid = id_val
        author_feature.window_start = self._window_start
        author_feature.window_end = self._window_end
        subclass_name = self.__class__.__name__
        author_feature.attribute_name = unicode(subclass_name + "_" + feature)
        author_feature.attribute_value = result
        return author_feature
    except Exception as e:
        # Log the exception itself rather than ``e.message``: that attribute
        # is deprecated since Python 2.6, empty for multi-argument
        # exceptions, and absent on Python 3 (where reading it raised inside
        # this handler and hid the real error).
        logging.error("%s", e)
        return None
def execute(self):
    """Store per-author topic probabilities as ``AuthorFeatures`` rows.

    Reads the mapping returned by
    ``self._db.get_single_post_per_author_topic_mapping()``. In each row,
    element 0 is the author guid and every following element at index ``i``
    is stored as attribute ``probability_topic_<i>``. All rows are then
    passed to ``self._db.update_author_features`` and committed in batches
    of 100, with a final commit for any remainder. Progress and timing are
    printed to stdout.
    """
    start_time = time.time()
    logging.info("execute started for " + self.__class__.__name__)

    post_topics_map = self._db.get_single_post_per_author_topic_mapping()
    total = len(post_topics_map)

    authors_features = []
    for processed, topic_row in enumerate(post_topics_map, 1):
        author_guid = topic_row[0]
        # Index 0 is the guid, so topic numbering starts at 1.
        for topic in range(1, len(topic_row)):
            topic_feature = AuthorFeatures()
            topic_feature.author_guid = author_guid
            topic_feature.window_start = self._window_start
            topic_feature.window_end = self._window_end
            topic_feature.attribute_name = unicode('probability_topic_' + str(topic))
            topic_feature.attribute_value = topic_row[topic]
            authors_features.append(topic_feature)
        print("\r processed authors " + str(processed) + " from " + str(total), end="")

    if authors_features:
        print('\n Beginning merging author_features objects')
        for position, feature_row in enumerate(authors_features, 1):
            self._db.update_author_features(feature_row)
            # Commit after every 100th row to keep transactions bounded.
            if position % 100 == 0:
                print("\r " + str(self.__class__.__name__) + " merging author-features objects", end="")
                self._db.commit()
        # Flush whatever is left after the last full batch.
        if len(authors_features) % 100 != 0:
            self._db.commit()
        print('Finished merging author_features objects')

    elapsed = time.time() - start_time
    print('execute finished in ' + str(elapsed) + ' seconds')