Beispiel #1
0
    def predict_link(self, graph, graph_name, links, measure_name,
                     negative_edge_type):
        """Score every candidate link and queue the results as author features.

        For each link, the measure named ``measure_name`` is looked up on
        ``LinkPredictionStaticFunctions`` and called with the graph and the
        link's two endpoints. Two ``AuthorFeatures`` objects are appended to
        ``self._author_features`` per link: one carrying the score, and one
        named ``author_screen_name`` whose value repeats the link identifier.

        :param graph: graph object passed through to the measure function.
        :param graph_name: prefix of both the link identifier and the score
            attribute name.
        :param links: iterable of indexable items; positions 0 and 1 are used
            as the source and destination nodes.
        :param measure_name: name of the attribute to call on
            ``LinkPredictionStaticFunctions``.
        :param negative_edge_type: suffix of the score attribute name.
        """
        for edge in links:
            src, dst = edge[0], edge[1]
            measure = getattr(LinkPredictionStaticFunctions, measure_name)
            score = measure(graph, src, dst)

            # Link identifier layout: <graph_name>#<source>#<destination>.
            link_guid = str(graph_name + "#" + src + "#" + dst).encode('utf-8')
            score_attribute = str(graph_name + "_" + measure_name + "_" +
                                  negative_edge_type).encode('utf-8')

            # First the measured score, then a row that stores the link
            # identifier itself under the "author_screen_name" attribute.
            rows = ((score_attribute, score),
                    ("author_screen_name", link_guid))
            for name, value in rows:
                self._author_features.append(
                    AuthorFeatures(_author_guid=link_guid,
                                   _window_start=self._window_start,
                                   _window_end=self._window_end,
                                   _attribute_name=name,
                                   _attribute_value=value))
Beispiel #2
0
    def create_features_from_calculated_distances(self, authors_features):
        """Append mean/min distance features for every author with distances.

        Iterates ``self._author_distances`` (author guid -> mapping of author
        sub type -> series of distances). For each sub type listed in
        ``self._author_types_and_sub_types_in_dictionary`` it appends a
        ``mean_dist_to_<sub type>`` and/or ``min_dist_to_<sub type>`` feature,
        depending on which statistics are in ``self._distances_statistics``.
        A NaN statistic is stored as ``None``.

        :param authors_features: list that receives the new ``AuthorFeatures``
            objects; it is mutated in place and nothing is returned.
        """

        def _none_if_nan(statistic):
            # NaN is replaced by None before the value is stored.
            return None if math.isnan(statistic) else statistic

        for author_guid, distances_by_sub_type in self._author_distances.items():
            # Element 3 of the author record becomes the first positional
            # argument of AuthorFeatures (presumably the guid -- confirm).
            author_record = self._author_dict[author_guid]
            for author_sub_type in self._author_types_and_sub_types_in_dictionary:
                distances = distances_by_sub_type[author_sub_type]

                if DistancesFromTargetedClass.MEAN in self._distances_statistics:
                    mean_distance = _none_if_nan(distances.mean())
                    #TODO add parameter attenuation function, that should be applied to the distance
                    #TODO think of several types of attenuation functions, exponentially decreasing, logarithmically decreasing, etc..
                    authors_features.append(AuthorFeatures(
                        author_record[3], self._window_start,
                        self._window_end,
                        unicode("mean_dist_to_" + author_sub_type),
                        mean_distance))

                if DistancesFromTargetedClass.MIN in self._distances_statistics:
                    min_distance = _none_if_nan(distances.min())
                    authors_features.append(AuthorFeatures(
                        author_record[3], self._window_start,
                        self._window_end,
                        unicode("min_dist_to_" + author_sub_type),
                        min_distance))
 def _create_tf_idf_author_feature(self, tweet_id,
                                   aggregation_functions_name,
                                   aggregated_tf_idf_score):
     """Wrap an aggregated TF-IDF score in an ``AuthorFeatures`` object.

     The attribute name is ``<concrete class name>_<aggregation name>`` and
     the tweet id is stored in the ``author_guid`` field. The window fields
     are not set here.

     :param tweet_id: value stored as ``author_guid``.
     :param aggregation_functions_name: suffix of the attribute name.
     :param aggregated_tf_idf_score: value stored as ``attribute_value``.
     :return: the populated ``AuthorFeatures`` object.
     """
     tf_idf_feature = AuthorFeatures()
     tf_idf_feature.author_guid = tweet_id
     owner_name = self.__class__.__name__
     tf_idf_feature.attribute_name = owner_name + "_" + aggregation_functions_name
     tf_idf_feature.attribute_value = aggregated_tf_idf_score
     return tf_idf_feature
Beispiel #4
0
    def _add_ground_truth_to_author_features(self, graph_name, links, label):
        # label 1.0 = link / label 0.0 = no link

        for link in links:
            author_guid = str(graph_name + "#" + link[0] + "#" + link[1]).encode('utf-8')
            af = AuthorFeatures(_author_guid=author_guid,
                                _window_start=self._window_start, _window_end=self._window_end,
                                _attribute_name='has_link', _attribute_value=label)
            self._author_features.append(af)
 def create_author_feature(feature_name, author_guid, attribute_value, window_start, window_end):
     """Build an ``AuthorFeatures`` object from the given field values.

     :param feature_name: stored as ``attribute_name``.
     :param author_guid: stored as ``author_guid``.
     :param attribute_value: stored as ``attribute_value``.
     :param window_start: stored as ``window_start``.
     :param window_end: stored as ``window_end``.
     :return: the populated ``AuthorFeatures`` object.
     """
     author_feature = AuthorFeatures()
     # Identity of the row first, then its time window, then the payload.
     author_feature.author_guid = author_guid
     author_feature.window_start = window_start
     author_feature.window_end = window_end
     author_feature.attribute_name = feature_name
     author_feature.attribute_value = attribute_value
     return author_feature
 def _create_feature(self, feature, id_val, result, suffix=u''):
     """Build an ``AuthorFeatures`` object named after the concrete class.

     The attribute name is ``<concrete class name>_<feature><suffix>``
     converted with ``unicode``; the window comes from ``self``.

     :param feature: feature name placed after the class name.
     :param id_val: value stored as ``author_guid``.
     :param result: value stored unchanged as ``attribute_value``.
     :param suffix: optional text appended to the attribute name.
     :return: the populated ``AuthorFeatures`` object.
     """
     new_feature = AuthorFeatures()
     new_feature.author_guid = id_val
     new_feature.window_start = self._window_start
     new_feature.window_end = self._window_end

     qualified_name = self.__class__.__name__ + "_" + feature + suffix
     new_feature.attribute_name = unicode(qualified_name)
     new_feature.attribute_value = result
     return new_feature
 def _create_feature(self, feature, id_val, result, suffix=''):
     """Build an ``AuthorFeatures`` object named after the concrete class.

     The attribute name is ``<concrete class name>_<feature><suffix>`` and
     the value is stored as ``str(result)``; the window comes from ``self``.

     :param feature: feature name placed after the class name.
     :param id_val: value stored as ``author_guid``.
     :param result: value converted with ``str`` before being stored.
     :param suffix: optional text appended to the attribute name.
     :return: the populated ``AuthorFeatures`` object.
     """
     new_feature = AuthorFeatures()
     new_feature.author_guid = id_val
     new_feature.window_start = self._window_start
     new_feature.window_end = self._window_end

     name_parts = (self.__class__.__name__, feature, suffix)
     new_feature.attribute_name = '{}_{}{}'.format(*name_parts)
     new_feature.attribute_value = str(result)
     return new_feature
 def _create_author_feature_with_name(self, author_guid, value, feature_name):
     """Create, cache and persist one named feature for an author.

     The feature gets a fixed window built from two hard-coded timestamp
     strings. It is appended to
     ``self._authors_to_author_features_dict[author_guid]``, handed to
     ``self._db.update_author_features`` and the DB session is committed
     immediately. Nothing is returned.

     NOTE(review): ``date`` is called with a timestamp string, so it is
     presumably a project helper rather than ``datetime.date`` -- confirm.

     :param author_guid: stored as ``author_guid`` and used as the cache key.
     :param value: stored as ``attribute_value``.
     :param feature_name: stored as ``attribute_name``.
     """
     named_feature = AuthorFeatures()
     named_feature.author_guid = author_guid
     named_feature.window_start = date('2010-01-01 00:00:00')
     named_feature.window_end = date('2020-01-01 23:59:59')
     named_feature.attribute_name = feature_name
     named_feature.attribute_value = value

     cached_features = self._authors_to_author_features_dict[author_guid]
     cached_features.append(named_feature)

     # Persisted and committed one feature at a time.
     self._db.update_author_features(named_feature)
     self._db.session.commit()
 def run_and_create_author_feature(self, author, feature, posts, id_val,
                                   feature_name):
     """Compute one feature and wrap the result in ``AuthorFeatures``.

     The method named ``feature`` is looked up on ``self`` and called with
     ``posts`` and ``author`` as keyword arguments.

     :param author: passed to the feature method as ``author``.
     :param feature: name of the method on ``self`` that computes the value;
         also appended to the error log message.
     :param posts: passed to the feature method as ``posts``.
     :param id_val: value stored as ``author_guid``.
     :param feature_name: stored (via ``unicode``) as ``attribute_name``.
     :return: the populated ``AuthorFeatures`` object, or ``None`` when the
         lookup, the computation or the construction raised; the error is
         logged in that case.
     """
     try:
         result = getattr(self, feature)(posts=posts, author=author)
         author_feature = AuthorFeatures()
         author_feature.author_guid = id_val
         author_feature.window_start = self._window_start
         author_feature.window_end = self._window_end
         author_feature.attribute_name = unicode(feature_name)
         author_feature.attribute_value = result
         return author_feature
     except Exception as e:
         # Format the exception itself rather than reading ``e.message``:
         # that attribute does not exist on Python 3, and on Python 2 it can
         # be empty or a non-string, which made the handler raise and hide
         # the original error.
         logging.error("%s%s", e, feature)
         return None
 def run_and_create_author_feature_with_given_value(self, author, value,
                                                    feature_name):
     """Wrap an already computed value in an ``AuthorFeatures`` object.

     :param author: value stored as ``author_guid``.
     :param value: stored unchanged as ``attribute_value``; also appended to
         the error log message.
     :param feature_name: stored (via ``unicode``) as ``attribute_name``.
     :return: the populated ``AuthorFeatures`` object, or ``None`` when
         building it raised; the error is logged in that case.
     """
     try:
         author_feature = AuthorFeatures()
         author_feature.author_guid = author
         author_feature.window_start = self._window_start
         author_feature.window_end = self._window_end
         author_feature.attribute_name = unicode(feature_name)
         author_feature.attribute_value = value
         return author_feature
     except Exception as e:
         # Format the exception itself rather than reading ``e.message``:
         # that attribute does not exist on Python 3, and on Python 2 it can
         # be a non-string, which made the handler raise and hide the
         # original error.
         logging.error("%s%s", e, value)
         return None
 def run_and_create_author_feature(self, kwargs, id_val, feature):
     """Compute one feature and wrap the result in ``AuthorFeatures``.

     The method named ``feature`` is looked up on ``self`` and called with
     ``kwargs`` expanded as keyword arguments. The attribute name is
     ``<concrete class name>_<feature>`` converted with ``unicode``.

     :param kwargs: mapping expanded into the feature method call.
     :param id_val: value stored as ``author_guid``.
     :param feature: name of the method on ``self`` that computes the value.
     :return: the populated ``AuthorFeatures`` object, or ``None`` when the
         lookup, the computation or the construction raised; the error is
         logged in that case.
     """
     try:
         result = getattr(self, feature)(**kwargs)
         author_feature = AuthorFeatures()
         author_feature.author_guid = id_val
         author_feature.window_start = self._window_start
         author_feature.window_end = self._window_end
         subclass_name = self.__class__.__name__
         author_feature.attribute_name = unicode(subclass_name + "_" + feature)
         author_feature.attribute_value = result
         return author_feature
     except Exception as e:
         # Format the exception itself rather than reading ``e.message``:
         # that attribute does not exist on Python 3, so the handler itself
         # raised and hid the original error.
         logging.error("%s", e)
         return None
    def execute(self):
        """Turn the per-author topic mapping into features and persist them.

        Reads rows from ``self._db.get_single_post_per_author_topic_mapping()``.
        Element 0 of each row is the author guid; every later element is
        stored as a ``probability_topic_<index>`` feature. The features are
        then passed to ``self._db.update_author_features`` one by one, with a
        commit after every 100 updates and a final commit for any remainder.
        Progress and timing are printed to stdout.
        """
        started_at = time.time()
        logging.info("execute started for " + self.__class__.__name__)

        topic_rows = self._db.get_single_post_per_author_topic_mapping()
        total = len(topic_rows)
        authors_features = []
        for processed, topic_row in enumerate(topic_rows, 1):
            author_guid = topic_row[0]
            # Index 0 holds the guid, so topic numbering starts at 1.
            for topic in range(1, len(topic_row)):
                probability = topic_row[topic]
                topic_feature = AuthorFeatures()
                topic_feature.author_guid = author_guid
                topic_feature.window_start = self._window_start
                topic_feature.window_end = self._window_end
                topic_feature.attribute_name = unicode('probability_topic_' +
                                                       str(topic))
                topic_feature.attribute_value = probability
                authors_features.append(topic_feature)

            print("\r processed authors " + str(processed) + " from " +
                  str(total),
                  end="")

        if authors_features:
            print('\n Beginning merging author_features objects')
            pending = 0
            for feature_row in authors_features:
                pending += 1
                self._db.update_author_features(feature_row)
                if pending != 100:
                    continue
                # A full batch of 100 updates has accumulated: commit it.
                print("\r " + str(self.__class__.__name__) +
                      " merging author-features objects",
                      end="")
                self._db.commit()
                pending = 0
            if pending != 0:
                # Commit the last, partially filled batch.
                self._db.commit()
            print('Finished merging author_features objects')

        elapsed = time.time() - started_at
        print('execute finished in ' + str(elapsed) + ' seconds')