예제 #1
0
 def _create_feature_extractor(self):
     """Build the combined feature extractor for the configured feature types.

     A ``Bigraph`` with ``User`` sources and ``Repository`` targets is created
     from ``self.graph``.  For each kind enabled in ``self.feature_types``
     ('behavior', 'content', 'relation') the matching node feature matrices
     are collected, and every collected matrix is turned into a
     ``BigraphSimilarity`` wrapped in a ``SimilarityFeature``.  A
     ``ConstantFeature`` and an ``EdgeAttributeFeature`` (keyed by
     ``self.weight_key``) are always included, regardless of
     ``self.feature_types``.

     Returns:
         A ``CombinedFeature`` over ``self.adj`` that holds all extractors.
     """
     bigraph = Bigraph(self.graph, source_cls=User, target_cls=Repository)

     # Each entry is (feature matrix, bigraph side, human-readable name).
     features = []
     if 'behavior' in self.feature_types:
         # The bigraph matrix describes the source side; its transpose
         # (converted back to CSR) describes the target side.
         features.extend([
             (bigraph.matrix.copy(), 'source', 'contributed repository'),
             (bigraph.matrix.T.tocsr().copy(), 'target', 'contributor'),
         ])
     if 'content' in self.feature_types:
         languages = load_language_features(self.data_path, bigraph.targets)
         descriptions = load_description_features(self.data_path, bigraph.targets)
         features.extend([
             (languages, 'target', 'language'),
             (descriptions, 'target', 'description'),
         ])
     if 'relation' in self.feature_types:
         followers, followees = load_follow_features(self.data_path, bigraph.sources)
         features.extend([
             (followers, 'source', 'follower'),
             (followees, 'source', 'followee'),
         ])

     # NOTE(review): presumably releases temporaries left over from feature
     # loading before the similarity matrices are built -- confirm.
     gc.collect()  # force garbage collection

     # Extractors that do not depend on the selected feature types.
     extractors = [
         ConstantFeature(self.adj),
         EdgeAttributeFeature(self.adj, keys=self.weight_key),
     ]

     # Maps the bigraph side to the node kind shown in the progress message.
     type_map = {'source': 'user', 'target': 'repository'}
     for feature, type_, name in features:
         # A single parenthesised argument is valid both as a Python 2 print
         # statement and as a Python 3 print() call, with identical output.
         print('Creating node similarity matrix from %s feature (%s count: %d, '
               'dimension: %d)...' % (name, type_map[type_], feature.shape[0],
                                      feature.shape[1]))
         similarity = BigraphSimilarity(bigraph, feature, type_)
         extractor = SimilarityFeature(self.adj, similarity)
         extractors.append(extractor)
     return CombinedFeature(self.adj, *extractors)
예제 #2
0
 def _calculate_similarities(self):
     """Set ``self.similarity`` from the followee features of the source nodes."""
     data_path = self.data_path
     bigraph = self.bigraph
     # load_follow_features returns a pair; only its second element
     # (the followee features) is used here.
     _unused_followers, followee_features = load_follow_features(
         data_path, bigraph.sources)
     self.similarity = BigraphSimilarity(bigraph, followee_features, 'source')
예제 #3
0
 def _calculate_similarities(self):
     """Set ``self.similarity`` from a copy of the bigraph matrix (source side)."""
     bigraph = self.bigraph
     # A copy is handed over rather than the bigraph's own matrix object.
     source_features = bigraph.matrix.copy()
     self.similarity = BigraphSimilarity(bigraph, source_features, 'source')
예제 #4
0
 def _calculate_similarities(self):
     """Set ``self.similarity`` from the transposed bigraph matrix (target side)."""
     bigraph = self.bigraph
     # Transpose the matrix, then convert the result to CSR and copy it
     # before handing it over.
     transposed = bigraph.matrix.T
     target_features = transposed.tocsr().copy()
     self.similarity = BigraphSimilarity(bigraph, target_features, 'target')
예제 #5
0
 def _calculate_similarities(self):
     """Set ``self.similarity`` from the description features of the target nodes."""
     data_path = self.data_path
     bigraph = self.bigraph
     description_features = load_description_features(data_path, bigraph.targets)
     self.similarity = BigraphSimilarity(bigraph, description_features, 'target')