def _create_feature_extractor(self):
    """Assemble the combined feature extractor for the user/repository bigraph.

    A ``Bigraph`` is built from ``self.graph`` with ``User`` nodes as sources
    and ``Repository`` nodes as targets.  Depending on which names appear in
    ``self.feature_types``, node feature matrices are gathered:

    * ``'behavior'`` -- a copy of the bigraph matrix (source side) and a
      copy of its transpose converted with ``tocsr()`` (target side);
    * ``'content'`` -- language and description features loaded for the
      bigraph targets;
    * ``'relation'`` -- follower and followee features loaded for the
      bigraph sources.

    Every gathered matrix is wrapped in a ``BigraphSimilarity`` and then a
    ``SimilarityFeature``.  These are appended after the two extractors that
    are always present (``ConstantFeature`` and ``EdgeAttributeFeature``).

    Returns:
        A ``CombinedFeature`` built from ``self.adj`` and all extractors.
    """
    bigraph = Bigraph(self.graph, source_cls=User, target_cls=Repository)

    # Each entry is (feature matrix, bigraph side, human-readable label).
    node_features = []

    if 'behavior' in self.feature_types:
        contributed = bigraph.matrix.copy()
        contributors = bigraph.matrix.T.tocsr().copy()
        node_features.append(
            (contributed, 'source', 'contributed repository'))
        node_features.append((contributors, 'target', 'contributor'))

    if 'content' in self.feature_types:
        languages = load_language_features(self.data_path, bigraph.targets)
        descriptions = load_description_features(self.data_path,
                                                 bigraph.targets)
        node_features.append((languages, 'target', 'language'))
        node_features.append((descriptions, 'target', 'description'))

    if 'relation' in self.feature_types:
        followers, followees = load_follow_features(self.data_path,
                                                    bigraph.sources)
        node_features += [
            (followers, 'source', 'follower'),
            (followees, 'source', 'followee'),
        ]

    # Force a garbage collection pass once all features are loaded.
    gc.collect()

    # These two extractors are always included, regardless of feature types.
    extractors = [
        ConstantFeature(self.adj),
        EdgeAttributeFeature(self.adj, keys=self.weight_key),
    ]

    # Maps a bigraph side to the node kind shown in the progress message.
    node_kind = {'source': 'user', 'target': 'repository'}

    for matrix, side, label in node_features:
        message = ('Creating node similarity matrix from %s feature '
                   '(%s count: %d, dimension: %d)...') % (
            label, node_kind[side], matrix.shape[0], matrix.shape[1])
        # A parenthesized single argument prints the same text on Python 2.
        print(message)

        similarity = BigraphSimilarity(bigraph, matrix, side)
        extractors.append(SimilarityFeature(self.adj, similarity))

    return CombinedFeature(self.adj, *extractors)
def _calculate_similarities(self):
    """Set ``self.similarity`` from followee features of the source nodes.

    ``load_follow_features`` returns a pair; only its second element (the
    followee features) is used, the first is discarded.  The result is
    wrapped in a ``BigraphSimilarity`` for the ``'source'`` side.
    """
    follow_pair = load_follow_features(self.data_path, self.bigraph.sources)
    _unused_followers, followee_features = follow_pair
    self.similarity = BigraphSimilarity(
        self.bigraph, followee_features, 'source')
def _calculate_similarities(self):
    """Set ``self.similarity`` using the bigraph matrix as source features.

    A copy of ``self.bigraph.matrix`` is passed to ``BigraphSimilarity``
    for the ``'source'`` side, so the bigraph's own matrix object is not
    the one handed over.
    """
    source_features = self.bigraph.matrix.copy()
    self.similarity = BigraphSimilarity(
        self.bigraph, source_features, 'source')
def _calculate_similarities(self):
    """Set ``self.similarity`` using the transposed bigraph matrix.

    The bigraph matrix is transposed, converted with ``tocsr()`` and copied;
    the result is passed to ``BigraphSimilarity`` for the ``'target'`` side.
    """
    transposed = self.bigraph.matrix.T
    # NOTE(review): presumably a SciPy sparse matrix, given .tocsr() -- confirm.
    target_features = transposed.tocsr().copy()
    self.similarity = BigraphSimilarity(
        self.bigraph, target_features, 'target')
def _calculate_similarities(self):
    """Set ``self.similarity`` from description features of the target nodes.

    Features are loaded with ``load_description_features`` from
    ``self.data_path`` for ``self.bigraph.targets`` and wrapped in a
    ``BigraphSimilarity`` for the ``'target'`` side.
    """
    description_features = load_description_features(
        self.data_path, self.bigraph.targets)
    self.similarity = BigraphSimilarity(
        self.bigraph, description_features, 'target')