Ejemplo n.º 1
0
 def predict(self, graphs):
     """Predict a target for each graph with the wrapped classifier.

     The graphs are vectorized using this object's ``encoding_func`` and
     ``feature_size``, the stored feature values are reduced to 0/1
     indicators, and the result is handed to ``self.classifier.predict``.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.

     Returns:
         Whatever ``self.classifier.predict`` returns for the binarized
         feature matrix.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     # Keep only presence/absence: stored values > 0 become 1, others 0.
     is_present = features.data > 0
     features.data = np.where(is_present, 1, 0)
     return self.classifier.predict(features)
Ejemplo n.º 2
0
 def fit(self, graphs, targets):
     """Fit a 100-tree random forest regressor on the vectorized graphs.

     After fitting, the per-feature importances and the intercept computed
     by ``self.feature_importance`` on the same graphs are stored in
     ``self.importance_dict`` and ``self.intercept``.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.
         targets: Regression targets passed to the estimator's ``fit``.

     Returns:
         This object, so that calls can be chained.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     self.estimator = RandomForestRegressor(n_estimators=100)
     fitted = self.estimator.fit(features, targets)
     self.estimator = fitted
     # feature_importance needs the fitted estimator, so it runs last.
     importance_and_intercept = self.feature_importance(graphs)
     self.importance_dict, self.intercept = importance_and_intercept
     return self
Ejemplo n.º 3
0
 def fit(self, graphs, targets):
     """Fit the wrapped classifier on binarized graph features.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.
         targets: Targets passed through to ``self.classifier.fit``.

     Returns:
         This object, so that calls can be chained.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     # Keep only presence/absence: stored values > 0 become 1, others 0.
     is_present = features.data > 0
     features.data = np.where(is_present, 1, 0)
     self.classifier.fit(features, targets)
     return self
Ejemplo n.º 4
0
 def feature_importance(self, graphs):
     """Compute per-feature importances from ``ti.predict`` contributions.

     The graphs are vectorized and passed, together with
     ``self.estimator``, to ``ti.predict``. The returned contributions are
     averaged over axis 0 (presumably the sample axis -- confirm against
     the ``ti`` documentation) to give one value per feature index.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.

     Returns:
         A ``(importance_dict, intercept)`` pair: ``importance_dict`` maps
         each feature index to its averaged contribution, and ``intercept``
         is the first entry of the biases returned by ``ti.predict``.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     # The predictions themselves are not needed here.
     _, biases, contributions = ti.predict(self.estimator, features)
     mean_contributions = np.mean(contributions, axis=0)
     importance_dict = {
         index: value for index, value in enumerate(mean_contributions)
     }
     return importance_dict, biases[0]
Ejemplo n.º 5
0
 def decision_function(self, graphs):
     """Return the wrapped classifier's decision values for the graphs.

     The graphs are vectorized, the stored feature values are reduced to
     0/1 indicators, and the matrix is passed to
     ``self.classifier.decision_function``.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.

     Returns:
         Whatever ``self.classifier.decision_function`` returns for the
         binarized feature matrix.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     # Keep only presence/absence: stored values > 0 become 1, others 0.
     is_present = features.data > 0
     features.data = np.where(is_present, 1, 0)
     return self.classifier.decision_function(features)
Ejemplo n.º 6
0
 def decision_function(self, graphs):
     """Return column 1 of ``predict_proba`` for each graph.

     The graphs are vectorized and binarized before being scored by
     ``self.classifier.predict_proba``. Column 1 is taken to be the
     positive class, which assumes a binary classification problem.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.

     Returns:
         A flat (1-D) array holding the column-1 probability per graph.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     # Keep only presence/absence: stored values > 0 become 1, others 0.
     is_present = features.data > 0
     features.data = np.where(is_present, 1, 0)
     class_probabilities = self.classifier.predict_proba(features)
     # Binary problem assumed: column 1 holds the positive-class values.
     positive_column = class_probabilities[:, 1]
     return positive_column.reshape(-1)
Ejemplo n.º 7
0
 def fit(self, graphs, targets):
     """Build the class-by-feature matrix used for scoring.

     Each target is turned into a two-column indicator row: ``(0, 1)``
     when the target equals 1 and ``(1, 0)`` otherwise. The column sums of
     these rows, passed through ``right_stochastic``, are stored as
     ``self.target_bias``; the transposed indicator matrix multiplied by
     the binarized feature matrix is stored as ``self.classifier_mtx``.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.
         targets: Iterable of targets; only equality with 1 is tested.

     Returns:
         This object, so that calls can be chained.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     # Keep only presence/absence: stored values > 0 become 1, others 0.
     features.data = np.where(features.data > 0, 1, 0)

     positive_row, negative_row = (0, 1), (1, 0)
     indicator_rows = [
         positive_row if t == 1 else negative_row for t in targets
     ]
     target_mtx = np.array(indicator_rows)

     # Per-class counts as a single row, then normalized.
     class_counts = target_mtx.sum(axis=0).reshape(1, -1)
     self.target_bias = right_stochastic(class_counts)

     # One row per class, one column per feature.
     self.classifier_mtx = csr_matrix(target_mtx).T.dot(features)
     return self
Ejemplo n.º 8
0
 def decision_function(self, graphs):
     """Return the bias-adjusted column-1 score for each graph.

     The vectorized graphs are multiplied by the transpose of
     ``self.classifier_mtx``, normalized with ``right_stochastic``,
     re-weighted per class by ``self.target_bias`` and normalized again.
     Column 1 is taken to be the positive class, which assumes a binary
     classification problem.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.

     Returns:
         A flat (1-D) array holding the column-1 score per graph.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     # NOTE(review): the features are used exactly as returned by
     # vectorize_graphs, with no binarization step -- confirm that this
     # is intended.
     class_scores = features.dot(self.classifier_mtx.T).todense()
     class_probs = right_stochastic(class_scores)
     # Incorporate the training-set class bias: the diagonal matrix scales
     # each class column by its bias weight.
     bias_weights = np.asarray(self.target_bias).reshape(-1)
     biased_probs = right_stochastic(class_probs * np.diag(bias_weights))
     # Binary problem assumed: column 1 holds the positive-class values.
     positive_column = biased_probs[:, 1]
     return positive_column.A.reshape(-1)
Ejemplo n.º 9
0
 def feature_importance(self, pos_graphs, neg_graphs):
     """Estimate per-feature importances from a linear model.

     Positive and negative graphs are concatenated and labelled 1 and -1
     respectively. An elastic-net ``SGDClassifier`` is fitted inside
     ``RFECV`` (3-fold cross-validation, ``step=.1``), and the estimator
     it ends up with is stored in ``self.estimator``.

     Args:
         pos_graphs: Graphs labelled 1. Must support ``+`` with
             ``neg_graphs`` and ``len``.
         neg_graphs: Graphs labelled -1.

     Returns:
         A ``(importance_dict, intercept)`` pair: ``importance_dict`` maps
         each feature index to its coefficient after mapping back through
         ``fs.inverse_transform``, and ``intercept`` is the first
         intercept of the fitted estimator.
     """
     graphs = pos_graphs + neg_graphs
     y = [1] * len(pos_graphs) + [-1] * len(neg_graphs)
     x = vectorize_graphs(graphs,
                          encoding_func=self.encoding_func,
                          feature_size=self.feature_size)
     estimator = SGDClassifier(penalty='elasticnet', tol=1e-3)
     fs = RFECV(estimator, step=.1, cv=3)
     fs.fit(x, y)
     # The previous version also evaluated decision_function on the whole
     # training set here and discarded the result; that dead computation
     # has been removed.
     self.estimator = fs.estimator_
     # coef_ only covers the features RFECV kept; inverse_transform maps
     # it back to the full feature index space.
     importances = fs.inverse_transform(fs.estimator_.coef_).reshape(-1)
     intercept = fs.estimator_.intercept_[0]
     importance_dict = dict(enumerate(importances))
     return importance_dict, intercept
Ejemplo n.º 10
0
 def decision_function(self, graphs):
     """Return ``self.estimator.predict`` evaluated on the graphs.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.

     Returns:
         The estimator's predictions for the vectorized graphs.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     scores = self.estimator.predict(features)
     return scores
Ejemplo n.º 11
0
 def classifier_predict(self, graphs):
     """Predict a target for each graph with ``self.estimator``.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.

     Returns:
         Whatever ``self.estimator.predict`` returns for the vectorized
         graphs.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     return self.estimator.predict(features)
Ejemplo n.º 12
0
 def classifier_decision_function(self, graphs):
     """Return ``self.estimator``'s decision values for the graphs.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.

     Returns:
         Whatever ``self.estimator.decision_function`` returns for the
         vectorized graphs.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     return self.estimator.decision_function(features)
Ejemplo n.º 13
0
 def fit(self, graphs, targets):
     """Fit ``self.estimator`` on the vectorized graphs.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.
         targets: Targets passed through to ``self.estimator.fit``.

     Returns:
         This object, so that calls can be chained.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     self.estimator.fit(features, targets)
     return self
Ejemplo n.º 14
0
 def decision_function(self, graphs):
     """Return column 1 of ``self.estimator.predict_proba`` per graph.

     Args:
         graphs: Graphs accepted by ``vectorize_graphs``.

     Returns:
         A flat (1-D) array with the column-1 probability for each graph.
     """
     features = vectorize_graphs(
         graphs,
         encoding_func=self.encoding_func,
         feature_size=self.feature_size,
     )
     class_probabilities = self.estimator.predict_proba(features)
     second_column = class_probabilities[:, 1]
     return second_column.reshape(-1)