def get_supervised_basis(graphs, targets, decomposition_funcs=None, preprocessors=None, nbits=11, n_components=2):
    """Compute a target-aware sparse basis for the vectorized graph space.

    Basis rows whose features score positively against `targets` are kept
    unchanged; rows that score negatively are re-normalized after removing
    their projection onto the average positive direction. Rows with a zero
    score are left empty.

    Parameters
    ----------
    graphs : iterable
        Input graphs, vectorized via the module-level `vectorize`.
    targets : array-like
        Supervision targets used by `get_feature_scaling` to sign features.
    decomposition_funcs, preprocessors, nbits :
        Forwarded to `vectorize` / `get_feature_scaling`.
    n_components : int
        Number of basis components requested from `basis`.

    Returns
    -------
    scipy.sparse.csc_matrix
        The adjusted basis, same shape as the unsupervised basis.
    """
    # Local import: lil_matrix is only needed for assembly here.
    from scipy.sparse import lil_matrix

    data = vectorize(graphs, decomposition_funcs, preprocessors=preprocessors, nbits=nbits, seed=1)
    sparse_basis = basis(data, n_components)
    threshold = 0.1
    importances, signs = get_feature_scaling(graphs, targets, decomposition_funcs, preprocessors, nbits, threshold)
    # Average direction of the non-negative-signed basis rows.
    # NOTE(review): `signs >= 0` also includes rows zeroed by thresholding —
    # presumably intentional; confirm.
    avg_pos_vec = csc_matrix(normalize(sparse_basis[signs >= 0].mean(axis=0)))
    # Remove each row's projection onto the average positive direction,
    # then re-normalize row-wise.
    proj = sparse_basis.dot(avg_pos_vec.T) * avg_pos_vec
    new_neg_basis = normalize((sparse_basis - proj).T).T
    # Assemble in LIL format: row assignment into a csc_matrix is
    # inefficient and raises SparseEfficiencyWarning. Convert to CSC at
    # the end so the return type matches the original implementation.
    new_basis = lil_matrix(sparse_basis.shape)
    for i, s in enumerate(signs):
        if s > 0:
            new_basis[i] = sparse_basis[i]
        elif s < 0:
            new_basis[i] = new_neg_basis[i]
    return csc_matrix(new_basis)
def get_feature_scaling(graphs, targets, decomposition_funcs=None, preprocessors=None, nbits=11, threshold=0.25):
    """Score each feature's importance for discriminating `targets`.

    Fits an elastic-net SGD classifier inside recursive feature elimination
    (RFECV), maps the surviving coefficients back to the full feature space,
    and zeroes out the weakest `threshold` fraction of features.

    Parameters
    ----------
    graphs : iterable
        Input graphs, vectorized via the module-level `vectorize`.
    targets : array-like
        Class labels for the supervised fit.
    threshold : float
        Fraction (0..1) of least-important features to zero out.

    Returns
    -------
    importances : numpy.ndarray
        Per-feature importance scaled to [0, 1]; 0 for pruned features.
    signs : numpy.ndarray
        Sign of each feature's coefficient (+1/-1); 0 for pruned features.
    """
    x = vectorize(graphs, decomposition_funcs, preprocessors=preprocessors, nbits=nbits, seed=1)
    estimator = SGDClassifier(penalty='elasticnet', tol=1e-3)
    fs = RFECV(estimator, step=.1, cv=3)
    fs.fit(x, targets)
    # NOTE(review): a dead statement computing
    # fs.estimator_.decision_function(fs.transform(x)).reshape(-1) and
    # discarding the result was removed here.
    importances = fs.inverse_transform(fs.estimator_.coef_).reshape(-1)
    signs = np.sign(importances)
    importances = np.absolute(importances)
    importances = importances / np.max(importances)
    # Non-linear thresholding: drop everything below the threshold-th
    # percentile of importance.
    th = np.percentile(importances, threshold * 100)
    signs[importances < th] = 0
    importances[importances < th] = 0
    return importances, signs
def _evaluate_complexity(decompose_func, graphs=None):
    """Return a scalar complexity score for `decompose_func` on `graphs`.

    The score is the mean of log(1 + lambda^2) over the eigenvalues
    lambda of the Gram matrix of the vectorized graphs.
    """
    data_mtx = vectorize(graphs, decomposition_funcs=decompose_func)
    gram = data_mtx.dot(data_mtx.T).todense()
    eigenvalues, _ = np.linalg.eig(gram)
    squared = eigenvalues * eigenvalues
    # Descending order (the mean is order-invariant; kept for parity).
    descending = -np.sort(-squared)
    return np.mean(np.log(descending + 1))
def transform(graphs, basis=None, decomposition_funcs=None, preprocessors=None, nbits=11):
    """Vectorize `graphs` and project them onto `basis`.

    Returns the projection as a dense matrix.
    """
    encoded = vectorize(graphs, decomposition_funcs, preprocessors=preprocessors, nbits=nbits, seed=1)
    projected = np.dot(encoded, basis)
    return projected.todense()
def get_basis(graphs, decomposition_funcs=None, preprocessors=None, nbits=11, n_components=2):
    """Vectorize `graphs` and return a sparse orthogonal basis.

    `n_components` is forwarded to the module-level `basis` helper.
    """
    encoded = vectorize(graphs, decomposition_funcs, preprocessors=preprocessors, nbits=nbits, seed=1)
    return basis(encoded, n_components)
def vectorize(graphs):
    """Vectorize `graphs` via `evec` using the module-level `decomp`.

    NOTE(review): this single-argument wrapper shadows the multi-argument
    `vectorize` called elsewhere in this file — confirm which definition
    the callers are meant to resolve to.
    """
    data_mtx = evec.vectorize(graphs, decomposition_funcs=decomp)
    return data_mtx
def _transform(self, graphs):
    """Vectorize `graphs` with this object's stored configuration."""
    return vectorize(graphs, self.decomposition_funcs, self.preprocessors, self.nbits, self.seed)
def evaluate_identity(decompose_func):
    """Return a (nonzero-count, sum) signature of the vectorized `sel_graphs`.

    The pair acts as a cheap fingerprint to detect whether two
    decomposition functions produce identical encodings.
    """
    mtx = vectorize(sel_graphs, decomposition_funcs=decompose_func)
    return (mtx.count_nonzero(), mtx.sum())