Example #1
import numpy as np
from scipy.sparse import csc_matrix
from sklearn.preprocessing import normalize


def get_supervised_basis(graphs,
                         targets,
                         decomposition_funcs=None,
                         preprocessors=None,
                         nbits=11,
                         n_components=2):
    # vectorize, basis and get_feature_scaling are assumed to be module-level
    # helpers (get_feature_scaling is shown in Example #2, vectorize in Example #6)
    data = vectorize(graphs,
                     decomposition_funcs,
                     preprocessors=preprocessors,
                     nbits=nbits,
                     seed=1)
    sparse_basis = basis(data, n_components)
    threshold = 0.1
    importances, signs = get_feature_scaling(graphs, targets,
                                             decomposition_funcs,
                                             preprocessors, nbits, threshold)
    # unit-norm average direction of the basis rows with non-negative sign
    # (.mean() returns an np.matrix; convert before calling normalize)
    avg_pos_vec = csc_matrix(normalize(np.asarray(
        sparse_basis[signs >= 0].mean(axis=0))))
    # remove each row's projection onto the average positive direction
    proj = sparse_basis.dot(avg_pos_vec.T) * avg_pos_vec
    new_neg_basis = normalize((sparse_basis - proj).T).T
    # reassemble the basis row by row according to the sign of each feature
    new_basis = csc_matrix(sparse_basis.shape)
    for i, s in enumerate(signs):
        if s > 0:
            new_basis[i] = sparse_basis[i]
        elif s < 0:
            new_basis[i] = new_neg_basis[i]
    return new_basis
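
The core of Example #1 is removing, from every basis row, the component along the unit-norm average of the "positive" rows. Below is a minimal self-contained sketch of that step with plain numpy/scipy; sparse_basis and signs are made-up stand-ins, not output of the functions above.

import numpy as np
from scipy.sparse import csc_matrix
from sklearn.preprocessing import normalize

# made-up stand-ins for sparse_basis (rows = basis vectors) and signs
rng = np.random.RandomState(0)
sparse_basis = csc_matrix(rng.rand(5, 8))
signs = np.array([1, -1, 0, 1, -1])

# unit-norm average of the rows with non-negative sign
avg_pos_vec = csc_matrix(normalize(np.asarray(
    sparse_basis[signs >= 0].mean(axis=0))))

# subtract each row's projection onto that direction
proj = sparse_basis.dot(avg_pos_vec.T) * avg_pos_vec
residual = sparse_basis - proj

# every residual row is now orthogonal to avg_pos_vec (all entries ~0)
print(residual.dot(avg_pos_vec.T).toarray().ravel())

# the original code then rescales the columns to unit norm before reassembly
new_neg_basis = normalize(residual.T).T
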
Example #2
import numpy as np
from sklearn.feature_selection import RFECV
from sklearn.linear_model import SGDClassifier


def get_feature_scaling(graphs,
                        targets,
                        decomposition_funcs=None,
                        preprocessors=None,
                        nbits=11,
                        threshold=0.25):
    x = vectorize(graphs,
                  decomposition_funcs,
                  preprocessors=preprocessors,
                  nbits=nbits,
                  seed=1)
    # recursive feature elimination wrapped around a sparse linear classifier
    estimator = SGDClassifier(penalty='elasticnet', tol=1e-3)
    fs = RFECV(estimator, step=.1, cv=3)
    fs.fit(x, targets)
    # map the fitted coefficients back onto the full feature space
    importances = fs.inverse_transform(fs.estimator_.coef_).reshape(-1)
    signs = np.sign(importances)
    importances = np.absolute(importances)
    importances = importances / np.max(importances)
    # non-linear thresholding: zero out the least important features
    th = np.percentile(importances, threshold * 100)
    signs[importances < th] = 0
    importances[importances < th] = 0
    return importances, signs
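
The last block of Example #2 zeroes out every feature whose normalized importance falls below the threshold-th percentile. A self-contained numpy illustration with made-up importance values:

import numpy as np

importances = np.array([0.05, 0.9, 0.3, 1.0, 0.12, 0.0, 0.45])
signs = np.array([-1.0, 1.0, -1.0, 1.0, 1.0, 0.0, -1.0])

threshold = 0.25  # drop features below the 25th importance percentile
th = np.percentile(importances, threshold * 100)

signs[importances < th] = 0
importances[importances < th] = 0

print(th)           # 0.085 for these values
print(importances)  # the least important entries are zeroed
print(signs)        # and their signs with them
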
Example #3
import numpy as np


def _evaluate_complexity(decompose_func, graphs=None):
    D = vectorize(graphs, decomposition_funcs=decompose_func)
    # Gram matrix of the vectorized graphs
    K = D.dot(D.T).todense()
    # K is symmetric, so eigvalsh returns real eigenvalues
    eig_vals = np.linalg.eigvalsh(K)
    v = eig_vals * eig_vals
    v = -np.sort(-v)
    v = np.log(v + 1)
    d = np.mean(v)
    return d
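
Example #3 scores a decomposition by the mean log of the squared eigenvalues of the Gram matrix built from the vectorized graphs. A self-contained sketch of the same computation on a small made-up data matrix:

import numpy as np

# made-up stand-in for the vectorized graphs (rows = graphs)
rng = np.random.RandomState(0)
D = rng.rand(4, 6)

# Gram matrix and its eigenvalues (symmetric, so they are real)
K = D.dot(D.T)
eig_vals = np.linalg.eigvalsh(K)

# mean of log(1 + eigenvalue^2); the descending sort in the
# original does not change the mean
score = np.mean(np.log(eig_vals ** 2 + 1))
print(score)
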
Example #4
def transform(graphs,
              basis=None,
              decomposition_funcs=None,
              preprocessors=None,
              nbits=11):
    data = vectorize(graphs,
                     decomposition_funcs,
                     preprocessors=preprocessors,
                     nbits=nbits,
                     seed=1)
    # project onto the basis with the sparse dot product
    # (np.dot does not dispatch correctly on scipy sparse matrices)
    return data.dot(basis).todense()
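
Example #4 maps new graphs into the low-dimensional space spanned by a previously computed basis. A self-contained illustration of the sparse projection, assuming the basis has shape (n_features, n_components):

import numpy as np
from scipy.sparse import csr_matrix

rng = np.random.RandomState(0)
data = csr_matrix(rng.rand(3, 8))    # 3 graphs, 8 hashed features
basis = csr_matrix(rng.rand(8, 2))   # 8 features, 2 components

# sparse dot product, then densify the low-dimensional embedding
embedding = data.dot(basis).todense()
print(embedding.shape)  # (3, 2)
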
Example #5
def get_basis(graphs,
              decomposition_funcs=None,
              preprocessors=None,
              nbits=11,
              n_components=2):
    data = vectorize(graphs,
                     decomposition_funcs,
                     preprocessors=preprocessors,
                     nbits=nbits,
                     seed=1)
    # n_components orthogonal directions computed from the vectorized graphs
    sparse_orthogonal_basis = basis(data, n_components)
    return sparse_orthogonal_basis
Example #6
def vectorize(graphs):
    # thin wrapper: evec and decomp are assumed to be defined in the enclosing scope
    return evec.vectorize(graphs, decomposition_funcs=decomp)
Example #7
def _transform(self, graphs):
    # method of a vectorizer class: the vectorization parameters live on the instance
    data_mtx = vectorize(graphs, self.decomposition_funcs,
                         self.preprocessors, self.nbits, self.seed)
    return data_mtx
Example #8
def evaluate_identity(decompose_func):
    # sel_graphs is assumed to be a list of graphs defined in the enclosing scope
    D = vectorize(sel_graphs, decomposition_funcs=decompose_func)
    # cheap fingerprint of the vectorization: number of non-zeros and total sum
    signature = (D.count_nonzero(), D.sum())
    return signature
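
The (count_nonzero, sum) pair in Example #8 acts as a cheap fingerprint for checking that two runs of a decomposition produce identical vectorizations. A self-contained illustration with a small sparse matrix:

from scipy.sparse import csr_matrix

D = csr_matrix([[0.0, 1.5, 0.0],
                [2.0, 0.0, 0.5]])

signature = (D.count_nonzero(), D.sum())
print(signature)  # (3, 4.0)
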