Example #1
 def egograph(self, node, delete_center=False):
     """Return the ego graph of `node` within `self.radius` hops."""
     t0 = time.time()
     res = nx.ego_graph(self.graph, node, radius=self.radius)
     if delete_center:  # optionally drop the center node itself
         res.remove_node(node)
     if self.print_flag:
         print('Finished node %s in %s' %
               (node, precision_format(time.time() - t0, 3)))
     return res
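Every example on this page calls precision_format (Example #7 imports it from Esme.helper.format); its source is not shown here. A minimal stand-in, assuming it simply rounds a value to a given number of decimal places (an assumption, not the actual Esme implementation):

# hypothetical stand-in for Esme.helper.format.precision_format (assumption)
def precision_format(nbr, precision=1):
    # round `nbr` to `precision` decimal places
    return round(nbr, precision)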
Example #2
 def sw_dist(self):
     """Sliced Wasserstein distance between the two stored diagrams."""
     swdgms = dgms2swdgms(self.dgms)
     # kernel value between the two diagrams
     res = sw([swdgms[0]], [swdgms[1]],
              kernel_type='sw',
              n_directions=10,
              bandwidth=1.0,
              K=1,
              p=1)[0][0]
     # convert the kernel value back into a distance
     sw_dist = np.log(res) * (-2)
     return precision_format(sw_dist)
Example #3
def evaluate_tda_kernel(tda_kernel, Y, best_result_so_far, print_flag='off'):
    """
    TODO: figure this out
    :param tda_kernel:
    :param Y:
    :param best_result_so_far:
    :param print_flag:
    :return:
    """

    t1 = time.time()
    n = np.shape(tda_kernel)[0]
    grid_search_re = train_svm(np.zeros((n, 23)),
                               Y,
                               print_flag=print_flag,
                               kernel=tda_kernel,
                               kernel_flag=True,
                               nonlinear_flag=False)  # X is dummy here
    if grid_search_re['score'] < best_result_so_far[0] - 4:
        print('Saved one unnecessary evaluation of bad kernel')
        return (0, 0, {}, 0)

    cv_score = []
    for seed in range(5):
        clf = svm.SVC(kernel='precomputed', C=grid_search_re['param']['C'])
        k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
        scores = cross_val_score(clf,
                                 tda_kernel,
                                 Y,
                                 cv=k_fold,
                                 scoring='accuracy',
                                 n_jobs=-1)
        cv_score.append(scores.mean())

    cv_score = np.array(cv_score)
    t2 = time.time()
    svm_time = precision_format(t2 - t1, 1)
    return (precision_format(100 * cv_score.mean(), 1),
            precision_format(100 * cv_score.std(), 1),
            grid_search_re,
            svm_time)
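The core pattern in the loop above, cross-validating an SVC on a precomputed kernel, can be reproduced in isolation. A minimal, self-contained sketch with toy data and a plain linear Gram matrix standing in for the TDA kernel (all names below are illustrative):

import numpy as np
from sklearn import svm
from sklearn.model_selection import StratifiedKFold, cross_val_score

# toy data: 40 samples, binary labels, linear kernel as a stand-in for a TDA kernel
rng = np.random.RandomState(0)
X = rng.randn(40, 5)
Y = rng.randint(0, 2, size=40)
kernel = X @ X.T  # precomputed Gram matrix of shape (n, n)

clf = svm.SVC(kernel='precomputed', C=1.0)
k_fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
scores = cross_val_score(clf, kernel, Y, cv=k_fold, scoring='accuracy')
print(scores.mean())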
Example #4
def alldgms(gs, radius=1, n=100, dataset='blogcatalog',
            recompute_flag=False, method='serial', verbose=5, zigzag=False):
    """
    :param gs:  a list of egographs
    :param radius: radius of egograph. # todo not very useful.
    :param n:
    :param dataset:
    :param recompute_flag: whether to recompute or not
    :param method: serial or parallel
    :param verbose:
    :param zigzag:
    :return:
    """

    t0 = time.time()
    dir = os.path.join('/home/cai.507/Documents/DeepLearning/deep-persistence/EigenPro2/emb', dataset, '')  # directory holding the cached diagrams
    file = dir + 'dgms_radius_' + str(radius) + '_' + str(n) + '.emb'
    if recompute_flag and os.path.exists(file):
        os.remove(file)  # delete the cache so the diagrams get recomputed

    try: # load existing dgms
        with open(file, "r") as f:
            print(file)
            diags = json.load(f)
            print('Load existing dgms takes %s\n' % precision_format(time.time() - t0))
            dgms = diags2dgms(diags)
        return dgms

    except (IOError, FileNotFoundError):  # cache miss: compute from scratch
        kwargs_ = {'key': 'fv', 'subflag': 'True', 'one_homology_flag': False}
        if method == 'parallel':
            diags = Parallel(n_jobs=-1, verbose=verbose)(delayed(wrapper_getdiagram)(g, zigzag=zigzag) for g in gs)  # the cpu usage is only 250%. TODO: optimize
            dgms = diags2dgms(diags)
            print('Dgms Parallel version finished')

        elif method == 'serial':
            dgms = Parallel(n_jobs=1, verbose=verbose)(delayed(wrapper_getdiagram)(g, parallel_flag=False, zigzag=zigzag) for g in gs)
            print('Dgms Serial version finished')

        else:
            raise Exception('No method %s' % method)

        # save the computed dgms
        with open(file, 'w') as f:
            diags = dgms2diags(dgms)
            json.dump(diags, f)
            print('Finished computing and saving %s dgms using method %s. It takes %s\n' % (len(dgms), method, time.time() - t0))
        return diags2dgms(diags)
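The load-or-compute caching pattern used above can be isolated into a small helper. A minimal, self-contained sketch with a placeholder computation (the helper name, cache path, and toy diagram below are illustrative, not part of the original code):

import json

def load_or_compute(file, compute):
    """Load a JSON cache if it exists, otherwise compute, save, and return it."""
    try:
        with open(file, 'r') as f:
            return json.load(f)
    except (IOError, FileNotFoundError):
        result = compute()
        with open(file, 'w') as f:
            json.dump(result, f)
        return result

# usage with a placeholder computation standing in for the diagram pipeline
dgms = load_or_compute('/tmp/dgms_radius_1_100.emb',
                       lambda: [[[0.0, 1.0], [0.5, 2.0]]])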
Example #5
def sw_parallel(dgms1,
                dgms2,
                kernel_type='sw',
                parallel_flag=True,
                granularity=25,
                **featkwargs):
    """
    build on top of function sw

    :param dgms1: a list of array.
    :param dgms2:
    :param kernel_type: sw, pss, wg
    :param parallel_flag: Ture if want to compute in parallel
    :param granularity: parameter for parallel computing.
    :param featkwargs: kwargs for sw/pss/wg
    :return:
    """

    t1 = time.time()
    assert_sw_dgm(dgms1)
    assert_sw_dgm(dgms2)
    n1 = len(dgms1)
    n2 = len(dgms2)
    kernel = np.zeros((n2, n1))

    if parallel_flag:
        # parallel version
        kernel = Parallel(n_jobs=-1, backend='multiprocessing')(
            delayed(sw)(dgms1,
                        dgms2[i:min(i + granularity, n2)],
                        kernel_type=kernel_type,
                        **featkwargs) for i in range(0, n2, granularity))
        kernel = np.vstack(kernel)  # stack the row blocks into the full (n2, n1) matrix
    else:  # used as verification
        for i in range(n2):
            kernel[i] = sw(dgms1, [dgms2[i]],
                           kernel_type=kernel_type,
                           **featkwargs)

    t = precision_format(time.time() - t1, 1)
    print('Finished computing %s kernel of shape %s. Takes %s' %
          (kernel_type, kernel.shape, t))
    return (kernel / float(np.max(kernel)), t)
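The chunking strategy, where each parallel task computes `granularity` rows of the kernel and the blocks are then stacked, can be shown in isolation. A minimal, self-contained sketch with a dummy pairwise function standing in for sw (all names below are illustrative):

import numpy as np
from joblib import Parallel, delayed

def block(xs, ys):
    # dummy pairwise computation: one row per element of ys, one column per element of xs
    return np.array([[abs(x - y) for x in xs] for y in ys])

xs = list(range(7))      # plays the role of dgms1
ys = list(range(10))     # plays the role of dgms2
granularity = 4          # rows handled per parallel task

rows = Parallel(n_jobs=-1)(
    delayed(block)(xs, ys[i:i + granularity]) for i in range(0, len(ys), granularity))
kernel = np.vstack(rows)  # shape (len(ys), len(xs))
print(kernel.shape)       # (10, 7)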
Example #6
def load_existing_graph(graph, file):
    """Load a pickled graph dataset; returns (graphs, labels) if `file` exists."""
    start = time.time()

    if os.path.isfile(file):
        print('Loading existing files')
        if graph == 'reddit_12K':
            file = os.path.join(
                '/home/cai.507/Documents/DeepLearning/deep-persistence/dataset/datasets/',
                'reddit_12K' + '.graph')
            with open(file, 'rb') as f:
                data = pickle.load(f, encoding='latin1')
            graphs, labels = data['graph'], data['labels']
        else:
            with open(file, 'rb') as f:
                graphs, labels = pickle.load(f, encoding='latin1')

        print('Loading takes %s' % precision_format(time.time() - start, 1))
        if graph == 'ptc': graphs[151] = graphs[152]  # small hack
        return graphs, labels
Example #7
def format_(v, idx=0):
    # idx 0 for training error, idx 1 for test error
    from Esme.helper.format import precision_format
    # unpack the nested dict: take the first value of v, pick the train/test entry
    # at position idx, take its first value, and read the accuracy at index 1
    res = (1 - list(list(v.values())[0][idx].values())[0][1]) * 100
    res = precision_format(res, 1)  # error rate in percent, 1 decimal place
    return res
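The nested indexing is easier to read against a concrete input. A hypothetical v with the shape the function expects (this structure is inferred from the indexing, not documented in the source):

# illustrative input: one outer key mapping to (train_stats, test_stats);
# each stats dict maps a parameter string to a (param, accuracy) pair
v = {'fv': ({'C=1': ('C=1', 0.93)},    # idx 0: training accuracy
            {'C=1': ('C=1', 0.88)})}   # idx 1: test accuracy
idx = 1
err = (1 - list(list(v.values())[0][idx].values())[0][1]) * 100
print(round(err, 1))  # 12.0 -> test error in percent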
Example #8
 def bd_dist(self):
     """Bottleneck distance between self.dgm1 and self.dgm2."""
     return precision_format(d.bottleneck_distance(self.dgm1, self.dgm2))
Example #9
    def dgms_vecs(self, **kwargs):
        """
        :param kwargs: pass all kwargs here. PI('bandwidth', 'weight', 'im_range', 'resolution'), PL('num_landscapes', 'resolution')
        :return: np.array of shape (n_dgms, n_dim) where all zero columns are removed
        """
        self.param = kwargs
        t1 = time.time()

        def arctan(C, p):
            return lambda x: C * np.arctan(np.power(x[1], p))

        if self.vec_type == 'pi':
            diagsT = DiagramPreprocessor(use=True,
                                         scalers=[
                                             ([0, 1],
                                              BirthPersistenceTransform())
                                         ]).fit_transform(self.diags)
            PI = PersistenceImage(bandwidth=1.,
                                  weight=lambda x: x[1],
                                  im_range=[0, 10, 0, 10],
                                  resolution=[100, 100])
            res = PI.fit_transform(diagsT)
            # Alternative (disabled) pipeline: the same steps via the tda module
            # (tda.DiagramPreprocessor / tda.PersistenceImage with identical
            # parameters), optionally with kwargs filtered through filterdict and
            # an arctan weight:
            #   kwargs = filterdict(kwargs, ['bandwidth', 'weight', 'im_range', 'resolution'])
            #   kwargs['weight'] = arctan(kwargs['weight'][0], kwargs['weight'][1])
            #   PI = tda.PersistenceImage(**kwargs)

        elif self.vec_type == 'pi_':
            kwargs_ = filterdict(
                kwargs, ['bandwidth', 'weight', 'im_range', 'resolution'])
            diagsT = DiagramPreprocessor(use=True,
                                         scalers=[
                                             ([0,
                                               1], BirthPersistenceTransform())
                                         ]).fit_transform(self.diags)
            PI = PersistenceImage(
                **kwargs_
            )  #(bandwidth=1., weight=lambda x: x[1], im_range=[0, 2, 0, 2], resolution=[20, 20])
            res = PI.fit_transform(diagsT)

        elif self.vec_type == 'pl':
            kwargs_ = filterdict(kwargs, ['num_landscapes', 'resolution'])
            LS = tda.Landscape(**kwargs_)  # e.g. num_landscapes=5, resolution=100
            # Alternative (disabled): Mathieu's implementation, Landscape(resolution=1000),
            # applied to a hard-coded toy diagram for debugging.
            res = LS.fit_transform(self.diags)

        elif self.vec_type == 'pervec':  # permutation vector, i.e. the histogram of the coordinates of a dgm
            dgms = self.dgms
            kwargs = filterdict(kwargs, ['dim'])
            res = coordinate(dgms[0], **kwargs)
            for i in range(1, len(dgms)):
                tmp = coordinate(dgms[i], **kwargs)
                res = np.concatenate((res, tmp), axis=0)
            assert res.shape[0] == len(dgms)

        else:
            raise Exception("Unknown vec_type. You can only choose 'pi', 'pi_', 'pl' or 'pervec'")

        t2 = time.time()
        t = precision_format((t2 - t1), 1)
        self.t = t
        if kwargs.get('keep_zero', None) == True:  # keep all-zero columns in the output
            return normalize_(res, axis=self.axis)
        return rm_zerocol(normalize_(res, axis=self.axis), cor_flag=False)
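The active 'pi' and 'pl' branches implement the standard persistence image and persistence landscape vectorizations. A self-contained sketch of the same two steps using gudhi.representations as a stand-in (the original relies on the DiagramPreprocessor / PersistenceImage / Landscape classes imported in its own module; using gudhi here is an assumption for illustration):

import numpy as np
from gudhi.representations import BirthPersistenceTransform, PersistenceImage, Landscape

# two toy persistence diagrams, each an array of (birth, death) pairs
dgms = [np.array([[0., 4.], [1., 2.], [3., 8.]]),
        np.array([[0., 1.], [2., 5.]])]

# persistence image: map each diagram to (birth, persistence), then rasterize
diagsT = BirthPersistenceTransform().fit_transform(dgms)
pi = PersistenceImage(bandwidth=1., weight=lambda x: x[1],
                      im_range=[0, 10, 0, 10], resolution=[20, 20])
pi_vecs = pi.fit_transform(diagsT)   # shape (2, 400)

# persistence landscape on the raw (birth, death) diagrams
pl = Landscape(num_landscapes=5, resolution=100)
pl_vecs = pl.fit_transform(dgms)     # shape (2, 500)
print(pi_vecs.shape, pl_vecs.shape)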