Ejemplo n.º 1
0
    subdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sub', norm=norm)
    supdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sup', norm=norm)
    epddgms = gs2dgms_parallel(n_jobs=-1, fil=fil, one_hom=True, norm=norm)

    # serial
    # subdgms = gs2dgms(gs, fil=fil, fil_d='sub', norm=norm, one_hom=False) # step2 # TODO: need to add interface
    # supdgms = gs2dgms(gs, fil=fil, fil_d='sup', norm=norm, one_hom=False)  # step2 #
    # epddgms = gs2dgms(gs, fil=fil, norm=norm, one_hom=True)  # step2 # TODO

    dgms = combine_dgms(subdgms, supdgms, epddgms, args)
    dgms = permute_dgms(dgms, permute_flag=args.permute, permute_ratio=0.5)
    dgms_summary(dgms)

    # sw kernel
    swdgms = dgms2swdgms(dgms)
    kwargs = {'bw': args.bw, 'n_directions': 10, 'K': 1, 'p': 1}
    sw_kernel, _ = sw_parallel(swdgms,
                               swdgms,
                               parallel_flag=True,
                               kernel_type='sw',
                               **kwargs)
    print(sw_kernel.shape)

    clf = classifier(labels,
                     labels,
                     method='svm',
                     n_cv=args.n_cv,
                     kernel=sw_kernel)
    clf.svm_kernel_(n_splits=10)
    print(clf.stat)
Ejemplo n.º 2
0
def main(graph, fil, norm, permute, ss, epd, n_cv, flip, feat, feat_kwargs,
         ntda):
    """
    Run one persistence-diagram classification experiment.

    All hyperparameters go here.

    :param graph: graph dataset
    :param fil: filtration function
    :param norm: normalize or not
    :param permute: whether permute dgm
    :param ss: both sublevel and superlevel or not
    :param epd: include extended persistence or not
    :param n_cv: number of cross validation
    :param flip: forwarded to ``combine_dgms`` as ``flip``
    :param feat: featurization to evaluate; one of 'sw', 'pi', 'pss', 'wg',
        'pervec' or 'pf'
    :param feat_kwargs: keyword arguments forwarded to ``sw_parallel`` (for
        'sw', 'pss', 'wg', 'pf') or to ``dgms2vec`` (for 'pervec'); not used
        for 'pi'
    :param ntda: forwarded to ``gs2dgms_parallel`` as ``ntda``
    :return: ``clf.stat`` of the fitted classifier, or ``None`` when
        ``check_duplicate`` reports that these parameters were already seen
    :raises ValueError: if ``feat`` is not one of the supported values
    """

    # NOTE(review): gs is rebound at module level below; presumably
    # gs2dgms_parallel reads it, since the graphs are never passed to it
    # explicitly -- confirm.
    global gs
    print('feat kwargs', feat_kwargs)
    db = get_tda_db()
    params = {
        'graph': graph,
        'fil': fil,
        'norm': norm,
        'permute': permute,
        'ss': ss,
        'epd': epd,
        'n_cv': n_cv,
        'flip': flip,
        'feat': feat,
        'ntda': ntda,
        'feat_kwargs': feat_kwargs
    }
    if check_duplicate(db, params):
        return

    kernel_feats = ('sw', 'pss', 'wg', 'pf')
    vector_feats = ('pi', 'pervec')
    if feat not in kernel_feats + vector_feats:
        # Reject an unsupported feat before the diagram computation below
        # instead of after it.
        raise ValueError('No such feat %s' % feat)

    # The result is not used here; the call is kept in case it has side
    # effects. NOTE(review): confirm whether it can be removed.
    dgms_dir_test(fil=fil, fil_d='sub', norm=norm, graph=graph)

    # labels_only=True would leave gs as None (faster); the graphs are loaded
    # here.
    gs, labels = load_tugraphs(graph, labels_only=False)

    # parallel
    subdgms = gs2dgms_parallel(n_jobs=-1,
                               fil=fil,
                               fil_d='sub',
                               norm=norm,
                               graph=graph,
                               ntda=ntda)
    supdgms = gs2dgms_parallel(n_jobs=-1,
                               fil=fil,
                               fil_d='sup',
                               norm=norm,
                               graph=graph,
                               ntda=ntda)
    epddgms = gs2dgms_parallel(n_jobs=-1,
                               fil=fil,
                               one_hom=True,
                               norm=norm,
                               graph=graph,
                               ntda=ntda)

    dgms = combine_dgms(subdgms, supdgms, epddgms, ss=ss, epd=epd, flip=flip)
    dgms = permute_dgms(dgms, permute_flag=permute)  # old way
    dgms_summary(dgms)

    swdgms = dgms2swdgms(dgms)
    if feat in kernel_feats:
        # Kernel methods share one code path; only 'pf' runs sw_parallel with
        # parallel_flag=False.
        if feat == 'sw':
            print(feat_kwargs)
        k, _ = sw_parallel(swdgms,
                           swdgms,
                           parallel_flag=(feat != 'pf'),
                           kernel_type=feat,
                           **feat_kwargs)
        if feat in ('sw', 'wg'):
            print(k.shape)
        # Only 'sw' passes the confusion-matrix print flag.
        cmargs = {'print_flag': 'off'} if feat == 'sw' else {}
        # labels also fills the feature slot; the precomputed kernel is
        # supplied through kernel=.
        clf = classifier(labels,
                         labels,
                         method='svm',
                         n_cv=n_cv,
                         kernel=k,
                         **cmargs)
        clf.svm_kernel_(n_splits=10)

    elif feat == 'pi':  # vector
        pi_params = {
            'bandwidth': 1.0,
            'weight': (1, 1),
            'im_range': [0, 1, 0, 1],
            'resolution': [5, 5]
        }
        images = merge_dgms(subdgms,
                            supdgms,
                            epddgms,
                            vectype='pi',
                            ss=ss,
                            epd=epd,
                            **pi_params)
        clf = classifier(images, labels, method='svm', n_cv=n_cv)
        clf.svm(n_splits=10)

    else:  # 'pervec'
        cmargs = {'print_flag': 'on'}  # confusion matrix
        pd_vector = dgms2vec(dgms, vectype='pervec', **feat_kwargs)
        clf = classifier(pd_vector, labels, method='svm', n_cv=n_cv, **cmargs)
        clf.svm(n_splits=10)

    print(clf.stat)
    print_line()
    return clf.stat
Ejemplo n.º 3
0
    kwargs = {'h': 0.3}
    g = fil_strategy(g, lapfeat, method=fil_method, viz_flag=False, **kwargs)

    ego = egograph(g,
                   radius=radius,
                   n=len(g),
                   recompute_flag=True,
                   norm_flag=True,
                   print_flag=False)
    egographs = ego.egographs(method='serial')
    dgms = alldgms(egographs,
                   radius=radius,
                   dataset='',
                   recompute_flag=True,
                   method='serial',
                   n=n1 + n2,
                   zigzag=zigzag)  # compute dgms in parallel

    swdgms = dgms2swdgms(dgms)
    kwargs = {'bw': 1, 'n_directions': 10}
    sw_kernel, _ = sw_parallel(swdgms,
                               swdgms,
                               kernel_type='sw',
                               parallel_flag=True,
                               **kwargs)
    clf = classifier(np.zeros((n1 + n2, 10)),
                     labels,
                     method=None,
                     kernel=sw_kernel)
    clf.svm_kernel_()
Ejemplo n.º 4
0
        indicator_labels = [1] * len(fake_dgms) + [-1] * len(another_fake_dgms)
    all_dgms = dgms2swdgms(all_dgms)

    # classify true diagrams from fake ones
    feat_kwargs = {'n_directions': 10, 'bw': 1}
    k, _ = sw_parallel(all_dgms,
                       all_dgms,
                       parallel_flag=True,
                       kernel_type='sw',
                       **feat_kwargs)

    print(k.shape)
    cmargs = {'print_flag': 'off'}  # confusion matrix
    clf = classifier(indicator_labels,
                     indicator_labels,
                     method='svm',
                     n_cv=1,
                     kernel=k,
                     **cmargs)
    clf.svm_kernel_(n_splits=10)
    if not args.viz: sys.exit('-' * 50)

    feat_kwargs = {'n_directions': 10, 'bw': 1}
    k, _ = sw_parallel(all_dgms,
                       all_dgms,
                       parallel_flag=True,
                       kernel_type='sw',
                       **feat_kwargs)
    print(np.diag(k).shape)
    k_diag = np.diag(np.diag(k))
    kdist = diag2m(k_diag) + diag2m(k_diag, row_major=False) - 2 * k
    assert kdist[1, 2] == k[1, 1] + k[2, 2] - 2 * k[1, 2]
Ejemplo n.º 5
0
def main(graph, fil, norm, permute, ss, epd, n_cv, flip, feat, feat_kwargs):
    """
    Compute diagrams for a graph dataset and score one featurization.

    All hyperparameters are supplied as arguments.

    :param graph: graph dataset
    :param fil: filtration function
    :param norm: normalize or not
    :param permute: whether permute dgm
    :param ss: both sublevel and superlevel or not
    :param epd: include extended persistence or not
    :param n_cv: number of cross validation
    :param flip: forwarded to ``combine_dgms`` as ``flip``
    :param feat: 'sw', 'pi', 'pss' or 'wg'; 'pdvector' is an unimplemented
        placeholder
    :param feat_kwargs: extra keyword arguments handed to ``sw_parallel``
    :return: ``clf.stat`` for the implemented featurizations, otherwise
        ``None``
    """

    global gs
    print('kwargs', feat_kwargs)
    # Not read again in this function.
    label_flag = dgms_dir_test(fil=fil, fil_d='sub', norm=norm, graph=graph)[1]
    gs, labels = load_tugraphs(graph, labels_only=True)

    # Sublevel, superlevel and one_hom diagrams, in that order.
    subdgms = gs2dgms_parallel(
        n_jobs=-1, fil=fil, fil_d='sub', norm=norm, graph=graph)
    supdgms = gs2dgms_parallel(
        n_jobs=-1, fil=fil, fil_d='sup', norm=norm, graph=graph)
    epddgms = gs2dgms_parallel(
        n_jobs=-1, fil=fil, one_hom=True, norm=norm, graph=graph)

    combined = combine_dgms(
        subdgms, supdgms, epddgms, ss=ss, epd=epd, flip=flip)
    combined = permute_dgms(combined, permute_flag=permute, permute_ratio=0.5)
    dgms_summary(combined)

    swdgms = dgms2swdgms(combined)

    def gram_matrix(kernel_type, parallel_flag):
        # Kernel matrix of the diagrams against themselves.
        matrix, _ = sw_parallel(swdgms,
                                swdgms,
                                parallel_flag=parallel_flag,
                                kernel_type=kernel_type,
                                **feat_kwargs)
        return matrix

    def score_kernel(matrix):
        # Fit the kernel SVM, print its statistics and hand them back.
        model = classifier(
            labels, labels, method='svm', n_cv=n_cv, kernel=matrix)
        model.svm_kernel_(n_splits=10)
        print(model.stat)
        return model.stat

    if feat == 'sw':
        print(feat_kwargs)
        return score_kernel(gram_matrix('sw', True))

    if feat == 'pi':
        images = merge_dgms(subdgms,
                            supdgms,
                            epddgms,
                            vectype='pi',
                            ss=ss,
                            epd=epd,
                            bandwidth=1.0,
                            weight=(1, 1),
                            im_range=[0, 1, 0, 1],
                            resolution=[5, 5])
        model = classifier(images, labels, method='svm', n_cv=n_cv)
        model.svm(n_splits=10)
        return model.stat

    if feat == 'pss':
        gram = gram_matrix('pss', False)
        print(gram.shape, gram, np.max(gram))
        return score_kernel(gram)

    if feat == 'wg':
        gram = gram_matrix('wg', True)
        print(gram.shape)
        return score_kernel(gram)

    if feat == 'pdvector':
        # Placeholder: nothing is computed for this featurization.
        pass
    return None
Ejemplo n.º 6
0
    if args.kernel == 'sw':
        swdgms = dgms2swdgms(dgms)
        for bw in [0.1, 1, 10, 100]:
            feat_kwargs = {'n_directions': 10, 'bw': bw}
            print(f'star computing kernel...')
            k, _ = sw_parallel(swdgms,
                               swdgms,
                               parallel_flag=True,
                               kernel_type='sw',
                               **feat_kwargs)
            print(k.shape)

            cmargs = {'print_flag': 'off'}  # confusion matrix
            clf = classifier(labels,
                             labels,
                             method='svm',
                             n_cv=1,
                             kernel=k,
                             **cmargs)
            clf.svm_kernel_(n_splits=10)
        sys.exit()

    # convert to vector
    # kwargs = {'num_landscapes': 5, 'resolution': 100, 'keep_zero': True}
    # x = dgms2vec(dgms, vectype='pl', **kwargs)
    kwargs = {'dim': 100}
    print('using pervec')
    x = dgms2vec(dgms, vectype='pervec', **kwargs)

    if args.random: x = np.random.random(x.shape)
    if args.norm: x = normalize_(x, axis=0)
Ejemplo n.º 7
0
def main(idx, n_iter, clf, test_size, vec, method, seg, permute, norm):
    """
    Vectorize the diagrams of one shape, build per-face features, classify.

    :param idx: shape index; used to load the diagrams, labels and face data
    :param n_iter: forwarded to ``eigenpro`` as ``max_iter``
    :param clf: classifier selector; 'rf' runs ``classifier(...).svm``, any
        other value runs ``eigenpro``
    :param test_size: forwarded to ``eigenpro`` as ``test_size``
    :param vec: vectorization type; one of 'pvector', 'pl' or 'pervec'
    :param method: not used by this function
    :param seg: forwarded to ``loady`` as ``seg``
    :param permute: if truthy, diagrams go through ``permute_dgms`` with
        ``permute_flag=True, seed_flag=True``
    :param norm: if truthy, the face features go through
        ``normalize(X, axis=0)``
    :return: whatever the selected classifier call returns
    :raises Exception: if ``vec`` is not a supported vectorization type
    """

    # Report the first category whose inclusive index range contains idx.
    for bounds, category in prince_cat().items():
        if bounds[0] <= idx <= bounds[1]:
            print(f'idx {idx} is {category}')
            break

    # seg one shape
    dgms = loaddgm(str(idx), form='dionysus')
    dgms = flip_dgms(dgms)
    if permute:
        dgms = permute_dgms(dgms, permute_flag=True, seed_flag=True)

    # vectorize
    if vec == 'pvector':
        dgm_vector = dgms2vec(dgms, vectype='pvector')
    elif vec == 'pl':
        kwargs = {'num_landscapes': 5, 'resolution': 100}
        dgm_vector = dgms2vec(dgms, vectype='pl', **kwargs)
    elif vec == 'pervec':
        kwargs = {'dim': 300}
        dgm_vector = dgms2vec(dgms, vectype='pervec', **kwargs)
        dgm_vector = normalize_(dgm_vector)
    else:
        raise Exception(f'No vec like {vec}')

    y = loady(model=idx, counter=True, seg=seg)

    # NOTE(review): n_node is never read; the node_num call is kept because
    # its side effects are not visible here.
    n_face, n_node = face_num(str(idx)), node_num(str(idx))
    face_x = np.zeros((n_face, dgm_vector.shape[1]))
    face_indices = face_idx(str(idx))
    # Each face's feature row is the sum of the rows of its three indices.
    for i in range(n_face):
        v1, v2, v3 = (int(v) for v in face_indices[i])
        face_x[i, :] = dgm_vector[v1, :] + dgm_vector[v2, :] + dgm_vector[
            v3, :]
    print(face_x.shape, y.shape)

    X, Y = face_x, y
    if norm:
        X = normalize(X, axis=0)
    # Report the matrices that are actually classified below.
    print(f'X is of shape {X.shape} and Y is of shape {Y.shape}\n')

    # classifier
    if clf == 'rf':
        # NOTE(review): the 'rf' selector runs the SVM path -- confirm this
        # is intended.
        model = classifier(X, Y, method='svm', n_cv=1)
        res = model.svm(n_splits=10)  # todo res format
    else:
        kwargs = {}
        res = eigenpro(X,
                       Y,
                       max_iter=n_iter,
                       test_size=test_size,
                       bd=1,
                       **kwargs)
    print('-' * 150)

    return res
Ejemplo n.º 8
0
    for i in range(n_face):
        idx1, idx2, idx3 = face_indices[i]
        idx1, idx2, idx3 = int(idx1), int(idx2), int(idx3)
        face_x[i, :] = dgm_vector[idx1][:] + dgm_vector[idx2, :] + dgm_vector[
            idx3, :]
    print(face_x.shape, y.shape)
    X.append(face_x)
    Y.append(y)

    X, Y = np.concatenate(X), np.concatenate(Y)
    if args.norm: X = normalize(X, axis=0)

    # classifier
    print()
    if args.clf == 'rf':
        clf = classifier(X, Y, method='svm', n_cv=1)
        clf.svm(n_splits=10)
    else:
        kwargs = {}
        res = eigenpro(X,
                       Y,
                       max_iter=args.n_iter,
                       test_size=args.test_size,
                       bd=1,
                       **kwargs)
    print('-' * 150)

    sys.exit()

    # check consistency between loady and load_labels
    y = loady(model=args.idx, counter=True, seg=args.seg)
Ejemplo n.º 9
0
    rs = args.rs
    radius, fil = 1, args.fil
    n_node, p = 100, args.p
    sizes = [n_node] * 3
    permute_flag = True
    labels = [0] * n_node + [1] * n_node + [2] * n_node
    probs = [[0.5, p, p],
             [p, 0.5, p],
             [p, p, 0.5]]

    g = stochastic_block_model(sizes, probs, seed=rs)
    lp = LaplacianEigenmaps(d=1)
    lp.learn_embedding(g, weight='weight')
    lapfeat = lp.get_embedding()
    degfeat = np.array(list(dict(nx.degree(g)).values())).reshape(3 * n_node, 1)
    clf = classifier(degfeat, labels, method=None)
    clf.svm()

    for n in g.nodes():
        g.node[n]['lap'] = float(lapfeat[n,0])
    g = add_edgeval(g, fil=fil)

    ego = egograph(g, radius=radius, n = len(g), recompute_flag=True, norm_flag=True, print_flag=False)
    egographs = ego.egographs(method='serial')
    dgms = alldgms(egographs, radius=radius, dataset='', recompute_flag=True, method='serial', n=n_node)  # compute dgms in parallel


    if permute_flag: dgms = permute_dgms(dgms)
    dgms_summary(dgms)

    swdgms = dgms2swdgms(dgms)
Ejemplo n.º 10
0
    # viz fv value
    # val = dict(nx.get_node_attributes(gs[i], 'fv')).values()
    # plt.plot(val)
    # plt.title('q: %s, i: %s'%(q, i))
    # plt.show()
    # sys.exit()

    print('Finish computing lapfeat')
    dgms = alldgms(gs,
                   radius=float('inf'),
                   dataset='',
                   recompute_flag=True,
                   method='serial',
                   n=2 * n,
                   zigzag=zigzag)  # compute dgms in parallel
    print('Finish computing dgms')
    swdgms = dgms2swdgms(dgms)

    feat_kwargs = {'n_directions': 10, 'bw': 1}
    sw_kernel, _ = sw_parallel(swdgms,
                               swdgms,
                               kernel_type='sw',
                               parallel_flag=True,
                               **feat_kwargs)
    clf = classifier(np.zeros((len(labels), 10)),
                     labels,
                     method=None,
                     kernel=sw_kernel)
    print(clf.svm_kernel_())
    print(p, q, edge_kwargs)
Ejemplo n.º 11
0
                   n=len(g),
                   recompute_flag=True,
                   norm_flag=True,
                   print_flag=False)
    egographs = ego.egographs(method='parallel')
    dgms = alldgms(egographs,
                   radius=radius,
                   dataset='',
                   recompute_flag=True,
                   method='serial',
                   n=n_node,
                   zigzag=zigzag)  # compute dgms in parallel
    dgms_summary(dgms)
    print_dgm(dgms[0])

    swdgms = dgms2swdgms(dgms)
    for bw in [10]:
        kwargs = {'bw': bw, 'K': 1, 'p': 1}  # TODO: K and p is dummy here
        sw_kernel, _ = sw_parallel(swdgms,
                                   swdgms,
                                   kernel_type='sw',
                                   parallel_flag=True,
                                   **kwargs)
        sw_distancem = np.log(sw_kernel) * (-2)
        # viz_matrix(sw_distancem)
        clf = classifier(np.zeros((3 * n_node, 10)),
                         labels,
                         method=None,
                         kernel=sw_kernel)
        clf.svm_kernel_()