def main(graph, fil, norm, permute, ss, epd, n_cv, flip, feat, feat_kwargs, ntda):
    """
    Run one diagram-classification experiment. All hyperparameters go here.

    :param graph: graph dataset
    :param fil: filtration function
    :param norm: normalize or not
    :param permute: whether permute dgm
    :param ss: both sublevel and superlevel or not
    :param epd: include extended persistence or not
    :param n_cv: number of cross validation
    :param flip: passed through to combine_dgms
    :param feat: which feature to classify with; one of 'sw', 'pss', 'wg',
        'pf' (kernels computed by sw_parallel), 'pi' or 'pervec'
    :param feat_kwargs: dict of keyword arguments forwarded to sw_parallel
        (kernel feats) or dgms2vec ('pervec'); not used by 'pi'
    :param ntda: passed through to gs2dgms_parallel and stored in the
        parameter record
    :return: clf.stat of the fitted classifier, or None when
        check_duplicate(db, params) is truthy
    :raises Exception: if feat is not one of the values listed above
    """
    global gs  # the loaded graphs are published as a module-level global
    print('feat kwargs', feat_kwargs)

    db = get_tda_db()
    params = {
        'graph': graph,
        'fil': fil,
        'norm': norm,
        'permute': permute,
        'ss': ss,
        'epd': epd,
        'n_cv': n_cv,
        'flip': flip,
        'feat': feat,
        'ntda': ntda,
        'feat_kwargs': feat_kwargs,
    }
    if check_duplicate(db, params):
        return

    # The result of this call is not used here; the call is retained in case
    # it has side effects -- TODO confirm and remove.
    dgms_dir_test(fil=fil, fil_d='sub', norm=norm, graph=graph)

    # labels_only true means gs is None. Turned on for high speed
    gs, labels = load_tugraphs(graph, labels_only=False)

    # parallel
    subdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sub', norm=norm,
                               graph=graph, ntda=ntda)
    supdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sup', norm=norm,
                               graph=graph, ntda=ntda)
    epddgms = gs2dgms_parallel(n_jobs=-1, fil=fil, one_hom=True, norm=norm,
                               graph=graph, ntda=ntda)

    dgms = combine_dgms(subdgms, supdgms, epddgms, ss=ss, epd=epd, flip=flip)
    dgms = permute_dgms(dgms, permute_flag=permute)  # old way
    dgms_summary(dgms)
    swdgms = dgms2swdgms(dgms)

    # Kernel feats share one code path and differ only in these settings:
    # feat -> (parallel_flag, print the kernel shape?, extra classifier kwargs)
    kernel_cfg = {
        'sw': (True, True, {'print_flag': 'off'}),  # print_flag: confusion matrix
        'pss': (True, False, {}),
        'wg': (True, True, {}),
        'pf': (False, False, {}),
    }

    if feat in kernel_cfg:
        parallel_flag, show_shape, clf_kwargs = kernel_cfg[feat]
        if feat == 'sw':
            print(feat_kwargs)
        k, _ = sw_parallel(swdgms, swdgms, parallel_flag=parallel_flag,
                           kernel_type=feat, **feat_kwargs)
        if show_shape:
            print(k.shape)
        # With a precomputed kernel the classifier receives labels in place
        # of a feature matrix.
        clf = classifier(labels, labels, method='svm', n_cv=n_cv, kernel=k,
                         **clf_kwargs)
        clf.svm_kernel_(n_splits=10)
    elif feat == 'pi':  # vector
        # Kept separate from `params` so the experiment record above is not
        # overwritten.
        pi_params = {
            'bandwidth': 1.0,
            'weight': (1, 1),
            'im_range': [0, 1, 0, 1],
            'resolution': [5, 5],
        }
        images = merge_dgms(subdgms, supdgms, epddgms, vectype='pi', ss=ss,
                            epd=epd, **pi_params)
        clf = classifier(images, labels, method='svm', n_cv=n_cv)
        clf.svm(n_splits=10)
    elif feat == 'pervec':
        cmargs = {'print_flag': 'on'}  # confusion matrix
        pd_vector = dgms2vec(dgms, vectype='pervec', **feat_kwargs)
        clf = classifier(pd_vector, labels, method='svm', n_cv=n_cv, **cmargs)
        clf.svm(n_splits=10)
    else:
        raise Exception('No such feat %s' % feat)

    print(clf.stat)
    print_line()
    return clf.stat
one_hom=True, norm=norm, graph=graph, ntda=False, debug_flag=False) dgms = combine_dgms(subdgms, supdgms, epddgms, ss=True, epd=False, flip=False) true_dgms = dgms if False: fake_dgms = permute_dgms(true_dgms, permute_flag=True, seed=np.random.randint(10000), seed_flag=seed_flag) # sanity_dgms = permute_dgms(true_dgms, permute_flag=True, seed=args.s1, seed_flag=seed_flag) another_fake_dgms = permute_dgms(true_dgms, permute_flag=True, seed=np.random.randint(10000), seed_flag=seed_flag) else: fake_dgms = permute_dgms(true_dgms, permute_flag=True, seed=args.s1, seed_flag=seed_flag) # sanity_dgms = permute_dgms(true_dgms, permute_flag=True, seed=args.s1, seed_flag=seed_flag) another_fake_dgms = permute_dgms(true_dgms, permute_flag=True, seed=args.s2,
# load graphs gs, labels = load_graphs(dataset=args.graph) # parallel subdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sub', norm=norm) supdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sup', norm=norm) epddgms = gs2dgms_parallel(n_jobs=-1, fil=fil, one_hom=True, norm=norm) # serial # subdgms = gs2dgms(gs, fil=fil, fil_d='sub', norm=norm, one_hom=False) # step2 # TODO: need to add interface # supdgms = gs2dgms(gs, fil=fil, fil_d='sup', norm=norm, one_hom=False) # step2 # # epddgms = gs2dgms(gs, fil=fil, norm=norm, one_hom=True) # step2 # TODO dgms = combine_dgms(subdgms, supdgms, epddgms, args) dgms = permute_dgms(dgms, permute_flag=args.permute, permute_ratio=0.5) dgms_summary(dgms) # sw kernel swdgms = dgms2swdgms(dgms) kwargs = {'bw': args.bw, 'n_directions': 10, 'K': 1, 'p': 1} sw_kernel, _ = sw_parallel(swdgms, swdgms, parallel_flag=True, kernel_type='sw', **kwargs) print(sw_kernel.shape) clf = classifier(labels, labels, method='svm',
version = '40' train_dataset, test_dataset = load_modelnet(version, point_flag=False) if version == '40': train_dataset = train_dataset[:3632] + train_dataset[ 3633:3763] + train_dataset[3764:] all_dataset = train_dataset + test_dataset labels = [int(data.y) for data in all_dataset] graph, fil = 'mn' + version, args.fil n = len(labels) dgms = [] dgms = Parallel(n_jobs=-1, backend='multiprocessing')( delayed(load_clfdgm)(idx=i, ntda=args.ntda) for i in range(n)) if args.permute: dgms = permute_dgms(dgms, permute_flag=True) if args.kernel == 'sw': swdgms = dgms2swdgms(dgms) for bw in [0.1, 1, 10, 100]: feat_kwargs = {'n_directions': 10, 'bw': bw} print(f'star computing kernel...') k, _ = sw_parallel(swdgms, swdgms, parallel_flag=True, kernel_type='sw', **feat_kwargs) print(k.shape) cmargs = {'print_flag': 'off'} # confusion matrix clf = classifier(labels,
# fake fake test graph = 'imdb_binary' # 'reddit_binary' norm = True fil = 'ricci' gs, labels = load_tugraphs(graph) # subdgms = gs2dgms(gs, fil=fil, fil_d='sub', norm=norm, graph = graph, ntda = False, debug_flag = False) subdgms = gs2dgms_parallel(gs, fil=fil, fil_d='sub', norm=norm, graph=graph, ntda=False, debug_flag=False) true_dgms = subdgms fake_dgms = permute_dgms(true_dgms, permute_flag=True, seed=42) another_fake_dgms = permute_dgms(true_dgms, permute_flag=True, seed=41) print_dgm(true_dgms[0]) print('-' * 20) print_dgm(fake_dgms[0]) print('-' * 20) print_dgm(another_fake_dgms[0]) all_dgms = true_dgms + fake_dgms all_dgms = dgms2swdgms(all_dgms) feat_kwargs = {'n_directions': 10, 'bw': 1} k, _ = sw_parallel(all_dgms, all_dgms, parallel_flag=True,
def main(graph, fil, norm, permute, ss, epd, n_cv, flip, feat, feat_kwargs):
    """
    Run one diagram-classification experiment. All hyperparameters go here.

    :param graph: graph dataset
    :param fil: filtration function
    :param norm: normalize or not
    :param permute: whether permute dgm
    :param ss: both sublevel and superlevel or not
    :param epd: include extended persistence or not
    :param n_cv: number of cross validation
    :param flip: passed through to combine_dgms
    :param feat: which feature to classify with; one of 'sw', 'pss', 'wg'
        (kernels computed by sw_parallel), 'pi', or 'pdvector' (a stub that
        does nothing)
    :param feat_kwargs: dict of keyword arguments forwarded to sw_parallel;
        not used by 'pi'
    :return: clf.stat of the fitted classifier; None for 'pdvector'
    :raises Exception: if feat is not one of the values listed above
    """
    global gs  # the loaded graphs are published as a module-level global
    print('kwargs', feat_kwargs)

    # The result of this call is not used here; the call is retained in case
    # it has side effects -- TODO confirm and remove.
    dgms_dir_test(fil=fil, fil_d='sub', norm=norm, graph=graph)
    gs, labels = load_tugraphs(graph, labels_only=True)

    # parallel
    subdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sub', norm=norm,
                               graph=graph)
    supdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sup', norm=norm,
                               graph=graph)
    epddgms = gs2dgms_parallel(n_jobs=-1, fil=fil, one_hom=True, norm=norm,
                               graph=graph)

    dgms = combine_dgms(subdgms, supdgms, epddgms, ss=ss, epd=epd, flip=flip)
    dgms = permute_dgms(dgms, permute_flag=permute, permute_ratio=0.5)
    dgms_summary(dgms)
    swdgms = dgms2swdgms(dgms)

    # Kernel feats share one code path; only parallel_flag and the debug
    # output differ between them.
    kernel_parallel = {'sw': True, 'pss': False, 'wg': True}

    if feat in kernel_parallel:
        if feat == 'sw':
            print(feat_kwargs)
        k, _ = sw_parallel(swdgms, swdgms,
                           parallel_flag=kernel_parallel[feat],
                           kernel_type=feat, **feat_kwargs)
        if feat == 'pss':
            print(k.shape, k, np.max(k))
        elif feat == 'wg':
            print(k.shape)
        # With a precomputed kernel the classifier receives labels in place
        # of a feature matrix.
        clf = classifier(labels, labels, method='svm', n_cv=n_cv, kernel=k)
        clf.svm_kernel_(n_splits=10)
        print(clf.stat)
        return clf.stat

    if feat == 'pi':
        params = {
            'bandwidth': 1.0,
            'weight': (1, 1),
            'im_range': [0, 1, 0, 1],
            'resolution': [5, 5],
        }
        images = merge_dgms(subdgms, supdgms, epddgms, vectype='pi', ss=ss,
                            epd=epd, **params)
        clf = classifier(images, labels, method='svm', n_cv=n_cv)
        clf.svm(n_splits=10)
        return clf.stat

    if feat == 'pdvector':
        # Placeholder branch: nothing is computed for this feat yet.
        return None

    # An unrecognized feat used to fall through and return None silently
    # after all diagrams had been computed; fail loudly instead.
    raise Exception('No such feat %s' % feat)
# load graphs gs, labels = load_graphs(dataset=args.graph) # step 1 # parallel subdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sub', norm=norm) supdgms = gs2dgms_parallel(n_jobs=-1, fil=fil, fil_d='sup', norm=norm) epddgms = gs2dgms_parallel(n_jobs=-1, fil=fil, one_hom=True, norm=norm) # serial # subdgms = gs2dgms(gs, fil=fil, fil_d='sub', norm=norm, one_hom=False) # step2 # TODO: need to add interface # supdgms = gs2dgms(gs, fil=fil, fil_d='sup', norm=norm, one_hom=False) # step2 # # epddgms = gs2dgms(gs, fil=fil, norm=norm, one_hom=True) # step2 # TODO dgms = combine_dgms(subdgms, supdgms, epddgms, args) dgms = permute_dgms(dgms, permute_flag=args.permute) dgms_summary(dgms) # sw kernel swdgms = dgms2swdgms(dgms) kwargs = {'bw': args.bw, 'n_directions': 10, 'K': 1, 'p': 1} sw_kernel, _ = sw_parallel(swdgms, swdgms, parallel_flag=True, kernel_type='sw', **kwargs) print(sw_kernel.shape) clf = classifier(labels, labels, method='svm',
def main(idx, n_iter, clf, test_size, vec, method, seg, permute, norm):
    """
    Vectorize the diagrams of one shape, build per-face features and
    classify them.

    :param idx: shape index; used to look up the category and to load the
        diagrams, labels and faces of that shape
    :param n_iter: passed to eigenpro as max_iter (only when clf != 'rf')
    :param clf: 'rf' runs classifier(..., method='svm').svm(n_splits=10);
        any other value runs eigenpro
    :param test_size: passed to eigenpro
    :param vec: vectorization type, one of 'pvector', 'pl', 'pervec'
    :param method: not used by this function
    :param seg: passed to loady
    :param permute: if truthy, diagrams are permuted with permute_dgms
    :param norm: if truthy, the face features are passed through
        normalize(X, axis=0)
    :return: whatever clf.svm(...) or eigenpro(...) returns
    :raises Exception: if vec is not one of the supported types
    """
    # Report which category the index falls in; keys are indexed as
    # (low, high) bounds.
    for bounds, category in prince_cat().items():
        if bounds[0] <= idx <= bounds[1]:
            print(f'idx {idx} is {category}')
            break

    shape_id = str(idx)

    # seg one shape
    dgms = loaddgm(shape_id, form='dionysus')
    dgms = flip_dgms(dgms)
    if permute:
        dgms = permute_dgms(dgms, permute_flag=True, seed_flag=True)

    # vectorize
    if vec == 'pvector':
        dgm_vector = dgms2vec(dgms, vectype='pvector')
    elif vec == 'pl':
        dgm_vector = dgms2vec(dgms, vectype='pl',
                              num_landscapes=5, resolution=100)
    elif vec == 'pervec':
        dgm_vector = dgms2vec(dgms, vectype='pervec', dim=300)
        dgm_vector = normalize_(dgm_vector)
    else:
        raise Exception(f'No vec like {vec}')

    y = loady(model=idx, counter=True, seg=seg)

    # The feature of a face is the sum of the vectors at its three indices.
    n_face = face_num(shape_id)
    face_indices = face_idx(shape_id)
    face_x = np.zeros((n_face, dgm_vector.shape[1]))
    for i in range(n_face):
        v1, v2, v3 = (int(v) for v in face_indices[i])
        face_x[i, :] = dgm_vector[v1, :] + dgm_vector[v2, :] + dgm_vector[v3, :]
    print(face_x.shape, y.shape)

    X, Y = face_x, np.asarray(y)
    if norm:
        X = normalize(X, axis=0)
    # Report the matrices that are actually handed to the classifier.
    print(f'X is of shape {X.shape} and Y is of shape {Y.shape}\n')

    # classifier
    if clf == 'rf':
        # NOTE(review): the 'rf' option currently trains with method='svm'.
        model = classifier(X, Y, method='svm', n_cv=1)
        res = model.svm(n_splits=10)  # todo res format
    else:
        res = eigenpro(X, Y, max_iter=n_iter, test_size=test_size, bd=1)

    print('-' * 150)
    return res
args = parser.parse_args() print(args) print(prince_cat()) for k, v in prince_cat().items(): if args.idx >= k[0] and args.idx <= k[1]: print(f'idx {args.idx} is {v}') cat = v break # # seg one shape idx = args.idx dgms = loaddgm(str(idx), form='dionysus') dgms = flip_dgms(dgms) if args.permute: dgms = permute_dgms(dgms, permute_flag=True, seed_flag=True) # vectorize if args.vec == 'pvector': dgm_vector = dgms2vec( dgms, vectype='pvector' ) # print(np.shape(pd_vector), np.shape(pd_vectors)) elif args.vec == 'pl': kwargs = {'num_landscapes': 5, 'resolution': 100} dgm_vector = dgms2vec(dgms, vectype='pl', **kwargs) elif args.vec == 'pi_': params = { 'bandwidth': 1.0, 'weight': lambda x: x[1],
# Embed the graph with `lp` and keep the resulting per-node embedding.
lp.learn_embedding(g, weight='weight')
lapfeat = lp.get_embedding()

# Baseline: classify on node degree alone.
degrees = list(dict(nx.degree(g)).values())
degfeat = np.array(degrees).reshape(3 * n_node, 1)
clf = classifier(degfeat, labels, method=None)
clf.svm()

# Store the first embedding coordinate on every node as attribute 'lap'.
# `g.nodes[n]` replaces `g.node[n]`, which was removed in networkx 2.4.
for n in g.nodes():
    g.nodes[n]['lap'] = float(lapfeat[n, 0])
g = add_edgeval(g, fil=fil)

# Ego graphs and their diagrams, both computed serially.
ego = egograph(g, radius=radius, n=len(g), recompute_flag=True,
               norm_flag=True, print_flag=False)
egographs = ego.egographs(method='serial')
dgms = alldgms(egographs, radius=radius, dataset='', recompute_flag=True,
               method='serial', n=n_node)
if permute_flag:
    dgms = permute_dgms(dgms)
dgms_summary(dgms)

# Kernel between all pairs of diagrams, then a kernel SVM.
swdgms = dgms2swdgms(dgms)
kwargs = {'bw': 1, 'n_directions': 10}
sw_kernel, _ = sw_parallel(swdgms, swdgms, kernel_type='sw',
                           parallel_flag=True, **kwargs)
# The zero matrix only fills the feature argument; the kernel is passed
# separately.
clf = classifier(np.zeros((3 * n_node, 10)), labels, method=None,
                 kernel=sw_kernel)
clf.svm_kernel_()

# NOTE(review): the script exits here, so the statements below never run.
sys.exit()
model = gnn_bl(g, d=2)
gnnfeat = model.feat()