def pipeline1(i, beta=np.array([0, 0, 0, 0, 1]), hop_flag='n', basep=0,
              debug='off', rs=100, edge_fil='off'):
    """
    Calculate persistence diagrams for the i-th graph (may be disconnected).

    Reads the module-level globals ``data`` and ``graphs_``; ``graphs_[i]`` is
    iterated as the sequence of subgraphs belonging to the i-th graph.

    :param i: index of the graph inside the global ``graphs_``
    :param beta: forwarded to ``fv`` (original note: [deg, ricci, fiedler, cc];
                 NOTE(review): the default has five entries - confirm the layout)
    :param hop_flag: forwarded to ``fv``
    :param basep: forwarded to ``fv``
    :param debug: 'on' prints one progress line per subgraph
    :param rs: random seed, forwarded to ``fv``
    :param edge_fil: forwarded to ``fv``
    :return: ``(subgraphs, dgm_, dgm_sub, dgm_super, epd_dgm)``. ``dgm_`` is the
             running ``add_dgms`` combination over all subgraphs; the last three
             are the values from the last subgraph processed, or the ``(0, 0)``
             placeholder diagrams when ``graphs_[i]`` is empty.
    """
    # data: mutag dict
    assert 'data' in globals()
    assert 'graphs_' in globals()

    subgraphs = []
    dgm_ = d.Diagram([(0, 0)])
    dgm_sub = d.Diagram([(0, 0)])
    dgm_super = d.Diagram([(0, 0)])
    epd_dgm = d.Diagram([(0, 0)])
    # Stays None when graphs_[i] has no subgraphs, so the debug print below
    # cannot hit an unbound name.
    dgm = None

    for k in range(len(graphs_[i])):
        # prepare
        if debug == 'on':
            print('Processing graph %s, subgraph %s' % (i, k))
        g = graphs_[i][k]
        graphassertion(g)
        g = fv(g, beta, hop_flag=hop_flag, basep=basep, rs=rs, edge_fil=edge_fil)  # belong to pipe1

        # sublevel diagram: edge values taken as the max of the endpoints
        (g, fv_list) = add_function_value(g, fv_input='fv_test', edge_value='max')  # belong to pipe1
        dgm_sub = get_diagram(g, key='fv', subflag='True')

        # superlevel diagram: edge values taken as the min, then flipped
        (g, fv_list) = add_function_value(g, fv_input='fv_test', edge_value='min')  # belong to pipe1
        dgm_super = get_diagram(g, key='fv', subflag='False')
        dgm_super = flip_dgm(dgm_super)
        epd_dgm = get_diagram(g, key='fv', one_homology_flag=True)

        dgm = add_dgms(dgm_sub, dgm_super)
        dgm_ = add_dgms(dgm_, dgm)
        subgraphs.append(g)

    # Trailing comma kept from the original (Python 2 "no newline" idiom); under
    # Python 3 it only builds a throw-away tuple.
    if i % 50 == 0:
        print('.'),
    if i % 100 == 0 and dgm is not None:
        print_dgm(dgm)
    return subgraphs, dgm_, dgm_sub, dgm_super, epd_dgm
def _load_first_diagram(path, remove_infinity=False):
    """Unpickle ``path`` and wrap its ``['dgms'][0]`` array in a dionysus Diagram."""
    # NOTE: pickle can execute arbitrary code - only load files you trust.
    with open(path, 'rb') as handle:
        rips = pickle.load(handle)
    points = rips['dgms'][0]
    if remove_infinity:
        # keep only the rows whose second column (death) is finite
        points = points[points[:, 1] < np.inf]
    return dion.Diagram(list(points))


def compute_bottleneck_distance(all_seeds_rips_files, remove_infinity=False, compute_wass_distance=False, use_persim=False, M=10):
    """
    Build the pairwise distance matrix between the diagrams stored in pickles.

    Each file is unpickled once and the array found under ``['dgms'][0]`` is
    turned into a diagram; every pair of diagrams is then compared.

    :param all_seeds_rips_files: sequence of '/'-separated pickle paths
    :param remove_infinity: drop points whose death is infinite before comparing
    :param compute_wass_distance: compute a Wasserstein-type distance instead of
                                  the bottleneck distance
    :param use_persim: use ``persim`` instead of dionysus for the distance
    :param M: forwarded to ``persim.sliced_wasserstein_kernel``
    :return: ``(matrix, x)`` - ``matrix`` is a list with one numpy row per file,
             ``x`` the list of labels ``model-dataset-seed-filename``
    """
    # Load every diagram once up front; the original re-read each file inside
    # the inner loop (n * n unpickles) and never closed the handles.
    diagrams = [_load_first_diagram(path, remove_infinity) for path in all_seeds_rips_files]

    matrix = []
    x = []
    for file1, d1 in zip(all_seeds_rips_files, diagrams):
        print('Computing file: {}'.format(file1))
        # example file1: LTHT/remote_data/saves/alexnet_nmp/mnist/42/pickle/8.pickle
        # NOTE(review): with that example path these indices would produce
        # 'remote_data-saves-alexnet_nmp-8', not 'alexnet_nmp-mnist-42-8' -
        # confirm the real directory depth.
        parts = file1.split('/')
        seed, model_name, dataset, file1_name = parts[-5], parts[-7], parts[-6], parts[-1]
        x.append(model_name + "-" + dataset + "-" + seed + "-" + file1_name.split(".")[0])

        row = np.zeros(len(diagrams))
        for j, d2 in enumerate(diagrams):
            if compute_wass_distance:
                if use_persim:
                    row[j] = persim.sliced_wasserstein_kernel(d1, d2, M=M)
                else:
                    row[j] = dion.wasserstein_distance(d1, d2, q=2)
            elif use_persim:
                row[j] = persim.bottleneck(d1, d2)
            else:
                row[j] = dion.bottleneck_distance(d1, d2)
        matrix.append(row)
    return matrix, x
def add_dgm(dgm1, dgm2):
    """
    Overlay two diagrams.

    Both inputs go through ``dgm2diag`` and the results are combined with ``+``.
    When the combined data is empty, a diagram holding only (0, 0) is returned.
    """
    merged = dgm2diag(dgm1) + dgm2diag(dgm2)
    if len(merged) != 0:
        return d.Diagram(merged)
    return d.Diagram([[0, 0]])
def bad_example():
    """
    Print the bottleneck distance, then the bottleneck distance with edge,
    for two hand-written single-point diagrams.
    """
    import dionysus as d

    # The first assignment of each diagram is kept from the original even
    # though it is overwritten immediately by the next one.
    first = d.Diagram([(1, 2.07464)])
    first = d.Diagram([(1, 2.04287)])
    second = d.Diagram([(1, 1.68001), (1, 1.68001), (1, 1.68001)])  # this one doesn't work
    second = d.Diagram([(1, 1.71035)])
    # second = d.Diagram([(1,1.68), (1,1.68), (1,1.68)])  # But this one works

    plain = d.bottleneck_distance(first, second)
    print(plain)
    with_edge = d.bottleneck_distance_with_edge(first, second)
    print(with_edge)
def get_diagram(self, g, key='fv', subflag='True', one_homology_flag=False, parallel_flag=False, zigzag=False):
    """
    Compute a persistence diagram of a graph carrying a filtration value.

    :param g: networkx graph with fv computed on each node and edge
    :param key: fv. This is the key to access filtration function value
    :param subflag: 'True' if sublevel filtration is used, 'False' if superlevel
                    filtration is used (string flags, not booleans)
    :param one_homology_flag: when set, return ``post_process`` applied to the
                              second element of ``self.epd(g, pd_flag=False)``
    :param parallel_flag: when set, the diagram is converted with ``dgm2diag``
                          before being returned
    :param zigzag: set to True to use the combined filtration (filtration for
                   nodes and edges set separately); only affects the sublevel
                   diagram
    :return: persistence diagram (index 0 of what ``compute_PD`` returns, plus
             one extra (min, max) point)
    :raises Exception: if ``subflag`` is neither 'True' nor 'False'
    """
    # only return 0-homology of sublevel filtration TODO: include one homology
    # type can be tuple or pd. tuple can be parallized, pd cannot.
    g = nx.convert_node_labels_to_integers(g)
    simplices = self.get_simplices(g, key=key)

    if one_homology_flag:
        epd_dgm = self.epd(g, pd_flag=False)[1]
        return self.post_process(epd_dgm)

    if subflag not in ('True', 'False'):
        raise Exception('subflag can be either True or False')

    # `g.nodes[n]` replaces `g.node[n]`, which was removed in NetworkX 2.4.
    node_values = [g.nodes[n][key] for n in g.nodes()]
    _min = min(node_values)
    _max = max(node_values) + 1e-5  # avoid the extra node lies on diagonal

    # Only the requested filtration is computed.
    if subflag == 'True':
        if zigzag:
            dgms = self.compute_PD(simplices, zigzag=True)
        else:
            dgms = self.compute_PD(simplices, sub=True)
        extra = d.Diagram([(_min, _max)])
    else:
        dgms = self.compute_PD(simplices, sub=False)
        extra = d.Diagram([(_max, _min)])

    dgm = dgms[0]
    # A one-point Diagram is built only to obtain a point object to append.
    dgm.append(extra[0])
    return dgm2diag(dgm) if parallel_flag else dgm
def array2dgm(x, fil_d='sub', print_flag=True):
    """
    Convert an array of shape (n, 1) to a diagram in which, after sorting,
    the k-th value from the front is paired with the k-th value from the back.

    :param x: array with ``x.shape[1] == 1``; a 0 is appended when n is odd
    :param fil_d: 'sub' sorts ascending (smallest paired with largest),
                  'sup' sorts descending (largest paired with smallest)
    :param print_flag: print a completion message
    :return: ``d.Diagram`` built from the list of pairs
    :raises Exception: if ``fil_d`` is neither 'sub' nor 'sup'
    """
    assert x.shape[1] == 1
    values = [val for row in x.tolist() for val in row]
    if len(values) % 2 == 1:
        values.append(0)  # pad so that every value has a partner

    if fil_d == 'sub':
        descending = False
    elif fil_d == 'sup':
        descending = True
    else:
        raise Exception(f'No fil_d {fil_d} in array2dgm')
    values.sort(reverse=descending)

    # Pair front with back in one pass instead of repeatedly slicing the list.
    half = len(values) // 2
    pairs = list(zip(values[:half], reversed(values[half:])))

    if print_flag:
        print('finish converting array to dgm...')
    return d.Diagram(pairs)
def compare_diagrams(n=16, negate=False, n_threads=4, seed=1, top_dim=2):
    """
    Compare Oineus and Dionysus diagrams on a random n x n x n grid.

    Seeds numpy's global RNG with ``seed``, computes diagrams with both
    libraries, sums the bottleneck distances over dimensions
    ``0 .. top_dim - 1``, prints the total and asserts it is below 0.001.
    """
    # generate random grid data
    np.random.seed(seed)
    grid = np.random.randn(n**3).reshape((n, n, n))

    # compute diagrams with Oineus (no wrap in Dionysus, so none here either)
    wrap = False
    oineus_dgms = oin.compute_diagrams_ls(grid, negate, wrap, top_dim, n_threads)

    # compute diagrams with Dionysus
    filtration = dion.fill_freudenthal(grid, reverse=negate)
    persistence = dion.homology_persistence(filtration)
    dionysus_dgms = dion.init_diagrams(persistence, filtration)

    total = 0.0
    for dim in range(top_dim):
        # convert Oineus diagram to Dionysus format before comparing
        total += dion.bottleneck_distance(dion.Diagram(oineus_dgms[dim]), dionysus_dgms[dim])

    print("total dist: ", total)
    assert (total < 0.001)
def diag2dgm(diag):
    """
    Build a dionysus Diagram from ``diag``.

    :param diag: a list or numpy array of point-like rows; each row is turned
                 into a tuple. Any other type is handed to ``d.Diagram`` as is.
    :return: ``d.Diagram``
    """
    import dionysus as d

    # One isinstance check replaces two identical `type(...) ==` branches and
    # also accepts subclasses of list / ndarray.
    if isinstance(diag, (list, np.ndarray)):
        diag = [tuple(row) for row in diag]
    return d.Diagram(diag)
def flip_dgm(dgm):
    """
    Swap birth and death of every point, unless some point already has
    birth < death.

    :param dgm: iterable of points exposing ``birth`` and ``death``
    :return: ``dgm`` itself (unchanged) as soon as a point with birth < death
             is met; otherwise a new ``d.Diagram`` of ``(death, birth)`` pairs
    """
    for p in dgm:
        # builtin float replaces np.float, which was removed in NumPy 1.24
        if float(p.birth) < float(p.death):
            return dgm
        # Can only fail when the two values are unordered (e.g. NaN).
        assert float(p.birth) >= float(p.death)

    # Imported only on the path that actually builds a diagram.
    import dionysus as d
    data = [(float(p.death), float(p.birth)) for p in dgm]
    return d.Diagram(data)
def gs2dgms(gs, fil='deg', fil_d='sub', norm=False, one_hom=False, debug_flag=False, **kwargs):
    """
    serial computing dgms

    :param gs: a list of raw nx graphs (no function value)
    :param fil: filtration (deg, ricci)
    :param fil_d: sub or sup
    :param norm: whether normalize or not
    :param one_hom: one homology or not
    :param debug_flag: False by default
    :param kwargs: forwarded to ``node_fil_``
    :return: dgms: a list of dgm, one per graph in ``gs``
    """
    dgms = []
    for i, graph in enumerate(gs):
        if debug_flag:
            print(
                f'process {i}-th graph({len(graph)}/{len(nx.edges(graph))}) where one_hom is {one_hom} fil is {fil} and fil_d is {fil_d}'
            )
        components = component_graphs(graph)  # todo chnage back to 4

        dgm = d.Diagram([])
        for component in components:
            tmp_dgm = node_fil_(g=component, fil=fil, fil_d=fil_d, norm=norm, one_hom=one_hom, **kwargs)
            dgm = add_dgm(dgm, tmp_dgm)
            dgm = dgm_filter(dgm)  # TODO: implement edge_fil_

        # A graph without components used to `return` a single Diagram here,
        # which discarded every diagram computed so far and broke the documented
        # list return type. It now contributes whatever dgm_filter yields for
        # the empty diagram, and processing continues.
        dgm = dgm_filter(dgm)
        assert len(dgm) > 0
        dgms.append(dgm)
    return dgms
def normalize_dgm(dgm):
    """
    Scale every coordinate by the largest absolute birth/death value.

    :param dgm: iterable of points exposing ``birth`` and ``death``
    :return: new ``d.Diagram``; coordinates are left unscaled when the largest
             absolute value is 0 (e.g. the ``(0, 0)`` placeholder diagram)
    """
    # builtin float replaces np.float, which was removed in NumPy 1.24
    max_ = 0
    for p in dgm:
        max_ = max(max_, max(float(abs(p.birth)), float(abs(p.death))))
    max_ = float(max_)
    if max_ == 0:
        # every coordinate is zero: dividing would raise ZeroDivisionError
        max_ = 1.0

    # NOTE(review): the output tuples are (death, birth), i.e. the coordinates
    # are swapped as well as scaled. Kept as in the original - confirm intent.
    data = [(float(p.death) / max_, float(p.birth) / max_) for p in dgm]
    return d.Diagram(data)
def load_diagram(file, dgm):
    """
    Load a diagram saved as a list of tuples in numpy format.

    :param file: path (or file object) handed to ``np.load``
    :param dgm: not used by this function
    :return: ``di.Diagram`` built from the loaded array
    """
    return di.Diagram(np.load(file))
def flip_dgm(dgm):
    """
    Flip dgm from below to above, not vice versa.

    :param dgm: iterable of points exposing ``birth`` and ``death``
    :return: ``dgm`` itself, after ``assert_dgm_above(dgm)``, as soon as a
             point with birth < death is met; otherwise a new ``d.Diagram`` of
             ``(death, birth)`` pairs
    """
    for p in dgm:
        # builtin float replaces np.float, which was removed in NumPy 1.24
        if float(p.birth) < float(p.death):
            assert_dgm_above(dgm)
            return dgm
        # Can only fail when the two values are unordered (e.g. NaN).
        assert float(p.birth) >= float(p.death)
    data = [(float(p.death), float(p.birth)) for p in dgm]
    return d.Diagram(data)
def fake_diagrams(graphs_, dgms, true_dgms=None, attribute='deg', seed=45):
    """
    Build one fake diagram per entry of ``graphs_``.

    :param graphs_: sequence whose i-th entry is a sequence of graphs; only
                    its first graph is handed to ``fake_diagram``
    :param dgms: sequence of diagrams; ``len(dgms[i])`` sets the cardinality
                 of the i-th fake diagram
    :param true_dgms: optional sequence indexed like ``graphs_``; when omitted,
                      ``'null'`` is forwarded for every graph
    :param attribute: forwarded to ``fake_diagram``
    :param seed: forwarded to ``fake_diagram``
    :return: list of diagrams; an empty ``graphs_[i]`` yields ``Diagram([(0, 0)])``
    """
    # `None` replaces the old default `['null'] * 10000`, which raised
    # IndexError for more than 10000 graphs and was a shared mutable default.
    fake_dgms = []
    for i, components in enumerate(graphs_):
        cardinality = len(dgms[i])
        if len(components) == 0:
            fake_dgms.append(d.Diagram([(0, 0)]))
            continue
        true_dgm = 'null' if true_dgms is None else true_dgms[i]
        tmp_dgm = fake_diagram(components[0], cardinality=cardinality, attribute=attribute,
                               seed=seed, true_dgm=true_dgm)
        fake_dgms.append(tmp_dgm)
    return fake_dgms
def load_clfdgm(idx=1, ntda=False):
    """
    Load the stored diagram number ``idx`` and overlay it on a seed diagram.

    The seed diagram is one point ``[np.random.random(), 1]``. The directory is
    built from the module-level ``DIRECT``, ``graph`` and ``fil``; with ``ntda``
    an extra 'ntda_True' path component is inserted. A missing file is reported
    and replaced by a ``[0, 0]`` diagram.

    :param idx: index used for the ``<idx>.csv`` filename
    :param ntda: use the 'ntda_True' directory layout
    :return: the combined diagram
    """
    dgm = d.Diagram([[np.random.random(), 1]])
    for fil_d in ['sub']:  # ['sub', 'sup', 'epd']
        if ntda:
            folder = os.path.join(DIRECT, graph, 'ntda_True', fil, fil_d, 'norm_True', '')
        else:
            folder = os.path.join(DIRECT, graph, fil, fil_d, 'norm_True', '')
        f = folder + str(idx) + '.csv'
        try:
            tmp_dgm = load_dgm(folder, filename=f)
        except FileNotFoundError:
            print(
                f'{f} of size {all_dataset[idx].pos.shape[0]}/{all_dataset[idx].face.shape[1]} not found. Added a dummy one'
            )
            tmp_dgm = d.Diagram([[0, 0]])
        dgm = add_dgm(dgm, tmp_dgm)
    # print(f'finish {idx}-th diagram')
    return dgm
def compute_PD(self, simplices, sub=True, inf_flag='False', zigzag=False):
    """
    Build a dionysus filtration from ``simplices`` and compute its diagrams.

    :param simplices: iterable of ``(vertices, time)`` pairs
    :param sub: sort the filtration in default order when True, reversed when
                False (ignored when ``zigzag`` is set)
    :param inf_flag: the string 'False' (default) passes the diagrams through
                     ``self.del_inf``; any other value skips that step
    :param zigzag: sort with ``zigzag_less`` (reversed) instead
    :return: the diagrams from ``d.init_diagrams`` (optionally filtered), or
             ``d.Diagram([[0, 0]])`` when that result is None or empty
    """
    def zigzag_less(x, y):
        # x, y are simplices: vertices compare ascending by data, edges
        # descending by data, mixed dimensions by dimension.
        dimx, datax = x.dimension(), x.data
        dimy, datay = y.dimension(), y.data
        if dimx == dimy == 0:
            return datax <= datay
        elif dimx == dimy == 1:
            return datax >= datay
        else:
            return dimx < dimy

    f = d.Filtration()
    for simplex, time in simplices:
        f.append(d.Simplex(simplex, time))

    if zigzag:
        f.sort(zigzag_less, reverse=True)
    elif sub:
        f.sort()
    else:
        f.sort(reverse=True)

    m = d.homology_persistence(f)
    dgms = d.init_diagrams(m, f)

    if inf_flag == 'False':
        dgms = self.del_inf(dgms)

    # for some degenerate case, return dgm(0,0)
    if dgms is None or len(dgms) == 0:
        return d.Diagram([[0, 0]])
    return dgms
def del_inf(self, dgms):
    """
    Remove the points with an infinite coordinate from dimensions 0 and 1.

    :param dgms: indexable sequence of diagrams (one per dimension)
    :return: a two-element list of new ``d.Diagram`` objects; a dimension that
             is missing from ``dgms`` yields an empty diagram
    """
    inf = float('inf')
    cleaned = []
    for dim in range(2):
        # Tolerate input with fewer than two dimensions instead of raising
        # IndexError on dgms[1].
        points = dgms[dim] if dim < len(dgms) else []
        finite = [(pt.birth, pt.death) for pt in points
                  if pt.birth != inf and pt.death != inf]
        cleaned.append(d.Diagram(finite))
    return cleaned
def barcode_to_diagram(barcode):
    """
    Transform a list of tuples into a dionysus diagram.

    :param barcode: iterable of bars. A bar with fewer than two entries gets an
                    infinite death. A two-entry bar is kept when its death is
                    infinite, or when birth and death still differ after
                    multiplying by 1000 and truncating to int.
    :return: ``di.Diagram`` of the kept bars
    """
    bars = []
    for bar in barcode:
        if len(bar) < 2:
            bars.append((bar[0], np.inf))
        # The isinf check comes first: int(inf * 1000) raises OverflowError.
        elif np.isinf(bar[1]) or int(bar[0] * 1000) != int(bar[1] * 1000):
            bars.append(bar)
    return di.Diagram(bars)
def gs2dgms(gs, fil='deg', fil_d='sub', norm=False, one_hom=False, debug_flag=False, **kwargs):
    """
    serial computing dgms

    :param gs: a list of nx graphs
    :param fil: filtration (deg, ricci)
    :param fil_d: sub or sup
    :param norm: whether normalize or not
    :param one_hom: one homology or not
    :param debug_flag: False by default
    :param kwargs: forwarded to ``node_fil_``
    :return: dgms: a list of dgm, one per graph in ``gs``
    """
    dgms = []
    for i, graph in enumerate(gs):
        if debug_flag:
            print('processing %s-th graph where fil is %s and fil_d is %s' % (i, fil, fil_d))
        components = component_graphs(graph)

        dgm = d.Diagram([])
        for component in components:
            tmp_dgm = node_fil_(component, fil=fil, fil_d=fil_d, norm=norm, one_hom=one_hom, **kwargs)
            dgm = add_dgm(dgm, tmp_dgm)
            dgm = dgm_filter(dgm)

        # A graph without components used to `return` a single Diagram here,
        # which discarded every diagram computed so far and broke the documented
        # list return type. It now contributes whatever dgm_filter yields for
        # the empty diagram, and processing continues.
        dgm = dgm_filter(dgm)
        assert len(dgm) > 0
        dgms.append(dgm)
    return dgms
def get_diagram(self, g, key='fv', subflag = 'True', one_homology_flag=False, parallel_flag = False, zigzag = False):
    """
    For a graph with a function defined on its nodes or edges, compute its
    0-persistence diagram.

    :param g: graph
    :param key: 'fv', the attribute holding the filtration value
    :param subflag: 'True' for the sublevel diagram, 'False' for the superlevel
                    diagram (string flags, not booleans)
    :param one_homology_flag: when set, return ``post_process`` applied to the
                              second element of the ``epd`` result instead
    :param parallel_flag: when set, the diagram is converted with ``dgm2diag``
                          before being returned
    :param zigzag: True for edge based filtration; only affects the sublevel
                   diagram
    :return: persistence diagram (index 0 of what ``compute_PD`` returns, plus
             one extra (min, max) point)
    :raises Exception: if ``subflag`` is neither 'True' nor 'False'
    """
    # only return 0-homology of sublevel filtration TODO: include one homology
    # type can be tuple or pd. tuple can be parallized, pd cannot.
    g = nx.convert_node_labels_to_integers(g)
    simplices = self.get_simplices(g, key=key)

    if one_homology_flag:
        # NOTE(review): `self` is passed explicitly on top of the bound call.
        # Kept as in the original - confirm it matches epd's signature.
        epd_dgm = self.epd(self, g, pd_flag=False)[1]
        return self.post_process(epd_dgm)

    if subflag not in ('True', 'False'):
        raise Exception('subflag can be either True or False')

    # `g.nodes[n]` replaces `g.node[n]`, which was removed in NetworkX 2.4.
    node_values = [g.nodes[n][key] for n in g.nodes()]
    _min = min(node_values)
    _max = max(node_values) + 1e-5  # avoid the extra node lies on diagonal

    # Only the requested filtration is computed.
    if subflag == 'True':
        if zigzag:
            dgms = self.compute_PD(simplices, zigzag=True)
        else:
            dgms = self.compute_PD(simplices, sub=True)
        extra = d.Diagram([(_min, _max)])
    else:
        dgms = self.compute_PD(simplices, sub=False)
        extra = d.Diagram([(_max, _min)])

    dgm = dgms[0]
    # A one-point Diagram is built only to obtain a point object to append.
    dgm.append(extra[0])
    return dgm2diag(dgm) if parallel_flag else dgm
def post_process(self, dgm, debug_flag=False):
    """
    Replace infinite coordinates by 0 (in place) and flip the diagram.

    :param dgm: diagram whose points expose writable ``birth`` / ``death``
    :param debug_flag: when exactly True, print the diagram before and after
                       the flip
    :return: ``d.Diagram([(0, 0)])`` for an empty input, otherwise the result
             of ``flip_dgm``
    """
    if len(dgm) == 0:
        return d.Diagram([(0, 0)])

    # builtin float replaces np.float, which was removed in NumPy 1.24
    for p in dgm:
        if p.birth == float('-inf'):
            p.birth = 0
        if p.death == float('inf'):
            p.death = 0

    # `== True` is kept on purpose: other flags in this code base are strings
    # such as 'on' / 'off', which must not switch debugging on here.
    if debug_flag == True:
        print('Before flip:')
        print_dgm(dgm)
    dgm = flip_dgm(dgm)
    if debug_flag == True:
        print('After:')
        print_dgm(dgm)
    return dgm
def g2dgm(i, g=None, fil='deg', fil_d='sub', norm=False, one_hom=False, debug_flag=False, **kwargs):
    """
    Per-graph wrapper around ``node_fil_`` / ``edge_fil_`` for parallel
    computation of diagrams.

    :param i: index of the graph, only used in the debug message
    :param g: graph handed to ``component_graphs``
    :param fil: filtration name; 'jaccard', 'ricci' and 'edge_p' go through
                ``edge_fil_``, everything else through ``node_fil_``
    :param fil_d: sub/super
    :param norm: False by default
    :param one_hom: False by default
    :param debug_flag: False by default
    :param kwargs: forwarded to the filtration function
    :return: the overlay of the diagrams of all components, passed through
             ``dgm_filter``
    """
    # assert 'gs' in globals().keys()
    # g = gs[i].copy()
    if debug_flag:
        print('processing %s-th graph where fil is %s and fil_d is %s' % (i, fil, fil_d))

    components = component_graphs(g)
    result = d.Diagram([])
    for component in components:
        uses_edge_filtration = fil in ('jaccard', 'ricci', 'edge_p')
        if not uses_edge_filtration:
            component_dgm = node_fil_(component, fil=fil, fil_d=fil_d, norm=norm, one_hom=one_hom, **kwargs)
        else:
            component_dgm = edge_fil_(component, fil=fil, fil_d=fil_d, norm=norm, one_hom=one_hom, **kwargs)
            print_dgm(component_dgm)
        result = dgm_filter(add_dgm(result, component_dgm))

    # handle the case when components is empty
    return dgm_filter(result)
def compute_PD(self, simplices, sub=True, inf_flag='False'):
    """
    Compute diagrams from the vertex simplices of ``simplices``.

    Only one-vertex simplices enter the filtration. Two-vertex simplices are
    accepted but do not take part in the computation, and ``sub`` is not used.
    NOTE(review): this looks like work in progress - confirm that ignoring
    edges and ``sub`` is intended.

    :param simplices: iterable of ``(vertices, time)`` pairs with one or two
                      vertices each
    :param sub: unused
    :param inf_flag: the string 'False' (default) passes the diagrams through
                     ``self.del_inf``; any other value skips that step
    :return: the diagrams from ``d.init_diagrams`` (optionally filtered), or
             ``d.Diagram([[0, 0]])`` when that result is None or empty
    :raises Exception: if a simplex has neither one nor two vertices
    """
    node_simplices = []
    for simplex, time in simplices:
        if len(simplex) == 1:
            node_simplices.append((simplex, time))
        elif len(simplex) != 2:
            raise Exception('Expect Dim of simplex be either 1 or 2')

    f_node = d.Filtration()
    for simplex, time in node_simplices:
        f_node.append(d.Simplex(simplex, time))
    f_node.sort()

    m = d.homology_persistence(f_node)
    dgms = d.init_diagrams(m, f_node)

    if inf_flag == 'False':
        dgms = self.del_inf(dgms)

    # for some degenerate case, return dgm(0,0)
    if dgms is None or len(dgms) == 0:
        return d.Diagram([[0, 0]])
    return dgms
def fake_diagram(g, cardinality = 2, attribute='deg', seed=42, true_dgm = 'null'):
    """
    Draw a random diagram with ``cardinality`` points.

    Coordinates are sampled from the node attribute values of ``g``, or, when
    ``true_dgm`` is given, from the births and deaths of that diagram. Sampling
    is without replacement when the pool is large enough, otherwise with
    replacement. Each point is ``(min, max + 1e-3)`` of two consecutive draws.

    :param g: graph carrying the node attribute ``attribute``
    :param cardinality: number of points of the fake diagram
    :param attribute: node attribute to sample from
    :param seed: value passed to ``random.seed``
    :param true_dgm: diagram to sample from instead, or 'null'
    :return: ``d.Diagram``
    """
    # `random` must provide the numpy-style choice(a, size=..., replace=...).
    random.seed(seed)
    # list(): a dict view is not a valid population for `choice`.
    sample_pool = list(nx.get_node_attributes(g, attribute).values())

    if true_dgm != 'null':
        tmp = dgm2diag(true_dgm)
        sample_pool = [p[0] for p in tmp] + [p[1] for p in tmp]

    try:
        sample = random.choice(sample_pool, size=2 * cardinality, replace=False)
    except ValueError:
        # pool smaller than the requested sample: fall back to replacement
        sample = random.choice(sample_pool, size=2 * cardinality, replace=True)
    assert set(sample).issubset(set(sample_pool))

    dgm = []
    for k in range(0, len(sample), 2):
        x_, y_ = sample[k], sample[k + 1]
        # + 1e-3 keeps the point strictly off the diagonal
        dgm.append((min(x_, y_), max(x_, y_) + 1e-3))
    return d.Diagram(dgm)
def viz_vector():
    """
    Plot a two-point diagram next to its vectorisation.

    The left panel is a scatter of the diagram coordinates from ``dgmxy``; the
    right panel plots the transposed output of ``merge_dgms``. The shapes of
    the ``dgms2vec`` and ``merge_dgms`` outputs are printed first.
    """
    # https://matplotlib.org/users/pyplot_tutorial.html
    diagram = d.Diagram([(2, 3), (3, 4)])
    from Esme.dgms.format import dgmxy
    xs, ys = dgmxy(diagram)

    dgms = [diagram, diagram]
    params = {
        'bandwidth': 1.0,
        'weight': (1, 1),
        'im_range': [0, 1, 0, 1],
        'resolution': [5, 5]
    }
    single = dgms2vec(dgms, vectype='pi', **params)
    merged = merge_dgms(dgms, dgms, vectype='pi', **params)
    print(np.shape(single), np.shape(merged))

    plt.figure()
    plt.subplot(121)
    plt.scatter(xs, ys)
    plt.subplot(122)
    plt.plot(merged.T)  # (n_image, dim)
    plt.show()
def g2dgm(i, g=None, fil='deg', fil_d='sub', norm=False, one_hom=False, debug_flag=False, **kwargs): """ a wrapper of node_fil_ for parallel computing dgms. :param g: :param fil: :param fil_d: :param norm: False by default :param one_hom: False by default :param debug_flag: False by default :param kwargs: :return: """ # assert 'gs' in globals().keys() # g = gs[i].copy() if len(g) > 60000: return d.Diagram([[0, 0]]) # todo better handling if debug_flag: print('in g2dm', kwargs) i += kwargs.get('a', 0) print( f'processing {i}-th graph({len(g)}/{len(g.edges)}) where fil is {fil} and fil_d is {fil_d} and one_hom is {one_hom}' ) if kwargs.get('write', None) == True: # 一个后门 fil_d_ = 'epd' if one_hom == True else fil_d # if check_single_dgm(graph = 'mn'+version, fil = fil, fil_d=fil_d_, norm=norm, idx=i): return components = component_graphs(g) dgm = d.Diagram([]) for component in components: if fil in ['jaccard']: tmp_dgm = edge_fil_(component, fil=fil, fil_d=fil_d, norm=norm, one_hom=one_hom, **kwargs) print_dgm(tmp_dgm) else: tmp_dgm = node_fil_(g=component, fil=fil, fil_d=fil_d, norm=norm, one_hom=one_hom, **kwargs) dgm = add_dgm(dgm, tmp_dgm) dgm = dgm_filter(dgm) dgm = dgm_filter(dgm) # handle the case when comonents is empty if kwargs.get('write', None) == True: # 一个后门 if one_hom == True: fil_d = 'epd' fil_save = fil + '_nbr' + str(args.nbr_size) + '_exp' + str(args.exp) ntda = 'ntda_' + str(kwargs.get('ntda', 'NotFound')) dir = os.path.join( '/home/cai.507/anaconda3/lib/python3.6/site-packages/save_dgms/', 'mn' + version, ntda, fil_save, fil_d, 'norm_' + str(norm), '') export_dgm(dgm, dir=dir, filename=str(i) + '.csv', print_flag=True) return dgm
def dgm_filter(dgm):
    """
    Return ``dgm`` unchanged when it has at least one point; for an empty
    diagram return a new diagram holding only the origin point.
    """
    if len(dgm) <= 0:
        return d.Diagram([[0, 0]])
    return dgm
for dgm in dgms: dgm = permute(dgm, seed=seed, seed_flag=seed_flag) permuted_dgms_list.append(dgm) return permuted_dgms_list else: assert permute_ratio < 1 n = len(dgms) permute_idx = random_.sample(range(n), int(n * permute_ratio)) for i in range(n): if i in permute_idx: dgm = permute(dgms[i], seed=seed, seed_flag=seed_flag) else: dgm = dgms[i] permuted_dgms_list.append(dgm) return permuted_dgms_list else: return dgms if __name__ == "__main__": dgm = d.Diagram([[1,2], [3,4], [5,6], [7,8]]) from Esme.dgms.format import normalize_dgm dgm = normalize_dgm(dgm) x = coordinate(dgm, dim=20) print(x,x.shape) dgms = [dgm] * 3 dgms = permute_dgms(dgms, permute_flag=True, permute_ratio=1, seed_flag=False, seed=49) for dgm in dgms: print_dgm(dgm) print()
def test_issue39():
    """
    Load the two comma-separated diagrams stored under ``data/issue39`` and
    compute their Wasserstein distance with ``q=5``. The value itself is not
    checked; the test passes when the call completes.
    """
    raw_first = np.loadtxt('data/issue39/dgm1.txt', delimiter=',')
    raw_second = np.loadtxt('data/issue39/dgm2.txt', delimiter=',')
    first = d.Diagram(raw_first)
    second = d.Diagram(raw_second)
    d.wasserstein_distance(first, second, q=5)
def remove_inf(D):
    """
    Return one new diagram per diagram in ``D``, keeping only the points whose
    death is below infinity.
    """
    # Alternative kept from the original: map infinite deaths to 0 instead.
    # return [dio.Diagram([(p.birth, p.death if p.death < np.inf else 0) for p in d]) for d in D]
    finite_dgms = []
    for dgm in D:
        kept = [(p.birth, p.death) for p in dgm if p.death < np.inf]
        finite_dgms.append(dio.Diagram(kept))
    return finite_dgms