def compute_dynamic_filtration2(self, x, hiddens, percentile=0, return_nm=False,
                                absolute_value=True, input_layer=False):
    """Assemble a Dionysus filtration from one forward pass (conv + two linear layers).

    Layer 0 is processed one input channel at a time with
    ``conv_filtration_fast2``; layers 1 and 2 are processed with
    ``linear_filtration_fast2``.  Each helper returns a list of simplex
    entries plus two arrays of birth values (called ``h0_births`` and
    ``h1_births`` here).  The entry format consumed below is
    ``(nodes, values)`` where ``nodes`` has one element (a vertex) or two
    (an edge), ``values[0]`` is the filtration value and, for edges,
    ``values[1]`` is stored in the weight map.

    Parameters
    ----------
    x : tensor
        Network input; ``x.shape[0]`` is iterated as the channel axis.
    hiddens : sequence of tensors
        Activations; ``hiddens[0]``, ``hiddens[1]`` and ``hiddens[2]`` are read.
    percentile : number, optional
        Forwarded to ``compute_percentiles`` to obtain a cut-off per layer.
        It is also compared directly against the conv-layer birth values.
    return_nm : bool, optional
        When true, also return the node map and the weight map.
    absolute_value : bool, optional
        Forwarded to ``compute_percentiles`` and to the filtration helpers.
    input_layer : bool, optional
        When true, the conv helper's entries and the input-node vertices are
        added to the filtration as well.

    Returns
    -------
    The filtration (sorted with ``reverse=True``), or the tuple
    ``(filtration, node_map, weight_map)`` when ``return_nm`` is true.
    """
    filtration = dion.Filtration()
    node_ids = {}      # node hash -> integer vertex id used inside the filtration
    edge_weights = {}  # (source hash, target hash) -> values[1] of the edge entry
    layers = self.params
    cutoffs = np.zeros((len(layers)))
    next_id = 0

    def as_numpy(tensor):
        return tensor.cpu().detach().numpy()

    def vertex_id(node):
        # Hand out consecutive ids in order of first appearance.
        nonlocal next_id
        if node not in node_ids:
            node_ids[node] = next_id
            next_id += 1
        return node_ids[node]

    def collect_result(entries):
        for entry in entries:
            nodes = entry[0]
            values = entry[1]
            weight = values[0]
            arity = len(nodes)
            if arity == 1:
                filtration.append(dion.Simplex([vertex_id(nodes[0])], weight))
            elif arity == 2:
                act_weight = values[1]
                source = vertex_id(nodes[0])
                target = vertex_id(nodes[1])
                edge_weights[(nodes[0], nodes[1])] = act_weight
                filtration.append(dion.Simplex([source, target], weight))

    x = as_numpy(x)
    num_channels = x.shape[0]

    # ---- layer 0: convolution, handled channel by channel ----
    l = 0
    cutoffs[l] = compute_percentiles(as_numpy(hiddens[l]), percentile, absolute_value)
    conv_out = as_numpy(hiddens[l])
    nlc = conv_out.reshape((conv_out.shape[0], -1)).shape[1]
    nls = conv_out.shape[0]
    stride = 1
    for c in range(num_channels):
        kernel = layers[l].weight.data[:, c, :, :]
        mat = conv_layer_as_matrix(kernel, x[c], stride)
        m1, h0_births, h1_births = conv_filtration_fast2(
            x[c], mat, l, c, nlc, nls,
            percentile=cutoffs[l], absolute_value=absolute_value)
        if input_layer:
            enums = m1
            enums.extend(
                ([spec_hash((l, c, idx[0]))], [h0_births[idx].item()])
                for idx in np.argwhere(h0_births > percentile))
        else:
            enums = []
        # NOTE(review): the position component uses ``% nls`` while the channel
        # component uses ``// nlc``; the sibling no_inf variant uses ``% nlc``
        # for both.  Confirm against conv_filtration_fast2 which is intended.
        # NOTE(review): the comparisons in this loop use the raw ``percentile``
        # argument rather than the computed cut-off ``cutoffs[l]`` - confirm.
        enums.extend(
            ([spec_hash((l + 1, idx[0] // nlc, idx[0] % nls))], [h1_births[idx].item()])
            for idx in np.argwhere(h1_births > percentile))
        collect_result(enums)

    # ---- layer 1: first linear layer ----
    previous = as_numpy(hiddens[l])
    l = 1
    cutoffs[l] = compute_percentiles(as_numpy(hiddens[l]), percentile, absolute_value)
    m1, h0_births, h1_births = linear_filtration_fast2(
        previous, layers[l], l, 0,
        percentile=cutoffs[l], absolute_value=absolute_value)
    enums = m1
    if cutoffs[l - 1] < cutoffs[l]:
        comp_percentile = cutoffs[l - 1]
    else:
        comp_percentile = cutoffs[l]
    # NOTE(review): ``c`` is whatever the channel loop left behind (the last
    # channel index); layer 2 below uses a literal 0 in the same position.
    enums.extend(
        ([spec_hash((l, c, idx[0]))], [h0_births[idx]])
        for idx in np.argwhere(h0_births > comp_percentile))

    # ---- layer 2: second linear layer ----
    previous = as_numpy(hiddens[l])
    l = 2
    cutoffs[l] = compute_percentiles(as_numpy(hiddens[l]), percentile, absolute_value)
    m1, h0_births, h1_births_2 = linear_filtration_fast2(
        previous, layers[l], l, 0,
        percentile=cutoffs[l], absolute_value=absolute_value)
    enums += m1
    # A layer-2 input node is born at the larger of its birth as a layer-1
    # output and its birth as a layer-2 input.
    max1 = np.maximum.reduce([h0_births, h1_births])
    if cutoffs[l - 1] < cutoffs[l]:
        comp_percentile = cutoffs[l - 1]
    else:
        comp_percentile = cutoffs[l]
    enums.extend(
        ([spec_hash((l, 0, idx[0]))], [max1[idx]])
        for idx in np.argwhere(max1 > comp_percentile))
    enums.extend(
        ([spec_hash((l + 1, 0, idx[0]))], [h1_births_2[idx]])
        for idx in np.argwhere(h1_births_2 > cutoffs[l]))
    collect_result(enums)

    print('filtration size', len(filtration))
    filtration.sort(reverse=True)
    if return_nm:
        return filtration, node_ids, edge_weights
    return filtration
def compute_dynamic_filtration_no_inf(self, x, hiddens, percentile=0, return_nm=False):
    """Assemble a Dionysus filtration in which every vertex is tied to a base vertex.

    Same three-layer walk as ``compute_dynamic_filtration2`` (one conv layer
    processed per input channel, then two linear layers), with these
    differences visible in the code:

    * a vertex with id ``-1`` is inserted first at value 0, and every vertex
      entry additionally produces an edge ``[vertex, -1]`` at value 0;
    * entry values are used directly as the simplex value (``entry[1]``), not
      wrapped in a list;
    * only the output-node vertices of the conv layer are collected; the conv
      helper's own entries are not.

    Parameters
    ----------
    x : tensor
        Network input; ``x.shape[0]`` is iterated as the channel axis.
    hiddens : sequence of tensors
        Activations; ``hiddens[0]``, ``hiddens[1]`` and ``hiddens[2]`` are read.
    percentile : number, optional
        Percentile (as understood by ``np.percentile``) of the absolute
        activations used as the cut-off of each layer.  It is also compared
        directly against the conv-layer birth values.
    return_nm : bool, optional
        When true, also return the node map.

    Returns
    -------
    The filtration (sorted with ``reverse=True``), or ``(filtration, node_map)``
    when ``return_nm`` is true.
    """
    filtration = dion.Filtration()
    # Base vertex that every other vertex gets connected to.
    filtration.append(dion.Simplex([-1], 0))
    nm = {(-1, 0, 0): -1}
    params = self.params
    percentiles = np.zeros((len(params)))
    next_id = 0  # was called ``id``, which shadowed the builtin

    def to_numpy(tensor):
        return tensor.cpu().detach().numpy()

    def vertex_id(node):
        # Hand out consecutive ids in order of first appearance.
        nonlocal next_id
        if node not in nm:
            nm[node] = next_id
            next_id += 1
        return nm[node]

    def collect_result(res):
        for enum in res:
            nodes = enum[0]
            weight = enum[1]
            if len(nodes) == 1:
                vid = vertex_id(nodes[0])
                filtration.append(dion.Simplex([vid], weight))
                filtration.append(dion.Simplex([vid, -1], 0))
            elif len(nodes) == 2:
                source = vertex_id(nodes[0])
                target = vertex_id(nodes[1])
                filtration.append(dion.Simplex([source, target], weight))

    x = to_numpy(x)
    num_channels = x.shape[0]

    # ---- layer 0: convolution, handled channel by channel ----
    l = 0
    conv_out = to_numpy(hiddens[l])
    percentiles[l] = np.percentile(np.absolute(conv_out), percentile)
    nlc = conv_out.reshape((conv_out.shape[0], -1)).shape[1]
    stride = 1
    for c in range(num_channels):
        p = params[l].weight.data[:, c, :, :]
        mat = conv_layer_as_matrix(p, x[c], stride)
        m1, h0_births, h1_births = conv_filtration_fast2(
            x[c], mat, l, c, nlc, percentile=percentiles[l])
        # NOTE(review): this comparison uses the raw ``percentile`` argument,
        # not the computed cut-off ``percentiles[l]`` - confirm intent.
        enums = [
            ([spec_hash((l + 1, i[0] // nlc, i[0] % nlc))], h1_births[i].item())
            for i in np.argwhere(h1_births > percentile)
        ]
        collect_result(enums)

    # ---- layer 1: first linear layer ----
    l = 1
    hidden_1 = to_numpy(hiddens[l])
    percentiles[l] = np.percentile(np.absolute(hidden_1), percentile)
    m1, h0_births, h1_births = linear_filtration_fast2(
        conv_out, params[l], l, 0, percentile=percentiles[l])
    # Start a fresh list here.  The previous code appended to the list left
    # over from the last conv channel, whose entries had already been
    # collected, so those simplices were added to the filtration twice.
    enums = list(m1)
    comp_percentile = (percentiles[l - 1]
                       if percentiles[l - 1] < percentiles[l]
                       else percentiles[l])
    # NOTE(review): ``c`` is the last channel index left by the loop above;
    # layer 2 uses a literal 0 in the same position.
    enums += [([spec_hash((l, c, i[0]))], h0_births[i])
              for i in np.argwhere(h0_births > comp_percentile)]

    # ---- layer 2: second linear layer ----
    l = 2
    percentiles[l] = np.percentile(np.absolute(to_numpy(hiddens[l])), percentile)
    m1, h0_births, h1_births_2 = linear_filtration_fast2(
        hidden_1, params[l], l, 0, percentile=percentiles[l])
    enums += m1
    # A layer-2 input node is born at the larger of its birth as a layer-1
    # output and its birth as a layer-2 input.
    max1 = np.maximum.reduce([h0_births, h1_births])
    comp_percentile = (percentiles[l - 1]
                       if percentiles[l - 1] < percentiles[l]
                       else percentiles[l])
    enums += [([spec_hash((l, 0, i[0]))], max1[i])
              for i in np.argwhere(max1 > comp_percentile)]
    enums += [([spec_hash((l + 1, 0, i[0]))], h1_births_2[i])
              for i in np.argwhere(h1_births_2 > percentiles[l])]
    collect_result(enums)

    print('filtration size', len(filtration))
    filtration.sort(reverse=True)
    if return_nm:
        return filtration, nm
    return filtration
return pc def L2RankDist(rankA, rankB, weightfun): diff = { key: get_rankval(rankA, key) - get_rankval(rankB, key) for key in weightfun } return sum(math.pow(diff[key], 2) * weightfun[key] for key in weightfun) for i in range(num_sam1 + num_sam2): filename = './samples/samples{:d}.csv'.format(i) samples.append(np.genfromtxt(filename, delimiter=',')) simplices = diode.fill_alpha_shapes(samples[-1]) f = d.Filtration(simplices) m = d.homology_persistence(f) dgms = d.init_diagrams(m, f) dgms_list.append(dgms[homology_dimension]) local_birth_max = np.max([pt.birth for pt in dgms[homology_dimension]]) birth_max = max(birth_max, local_birth_max) local_death_max = np.max([pt.death for pt in dgms[homology_dimension]]) death_max = max(death_max, local_death_max) grid = (birth_max + death_max) / 100 rankfuns = [] for dgm in dgms_list: print(dgm) bvals = np.arange(0.0, np.max([pt.birth for pt in dgm]), grid) dvals = np.arange(0.0, np.max([pt.death for pt in dgm]), grid)
from torchvision import datasets, transforms import dionysus as dion import networkx as nx import matplotlib.pyplot as plt import multiprocessing as mp import numpy as np import pandas as pd from homo_explico.functions.filtration import conv_filtration_fast2, linear_filtration_fast2, max_pooling_filtration, conv_layer_as_matrix, spec_hash enums = [] id = 0 nm = {} f = dion.Filtration() def first_layer(x, p, l, c, percentile, stride, nlc): mat = conv_layer_as_matrix(p, x, stride) m1, h0_births, h1_births = conv_filtration_fast2(x, mat, l, c, nlc, percentile=percentile) enums = m1 enums += [([spec_hash((l, c, i[0]))], h0_births[i].item()) for i in np.argwhere(h0_births > percentile)] enums += [([spec_hash( (l + 1, i[0] // nlc, i[0] % nlc))], h1_births[i].item())
def GCC2(dataset, threshold, maxscale, CEthreshold=1e-5, lp=1, lq=2, Nsteps=1000, lambda_coef=0.5, PLOT=True):
    """Compute circular coordinates with a generalized penalty function.

    The function builds a Vietoris-Rips filtration of ``dataset`` up to
    ``maxscale``, computes persistent cohomology over Z/23Z, selects
    1-dimensional bars, smooths the cocycle of every selected bar with
    ``dionysus.smooth`` and (optionally) refines the result with the Adam
    optimizer under the penalty
    ``(1 - lambda_coef) * L^lp + lambda_coef * L^lq``.

    Parameters
    ----------
    dataset : numpy array
        Point cloud; rows are samples, columns are dimensions/variables.
        Only the first two columns are used by the plots.
    threshold : number
        Persistence threshold for selecting cocycles.  A negative integer -M
        selects the M bars with the largest persistence instead.
    maxscale : number
        Maximal scale of the Vietoris-Rips complex.  With a non-negative
        ``threshold`` it is also the upper bound on selected persistence.
    CEthreshold : float, optional
        Two endpoints whose coordinates differ by at most this value form a
        "constant edge" (drawn in the constant-edge plot).
    lp, lq : int, optional
        Exponents of the two norms in the penalty function.
    Nsteps : int, optional
        Number of optimizer iterations; no optimization is run when it is
        not positive.
    lambda_coef : float, optional
        Mixing weight of the ``L^lq`` term in the penalty.
    PLOT : bool, optional
        When true, matplotlib figures are created and shown along the way.

    Returns
    -------
    list
        One 1-D array per selected cocycle holding the circular coordinate
        (values taken modulo 1) of every data point.

    Side effects
    ------------
    Prints progress information, sets ``TF_CPP_MIN_LOG_LEVEL`` in
    ``os.environ`` and writes ``dataset_CircularCoordinates_<lambda>_<g>.txt``
    and ``dataset_Embedding_<lambda>.txt`` with ``np.savetxt``.
    """
    # The body was ported from a command-line script that read ``sys.argv``.
    # The same argument vector is kept as a *local* array so that the printed
    # output and the string -> number conversions are unchanged, without
    # overwriting the process-wide ``sys.argv`` as the previous version did.
    argv = np.asarray(['GCC2_fun.GCC2', 'dataset', threshold, maxscale,
                       CEthreshold, lp, lq, Nsteps, lambda_coef])
    return_list = []
    print('Number of arguments:', str(len(argv)), 'arguments.')
    print('Argument List:', str(argv),'\n')
    filenam = argv[1]
    print('Data file:', filenam)
    time_format = "%Y-%m-%d %H:%M:%S"

    # From Python_code/utils.py
    def coboundary_1(vr, thr):
        """Sparse edge-by-vertex coboundary matrix of the edges with data <= thr."""
        D = [[], []]
        data = []
        indexing = {}
        ix = [0]*2
        for s in vr:
            if s.dimension() != 1:
                continue
            elif s.data > thr:
                continue
            indexing.setdefault(s.dimension(), {})
            indexing.setdefault(s.dimension()-1, {})
            if s not in indexing[s.dimension()]:
                indexing[s.dimension()][s] = ix[s.dimension()]
                ix[s.dimension()] += 1
            for dat, k in enumerate(s.boundary()):
                if k not in indexing[s.dimension()-1]:
                    # Vertices keep their own label as column index.
                    indexing[k.dimension()][k] = k[0]
                    ix[k.dimension()] += 1
                D[0].append(indexing[s.dimension()][s])  # rows
                D[1].append(indexing[k.dimension()][k])  # cols
                data.append(1. if dat % 2 == 0 else -1.)
        return sp.sparse.csr_matrix((data, (D[0], D[1]))), indexing

    def optimizer_inputs(vr, bars, cocycle, init_z, prime, thr):
        """Return (initial coordinates, cocycle vector, coboundary matrix)."""
        bdry, indexing = coboundary_1(vr, thr)
        n, m = bdry.shape  # edges X nodes
        # Copy the initial coordinates into a plain array.
        l2_cocycle = [0]*len(init_z)
        for i, coeff in enumerate(init_z):
            l2_cocycle[i] = coeff
        l2_cocycle = np.array(l2_cocycle)
        # Cocycle we need to smooth out, reorganized to fit the coboundary rows;
        # coefficients are lifted from Z/prime to the symmetric range.
        f = np.zeros((n, 1))
        for c2 in cocycle:
            if c2.element < (prime//2):
                f[indexing[1][vr[c2.index]]] += c2.element
            else:
                f[indexing[1][vr[c2.index]]] += c2.element-prime
        return l2_cocycle, f, bdry

    # Dionysus2 only.
    title_str = 'Circular Coordinates with Generalized Penalty Functions'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    filenam = os.path.splitext(filenam)[0]
    pdfnam = filenam+'_output.pdf'
    print('Output file:', pdfnam,'\n')
    now = datetime.datetime.now()
    print('>>>>>>Start Time(VR computation):',now.strftime(time_format))

    ##############################
    # Scatter plot of data points
    if PLOT:
        fig = plt.figure(figsize=(5,5), dpi=100)
        plt.xlabel('X')
        plt.ylabel('Y')
        plt.gca().set_aspect('equal', 'datalim')
        plt.scatter(dataset.T[0,:],dataset.T[1,:],s=10, c='b')
        plt.axis('equal')
        plt.title('Scatter plot of data points')

    ##############################
    # Compute persistence diagrams
    # Prime base of the coefficient field used for persistent cohomology.
    prime = 23
    maxscale = float(argv[3])
    threshold = float(argv[2])
    print('Base coefficient field: Z/', prime ,'Z',sep='')
    print('Maximal scale:', float(argv[3]))
    print('Persistence threshold for selecting significant cocyles:',threshold)

    vr = dionysus.fill_rips(dataset, 2, float(argv[3]))  # Vietoris-Rips complex
    cp = dionysus.cohomology_persistence(vr, prime, True)
    dgms = dionysus.init_diagrams(cp, vr)
    now = datetime.datetime.now()
    print('>>>>>>End Time (VR-computation):',now.strftime(time_format))

    if PLOT:
        # Persistence barcodes of the Vietoris-Rips complex, dimension 0 and 1.
        fig = plt.figure(figsize=(5,5), dpi=100)
        plt.title('Persistence Barcode for dim 0')
        dionysus.plot.plot_bars(dgms[0], show=True)
        fig = plt.figure(figsize=(5,5), dpi=100)
        plt.title('Persistence Barcode for dim 1')
        dionysus.plot.plot_bars(dgms[1], show=True)
        plt.show()
        # Persistence diagrams of the Vietoris-Rips complex, dimension 0 and 1.
        fig = plt.figure(figsize=(5,5), dpi=100)
        plt.title('Persistence Diagram for dim 0')
        dionysus.plot.plot_diagram(dgms[0], show=True)
        fig = plt.figure(figsize=(5,5), dpi=100)
        plt.title('Persistence Diagram for dim 1')
        dionysus.plot.plot_diagram(dgms[1], show=True)
        plt.show()

    # Select the features: sort all 1-dimensional bars by persistence
    # (largest first), keeping cocycles and bars aligned.
    bars = [bar for bar in dgms[1]]
    cocycles = [cp.cocycle(bar.data) for bar in bars]
    cocycles_persistence = [bar.death-bar.birth for bar in bars]
    cocycles_ind = np.argsort(-np.asarray(cocycles_persistence), axis=-1, kind=None, order=None)
    cocycles = [cocycles[i] for i in cocycles_ind]
    bars = [bars[i] for i in cocycles_ind]
    if threshold < 0:
        # A negative threshold -M means "take the M most persistent bars".
        leadings = int(np.abs(threshold))
        threshold = 0
        print('\n>>>>>>Threshold overridden, the 1-cocyles with the ',leadings,' largest persistence would be selected for computation.')
        cocycles = cocycles[0:leadings]
        bars = bars[0:leadings]
    else:
        bars = [bar for bar in dgms[1]
                if bar.death-bar.birth > threshold and bar.death-bar.birth < float(argv[3])]
        cocycles = [cp.cocycle(bar.data) for bar in bars]
    print('>>>>>>Selected significant features:')
    for B_Lt in bars:
        print(B_Lt,'\tpersistence = ',B_Lt.death-B_Lt.birth)

    ####################
    # Persistence barcode with the selected bars highlighted in red
    if PLOT:
        fig = plt.figure(figsize=(5,5), dpi=100)
        dionysus.plot.plot_bars(dgms[1], show=False)
        Lt1 = [[bar.birth,bar.death] for bar in bars]         # selected bars
        Lt1_tmp = [[bar.birth,bar.death] for bar in dgms[1]]  # all bars, plot order
        for Lt in Lt1:
            # Row of the bar in the barcode plot: the last bar with the same
            # birth and death wins.
            loc = 0
            target = Lt
            for g in range(len(Lt1_tmp)):
                if Lt1_tmp[g][0] == target[0] and Lt1_tmp[g][1] == target[1]:
                    loc = g
            plt.plot([Lt[0],Lt[1]],[loc,loc],'r-')
        plt.title('Selected cocycles on barcodes (red bars)')
        plt.show()

    ####################
    # Persistence diagram with the selected points highlighted in red
    if PLOT:
        fig = plt.figure(figsize=(5,5), dpi=100)
        dionysus.plot.plot_diagram(dgms[1], show=False)
        Lt2 = [[point.birth,point.death] for point in bars]
        for Lt in Lt2:
            plt.plot(Lt[0],Lt[1],'ro')
        plt.title('Selected cocycles on diagram (red points)')
        plt.figure(figsize=(5,5), dpi=100)
        plt.show()

    ##############################
    # Visualization of GCC
    overall_coords = np.zeros(dataset.shape[0], dtype = float)
    toll = float(argv[4])  # tolerance for constant edges
    print('\nConstant edges, with coordinates difference <',toll)
    print('Optimizer maximal iteration steps=',int(argv[7]))
    lp = int(argv[5])
    lq = int(argv[6])
    now = datetime.datetime.now()
    print('>>>>>> Start Time (GCC computation):',now.strftime(time_format))
    # The argument vector always carries lambda_coef, so exactly one lambda is
    # evaluated per call.
    lambda_list = [float(argv[8])]
    for lambda_parameter in lambda_list:
        print('>>>>>> lambda = ',lambda_parameter,'. => Analysis of Circular coordinates \n (mod {} - {}*L{} + {}*L{})'.format(prime,1-lambda_parameter,lp,lambda_parameter,lq))
        embedding = []
        if PLOT:
            fig = plt.figure(figsize=(5,5), dpi=100)
            plt.text(0.3,0.5,'Analysis of Circular coordinates \n (mod {} - {}*L{} + {}*L{})'.format(prime,1-lambda_parameter,lp,lambda_parameter,lq),transform=plt.gca().transAxes)
            plt.show()
        print('Penalty function =>',(1-lambda_parameter),'*L^',lp,"+",lambda_parameter,"*L^",lq,sep='')
        for g in range(len(cocycles)):
            chosen_cocycle = cocycles[g]
            chosen_bar = bars[g]
            # Restrict the complex to the scale at which the whole cocycle exists.
            NEW_THRESHOLD = max([vr[c2.index].data for c2 in chosen_cocycle])
            vr_L2 = dionysus.Filtration([s for s in vr if s.data <= NEW_THRESHOLD])
            coords = dionysus.smooth(vr_L2, chosen_cocycle, prime)
            l2_cocycle,f,bdry = optimizer_inputs(vr, bars, chosen_cocycle, coords, prime, NEW_THRESHOLD)
            l2_cocycle = l2_cocycle.reshape(-1, 1)
            B_mat = bdry.todense()
            z_init = l2_cocycle
            z = tf.Variable(z_init, name='z', trainable=True, dtype=tf.float64)
            trainable_variables = [z]

            def loss_function():
                # Reads f, B_mat and z at call time, i.e. the tf.Variable
                # versions assigned just below.
                cost_z = (1-lambda_parameter)*tf.math.pow( tf.math.reduce_sum( tf.math.pow( tf.abs(f - tf.linalg.matmul(B_mat,z) ),lp ) ), 1/lp) + lambda_parameter*tf.math.pow( tf.math.reduce_sum( tf.math.pow( tf.math.abs(f - tf.linalg.matmul(B_mat,z) ),lq ) ), 1/lq)
                return cost_z

            tf.random.set_seed(1)
            optimizer = tf.optimizers.Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=False)
            B_mat = tf.Variable(B_mat, name="B_mat", trainable=False, dtype=tf.float64)
            f = tf.Variable(f, name="f", trainable=False, dtype=tf.float64)
            if int(argv[7]) > 0:
                print('Before optim cost:',loss_function().numpy())
                for i in range(int(argv[7])):
                    train = optimizer.minimize(loss_function, var_list=trainable_variables)
                print('After optim cost:',loss_function().numpy(),' in ',train.numpy(),' steps')
                res_tf = z.numpy()
            else:
                print('Non-optimized cost:',loss_function().numpy())
                res_tf = z_init
            overall_coords = overall_coords+res_tf.T[0,:]
            color = np.mod(res_tf.T[0,:],1)
            return_list.append(color)
            if PLOT:
                fig = plt.figure(figsize=(5,5), dpi=100)
                plt.scatter(dataset.T[0,:],dataset.T[1,:],s=10, c=color, cmap="hsv",zorder=10)
                plt.clim(0,1)
                plt.colorbar()
                plt.axis('equal')
                plt.title('Circular coordinates {}-th cocyle (mod {} - {}*L{} + {}*L{})'.format(g+1,prime,1-lambda_parameter,lp,lambda_parameter,lq))
                plt.show()

            # Constant edges: among the edges present when the cycle was born,
            # keep those whose endpoints have (almost) the same coordinate.
            edges_constant = []
            thr = chosen_bar.birth
            for s in vr:
                if s.dimension() != 1:
                    continue
                elif s.data > thr:
                    break
                if abs(color[s[0]]-color[s[1]]) <= toll:
                    edges_constant.append([dataset[s[0],:],dataset[s[1],:]])
            edges_constant = np.array(edges_constant)
            if PLOT:
                fig = plt.figure(figsize=(5,5), dpi=100)
                # ``.size`` replaces the former ``edges_constant.T != []``,
                # which compared an ndarray with a list: that raises on
                # current NumPy and skipped the plot for exactly one edge.
                if edges_constant.size:
                    plt.plot(edges_constant.T[0,:],edges_constant.T[1,:], c='k', alpha=.5)
                plt.scatter(dataset.T[0,:],dataset.T[1,:],s=10, c=color, cmap="hsv",zorder=10)
                plt.clim(0,1)
                plt.colorbar()
                plt.axis('equal')
                plt.title('Circular coordinates/constant edges, \n {}-th cocyle (mod {} - {}*L{} + {}*L{})'.format(g+1,prime,1-lambda_parameter,lp,lambda_parameter,lq))
                plt.show()
            color_filenam = filenam+'_CircularCoordinates_'+str(lambda_parameter)+'_'+str(g)+'.txt'
            np.savetxt(color_filenam,color)
            print('Penalty function =>',(1-lambda_parameter),'*L^',lp,"+",lambda_parameter,"*L^",lq,' Coordinates=>',color_filenam,sep='')
            if PLOT:
                fig = plt.figure(figsize=(5,5), dpi=100)
                angle = np.arctan(dataset.T[0,:]/dataset.T[1,:])
                plt.scatter(angle,color,s=10, c='b',zorder=10)
                plt.ylim([0,1])
                plt.xlim([-np.pi/2,np.pi/2])
                plt.title('Correlation plot against angle, \n {}-th cocyle (mod {} - {}*L{} + {}*L{})'.format(g+1,prime,1-lambda_parameter,lp,lambda_parameter,lq))
                plt.show()
            # Each cocycle contributes a (sin, cos) pair of rows to the embedding.
            embedding.extend([[np.sin(a) for a in 2*np.pi*color], [np.cos(a) for a in 2*np.pi*color]])
            if PLOT:
                fig = plt.figure(figsize=(5,5), dpi=100)
                dist2 = np.sqrt(np.power(dataset.T[0,:],2)+np.power(dataset.T[1,:],2))
                plt.scatter(dist2,color,s=10, c='b',zorder=10)
                plt.ylim([0,1])
                plt.xlim([0,maxscale])
                plt.title('Correlation plot aginst distance, \n {}-th cocyle (mod {} - {}*L{} + {}*L{})'.format(g+1,prime,1-lambda_parameter,lp,lambda_parameter,lq))
                plt.show()
        emb_filenam = filenam+'_Embedding_'+str(lambda_parameter)+'.txt'
        np.savetxt(emb_filenam, np.array(embedding))
        print('Penalty function =>',(1-lambda_parameter),'*L^',lp,"+",lambda_parameter,"*L^",lq,' Embeddings=>',emb_filenam,sep='')

    # Combined coordinates of all cocycles; the global threshold is used here.
    # The result is not consumed by any active code below (the plot that used
    # it is disabled).
    overall_edges_constant = []
    overall_thr = float(argv[2])
    for s in vr:
        if s.dimension() != 1:
            continue
        elif s.data > overall_thr:
            break
        if abs(overall_coords[s[0]]-overall_coords[s[1]]) <= toll:
            overall_edges_constant.append([dataset[s[0],:],dataset[s[1],:]])
    overall_edges_constant = np.array(overall_edges_constant)
    if PLOT:
        fig = plt.figure(figsize=(5,5), dpi=100)
        angle = np.arctan(dataset.T[0,:]/dataset.T[1,:])
        plt.scatter(angle,overall_coords,s=10, c='b',zorder=10)
        plt.ylim([0,1])
        plt.xlim([-np.pi/2,np.pi/2])
        plt.title('Correlation plot \n (mod {} - {}*L{} + {}*L{})'.format(prime,1-lambda_parameter,lp,lambda_parameter,lq))
        plt.show()
        # ``color`` and ``g`` only exist when at least one cocycle was
        # processed; without this guard an empty selection raised NameError.
        if cocycles:
            fig = plt.figure(figsize=(5,5), dpi=100)
            dist2 = np.sqrt(np.power(dataset.T[0,:],2)+np.power(dataset.T[1,:],2))
            plt.scatter(dist2,color,s=10, c='b',zorder=10)
            plt.ylim([0,1])
            plt.xlim([0,maxscale])
            plt.title('Correlation plot aginst distance, \n {}-th cocyle (mod {} - {}*L{} + {}*L{})'.format(g+1,prime,1-lambda_parameter,lp,lambda_parameter,lq))
            plt.show()
    now = datetime.datetime.now()
    print('>>>>>> End Time (GCC computation):',now.strftime(time_format))
    return return_list
def setup_Zigzag(self, k=2):
    '''
    Helper function for ``run_Zigzag`` that sets up inputs needed for
    Dionysus' zigzag persistence function from a sequence of networks.

    Vertices, edges and triangles of every network in ``self.networks`` are
    collected (relabelled through ``self.vert_labels``), and for each simplex
    the times at which it enters and leaves the zigzag sequence are recorded.
    With ``self.cplx_type == 'union'`` a simplex first seen in network ``i``
    enters at ``i - 0.5`` (0 for ``i == 0``) and leaves at the first index
    where it is absent; with ``'intersection'`` it enters at ``i`` and leaves
    at ``i - 0.5``.

    Parameters
    ----------
    k: int, optional
        Max dimension for rips complex (default is 2).  Currently not
        forwarded: only triangles are extracted, so the helper complexes are
        always built up to dimension 2.

    Returns
    -------
    filtration: dio.Filtration
        Dionysus filtration containing all simplices that exist in the
        zigzag sequence
    times: list
        ``times[i]`` lists the alternating entry/exit times of simplex ``i``
        (a simplex still present in the last network has no final exit time)

    If ``self.cplx_type`` is neither ``'union'`` nor ``'intersection'`` a
    message is printed and ``([], [])`` is returned.
    '''
    # Validate once up front instead of inside every per-simplex loop.
    if self.cplx_type not in ('union', 'intersection'):
        print('cplx_type not recognized...\nQuitting')
        return [], []
    is_union = self.cplx_type == 'union'
    num_networks = len(self.networks)

    # renames edges in the list based on the vert labels
    # so an edge [0,5] becomes [vert_labels[0], vert_labels[5]]
    def rename_edges(edge_list, vert_labels):
        vert_dict = {i: vert_labels[i] for i in range(len(vert_labels))}
        return np.vectorize(vert_dict.__getitem__)(edge_list)

    def get_tris(A, labels):
        # Build the pseudo-distance matrix in a fresh float array.  The
        # previous code rewrote the adjacency matrix stored on the object,
        # and on an integer matrix the 0.1 below was truncated to 0.
        adj = np.array(A, dtype=float)
        linked = (adj > 0) | (adj.T > 0)
        # Linked pairs sit at distance 0.1, everything else at 10, so a Rips
        # complex at radius 1 is the clique complex of the undirected graph.
        dist = np.where(linked, 0.1, 10.0)
        np.fill_diagonal(dist, 0)
        rips = dio.fill_rips(squareform(dist), k=2, r=1)
        return [[labels[v] for v in simp] for simp in rips if simp.dimension() == 2]

    def presence_times(present):
        # Convert a per-network presence sequence into entry/exit times.
        times = []
        inside = False
        for i, here in enumerate(present):
            if here and not inside:
                if is_union:
                    times.append(0 if i == 0 else i - 0.5)
                else:
                    times.append(i)
                inside = True
            elif inside and not here:
                times.append(i if is_union else i - 0.5)
                inside = False
        return times

    simps_list = []
    times_list = []

    # Handle vertices...
    verts = np.unique(np.concatenate(self.vert_labels))
    num_verts = len(verts)
    for v in verts:
        simps_list.append(dio.Simplex([v], 0))
        times_list.append(presence_times(
            v in self.vert_labels[i] for i in range(num_networks)))

    if self.verbose:
        print(f"Added {num_verts} vertices to filtration.")

    # list of lists
    # edges_lists[i] contains the edges in network[i] with correct vert labels
    # note edges are sorted in vert label order so edges are no longer directed
    edges_lists = [
        np.sort(rename_edges(
            np.hstack([np.where(self.networks[i] != 0)]).T,
            self.vert_labels[i]), axis=1).tolist()
        for i in range(num_networks)
    ]
    # Hashable copies so that each membership test is O(1) instead of a scan.
    edge_sets = [{tuple(edge) for edge in edges} for edges in edges_lists]

    # list of unique edges across all networks
    unique_edges = np.unique(np.vstack(
        [np.array(es) for es in edges_lists]), axis=0).tolist()
    num_edges = len(unique_edges)

    # Handle edges...
    for e in unique_edges:
        simps_list.append(dio.Simplex(e, 0))
        key = tuple(e)
        times_list.append(presence_times(
            key in edge_sets[i] for i in range(num_networks)))

    if self.verbose:
        print(f"Added {num_edges} edges to filtration.")

    # Handle triangles...
    tri_lists = [
        get_tris(self.networks[i], self.vert_labels[i])
        for i in range(num_networks)
    ]
    tri_sets = [{tuple(tri) for tri in tris} for tris in tri_lists]
    tri_arrays = [np.array(ts) for ts in tri_lists if ts != []]
    # np.vstack rejects an empty sequence, so handle "no triangles anywhere".
    if tri_arrays:
        unique_tris = np.unique(np.vstack(tri_arrays), axis=0).tolist()
    else:
        unique_tris = []
    num_tris = len(unique_tris)

    for t in unique_tris:
        simps_list.append(dio.Simplex(t, 0))
        key = tuple(t)
        times_list.append(presence_times(
            key in tri_sets[i] for i in range(num_networks)))

    if self.verbose:
        print(f"Added {num_tris} triangles to filtration.")

    filtration = dio.Filtration(simps_list)

    return filtration, times_list
format(prime, 1 - lambda_parameter, lp, lambda_parameter, lq), transform=plt.gca().transAxes) pdf.savefig(fig) plt.close('all') print('Penalty function =>', (1 - lambda_parameter), '*L^', lp, "+", lambda_parameter, "*L^", lq, sep='') for g in range(len(cocycles)): chosen_cocycle = cocycles[g] chosen_bar = bars[g] vr_L2 = dionysus.Filtration( [s for s in vr if s.data <= max([bar.birth for bar in bars])]) coords = dionysus.smooth(vr_L2, chosen_cocycle, prime) l2_cocycle, f, bdry = optimizer_inputs(vr, bars, chosen_cocycle, coords, prime) l2_cocycle = l2_cocycle.reshape(-1, 1) ##It does not seem to work to have double invokes here... import tensorflow as tf B_mat = bdry.todense() #print(B_mat.shape) #l2_cocycle=np.zeros((B_mat.shape[1],1)) z = tf.Variable(l2_cocycle, trainable=True) cost_z = (1 - lambda_parameter) * tf.pow( tf.reduce_sum(tf.pow(tf.abs(f - tf.matmul(B_mat, z)), lp)), 1 / lp) + lambda_parameter * tf.pow( tf.reduce_sum(tf.pow(tf.abs(f - tf.matmul(B_mat, z)), lq)), 1 / lq)
else: lambda_list = [0,0.5,1] for lambda_parameter in lambda_list: print('>>>>>> lambda = ',lambda_parameter,'. => Analysis of Circular coordinates \n (mod {} - {}*L{} + {}*L{})'.format(prime,1-lambda_parameter,lp,lambda_parameter,lq)) embedding = [] fig = plt.figure(figsize=(5,5), dpi=100) plt.text(0.3,0.5,'Analysis of Circular coordinates \n (mod {} - {}*L{} + {}*L{})'.format(prime,1-lambda_parameter,lp,lambda_parameter,lq),transform=plt.gca().transAxes) pdf.savefig(fig) plt.close('all') print('Penalty function =>',(1-lambda_parameter),'*L^',lp,"+",lambda_parameter,"*L^",lq,sep='') for g in range(len(cocycles)): chosen_cocycle = cocycles[g] chosen_bar = bars[g] #print(chosen_cocycle,chosen_bar) NEW_THRESHOLD = max([vr[c2.index].data for c2 in chosen_cocycle]) vr_L2 = dionysus.Filtration([s for s in vr if s.data <=NEW_THRESHOLD]) coords = dionysus.smooth(vr_L2, chosen_cocycle, prime) l2_cocycle,f,bdry = optimizer_inputs(vr, bars, chosen_cocycle, coords, prime, NEW_THRESHOLD) l2_cocycle = l2_cocycle.reshape(-1, 1) ##It does not seem to work to have double invokes here... B_mat = bdry.todense() z_init = l2_cocycle z = tf.Variable(z_init, name='z', trainable=True, dtype=tf.float64) trainable_variables = [z] def loss_function(): cost_z = (1-lambda_parameter)*tf.math.pow( tf.math.reduce_sum( tf.math.pow( tf.abs(f - tf.linalg.matmul(B_mat,z) ),lp ) ), 1/lp) + lambda_parameter*tf.math.pow( tf.math.reduce_sum( tf.math.pow( tf.math.abs(f - tf.linalg.matmul(B_mat,z) ),lq ) ), 1/lq) return cost_z #print(B_mat.shape) #l2_cocycle=np.zeros((B_mat.shape[1],1)) tf.random.set_seed(1) optimizer = tf.optimizers.Adam(learning_rate=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=False)
def setup_Zigzag_fixed(self, r, k=2, verbose=False):
    '''
    Helper function for ``run_Zigzag`` that sets up inputs needed for
    Dionysus' zigzag persistence function.
    This only works for a fixed radius r.

    The point clouds are read from ``self.ptclouds['PtCloud']``. For each
    consecutive pair X_{i-1}, X_i a Rips complex of their union is built and
    its simplices are split into those living on X_i only (``B``) and those
    that only exist in the union (``M``).

    Parameters
    ----------
    r: float
        Radius for rips complex
    k: int, optional
        Max dimension for rips complex (default is 2)
    verbose: bool, optional
        If true, prints updates when running code

    Returns
    -------
    filtration: dio.Filtration
        Dionysis filtration containing all simplices that exist in zigzag
        sequence
    times: list
        List of times, where times[i] is a list containing [a,b] where
        simplex i appears at time a, and disappears at time b
    '''
    lst = list(self.ptclouds['PtCloud'])

    simps_list = []
    times_list = []

    # Vertex counter: offset of the first vertex of X_{i-1}
    vertind = 0

    init_st = time.time()

    # Initialize A with R(X_0).
    # The caller-supplied k is honoured here (it used to be hard-coded to 2).
    rips = dio.fill_rips(lst[0], k=k, r=r)
    rips.sort()
    rips_set = set(rips)

    # A holds the simplices with all verts in X_{i-1}
    A = rips_set

    # Add all simps to the list with birth,death=[0,1]
    simps_list.extend(A)
    times_list.extend([0, 1] for _ in range(len(A)))

    # Initialize with vertices for X_0
    verts = set(
        [dio.Simplex([j + vertind], 0) for j, pc in enumerate(lst[0])])

    init_end = time.time()
    if verbose:
        print(f'Initializing done in {init_end-init_st} seconds...')

    loop_st = time.time()

    # Loop over the rest of the point clouds
    for i in range(1, len(lst)):
        # Calculate rips of X_{i-1} \cup X_i
        rips = dio.fill_rips(np.vstack([lst[i - 1], lst[i]]), k=k, r=r)

        # Adjust vertex numbers, sort, and make into a set
        rips = fix_dio_vert_nums(rips, vertind)
        rips.sort()
        rips_set = set(rips)

        # Increment vertex counter
        vertind = vertind + len(verts)

        # Set of vertices in R(X_i)
        verts_next = set(
            [dio.Simplex([j + vertind], 0) for j, pc in enumerate(lst[i])])

        # Set of simplices with verts in X_{i}
        B = set(verts_next.intersection(rips_set))

        # Set of simplices with verts in X_{i-1} AND X_{i}
        M = set()

        # Loop over simplices in R(X_{i-1} \cup X_i)
        for simp in rips:
            # Get the vertices of simp
            bdy = get_verts(simp)

            # If it has no boundary it is a vertex and has been handled
            if not bdy:
                continue

            # If all of its verts are in A, it's been handled in the
            # initialization or the previous iteration
            if bdy.intersection(A) == bdy:
                continue
            # If all of its verts are in B, add it to B
            elif bdy.intersection(B) == bdy:
                B.add(simp)
            # If it has some verts in A and some in B, it only exists in
            # the union, so add it to M
            else:
                M.add(simp)

        # Add simplices in B with the corresponding birth,death times
        simps_list.extend(B)
        times_list.extend([i - 0.5, i + 1] for _ in range(len(B)))

        # Add simplices in M with corresponding birth,death times
        simps_list.extend(M)
        times_list.extend([i - 0.5, i] for _ in range(len(M)))

        # Reinitialize for next iteration
        verts = verts_next
        A = B

    loop_end = time.time()
    if verbose:
        print(f'Preprocessing done in {loop_end-loop_st} seconds...')

    filtration = dio.Filtration(simps_list)

    return filtration, times_list
def setup_Zigzag_changing(self, r, k=2, verbose=False):
    '''
    Helper function for ``run_Zigzag`` that sets up inputs needed for
    Dionysus' zigzag persistence function.
    This one allows r to be a list of radii, rather than one fixed value.

    The (possibly padded or truncated) radius list is stored on ``self.r``.

    Parameters
    ----------
    r: list
        List of radii for rips complex of each X_i. If too short, the last
        entry is repeated; if too long, the extra entries are dropped.
    k: int, optional
        Max dimension for rips complex (default is 2)
    verbose: bool, optional
        If true, prints updates when running code

    Returns
    -------
    filtration: dio.Filtration
        Dionysis filtration containing all simplices that exist in zigzag
        sequence
    times: list
        List of times, where times[i] is a list containing [a,b] where
        simplex i appears at time a, and disappears at time b
    '''
    lst = list(self.ptclouds['PtCloud'])

    # If you haven't input enough r values, just append extra of the last
    # list entry to make it the right number
    if len(r) < len(lst):
        if verbose:
            print('Warning: too few radii given, duplicating last entry')
        r = r + ([r[-1]] * (len(lst) - len(r)))
    elif len(r) > len(lst):
        if verbose:
            print('Warning: too many radii given, only using first ',
                  len(lst))
        r = r[:len(lst)]
    self.r = r

    simps_list = []
    times_list = []

    # Vertex counter: offset of the first vertex of X_{i-1}
    vertind = 0

    init_st = time.time()

    # Initialize A with R(X_0).
    # The caller-supplied k is honoured here (it used to be hard-coded to 2).
    rips = dio.fill_rips(lst[0], k=k, r=r[0])
    rips.sort()
    rips_set = set(rips)

    # Initialize A with set of simplices with verts in X_0
    # In the loop, this will store simplices with verts in X_{i-1}
    A = rips_set

    # Add all simps to the list with birth,death=[0,1]
    simps_list.extend(A)
    times_list.extend([0, 1] for _ in range(len(A)))

    # Initialize with vertices for X_0
    # In the loop, this will store vertices in X_{i-1}
    verts = set(
        [dio.Simplex([j + vertind], 0) for j, pc in enumerate(lst[0])])

    init_end = time.time()
    if verbose:
        print(f'Initializing done in {init_end-init_st} seconds...')

    loop_st = time.time()

    # Loop over the rest of the point clouds
    for i in range(1, len(lst)):
        # Calculate rips of X_{i-1} \cup X_i at the larger of the two radii
        rips = dio.fill_rips(np.vstack([lst[i - 1], lst[i]]),
                             k=k,
                             r=max(r[i - 1], r[i]))

        # Adjust vertex numbers, sort, and make into a set
        rips = fix_dio_vert_nums(rips, vertind)
        rips.sort()
        rips_set = set(rips)

        # Increment vertex counter
        vertind = vertind + len(verts)

        # Set of vertices in X_{i}
        verts_next = set(
            [dio.Simplex([j + vertind], 0) for j, pc in enumerate(lst[i])])

        # Set of simplices with verts in X_{i}
        B = set(verts_next.intersection(rips_set))

        # Set of simplices with verts in X_{i-1} AND X_{i}
        # And simplices in X_{i-1} \cup X_{i} that are not in X_{i-1} or X_i
        M = set()

        # Loop over simplices in R(X_{i-1} \cup X_i)
        for simp in rips:
            # Get the vertices of simp
            bdy = get_verts(simp)

            # If it has no boundary it is a vertex and has been handled
            if not bdy:
                continue

            # If all of its verts are in A, it's been handled in the
            # initialization or the previous iteration ...
            if bdy.intersection(A) == bdy:
                # ... unless the radius grew and this simplex only appears
                # at the larger radius.
                if r[i - 1] < r[i]:
                    if simp.data > r[i - 1]:
                        # If we haven't seen it before, add it to M
                        if simp not in simps_list:
                            M.add(simp)
                        # If we've already added it to the list...
                        else:
                            # Edit the lists to include new birth,death times
                            simps_list, times_list = edit_Simp_Times(
                                simp, [i - 0.5, i], simps_list, times_list)

            # If all of its verts are in B...
            elif bdy.intersection(B) == bdy:
                # If r[i-1] <= r[i], anything with verts in B should also
                # be in B
                if r[i - 1] <= r[i]:
                    B.add(simp)
                # If r[i-1] > r[i], we need to check if it should go in
                # M or B
                else:
                    # If simplex's birth time is greater than the radius,
                    # it goes in M
                    if simp.data > r[i]:
                        M.add(simp)
                    # If it's <= then it goes in B
                    else:
                        B.add(simp)

            # If it has some verts in A and some in B, it only exists in
            # the union
            else:
                # If we haven't seen it before, add it to M
                if simp not in simps_list:
                    M.add(simp)
                # If we've already added it to the list...
                else:
                    # Edit the lists to include new birth,death times
                    simps_list, times_list = edit_Simp_Times(
                        simp, [i - 0.5, i], simps_list, times_list)

        # Add simps and times that are in B
        simps_list.extend(B)
        times_list.extend([i - 0.5, i + 1] for _ in range(len(B)))

        # Add simps and times that are in M
        simps_list.extend(M)
        times_list.extend([i - 0.5, i] for _ in range(len(M)))

        # Reinitialize for next iteration
        verts = verts_next
        A = B

    loop_end = time.time()
    if verbose:
        print(f'Preprocessing done in {loop_end-loop_st} seconds...')

    # Put list of simplices into Filtration format
    filtration = dio.Filtration(simps_list)

    return filtration, times_list
def compute_dynamic_filtration(self, x, hiddens, percentile=None):
    """Build a filtration across the conv1 -> fc1 -> fc2 layers for one input.

    The per-layer work is delegated to ``conv_filtration`` and
    ``linear_filtration``; this method only computes the vertex-id offset at
    which each layer starts and threads the filtration and the birth values
    from one helper call to the next. The long commented-out inline
    implementation that used to sit in this body has been removed.

    Parameters
    ----------
    x
        Input tensor; only ``x[0]`` is passed to the convolutional step.
    hiddens
        Sequence of hidden activations; ``hiddens[0]`` and ``hiddens[1]``
        are used.
    percentile
        Forwarded unchanged to the helper functions.

    Returns
    -------
    The filtration after ``f.sort(reverse=True)``.
    """
    f = dion.Filtration()

    # Ids [0, h1_id_start) are taken by the flattened input.
    h1_id_start = x.cpu().detach().numpy().reshape(-1).shape[0]
    print('h1_id_start', h1_id_start)
    f, h1_births = conv_filtration(f,
                                   x[0],
                                   self.conv1.weight.data[:, 0, :, :],
                                   0,
                                   h1_id_start,
                                   percentile=percentile)

    h2_id_start = h1_id_start + hiddens[0].cpu().detach().numpy().shape[0]
    print('h2_id_start', h2_id_start)
    f, h2_births = linear_filtration(f,
                                     hiddens[0],
                                     self.fc1,
                                     h1_births,
                                     h1_id_start,
                                     h2_id_start,
                                     percentile=percentile,
                                     last=False)

    h3_id_start = h2_id_start + hiddens[1].cpu().detach().numpy().shape[0]
    print('h3_id_start', h3_id_start)
    # With last=True only the filtration is unpacked from the helper.
    f = linear_filtration(f,
                          hiddens[1],
                          self.fc2,
                          h2_births,
                          h2_id_start,
                          h3_id_start,
                          percentile=percentile,
                          last=True)

    print('filtration size', len(f))
    print('Sorting filtration...')
    f.sort(reverse=True)
    return f
import dionysus as di
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict

# Start with no open figures.
plt.close('all')

# Random 2D points and the alpha complex filled from them.
x = np.random.rand(10, 2)
complex = di.Filtration()
di.fill_alpha2D_complex(x.tolist(), complex)
alphashape = [s for s in complex if s.data[0] <= .5]

plt.figure(1)

# d[(a, b)] counts how many selected triangles contain the edge (a, b).
d = defaultdict(int)
for simplex in complex:
    # Only simplices whose first data entry is at most .1 are considered.
    if not (simplex.data[0] <= .1):
        continue
    verts = list(simplex.vertices)
    if len(verts) != 3:
        continue
    i, j, k = sorted(verts)
    for pair in ((i, j), (i, k), (j, k)):
        d[pair] += 1

# Draw the edges that belong to exactly one selected triangle.
for edge, count in d.items():
    if count != 1:
        continue
    i, j = edge
    edge_pts = np.array(x[[i, j]])
    plt.plot(edge_pts[:, 0], edge_pts[:, 1], 'b')
    plt.annotate(str(count), edge_pts.mean(axis=0))

#plt.figure(2)
#edges = [list(s.vertices) for s in complex if s.data[0] <= .5 and s.data[1] and len(list(s.vertices))==2]
def _dispatch_channels(worker, arg_tuples):
    """Run ``worker`` once per argument tuple in a fresh process pool.

    Every result is handed to the module-level ``collect_result`` callback.
    The pool is used as a context manager so it is torn down even if
    dispatching raises; on the normal path it is closed and joined first, so
    all callbacks have run before this function returns.
    """
    with mp.Pool(mp.cpu_count()) as pool:
        for args in arg_tuples:
            pool.apply_async(worker, args=args, callback=collect_result)
        pool.close()
        pool.join()


def compute_induced_filtration_parallel(x,
                                        hiddens,
                                        params,
                                        percentile=0,
                                        stride=1,
                                        return_nm=True,
                                        absolute_value=True):
    """Build the induced filtration of a fixed 11-layer network in parallel.

    The layer layout is hard-coded: layer 0 is dispatched to ``first_layer``,
    layers 1 and 3 to ``max_pool_layer``, layers 2, 4, 5 and 6 to
    ``mid_conv``, layer 7 to ``last_pool`` and layers 8-10 are processed
    in-process with ``linear_filtration_fast2``. Per-channel work runs in a
    process pool and is accumulated by ``collect_result`` into the module
    globals ``f``, ``nm``, ``wm`` and ``id``, which are reset here.

    Parameters
    ----------
    x
        Input tensor, indexed by channel on its first axis.
    hiddens
        Hidden activations, one tensor per layer (indices 0..10 are read).
    params
        Per-layer parameters (indices 0..10 are read).
    percentile
        Percentile passed to ``np.percentile`` for the per-layer thresholds.
    stride
        Stride forwarded to ``first_layer`` and ``mid_conv``.
    return_nm
        If true, return ``(f, nm, wm)``; otherwise return ``f`` only.
    absolute_value
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    The filtration after ``f.sort(reverse=True)``, optionally with the
    ``nm`` and ``wm`` dictionaries.
    """
    global nm
    global id
    global f
    global wm

    def as_numpy(tensor):
        return tensor.cpu().detach().numpy()

    def layer_sizes(activation):
        # (flattened size per channel, product of the two trailing dims)
        flat = activation.reshape((activation.shape[0], -1)).shape[1]
        return flat, activation.shape[1] * activation.shape[2]

    print('cpu count: {}'.format(mp.cpu_count()))

    id = 0
    nm = {}
    wm = {}
    f = dion.Filtration()

    # Default thresholds; several layers overwrite their entry below.
    percentiles = np.zeros((len(params)))
    for l in range(len(params)):
        percentiles[l] = (1 / (l + 1)) * np.percentile(
            np.absolute(as_numpy(hiddens[l])), percentile)

    # ---- layer 0: input -> first convolution ----
    x = as_numpy(x)
    l = 0
    print('layer: {}'.format(l))
    nlc, nls = layer_sizes(as_numpy(hiddens[l]))
    _dispatch_channels(
        first_layer,
        ((x[c], params[l].weight.data[:, c, :, :], l, c, percentiles[l],
          stride, nlc, nls) for c in range(x.shape[0])))

    # ---- layers 1-7: pooling / convolution, one task per channel ----
    layer_plan = ((1, 'pool'), (2, 'conv'), (3, 'pool'), (4, 'conv'),
                  (5, 'conv'), (6, 'conv'), (7, 'last_pool'))
    for l, kind in layer_plan:
        # Activations feeding layer l.
        h = as_numpy(hiddens[l - 1])
        channels = range(h.shape[0])

        if kind == 'conv':
            nlc, nls = layer_sizes(as_numpy(hiddens[l]))
            print('layer: {}'.format(l))
            _dispatch_channels(
                mid_conv,
                ((h[c, :, :], params[l].weight.data[:, c, :, :], l, c,
                  percentiles, stride, nlc, nls) for c in channels))
            continue

        # Pooling layers take their threshold from the incoming activations.
        percentiles[l] = np.percentile(np.absolute(h), percentile)
        print('layer: {}'.format(l))
        if kind == 'pool':
            hn = as_numpy(hiddens[l])
            _dispatch_channels(
                max_pool_layer,
                ((h[c, :, :], params[l], l, c, percentiles, hn[c, :, :])
                 for c in channels))
        else:
            _dispatch_channels(
                last_pool,
                ((h[c, :, :], params[l], l, c, percentiles)
                 for c in channels))

    # ---- layers 8-10: linear layers, processed in this process ----
    l = 7
    h1 = as_numpy(hiddens[l])
    enums = [([spec_hash((l + 1, 0, i[0]))], [h1[i]])
             for i in np.argwhere(h1 > percentiles[l])]

    prev_out_births = None
    for l in (8, 9, 10):
        h1 = as_numpy(hiddens[l - 1])
        print('layer: {}'.format(l))
        p = params[l]
        percentiles[l] = np.percentile(
            np.absolute(h1 * p.weight.data.cpu().detach().numpy()),
            percentile)
        m1, h0_births, h1_births = linear_filtration_fast2(
            h1, p, l, 0, percentile=percentiles[l])
        enums += m1

        # From the second linear layer on, a vertex's birth is the larger of
        # its value as an input here and as an output of the previous layer.
        if prev_out_births is None:
            births = h0_births
        else:
            births = np.maximum.reduce([h0_births, prev_out_births])

        comp_percentile = percentiles[
            l - 1] if percentiles[l - 1] < percentiles[l] else percentiles[l]
        enums += [([spec_hash((l, 0, i[0]))], [births[i]])
                  for i in np.argwhere(births > comp_percentile)]
        prev_out_births = h1_births

    # Output vertices of the final linear layer (l == 10 here).
    enums += [([spec_hash((l + 1, 0, i[0]))], [prev_out_births[i]])
              for i in np.argwhere(prev_out_births > percentiles[l])]

    collect_result(enums)

    print('percentiles:', percentiles)
    print('creating filtration object...')
    print('filtration size', len(f))
    print('Sorting filtration...')
    f.sort(reverse=True)
    if return_nm:
        return f, nm, wm
    else:
        return f
def compute_static_filtration(self, x, hiddens, percentile=None):
    """Build a filtration from the absolute weights of conv1, fc1 and fc2.

    Unlike an activation-weighted filtration, simplex values here are the
    absolute weight entries themselves; ``x`` and ``hiddens`` are only used
    to select which source units are visited (those with a value > 0).

    Vertex ids are laid out consecutively: the flattened input first, then
    the units of ``hiddens[0]``, ``hiddens[1]`` and finally the outputs of
    fc2.

    Parameters
    ----------
    x
        Input tensor; flattened after the convolution matrix is built.
    hiddens
        Hidden activations; ``hiddens[0]`` and ``hiddens[1]`` are used.
    percentile
        If ``None`` every edge with weight > 0 is kept; otherwise the
        threshold for each layer is that percentile of the layer's absolute
        weight matrix.

    Returns
    -------
    The filtration after ``f.sort(reverse=True)``.
    """
    x_id = 0  # NOTE(review): never read in this function
    f = dion.Filtration()
    # Absolute value of the convolution expressed as a matrix (built by a
    # helper defined elsewhere).
    mat = np.absolute(
        conv_layer_as_matrix(self.conv1.weight.data, x, self.conv1.stride[0]))
    x = x.cpu().detach().numpy().reshape(-1)
    if percentile is None:
        percentile_1 = 0
    else:
        percentile_1 = np.percentile(mat, percentile)
    # Indices of strictly positive input entries; each xi is a length-1 array.
    gtzx = np.argwhere(x > 0)
    h1_id_start = x.shape[0]
    h1_births = np.zeros(mat.shape[0])
    # loop over each entry in the reshaped (column) x vector
    for xi in gtzx:
        # column of absolute weights leaving input xi
        all_xis = mat[:, xi]
        max_xi = all_xis.max()
        # the input vertex enters at its largest outgoing weight
        f.append(dion.Simplex([xi], max_xi))
        gtpall_xis = np.argwhere(all_xis > percentile_1)[:, 0]
        # iterate over all weights above the threshold
        for mj in gtpall_xis:
            # keep the largest incoming weight seen so far as the birth
            # value of hidden vertex mj.
            if h1_births[mj] < all_xis[mj]:
                h1_births[mj] = all_xis[mj]
            # NOTE(review): indentation was reconstructed; confirm the edge
            # is meant to be added for every mj, not only when the birth
            # value increases.
            f.append(dion.Simplex([xi, mj + h1_id_start], all_xis[mj]))
    h1 = hiddens[0].cpu().detach().numpy()
    h2_id_start = h1_id_start + h1.shape[0]
    mat = np.absolute(self.fc1.weight.data.cpu().detach().numpy())
    h2_births = np.zeros(mat.shape[0])
    if percentile is None:
        percentile_2 = 0
    else:
        percentile_2 = np.percentile(mat, percentile)
    gtzh1 = np.argwhere(h1 > 0)
    for xi in gtzh1:
        all_xis = mat[:, xi]
        max_xi = all_xis.max()
        # a hidden vertex's birth is the max over its incoming and outgoing
        # weights
        if h1_births[xi] < max_xi:
            h1_births[xi] = max_xi
        gtpall_xis = np.argwhere(all_xis > percentile_2)[:, 0]
        for mj in gtpall_xis:
            if h2_births[mj] < all_xis[mj]:
                h2_births[mj] = all_xis[mj]
            f.append(
                dion.Simplex([xi + h1_id_start, mj + h2_id_start],
                             all_xis[mj]))
    # now add maximum birth time for each h1 hidden vertex to the filtration.
    for i in np.argwhere(h1_births > 0):
        f.append(dion.Simplex([i + h1_id_start], h1_births[i]))
    h2 = hiddens[1].cpu().detach().numpy()
    h3_id_start = h2_id_start + h2.shape[0]
    mat = np.absolute(self.fc2.weight.data.cpu().detach().numpy())
    h3_births = np.zeros(mat.shape[0])
    if percentile is None:
        percentile_3 = 0
    else:
        percentile_3 = np.percentile(mat, percentile)
    gtzh2 = np.argwhere(h2 > 0)
    for xi in gtzh2:
        all_xis = mat[:, xi]
        max_xi = all_xis.max()
        if h2_births[xi] < max_xi:
            h2_births[xi] = max_xi
        gtpall_xis = np.argwhere(all_xis > percentile_3)[:, 0]
        for mj in gtpall_xis:
            if h3_births[mj] < all_xis[mj]:
                h3_births[mj] = all_xis[mj]
            f.append(
                dion.Simplex([xi + h2_id_start, mj + h3_id_start],
                             all_xis[mj]))
    # now add maximum birth time for each h2 hidden vertex to the filtration.
    for i in np.argwhere(h2_births > 0):
        f.append(dion.Simplex([i + h2_id_start], h2_births[i]))
    # now add maximum birth time for each h3 hidden vertex to the filtration.
    for i in np.argwhere(h3_births > 0):
        f.append(dion.Simplex([i + h3_id_start], h3_births[i]))
    print('filtration size', len(f))
    print('Sorting filtration...')
    f.sort(reverse=True)
    return f
# vim: ft=python foldmethod=marker foldlevel=0 import dionysus as d #---# simplices = [([2], 4), ([1, 2], 5), ([0, 2], 6), ([0], 1), ([1], 2), ([0, 1], 3)] f = d.Filtration() for vertices, time in simplices: f.append(d.Simplex(vertices, time)) f.sort() for s in f: print(s) #var> f #---# #var> f m = d.homology_persistence(f) #chk> #---# dgms = d.init_diagrams(m, f) print(dgms) #var> dgms
#draw circ 2 fig = plt.figure() ax = fig.add_subplot(111) ax.set_xlim(-1.1,1.1) ax.set_ylim(-1.1,1.1) ax.scatter(circ_data2[:,0],circ_data2[:,1],c="k") ax.scatter(circ_data2[-1,0],circ_data2[-1,1],c="r") ax.set_aspect("equal") plt.show() #get each barcode #alpha filtration TDA test_comp = circ_alpha1[0] test_bts = circ_alpha1[1] test_f = d.Filtration() for j in range(len(test_comp)): test_f.append(d.Simplex(test_comp[j],test_bts[j])) p = d.homology_persistence(test_f) dgms1 = d.init_diagrams(p, test_f) #scatters d.plot.plot_diagram(dgms1[0]) d.plot.plot_diagram(dgms1[1]) plt.show() #alpha filtration TDA test_comp = circ_alpha2[0] test_bts = circ_alpha2[1] test_f = d.Filtration() for j in range(len(test_comp)):
def persist(self, fs):
    """Compute persistence for the simplices in ``self.S`` valued by ``fs``.

    Each entry of ``self.S`` is paired positionally with the matching entry
    of ``fs`` to form a simplex; the simplices are ordered by that value
    before the filtration is built.

    Returns a dict with the keys ``'filtration'``, ``'homology'`` and
    ``'diagram'``.
    """
    simplices = []
    for vertices, value in zip(self.S, fs):
        simplices.append(dio.Simplex(vertices, value))
    # Stable sort by value, so ties keep the order of self.S.
    simplices.sort(key=lambda simplex: simplex.data)

    filtration = dio.Filtration(simplices)
    homology = dio.homology_persistence(filtration)
    diagrams = dio.init_diagrams(homology, filtration)
    return {
        'filtration': filtration,
        'homology': homology,
        'diagram': diagrams,
    }
def compute_dynamic_filtration(self, x, hiddens, percentile=None):
    """Build a filtration across the c1 -> c2 -> l1 -> l2 layers for one input.

    The per-layer work is delegated to ``conv_filtration`` and
    ``linear_filtration``; this method computes the vertex-id offsets of
    each layer and threads the filtration and birth values through the
    helper calls.

    Parameters
    ----------
    x
        Input tensor, indexed by channel on its first axis.
    hiddens
        Hidden activations; ``hiddens[0]``, ``hiddens[1]`` and
        ``hiddens[2]`` are used.
    percentile
        Forwarded unchanged to the helper functions.

    Returns
    -------
    The filtration after ``f.sort(reverse=True)``.
    """
    f = dion.Filtration()

    # ---- first convolution: one helper call per input channel ----
    id_start = 0
    num_channels = x.shape[0]
    for c in range(num_channels):
        s = x[c].cpu().detach().numpy().reshape(-1).shape[0]
        h1_id_start = id_start + s
        # NOTE(review): id_start is not advanced between channels and
        # h1_births is overwritten on every iteration, so only the last
        # channel's births survive - confirm inputs are single-channel.
        f, h1_births = conv_filtration(f,
                                       x[c],
                                       self.c1.weight.data[:, c, :, :],
                                       id_start,
                                       h1_id_start,
                                       percentile=percentile,
                                       stride=self.c1.stride[0])

    h1_births = h1_births.reshape(hiddens[0].shape)

    # ---- second convolution: one helper call per channel of hiddens[0] ----
    h2_id_start = h1_id_start + hiddens[0].cpu().detach().numpy().flatten(
    ).shape[0]
    start_2 = h2_id_start
    h2_births = []
    for d in range(hiddens[0].shape[0]):
        # Offset of channel d inside the block of first-layer vertex ids.
        start_1 = h1_id_start + (h1_births[d].flatten().shape[0] * d)
        f, h2b = conv_filtration(f,
                                 hiddens[0][d],
                                 self.c2.weight.data[:, d, :, :],
                                 start_1,
                                 start_2,
                                 h0_births=h1_births[d],
                                 percentile=percentile,
                                 # Use the stride of the layer whose weights
                                 # are being processed (was self.c1's).
                                 stride=self.c2.stride[0])
        start_2 += h2b.shape[0]
        h2_births.append(h2b)

    h2_births = np.array(h2_births).reshape(-1)

    # ---- linear layers ----
    h3_id_start = h2_id_start + hiddens[1].cpu().detach().numpy().shape[0]
    f, h3_births = linear_filtration(f,
                                     hiddens[1],
                                     self.l1,
                                     h2_births,
                                     h2_id_start,
                                     h3_id_start,
                                     percentile=percentile,
                                     last=False)

    h4_id_start = h3_id_start + hiddens[2].cpu().detach().numpy().shape[0]
    # With last=True only the filtration is unpacked from the helper.
    f = linear_filtration(f,
                          hiddens[2],
                          self.l2,
                          h3_births,
                          h3_id_start,
                          h4_id_start,
                          percentile=percentile,
                          last=True)

    print('filtration size', len(f))
    print('Sorting filtration...')
    f.sort(reverse=True)
    return f
def EPH_mask_demo(vertex_values, simplices):
    """Time the stages of a persistence computation with a cone vertex -1.

    Each simplex is entered twice: once as itself, valued by the largest
    value among its vertices, and once with the extra vertex ``-1``
    prepended, valued by the smallest. Only timings are returned; the
    collected pairings and the diagrams are discarded.

    Parameters
    ----------
    vertex_values
        Indexable giving the value of each vertex.
    simplices
        Iterable of simplices, each an iterable of vertex ids.

    Returns
    -------
    list of float
        Five wall-clock durations: building the simplices, sorting them,
        computing persistence, extracting the pairings, and
        ``dionysus.init_diagrams``.
    """
    times = []

    # Stage 1: per simplex, its sorted vertices and its [value, vertex]
    # pairs sorted by value.
    tic = time.time()
    s2v_lst = []
    for s in simplices:
        by_value = sorted([[vertex_values[v], v] for v in s],
                          key=lambda x: x[0])
        s2v_lst.append([sorted(s), by_value])
    f_ord = [dionysus.Simplex(verts, vals[-1][0])
             for verts, vals in s2v_lst]  # takes max
    f_ext = [dionysus.Simplex([-1] + verts, vals[0][0])
             for verts, vals in s2v_lst]  # takes min
    # Vertex attaining the max / min value of each simplex.
    ord_dict = {tuple(verts): vals[-1][1] for verts, vals in s2v_lst}
    ext_dict = {tuple([-1] + verts): vals[0][1] for verts, vals in s2v_lst}
    times.append(time.time() - tic)

    # Stage 2: ascending order for the plain simplices, descending for the
    # coned ones; ties broken by simplex size.
    tic = time.time()
    f_ord.sort(key=lambda s: (s.data, len(s)))
    f_ext.sort(key=lambda s: (-s.data, len(s)))
    times.append(time.time() - tic)

    # Stage 3: computes persistence
    tic = time.time()
    f = dionysus.Filtration([dionysus.Simplex([-1], -float('inf'))] + f_ord +
                            f_ext)
    m = dionysus.homology_persistence(f)
    times.append(time.time() - tic)

    # Stage 4: sort the off-diagonal pairs into four buckets.
    # NOTE(review): the original header comment names the buckets
    # "H0ord, H0ext, H1rel, H1ext" while the branch comments say
    # rel1 -> dgms[2], ord0 -> dgms[1], H0ext -> dgms[0], H1ext -> dgms[3];
    # confirm which labelling is intended.
    tic = time.time()
    dgms = [[[], []], [[], []], [[], []], [[], []]]
    for idx in range(len(m)):
        dim = f[idx].dimension()
        partner = m.pair(idx)
        # skip negative simplices to avoid double counting
        if partner < idx:
            continue
        # should be no unpaired apart from H0 from fictitious -1 vertex
        if partner == m.unpaired:
            continue
        pos, neg = f[idx], f[partner]
        # on-diagonal pairs are dropped
        if pos.data == neg.data:
            continue

        pos_coned = -1 in pos
        neg_coned = -1 in neg
        if pos_coned and neg_coned:  # rel1
            dgms[2][0].append(ext_dict[tuple(neg)])
            dgms[2][1].append(ext_dict[tuple(pos)])
        elif not pos_coned and not neg_coned:  # ord0
            dgms[1][0].append(ord_dict[tuple(pos)])
            dgms[1][1].append(ord_dict[tuple(neg)])
        elif dim == 0:  # H0ext
            dgms[0][0].append(ord_dict[tuple(pos)])
            dgms[0][1].append(ext_dict[tuple(neg)])
        elif dim == 1:  # H1ext
            dgms[3][0].append(ext_dict[tuple(neg)])
            dgms[3][1].append(ord_dict[tuple(pos)])
    times.append(time.time() - tic)

    # Stage 5: library diagram construction, timed for comparison.
    tic = time.time()
    dgms = dionysus.init_diagrams(m, f)
    times.append(time.time() - tic)

    return times