def egograph(self, node, delete_center=False):
    """Extract the ego graph of `node` within `self.radius` hops."""
    t0 = time.time()
    res = nx.ego_graph(self.graph, node, radius=self.radius)
    if delete_center:
        res.remove_node(node)  # keep only the punctured neighborhood
    if self.print_flag:
        print('Finish node %s in %s' % (node, precision_format(time.time() - t0, 3)))
    return res
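# A minimal usage sketch for egograph. `_Host` below is a hypothetical stand-in
# for the class this method actually lives on (not shown here); egograph only
# relies on the `graph`, `radius`, and `print_flag` attributes.
def _example_egograph():
    import networkx as nx

    class _Host:
        def __init__(self, graph, radius=1, print_flag=False):
            self.graph, self.radius, self.print_flag = graph, radius, print_flag

    host = _Host(nx.karate_club_graph(), radius=1)
    ego = egograph(host, 0, delete_center=True)  # call with an explicit `self`
    print(ego.number_of_nodes())  # 1-hop neighbors of node 0, center removed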
def sw_dist(self):
    """Sliced-Wasserstein distance between the two diagrams in self.dgms, recovered from the sw kernel."""
    swdgms = dgms2swdgms(self.dgms)
    res = sw([swdgms[0]], [swdgms[1]], kernel_type='sw', n_directions=10, bandwidth=1.0, K=1, p=1)[0][0]
    sw_dist = np.log(res) * (-2)  # invert the exponential kernel (bandwidth is 1.0)
    return precision_format(sw_dist)
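# Why `np.log(res) * (-2)` above recovers a distance: the sliced-Wasserstein
# kernel is exponential in the distance, k = exp(-SW / (2 * bandwidth)) (or
# bandwidth**2, depending on convention; with bandwidth = 1.0 both coincide).
# A self-contained check of the inversion, with a made-up distance value:
def _example_sw_inversion():
    import numpy as np
    sw_distance = 0.7  # pretend this is the true sliced-Wasserstein distance
    k = np.exp(-sw_distance / 2.0)  # kernel value a bandwidth-1.0 sw() call would return
    recovered = np.log(k) * (-2)    # the inversion performed in sw_dist
    assert np.isclose(recovered, sw_distance)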
def evaluate_tda_kernel(tda_kernel, Y, best_result_so_far, print_flag='off'):
    """
    Evaluate a precomputed TDA kernel with an SVM.

    :param tda_kernel: precomputed kernel matrix of shape (n, n)
    :param Y: labels
    :param best_result_so_far: tuple whose first entry is the best accuracy seen so far
    :param print_flag: 'on' to print progress
    :return: (mean cv accuracy, cv std, grid-search result, svm time)
    """
    t1 = time.time()
    n = np.shape(tda_kernel)[0]
    grid_search_re = train_svm(np.zeros((n, 23)), Y, print_flag=print_flag, kernel=tda_kernel,
                               kernel_flag=True, nonlinear_flag=False)  # X is a dummy here
    if grid_search_re['score'] < best_result_so_far[0] - 4:
        print('Saved one unnecessary evaluation of a bad kernel')
        return (0, 0, {}, 0)

    cv_score = []
    for seed in range(5):
        clf = svm.SVC(kernel='precomputed', C=grid_search_re['param']['C'])
        k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
        scores = cross_val_score(clf, tda_kernel, Y, cv=k_fold, scoring='accuracy', n_jobs=-1)
        cv_score.append(scores.mean())
    cv_score = np.array(cv_score)

    t2 = time.time()
    svm_time = precision_format(t2 - t1, 1)
    return (precision_format(100 * cv_score.mean(), 1),
            precision_format(100 * cv_score.std(), 1),
            grid_search_re, svm_time)
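# A self-contained sketch of the precomputed-kernel cross-validation pattern
# used in evaluate_tda_kernel, with sklearn only; the toy linear kernel below
# stands in for a real TDA kernel matrix.
def _example_precomputed_kernel_cv():
    import numpy as np
    from sklearn import svm
    from sklearn.model_selection import StratifiedKFold, cross_val_score

    rng = np.random.RandomState(0)
    X = rng.randn(60, 5)
    Y = (X[:, 0] > 0).astype(int)
    K = X @ X.T  # precomputed kernel matrix of shape (n, n)

    clf = svm.SVC(kernel='precomputed', C=1.0)
    k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
    scores = cross_val_score(clf, K, Y, cv=k_fold, scoring='accuracy')
    print('mean accuracy: %.3f' % scores.mean())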
def alldgms(gs, radius=1, n=100, dataset='blogcatalog', recompute_flag=False, method='serial', verbose=5, zigzag=False):
    """
    Compute (or load cached) persistence diagrams for a list of ego graphs.

    :param gs: a list of ego graphs
    :param radius: radius of the ego graphs. # todo not very useful.
    :param n: number of graphs; only used to name the cache file
    :param dataset: dataset name, used to locate the cache directory
    :param recompute_flag: whether to recompute or not
    :param method: 'serial' or 'parallel'
    :param verbose: verbosity level passed to joblib
    :param zigzag: whether to compute zigzag persistence
    :return: a list of dgms
    """
    t0 = time.time()
    dir = os.path.join('/home/cai.507/Documents/DeepLearning/deep-persistence/EigenPro2/emb', dataset, '')  # the directory for cached diagrams
    file = dir + 'dgms_radius_' + str(radius) + '_' + str(n) + '.emb'
    if recompute_flag and os.path.exists(file):
        os.remove(file)

    try:  # load existing dgms
        with open(file, "r") as f:
            print(file)
            diags = json.load(f)
        print('Loading existing dgms takes %s\n' % precision_format(time.time() - t0))
        return diags2dgms(diags)
    except (IOError, FileNotFoundError):
        if method == 'parallel':
            diags = Parallel(n_jobs=-1, verbose=verbose)(
                delayed(wrapper_getdiagram)(g, zigzag=zigzag) for g in gs)  # the cpu usage is only 250%. TODO: optimize
            dgms = diags2dgms(diags)
            print('Dgms Parallel version finished')
        elif method == 'serial':
            dgms = Parallel(n_jobs=1, verbose=verbose)(
                delayed(wrapper_getdiagram)(g, parallel_flag=False, zigzag=zigzag) for g in gs)
            print('Dgms Serial version finished')
        else:
            raise Exception('No method %s' % method)

        # save the computed dgms
        with open(file, 'w') as f:
            diags = dgms2diags(dgms)
            json.dump(diags, f)
        print('Finish computing and saving %s dgms using method %s. It takes %s\n'
              % (len(dgms), method, precision_format(time.time() - t0)))
        return diags2dgms(diags)
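# The caching logic in alldgms, reduced to a self-contained sketch: try to load
# a JSON cache, recompute on a miss, and write the result back. The path and
# the compute() stand-in are made up for illustration.
def _example_json_cache(path='/tmp/dgms_cache.json'):
    import json

    def compute():
        return [[[0.0, 1.0]], [[0.5, 2.0]]]  # stand-in for dgms2diags(dgms)

    try:
        with open(path, 'r') as f:
            diags = json.load(f)
    except (IOError, FileNotFoundError):
        diags = compute()
        with open(path, 'w') as f:
            json.dump(diags, f)
    return diags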
def sw_parallel(dgms1, dgms2, kernel_type='sw', parallel_flag=True, granularity=25, **featkwargs):
    """
    Built on top of the function sw.

    :param dgms1: a list of arrays
    :param dgms2: a list of arrays
    :param kernel_type: 'sw', 'pss', or 'wg'
    :param parallel_flag: True to compute in parallel
    :param granularity: chunk size for parallel computing
    :param featkwargs: kwargs forwarded to sw/pss/wg
    :return: (normalized kernel of shape (n2, n1), elapsed time)
    """
    t1 = time.time()
    assert_sw_dgm(dgms1)
    assert_sw_dgm(dgms2)
    n1, n2 = len(dgms1), len(dgms2)
    kernel = np.zeros((n2, n1))

    if parallel_flag:
        # parallel version: compute the kernel in row chunks of size `granularity`
        kernel = Parallel(n_jobs=-1, backend='multiprocessing')(
            delayed(sw)(dgms1, dgms2[i:min(i + granularity, n2)], kernel_type=kernel_type, **featkwargs)
            for i in range(0, n2, granularity))
        kernel = np.vstack(kernel)
    else:
        # serial version, used as verification
        for i in range(n2):
            kernel[i] = sw(dgms1, [dgms2[i]], kernel_type=kernel_type, **featkwargs)

    t = precision_format(time.time() - t1, 1)
    print('Finish computing %s kernel of shape %s. Takes %s' % (kernel_type, kernel.shape, t))
    return (kernel / float(np.max(kernel)), t)
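# A toy, self-contained version of the chunked parallelism in sw_parallel:
# split the output rows into chunks of size `granularity`, compute each chunk
# in its own job, and vstack the pieces. `rows` is a stand-in for sw().
def _example_chunked_parallel():
    import numpy as np
    from joblib import Parallel, delayed

    def rows(i, j, n_cols):
        return np.arange(i, j)[:, None] * np.ones((1, n_cols))

    n2, n1, granularity = 10, 4, 3
    chunks = Parallel(n_jobs=2)(
        delayed(rows)(i, min(i + granularity, n2), n1)
        for i in range(0, n2, granularity))
    kernel = np.vstack(chunks)
    assert kernel.shape == (n2, n1)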
def load_existing_graph(graph, file):
    start = time.time()
    if os.path.isfile(file):
        print('Loading existing files')
    if graph == 'reddit_12K':
        file = os.path.join('/home/cai.507/Documents/DeepLearning/deep-persistence/dataset/datasets/',
                            'reddit_12K' + '.graph')
        with open(file, 'rb') as f:
            data = pickle.load(f, encoding='latin1')
        graphs, labels = data['graph'], data['labels']
    else:
        with open(file, 'rb') as f:
            graphs, labels = pickle.load(f, encoding='latin1')
    print('Loading takes %s' % precision_format(time.time() - start, 1))
    if graph == 'ptc':
        graphs[151] = graphs[152]  # small hack
    return graphs, labels
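# The encoding='latin1' arguments above exist because the datasets were pickled
# under Python 2; latin1 is the standard way to load such pickles under
# Python 3. A minimal self-contained round trip (toy data, made-up path):
def _example_pickle_roundtrip(path='/tmp/toy.graph'):
    import pickle
    with open(path, 'wb') as f:
        pickle.dump(({0: {'neighbors': [1]}, 1: {'neighbors': [0]}}, [0, 1]), f)
    with open(path, 'rb') as f:
        graphs, labels = pickle.load(f, encoding='latin1')
    return graphs, labels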
def format_(v, idx=0):
    """Format an error rate as a percentage. idx 0 for training error, idx 1 for test error."""
    from Esme.helper.format import precision_format
    res = (1 - list(list(v.values())[0][idx].values())[0][1]) * 100
    return precision_format(res, 1)
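# format_ unpacks a deeply nested result dict. The `v` below is a hypothetical
# value with the shape the indexing implies: outer dict -> (train, test) pair
# -> inner dict whose values end in an accuracy. It is only a guess at the
# real structure, for illustration.
def _example_format_():
    v = {'svm': ({'fold0': ('model', 0.98)},   # idx 0: training accuracy
                 {'fold0': ('model', 0.91)})}  # idx 1: test accuracy
    print(format_(v, idx=1))  # test error: (1 - 0.91) * 100 = 9.0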
def bd_dist(self):
    """Bottleneck distance between the two diagrams."""
    return precision_format(d.bottleneck_distance(self.dgm1, self.dgm2))
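# A self-contained sketch of the bottleneck computation in bd_dist, assuming
# the dionysus 2 bindings (imported as `d` in this module) and assuming that
# d.Diagram accepts a list of (birth, death) pairs:
def _example_bd_dist():
    import dionysus as d
    dgm1 = d.Diagram([(0.0, 1.0), (2.0, 5.0)])
    dgm2 = d.Diagram([(0.0, 1.5), (2.0, 4.0)])
    print(d.bottleneck_distance(dgm1, dgm2))  # 1.0: matching (2,5) to (2,4) dominates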
def dgms_vecs(self, **kwargs):
    """
    Vectorize persistence diagrams.

    :param kwargs: pass all kwargs here.
                   PI: 'bandwidth', 'weight', 'im_range', 'resolution'.
                   PL: 'num_landscapes', 'resolution'.
    :return: np.array of shape (n_dgms, n_dim) where all-zero columns are removed
    """
    self.param = kwargs
    t1 = time.time()

    def arctan(C, p):
        # optional weight function for persistence images: w(x) = C * arctan(persistence ** p)
        return lambda x: C * np.arctan(np.power(x[1], p))

    if self.vec_type == 'pi':
        # persistence image with fixed parameters
        diagsT = DiagramPreprocessor(use=True, scalers=[([0, 1], BirthPersistenceTransform())]).fit_transform(self.diags)
        PI = PersistenceImage(bandwidth=1., weight=lambda x: x[1], im_range=[0, 10, 0, 10], resolution=[100, 100])
        res = PI.fit_transform(diagsT)
    elif self.vec_type == 'pi_':
        # persistence image with parameters taken from kwargs
        kwargs_ = filterdict(kwargs, ['bandwidth', 'weight', 'im_range', 'resolution'])
        diagsT = DiagramPreprocessor(use=True, scalers=[([0, 1], BirthPersistenceTransform())]).fit_transform(self.diags)
        PI = PersistenceImage(**kwargs_)
        res = PI.fit_transform(diagsT)
    elif self.vec_type == 'pl':
        # persistence landscape
        kwargs_ = filterdict(kwargs, ['num_landscapes', 'resolution'])
        LS = tda.Landscape(**kwargs_)
        res = LS.fit_transform(self.diags)
    elif self.vec_type == 'pervec':
        # permutation vector, i.e. the histogram of the coordinates of each dgm
        kwargs_ = filterdict(kwargs, ['dim'])
        dgms = self.dgms
        res = coordinate(dgms[0], **kwargs_)
        for i in range(1, len(dgms)):
            tmp = coordinate(dgms[i], **kwargs_)
            res = np.concatenate((res, tmp), axis=0)
        assert res.shape[0] == len(dgms)
    else:
        raise Exception('Unknown vec_type. You can only choose pi, pi_, pl or pervec')

    self.t = precision_format(time.time() - t1, 1)
    if kwargs.get('keep_zero', False):
        return normalize_(res, axis=self.axis)
    return rm_zerocol(normalize_(res, axis=self.axis), cor_flag=False)
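# A minimal landscape-vectorization sketch, recovered from the experiments that
# were inlined in dgms_vecs (assumes the sklearn_tda package imported as `tda`;
# the expected output shape is (n_diagrams, num_landscapes * resolution)):
def _example_landscape():
    import numpy as np
    D = np.array([[0., 4.], [1., 2.], [3., 8.], [6., 8.]])
    LS = tda.Landscape(num_landscapes=5, resolution=100)
    res = LS.fit_transform([D, D])  # two identical diagrams -> two identical rows
    print(res.shape)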