def getDiff(graph_dir, lcc_dir):
    """
    Compare every full graph in a directory with its largest connected component.

    For each file in graph_dir the "fibergraph" entry of the .mat file is
    loaded, and the matching LCC adjacency matrix is built with loadAdjMat
    from "<prefix>_concomp.npy" in lcc_dir, where <prefix> is the part of the
    graph file name before the first underscore.

    Side effects (all written to the current working directory):
      - "lcc-full_comp": a CSV-style table with one row per graph
      - full_vert, full_edge, lcc_vert, lcc_edge: the per-graph counts,
        written with np.save

    Positional Args:
    ===============
    graph_dir - directory holding the full graphs (.mat files)
    lcc_dir - directory holding the connected component files (.npy)
    """
    rows = ["Sample, Full nodes, LCC nodes, Full edges, LCC edges\n"]
    full_vert = []
    full_edge = []
    lcc_vert = []
    lcc_edge = []

    for fn in os.listdir(graph_dir):
        graph_fn = os.path.join(graph_dir, fn)
        lcc_fn = os.path.join(lcc_dir, fn.split("_")[0] + "_concomp.npy")

        g = sio.loadmat(graph_fn)["fibergraph"]
        G_lcc = loadAdjMat(graph_fn, lcc_fn)

        n_full = g.shape[0]
        e_full = g.nnz
        n_lcc = G_lcc.shape[0]
        # Explicit floor division keeps the integer result on Python 2 and 3.
        # NOTE(review): the LCC count is halved but the full graph count is
        # not - presumably loadAdjMat returns a symmetric matrix while the
        # stored fibergraph is triangular; confirm.
        e_lcc = G_lcc.nnz // 2

        full_vert.append(n_full)
        full_edge.append(e_full)
        lcc_vert.append(n_lcc)
        lcc_edge.append(e_lcc)

        print("%s ==> full (n,e)= (%d, %d), lcc (n,e) = (%d, %d)" % (
            fn, n_full, e_full, n_lcc, e_lcc))
        rows.append("%s, %d, %d, %d, %d\n" % (fn, n_full, n_lcc, e_full, e_lcc))

    # The context manager closes the file even if the write fails.
    with open("lcc-full_comp", "w") as f:
        f.write("".join(rows))

    np.save("full_vert", full_vert)
    np.save("full_edge", full_edge)
    np.save("lcc_vert", lcc_vert)
    np.save("lcc_edge", lcc_edge)
def getDiff(graph_dir, lcc_dir):
    """
    Compare every full graph in a directory with its largest connected component.

    For each file in graph_dir the "fibergraph" entry of the .mat file is
    loaded, and the matching LCC adjacency matrix is built with loadAdjMat
    from "<prefix>_concomp.npy" in lcc_dir, where <prefix> is the part of the
    graph file name before the first underscore.

    Side effects (all written to the current working directory):
      - "lcc-full_comp": a CSV-style table with one row per graph
      - full_vert, full_edge, lcc_vert, lcc_edge: the per-graph counts,
        written with np.save

    Positional Args:
    ===============
    graph_dir - directory holding the full graphs (.mat files)
    lcc_dir - directory holding the connected component files (.npy)
    """
    rows = ["Sample, Full nodes, LCC nodes, Full edges, LCC edges\n"]
    full_vert = []
    full_edge = []
    lcc_vert = []
    lcc_edge = []

    for fn in os.listdir(graph_dir):
        graph_fn = os.path.join(graph_dir, fn)
        lcc_fn = os.path.join(lcc_dir, fn.split('_')[0] + '_concomp.npy')

        g = sio.loadmat(graph_fn)['fibergraph']
        G_lcc = loadAdjMat(graph_fn, lcc_fn)

        n_full = g.shape[0]
        e_full = g.nnz
        n_lcc = G_lcc.shape[0]
        # Explicit floor division keeps the integer result on Python 2 and 3.
        # NOTE(review): the LCC count is halved but the full graph count is
        # not - presumably loadAdjMat returns a symmetric matrix while the
        # stored fibergraph is triangular; confirm.
        e_lcc = G_lcc.nnz // 2

        full_vert.append(n_full)
        full_edge.append(e_full)
        lcc_vert.append(n_lcc)
        lcc_edge.append(e_lcc)

        print("%s ==> full (n,e)= (%d, %d), lcc (n,e) = (%d, %d)" % (
            fn, n_full, e_full, n_lcc, e_lcc))
        rows.append("%s, %d, %d, %d, %d\n" % (fn, n_full, n_lcc, e_full, e_lcc))

    # The context manager closes the file even if the write fails.
    with open('lcc-full_comp', 'w') as f:
        f.write("".join(rows))

    np.save('full_vert', full_vert)
    np.save('full_edge', full_edge)
    np.save('lcc_vert', lcc_vert)
    np.save('lcc_edge', lcc_edge)
def load_and_store(dirg):
    """
    Load up an LCC adjacency matrix for every graph in dirg and save it elsewhere.

    *NOTE: This is not a general script and is specific to the
    bg1:/data/public/MR/MIGRAINE data

    For each path matched by "<dirg>/*" the LCC file
    "<parent of dirg>/big_lcc/<root>big_lcc.npy" is looked up, the adjacency
    matrix is built with loadAdjMat and written with scipy's savemat to
    "<parent of dirg>/big_lcc_graphs/<root>big_lcc_adjmat" under the key
    "data". <root> is the graph file name without its last 13 characters.

    Positional Args:
    ===============
    dirg - the directory with a graph
    """
    print("** Processing dataset: %s ... **\n" % dirg)

    # Strip trailing slashes so that dirname() returns the PARENT of dirg;
    # with a trailing slash dirname() would return dirg itself and the output
    # would land inside the graph directory. A bare "/" is left untouched.
    dirg = dirg.rstrip("/") or dirg
    graphs = glob(os.path.join(dirg, "*"))

    base_dir = os.path.dirname(dirg)
    save_dir = os.path.join(base_dir, "big_lcc_graphs")
    if not os.path.exists(save_dir):
        print("Making %s ..." % save_dir)
        os.makedirs(save_dir)

    for g_fn in graphs:
        print("\nProcessing %s ..." % g_fn)
        # NOTE(review): 13 is presumably the length of a fixed file name
        # suffix in this dataset - confirm against the actual file names.
        fn_root = os.path.basename(g_fn)[:-13]
        lcc_fn = os.path.join(base_dir, "big_lcc", fn_root + "big_lcc.npy")

        if os.path.exists(g_fn) and os.path.exists(lcc_fn):
            g = loadAdjMat(g_fn, lcc_fn)
            fn = os.path.join(save_dir, fn_root + "big_lcc_adjmat")
            print("Saving %s ..." % fn)
            # fn already contains save_dir; joining it again would duplicate
            # the directory part whenever save_dir is a relative path.
            sio.savemat(fn, {"data": g})
        else:
            if not os.path.exists(g_fn):
                print("Graph path %s does not exist ..." % g_fn)
            if not os.path.exists(lcc_fn):
                print("Lcc path %s does not exist ..." % lcc_fn)

    print("** Done with %s ** \n\n" % dirg)
def load_and_store(dirg):
    """
    Load up an LCC adjacency matrix for every graph in dirg and save it elsewhere.

    *NOTE: This is not a general script and is specific to the
    bg1:/data/public/MR/MIGRAINE data

    For each path matched by "<dirg>/*" the LCC file
    "<parent of dirg>/big_lcc/<root>big_lcc.npy" is looked up, the adjacency
    matrix is built with loadAdjMat and written with scipy's savemat to
    "<parent of dirg>/big_lcc_graphs/<root>big_lcc_adjmat" under the key
    "data". <root> is the graph file name without its last 13 characters.

    Positional Args:
    ===============
    dirg - the directory with a graph
    """
    print("** Processing dataset: %s ... **\n" % dirg)

    # Strip trailing slashes so that dirname() returns the PARENT of dirg;
    # with a trailing slash dirname() would return dirg itself and the output
    # would land inside the graph directory. A bare "/" is left untouched.
    dirg = dirg.rstrip("/") or dirg
    graphs = glob(os.path.join(dirg, "*"))

    base_dir = os.path.dirname(dirg)
    save_dir = os.path.join(base_dir, "big_lcc_graphs")
    if not os.path.exists(save_dir):
        print("Making %s ..." % save_dir)
        os.makedirs(save_dir)

    for g_fn in graphs:
        print("\nProcessing %s ..." % g_fn)
        # NOTE(review): 13 is presumably the length of a fixed file name
        # suffix in this dataset - confirm against the actual file names.
        fn_root = os.path.basename(g_fn)[:-13]
        lcc_fn = os.path.join(base_dir, "big_lcc", fn_root + "big_lcc.npy")

        if os.path.exists(g_fn) and os.path.exists(lcc_fn):
            g = loadAdjMat(g_fn, lcc_fn)
            fn = os.path.join(save_dir, fn_root + "big_lcc_adjmat")
            print("Saving %s ..." % fn)
            # fn already contains save_dir; joining it again would duplicate
            # the directory part whenever save_dir is a relative path.
            sio.savemat(fn, {"data": g})
        else:
            if not os.path.exists(g_fn):
                print("Graph path %s does not exist ..." % g_fn)
            if not os.path.exists(lcc_fn):
                print("Lcc path %s does not exist ..." % lcc_fn)

    print("** Done with %s ** \n\n" % dirg)
def _save_invariant(inv_dict, key, subdir, suffix, data, label):
    '''
    Save one invariant below inv_dict['save_dir']/subdir, record the file name
    in inv_dict[key] and report where it went.
    '''
    inv_dict[key] = os.path.join(inv_dict['save_dir'], subdir,
                                 getBaseName(inv_dict['graph_fn']) + suffix)
    createSave(inv_dict[key], data)
    print(label + ' saved as ' + inv_dict[key])


def compute(inv_dict, save=True):
    '''
    Actual function that computes invariants and saves them to a location

    positional arguments:
    =====================
    inv_dict: is a dict optionally containing any of these:
      - inv_dict['edge']: boolean for global edge count
      - inv_dict['ver']: boolean for global vertex number
      - inv_dict['tri']: boolean for local triangle count
      - inv_dict['tri_fn']: the path of a precomputed triangle count (.npy)
      - inv_dict['eig']: boolean for eigenvalues and eigenvectors
      - inv_dict['eigvl_fn']: the path of a precomputed eigenvalues (.npy)
      - inv_dict['eigvect_fn']: the path of a precomputed eigenvectors (.npy)
      - inv_dict['deg']: boolean for local degree count
      - inv_dict['deg_fn']: the path of a precomputed degree count (.npy)
      - inv_dict['ss1']: boolean for scan 1 statistic
      - inv_dict['cc']: boolean for clustering coefficient
      - inv_dict['mad']: boolean for maximum average degree
      - inv_dict['save_dir']: the base path where all invariants will create
        sub-dirs & be saved. Defaults to the directory of graph_fn
    The code additionally reads:
      - inv_dict['G']: an already loaded graph; when missing or None the graph
        is loaded from inv_dict['graph_fn']
      - inv_dict['graph_fn']: path of the graph, also used to name the outputs
      - inv_dict['graphsize']: 'b'/'big' loads through loadAdjMat with
        inv_dict['lcc_fn'], anything else through loadAnyMat with
        inv_dict['data_elem']
      - inv_dict['k']: number of eigenvalues to compute, capped at
        min(100, number of nodes - 2)

    optional arguments:
    ===================
    save: boolean for auto save or not. TODO: use this

    returns:
    ========
    inv_dict with the '*_fn' entries of every invariant that was saved filled
    in, or an error message string when the graph or the precomputed
    eigenvalues/eigenvectors could not be loaded.
    '''
    # Populate inv_dict
    inv_dict = populate_inv_dict(inv_dict)

    if inv_dict['save_dir'] is None:
        inv_dict['save_dir'] = os.path.dirname(inv_dict['graph_fn'])

    # Use the supplied graph if there is one, otherwise load it. Falling back
    # to loading for both a missing key and a None value keeps G always bound.
    G = inv_dict.get('G')
    if G is None:
        if inv_dict['graphsize'] == 'b' or inv_dict['graphsize'] == 'big':
            G = loadAdjMat(inv_dict['graph_fn'], inv_dict['lcc_fn'])
        else:  # small graphs
            G = loadAnyMat(inv_dict['graph_fn'], inv_dict['data_elem'])

    if isinstance(G, str):
        print(G)
        return G  # Error message

    num_nodes = G.shape[0]  # number of nodes

    # CC requires the degree and triangle arrays. Compute whichever of the
    # two has no precomputed file; the others are loaded further down.
    if inv_dict['cc']:
        if not inv_dict['tri_fn']:
            inv_dict['tri'] = True
        if not inv_dict['deg_fn']:
            inv_dict['deg'] = True
        cc_array = np.zeros(num_nodes)

    # All invariants that require eigenvalues. The flag is raised even when a
    # precomputed eigenvalue file exists so that the file actually gets loaded.
    if (inv_dict['tri'] and not inv_dict['tri_fn']) or inv_dict['mad']:
        inv_dict['eig'] = True

    # Only create arrays if the computation will be done (or CC needs them)
    if inv_dict['tri'] or inv_dict['cc']:
        if inv_dict['tri_fn']:
            tri_array = np.load(inv_dict['tri_fn'])  # load if precomputed
        else:
            tri_array = np.zeros(num_nodes)  # local triangle count

    if inv_dict['deg'] or inv_dict['edge'] or inv_dict['cc']:
        # edge is the global number of edges and is derived from the degrees
        inv_dict['deg'] = True
        if inv_dict['deg_fn']:
            deg_array = np.load(inv_dict['deg_fn'])
        else:
            deg_array = np.zeros(num_nodes)  # Vertex degrees of all vertices

    if inv_dict['ss1']:
        # Induced subgraph edge number i.e scan statistic
        ss1_array = np.zeros(num_nodes)

    if (not inv_dict['k'] or inv_dict['k'] > 100
            or inv_dict['k'] > G.shape[0] - 2):
        inv_dict['k'] = min(100, G.shape[0] - 2)  # Maximum of 100 eigenvalues

    start = time()
    # Calculate Eigenvalues & Eigen vectors
    if inv_dict['eig']:
        if not (inv_dict['eigvl_fn'] or inv_dict['eigvect_fn']):
            l, u = arpack.eigs(G, k=inv_dict['k'], which='LM')
            print('Time taken to calc Eigenvalues: %f secs\n' % (time() - start))
        else:
            try:
                l = np.load(inv_dict['eigvl_fn'])
                u = np.load(inv_dict['eigvect_fn'])
            except Exception:
                # Covers a missing/unreadable file as well as a path that is
                # None because only one of the two files was supplied.
                return "[IOERROR: ]Eigenvalues failed to load"

    # All other invariants
    compute_tri = bool(inv_dict['tri'] and not inv_dict['tri_fn'])
    compute_deg = bool(inv_dict['deg'] and not inv_dict['deg_fn'])

    start = time()
    #### For loop ####
    if inv_dict['cc'] or inv_dict['ss1'] or compute_tri or compute_deg:
        if compute_tri:
            eigval_cubed = np.power(l.real, 3)  # identical for every vertex

        for j in range(num_nodes):
            # tri
            if compute_tri:
                # Divide by six because we count locally
                tri_array[j] = abs(round(
                    (sum(eigval_cubed * (u[j][:].real ** 2))) / 6.0))

            # ss1 & deg
            if inv_dict['ss1'] or compute_deg:
                nbors = G[:, j].nonzero()[0]
                # deg
                if compute_deg:
                    deg_array[j] = nbors.shape[0]
                # ss1
                if inv_dict['ss1']:
                    if nbors.shape[0] > 0:
                        nbors_mat = G[:, nbors][nbors, :]
                        # scan stat 1
                        # Divide by two because of symmetric matrix
                        ss1_array[j] = nbors.shape[0] + (nbors_mat.nnz / 2.0)
                    else:
                        # zero neighbors hence zero cardinality induced subgraph
                        ss1_array[j] = 0

            # cc
            if inv_dict['cc']:
                # NOTE(review): vertices of degree exactly 2 get 0 here although
                # the formula below is defined for them - confirm "> 2" is
                # intended rather than ">= 2".
                if deg_array[j] > 2:
                    cc_array[j] = (2.0 * tri_array[j]) / (
                        deg_array[j] * (deg_array[j] - 1))  # Jari et al
                else:
                    cc_array[j] = 0

        print('Time taken to compute loop dependent invariants: %f secs\n'
              % (time() - start))
    ### End For ###

    # global edge
    if inv_dict['edge']:
        # NOTE(review): this is the sum of all degrees, which counts every
        # edge of a symmetric adjacency matrix twice - confirm that is wanted.
        edge_count = deg_array.sum()

    # global vertices is num_nodes

    # MAD
    if inv_dict['mad']:
        max_ave_deg = np.max(l.real)

    # Computation complete - handle the saving now ...

    # Top eigenvalues & eigenvectors
    if not inv_dict['eigvl_fn'] and inv_dict['eig']:
        eigvDir = os.path.join(inv_dict['save_dir'], "Eigen")

        # Immediately write eigs to file
        inv_dict['eigvl_fn'] = os.path.join(
            eigvDir, getBaseName(inv_dict['graph_fn']) + '_eigvl.npy')
        inv_dict['eigvect_fn'] = os.path.join(
            eigvDir, getBaseName(inv_dict['graph_fn']) + '_eigvect.npy')
        createSave(inv_dict['eigvl_fn'], l.real)  # eigenvalues
        createSave(inv_dict['eigvect_fn'], u)  # eigenvectors
        print('Eigenvalues and eigenvectors saved as ' + inv_dict['eigvect_fn'])

    # Triangle count
    if not inv_dict['tri_fn'] and inv_dict['tri']:
        _save_invariant(inv_dict, 'tri_fn', "Triangle", '_triangles.npy',
                        tri_array, 'Triangle Count')

    # Degree count
    if not inv_dict['deg_fn'] and inv_dict['deg']:
        _save_invariant(inv_dict, 'deg_fn', "Degree", '_degree.npy',
                        deg_array, 'Degree')

    # MAD
    if inv_dict['mad']:
        _save_invariant(inv_dict, 'mad_fn', "MAD", '_mad.npy',
                        max_ave_deg, 'Maximum average Degree')

    # Scan Statistic 1
    if inv_dict['ss1']:
        _save_invariant(inv_dict, 'ss1_fn', "SS1", '_scanstat1.npy',
                        ss1_array, 'Scan 1 statistic')

    # Clustering coefficient
    if inv_dict['cc']:
        _save_invariant(inv_dict, 'cc_fn', "ClustCoeff", '_clustcoeff.npy',
                        cc_array, 'Clustering coefficient')

    # Global Vertices
    if inv_dict['ver']:
        _save_invariant(inv_dict, 'ver_fn', "Globals", '_numvert.npy',
                        num_nodes, 'Global vertices number')

    # Global number of edges
    if inv_dict['edge']:
        _save_invariant(inv_dict, 'edge_fn', "Globals", '_numedges.npy',
                        edge_count, 'Global edge number')

    #if test: # bench test
    #  tri_fn = os.path.join('bench', str(G.shape[0]), getBaseName(inv_dict['graph_fn']) + '_triangles.npy')
    #  eigvl_fn = os.path.join('bench', str(G.shape[0]), getBaseName(inv_dict['graph_fn']) + '_eigvl.npy')
    #  eigvect_fn = os.path.join('bench', str(G.shape[0]), getBaseName(inv_dict['graph_fn']) + '_eigvect.npy')
    #  MAD_fn = os.path.join('bench', str(G.shape[0]), getBaseName(inv_dict['graph_fn']) + '_MAD.npy')

    # TODO: Fix code this breaks. Originally was
    # [tri_fn, deg_fn, MAD_fn, eigvl_fn, eigvect_fn]
    return inv_dict