def onebipartite(g, fp, degdir, analysis, namekey, mpkey, bipkey):
    """
    Process a single bipartite projection, routing it to the next step
    in the hierarchy towards writing degree sequences.

    Input:
        bipkey  string, indicates the projection (a or b) that generated
                this subgraph of the original.
    """
    # Multiedges/weights take priority, then directedness, then the
    # default connectivity check. mpkey is forced to 0 for projections.
    needs_simplifying = sg.weighted(g, fp) == 1 or sg.multigraph(g) == 1
    if needs_simplifying:
        processmultigraph(g, fp, degdir, analysis, namekey,
                          mpkey=0, bipkey=bipkey)
    elif g.is_directed():
        processdirected(g, fp, degdir, analysis, namekey=namekey,
                        mpkey=0, bipkey=bipkey)
    else:
        checkconnected(g, fp, degdir, analysis, namekey=namekey,
                       mpkey=0, bipkey=bipkey)
def processmultigraph(g, fp, degdir, analysis, namekey='', mpkey=0, bipkey=0):
    """
    Process a multigraph or weighted graph by ignoring multiedges and
    weights, then send the simplified graph on down the hierarchy.
    """
    mgkey, weighkey = 0, 0
    # Record what is about to be simplified BEFORE mutating the graph,
    # so the name/keys reflect the original structure.
    if sg.multigraph(g) == 1:
        mgkey = 'simplified'
        namekey = namekey + '_multigraphsimplified'
    if sg.weighted(g) == 1:
        weighkey = 'simplified'
        namekey = namekey + '_weightedsimplified'
    g.simplify()
    # Directed graphs and undirected graphs take different paths.
    nextstep = processdirected if sg.directed(g) else checkconnected
    nextstep(g, fp, degdir, analysis, namekey=namekey, mpkey=mpkey,
             bipkey=bipkey, mgkey=mgkey, weighkey=weighkey)
def buildGMLcatalog(gml_dir):
    """
    Walks through the subdirectories of a root to find all gml files,
    then catalogs the relevant information about the contained networks.

    Input:
        gml_dir  string, path to the root directory where gmls are
    Output:
        df       DataFrame, catalog of the existing gml files, indexed
                 by file name.
    """
    df = pd.DataFrame(columns=['fp_gml', 'Weighted', 'Directed',
                               'Bipartite', 'Multigraph', 'Multiplex'])
    # make list of file paths to gmls
    fpV = []
    for root, dirs, files in os.walk(gml_dir):
        for name in files:
            # leave out the bipartite projections ('1mode' files) so we
            # can make our own
            if name.endswith('.gml') and '1mode' not in name:
                fpV.append(os.path.join(root, name))
    # create the catalog
    for fp in fpV:
        g = igraph.read(fp)
        # os.path.basename is portable, unlike splitting on '/'
        name = os.path.basename(fp)
        # add new row or overwrite existing row; use .loc[row, col] so the
        # assignment writes through (chained df.loc[row][col] = v assigns
        # to a temporary copy and is silently lost in modern pandas)
        df.loc[name] = np.nan
        df.loc[name, 'fp_gml'] = fp
        df.loc[name, 'Weighted'] = sg.weighted(g, fp)
        df.loc[name, 'Directed'] = sg.directed(g)
        df.loc[name, 'Bipartite'] = sg.bipartite(g, fp)
        df.loc[name, 'Multigraph'] = sg.multigraph(g)
        df.loc[name, 'Multiplex'] = sg.multiplex(g)
        if (df.loc[name] == 'error').any():
            # this catches bad bipartite gmls
            df = df.drop(name)
            print('dropping {} from the considered gmls'.format(name))
    return df
def processmultiplex(g, fp, degdir, analysis):
    """
    Processes a multiplex graph. Splits along the edge types, so that
    each edge type gets its own new graph. Then sends each of these new
    graphs through the structural hierarchy and sends them along the
    appropriate path for further processing.

    Input:
        g   igraph Graph object, known to be multiplex
        fp  file path, leads to gml file
    """
    # project onto layers
    # pull out list of attributes
    attributes = g.es.attributes()
    # if there are multiple edge types, split on these.
    if len(attributes)>1:
        for att in attributes:
            # assume that these attribute values are weights, so should be numeric
            attkeyword = att+'_notin'
            # list of non-numeric values to avoid
            notthese = ['','Nan', 'n']
            # pull out edges that correspond to non empty weights
            edgeseq = g.es(**{attkeyword:notthese})
            # project onto the subgraph
            graph = g.subgraph_edges(edgeseq)
            namekey = '_multiplex'+att
            mpkey = 'sub_'+att
            # route the layer down the structural hierarchy:
            # bipartite first, then multigraph/weighted, then directed,
            # otherwise straight to degree-sequence extraction
            if sg.bipartite(graph, fp)==1:
                processbipartite(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            elif sg.multigraph(graph)==1 or sg.weighted(graph)==1:
                processmultigraph(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            elif sg.directed(graph)==1:
                processdirected(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            else:
                readdeg(graph, fp,degdir,analysis, namekey=namekey, mpkey=mpkey)
    # If, however, there is one edge type, assume the split is in this, and
    # look at the values of this attribute
    else:
        att = attributes[0]
        # get just the unique values the attribute takes
        types = np.unique(g.es[att])
        # initilize empty list of edge seqs for each projection subgraph
        edgeseqs = []
        attkeyword = att+'_eq'
        for typ in types:
            edgeseqs.append((g.es(**{attkeyword:typ})))
        # project onto the subgraphs
        subgraphs = [g.subgraph_edges(edgeseq) for edgeseq in edgeseqs]
        # process all the subgraphs
        for i in range(len(subgraphs)):
            graph = subgraphs[i]
            namekey = '_multiplex'+str(types[i])
            mpkey = 'sub_'+str(i)
            # same structural routing as the multi-attribute branch above
            if sg.bipartite(graph, fp)==1:
                processbipartite(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            elif sg.multigraph(graph)==1 or sg.weighted(graph)==1:
                processmultigraph(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            elif sg.directed(graph)==1:
                processdirected(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            else:
                readdeg(graph, fp,degdir,analysis, namekey=namekey, mpkey=mpkey)
    # process the union graph (all layers together)
    # NOTE(review): source formatting was ambiguous here — this pass is
    # placed at function level so the union is processed for every
    # multiplex graph; confirm against the original layout.
    graph = g
    namekey = '_multiplexunion'
    mpkey = 'union'
    # unlike the per-layer routing, multigraph and weighted are handled
    # by separate branches here (processmultigraph vs processweighted)
    if sg.bipartite(graph, fp)==1:
        processbipartite(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
    elif sg.multigraph(graph)==1:
        processmultigraph(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
    elif sg.weighted(graph)==1:
        processweighted(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
    elif sg.directed(graph)==1:
        processdirected(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
    else:
        readdeg(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
def buildGMLcatalog(gmldirpath, df, avoiddirs, overwrite):
    """
    Walks through the subdirectories of a root to find all gml files,
    then catalogs the relevant information about the contained networks.

    Input:
        gmldirpath  string, path to the root directory where gmls are
        df          DataFrame, catalog of the existing gml files
        avoiddirs   list, contains names of any directories to avoid
                    i.e. 'n7'
        overwrite   boolean, if true, forces overwrite of rows that
                    already exist in the catalog. Otherwise only new
                    files are added.
    Output:
        df          DataFrame, catalog of the existing gml files
    """
    # make list of file paths to gmls
    fpV = []
    for root, dirs, files in os.walk(gmldirpath):
        # prune in place so os.walk skips: gmls known to crash
        # igraph.read() ('bad', 'crash') plus any caller-specified dirs
        for avdir in ['bad', 'crash'] + list(avoiddirs):
            if avdir in dirs:
                dirs.remove(avdir)
        for name in files:
            # leave out the bipartite projections so we can make our own
            if name.endswith('.gml') and '1mode' not in name:
                fpV.append(os.path.join(root, name))
    # check which gmls are already in df
    if not overwrite:
        newfpV = set(fpV).difference(set(df['fp_gml']))
    else:
        newfpV = fpV
    # update the catalog
    for fp in newfpV:
        g = igraph.read(fp)
        # split on the platform separator (normpath first) rather than a
        # hard-coded '/', so the layout columns work cross-platform
        splitfp = os.path.normpath(fp).split(os.sep)
        name = splitfp[-1]
        # add new row or overwrite existing row; use .loc[row, col] so the
        # assignment writes through (chained df.loc[row][col] = v assigns
        # to a temporary copy and is silently lost in modern pandas)
        df.loc[name] = np.nan
        df.loc[name, 'fp_gml'] = fp
        df.loc[name, 'Domain'] = splitfp[-4]
        df.loc[name, 'Subdomain'] = splitfp[-3]
        # cut out the 'n', e.g. 'n7' -> 7
        df.loc[name, 'Graph_order'] = int(splitfp[-2][1:])
        df.loc[name, 'Weighted'] = sg.weighted(g, fp)
        df.loc[name, 'Directed'] = sg.directed(g)
        df.loc[name, 'Bipartite'] = sg.bipartite(g, fp)
        df.loc[name, 'Multigraph'] = sg.multigraph(g)
        df.loc[name, 'Multiplex'] = sg.multiplex(g)
        if (df.loc[name] == 'error').any():
            # this catches bad bipartite gmls
            df = df.drop(name)
    return df