def split_by_cc(fname: str, targets: str = None, order: str = None, slice=None,
                edge_predicate: str = edge_predicate) -> tuple:
    """Write each connected component of the input graph in its own file.

    fname -- file to read the graph from.
    targets -- filename template of the files to write. It must contain '{}',
               which is replaced by the index (starting at 1) of the
               component. When not given, it is built from fname by adding
               '_{}' just before the extension.
    order -- 'biggest first' or 'smaller last' to sort components by
             decreasing number of nodes, 'biggest last' or 'smaller first'
             for increasing number of nodes, 'random' to shuffle them.
             Any other value keeps the order given by networkx.
    slice -- optional pair of integers (start, end); only the components
             found in [start:end], after ordering, are written.
    edge_predicate -- forwarded to graph_from_file.

    Return the tuple of written filenames.
    Raise ValueError on invalid targets or slice.

    """
    if targets:
        if not isinstance(targets, str):
            raise ValueError("Target should be a filename to write")
        if '{}' not in targets:
            raise ValueError("Target should be a filename to write containing '{}'")
    else:
        # No template given: derive one from the input filename.
        stem, extension = os.path.splitext(fname)
        targets = stem + '_{}' + extension

    graph = graph_from_file(fname, edge_predicate=edge_predicate)
    components = networkx.connected_components(graph)

    if order in {'biggest first', 'smaller last'}:
        components = sorted(components, key=len, reverse=True)
    elif order in {'biggest last', 'smaller first'}:
        components = sorted(components, key=len)
    elif order == 'random':
        components = list(components)
        random.shuffle(components)

    if slice:
        try:
            well_formed = len(slice) == 2 and all(
                isinstance(bound, int) for bound in slice)
            if not well_formed:
                raise TypeError  # handled just like a non-iterable slice
        except TypeError:  # slice is not iterable, or is malformed
            raise ValueError("Slice must be an iterable of two integers")
        first, last = slice
        components = tuple(components)[first:last]

    written = []
    for index, nodes in enumerate(components, start=1):
        component = graph.subgraph(nodes)
        outfile = targets.format(index)
        graph_to_file(component, outfile)
        written.append(outfile)
    return tuple(written)
def test_graphml():
    """The graphml test file is read the same way by both readers,
    and contains the four expected edges."""
    path = 'data/test.graphml'
    generic = comparable_graph(graph_from_file(path))
    standard = comparable_graph(graph_from_standard_file(path))
    assert generic == standard
    expected_edges = (('1', '2'), ('1', '3'), ('2', '4'), ('3', '4'))
    assert generic == frozenset(frozenset(edge) for edge in expected_edges)
def test_many_atoms():
    """The many-atoms ASP file yields exactly the nine expected edges."""
    path = 'data/many-atoms.lp'
    found = comparable_graph(graph_from_file(path))
    assert len(found) == 9, found
    # Each two-letter word describes one edge between two one-letter nodes.
    expected = frozenset(
        frozenset(pair)
        for pair in ('ad', 'ae', 'bd', 'be', 'cd', 'ce', 'de', 'df', 'ef')
    )
    assert found == expected
def convert(fname: str, target: str = None, anonymize: bool = False,
            normalize: bool = False, edge_predicate: str = edge_predicate,
            target_edge_predicate: str = edge_predicate) -> dict:
    """Write in target the very same graph as input,
    but in a clean ASP expanded format.

    fname -- file to read the graph from.
    target -- file to write. If None or equal to fname, overwrite.
    anonymize -- rename nodes into integers.
    normalize -- avoid special characters in node names.
    edge_predicate -- edge predicate to use when reading fname.
    target_edge_predicate -- edge predicate to use in rewritten file.

    NOTE(review): despite the annotation, nothing is explicitly returned.

    """
    source = commons.normalize_filename(fname)
    destination = commons.normalize_filename(target) if target else target
    # Without any usable target, the input file itself is overwritten.
    destination = destination or source

    graph = graph_from_file(source, edge_predicate=edge_predicate)
    if anonymize:
        graph = anonymized(graph)
    if normalize:
        graph = normalized(graph)
    graph_to_file(graph, destination, edge_predicate=target_edge_predicate)
def randomize(fname: str, target: str, iterations: int, per_cc: bool = False,
              edge_predicate: str = edge_predicate):
    """Write in file of given name a randomized version of input graph.

    Randomization relies on networkx double edge swaps. When swapping
    cannot be achieved, a message is printed and the graph is written in
    the state networkx left it.

    fname -- file to read the graph from.
    target -- file to write.
    iterations -- the number of requested swaps is this value multiplied
                  by the number of edges of the (sub)graph to randomize.
    per_cc -- if True, each connected component is randomized on its own,
              and the written graph is the composition of all of them.
    edge_predicate -- edge predicate used for both reading and writing.

    Return the value returned by graph_to_file.

    """
    fname = commons.normalize_filename(fname)
    target = commons.normalize_filename(target)
    graph = graph_from_file(fname, edge_predicate=edge_predicate)

    def swapped(subgraph):
        """Return given graph after trying to swap its edges in place."""
        nb_swap = iterations * subgraph.number_of_edges()
        try:
            return networkx.algorithms.double_edge_swap(
                subgraph, nswap=nb_swap, max_tries=100 * nb_swap)
        except networkx.exception.NetworkXError as err:
            print(err.args[0])
        except networkx.exception.NetworkXAlgorithmError:
            print(
                "Maximum number of swap attempts reached, or graph can't be swapped. Ignored."
            )
        return subgraph

    if per_cc:
        # Subgraphs are views: they must be copied in order to be modified.
        components = [graph.subgraph(nodes).copy()
                      for nodes in networkx.connected_components(graph)]
        # compose_all rejects an empty sequence; a graph without any node
        # has no component, and is therefore written unchanged.
        if components:
            graph = networkx.compose_all(
                [swapped(component) for component in components])
    else:
        graph = swapped(graph)
    return graph_to_file(graph, target, edge_predicate=edge_predicate)
def extract_by_node(fname: str, target: str = None, nodes: iter = (),
                    order: int = 1, edge_predicate: str = edge_predicate):
    """Write in file of given name a subgraph of input one.

    The subgraph is induced by the given nodes, extended `order` times
    with the neighbors of the nodes already selected.

    fname -- file to read the graph from.
    target -- file to write. If not given, fname is overwritten.
    nodes -- nodes to start the selection from.
    order -- number of neighborhood extensions to perform.
    edge_predicate -- edge predicate used for both reading and writing.

    Return the value returned by graph_to_file.

    """
    fname = commons.normalize_filename(fname)
    if target:
        target = commons.normalize_filename(target)
    target = target or fname

    graph = graph_from_file(fname, edge_predicate=edge_predicate)
    neighbors_of = networkx.classes.function.all_neighbors

    selection = set(nodes)
    for _ in range(order):
        # Collect the whole neighborhood before growing the selection,
        # so that the set is never modified while being iterated.
        reached = set()
        for node in selection:
            reached.update(neighbors_of(graph, node))
        selection |= reached

    return graph_to_file(graph.subgraph(selection), target,
                         edge_predicate=edge_predicate)
def yield_info(fname: str, info_motifs: int = 0, info_ccs: bool = True,
               graphics: bool = False, outdir: str = '.',
               special_nodes: bool = False, heavy_computations: bool = False,
               graph_properties: bool = False, negative_results: bool = True,
               edge_predicate: str = edge_predicate) -> dict:
    """Yield (field, value) pairs describing the graph found in given file.

    This is a generator: nothing is computed before iteration starts.

    fname -- file to read the graph from.
    info_motifs -- print info about the n first motifs in the graph
                   (not implemented yet).
    info_ccs -- print info about connected components in the graph.
    graphics -- placeholder, nothing is produced yet.
    outdir -- normalized, but not used yet.
    special_nodes -- yield info about articulation points.
    heavy_computations -- placeholder, nothing is produced yet.
    graph_properties -- yield the value of networkx `is_*` predicates that
                        take the graph as only argument, and of some
                        global measures.
    negative_results -- also yield, under 'non implemented', the names of
                        the properties that networkx could not compute.
    edge_predicate -- edge predicate to use when reading fname.

    """
    outdir = commons.normalize_filename(outdir)
    graph = graph_from_file(fname, edge_predicate=edge_predicate)
    nb_node, nb_edge = len(graph.nodes), len(graph.edges)

    # The Graph.selfloop_edges method is gone from recent networkx releases,
    # replaced by a module-level function. Use the function when available,
    # and fall back on the method for older releases.
    selfloop_edges = getattr(networkx, 'selfloop_edges', None)
    if selfloop_edges is None:
        loops = graph.selfloop_edges()
    else:
        loops = selfloop_edges(graph)
    nb_self_loops = sum(1 for _ in loops)

    def density(nb_node, nb_edge):
        """Return the density of an undirected graph, NaN if undefined."""
        try:
            return 2 * nb_edge / (nb_node * (nb_node - 1))
        except ZeroDivisionError:  # less than two nodes
            return float('nan')

    yield '#node', nb_node
    yield '#edge', nb_edge
    if nb_self_loops:
        yield '#loop', nb_self_loops
        yield '#edge - #loop', nb_edge - nb_self_loops
    else:
        yield 'no loop', True
    yield 'density', density(nb_node, nb_edge)

    if info_motifs:
        # TODO: motif search. For now there is nothing to iterate on,
        #  so the body of this loop never runs.
        for motif in ():
            clyngor.solve()

    if info_ccs:
        ccs_nodes = tuple(networkx.connected_components(graph))
        yield '#cc', len(ccs_nodes)
        if len(ccs_nodes) > 1:
            node_per_cc = tuple(map(len, ccs_nodes))
            yield '#node/cc', node_per_cc
            yield '#node/cc (prop)', tuple(nb / nb_node for nb in node_per_cc)
            yield '#node/cc (mean)', sum(node_per_cc) / len(node_per_cc)
            yield 'density/cc', tuple(
                density(len(nodes), len(graph.subgraph(nodes).edges))
                for nodes in ccs_nodes)

    if graphics:
        # TODO: degree distribution (lin-lin, log-lin, lin-log, log-log)
        # TODO: motif size distribution (if info_motifs > 1)
        # TODO: degree function to clustering coefficient
        ...

    if heavy_computations:
        # TODO: concept and AOC poset size and ratio.
        ...

    if special_nodes:
        # TODO: equivalences
        arti_points = tuple(networkx.articulation_points(graph))
        yield '#articulation points', len(arti_points)
        if arti_points:
            yield 'articulation points', arti_points

    if graph_properties:
        non_implemented = []
        not_computable = (networkx.exception.NetworkXNotImplemented,
                          networkx.exception.NetworkXError)
        # Iterate over a snapshot: the consumer of this generator runs
        # between two yields, and the namespace of the module must not
        # change size while being iterated.
        for attrname, attr in tuple(vars(networkx).items()):
            if not attrname.startswith('is_'):
                continue
            if getfullargspec(attr).args != ['G']:  # only 1 arg
                continue
            propname = attrname[3:]  # discard the 'is_'
            try:
                value = attr(graph)
            except not_computable:
                non_implemented.append(propname)
            else:
                yield propname, value

        properties = ('transitivity', 'average_clustering',
                      'average_node_connectivity',
                      'average_shortest_path_length')
        for attrname in properties:
            try:
                value = getattr(networkx, attrname)(graph)
            except networkx.exception.NetworkXError:
                non_implemented.append(attrname)
            else:
                yield attrname, value

        if non_implemented and negative_results:
            yield 'non implemented', non_implemented
def test_gml():
    """The gml test file is read the same way by both readers."""
    path = 'data/test.gml'
    generic = comparable_graph(graph_from_file(path))
    standard = comparable_graph(graph_from_standard_file(path))
    assert generic == standard