def undo_cycles(self):
    vertices = self.states_df.columns
    nx_graph = self.new_filled_nx_graph()
    dir_edge_to_freq = {}
    bad_dir_edges = []
    cycles = list(nx.simple_cycles(nx_graph))
    num_cyc = len(cycles)
    while num_cyc > 0:
        for cyc in cycles:
            # nx.simple_cycles yields node lists, so pair consecutive nodes
            # (wrapping around) to recover the cycle's directed edges
            for dir_edge in zip(cyc, cyc[1:] + cyc[:1]):
                if dir_edge not in dir_edge_to_freq:
                    dir_edge_to_freq[dir_edge] = 1
                else:
                    dir_edge_to_freq[dir_edge] += 1
        # remove the edge that participates in the most cycles
        max_freq_edge = max(dir_edge_to_freq, key=dir_edge_to_freq.get)
        bad_dir_edges.append(max_freq_edge)
        beg_vtx, end_vtx = max_freq_edge
        self.vtx_to_parents[end_vtx].remove(beg_vtx)
        nx_graph.remove_edge(beg_vtx, end_vtx)
        cycles = list(nx.simple_cycles(nx_graph))
        num_cyc = len(cycles)
    # reinsert the removed edges with their direction reversed
    for (beg_vtx, end_vtx) in reversed(bad_dir_edges):
        self.vtx_to_parents[beg_vtx].append(end_vtx)
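# Note on the fix above: nx.simple_cycles reports each cycle as a plain list
# of nodes, not edges, so iterating a cycle directly would tally nodes rather
# than edges. A minimal stand-alone illustration of the node-to-edge pairing:
import networkx as nx

G = nx.DiGraph([("a", "b"), ("b", "c"), ("c", "a")])
for cyc in nx.simple_cycles(G):
    edges = list(zip(cyc, cyc[1:] + cyc[:1]))
    print(edges)  # e.g. [('a', 'b'), ('b', 'c'), ('c', 'a')]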
def _check_graph(self, ebunch=None, delete_graph=False):
    """
    Checks for self loops and cycles in the graph. If any are found,
    reverts the graph to its previous state or, when called from
    __init__, deletes the graph.
    """
    if delete_graph:
        if ebunch is not None:
            for edge in ebunch:
                if edge[0] == edge[1]:
                    del self
                    raise ValueError("Self Loops are not allowed", edge)
        simple_cycles = [loop for loop in nx.simple_cycles(self)]
        if simple_cycles:
            del self
            raise ValueError("Cycles are not allowed", simple_cycles)
        return True
    else:
        for edge in ebunch:
            if edge[0] == edge[1]:
                raise ValueError("Self loops are not allowed", edge)
        import copy
        test_G = copy.deepcopy(self)
        nx.DiGraph.add_edges_from(test_G, ebunch)
        simple_cycles = [loop for loop in nx.simple_cycles(test_G)]
        if simple_cycles:
            del test_G
            raise ValueError("Cycles are not allowed", simple_cycles)
        return True
def test_simple_cycles_small(self):
    G = nx.DiGraph()
    G.add_path([1, 2, 3, 1])
    c = sorted(nx.simple_cycles(G))
    assert_equal(c, [[1, 2, 3, 1]])
    G.add_path([10, 20, 30, 10])
    c = sorted(nx.simple_cycles(G))
    assert_equal(c, [[1, 2, 3, 1], [10, 20, 30, 10]])
def follow_path(self, node):
    """Given a head node, follow_path will parse through all connecting nodes
    and translate the path into the execution portion of the script.
    """
    # declare variables
    temp_script3 = ""
    var = 1
    original_node = node
    cycle_element_used = set()
    while var == 1:
        # check for edges that have been traversed and ignore them
        original_walked = [self.graph.edge[j][k]['walked']
                           for j, k in self.graph.out_edges(original_node)]
        edges = self.graph.out_edges(node)
        for j, k in edges:
            if self.graph.edge[j][k]['walked'] == True:
                edges.remove((j, k))
        # if a loop is detected, parse through and translate everything into
        # a for loop for the script
        if len(nx.simple_cycles(self.graph)) > 0:
            cycles_list = nx.simple_cycles(self.graph)[0]
            if j in cycles_list and not cycle_element_used:
                temp_script3 = temp_script3 + "for n in range(10):\n"
                # cycles_list.pop() was removed here because the order of the
                # cycle was incorrect
                cycles_list.remove(cycles_list[0])
                for i in range(len(cycles_list)):
                    temp_script3 = temp_script3 + "    " + cycles_list[i].add_calc(
                        cycles_list[i - 1].var,
                        self.graph.edge[cycles_list[i - 1]][cycles_list[i]]['msname']) + '\n'
                cycle_element_used.add(j)
                cycle_element_used.add(k)
        # move to the next node and add any lines of code that have not
        # already been included in the script
        walk_list = {self.graph.edge[j][k]['walk_value']: (j, k) for j, k in edges}
        if len(walk_list) > 0:
            x, y = walk_list[min(walk_list)]
            self.graph.edge[x][y]['walked'] = True
            if y.add_calc(x.var, self.graph.edge[x][y]['msname']) not in temp_script3:
                temp_script3 = temp_script3 + y.add_calc(x.var, self.graph.edge[x][y]['msname']) + '\n'
            # if more than one edge branches off of a node, take the one with
            # the lowest walk_value; otherwise simply take the single edge and
            # follow it to the next node
            if len(self.graph.successors(node)) > 1:
                node = y
            else:
                node = self.graph.successors(node)[0]
        # if all edges down one path have been traversed, check the head node
        # for any other paths and follow the one with the next lowest walk_value
        else:
            node = original_node
            # if everything has been traversed, reset all walked values to
            # False to ensure that the next runthrough succeeds
            if False not in original_walked:
                for j, k in self.graph.out_edges():
                    self.graph.edge[j][k]['walked'] = False
                break
    # apply changes to the execution portion of the script
    self.script_execution = temp_script3
def test_simple_cycles_small(self):
    G = nx.DiGraph()
    G.add_path([1, 2, 3, 1])
    c = sorted(nx.simple_cycles(G))
    assert_equal(c, [[1, 2, 3, 1]])
    G.add_path([10, 20, 30, 10])
    c = sorted(nx.simple_cycles(G))
    ca = [[1, 2, 3, 1], [10, 20, 30, 10]]
    for (a, b) in zip(c, ca):
        assert_true(self.is_cyclic_permutation(a[:-1], b[:-1]))
def test_simple_cycles_small(self):
    G = nx.DiGraph()
    G.add_cycle([1, 2, 3])
    c = sorted(nx.simple_cycles(G))
    assert_equal(len(c), 1)
    assert_true(self.is_cyclic_permutation(c[0], [1, 2, 3]))
    G.add_cycle([10, 20, 30])
    cc = sorted(nx.simple_cycles(G))
    ca = [[1, 2, 3], [10, 20, 30]]
    for c in cc:
        assert_true(any(self.is_cyclic_permutation(c, rc) for rc in ca))
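# The three variants of test_simple_cycles_small above track a behavioral
# change in networkx: older releases reported each cycle with the start node
# repeated at the end ([1, 2, 3, 1]), while modern releases yield a plain
# node list and only guarantee the result up to cyclic permutation. A quick
# check against networkx >= 2.x:
import networkx as nx

G = nx.DiGraph()
nx.add_cycle(G, [1, 2, 3])
print(list(nx.simple_cycles(G)))  # e.g. [[1, 2, 3]] -- no repeated endpoint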
def checkInstructionGraphCycles(instructions):
    import networkx as nx

    g = nx.DiGraph()
    for i in instructions:
        g.add_node(i)
        for ix in range(i.flowsToCount()):
            flowsTo = i.flowsTo(ix)
            if flowsTo in instructions:
                g.add_edge(i, flowsTo)

    cycles = nx.simple_cycles(g)
    for c in cycles:
        if not checkCycleHasEntrypoint(c):
            print "************************************"
            print "No entrypoint in the following cycle: "
            for i in c:
                print i
                print "children:"
                for sub in i.children():
                    print "\t", repr(sub)
            print "************************************"
        else:
            print "************************************"
            print "cycle with ", len(c), " is OK"
            for i in c:
                if i.getTypedJumpTarget():
                    print "*** ",
                else:
                    print "    ",
                print repr(i)
            print "************************************"
def get_voltage_equations(circuit, symbols, V):
    g = get_circuit_graph(circuit)
    loops = [i for i in nx.simple_cycles(g) if len(i) > 3]
    result = []
    for loop in loops:
        T = 0
        for n in range(len(loop) - 1):
            n1 = loop[n]
            n2 = loop[n + 1]
            t = [n1, n2]
            t.sort()
            I = [i for i in current_symbols if current_symbols[i] == t][0]
            R = circuit[n1][n2]
            if R != V:
                if n1 > n2:
                    T += I * R
                else:
                    T -= I * R
            else:
                if n1 > n2:
                    T += V
                else:
                    T -= V
        result.append(T)
    return result
def analyse_cycles(sdfg):
    vectors = core.check_consistency(sdfg)
    s = vectors['s']
    q = vectors['q']
    print("HSDF graph size: {}".format(sum(q.values())))
    par = {}
    for cycle in nx.simple_cycles(sdfg):
        edges = [(cycle[i - 1], cycle[i]) for i in range(len(cycle))]
        wtsum = 0
        multiple = 1
        z = {}
        for v, w in edges:
            data = sdfg.get_edge_data(v, w)
            tokens = data.get('tokens', 0)
            prates = data.get('production', core.cyclic(1))
            wtsum += s[(v, w)] * tokens
            z[v] = prates.sum() * s[(v, w)]
            multiple = core.lcm(multiple, z[v])
        if wtsum % multiple == 0:
            for v in cycle:
                parv = wtsum // z[v]
                par[v] = parv if v not in par else min(par[v], parv)
        print("Cycle {}: tokens = {:.3f}, integral: {}".format(
            cycle, wtsum / multiple, wtsum % multiple == 0))
    for v in par:
        if q[v] % par[v] == 0:
            q[v] = q[v] // par[v]
        elif par[v] % q[v] == 0:
            q[v] = 1
    print("New HSDF graph size: {}".format(sum(q.values())))
def get_all_substance_combinations_with_cycles(alpha, beta):
    try:
        import numpy
        alpha = numpy.array(alpha)
        beta = numpy.array(beta)
    except ImportError:
        print('This method requires that alpha and beta are NumPy arrays. '
              'NumPy does not appear to be installed. Please install NumPy.')
        raise
    # alpha, beta are stoichiometry matrices as used throughout code
    # number of reactions = number of columns of alpha
    no_rxn = alpha.shape[1]
    # number of substances = number of rows of alpha
    no_sub = alpha.shape[0]
    # check that alpha and beta agree in shape (the original used a bare
    # `raise` here, which itself errors outside an except block)
    if no_rxn != beta.shape[1] or no_sub != beta.shape[0]:
        raise ValueError('alpha and beta must have the same shape.')
    # get substance adjacency matrix
    subs_adj = get_substance_adjacency(alpha, beta)
    # get directed substance graph
    subs_G = nx.from_numpy_matrix(subs_adj, create_using=nx.DiGraph())
    # get cycles in substance graph; materialize the generator so it can be
    # indexed and measured with len()
    subs_cycles = list(nx.simple_cycles(subs_G))
    # remove substance index repetitions
    for c_i in range(len(subs_cycles)):
        subs_cycles[c_i] = list(set(subs_cycles[c_i]))
def dependency_list(self):
    r'''
    Returns a list of dependencies in the order with which they should be
    called to ensure data is calculated by one model before it's asked for
    by another.

    Notes
    -----
    This raises an exception if the graph has cycles, which means the
    dependencies are unresolvable (i.e. there is no order in which the
    models can be called that will work). In this case it is possible to
    visually inspect the graph using ``dependency_graph``.

    See Also
    --------
    dependency_graph
    dependency_map

    '''
    dtree = self.dependency_graph()
    cycles = list(nx.simple_cycles(dtree))
    if cycles:
        raise Exception('Cyclic dependency found: ' + ' -> '.join(
            cycles[0] + [cycles[0][0]]))
    d = nx.algorithms.dag.lexicographical_topological_sort(dtree, sorted)
    return list(d)
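# A minimal sketch of the same check-then-sort pattern on a hypothetical
# three-model dependency graph ('b' depends on 'a', 'c' on 'b'):
import networkx as nx

dtree = nx.DiGraph([("a", "b"), ("b", "c")])
assert not list(nx.simple_cycles(dtree))  # no cyclic dependencies
order = list(nx.algorithms.dag.lexicographical_topological_sort(dtree))
print(order)  # ['a', 'b', 'c']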
def comb_fas(graph):
    '''@param: graph, a nx.DiGraph obj'''
    assert isinstance(graph, nx.DiGraph)
    origin_weight = nx.get_edge_attributes(graph, 'weight')
    weight = origin_weight.copy()
    assert len(weight) == graph.number_of_edges(), "Some edge does not have a weight attr."
    fas = []
    while not nx.is_directed_acyclic_graph(graph):
        # take any one remaining cycle; no need to enumerate them all
        c = next(nx.simple_cycles(graph))
        mini_weight = min([weight[edge] for edge in get_edges(c)])
        cycle_edges_weight = {edge: weight[edge] for edge in get_edges(c)}
        for eachEdge in cycle_edges_weight.keys():
            cycle_edges_weight[eachEdge] -= mini_weight
            weight[eachEdge] -= mini_weight
            if cycle_edges_weight[eachEdge] == 0:
                fas.append(eachEdge)
                graph.remove_edge(eachEdge[0], eachEdge[1])
    for eachEdge in copy.copy(fas):
        # keyword form required on networkx >= 2.0; the positional
        # attr_dict argument was removed
        graph.add_edge(eachEdge[0], eachEdge[1], weight=origin_weight[eachEdge])
        if nx.is_directed_acyclic_graph(graph):
            fas.remove(eachEdge)
            continue
        else:
            graph.remove_edge(eachEdge[0], eachEdge[1])
    return fas
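# comb_fas leans on a get_edges helper defined elsewhere in its module; a
# plausible sketch, assuming it turns a node cycle from nx.simple_cycles
# into the list of directed edges that compose it:
def get_edges(cycle):
    """Pair consecutive nodes of `cycle` (wrapping around) into edges."""
    return list(zip(cycle, cycle[1:] + cycle[:1]))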
def cycles(request, graph):
    offset, limit = _getPaging(request)
    icycles = nx.simple_cycles(graph)
    icycles = islice(icycles, offset, offset + limit)
    request.respondJson({'cycles': tuple(icycles)})
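# Because simple_cycles is a generator (networkx >= 2.0), islice pages
# through cycles lazily without materializing the full, potentially
# exponential, list. A small stand-alone illustration of the same idiom:
import networkx as nx
from itertools import islice

G = nx.DiGraph([(1, 2), (2, 1), (2, 3), (3, 2), (1, 3), (3, 1)])
page = list(islice(nx.simple_cycles(G), 2, 4))  # cycles 2..3 only
print(page)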
def _solve_bff(self, bff_str):
    # Construct the directed graph
    bffs = [int(e.strip()) for e in bff_str.split(' ')]
    nodes = [i + 1 for i in xrange(len(bffs))]
    gr = nx.DiGraph()
    gr.add_nodes_from(nodes)
    gr.add_edges_from([e for e in zip(nodes, bffs)])
    max_length = 0
    tree = self._build_tree(bffs)
    paths = []
    # For each simple cycle in the graph
    for cycle in nx.simple_cycles(gr):
        if len(cycle) == 2:
            # If the cycle length is two, we can add more nodes to form a path
            path_length = self._find_path_length(cycle, tree)
            # All the paths can be chained to form a circle
            paths.append(path_length)
        elif len(cycle) > max_length:
            # If the cycle length is three or more, we cannot add more nodes
            max_length = len(cycle)
    total_path_length = sum(paths)
    if total_path_length > max_length:
        max_length = total_path_length
    return max_length
def check(self):
    if not self.graph.is_acyclic():
        err = "Graph cannot be processed because it contains cycles in it:"
        # FIXME(mattymo): GraphSolver cannot be used to call this method
        err += ', '.join(six.moves.map(str, nx.simple_cycles(
            nx.DiGraph(self.graph))))
        err += '\n'
        raise errors.InvalidData(err)

    non_existing_tasks = []
    invalid_tasks = []

    for node_key, node_value in six.iteritems(self.graph.node):
        if not node_value.get('id'):
            successors = self.graph.successors(node_key)
            predecessors = self.graph.predecessors(node_key)
            neighbors = successors + predecessors

            non_existing_tasks.append(node_key)
            invalid_tasks.extend(neighbors)

    if non_existing_tasks:
        raise errors.InvalidData(
            "Tasks '{non_existing_tasks}' can't be in requires"
            "|required_for|groups|tasks for [{invalid_tasks}]"
            " because they don't exist in the graph".format(
                non_existing_tasks=', '.join(
                    str(x) for x in sorted(non_existing_tasks)),
                invalid_tasks=', '.join(
                    str(x) for x in sorted(set(invalid_tasks)))))
def find_cycles(instance):
    bn_graph = convert_to_graph(instance)
    cycles_found = list(nx.simple_cycles(bn_graph))
    return cycles_found
def MaximalNonBranchingPaths(G):
    '''
    G: a networkx DiGraph or MultiDiGraph;
    http://rosalind.info/problems/ba3m/
    '''
    paths = []
    for v in G.nodes():
        if G.in_degree(v) != 1 or G.out_degree(v) != 1:
            if G.out_degree(v) > 0:
                for i in G.out_edges(v):
                    non_branching_path = [*i]
                    w = i[1]
                    while G.in_degree(w) == 1 and G.out_degree(w) == 1:
                        # out_edges returns a view on networkx >= 2.0, so grab
                        # the single outgoing edge with next() instead of indexing
                        u = next(iter(G.out_edges(w)))[1]
                        non_branching_path.append(u)
                        w = u
                    paths.append(non_branching_path)
    for cycle in nx.simple_cycles(G):
        branch = 0
        for v in cycle:
            if G.in_degree(v) != 1 or G.out_degree(v) != 1:
                branch = 1
        if branch == 0:
            cycle.append(cycle[0])
            paths.append(cycle)
    return paths
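# A small usage sketch of MaximalNonBranchingPaths (Rosalind BA3M) on a graph
# with one branch point; node 3 splits the walk into three maximal paths:
import networkx as nx

G = nx.DiGraph([(1, 2), (2, 3), (3, 4), (3, 5)])
print(MaximalNonBranchingPaths(G))  # [[1, 2, 3], [3, 4], [3, 5]]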
def test_simple_graph_with_reported_bug(self):
    G = nx.DiGraph()
    edges = [(0, 2), (0, 3), (1, 0), (1, 3), (2, 1), (2, 4), (3, 2),
             (3, 4), (4, 0), (4, 1), (4, 5), (5, 0), (5, 1), (5, 2),
             (5, 3)]
    G.add_edges_from(edges)
    cc = sorted(nx.simple_cycles(G))
    assert_equal(len(cc), 26)
    rcc = sorted(nx.recursive_simple_cycles(G))
    assert_equal(len(cc), len(rcc))
    for c in cc:
        assert_true(any(self.is_cyclic_permutation(c, rc) for rc in rcc))
    for rc in rcc:
        assert_true(any(self.is_cyclic_permutation(rc, c) for c in cc))
def solver(sent_tuples, featureset):
    stats = {}
    globalStats = {}
    # outs = [enum(x) for x in sent_tuples]
    count = 1
    for combination in itertools.product(*sent_tuples):
        # print combination
        # if count % 1000 == 0:
        #     print count
        # count = count + 1
        G = nx.DiGraph()
        for c in combination:
            key = c[0][0]
            val = c[0][1]
            for v in val:
                G.add_edge(key, v)
            if c not in stats:
                stats[c] = 0
        try:
            k = nx.simple_cycles(G).next()
        except StopIteration:
            for f in featureset:
                feature = f(combination)
                for k in feature:
                    if k in globalStats:
                        globalStats[k] += feature[k]
                    else:
                        globalStats[k] = feature[k]
            for c in combination:
                stats[c] = stats[c] + 1
    return (stats, globalStats)
def simple_loops(g, u):
    """
    iterator over the list of simple loops of graph g
    at the undersample rate u
    """
    gx = graph2nx(num2CG(g2num(undersample(g, u)), len(g)))
    for l in networkx.simple_cycles(gx):
        yield l
def is_graph_fair(self, graph):
    control_dict = nx.get_node_attributes(graph, 'control')
    for cycle in nx.simple_cycles(graph):
        controls = [control_dict[n] for n in cycle]
        if not self.is_system_fair(controls):
            return False
    return True
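# A stand-alone version of the per-cycle attribute lookup above, with a stub
# fairness rule (hypothetical: a cycle is fair when both players' controls
# appear in it):
import networkx as nx

def is_system_fair(controls):
    return len(set(controls)) > 1

G = nx.DiGraph([(1, 2), (2, 1), (2, 3), (3, 2)])
nx.set_node_attributes(G, {1: "A", 2: "B", 3: "A"}, name="control")
control_dict = nx.get_node_attributes(G, "control")
print(all(is_system_fair([control_dict[n] for n in cycle])
          for cycle in nx.simple_cycles(G)))  # True: every cycle mixes A and B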
def find_attractor_old(G_transition_graph):
    '''
    Arguments:
        G_transition_graph [networkx Graph object]: directed graph showing
        how network configurations map to one another

    Return:
        attractors [dict of list of lists of ints]
            ['fixed'] = [[532], [948]]
            ['cycle'] = []

    --> The output of this tells me all the cycles in the network, but it
    does not tell me whether they are accessible from the initial state
    that I'm interested in. Need to figure this out.
    '''
    # in a deterministic system, any cycle found without considering edge
    # direction will be a directed cycle
    attractor_list = nx.simple_cycles(G_transition_graph)
    attractors = {}
    attractors['fixed'] = []
    attractors['cycle'] = []
    for network_ID in attractor_list:
        # print network_ID
        if len(network_ID) == 1:
            attractors['fixed'].append(network_ID)
        else:
            attractors['cycle'].append(network_ID)
    # this outputs decIDs of attractor states (fixed and cyclic)
    return attractors
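# In a deterministic transition graph every node has exactly one successor,
# so the attractors are exactly the directed cycles; self-loops show up as
# length-1 cycles ("fixed points"). A minimal illustration:
import networkx as nx

G = nx.DiGraph([(1, 2), (2, 3), (3, 2), (4, 4)])
for cyc in nx.simple_cycles(G):
    kind = "fixed" if len(cyc) == 1 else "cycle"
    print(kind, cyc)  # e.g. fixed [4] / cycle [2, 3]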
def get_syntax_paths(syntax, from_token, to_token):
    """
    :param nx.DiGraph graph:
    :param Token from_token:
    :param Token to_token:
    :return dict:
    """
    assert isinstance(from_token, Token)
    assert isinstance(to_token, Token)
    paths = {}
    for idx, syntax_tree in syntax.syntax_trees.iteritems():
        neutralized_graph = nx.Graph(syntax_tree.graph)
        if from_token == to_token:
            cycles = [cycle for cycle in nx.simple_cycles(syntax_tree.graph)
                      if from_token.index in cycle]
            if len(cycles) == 0:
                return paths
            else:
                path = min(cycles, key=lambda cycle: len(cycle))
        else:
            if not nx.has_path(neutralized_graph, from_token.index, to_token.index):
                continue
            path = nx.shortest_path(neutralized_graph, from_token.index, to_token.index)
        token_path = [syntax.tokens[token_idx] for token_idx in path]
        syntax_path = SyntaxPath(syntax, idx, token_path)
        paths[idx] = syntax_path
    return paths
def _prune_states(K, graph, source, sink):
    """
    Removes cycles and redundant nodes (that are not reachable from source)
    from the subgraph of graph defined by the nodes in K.
    """
    # Create a subgraph with the nodes now in K.
    # Find and remove cycles by deleting the edge between the second to last
    # node and the last node of the cycle, thus keeping nodes that may be
    # important to the trust calculation.
    # (.copy() makes the subgraph writable on networkx >= 2.0, where
    # subgraph() returns a read-only view)
    subgraph = graph.subgraph(K).copy()
    # materialize the generator: a generator object is always truthy, so a
    # bare `if cycles:` would take the branch even for an acyclic subgraph
    cycles = list(nx.simple_cycles(subgraph))
    if cycles:
        for cycle in cycles:
            subgraph.remove_edges_from([(cycle[-2], cycle[-1])])

    # Get all paths from source to sink without cycles and redundant nodes
    simple_paths = list(nx.all_simple_paths(G=graph, source=source, target=sink))
    relevant_nodes = set(chain.from_iterable(simple_paths))

    # Remove nodes no longer used (not in simple_paths)
    for n in K:
        if n not in relevant_nodes:
            subgraph.remove_node(n)
    return subgraph
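# The generator pitfall fixed above, in isolation: simple_cycles returns a
# generator on networkx >= 2.0, and generators are always truthy.
import networkx as nx

G = nx.DiGraph([(1, 2)])        # acyclic
gen = nx.simple_cycles(G)
print(bool(gen))                # True -- truthiness says nothing here
print(list(gen))                # []   -- the graph has no cycles at all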
def add_edge(self, u, v, label):
    """
    Add an edge between u and v.

    The nodes u and v will be automatically added if they are
    not already in the graph.

    Parameters
    ----------
    u, v : nodes
        Nodes can be any hashable (and not None) Python object.
    label : string
        Label should be value of the variable observed.
        (underscore separated if multiple variables)
    attr_dict : dictionary, optional (default= no attributes)
        Dictionary of edge attributes. Key/Value pairs will
        update existing data associated with the edge.
    attr : Keyword arguments, optional
        Edge data can be assigned using keyword arguments.

    Examples
    --------
    >>> from pgmpy.factors import TreeCPD, Factor
    >>> tree = TreeCPD([('B', Factor(['A'], [2], [0.8, 0.2]), 0),
    ...                 ('B', 'C', 1)])
    >>> tree.add_edge('C', Factor(['A'], [2], [0.1, 0.9]), label=0)
    """
    if u != v:
        super(TreeCPD, self).add_edge(u, v, label=label)
        if list(nx.simple_cycles(self)):
            super(TreeCPD, self).remove_edge(u, v)
            raise ValueError("Self Loops and Cycles are not allowed")
    else:
        raise ValueError("Self Loops and Cycles are not allowed")
def find_attractor(decStateTransMap):
    '''
    Arguments:
        1. decStateTransMap

    Return:
        attractor
    '''
    # in a deterministic system, any cycle found without considering edge
    # direction will be a directed cycle
    attractor_list = nx.simple_cycles(decStateTransMap)
    attractors = {}
    # attractors['fixed'] = []
    # attractors['cycle'] = []
    undirectedMap = nx.DiGraph.to_undirected(decStateTransMap)
    for u in attractor_list:
        attractors[u[0]] = {}
        if len(u) == 1:
            attractors[u[0]]['type'] = 'fixed'
        else:
            attractors[u[0]]['type'] = 'cycle'
    for v in attractors.iterkeys():
        basin = nx.node_connected_component(undirectedMap, v)
        attractors[v]['basin'] = basin
        attractors[v]['basin-size'] = len(basin)
    sorted_attractors = OrderedDict(sorted(attractors.items(),
                                           key=lambda kv: kv[1]['basin-size'],
                                           reverse=True))
    return sorted_attractors
def test_simple_cycles(self):
    edges = [(0, 0), (0, 1), (0, 2), (1, 2), (2, 0), (2, 1), (2, 2)]
    G = nx.DiGraph(edges)
    cc = sorted(nx.simple_cycles(G))
    ca = [[0], [0, 1, 2], [0, 2], [1, 2], [2]]
    for c in cc:
        assert_true(any(self.is_cyclic_permutation(c, rc) for rc in ca))
def find_most_repeated_cycles(di_graph):
    """
    Returns a list filled with this format for each element:
    [edge, amount_of_appearances].

    Args:
        di_graph : nx.DiGraph()
            A networkx DiGraph class for representing a DAG

    Returns:
        MATRIX[[TUPLE, INT], [TUPLE, INT], [TUPLE, INT], ...]
            If we have at least one edge with one appearance
        MATRIX[]
            If we don't have edges
    """
    list_all_cycles = []
    cycles = list(nx.simple_cycles(di_graph))
    for i in range(0, len(cycles)):
        list_all_cycles.append(find_cycle_edges(cycles[i], di_graph.edges(cycles[i])))
    flatted_edges = sum(list_all_cycles, [])  # This flattens the nested list of edges
    # This list contains edges paired with their number of appearances,
    # keeping only appearances bigger than 0
    checked_edges = []
    while len(flatted_edges) > 0:
        cont = flatted_edges.count(flatted_edges[0])
        if cont > 0:  # Amount of appearances bigger than 0
            checked_edges.append([flatted_edges[0], cont])
        # Remove every occurrence of the counted edge from the list
        flatted_edges[:] = (value for value in flatted_edges if value != flatted_edges[0])
    return checked_edges
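# find_most_repeated_cycles relies on a find_cycle_edges helper defined
# elsewhere; a plausible sketch, assuming it keeps the directed edges of a
# node cycle that are present in the supplied edge collection:
def find_cycle_edges(cycle, edges):
    cycle_edges = zip(cycle, cycle[1:] + cycle[:1])
    edge_set = set(edges)
    return [e for e in cycle_edges if e in edge_set]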
def _validate(G):
    '''
    Validates dependency graph to ensure it has no missing or cyclic
    dependencies
    '''
    for name in G.nodes():
        if 'value' not in G.node[name] and 'template' not in G.node[name]:
            msg = 'Dependency unsatisfied in variable "%s"' % name
            raise ParamException(msg)

    if not nx.is_directed_acyclic_graph(G):
        graph_cycles = nx.simple_cycles(G)

        variable_names = []
        for cycle in graph_cycles:
            try:
                variable_name = cycle[0]
            except IndexError:
                continue
            variable_names.append(variable_name)

        variable_names = ', '.join(sorted(variable_names))
        msg = ('Cyclic dependency found in the following variables: %s. Likely the variable is '
               'referencing itself' % (variable_names))
        raise ParamException(msg)
def sq():
    with open("rosalind_sq.txt") as f:
        lines = f.readlines()

    # remove empty lines
    lines = [line for line in lines if line.strip()]

    # Num test cases
    t = int(lines[0])
    del lines[0]

    for i in xrange(t):
        n, e = map(int, lines[0].split())
        edge_list = map(lambda x: map(int, x.strip().split()), lines[1:e + 1])
        del lines[:e + 1]

        # Create the graph
        G = nx.DiGraph()
        G.add_nodes_from(range(1, n + 1))
        for edge in edge_list:
            G.add_edge(edge[0], edge[1])
            G.add_edge(edge[1], edge[0])

        cycles = [c for c in nx.simple_cycles(G) if len(c) == 4]
        if cycles:
            print 1,
        else:
            print -1,
    print ""
def test_unsortable(self):
    # TODO What does this test do?  das 6/2013
    G = nx.DiGraph()
    nx.add_cycle(G, ['a', 1])
    c = list(nx.simple_cycles(G))
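# Re the TODO above: the test appears to guard against sorting the output
# when cycles mix node types. On Python 3 the enumeration itself is fine,
# but sorting the node lists would compare str with int and raise TypeError:
import networkx as nx

G = nx.DiGraph()
nx.add_cycle(G, ['a', 1])
print(list(nx.simple_cycles(G)))   # e.g. [['a', 1]]
# sorted(nx.simple_cycles(G))      # TypeError on Python 3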
def enumMaximumMatchingIter(g, match, all_matches, add_e=None):
    '''Recursively search maximum matchings.

    <g>: undirected bipartite graph. Nodes are separated by their
         'bipartite' attribute.
    <match>: list of edges forming one maximum matching of <g>.
    <all_matches>: list, each entry a list of edges forming a maximum
                   matching of <g>. Newly found matchings will be appended
                   into this list.
    <add_e>: tuple, the edge used to form subproblems. If not None, will be
             added to each newly found matching.

    Return <all_matches>: updated list of all maximum matchings.

    Author: guangzhi XU ([email protected]; [email protected])
    Update time: 2017-05-21 20:09:06.
    '''
    #---------------Form directed graph D---------------
    d = formDirected(g, match)

    #-----------------Find cycles in D-----------------
    cycles = list(nx.simple_cycles(d))

    if len(cycles) == 0:
        #---------If no cycle, find a feasible path---------
        all_uncovered = set(g.node).difference(set([ii[0] for ii in match]))
        all_uncovered = all_uncovered.difference(set([ii[1] for ii in match]))
        all_uncovered = list(all_uncovered)

        #--------------If no path, terminate--------------
        if len(all_uncovered) == 0:
            return all_matches

        #----------Find a length 2 feasible path----------
        idx = 0
        uncovered = all_uncovered[idx]
        while True:
            if uncovered not in nx.isolates(g):
                paths = nx.single_source_shortest_path(d, uncovered, cutoff=2)
                len2paths = [vv for kk, vv in paths.items() if len(vv) == 3]
                if len(len2paths) > 0:
                    reversed = False
                    break

                #----------------Try reversed path----------------
                paths_rev = nx.single_source_shortest_path(d.reverse(), uncovered, cutoff=2)
                len2paths = [vv for kk, vv in paths_rev.items() if len(vv) == 3]
                if len(len2paths) > 0:
                    reversed = True
                    break

            idx += 1
            if idx > len(all_uncovered) - 1:
                return all_matches
            uncovered = all_uncovered[idx]

        #-------------Create a new matching M'-------------
        len2path = len2paths[0]
        if reversed:
            len2path = len2path[::-1]
        len2path = zip(len2path[:-1], len2path[1:])

        new_match = []
        for ee in d.edges():
            if ee in len2path:
                if g.node[ee[1]]['bipartite'] == 0:
                    new_match.append((ee[1], ee[0]))
            else:
                if g.node[ee[0]]['bipartite'] == 0:
                    new_match.append(ee)

        if add_e is not None:
            for ii in add_e:
                new_match.append(ii)

        all_matches.append(new_match)

        #---------------------Select e---------------------
        e = set(len2path).difference(set(match))
        e = list(e)[0]

        #-----------------Form subproblems-----------------
        g_plus = g.copy()
        g_minus = g.copy()
        g_plus.remove_node(e[0])
        g_plus.remove_node(e[1])
        g_minus.remove_edge(e[0], e[1])

        add_e_new = [e, ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = enumMaximumMatchingIter(g_minus, match, all_matches, add_e)
        all_matches = enumMaximumMatchingIter(g_plus, new_match, all_matches, add_e_new)
    else:
        #----------------Find a cycle in D----------------
        cycle = cycles[0]
        cycle.append(cycle[0])
        cycle = zip(cycle[:-1], cycle[1:])

        #-------------Create a new matching M'-------------
        new_match = []
        for ee in d.edges():
            if ee in cycle:
                if g.node[ee[1]]['bipartite'] == 0:
                    new_match.append((ee[1], ee[0]))
            else:
                if g.node[ee[0]]['bipartite'] == 0:
                    new_match.append(ee)

        if add_e is not None:
            for ii in add_e:
                new_match.append(ii)

        all_matches.append(new_match)

        #-----------------Choose an edge E-----------------
        e = set(match).intersection(set(cycle))
        e = list(e)[0]

        #-----------------Form subproblems-----------------
        g_plus = g.copy()
        g_minus = g.copy()
        g_plus.remove_node(e[0])
        g_plus.remove_node(e[1])
        g_minus.remove_edge(e[0], e[1])

        add_e_new = [e, ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = enumMaximumMatchingIter(g_minus, new_match, all_matches, add_e)
        all_matches = enumMaximumMatchingIter(g_plus, match, all_matches, add_e_new)

    return all_matches
def run(
        self,
        max_cores=None,
        dry=False,
        set_successful=True,
        cmd_wrapper=signature.default_cmd_fxn_wrapper,
        log_out_dir_func=default_task_log_output_dir,
        max_gpus=None,
        do_cleanup_atexit=True,
        lethal_signals=TERMINATION_SIGNALS,
):
    """
    Runs this Workflow's DAG

    :param int max_cores: The maximum number of cores to use at once.
        A value of None indicates no maximum.
    :param int max_attempts: The maximum number of times to retry a failed job.
        Can be overridden on a per-Task basis with Workflow.add_task(..., max_attempts=N, ...)
    :param callable log_out_dir_func: A function that returns a Task's logging directory (must be unique).
        It receives one parameter: the Task instance.
        By default a Task's log output is stored in log/stage_name/task_id.
        See _default_task_log_output_dir for more info.
    :param callable cmd_wrapper: A decorator which will be applied to every Task's cmd_fxn.
    :param bool dry: If True, do not actually run any jobs.
    :param bool set_successful: Sets this workflow as successful if all tasks finish without a failure.
        You might set this to False if you intend to add and run more tasks in this workflow later.
    :param do_cleanup_atexit: if False, do not attempt to cleanup unhandled exits.
    :param lethal_signals: signals to catch and shutdown

    Returns True if all tasks in the workflow ran successfully, False otherwise.
    If dry is specified, returns None.
    """
    if cmd_wrapper == signature.default_cmd_fxn_wrapper:
        warnings.warn(
            f"Having functions return bash strings as the default behavior is deprecated. While "
            f"this behavior will be supported, it is recommended that you set cmd_wrapper to "
            f"cosmos.api.py_call which will be the new default. "
            f"See examples/ex3.py.")

    try:
        try:
            assert os.path.exists(os.getcwd()), \
                "current working dir does not exist! %s" % os.getcwd()
            assert hasattr(self, "cosmos_app"), \
                "Workflow was not initialized using the Workflow.start method"
            assert hasattr(log_out_dir_func, "__call__"), \
                "log_out_dir_func must be a function"
            assert self.session, "Workflow must be part of a sqlalchemy session"

            session = self.session
            self.log.info(
                "Preparing to run %s using DRM `%s`, cwd is `%s`",
                self,
                self.cosmos_app.default_drm,
                os.getcwd(),
            )
            try:
                user = getpass.getuser()
            except:
                # fallback to uid if we can't resolve a user name
                user = os.getuid()

            self.log.info("Running as %s@%s, pid %s", user, os.uname()[1], os.getpid())

            self.max_cores = max_cores
            self.max_gpus = max_gpus

            #
            # Run some validation checks
            #

            # check GPU env variables are set correctly
            if self.max_gpus is not None and self.cosmos_app.default_drm == "local":
                if "COSMOS_LOCAL_GPU_DEVICES" not in os.environ:
                    raise EnvironmentError(
                        "COSMOS_LOCAL_GPU_DEVICES environment variable must be set to a "
                        "comma delimited list of gpu devices if using a local DRM to manage "
                        "GPUs")

            # check for duplicate output files
            output_fnames_to_task_and_key = dict()
            for task in self.tasks:
                for key, fname in list(task.output_map.items()):
                    current_value = output_fnames_to_task_and_key.setdefault(fname, (task, key))
                    if current_value != (task, key):
                        task2, key2 = current_value
                        raise ValueError(
                            "Duplicate output files detected!: "
                            '{task}.params["{key}"] == {task2}.params["{key2}"] == {fname}'
                            .format(**locals()))
                    output_fnames_to_task_and_key[fname] = (task, key)

            from ..job.JobManager import JobManager

            if self.jobmanager is None:
                self.jobmanager = JobManager(
                    get_submit_args=self.cosmos_app.get_submit_args,
                    cmd_wrapper=cmd_wrapper,
                    log_out_dir_func=log_out_dir_func,
                    logger=self.log,
                    session=self.session,
                    workflow=self,
                )

            self.status = WorkflowStatus.running
            self.successful = False

            if self.started_on is None:
                self.started_on = datetime.datetime.now()

            task_graph = self.task_graph()
            stage_graph = self.stage_graph()

            assert len(set(self.stages)) == len(self.stages), \
                "duplicate stage name detected: %s" % (next(duplicates(self.stages)))

            # renumber stages
            stage_graph_no_cycles = nx.DiGraph()
            stage_graph_no_cycles.add_nodes_from(stage_graph.nodes())
            stage_graph_no_cycles.add_edges_from(stage_graph.edges())
            for cycle in nx.simple_cycles(stage_graph):
                stage_graph_no_cycles.remove_edge(cycle[-1], cycle[0])
            for i, s in enumerate(topological_sort(stage_graph_no_cycles)):
                s.number = i + 1
                if s.status != StageStatus.successful:
                    s.status = StageStatus.no_attempt

            # Make sure everything is in the sqlalchemy session
            session.add(self)
            successful = list([t for t in task_graph.nodes() if t.successful])

            # print stages
            for s in sorted(self.stages, key=lambda s: s.number):
                self.log.info("%s %s" % (s, s.status))

            # Create Task Queue
            task_queue = _copy_graph(task_graph)
            self.log.info("Skipping %s successful tasks..." % len(successful))
            task_queue.remove_nodes_from(successful)

            if do_cleanup_atexit:
                handle_exits(self)

            if self.max_cores is not None:
                self.log.info("Ensuring there are enough cores...")
                # make sure we've got enough cores
                for t in task_queue:
                    assert int(t.core_req) <= self.max_cores, \
                        "%s requires more cpus (%s) than `max_cores` (%s)" % (
                            t, t.core_req, self.max_cores,
                        )

            # Run this thing!
            self.log.info("Committing to SQL db...")
            session.commit()
        except KeyboardInterrupt:
            # haven't started submitting yet, just raise the exception
            self.log.fatal("ctrl+c caught")
            self.terminate(due_to_failure=False)
            raise

        if not dry:
            _run(self, session, task_queue, lethal_signals=lethal_signals)

            # set status
            if self.status == WorkflowStatus.failed_but_running:
                self.status = WorkflowStatus.failed
                # set stage status to failed
                for s in self.stages:
                    if s.status == StageStatus.running_but_failed:
                        s.status = StageStatus.failed
                session.commit()
                return False
            elif self.status == WorkflowStatus.running:
                if set_successful:
                    self.status = WorkflowStatus.successful
                session.commit()
                return True
            else:
                self.log.warning('%s exited with status "%s"', self, self.status)
                session.commit()
                return False
        else:
            self.log.info("Workflow dry run is complete")
            return None
    except Exception as ex:
        self.log.fatal("Exception was raised")
        self.log.fatal(ex, exc_info=True)
        self.terminate(due_to_failure=False)
        raise
def _enumMaximumMatchingIter2(adj, matchadj, all_matches, n1, add_e=None, check_cycle=True):
    """Recursively search maximum matchings.

    Similar to _enumMaximumMatching but implemented using the adjacency
    matrix of the graph for a slight speed boost.

    Parameters
    ----------
    # g :
    #     Undirected bipartite graph. Nodes are separated by their
    #     'bipartite' attribute.
    # match :
    #     List of edges forming one maximum matching of `g`.
    # all_matches :
    #     List, each is a list of edges forming a maximum matching of `g`.
    #     Newly found matchings will be appended into this list.
    add_e : tuple, optional
        Edge used to form subproblems. If not `None`, will be added to each
        newly found matching.

    Returns
    -------
    list
        Updated list of all maximum matchings.

    Author
    ------
    guangzhi XU ([email protected]; [email protected])
    Update time: 2017-05-21 20:09:06.
    """
    import networkx as nx
    from scipy import sparse

    #-------------------Find cycles-------------------
    if check_cycle:
        d = matchadj.multiply(adj)
        d[n1:, :] = adj[n1:, :] - matchadj[n1:, :].multiply(adj[n1:, :])
        dg = nx.from_numpy_matrix(d.toarray(), create_using=nx.DiGraph())
        cycles = list(nx.simple_cycles(dg))
        if len(cycles) == 0:
            check_cycle = False
        else:
            check_cycle = True

    if check_cycle:
        cycle = cycles[0]
        cycle.append(cycle[0])
        cycle = zip(cycle[:-1], cycle[1:])

        #--------------Create a new matching--------------
        new_match = matchadj.copy()
        for ee in cycle:
            if matchadj[ee[0], ee[1]] == 1:
                new_match[ee[0], ee[1]] = 0
                new_match[ee[1], ee[0]] = 0
                e = ee
            else:
                new_match[ee[0], ee[1]] = 1
                new_match[ee[1], ee[0]] = 1

        if add_e is not None:
            for ii in add_e:
                new_match[ii[0], ii[1]] = 1

        all_matches.append(new_match)

        #-----------------Form subproblems-----------------
        g_plus = adj.copy()
        g_minus = adj.copy()
        g_plus[e[0], :] = 0
        g_plus[:, e[1]] = 0
        g_plus[:, e[0]] = 0
        g_plus[e[1], :] = 0
        g_minus[e[0], e[1]] = 0
        g_minus[e[1], e[0]] = 0

        add_e_new = [e, ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = _enumMaximumMatchingIter2(g_minus, new_match, all_matches, n1, add_e, check_cycle)
        all_matches = _enumMaximumMatchingIter2(g_plus, matchadj, all_matches, n1, add_e_new, check_cycle)
    else:
        #---------------Find uncovered nodes---------------
        uncovered = np.where(np.sum(matchadj, axis=1) == 0)[0]
        if len(uncovered) == 0:
            return all_matches

        #---------------Find feasible paths---------------
        paths = []
        for ii in uncovered:
            aa = adj[ii, :].dot(matchadj)
            if aa.sum() == 0:
                continue
            paths.append((ii, int(sparse.find(aa == 1)[1][0])))
            if len(paths) > 0:
                break

        if len(paths) == 0:
            return all_matches

        #----------------------Find e----------------------
        feas1, feas2 = paths[0]
        e = (feas1, int(sparse.find(matchadj[:, feas2] == 1)[0]))

        #----------------Create a new match----------------
        new_match = matchadj.copy()
        new_match[feas2, :] = 0
        new_match[:, feas2] = 0
        new_match[feas1, e[1]] = 1
        new_match[e[1], feas1] = 1

        if add_e is not None:
            for ii in add_e:
                new_match[ii[0], ii[1]] = 1

        all_matches.append(new_match)

        #-----------------Form subproblems-----------------
        g_plus = adj.copy()
        g_minus = adj.copy()
        g_plus[e[0], :] = 0
        g_plus[:, e[1]] = 0
        g_plus[:, e[0]] = 0
        g_plus[e[1], :] = 0
        g_minus[e[0], e[1]] = 0
        g_minus[e[1], e[0]] = 0

        add_e_new = [e, ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = _enumMaximumMatchingIter2(g_minus, matchadj, all_matches, n1, add_e, check_cycle)
        all_matches = _enumMaximumMatchingIter2(g_plus, new_match, all_matches, n1, add_e_new, check_cycle)

    # if len(all_matches) % 1000 == 0:
    #     print('len', len(all_matches))
    # print('another')

    return all_matches
def simple_cycles(self):
    return list(nx.simple_cycles(self.graph))
file_list[i] = os.path.basename(f)
i += 1

# rename .for files in case of scalasca
if scorep:
    tmp1 = glob.glob(os.path.join(srcOutDir, '*.for'))
    tmp2 = glob.glob(os.path.join(srcOutDir, '*.FOR'))
    for f in tmp1 + tmp2:
        fn = os.path.splitext(f)[0] + '.f'
        os.rename(f, fn)

# create the graph
G = createDiGraph(mod_dict, use_dict)

# check for cyclic dependencies
cyc = list(nx.simple_cycles(G))

# draw the graph
# saveGraph(G, cyc, 3000, "graph.pdf")

# raise for cyclic dependency
if cyc != []:
    raise Exception("Error, cyclic dependency found!")

file_list_dep = []
while 1:
    x = [x for x in G.nodes_iter() if G.in_degree(x) == 0]
    # print '-->', " ".join(x)
    if x == []:
        break
    file_list_dep.extend(x)
    G.remove_nodes_from(x)
DG1.remove_nodes_from(remove)

# Array to store the number of directed cycles found for each order,
# accumulated over randomizations
nksum_directed = np.zeros(kmax + 1, dtype=int)

for j_rand in range(n_randomizations):
    # Randomize the network
    edges = list(DG1.edges())
    for edge in edges:
        revert_edge = random.randint(0, 1)
        if revert_edge:
            DG1.remove_edge(edge[0], edge[1])
            DG1.add_edge(edge[1], edge[0])
    # Find the cycles
    directed_cycles = list(nx.simple_cycles(DG1))
    # Store their distribution
    for cycle in directed_cycles:
        k = len(cycle)
        if k <= kmax:
            nksum_directed[k] += 1

# Calculate the mean number of cycles per order
nk_directed = nksum_directed.astype(float) / n_randomizations

# Save the results to file
data = np.vstack((np.arange(kmax + 1, dtype=int), nk_directed))
np.savetxt("cycledistribution_rand_sevaseviene.dat", data, fmt="%6d")
# Find cycles.
exchange_graph = nx.DiGraph()
fees = {}
for asset_key, asset in asset_pairs.iteritems():
    asset_name = str(asset_key.split('.')[0])
    base = str(asset['base'])
    quote = str(asset['quote'])
    assert asset_name == base + quote, '%s != %s + %s' % (asset_name, base, quote)
    fee = 1 - asset['fees'][0][1] / 100.
    fees[base, quote] = fee
    fees[quote, base] = fee
    exchange_graph.add_edge(base, quote)
    exchange_graph.add_edge(quote, base)

cycles = map(tuple, nx.simple_cycles(exchange_graph))
edges = {
    cycle: tuple(zip(cycle[:-1], cycle[1:]) + [(cycle[-1], cycle[0])])
    for cycle in cycles
}

mongodb_address = None
client = pymongo.MongoClient(mongodb_address)
db = client['kraken']
collection = db['tickers']
cursor = collection.find()

rows = []
for doc in tqdm.tqdm(cursor, total=cursor.count()):
    timestamp = utc_to_tz(doc['timestamp'], tz.gettz('PST'))
    prices = get_prices(asset_pairs, doc['ticker'])
def init_schema(filename, out_filename=None):
    """ Initialize an `ObjTables` schema from a tabular declarative specification in
    :obj:`filename`. :obj:`filename` can be a XLSX, CSV, or TSV file.

    Schemas (classes and attributes) should be defined using the following tabular format.
    Classes and their attributes can be defined in any order.

    .. table:: Format for specifying classes.
        :name: class_tabular_schema

        ==========================================  ====================  =================================================  ========
        Python                                      Tabular column        Tabular column values                              Optional
        ==========================================  ====================  =================================================  ========
        Class name                                  !Name                 Valid Python name
        Class                                       !Type                 ``Class``
        Superclass                                  !Parent               Empty or the name of another class
        :obj:`obj_tables.Meta.table_format`         !Format               ``row``, ``column``, ``multiple_cells``, ``cell``
        :obj:`obj_tables.Meta.verbose_name`         !Verbose name         String                                             Y
        :obj:`obj_tables.Meta.verbose_name_plural`  !Verbose name plural  String                                             Y
        :obj:`obj_tables.Meta.description`          !Description                                                             Y
        ==========================================  ====================  =================================================  ========

    .. table:: Format for specifying attributes of classes.
        :name: attribute_tabular_schema

        ============================================================  ====================  ==========================================  ========
        Python                                                        Tabular column        Tabular column values                       Optional
        ============================================================  ====================  ==========================================  ========
        Name of instance of subclass of :obj:`obj_tables.Attribute`  !Name                 a-z, A-Z, 0-9, _, :, >, ., -, [, ], or ' '
        :obj:`obj_tables.Attribute`                                   !Type                 ``Attribute``
        Parent class                                                  !Parent               Name of the parent class
        Subclass of :obj:`obj_tables.Attribute`                       !Format               ``Boolean``, ``Float``, ``String``, etc.
        :obj:`obj_tables.Attribute.verbose_name`                      !Verbose name         String                                      Y
        :obj:`obj_tables.Attribute.verbose_name_plural`               !Verbose name plural  String                                      Y
        :obj:`obj_tables.Attribute.description`                       !Description          String                                      Y
        ============================================================  ====================  ==========================================  ========

    Args:
        filename (:obj:`str`): path to
        out_filename (:obj:`str`, optional): path to save schema

    Returns:
        :obj:`tuple`:

            * :obj:`types.ModuleType`: module with classes
            * :obj:`str`: schema name

    Raises:
        :obj:`ValueError`: if the schema specification is not in a supported format,
            an XLSX schema file does not contain a worksheet with the name ``!!_Schema``
            which specifies the schema, the class inheritance structure is cyclic,
            or the schema specification is invalid (e.g., a class is defined multiple times)
    """
    from obj_tables.io import WorkbookReader

    base, ext = os.path.splitext(filename)
    if ext in ['.xlsx']:
        sheet_name = '!!' + SCHEMA_SHEET_NAME
    elif ext in ['.csv', '.tsv']:
        if '*' in filename:
            sheet_name = '!!' + SCHEMA_SHEET_NAME
        else:
            sheet_name = ''
    else:
        raise ValueError('{} format is not supported.'.format(ext))

    wb = wc_utils.workbook.io.read(filename)
    if sheet_name not in wb:
        raise ValueError(
            'Schema file must contain a sheet with name "{}".'.format(sheet_name))
    ws = wb[sheet_name]

    name_col_name = '!Name'
    type_col_name = '!Type'
    parent_col_name = '!Parent'
    format_col_name = '!Format'
    verbose_name_col_name = '!Verbose name'
    verbose_name_plural_col_name = '!Verbose name plural'
    desc_col_name = '!Description'
    col_names = [
        name_col_name,
        type_col_name,
        parent_col_name,
        format_col_name,
        verbose_name_col_name,
        verbose_name_plural_col_name,
        desc_col_name,
    ]
    class_type = 'Class'
    attr_type = 'Attribute'

    rows = ws
    doc_metadata, model_metadata, _ = WorkbookReader.read_worksheet_metadata(sheet_name, rows)

    doc_schema_name = doc_metadata.get('schema', None)
    schema_schema_name = model_metadata.get('name', None)
    assert not doc_schema_name or not schema_schema_name or doc_schema_name == schema_schema_name, \
        "Schema names must be None or equal"
    schema_name = doc_schema_name or schema_schema_name
    module_name = schema_name or rand_schema_name()

    if model_metadata.get('type', None) != SCHEMA_TABLE_TYPE:
        raise ValueError(
            "The type of the schema must be '{}'.".format(SCHEMA_TABLE_TYPE))

    # parse model specifications
    header_row = rows[0]
    rows = rows[1:]

    if name_col_name not in header_row:
        raise ValueError('Schema must have column "{}"'.format(name_col_name))
    if type_col_name not in header_row:
        raise ValueError('Schema must have column "{}"'.format(type_col_name))
    if parent_col_name not in header_row:
        raise ValueError('Schema must have column "{}"'.format(parent_col_name))
    if format_col_name not in header_row:
        raise ValueError('Schema must have column "{}"'.format(format_col_name))

    extra_headers = set(header_row) - set(col_names)
    if extra_headers:
        raise ValueError('Schema has unrecognized columns:\n  {}'.format(
            '\n  '.join(natsorted(extra_headers, alg=ns.IGNORECASE))))

    cls_specs = {}
    explicit_model_names = []
    implicit_model_names = []
    for i_row, row_list in enumerate(rows):
        # ignore empty rows
        if all(cell in [None, ''] for cell in row_list):
            continue

        # ignore comment rows
        if len(row_list) == 1 and isinstance(row_list[0], str) and \
                row_list[0].startswith('%/') and row_list[0].endswith('/%'):
            continue

        # convert cells to strings
        for i_cell, cell in enumerate(row_list):
            if cell is not None and not isinstance(cell, str):
                row_list[i_cell] = str(cell)

        row = {}
        for header, cell in zip(header_row, row_list):
            row[header] = cell

        if row[type_col_name] == class_type:
            cls_name = row[name_col_name]
            if not cls_name:
                raise ValueError(
                    'Class at row {} of the schema must have a name'.format(i_row + 1))
            if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', cls_name):
                raise ValueError(
                    ("Invalid class name '{}' at row {} of the schema. "
                     "Class names must start with a letter or underscore, "
                     "and consist of letters, numbers, and underscores.").format(cls_name, i_row + 1))

            if cls_name in cls_specs:
                cls = cls_specs[cls_name]
                if cls['explictly_defined']:
                    raise ValueError(
                        'Class "{}" can only be defined once in the schema.'.format(cls_name))
                cls['explictly_defined'] = True
            else:
                cls = cls_specs[cls_name] = {
                    'super_class': None,
                    'name': cls_name,
                    'attrs': {},
                    'attr_order': [],
                    'explictly_defined': True,
                }

            if row[parent_col_name]:
                cls['super_class'] = row[parent_col_name]

            if (row[format_col_name] or 'row') not in TableFormat.__members__:
                raise ValueError(
                    "Invalid class format '{}' at row {} of the schema".format(
                        row[format_col_name], i_row + 1))
            cls['tab_format'] = TableFormat[row[format_col_name] or 'row']

            def_verbose_name = cls_name
            cls['verbose_name'] = row.get(verbose_name_col_name, def_verbose_name) or def_verbose_name

            if row.get(verbose_name_col_name, None):
                def_plural_verbose_name = inflect.engine().plural(row[verbose_name_col_name])
            else:
                def_plural_verbose_name = cls_name
            cls['verbose_name_plural'] = row.get(
                verbose_name_plural_col_name, def_plural_verbose_name) or def_plural_verbose_name

            cls['desc'] = row.get(desc_col_name, None) or None

            explicit_model_names.append(cls_name)

        elif row[type_col_name] == attr_type:
            cls_name = row[parent_col_name]
            if not cls_name:
                raise ValueError(
                    'Parent class of attribute at row {} must be defined'.format(i_row + 1))
            if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', cls_name):
                raise ValueError(
                    ("Parent class of attribute at row {} of the schema has an invalid name '{}'. "
                     "Class names must start with a letter or underscore, "
                     "and consist of letters, numbers, and underscores.").format(i_row + 1, cls_name))
            if cls_name in cls_specs:
                cls = cls_specs[cls_name]
            else:
                cls = cls_specs[cls_name] = {
                    'explictly_defined': False,
                    'super_class': None,
                    'name': cls_name,
                    'attrs': {},
                    'attr_order': [],
                    'tab_format': TableFormat.row,
                    'verbose_name': cls_name,
                    'verbose_name_plural': cls_name,
                    'desc': None,
                }
                implicit_model_names.append(cls_name)

            attr_name = row[name_col_name]
            if not attr_name:
                raise ValueError(
                    'Attribute at row {} of the schema must have a name'.format(i_row + 1))
            if not re.match(r'^[a-zA-Z_:>\.\- \[\]][a-zA-Z0-9_:>\.\- \[\]]*$', attr_name):
                raise ValueError(
                    ("Invalid attribute name '{}' at row {} of the schema. "
                     "Attribute names must consist of alphanumeric "
                     "characters, underscores, colons, forward carets, "
                     "dots, dashes, square brackets, and spaces and "
                     "begin with a non-numeric character.").format(attr_name, i_row + 1))
            attr_name = re.sub(r'[^a-zA-Z0-9_]', '_', attr_name)
            attr_name = stringcase.snakecase(attr_name)
            attr_name = re.sub(r'_+', '_', attr_name)

            if attr_name == 'Meta':
                raise ValueError(
                    '"{}" cannot have attribute with name "Meta" at row {} of the schema.'.format(
                        cls_name, i_row + 1)
                )  # pragma: no cover  # unreachable because snake case is all lowercase

            if attr_name in cls['attrs']:
                raise ValueError(
                    'Attribute "{}" of "{}" can only be defined once.'.format(
                        row[name_col_name], cls_name))

            cls['attrs'][attr_name] = {
                'name': attr_name,
                'type': row[format_col_name],
                'desc': row.get(desc_col_name, None),
                'verbose_name': row.get(verbose_name_col_name, row[name_col_name]),
            }
            cls['attr_order'].append(attr_name)

        else:
            if row[type_col_name]:
                raise ValueError(
                    'Type "{}" is not supported at row {} of the schema.'.format(
                        row[type_col_name], i_row + 1))
            else:
                raise ValueError(
                    'Type must be defined at row {} of the schema.'.format(i_row + 1))

    # check that the inheritance graph is valid (i.e. acyclic)
    inheritance_graph = networkx.DiGraph()
    sub_classes = {'obj_tables.Model': []}
    for cls_name, cls_spec in cls_specs.items():
        if cls_spec['super_class']:
            if cls_spec['super_class'] not in cls_specs:
                raise ValueError(
                    'Superclass "{}" for class "{}" must be defined'.format(
                        cls_spec['super_class'], cls_name))
            inheritance_graph.add_edge(cls_spec['super_class'], cls_name)
            if cls_spec['super_class'] not in sub_classes:
                sub_classes[cls_spec['super_class']] = []
            sub_classes[cls_spec['super_class']].append(cls_name)
        else:
            inheritance_graph.add_edge('obj_tables.Model', cls_name)
            sub_classes['obj_tables.Model'].append(cls_name)
    if list(networkx.simple_cycles(inheritance_graph)):
        raise ValueError('The schema inheritance graph must be acyclic.')

    # create classes
    module = type(module_name, (types.ModuleType, ), {})
    all_attrs = get_attrs()
    classes_to_construct = list(sub_classes['obj_tables.Model'])
    while classes_to_construct:
        cls_name = classes_to_construct.pop()
        cls_spec = cls_specs[cls_name]
        # if not cls_spec['explictly_defined']:
        #     raise ValueError('Class "{}" is not defined in the schema'.format(cls_name))
        classes_to_construct.extend(sub_classes.get(cls_name, []))

        meta_attrs = {
            'table_format': cls_spec['tab_format'],
            'attribute_order': tuple(cls_spec['attr_order']),
            'description': cls_spec['desc'],
        }
        if cls_spec['verbose_name']:
            meta_attrs['verbose_name'] = cls_spec['verbose_name']
        if cls_spec['verbose_name_plural']:
            meta_attrs['verbose_name_plural'] = cls_spec['verbose_name_plural']

        attrs = {
            '__module__': module_name,
            '__doc__': cls_spec['desc'],
            'Meta': type('Meta', (Model.Meta, ), meta_attrs),
        }
        for attr_spec in cls_spec['attrs'].values():
            attr_type_spec, _, args = attr_spec['type'].partition('(')
            if attr_type_spec not in all_attrs:
                raise ValueError(
                    'Attribute "{}" is not defined in the schema'.format(attr_type_spec))
            attr_type = all_attrs[attr_type_spec]
            attr_spec['python_type'] = attr_type_spec + 'Attribute'
            if args:
                attr_spec['python_args'] = args[0:-1]
                if attr_spec['verbose_name']:
                    attr_spec['python_args'] += ", verbose_name='{}'".format(
                        attr_spec['verbose_name'].replace("'", "\\'"))
            else:
                attr_spec['python_args'] = ''
                if attr_spec['verbose_name']:
                    attr_spec['python_args'] = "verbose_name='{}'".format(
                        attr_spec['verbose_name'].replace("'", "\\'"))

            if args:
                attr = eval('func(' + args, {}, {'func': attr_type})
            else:
                attr = attr_type()
            attr.verbose_name = attr_spec['verbose_name']
            attr.description = attr_spec['desc']
            attrs[attr_spec['name']] = attr

        if cls_spec['super_class'] is None or cls_spec['super_class'] == 'obj_tables.Model':
            super_class = Model
        else:
            super_class = getattr(module, cls_spec['super_class'])

        cls = type(cls_spec['name'], (super_class, ), attrs)
        setattr(module, cls_spec['name'], cls)

    # optionally, generate a Python file
    if out_filename:
        with open(out_filename, 'w') as file:
            # print documentation
            file.write('# Schema automatically generated at {:%Y-%m-%d %H:%M:%S}\n\n'.format(
                datetime.now()))

            # print import statements
            imported_modules = set(['obj_tables'])
            for cls_spec in cls_specs.values():
                for attr_spec in cls_spec['attrs'].values():
                    imported_modules.add(
                        'obj_tables.' + attr_spec['python_type'].rpartition('.')[0])
            if 'obj_tables.' in imported_modules:
                imported_modules.remove('obj_tables.')
            for imported_module in imported_modules:
                file.write('import {}\n'.format(imported_module))

            # print definition of * import behavior
            file.write('\n')
            file.write('\n')
            file.write('__all__ = [\n')
            file.write(''.join("    '{}',\n".format(cls_name)
                               for cls_name in sorted(cls_specs.keys())))
            file.write(']\n')

            # print class definitions
            classes_to_define = list(sub_classes['obj_tables.Model'])
            while classes_to_define:
                cls_name = classes_to_define.pop(0)
                cls_spec = cls_specs[cls_name]
                classes_to_define.extend(sub_classes.get(cls_name, []))

                if cls_spec['super_class']:
                    super_class = cls_spec['super_class']
                else:
                    super_class = 'obj_tables.Model'

                file.write('\n')
                file.write('\n')
                file.write('class {}({}):\n'.format(cls_spec['name'], super_class))
                if cls_spec['desc']:
                    file.write('    """ {} """\n\n'.format(cls_spec['desc']))

                for attr_name in cls_spec['attr_order']:
                    attr_spec = cls_spec['attrs'][attr_name]
                    file.write('    {} = obj_tables.{}({})\n'.format(
                        attr_spec['name'], attr_spec['python_type'], attr_spec['python_args']))

                file.write('\n')
                file.write('    class Meta(obj_tables.Model.Meta):\n')
                file.write("        table_format = obj_tables.TableFormat.{}\n".format(
                    cls_spec['tab_format'].name))
                file.write("        attribute_order = (\n{}        )\n".format(
                    "".join("            '{}',\n".format(attr)
                            for attr in cls_spec['attr_order'])))
                if cls_spec['verbose_name']:
                    file.write("        verbose_name = '{}'\n".format(
                        cls_spec['verbose_name'].replace("'", "\\'")))
                if cls_spec['verbose_name_plural']:
                    file.write("        verbose_name_plural = '{}'\n".format(
                        cls_spec['verbose_name_plural'].replace("'", "\\'")))
                if cls_spec['desc']:
                    file.write("        description = '{}'\n".format(
                        cls_spec['desc'].replace("'", "\\'")))

    # get models in order of their definition
    model_names = det_dedupe(explicit_model_names + implicit_model_names)
    models = [getattr(module, model_name) for model_name in model_names]

    # return the created module and its name
    return (module, schema_name, models)
def main():
    # Command line arguments
    parser = argparse.ArgumentParser(
        description='Extract model subsets from the National Hydrologic Model')
    parser.add_argument('-O', '--output_dir', help='Output directory for subset')
    parser.add_argument('-p', '--param_filename', help='Name of output parameter file')
    parser.add_argument('-s', '--streamflow_filename', help='Name of streamflow data file')
    parser.add_argument('-P', '--paramdb_dir', help='Location of parameter database')
    parser.add_argument('-M', '--merged_paramdb_dir', help='Location of merged parameter database')
    parser.add_argument('-C', '--cbh_dir', help='Location of CBH files')
    parser.add_argument('-g', '--geodatabase_filename', help='Full path to NHM geodatabase')
    parser.add_argument('-j', '--job', help='Job directory to work in')
    parser.add_argument('-v', '--verbose', help='Output additional information', action='store_true')
    parser.add_argument('--check_DAG', help='Verify the streamflow network', action='store_true')
    parser.add_argument('--output_cbh', help='Output CBH files for subset', action='store_true')
    parser.add_argument('--output_shapefiles', help='Output shapefiles for subset', action='store_true')
    parser.add_argument('--output_streamflow', help='Output streamflows for subset', action='store_true')
    parser.add_argument('--cbh_netcdf', help='Enable netCDF output for CBH files', action='store_true')
    parser.add_argument('--param_netcdf', help='Enable netCDF output for parameter file', action='store_true')
    parser.add_argument(
        '--add_gages', metavar="KEY=VALUE", nargs='+',
        help='Add arbitrary streamgages to POIs of form gage_id=segment. Segment must '
             'exist in the model subset. Additional streamgages are marked as poi_type=0.')
    parser.add_argument(
        '--no_filter_params',
        help='Output all parameters regardless of modules selected', action='store_true')
    args = parser.parse_args()

    stdir = os.getcwd()

    # TODO: Add to command line arguments
    single_poi = False

    if args.job:
        if os.path.exists(args.job):
            # Change into job directory before running extraction
            os.chdir(args.job)
            # print('Working in directory: {}'.format(args.job))
        else:
            print('ERROR: Invalid jobs directory: {}'.format(args.job))
            exit(-1)

    # Setup the logging
    bandit_log = logging.getLogger('bandit')
    bandit_log.setLevel(logging.DEBUG)
    log_fmt = logging.Formatter('%(levelname)s: %(name)s: %(message)s')

    # Handler for file logs
    flog = logging.FileHandler('bandit.log')
    flog.setLevel(logging.DEBUG)
    flog.setFormatter(log_fmt)

    # Handler for console logs
    clog = logging.StreamHandler()
    clog.setLevel(logging.ERROR)
    clog.setFormatter(log_fmt)

    bandit_log.addHandler(flog)
    bandit_log.addHandler(clog)

    bandit_log.info('========== START {} =========='.format(datetime.datetime.now().isoformat()))

    addl_gages = None
    if args.add_gages:
        addl_gages = parse_gages(args.add_gages)
        bandit_log.info('Additional streamgages specified on command line')

    config = bc.Cfg('bandit.cfg')

    # Override configuration variables with any command line parameters
    for kk, vv in iteritems(args.__dict__):
        if kk not in ['job', 'verbose', 'cbh_netcdf', 'add_gages', 'param_netcdf', 'no_filter_params']:
            if vv:
                bandit_log.info('Overriding configuration for {} with {}'.format(kk, vv))
                config.update_value(kk, vv)

    # Where to output the subset
    outdir = config.output_dir

    # The control file to use
    control_filename = config.control_filename

    # What to name the output parameter file
    param_filename = config.param_filename

    # Location of the NHM parameter database
    paramdb_dir = config.paramdb_dir

    # Location of the merged parameter database
    merged_paramdb_dir = config.merged_paramdb_dir

    streamgage_file = config.streamgage_file

    # List of outlets
    # dsmost_seg = config.outlets

    # List of upstream cutoffs
    # uscutoff_seg = config.cutoffs

    # List of additional HRUs (have no route to segment within subset)
    # hru_noroute = config.hru_noroute

    # List of output variables to subset
    try:
        include_model_output = config.include_model_output
        output_vars_dir = config.output_vars_dir
        output_vars = config.output_vars
    except KeyError:
        include_model_output = False

    # Control what is checked and output for subset
    check_dag = config.check_DAG

    try:
        output_cbh = config.output_cbh

        # Location of the NHM CBH files
        cbh_dir = config.cbh_dir
    except KeyError:
        output_cbh = False

    try:
        output_streamflow = config.output_streamflow

        # What to name the streamflow output file
        obs_filename = config.streamflow_filename
    except KeyError:
        output_streamflow = False

    try:
        output_shapefiles = config.output_shapefiles

        # Full path and filename to the geodatabase to use for outputting shapefile subsets
        geo_file = config.geodatabase_filename
    except KeyError:
        output_shapefiles = False

    # Load the control file
    ctl = ControlFile(control_filename)

    if ctl.has_dynamic_parameters:
        if config.dyn_params_dir:
            if os.path.exists(config.dyn_params_dir):
                dyn_params_dir = config.dyn_params_dir
            else:
                bandit_log.error('dyn_params_dir: {}, does not exist.'.format(config.dyn_params_dir))
                exit(2)
        else:
            bandit_log.error('Control file has dynamic parameters but dyn_params_dir is not '
                             'specified in the config file')
            exit(2)

    # Load master list of valid parameters
    vpdb = ValidParams()

    # Build list of parameters required for the selected control file modules
    required_params = vpdb.get_params_for_modules(modules=ctl.modules.values())

    # TODO: make sure dynamic parameter filenames are correct
    # Write an updated control file
    # ctl.write('somefile')

    # Date range for pulling NWIS streamgage observations
    if isinstance(config.start_date, datetime.date):
        st_date = config.start_date
    else:
        st_date = datetime.datetime(*[int(x) for x in re.split('-| |:', config.start_date)])

    if isinstance(config.end_date, datetime.date):
        en_date = config.end_date
    else:
        en_date = datetime.datetime(*[int(x) for x in re.split('-| |:', config.end_date)])

    # ===============================================================
    params_file = '{}/{}'.format(merged_paramdb_dir, PARAMETERS_XML)

    # Output revision of NhmParamDb and the revision used by merged paramdb
    nhmparamdb_revision = git_version(paramdb_dir)
    bandit_log.info('Parameters based on NhmParamDb revision: {}'.format(nhmparamdb_revision))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Read hru_nhm_to_local and hru_nhm_to_region
    # Create segment_nhm_to_local and segment_nhm_to_region
    # TODO: since hru_nhm_to_region and nhru_nhm_to_local are only needed for
    #       CBH files we should 'soft-fail' if the files are missing and just
    #       output a warning and turn off CBH output if it was selected.
    # hru_nhm_to_region = get_parameter('{}/hru_nhm_to_region.msgpack'.format(cbh_dir))
    # hru_nhm_to_local = get_parameter('{}/hru_nhm_to_local.msgpack'.format(cbh_dir))

    # Load the NHMparamdb
    print('Loading NHM ParamDb')
    pdb = ParamDb(merged_paramdb_dir)
    nhm_params = pdb.parameters
    nhm_global_dimensions = pdb.dimensions

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get tosegment_nhm
    # NOTE: tosegment is now tosegment_nhm and the regional tosegment is gone.
    tosegment = nhm_params.get('tosegment').data
    nhm_seg = nhm_params.get('nhm_seg').data

    if args.verbose:
        print('Generating stream network from tosegment_nhm')

    # Build the stream network
    dag_ds = nx.DiGraph()
    for ii, vv in enumerate(tosegment):
        # dag_ds.add_edge(ii+1, vv)
        if vv == 0:
            dag_ds.add_edge(ii + 1, 'Out_{}'.format(ii + 1))
        else:
            dag_ds.add_edge(ii + 1, vv)

    # nx.draw_networkx(dag_ds)

    bandit_log.debug('Number of NHM downstream nodes: {}'.format(dag_ds.number_of_nodes()))
    bandit_log.debug('Number of NHM downstream edges: {}'.format(dag_ds.number_of_edges()))

    if check_dag:
        if not nx.is_directed_acyclic_graph(dag_ds):
            bandit_log.error('Cycles and/or loops found in stream network')
            for xx in nx.simple_cycles(dag_ds):
                bandit_log.error('Cycle found for segment {}'.format(xx))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Build dictionary which maps poi_gage_id to poi_gage_segment
    # poi_gage_segment_tmp = get_parameter('{}/poi_gage_segment.msgpack'.format(merged_paramdb_dir))['data']
    # poi_gage_id_tmp = get_parameter('{}/poi_gage_id.msgpack'.format(merged_paramdb_dir))['data']
    poi_gage_segment_tmp = nhm_params.get('poi_gage_segment').data
    poi_gage_id_tmp = nhm_params.get('poi_gage_id').data

    # Create dictionary to lookup nhm_segment for a given poi_gage_id
    poi_id_to_seg = dict(zip(poi_gage_id_tmp, poi_gage_segment_tmp))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Read streamgage ids from file - one streamgage id per row
    with open(streamgage_file, 'r') as fhdl:
        streamgages = fhdl.read().splitlines()

    # =====================================
    # dag_ds should not change below here
    # For each streamgage:
    # 1) lookup nhm_segment (if any) and use as outlet
    # 2) create output directory
    # 3) subset the stream network, HRUs, params, etc
    uscutoff_seg = []

    for sg in streamgages:
        print('Working on streamgage {}'.format(sg))
        while True:
            # Create the upstream graph
            dag_us = dag_ds.reverse()
            bandit_log.debug('Number of NHM upstream nodes: {}'.format(dag_us.number_of_nodes()))
            bandit_log.debug('Number of NHM upstream edges: {}'.format(dag_us.number_of_edges()))

            # Trim the u/s graph to remove segments above the u/s cutoff segments
            try:
                for xx in uscutoff_seg:
                    try:
                        dag_us.remove_nodes_from(nx.dfs_predecessors(dag_us, xx))

                        # Also remove the cutoff segment itself
                        dag_us.remove_node(xx)
                    except KeyError:
                        print('WARNING: nhm_segment {} does not exist in stream network'.format(xx))
            except TypeError:
                bandit_log.error('\nSelected cutoffs should at least be an empty list instead of '
                                 'NoneType. ({})'.format(outdir))
                exit(200)

            bandit_log.debug('Number of NHM upstream nodes (trimmed): {}'.format(dag_us.number_of_nodes()))
            bandit_log.debug('Number of NHM upstream edges (trimmed): {}'.format(dag_us.number_of_edges()))

            # Lookup the outlet for the current streamgage
            try:
                dsmost_seg = [poi_id_to_seg[sg]]
                if dsmost_seg[0] == 0:
                    # POI stream segment was never properly assigned in paramdb
                    bandit_log.error('Streamgage {} has segment = 0. Skipping.'.format(sg))
                    break
                elif len(dsmost_seg) > 1:
                    # Should never have more than one segment per streamgage
                    bandit_log.info('Streamgage {} has more than one stream segment.'.format(sg))
                    break
            except KeyError:
                bandit_log.error('Streamgage {} does not exist in poi_gage_id'.format(sg))
                break

            sg_dir = '{}/{}'.format(outdir, sg)

            try:
                os.makedirs(sg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    raise
                else:
                    pass

            # =======================================
            # Given a d/s segment (dsmost_seg) create a subset of u/s segments
            if args.verbose:
                print('\tExtracting model subset')

            # Get all unique segments u/s of the starting segment
            uniq_seg_us = set()
            if dsmost_seg:
                for xx in dsmost_seg:
                    try:
                        pred = nx.dfs_predecessors(dag_us, xx)
                        uniq_seg_us = uniq_seg_us.union(set(pred.keys()).union(set(pred.values())))
                    except KeyError:
                        bandit_log.error('KeyError: Segment {} does not exist in stream network'.format(xx))
                        # print('\nKeyError: Segment {} does not exist in stream network'.format(xx))

                # Get a subgraph in the dag_ds graph and return the edges
                dag_ds_subset = dag_ds.subgraph(uniq_seg_us).copy()

                # 2018-02-13 PAN: It is possible to have outlets specified which are not truly
                #                 outlets in the most conservative sense (e.g. a point where
                #                 the stream network exits the study area). This occurs when
                #                 doing headwater extractions where all segments for a headwater
                #                 are specified in the configuration file. Instead of creating
                #                 output edges for all specified 'outlets' the set difference
                #                 between the specified outlets and nodes in the graph subset
                #                 which have no edges is performed first to reduce the number of
                #                 outlets to the 'true' outlets of the system.
                node_outlets = [ee[0] for ee in dag_ds_subset.edges()]
                true_outlets = set(dsmost_seg).difference(set(node_outlets))
                bandit_log.debug('node_outlets: {}'.format(','.join(map(str, node_outlets))))
                bandit_log.debug('true_outlets: {}'.format(','.join(map(str, true_outlets))))

                # Add the downstream segments that exit the subgraph
                for xx in true_outlets:
                    dag_ds_subset.add_edge(xx, 'Out_{}'.format(xx))
            else:
                # No outlets specified so pull the CONUS
                dag_ds_subset = dag_ds

            # Create list of toseg ids for the model subset
            try:
                # networkx 1.x
                toseg_idx = list(set(xx[0] for xx in dag_ds_subset.edges_iter()))
            except AttributeError:
                # networkx 2.x
                toseg_idx = list(set(xx[0] for xx in dag_ds_subset.edges))

            toseg_idx0 = [xx - 1 for xx in toseg_idx]  # 0-based version of toseg_idx

            bandit_log.info('Number of segments in subset: {}'.format(len(toseg_idx)))

            # NOTE: With monolithic nhmParamDb files hru_segment becomes hru_segment_nhm and
            #       the regional hru_segments are gone.
            # 2019-09-16 PAN: This initially assumed hru_segment in the monolithic paramdb was
            #                 ALWAYS ordered 1..nhru. This is not always the case so the nhm_id
            #                 parameter needs to be loaded and used to map the nhm HRU ids to
            #                 their respective indices.
            hru_segment = nhm_params.get('hru_segment').data
            nhm_id = nhm_params.get('nhm_id').data

            nhm_id_to_idx = {}
            for ii, vv in enumerate(nhm_id):
                # keys are 1-based, values are 0-based
                nhm_id_to_idx[vv] = ii

            bandit_log.info('Number of NHM hru_segment entries: {}'.format(len(hru_segment)))

            # Create a dictionary mapping segments to HRUs
            seg_to_hru = {}
            for ii, vv in enumerate(hru_segment):
                # keys are 1-based, values in arrays are 1-based
                seg_to_hru.setdefault(vv, []).append(ii + 1)

            # Get HRU ids ordered by the segments in the model subset - entries are 1-based
            hru_order_subset = []
            for xx in toseg_idx:
                if xx in seg_to_hru:
                    for yy in seg_to_hru[xx]:
                        hru_order_subset.append(yy)
                else:
                    bandit_log.warning('Stream segment {} has no HRUs connected to it.'.
format(xx)) # raise ValueError('Stream segment has no HRUs connected to it.') # Append the additional non-routed HRUs to the list # if len(hru_noroute) > 0: # for xx in hru_noroute: # if hru_segment[xx-1] == 0: # bandit_log.info('User-supplied HRU {} is not connected to any stream segment'.format(xx)) # hru_order_subset.append(xx) # else: # bandit_log.error('User-supplied HRU {} routes to stream segment {} - Skipping.'.format(xx, # hru_segment[xx-1])) hru_order_subset0 = [xx - 1 for xx in hru_order_subset] bandit_log.info('Number of HRUs in subset: {}'.format( len(hru_order_subset))) # Use hru_order_subset to pull selected indices for parameters with nhru dimensions # hru_order_subset contains the in-order indices for the subset of hru_segments # toseg_idx contains the in-order indices for the subset of tosegments # Renumber the tosegment list new_tosegment = [] # Map old DAG_subds indices to new for xx in toseg_idx: if list(dag_ds_subset.neighbors(xx))[0] in toseg_idx: new_tosegment.append( toseg_idx.index(list(dag_ds_subset.neighbors(xx))[0]) + 1) else: # Outlets should be assigned zero new_tosegment.append(0) # Renumber the hru_segments for the subset new_hru_segment = [] for xx in toseg_idx: # if DAG_subds.neighbors(xx)[0] in toseg_idx: if xx in seg_to_hru: for _ in seg_to_hru[xx]: # The new indices should be 1-based from PRMS new_hru_segment.append(toseg_idx.index(xx) + 1) # Append zeroes to new_hru_segment for each additional non-routed HRU # if len(hru_noroute) > 0: # for xx in hru_noroute: # if hru_segment[xx-1] == 0: # new_hru_segment.append(0) bandit_log.info('Size of hru_segment for subset: {}'.format( len(new_hru_segment))) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Subset hru_deplcrv hru_deplcrv = nhm_params.get('hru_deplcrv').data bandit_log.info('Size of NHM hru_deplcrv: {}'.format( len(hru_deplcrv))) # Get subset of hru_deplcrv using hru_order # A single snarea_curve can be referenced by multiple HRUs hru_deplcrv_subset = np.array(hru_deplcrv)[ tuple(hru_order_subset0), ] uniq_deplcrv = list(set(hru_deplcrv_subset)) uniq_deplcrv0 = [xx - 1 for xx in uniq_deplcrv] # Create new hru_deplcrv and renumber new_hru_deplcrv = [ uniq_deplcrv.index(cc) + 1 for cc in hru_deplcrv_subset ] bandit_log.info('Size of hru_deplcrv for subset: {}'.format( len(new_hru_deplcrv))) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Subset poi_gage_segment new_poi_gage_segment = [] new_poi_gage_id = [] new_poi_type = [] if nhm_params.exists('poi_gage_segment'): poi_gage_segment = nhm_params.get('poi_gage_segment').tolist() bandit_log.info('Size of NHM poi_gage_segment: {}'.format( len(poi_gage_segment))) poi_gage_id = nhm_params.get('poi_gage_id').data poi_type = nhm_params.get('poi_type').data # We want to get the indices of the poi_gage_segments that match the # segments that are part of the subset. We can then use these # indices to subset poi_gage_id and poi_type. # The poi_gage_segment will need to be renumbered for the subset of segments. 
# To subset poi_gage_segment we have to lookup each segment in the subset # Reset the cutoff list uscutoff_seg = [] # for ss in uniq_seg_us: try: # networkx 1.x for ss in nx.nodes_iter(dag_ds_subset): if ss in poi_gage_segment: new_poi_gage_segment.append( toseg_idx.index(ss) + 1) new_poi_gage_id.append( poi_gage_id[poi_gage_segment.index(ss)]) new_poi_type.append( poi_type[poi_gage_segment.index(ss)]) except AttributeError: # networkx 2.x for ss in dag_ds_subset.nodes: if ss in poi_gage_segment: new_poi_gage_segment.append( toseg_idx.index(ss) + 1) new_poi_gage_id.append( poi_gage_id[poi_gage_segment.index(ss)]) new_poi_type.append( poi_type[poi_gage_segment.index(ss)]) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Add any valid user-specified streamgage, nhm_seg pairs if addl_gages: for ss, vv in iteritems(addl_gages): if ss in new_poi_gage_id: idx = new_poi_gage_id.index(ss) bandit_log.warning( 'Existing NHM POI, {}, overridden on commandline (was {}, now {})' .format(ss, new_poi_gage_segment[idx], toseg_idx.index(vv) + 1)) new_poi_gage_segment[idx] = toseg_idx.index(vv) + 1 new_poi_type[idx] = 0 elif toseg_idx.index(vv) + 1 in new_poi_gage_segment: sidx = new_poi_gage_segment.index( toseg_idx.index(vv) + 1) bandit_log.warning( 'User-specified streamgage ({}) has same nhm_seg ({}) as existing POI ({}), replacing streamgage ID' .format(ss, toseg_idx.index(vv) + 1, new_poi_gage_id[sidx])) new_poi_gage_id[sidx] = ss new_poi_type[sidx] = 0 elif vv not in seg_to_hru.keys(): bandit_log.warning( 'User-specified streamgage ({}) has nhm_seg={} which is not part of the model subset - Skipping.' .format(ss, vv)) else: new_poi_gage_id.append(ss) new_poi_gage_segment.append( toseg_idx.index(vv) + 1) new_poi_type.append(0) bandit_log.info( 'Added user-specified POI streamgage ({}) at nhm_seg={}' .format(ss, vv)) # ================================================================== # ================================================================== # Process the parameters and create a parameter file for the subset params = list(nhm_params.keys()) # Remove the POI-related parameters if we have no POIs if len(new_poi_gage_segment) == 0: bandit_log.warning( 'No POI gages found for subset; removing POI-related parameters.' ) for rp in ['poi_gage_id', 'poi_gage_segment', 'poi_type']: # params.pop(rp, None) try: params.remove(rp) except ValueError: print('ERROR: unable to remove {}'.format(rp)) pass params.sort() dims = {} for kk in nhm_global_dimensions.values(): dims[kk.name] = kk.size # Resize dimensions to the model subset crap_dims = dims.copy() # need a copy since we modify dims for dd, dv in iteritems(crap_dims): # dimensions 'nmonths' and 'one' are never changed if dd in HRU_DIMS: dims[dd] = len(hru_order_subset0) elif dd == 'nsegment': dims[dd] = len(toseg_idx0) elif dd == 'ndeplval': dims[dd] = len(uniq_deplcrv0) * 11 # if 'ndepl' not in dims: dims['ndepl'] = len(uniq_deplcrv0) elif dd == 'npoigages': dims[dd] = len(new_poi_gage_segment) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Build a ParameterSet for output # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ new_ps = ParameterSet() for dd, dv in iteritems(dims): new_ps.dimensions.add(dd, dv) if dd == 'npoigages': # 20170217 PAN: nobs is missing from the paramdb but is necessary new_ps.dimensions.add('nobs', dv) new_params = list(required_params) # WARNING: 2019-04-23 PAN # Very hacky way to remove parameters that shouldn't always get # included. Need to figure out a better way. 
check_list = [ 'basin_solsta', 'gvr_hru_id', 'hru_solsta', 'humidity_percent', 'irr_type', 'obsout_segment', 'rad_conv', 'rain_code', 'hru_lon' ] for xx in check_list: if xx in new_params: if xx in ['basin_solsta', 'hru_solsta', 'rad_conv']: if not new_ps.dimensions.exists('nsol'): new_params.remove(xx) elif new_ps.dimensions.get('nsol') == 0: new_params.remove(xx) elif xx == 'humidity_percent': if not new_ps.dimensions.exists('nhumid'): new_params.remove(xx) elif new_ps.dimensions.get('nhumid') == 0: new_params.remove(xx) elif xx == 'irr_type': if not new_ps.dimensions.exists('nwateruse'): new_params.remove(xx) elif new_ps.dimensions.get('nwateruse') == 0: new_params.remove(xx) elif xx == 'gvr_hru_id': if ctl.get('mapOutON_OFF').values == 0: new_params.remove(xx) elif xx in [ 'hru_lat', 'hru_lon', ]: if not nhm_params.exists(xx): new_params.remove(xx) new_params.sort() for pp in params: if pp in new_params or args.no_filter_params: cparam = nhm_params.get(pp).tostructure() new_ps.parameters.add(cparam['name']) ndims = len(cparam['dimensions']) if args.verbose: sys.stdout.write( '\r ') sys.stdout.write('\rProcessing {} '.format( cparam['name'])) sys.stdout.flush() # Get order of dimensions and total size for parameter dim_order = [None] * ndims for dd, dv in iteritems(cparam['dimensions']): dim_order[dv['position']] = dd for dd in dim_order: # self.parameters.get(varname).dimensions.add(dd, self.dimensions.get(dd).size) new_ps.parameters.get(cparam['name']).dimensions.add( dd, new_ps.dimensions.get(dd).size) new_ps.parameters.get( cparam['name']).datatype = cparam['datatype'] first_dimension = dim_order[0] if ndims == 2: second_dimension = dim_order[1] # Write out the data for the parameter if ndims == 1: # 1D Parameters if first_dimension == 'one': outdata = np.array(cparam['data']) elif first_dimension == 'nsegment': if pp in ['tosegment']: outdata = np.array(new_tosegment) else: outdata = np.array( cparam['data'])[tuple(toseg_idx0), ] elif first_dimension == 'ndeplval': # This is really a 2D array in disguise; however, it is stored in C-order unlike # other 2D arrays outdata = np.array(cparam['data']).reshape( (-1, 11))[tuple(uniq_deplcrv0), :] elif first_dimension == 'npoigages': if pp == 'poi_gage_segment': outdata = np.array(new_poi_gage_segment) elif pp == 'poi_gage_id': outdata = np.array(new_poi_gage_id) elif pp == 'poi_type': outdata = np.array(new_poi_type) else: bandit_log.error( 'Unknown parameter, {}, with dimensions {}'.
format(pp, first_dimension)) elif first_dimension in HRU_DIMS: if pp == 'hru_deplcrv': outdata = np.array(new_hru_deplcrv) elif pp == 'hru_segment': outdata = np.array(new_hru_segment) else: outdata = np.array( cparam['data'])[tuple(hru_order_subset0), ] else: bandit_log.error( 'No rules to handle dimension {}'.format( first_dimension)) elif ndims == 2: # 2D Parameters outdata = np.array(cparam['data']).reshape( (-1, dims[second_dimension]), order='F') if first_dimension == 'nsegment': outdata = outdata[tuple(toseg_idx0), :] elif first_dimension in HRU_DIMS: outdata = outdata[tuple(hru_order_subset0), :] else: bandit_log.error( 'No rules to handle 2D parameter, {}, which contains dimension {}' .format(pp, first_dimension)) # Convert outdata to a list for writing if first_dimension == 'ndeplval': outlist = outdata.ravel().tolist() else: outlist = outdata.ravel(order='F').tolist() new_ps.parameters.get(cparam['name']).data = outlist # Write the new parameter file header = [ 'Written by Bandit version {}'.format(__version__), 'NhmParamDb revision: {}'.format(nhmparamdb_revision) ] if args.param_netcdf: base_filename = os.path.splitext(param_filename)[0] param_filename = '{}.nc'.format(base_filename) new_ps.write_netcdf('{}/{}'.format(sg_dir, param_filename)) else: new_ps.write_parameter_file('{}/{}'.format( sg_dir, param_filename), header=header) ctl.get('param_file').values = param_filename if args.verbose: sys.stdout.write('\n') # sys.stdout.write('\r ') # sys.stdout.write('\r\tParameter file written: {}\n'.format('{}/{}'.format(outdir, param_filename))) sys.stdout.flush() # 2019-09-16 PAN: Nasty hack to handle parameter databases that may not have # a one-to-one match between index value and nhm_id. cparam = nhm_params.get('nhm_id').tostructure() hru_order_subset_nhm_id = np.array( cparam['data'])[tuple(hru_order_subset0), ].ravel( order='F').tolist() # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Write CBH files # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if output_cbh: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Subset the cbh files for the selected HRUs if len(hru_order_subset) > 0: if args.verbose: print('Processing CBH files') if os.path.splitext(cbh_dir)[1] == '.nc': cbh_hdl = CbhNetcdf(src_path=cbh_dir, st_date=st_date, en_date=en_date, nhm_hrus=hru_order_subset_nhm_id) # nhm_hrus=hru_order_subset) else: # Subset the hru_nhm_to_local mapping # TODO: This section will not work with the monolithic paramdb - remove hru_order_ss = OrderedDict() for kk in hru_order_subset: hru_order_ss[kk] = hru_nhm_to_local[kk] cbh_hdl = CbhAscii(src_path=cbh_dir, st_date=st_date, en_date=en_date, nhm_hrus=hru_order_subset, indices=hru_order_ss, mapping=hru_nhm_to_region) if args.cbh_netcdf: # Pull the filename prefix off of the first file found in the # source netcdf CBH directory. 
file_it = glob.iglob(cbh_dir) cbh_prefix = os.path.basename( next(file_it)).split('_')[0] cbh_outfile = '{}/{}.nc'.format(outdir, cbh_prefix) cbh_hdl.write_netcdf(cbh_outfile) ctl.get('tmax_day').values = os.path.basename( cbh_outfile) ctl.get('tmin_day').values = os.path.basename( cbh_outfile) ctl.get('precip_day').values = os.path.basename( cbh_outfile) else: cbh_hdl.write_ascii(pathname=sg_dir) # bandit_log.info('{} written to: {}'.format(vv, '{}/{}.cbh'.format(outdir, vv))) else: bandit_log.error('No HRUs associated with the segments') # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Write output variables # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # 2019-08-07 PAN: first prototype for extractions of output variables if include_model_output: if len(hru_order_subset) > 0: try: os.makedirs(f'{sg_dir}/model_output') print( 'Creating directory model_output, for model output variables' ) except OSError: print( 'Using existing model_output directory for output variables' ) for vv in output_vars: if args.verbose: sys.stdout.write( '\r ' ) sys.stdout.write( f'\rProcessing output variable: {vv} ') sys.stdout.flush() filename = f'{output_vars_dir}/{vv}.nc' if vv[0:3] == 'seg': mod_out = ModelOutput(filename=filename, varname=vv, startdate=st_date, enddate=en_date, nhm_segs=toseg_idx) else: mod_out = ModelOutput( filename=filename, varname=vv, startdate=st_date, enddate=en_date, nhm_hrus=hru_order_subset_nhm_id) mod_out.write_csv(f'{sg_dir}/model_output') sys.stdout.write('\n') sys.stdout.flush() # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Write dynamic parameters # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if ctl.has_dynamic_parameters: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Add dynamic parameters for cparam in ctl.dynamic_parameters: param_name = 'dyn_{}'.format(cparam) input_file = '{}/{}.nc'.format(dyn_params_dir, param_name) output_file = '{}/{}.param'.format(sg_dir, param_name) if not os.path.exists(input_file): bandit_log.warning( 'WARNING: CONUS dynamic parameter file: {}, does not exist... 
skipping' .format(input_file)) else: if args.verbose: print( 'Writing dynamic parameter {}'.format(cparam)) mydyn = dyn_params.DynamicParameters( input_file, cparam, st_date, en_date, hru_order_subset_nhm_id) # mydyn = dyn_params.DynamicParameters(input_file, cparam, st_date, en_date, hru_order_subset) mydyn.read_netcdf() out_order = [kk for kk in hru_order_subset_nhm_id] # out_order = [kk for kk in hru_order_subset] for cc in ['day', 'month', 'year']: out_order.insert(0, cc) header = ' '.join(map(str, out_order)) # Output ASCII files out_ascii = open(output_file, 'w') out_ascii.write('{}\n'.format(cparam)) out_ascii.write('{}\n'.format(header)) out_ascii.write('####\n') mydyn.data.to_csv(out_ascii, columns=out_order, na_rep='-999', sep=' ', index=False, header=False, encoding=None, chunksize=50) out_ascii.close() # Write an updated control file to the output directory ctl.write('{}.bandit'.format('{}/{}'.format( sg_dir, control_filename))) if output_streamflow: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Download the streamgage information from NWIS if args.verbose: print( 'Downloading NWIS streamgage observations for {} stations' .format(len(new_poi_gage_id))) streamflow = prms_nwis.NWIS(gage_ids=new_poi_gage_id, st_date=st_date, en_date=en_date, verbose=args.verbose) streamflow.get_daily_streamgage_observations() streamflow.write_prms_data( filename='{}/{}'.format(sg_dir, obs_filename)) # ******************************************* # Create a shapefile of the selected HRUs if output_shapefiles: if args.verbose: print('-' * 40) print('Writing shapefiles for model subset') if not os.path.isdir(geo_file): bandit_log.error( 'File geodatabase, {}, does not exist. Shapefiles will not be created' .format(geo_file)) else: geo_shp = prms_geo.Geo(geo_file) # Create GIS sub-directory if it doesn't already exist gis_dir = '{}/GIS'.format(sg_dir) try: os.makedirs(gis_dir) except OSError as exception: if exception.errno != errno.EEXIST: raise else: pass # Output a shapefile of the selected HRUs # print('\tHRUs') # geo_shp.select_layer('nhruNationalIdentifier') geo_shp.select_layer('nhru') geo_shp.write_shapefile( '{}/GIS/HRU_subset.shp'.format(sg_dir), 'hru_id_nat', hru_order_subset_nhm_id, included_fields=[ 'nhm_id', 'model_idx', 'region', 'hru_id_nat' ]) # geo_shp.write_shapefile3('{}/GIS/HRU_subset.gdb'.format(outdir), 'hru_id_nat', hru_order_subset) # geo_shp.filter_by_attribute('hru_id_nat', hru_order_subset) # geo_shp.write_shapefile2('{}/HRU_subset.shp'.format(outdir)) # geo_shp.write_kml('{}/HRU_subset.kml'.format(outdir)) # Output a shapefile of the selected stream segments # print('\tSegments') geo_shp.select_layer('nsegmentNationalIdentifier') geo_shp.write_shapefile( '{}/GIS/Segments_subset.shp'.format(sg_dir), 'seg_id_nat', toseg_idx, included_fields=['seg_id_nat', 'model_idx', 'region']) # geo_shp.filter_by_attribute('seg_id_nat', uniq_seg_us) # geo_shp.write_shapefile2('{}/Segments_subset.shp'.format(outdir)) del geo_shp break # break out of while True loop bandit_log.info('========== END {} =========='.format( datetime.datetime.now().isoformat())) os.chdir(stdir)
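# A hedged, self-contained distillation of the stream-network check performed in
# main() above: build a DiGraph from a tosegment-style array (0 marks an outlet;
# the values below are made up), verify it is a DAG, and enumerate any offending
# cycles for the error log, mirroring main()'s own variable names.
import networkx as nx

tosegment = [2, 3, 0, 3]  # segment i+1 drains to tosegment[i]; 0 = outlet
dag_sketch = nx.DiGraph()
for ii, vv in enumerate(tosegment):
    if vv == 0:
        dag_sketch.add_edge(ii + 1, 'Out_{}'.format(ii + 1))
    else:
        dag_sketch.add_edge(ii + 1, vv)

if not nx.is_directed_acyclic_graph(dag_sketch):
    for xx in nx.simple_cycles(dag_sketch):
        print('Cycle found: {}'.format(xx))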
def test_simple_cycles_empty(self): G = nx.DiGraph() assert list(nx.simple_cycles(G)) == []
def test_worst_case_graph(self): # see figure 1 in Johnson's paper for k in range(3, 10): G = self.worst_case_graph(k) l = len(list(nx.simple_cycles(G))) assert l == 3 * k
def find_subtours(): G = nx.DiGraph(X) S = list(nx.simple_cycles(G)) #print("S = {}".format(S)) return S
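# find_subtours() above reads the adjacency matrix X from enclosing scope;
# presumably X is the (rounded) 0/1 solution matrix of a TSP-style model, in
# which each elementary cycle is a subtour. A hedged, self-contained
# illustration of that pattern:
import numpy as np
import networkx as nx

X = np.array([[0, 1, 0, 0],
              [1, 0, 0, 0],
              [0, 0, 0, 1],
              [0, 0, 1, 0]])  # two disjoint 2-city subtours
print(list(nx.simple_cycles(nx.DiGraph(X))))  # e.g. [[0, 1], [2, 3]]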
if book.title != 'Берегись Лиловой Пасты!': continue # print(book) # print(book.title) # print(book.sections) # print(book.end_pages) # print(book.total_pages) # print() # print(book.G) # print(repr(book.title_html)) # print(book.title_html) cycles = list(nx.simple_cycles(book.G)) print(cycles) # print(nx.recursive_simple_cycles(book.G)) cycles_nodes = {item for sublist in cycles for item in sublist} # print(cycles_nodes) # Try to determine which pages in a cycle came before the others, so that # the cyclic links can be removed correctly for edge in cycles: new_edge = None node_1, node_2 = edge for paths in nx.all_simple_paths(book.G, book.first_page, node_1): if node_2 not in paths: new_edge = node_2, node_1
def state_cycles(self): """Cycles found in state to state graph.""" S = nx.DiGraph(sbn2sbs(self.tpm)) return nx.simple_cycles(S)
""" this is just for playing arround with the networkx API in order to make sure I understood it correctly """ import networkx as nx G = nx.DiGraph() G.add_edge(0, 1) G.add_edge(1, 2) G.add_edge(2, 3) G.add_edge(3, 0) G.add_edge(1, 4) G.add_edge(4, 3) G.add_edge(1, 5) G.add_edge(5, 6) G.add_edge(6, 3) for path in nx.all_simple_paths(G, 1, 0, 3): print(path) for c in nx.simple_cycles(G): print(c)
def read_graphs(self, file_name): '''Read predicate-argument graphs (one per sentence) from file_name.''' if not file_name: self.logger.error('no file given: ' + str(file_name)) return f = open(file_name, 'r', encoding='utf-8') pargs = [] comments = '' sent_id = '' sent_index = -1 num_tokens = -1 for line in f: # end of sentence -> build graph if there are pargs if line.strip() == '<\s>' and len(pargs) > 0: # build graph -> two pass -> add nodes and edges graph = nx.DiGraph(COMMENTS=comments, SENT_ID=sent_id, SENT_INDEX=sent_index, NUM_TOKENS=num_tokens) for i in range(num_tokens): graph.add_node(str(i + 1)) # add nodes while checking multi-word tokens for parg in pargs: self.logger.debug('parg: ' + str(parg)) # update head node, predicate d = graph.node[parg['ID_pred']] if 'CAT' not in d: d['WORD'] = parg['PRED'] d['CAT'] = parg['CAT_pred'] d['ARGS'] = {} # can have multiple arg_positions to different ids d['ARGS'][parg['ID_arg']] = parg['POS_arg'] graph.node[parg['ID_pred']] = d # update dependent, argument d = graph.node[parg['ID_arg']] if 'HEADS' not in d: d['HEADS'] = {} d['HEAD_CATS'] = {} d['WORD'] = parg['ARG'] d['HEADS'][parg['ID_pred']] = parg['POS_arg'] d['HEAD_CATS'][parg['ID_pred']] = parg['CAT_pred'] graph.node[parg['ID_arg']] = d # add relation to the graph d = { 'POS_arg': parg['POS_arg'], 'CAT_pred': parg['CAT_pred'] } if 'X' in parg: d['X'] = parg['X'] graph.add_edge(parg['ID_pred'], parg['ID_arg'], attr_dict=d) self.logger.debug('added edge from ' + parg['ID_pred'] + ' to ' + parg['ID_arg']) # if no category, assign 'N' for n in graph.nodes(): # arguments if 'CAT' not in graph.node[n] and 'WORD' in graph.node[n]: graph.node[n]['CAT'] = 'N' # probably punctuation elif 'CAT' not in graph.node[n]: graph.node[n]['WORD'] = '_' graph.node[n]['CAT'] = '_' # check for cycles if len(list(nx.simple_cycles(graph))) > 0: self.logger.debug('cycle detected in ' + file_name + ' sent: ' + str(sent_index)) self.logger.debug(list(nx.simple_cycles(graph))) self._graphs.append(graph) self.logger.debug('GRAPH:' + str(graph.graph)) self.logger.debug('NODES:' + str(graph.nodes())) self.logger.debug('EDGES:' + str(graph.edges())) # debug if len(graph.nodes()) > num_tokens: self.logger.error( 'EXTRA nodes ' + str(len(graph.nodes())) + ' ' + str(num_tokens)) for n in sorted(graph.nodes(), key=int): self.logger.debug(n + ' : ' + str(graph.node[n])) for n1, n2 in sorted(graph.edges(), key=lambda x: (int(x[1]), int(x[0]))): self.logger.debug( str((n1, n2)) + ' : ' + str(graph.edge[n1][n2])) # sentence may be empty; reset at closing tag if line.strip() == '<\s>': pargs = [] comments = '' sent_id = '' sent_index = -1 num_tokens = -1 # only before the sentence elif line.strip()[0] == '<': # <s id="wsj_0013.7"> 27 comments += line.strip() i = comments.find('"') j = comments.find('"', i + 1) #print(i,j,comments[i+1:j]) # wsj_0013.7 sent_id = comments[i + 1:j] # 7 sent_index = int(sent_id.split('.')[1]) # 28 <- 27+1 num_tokens = int(comments[comments.find('>') + 1:].strip()) + 1 # pargs, also adds empty nodes and multiword tokens else: w = self.to_attr(line) pargs.append(w) f.close()
def check_for_no_cycles(self, graph, title, premise): graph.add_edge(title, premise) if len(list(nx.simple_cycles(graph))) > 0: return False else: return True
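# Note that check_for_no_cycles() leaves the (title, premise) edge in the graph
# even when it closes a cycle. If that side effect is unwanted, a non-mutating
# variant might look like this sketch (it assumes the edge was not already
# present, since it removes it again on exit):
import networkx as nx

def would_stay_acyclic(graph, title, premise):
    graph.add_edge(title, premise)
    try:
        # any() stops at the first cycle instead of materializing all of them
        return not any(nx.simple_cycles(graph))
    finally:
        graph.remove_edge(title, premise)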
def loop_exists(g): return next(nx.simple_cycles(g), [])
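# loop_exists() yields the first cycle found (a non-empty, truthy list) or []
# for an acyclic graph, so it works directly as a condition without enumerating
# every cycle. A small usage sketch:
import networkx as nx

g = nx.DiGraph([(1, 2), (2, 1), (2, 3)])
cyc = loop_exists(g)
if cyc:
    print('cycle detected:', cyc)  # e.g. cycle detected: [1, 2]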
def sofa_analyze(logdir, cfg): filein = [] df_gpu = [] df_cpu = [] df_vmstat = [] filein_gpu = logdir + "gputrace.csv" filein_cpu = logdir + "cputrace.csv" filein_vmstat = logdir + "vmstat_trace.csv" if os.path.isfile('%s/nvlink_topo.txt' % logdir): with open(logdir + 'nvlink_topo.txt') as f: lines = f.readlines() title = lines[0] num_gpus = 1 for word in title.split(): if re.match(r'GPU', word) != None : num_gpus = num_gpus + 1 print_info('# of GPUs: ' + str(num_gpus) ) edges = [] for i in range(num_gpus): connections = lines[1+i].split() for j in range(len(connections)): if connections[j] == 'NV1' or connections[j] == 'NV2': edges.append((i,j-1)) #print('%d connects to %d' % (i, j-1)) ring_found = False if num_gpus > 1: G = nx.DiGraph(edges) # Try to find ring with its length of num_gpus for cycle in nx.simple_cycles(G): if len(cycle) == num_gpus: print(("One of the recommended ring having length of %d" % len(cycle) )) ring_found = True os.system("mkdir -p sofalog/sofa_hints/") xring_order = ','.join(map(str, cycle)) with open("sofalog/sofa_hints/xring_order.txt", "w") as f: f.write('export CUDA_VISIBLE_DEVICES=' + xring_order) break # Try to find ring with its length of num_gpus/2 if not ring_found: for cycle in nx.simple_cycles(G): if len(cycle) == num_gpus/2: print(("One of the recommended ring having length of %d" % len(cycle) )) ring_found = True os.system("mkdir -p sofalog/sofa_hints/") xring_order = ','.join(map(str, cycle)) with open("sofalog/sofa_hints/xring_order.txt", "w") as f: f.write('export CUDA_VISIBLE_DEVICES=' + xring_order) break try: df_cpu = pd.read_csv(filein_cpu) cpu_profile(logdir, cfg, df_cpu) net_profile(logdir, cfg, df_cpu) except IOError: print_warning("cputrace.csv is not found") #quit() try: df_gpu = pd.read_csv(filein_gpu) #df_gpu.loc[:, 'timestamp'] -= df_gpu.loc[0, 'timestamp'] gpu_profile(logdir, cfg, df_gpu) if cfg.enable_deepprof: sofa_deepprof(logdir, cfg, df_cpu, df_gpu) except IOError: print_warning( "gputrace.csv is not found. If there is no need to profile GPU, just ignore it.")
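# The ring search in sofa_analyze() scans nx.simple_cycles() for a cycle that
# touches every GPU, i.e. a directed Hamiltonian cycle of the NVLink topology.
# A minimal sketch with a hypothetical 4-GPU ring (each NVLink is bidirectional,
# hence the paired edges):
import networkx as nx

edges = [(0, 1), (1, 0), (1, 2), (2, 1), (2, 3), (3, 2), (3, 0), (0, 3)]
G = nx.DiGraph(edges)
num_gpus = 4
ring = next((c for c in nx.simple_cycles(G) if len(c) == num_gpus), None)
if ring is not None:
    print('export CUDA_VISIBLE_DEVICES=' + ','.join(map(str, ring)))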
def enumMaximumMatchingIter2(adj, matchadj, all_matches, n1, add_e=None, check_cycle=True): '''Similar to enumMaximumMatching() but implemented using adjacency matrix of graph. Slight speed boost. ''' #-------------------Find cycles------------------- if check_cycle: d = matchadj.multiply(adj) d[n1:, :] = adj[n1:, :] - matchadj[n1:, :].multiply(adj[n1:, :]) dg = nx.from_numpy_matrix(d.toarray(), create_using=nx.DiGraph()) cycles = list(nx.simple_cycles(dg)) if len(cycles) == 0: check_cycle = False else: check_cycle = True #if len(cycles)>0: if check_cycle: cycle = cycles[0] cycle.append(cycle[0]) cycle = zip(cycle[:-1], cycle[1:]) #--------------Create a new matching-------------- new_match = matchadj.copy() for ee in cycle: if matchadj[ee[0], ee[1]] == 1: new_match[ee[0], ee[1]] = 0 new_match[ee[1], ee[0]] = 0 e = ee else: new_match[ee[0], ee[1]] = 1 new_match[ee[1], ee[0]] = 1 if add_e is not None: for ii in add_e: new_match[ii[0], ii[1]] = 1 all_matches.append(new_match) #-----------------Form subproblems----------------- g_plus = adj.copy() g_minus = adj.copy() g_plus[e[0], :] = 0 g_plus[:, e[1]] = 0 g_plus[:, e[0]] = 0 g_plus[e[1], :] = 0 g_minus[e[0], e[1]] = 0 g_minus[e[1], e[0]] = 0 add_e_new = [ e, ] if add_e is not None: add_e_new.extend(add_e) all_matches = enumMaximumMatchingIter2(g_minus, new_match, all_matches, n1, add_e, check_cycle) all_matches = enumMaximumMatchingIter2(g_plus, matchadj, all_matches, n1, add_e_new, check_cycle) else: #---------------Find uncovered nodes--------------- uncovered = numpy.where(numpy.sum(matchadj, axis=1) == 0)[0] if len(uncovered) == 0: return all_matches #---------------Find feasible paths--------------- paths = [] for ii in uncovered: aa = adj[ii, :].dot(matchadj) if aa.sum() == 0: continue paths.append((ii, int(sparse.find(aa == 1)[1][0]))) if len(paths) > 0: break if len(paths) == 0: return all_matches #----------------------Find e---------------------- feas1, feas2 = paths[0] e = (feas1, int(sparse.find(matchadj[:, feas2] == 1)[0])) #----------------Create a new match---------------- new_match = matchadj.copy() new_match[feas2, :] = 0 new_match[:, feas2] = 0 new_match[feas1, e[1]] = 1 new_match[e[1], feas1] = 1 if add_e is not None: for ii in add_e: new_match[ii[0], ii[1]] = 1 all_matches.append(new_match) #-----------------Form subproblems----------------- g_plus = adj.copy() g_minus = adj.copy() g_plus[e[0], :] = 0 g_plus[:, e[1]] = 0 g_plus[:, e[0]] = 0 g_plus[e[1], :] = 0 g_minus[e[0], e[1]] = 0 g_minus[e[1], e[0]] = 0 add_e_new = [ e, ] if add_e is not None: add_e_new.extend(add_e) all_matches = enumMaximumMatchingIter2(g_minus, matchadj, all_matches, n1, add_e, check_cycle) all_matches = enumMaximumMatchingIter2(g_plus, new_match, all_matches, n1, add_e_new, check_cycle) if len(all_matches) % 1000 == 0: print('len', len(all_matches)) return all_matches
def reduce_graph(G, partial=False, debug=False, unglue=True, recursive=True): """Reduce the graph: iteratively prune the graph and glue the resulting cycles. * After gluing, each cycle is represented by the source node of its limiting step. * Edges entering the glued cycle in the pruned graph are conserved. * Edges exiting the cycle before pruning are restored with a corrected weight. By default, aim for full reduction and raise an error if a node has multiple outgoing edges or if a cycle has multiple limiting steps. Enable the *partial* parameter to relax these constraints. In case of partial reduction, only terminal elementary cycles with a single limiting step are glued. Information on the glued nodes and original source of glued edges are conserved as metadata to enable the unglue step. """ # All nodes from a glued cycle are associated to their representative, original target and weight. glued_nodes = {} # All representative nodes are associated to the list of nodes in the original cycle glued_cycles = {} # Edges entering a glued cycle in the pruned graph or exiting it in the original graph glued_edges = set() # Collect this information for all cycles in the pruned graph all_nodes = set(G.nodes) pruned_G = prune(G, partial=partial, debug=debug) for cycle in nx.simple_cycles(pruned_G): cycle_edges = list(pruned_G.out_edges(cycle, data='weight')) # Only glue cycles where all nodes have a single target (in case of partial pruning) if len(cycle_edges) != len(cycle): print("Skip non-pruned cycle: ", cycle) continue lim_s, _, lim_w = max(cycle_edges, key=lambda x: x[2]) if partial: if len([e for e in cycle_edges if e[2] == lim_w]) > 1: print("Cycle with multiple limiting steps can not be glued") continue else: check_unique(cycle_edges, lim_w, debug) cur_glued_nodes = {n: (lim_s, t, w) for n, t, w in cycle_edges} glued_nodes.update(cur_glued_nodes) glued_cycles[lim_s] = cycle # Collect glued edges: entering the cycle in the pruned graph or exiting it in the original graph other_nodes = all_nodes.difference(cur_glued_nodes) glued_edges.update( nx.algorithms.boundary.edge_boundary(pruned_G, other_nodes)) glued_edges.update( nx.algorithms.boundary.edge_boundary(G, cur_glued_nodes)) if len(glued_cycles) == 0: if debug: print("This graph has no cycle to glue!") if unglue: return unglue_graph(pruned_G, debug=debug) return pruned_G if debug: print("This graph has %s cycles to glue:" % len(glued_cycles)) for g, c in glued_cycles.items(): print(" * %s: %s" % (g, c)) print("Glued %s edges:" % len(glued_edges)) for e in glued_edges: print(" *", e) # The glued graph contains all edges between non-glued nodes of the pruned graph hidden_nodes = set(glued_nodes).difference(glued_cycles) glued_G = pruned_G.subgraph(all_nodes.difference(hidden_nodes)).copy() # Redirect, normalize, annotate and restore glued edges restored_edges = {} for s, t in glued_edges: # Assume that we will copy the edge edge_info = G.edges[(s, t)].copy() # When the target is glued, redirect and keep track of the original one! 
if t in glued_nodes: if debug: print("REDIRECTING EDGE TARGET!!!") if "glued_target" not in edge_info: edge_info['glued_target'] = t t = glued_nodes.get(t)[0] # When the source is glued, redirect and update the weight if s in glued_nodes: s, _, w = glued_nodes[s] wlim = glued_nodes[s][2] edge_info['weight'] += wlim - w # Add the new edge, unless a better one already exists bgw = restored_edges.get((s, t)) if bgw is not None and bgw['weight'] < edge_info['weight']: continue restored_edges[(s, t)] = edge_info # Add all glued edges glued_G.add_edges_from([(s, t, info) for (s, t), info in restored_edges.items()]) # Add metadata required to restore edges in the glued cycles for cur_repr, cur_nodes in glued_cycles.items(): cur_glued_cycle = [] for src in cur_nodes: # Add existing glued cycles prev_glued = G.nodes[src].get('glued_cycles') if prev_glued: if debug: print('Merging %s-cycle previously glued in %s' % (len(prev_glued), src)) cur_glued_cycle += prev_glued mrepr, tgt, w = glued_nodes[src] rtgt = G.edges[(src, tgt)].get('glued_target') if rtgt is not None and rtgt != tgt: if debug: print('Merging a redirected edge (%s, %s / %s, %s) !' % (src, tgt, rtgt, w)) tgt = rtgt if mrepr != cur_repr: raise ValueError('Mismatching representative node!') if src == cur_repr: continue cur_glued_cycle.append((src, tgt, w)) glued_G.add_node(cur_repr) glued_G.nodes[cur_repr]['glued_cycles'] = cur_glued_cycle if recursive: return reduce_graph(glued_G, partial=partial, debug=debug, recursive=True, unglue=unglue) if unglue: return unglue_graph(glued_G, debug=debug) return glued_G
def run(self, max_cores=None, max_attempts=1, dry=False, set_successful=True, cmd_wrapper=signature.default_cmd_fxn_wrapper, log_out_dir_func=default_task_log_output_dir): """ Runs this Workflow's DAG :param int max_cores: The maximum number of cores to use at once. A value of None indicates no maximum. :param int max_attempts: The maximum number of times to retry a failed job. :param callable log_out_dir_func: A function that returns a Task's logging directory (must be unique). It receives one parameter: the Task instance. By default a Task's log output is stored in log/stage_name/task_id. See _default_task_log_output_dir for more info. :param bool dry: If True, do not actually run any jobs. :param bool set_successful: Sets this workflow as successful if all tasks finish without a failure. You might set this to False if you intend to add and run more tasks in this workflow later. """ assert os.path.exists(os.getcwd( )), 'current working dir does not exist! %s' % os.getcwd() assert hasattr( self, 'cosmos_app' ), 'Workflow was not initialized using the Workflow.start method' assert hasattr(log_out_dir_func, '__call__'), 'log_out_dir_func must be a function' assert self.session, 'Workflow must be part of a sqlalchemy session' session = self.session self.log.info('Preparing to run %s using DRM `%s`, cwd is `%s`' % (self, self.cosmos_app.default_drm, os.getcwd())) self.max_cores = max_cores self.max_attempts = max_attempts from ..job.JobManager import JobManager if self.jobmanager is None: self.jobmanager = JobManager( get_submit_args=self.cosmos_app.get_submit_args, cmd_wrapper=cmd_wrapper, log_out_dir_func=log_out_dir_func) self.status = WorkflowStatus.running self.successful = False if self.started_on is None: self.started_on = datetime.datetime.now() task_graph = self.task_graph() stage_graph = self.stage_graph() assert len(set(self.stages)) == len( self.stages), 'duplicate stage name detected: %s' % (next( duplicates(self.stages))) # renumber stages stage_graph_no_cycles = nx.DiGraph() stage_graph_no_cycles.add_nodes_from(stage_graph.nodes()) stage_graph_no_cycles.add_edges_from(stage_graph.edges()) for cycle in nx.simple_cycles(stage_graph): stage_graph_no_cycles.remove_edge(cycle[-1], cycle[0]) for i, s in enumerate(topological_sort(stage_graph_no_cycles)): s.number = i + 1 # Make sure everything is in the sqlalchemy session session.add(self) # use a list, not a lazy filter(), since the result is both len()'d and iterated below successful = [t for t in task_graph.nodes() if t.successful] # print stages for s in sorted(self.stages, key=lambda s: s.number): self.log.info('%s %s' % (s, s.status)) # Create Task Queue task_queue = _copy_graph(task_graph) self.log.info('Skipping %s successful tasks...' % len(successful)) task_queue.remove_nodes_from(successful) handle_exits(self) if self.max_cores is not None: self.log.info('Ensuring there are enough cores...') # make sure we've got enough cores for t in task_queue: assert int( t.core_req ) <= self.max_cores, '%s requires more cpus (%s) than `max_cores` (%s)' % ( t, t.core_req, self.max_cores) # Run this thing!
self.log.info('Committing to SQL db...') session.commit() if not dry: _run(self, session, task_queue) # set status if self.status == WorkflowStatus.failed_but_running: self.status = WorkflowStatus.failed # set stage status to failed for s in self.stages: if s.status == StageStatus.running_but_failed: s.status = StageStatus.failed session.commit() return False elif self.status == WorkflowStatus.running: if set_successful: self.status = WorkflowStatus.successful session.commit() return True else: self.log.warning('Workflow exited with status %s', self.status) session.commit() return False else: self.log.info('Workflow dry run is complete')
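# run() tolerates cycles in the stage graph by dropping one edge per elementary
# cycle before numbering stages topologically. The core of that trick as a
# standalone sketch (the has_edge guard avoids removing a shared edge twice):
import networkx as nx

g = nx.DiGraph([('a', 'b'), ('b', 'c'), ('c', 'a')])
acyclic = nx.DiGraph()
acyclic.add_nodes_from(g.nodes())
acyclic.add_edges_from(g.edges())
for cycle in nx.simple_cycles(g):
    if acyclic.has_edge(cycle[-1], cycle[0]):
        acyclic.remove_edge(cycle[-1], cycle[0])
print(list(nx.topological_sort(acyclic)))  # e.g. ['a', 'b', 'c']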
def find_cycles(self, pipeline): return nx.simple_cycles(Pipeline.graph(pipeline))
def cutoff_cycles(self, links): graph = self.graph nodes = self.__actual_nodes sub_graph = nx.DiGraph(links) gen_cycles = nx.simple_cycles(sub_graph) cycles = [cycle for cycle in gen_cycles] _related_cycle_nodes = [] if len(cycles): for cycle in cycles: for n in cycle: if n not in nodes: _related_cycle_nodes.append(n) cycle_list = [] for cycle in cycles: temp_list = [] if len(cycle) == 2: source_id = cycle[0] target_id = cycle[1] if graph.has_edge(source_id, target_id): temp = { 'source_id': str(source_id), 'target_id': str(target_id) } # dict merge written to work on both Python 2 and 3 temp_list.append( dict(list(temp.items()) + list(graph[source_id][target_id].items()))) source_id = cycle[1] target_id = cycle[0] if graph.has_edge(source_id, target_id): temp = { 'source_id': str(source_id), 'target_id': str(target_id) } temp_list.append( dict(list(temp.items()) + list(graph[source_id][target_id].items()))) cycle_list.append(temp_list) else: for i in range(len(cycle)): if i == (len(cycle) - 1): source_id = cycle[i] target_id = cycle[0] if graph.has_edge(source_id, target_id): temp = { 'source_id': str(source_id), 'target_id': str(target_id) } temp_list.append( dict(list(temp.items()) + list(graph[source_id][target_id].items()))) else: source_id = cycle[i] target_id = cycle[i + 1] if graph.has_edge(source_id, target_id): temp = { 'source_id': str(source_id), 'target_id': str(target_id) } temp_list.append( dict(list(temp.items()) + list(graph[source_id][target_id].items()))) cycle_list.append(temp_list) cycle_result = [{ 'paths': cycle, 'type': 'circle' if len(cycle) > 2 else 'each_other' if len(cycle) == 2 else 'self', 'circle_id': create_union_id() } for cycle in cycle_list] return { 'cycle_result': cycle_result, 'actual_nodes': nodes + _related_cycle_nodes } else: return {'cycle_result': [], 'actual_nodes': nodes}
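# The 'self' / 'each_other' / 'circle' classification above keys off cycle
# length; nx.simple_cycles() reports self-loops as single-node cycles, which
# this toy example demonstrates:
import networkx as nx

g = nx.DiGraph([(1, 1), (2, 3), (3, 2), (4, 5), (5, 6), (6, 4)])
for cyc in nx.simple_cycles(g):
    kind = 'self' if len(cyc) == 1 else 'each_other' if len(cyc) == 2 else 'circle'
    print(cyc, kind)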
def test_simple_cycles_graph(self): with pytest.raises(nx.NetworkXNotImplemented): G = nx.Graph() c = sorted(nx.simple_cycles(G))
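# On networkx releases where simple_cycles() is directed-only (as this test
# asserts), cycles of an undirected graph are typically obtained from
# nx.cycle_basis() instead:
import networkx as nx

ug = nx.Graph([(1, 2), (2, 3), (3, 1), (3, 4)])
print(nx.cycle_basis(ug))  # one basis cycle, e.g. [[2, 3, 1]]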
def simple_cycles_func(graph): """Return all simple cycles of graph as a list.""" return list(nx.simple_cycles(graph))
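# Materializing every cycle with list() can explode on dense graphs (the cycle
# count grows combinatorially; see the complete-graph test further down). When
# only a few cycles are needed, itertools.islice bounds the work:
from itertools import islice
import networkx as nx

G = nx.DiGraph(nx.complete_graph(6))
first_ten = list(islice(nx.simple_cycles(G), 10))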
# # Input: G - A genome graph # # Output: None, None if this is not a red/blue alternating cycle; otherwise return the cycle and the colors # + slideshow={"slide_type": "subslide"} def red_blue_cycle_check(sub_graph, cycle): checked_cycle = None colors = [] return checked_cycle, colors G_P4_P5 = combine(genome_to_graph([P4]), genome_to_graph([P5])) # Below is an example of how you can find all the cycles test_cycles = list(nx.simple_cycles(G_P4_P5.to_directed())) edge_cycles = [] # just a cycle listed as edges for cycle in test_cycles: edge_cycle = [] a = cycle[0] for b in cycle[1:]: edge_cycle.append([a, b]) a = b edge_cycle.append([b, cycle[0]]) edge_cycles.append(edge_cycle) # Running the code on all cycles for edge_cycle in edge_cycles: #print(edge_cycle) checked_cycle, colors = red_blue_cycle_check(G_P4_P5, edge_cycle) #print(colors)
def test_complete_directed_graph(self): # see table 2 in Johnson's paper ncircuits = [1, 5, 20, 84, 409, 2365, 16064] for n, c in zip(range(2, 9), ncircuits): G = nx.DiGraph(nx.complete_graph(n)) assert len(list(nx.simple_cycles(G))) == c
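# The expected counts match the closed form C(n) = sum_{k=2..n} binom(n, k) * (k-1)!
# (choose the k vertices of a cycle, then count the (k-1)! distinct directed
# k-cycles on them). A quick verification:
from math import comb, factorial

def n_circuits(n):
    return sum(comb(n, k) * factorial(k - 1) for k in range(2, n + 1))

assert [n_circuits(n) for n in range(2, 9)] == [1, 5, 20, 84, 409, 2365, 16064]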
def conflict_serializable(schedule): """ A schedule is conflict serializable if its conflict graph is acyclic. Aborted transactions are ignored. """ return len(list(nx.simple_cycles(conflict_graph(schedule)))) == 0
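# Enumerating every simple cycle just to test for acyclicity does exponential
# work in the worst case; an equivalent check on the same conflict graph runs
# in linear time:
import networkx as nx

def conflict_serializable_fast(schedule):
    """Same result as conflict_serializable(), without enumerating cycles."""
    return nx.is_directed_acyclic_graph(conflict_graph(schedule))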