def main(argv):
    """Run nested cross-validation of a hierarchical sleep-stage classifier.

    The input CSV is restricted to the five sleep-state labels and to the
    'UPenn' dataset. An outer GroupKFold (grouped by user) provides the test
    folds; inside each fold a RandomizedSearchCV over a stratified inner CV
    tunes a scaler + HierarchicalClassifier pipeline. Hierarchical and
    per-node precision/recall/F-beta are collected per fold and handed to
    ``get_classification_report`` and ``save_user_report``.

    :param argv: sequence where ``argv[0]`` is the input CSV path and
        ``argv[1]`` is the output directory (created if missing)
    """
    infile = argv[0]
    outdir = argv[1]

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Read data file and retain data only corresponding to 5 sleep states
    df = pd.read_csv(infile,
                     dtype={
                         'label': object,
                         'user': object,
                         'position': object,
                         'dataset': object
                     })
    sleep_states = ['Wake', 'NREM 1', 'NREM 2', 'NREM 3', 'REM']
    # drop=True keeps the original columns, so no helper 'index' columns
    # have to be stripped again afterwards
    keep = df['label'].isin(sleep_states) & (df['dataset'] == 'UPenn')
    df = df[keep].reset_index(drop=True)

    print('... Number of data samples: %d' % len(df))
    ctr = Counter(df['label'])
    for cls in ctr:
        print('%s: %d (%0.2f%%)' % (cls, ctr[cls], ctr[cls] * 100.0 / len(df)))

    feat_cols = [
        'ENMO_mean', 'ENMO_std', 'ENMO_min', 'ENMO_max', 'ENMO_mad',
        'ENMO_entropy1', 'ENMO_entropy2', 'ENMO_prevdiff', 'ENMO_nextdiff',
        'angz_mean', 'angz_std', 'angz_min', 'angz_max', 'angz_mad',
        'angz_entropy1', 'angz_entropy2', 'angz_prevdiff', 'angz_nextdiff',
        'LIDS_mean', 'LIDS_std', 'LIDS_min', 'LIDS_max', 'LIDS_mad',
        'LIDS_entropy1', 'LIDS_entropy2', 'LIDS_prevdiff', 'LIDS_nextdiff',
    ]

    X = df[feat_cols].values
    y = df['label']
    groups = df['user']

    # Class hierarchy for sleep stages
    class_hierarchy = {
        ROOT: {"Wake", "Sleep"},
        "Sleep": {"NREM", "REM"},
        "NREM": {"Light", "NREM 3"},
        "Light": {"NREM 1", "NREM 2"}
    }
    graph = DiGraph(class_hierarchy)

    outer_cv_splits = 5
    inner_cv_splits = 3

    # Hierarchy nodes that get their own per-fold metrics, in report order
    node_names = ['Wake', 'Sleep', 'REM', 'NREM', 'NREM 3', 'Light',
                  'NREM 1', 'NREM 2']
    results = {
        name: {'precision': [], 'recall': [], 'fbeta': []}
        for name in node_names + ['Overall']
    }

    # Outer CV
    group_kfold = GroupKFold(n_splits=outer_cv_splits)
    out_fold = 0
    hierarchical_pred = []
    for train_indices, test_indices in group_kfold.split(X, y, groups):
        out_fold += 1
        print('Processing fold ' + str(out_fold))
        out_fold_X_train = X[train_indices, :]
        out_fold_X_test = X[test_indices, :]
        out_fold_y_train = y[train_indices]
        out_fold_y_test = y[test_indices]
        out_fold_users_test = groups[test_indices]

        # Create a pipeline with scaler and hierarchical classifier
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('clf', HierarchicalClassifier(
                base_estimator=RandomForestClassifier(random_state=0,
                                                      n_estimators=100,
                                                      n_jobs=-1),
                class_hierarchy=class_hierarchy,
                prediction_depth='mlnp',
                progress_wrapper=tqdm,
                # stopping_criteria=0.7
            ))
        ])

        # Inner CV
        strat_kfold = StratifiedKFold(n_splits=inner_cv_splits,
                                      random_state=0,
                                      shuffle=True)
        custom_cv_indices = list(
            strat_kfold.split(out_fold_X_train, out_fold_y_train))

        print('Training')
        search_params = {
            'clf__base_estimator__n_estimators': [50, 100, 200, 300, 500],
            'clf__base_estimator__max_depth': [5, 10, None]
        }
        cv_clf = RandomizedSearchCV(
            estimator=pipe,
            param_distributions=search_params,
            cv=custom_cv_indices,
            scoring=make_scorer(custom_h_fbeta, graph=graph),
            n_iter=5,
            n_jobs=-1,
            verbose=1)
        cv_clf.fit(out_fold_X_train, out_fold_y_train)
        print('Predicting')
        out_fold_y_pred = cv_clf.predict(out_fold_X_test)

        best_clf = cv_clf.best_estimator_

        # Demonstrate using our hierarchical metrics module with MLB wrapper
        with multi_labeled(out_fold_y_test, out_fold_y_pred,
                           best_clf.named_steps['clf'].graph_) \
                as (y_test_, y_pred_, graph_, classes_):
            fold_h_prec, fold_h_rec, fold_h_fbeta = h_fbeta_score(
                y_test_, y_pred_, graph_)
            results['Overall']['precision'].append(fold_h_prec)
            results['Overall']['recall'].append(fold_h_rec)
            results['Overall']['fbeta'].append(fold_h_fbeta)
            print("Fold %d: precision: %0.4f, recall: %0.4f, fbeta: %0.4f" %
                  (out_fold, fold_h_prec, fold_h_rec, fold_h_fbeta))

            # Per-node metrics are computed on the ancestor-filled labels
            y_test_ = fill_ancestors(y_test_, graph=graph_)
            y_pred_ = fill_ancestors(y_pred_, graph=graph_)

            hierarchical_pred.append(
                (out_fold_users_test, y_test_, y_pred_, classes_))

            for node in node_names:
                node_prec, node_rec, node_fbeta, _ = get_node_metrics(
                    y_test_, y_pred_, classes_, node)
                results[node]['precision'].append(node_prec)
                results[node]['recall'].append(node_rec)
                results[node]['fbeta'].append(node_fbeta)

    get_classification_report(results)
    save_user_report(hierarchical_pred,
                     os.path.join(outdir, 'hierarchical_results.csv'))
def test_hamiltonian_empty_graph():
    """The Hamiltonian path computed for an empty digraph has length zero."""
    empty = DiGraph()
    assert len(hamiltonian_path(empty)) == 0
def scroll(webdriver_path=driver_path, timeout=3, graph=None, search_ids=None):
    """
    Use a more complex method to gather data that uses a web driver to scrape a page. It must go to the page and
    then scroll to the bottom so it can gather all the posts, their authors and dates published so it can also
    be turned into a graph

    Parameters
    ----------
    webdriver_path (str) where the chrome web driver is stored for establishing the driver
    timeout (int) how many seconds the driver should wait for the page to complete the re-load when scrolling
    graph (DiGraph) graph to add the collected nodes and edges to; a new empty DiGraph is created when omitted
    search_ids (list or str) search term(s) to query; when a list with more than one term is given, a combined
        query of all terms is searched first. Defaults to a single built-in query.

    Returns
    -------
    The graph, extended with a 'mediumcom' site node, one node per search term, author and article, and the
    'Posted', 'PostedOn' and 'FromSearch' edges between them.
    """
    # A fresh graph per call: a DiGraph() default would be shared between all calls
    if graph is None:
        graph = DiGraph()

    # Driver is currently set for version 8.1 on windows
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(executable_path=webdriver_path, chrome_options=chrome_options)

    try:
        # Ids already turned into nodes by this call, starting with the base site which is being collected
        index = {'mediumcom'}
        if not graph.has_node('mediumcom'):
            graph.add_node('mediumcom', description='Site with blogs')

        # Start the driver on the url and the query if it exists.
        if not search_ids:
            search_ids = ['network%20graph%20visualization']
        elif isinstance(search_ids, list):
            # Work on a copy so the caller's list is not modified
            search_ids = list(search_ids)
            # Create a search that consists of all the terms and put it at the beginning of the list
            if len(search_ids) > 1:
                search_ids.insert(0, '%20'.join(search_ids))
        else:
            search_ids = [search_ids]

        # Get scroll height
        # NOTE(review): measured once before any page is loaded and carried over between searches
        last_height = driver.execute_script("return document.body.scrollHeight")

        for search_id in search_ids:
            # Set the driver on the search_id in the query
            driver.get('https://medium.com/search?q=%s' % search_id)

            # Normalize the ID now that the url is set
            search_id = search_id.replace('%20', '_')
            if not graph.has_node(search_id):
                graph.add_node(search_id, description='Search term used to search blogs')

            while True:
                # Scroll down to bottom
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

                # Wait to load page
                logger.info(
                    'Collected %d posts. Scrolling for more...'
                    % len(driver.find_elements_by_class_name('postArticle')))
                time.sleep(timeout)

                # Calculate new scroll height and compare with last scroll height
                new_height = driver.execute_script("return document.body.scrollHeight")
                if new_height == last_height or len(driver.find_elements_by_class_name('postArticle')) > 100:
                    # Stop scrolling when the page stopped growing or more than 100 posts are loaded
                    break
                last_height = new_height

            # Collect all the posts to iterate through and assign to nodes and edges
            posts = driver.find_elements_by_class_name('postArticle')
            logger.info('Collected %s posts' % len(posts))

            # Go through each post and extract an author (a_id), the post (b_id) and then create the edges
            for post in posts:
                try:
                    author_link = post.find_element_by_class_name('ds-link')
                    author = author_link.text
                    link = author_link.get_attribute("href")
                    date = post.find_element_by_tag_name('time').text
                    title = post.find_element_by_tag_name('h3').text
                    claps = post.find_element_by_class_name('multirecommend').text

                    # Create the author node
                    a_id = ''.join(e for e in author if e.isalnum()).lower()
                    if a_id not in index:
                        graph.add_node(a_id, description=author, link=link)
                        index.add(a_id)

                    # Create the article node
                    b_id = ''.join(e for e in title if e.isalnum()).lower()
                    if b_id not in index:
                        graph.add_node(b_id, description="%s by %s" % (title, author), link=link, count=claps,
                                       date=date)
                        index.add(b_id)

                    if graph.has_node(a_id) and graph.has_node(b_id):
                        graph.add_edge(a_id, b_id, label='Posted')
                        graph.add_edge(b_id, 'mediumcom', label='PostedOn')
                        graph.add_edge(search_id, b_id, label='FromSearch')
                except scroll_errors.NoSuchElementException as error:
                    # A post missing one of the expected elements is logged and skipped
                    logger.error("Scrolling %s" % error.msg)
    finally:
        # Always shut the headless browser down, even when scraping fails
        driver.quit()

    return graph
def test_is_strongly_connected():
    """Tests for a strongly connected tournament."""
    cycle_edges = [(0, 1), (1, 2), (2, 0)]
    tournament = DiGraph(cycle_edges)
    assert is_strongly_connected(tournament)
def test_is_tournament():
    """A digraph with exactly one edge between each pair of four nodes is a tournament."""
    edges = [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]
    tournament = DiGraph()
    tournament.add_edges_from(edges)
    assert is_tournament(tournament)
def test_tournament_matrix():
    """The tournament matrix of the single edge 0 -> 1 is [[0, 1], [-1, 0]]."""
    # Skip when numpy is not installed
    np = pytest.importorskip("numpy")
    npt = pytest.importorskip("numpy.testing")

    single_edge = DiGraph([(0, 1)])
    dense = tournament_matrix(single_edge).todense()
    expected = np.array([[0, 1], [-1, 0]])
    npt.assert_array_equal(dense, expected)
def test_same_node_is_reachable():
    """Tests that a node is always reachable from itself."""
    # An arbitrary tournament on ten nodes: one edge per unordered pair.
    pair_edges = (sorted(pair) for pair in combinations(range(10), 2))
    tournament = DiGraph(pair_edges)
    for node in tournament:
        assert is_reachable(tournament, node, node)
def test_empty_graph():
    """Iterating the dependent nodes of an empty digraph yields nothing."""
    visited = list(dependent_node_iterator(DiGraph()))
    assert visited == []
def reduce_grid_brute(circuit: MultiCircuit, removed_br_idx):
    """
    Remove the branch at ``removed_br_idx`` from the circuit.

    When the branch is the only path between its two buses, its "from" bus is
    merged into its "to" bus: every branch touching the "from" bus is
    re-attached to the "to" bus and takes over the removed branch's ``active``
    state, and the "from" bus is deleted. Otherwise only the branch is removed.

    Args:
        circuit: Circuit to modify in-place
        removed_br_idx: index (in ``circuit.branches``) of the branch to remove

    Returns:
        Tuple ``(removed_branch, removed_bus, updated_bus, updated_branches)``.
        ``removed_bus`` and ``updated_bus`` are None and ``updated_branches``
        is empty when no buses were merged.
    """
    # form C: branch-bus incidence matrix (+1 at the "from" bus, -1 at the "to" bus)
    m = len(circuit.branches)
    n = len(circuit.buses)

    buses_dict = {bus: i for i, bus in enumerate(circuit.buses)}

    C = lil_matrix((m, n), dtype=int)

    graph = DiGraph()

    # TODO: Fix the topology reduction with the GC example, see what is going on
    for i, branch in enumerate(circuit.branches):
        # get the from and to bus indices
        f = buses_dict[branch.bus_from]
        t = buses_dict[branch.bus_to]
        graph.add_edge(f, t)
        C[i, f] = 1
        C[i, t] = -1

    C = csc_matrix(C)

    # get branch buses
    bus_f = circuit.branches[removed_br_idx].bus_from
    bus_t = circuit.branches[removed_br_idx].bus_to
    f = buses_dict[bus_f]
    t = buses_dict[bus_t]

    removed_bus = None
    updated_bus = None
    updated_branches = list()

    # Only "exactly one path" matters, so stop after the second path instead
    # of enumerating every simple path (which can be exponentially many).
    path_iter = iter(all_simple_paths(graph, f, t))
    single_path = next(path_iter, None) is not None and next(path_iter, None) is None

    if single_path:

        # get the branches that are connected to the bus f
        adjacent_br_idx = get_branches_of_bus(C, f)

        for k in adjacent_br_idx:

            # get the indices of the buses
            f2 = buses_dict[circuit.branches[k].bus_from]
            t2 = buses_dict[circuit.branches[k].bus_to]

            # re-assign whichever end of the branch sits on the bus being removed
            if f2 == f:
                circuit.branches[k].bus_from = bus_t
            elif t2 == f:
                circuit.branches[k].bus_to = bus_t

            # copy the state of the removed branch
            circuit.branches[k].active = circuit.branches[removed_br_idx].active

            # remember the updated branches
            updated_branches.append(circuit.branches[k])

        # merge buses
        bus_t.merge(bus_f)
        updated_bus = bus_t

        # delete bus
        removed_bus = circuit.buses.pop(f)

    # in both cases the branch itself is removed last
    removed_branch = circuit.branches.pop(removed_br_idx)

    # return the removed branch and the possible removed bus
    return removed_branch, removed_bus, updated_bus, updated_branches
def test_random_topology_generation(self):
    """Check shape, variable-type counts and acyclicity of generated random topologies."""
    # --- no given variables ---
    topology, var_types = CS3m.generate_random_topology(n_covariates=4, p=0.4,
                                                        n_treatments=2, n_outcomes=2,
                                                        n_censoring=0, given_vars=[],
                                                        p_hidden=0)
    n_rows, n_cols = topology.shape[0], topology.shape[1]
    # output structure
    self.assertEqual(n_rows, n_cols, msg="Graph has no square shape")
    self.assertEqual(
        n_rows, 8,
        msg="Number of Graph variables {emp} does not match it supposed number {sup}".format(
            emp=n_rows, sup=8))
    self.assertEqual(n_rows, var_types.size)
    # number of variables of each type
    for type_name, expected_count in (("covariate", 4), ("treatment", 2), ("outcome", 2),
                                      ("hidden", 0), ("censor", 0)):
        self.assertEqual(sum(var_types == type_name), expected_count)
    # each outcome is coupled with exactly one treatment
    treatments_per_outcome = topology.loc[var_types == "outcome",
                                          var_types == "treatment"].sum(axis=1)
    self.assertTrue(
        all(treatments_per_outcome == np.array([1, 1])),
        msg="each outcome variable does not have exactly one predecessor treatment variable")

    # --- hidden and censor variables ---
    topology, var_types = CS3m.generate_random_topology(n_covariates=100, p=0.4,
                                                        n_treatments=2, n_outcomes=2,
                                                        n_censoring=2, given_vars=[],
                                                        p_hidden=0.4)
    n_rows, n_cols = topology.shape[0], topology.shape[1]
    self.assertEqual(n_rows, n_cols, msg="Graph has no square shape")
    self.assertEqual(
        n_rows, 106,
        msg="Number of Graph variables {t} does not match it supposed number {s}".format(
            t=n_rows, s=106))
    self.assertEqual(n_rows, var_types.size)
    self.assertEqual(sum(var_types == "censor"), 2)
    type_counts = var_types.value_counts()
    # share of hidden variables among the 100 covariates should be close to p_hidden
    self.assertAlmostEqual(type_counts["hidden"] / 100.0, 0.4, delta=1e-2)

    # --- given variables ---
    given = pd.DataFrame(np.random.RandomState(0).normal(size=(4800, 5)))
    topology, var_types = CS3m.generate_random_topology(n_covariates=4, p=0.4,
                                                        n_treatments=2, n_outcomes=2,
                                                        n_censoring=0,
                                                        given_vars=given.columns,
                                                        p_hidden=0)
    self.assertEqual(sum(var_types == "covariate"), 9)
    # given variables have no predecessors
    np.testing.assert_array_equal(topology.loc[given.columns, :].sum(axis="columns"),
                                  np.zeros(5))

    # --- DAG-ness over randomly drawn configurations ---
    from networkx import DiGraph, from_numpy_matrix, is_directed_acyclic_graph
    NUM_TESTS = 50
    for _ in range(NUM_TESTS):
        n_cov = np.random.randint(low=10, high=100)
        p = np.random.rand()  # type: float
        n_tre_out = np.random.randint(low=1, high=4)
        n_cen = np.random.randint(low=0, high=n_tre_out)
        topology, _unused_types = CS3m.generate_random_topology(n_covariates=n_cov, p=p,
                                                                n_treatments=n_tre_out,
                                                                n_outcomes=n_tre_out,
                                                                n_censoring=n_cen,
                                                                given_vars=[],
                                                                p_hidden=0)
        digraph = from_numpy_matrix(topology.values.transpose(), create_using=DiGraph())
        self.assertTrue(is_directed_acyclic_graph(digraph))
def __init__(self):
    """Initialise the base class, then create an empty conversion graph."""
    super().__init__()
    # Empty directed graph; nothing is added to it here
    self.conversion_graph = DiGraph()
def __init__(self, func_classes: List[Type[IFunc]], wired: List[any] = None):
    """
    Validate the wiring between functions, order the functions for execution, and derive
    the schema of the unwired inputs and the preferences of "dataset" outputs.

    :param func_classes: function classes of the pipeline; a class id may occur several times,
        occurrences are told apart by their 1-based order
    :param wired: pairs of (input, output); each side is indexed as
        [function id, order (may be None), argument name]. A None order is filled in place
        using ``self.get_func_order``. Defaults to no wiring.
    :raises ValidationError: if a wired input and output differ, if the ``input_ref`` of a
        "dataset" output is invalid, or if the wiring is not a DAG
    """
    # NOTE(review): `List[any]` uses the builtin `any`; presumably `typing.Any` was intended - confirm.

    # map from function id to a tuple (idx of function, order of function (start from 1)).
    self.id2order = {}
    # map from idx of function to its order
    idx2order = {}
    # map from tuple (id, order) of function to its dataset preference
    self.preferences = {}

    for i, func_cls in enumerate(func_classes):
        if func_cls.id not in self.id2order:
            self.id2order[func_cls.id] = []
        self.id2order[func_cls.id].append(
            (i, len(self.id2order[func_cls.id]) + 1))
        idx2order[i] = len(self.id2order[func_cls.id])
        self.preferences[(func_cls.id, idx2order[i])] = {}

    wired = wired or []
    # mapping of wired from input to output
    self.wired = {}
    # inverse mapping of wired from output to all inputs
    self.inv_wired = {}

    # applying topological sort on func_classes to determine execution order based on wiring
    graph = DiGraph()
    graph.add_nodes_from(range(len(func_classes)))
    # mapping preferences of argtype "dataset" to determine backend for "dataset" outputs
    preference_roots, preference_graph = [], DiGraph()
    for i, o in wired:
        # fill in a missing order (this writes into the caller's wiring entries)
        if i[1] is None:
            i[1] = self.get_func_order(i[0])
        if o[1] is None:
            o[1] = self.get_func_order(o[0])

        # resolve (id, order) to the original index in func_classes, then look up the argument
        input_arg = func_classes[self.id2order[i[0]][i[1] - 1][0]].inputs[i[2]]
        output_arg = func_classes[self.id2order[o[0]][o[1] - 1][0]].outputs[o[2]]
        if input_arg != output_arg:
            raise ValidationError(
                f"Incompatible ArgType while wiring {WiredIOArg.get_arg_name(i[0], i[1], i[2])} to {WiredIOArg.get_arg_name(o[0], o[1], o[2])}"
            )
        input_gname = (i[0], i[1], i[2])
        output_gname = (o[0], o[1], o[2])
        self.wired[input_gname] = output_gname
        if output_gname not in self.inv_wired:
            self.inv_wired[output_gname] = []
        self.inv_wired[output_gname].append(input_gname)
        # execution dependency: the function producing the output precedes the one consuming it
        graph.add_edge(self.id2order[o[0]][o[1] - 1][0],
                       self.id2order[i[0]][i[1] - 1][0])

        if output_arg.id == 'dataset':
            self.preferences[(o[0], o[1])][o[2]] = None
            node = (o[0], o[1], 'o', o[2])
            # if input_ref of "dataset" output is None, we take it as a new "dataset"
            if output_arg.input_ref is None:
                preference_roots.append(node)
            elif output_arg.input_ref not in func_classes[self.id2order[
                    o[0]][o[1] - 1][0]].inputs:
                raise ValidationError(
                    f"Invalid value for input_ref {output_arg.input_ref} of {output_gname} output dataset"
                )
            elif func_classes[self.id2order[o[0]][o[1] - 1][0]].inputs[
                    output_arg.input_ref] != output_arg:
                raise ValidationError(
                    f"Invalid ArgType for input_ref {output_arg.input_ref} of {output_gname} output dataset"
                )
            else:
                # adding dummy "internal" edges within the same adapter to link "dataset" output to its input_ref
                preference_graph.add_edge(
                    (o[0], o[1], 'i', output_arg.input_ref),
                    node,
                    preference='n/a')
            # edge from the "dataset" output to the wired input, carrying that input's preference
            preference_graph.add_edge(node, (i[0], i[1], 'i', i[2]),
                                      preference=input_arg.preference)

    self.func_classes = []
    self.idx2order = {}
    try:
        # reordering func_classes in topologically sorted order for execution
        for i in lexicographical_topological_sort(graph):
            self.func_classes.append(func_classes[i])
            # changing idx of functions to map to their new order
            self.idx2order[len(self.func_classes) - 1] = idx2order[i]
    except NetworkXUnfeasible:
        raise ValidationError("Pipeline is not a DAG")

    # schema entries are created only for inputs that are not fed by a wire
    self.schema = {}
    for i, func_cls in enumerate(self.func_classes):
        for argname in func_cls.inputs:
            input_gname = (func_cls.id, self.idx2order[i], argname)
            if input_gname in self.wired:
                continue
            argtype = func_cls.inputs[argname]
            self.schema[WiredIOArg.get_arg_name(
                *input_gname)] = fields.Raw(
                    required=not argtype.optional,
                    validate=argtype.is_valid,
                    error_messages={
                        'validator_failed':
                        f"Invalid Argument type. Expected {argtype.id}"
                    })
    self.schema = Schema.from_dict(self.schema)

    # setting preferences for new "dataset" outputs
    for root in preference_roots:
        counter = Counter()
        # traversing subgraph from every new "dataset" as root and counting preferences
        for edge in bfs_edges(preference_graph, root):
            counter[preference_graph[edge[0]][edge[1]]['preference']] += 1
        # strict majority wins; a tie (including no votes) leaves the preference as None
        preference = None
        if counter['graph'] > counter['array']:
            preference = 'graph'
        elif counter['array'] > counter['graph']:
            preference = 'array'
        self.preferences[(root[0], root[1])][root[3]] = preference
def __init__(self, token_network_address: TokenNetworkAddress):
    """Set up an empty token network for the given address.

    Stores the address and starts with no known channels and an empty
    directed graph.
    """
    self.address = token_network_address
    # maps a channel id to the pair of addresses stored for it
    self.channel_id_to_addresses: Dict[ChannelID, Tuple[Address, Address]] = {}
    self.G = DiGraph()
local_graph_filepath = os.path.join(storage.local_dirpath, "graph.gpickle")
gcs_graph_filepath = os.path.join(storage.gcs_dirpath, "graph.gpickle")

# Rebuild when there is no local pickle yet, or when a destructive run is requested
if not os.path.exists(local_graph_filepath) or GRAPH_DESTRUCTIVE:
    node_columns = ["user_id", "screen_name", "rate", "bot"]
    nodes_df = statuses_df.copy()[node_columns]
    nodes_df = nodes_df.drop_duplicates()
    print(len(nodes_df))
    print(nodes_df.head())

    print("CREATING GRAPH...")
    graph = DiGraph()

    job.start()
    print("NODES...")
    # one graph node per unique row, keyed by screen name
    for _, row in nodes_df.iterrows():
        graph.add_node(row["screen_name"],
                       user_id=row["user_id"],
                       rate=row["rate"],
                       bot=row["bot"])

        job.counter += 1
        # report progress once per full batch
        if job.counter % GRAPH_BATCH_SIZE == 0:
            job.progress_report()
    job.end()
else:
    print("LOADING GRAPH...")
    graph = read_gpickle(local_graph_filepath)
    print(type(graph), graph.number_of_nodes(), graph.number_of_edges())
def test_score_sequence_edge():
    """The score sequence of the single edge 0 -> 1 is [0, 1]."""
    single_edge = DiGraph([(0, 1)])
    scores = score_sequence(single_edge)
    assert scores == [0, 1]
def local_cfg(bbs: List[BasicBlock]) -> LocalGraph:
    """
    Build the local graph of a code fragment from its basic blocks.

    Every node carries the block's assembly labels (attribute `labels`) and code (attribute `block`); every edge
    carries its transition type (attribute `kind`).

    The construction relies on these assumptions:

    - blocks are listed in the order they appear in the original code fragment;
    - the first block is the entry-point;
    - all jumps are local;
    - every block ending with a `RETURN` transition gives control back to whoever caused the PC to reach the EP.

    If they hold, the returned local graph is well-formed.

    :param bbs: the list of basic blocks of which the local graph is formed
    :return: a LocalGraph object representing the local graph
    """
    graph = DiGraph()
    symbols: MutableMapping[str, Hashable] = {}
    unresolved_jumps: List[Tuple[Hashable, str, Transition]] = []
    terminals = []
    calls = []

    # State carried from the previous block to the next one
    seq_parent = None
    open_call = None

    for bb in bbs:
        graph.add_node(bb.identifier, labels=list(bb.labels), block=bb.code)

        if seq_parent is not None:
            # The previous block falls through into this one
            graph.add_edge(seq_parent, bb.identifier, kind=Transition.SEQ)
            seq_parent = None
        elif open_call is not None:
            # This block is where the pending procedure call returns to
            caller, callee = open_call
            calls.append(ProcedureCall(caller, callee, bb.identifier))
            open_call = None

        # Register the block's labels so jumps can be resolved later
        for lab in bb.labels:
            symbols[lab] = bb.identifier

        transition = bb.outgoing_flow[0]
        if transition is Transition.RETURN:
            # Return-jump: the block is a terminal node
            terminals.append(bb.identifier)
            continue
        if transition is Transition.CALL:
            # Procedure call: the next block becomes its confluence point
            open_call = bb.identifier, bb.outgoing_flow[1]
            continue

        if transition is Transition.SEQ or transition.branching:
            # Sequential or branching transition: the next block hangs off this one
            seq_parent = bb.identifier
        if transition.resolve_symbol:
            # Jump: remember origin and symbolic destination for the resolution pass below
            unresolved_jumps.append((bb.identifier, bb.outgoing_flow[1], bb.outgoing_flow[0]))

    # Resolve the internal symbolic jumps and add the missing edges
    for origin, target_label, jump_kind in unresolved_jumps:
        graph.add_edge(origin, symbols[target_label], kind=jump_kind)

    # Transform recursive calls into internal call arcs
    # TODO re-implement with partitions or sets
    internal_calls, external_calls = tee(calls)
    for call in internal_calls:
        if call.callee in symbols:
            graph.add_edge(call.caller, call.confluence_point, kind=Transition.CALL, callee=call.callee)

    # Calls whose callee is not a local label are handed over lazily
    return LocalGraph([bbs[0].identifier], graph,
                      filter(lambda c: c.callee not in symbols, external_calls),
                      terminals)
def test_score_sequence_triangle():
    """Every node of a directed triangle has score 1."""
    triangle_edges = [(0, 1), (1, 2), (2, 0)]
    scores = score_sequence(DiGraph(triangle_edges))
    assert scores == [1, 1, 1]
def exec_graph(cfg: LocalGraph,
               entry_point: Union[str, Hashable],
               ignore_calls: FrozenSet[str] = frozenset()) -> DiGraph:
    """
    Build the graph of node visits performed by an execution that starts at the given entry-point of a local CFG.

    The component reachable from the entry-point is visited depth-first, and the procedure repeats recursively for
    every `CALL` arc found in it. Recursive calls are not expanded: they appear as special nodes whose ID has the form
    `call{<call destination>, <unique ID>}`. Further calls can be excluded from expansion through `ignore_calls`.

    Each call to the same procedure attaches a separately-labeled sub-graph, so the result materializes execution
    paths rather than being a sub-graph of the CFG; nodes of the two graphs do not correspond one-to-one.

    Terminal nodes are guaranteed reachable only when the graph is well formed and every external call met by the
    execution flow has been internalized, unless explicitly ignored.

    :param cfg: a CFG description of some code
    :param entry_point: an entry-point specification for the CFG, either as a node ID or as a symbolic label
    :param ignore_calls: a set of calls that won't be expanded into sub-graphs
    :return: a directed graph representing the execution starting from the specified entry-point
    """
    # Resolve the entry-point to a node ID
    if entry_point in cfg.entry_point_ids:
        source = entry_point
    else:
        source = cfg.get_symbol_table()[entry_point]
    source_labels = cfg.graph.nodes[source]['labels']

    # An ignored entry-point collapses into a single summary node
    if not ignore_calls.isdisjoint(source_labels):
        summary = DiGraph()
        # Synthetic ID 'call{<call destination>, <unique ID>}'; the node keeps the original labels
        summary_id = 'call{' + str(source) + ', ' + generate_unique_node() + '}'
        summary.add_node(summary_id, labels=source_labels, external=True)
        return summary

    # Nodes reachable from the entry-point, and a view of the CFG restricted to them
    reachable = frozenset(dfs_preorder_nodes(cfg.graph, source))
    component: Graph = subgraph_view(cfg.graph, lambda n: n in reachable)

    # Start from a copy of the visited component
    result = DiGraph()
    result.update(component)

    # Expand each CALL edge of the visited component
    call_edges = (e for e in component.edges if component.edges[e]['kind'] == Transition.CALL)
    for edge in call_edges:
        caller, return_point = edge[0], edge[1]

        # Execution graph of the called procedure; the current labels join the ignore set
        callee_graph = exec_graph(cfg, component.edges[edge]['callee'], ignore_calls.union(source_labels))

        # Relabel in place to avoid ID clashes, then merge into the result
        relabel_nodes(callee_graph, solve_graph_collision(result, callee_graph), False)
        result.update(callee_graph)

        # Root (first node without predecessors) and terminal nodes (no successors) of the sub-component
        head = next(n for n in callee_graph.nodes if callee_graph.in_degree(n) == 0)
        tails = (n for n in callee_graph.nodes if callee_graph.out_degree(n) == 0)

        # Replace the original edge with call/return edges toward/from the sub-component
        result.remove_edge(*edge)
        result.add_edge(caller, head, kind=Transition.CALL)
        result.add_edges_from(zip(tails, repeat(return_point)), kind=Transition.RETURN)

    return result
def test_reachable_pair():
    """Tests for a reachable pair of nodes."""
    cycle = DiGraph([(0, 1), (1, 2), (2, 0)])
    reachable = is_reachable(cycle, 0, 2)
    assert reachable
def velocity_graph(adata, basis=None, vkey='velocity', which_graph='velocity', n_neighbors=10,
                   arrows=None, arrowsize=3, alpha=.8, perc=90, edge_width=.2, edge_color='grey',
                   edges_on_top=None, color=None, layer=None, size=None, groups=None,
                   components=None, title=None, dpi=None, show=True, save=None, ax=None, **kwargs):
    """\
    Plot of the velocity graph.

    Arguments
    ---------
    adata: :class:`~anndata.AnnData`
        Annotated data matrix.
    vkey: `str` (default: `'velocity'`)
        Key passed on to the transition matrix computation.
    which_graph: `'velocity'` or `'neighbors'` (default: `'velocity'`)
        Whether to show transitions from velocity graph or connectivities from neighbors graph.
    n_neighbors: `int` (default: 10)
        Number of neighbors to be included for generating connectivity / velocity graph.
    arrows: `bool` (default: `None`)
        Whether to display arrows instead of edges.
        Recommended to be used only on a cluster by setting groups parameter.
    arrowsize: `int` (default: 3)
        Size of the arrow heads.

    {scatter}

    Returns
    -------
        `matplotlib.Axis` if `show==False`
    """
    basis = default_basis(adata) if basis is None else get_basis(adata, basis)
    # The scatter call only draws the background; showing/saving happens once at the end
    kwargs.update({
        "basis": basis,
        "title": which_graph + ' graph' if title is None else title,
        "alpha": alpha,
        "components": components,
        "groups": groups,
        "dpi": dpi,
        "show": False,
        "save": None
    })
    ax = scatter(adata, layer=layer, color=color, size=size, ax=ax, zorder=0, **kwargs)

    from networkx import Graph, DiGraph

    if which_graph in {'neighbors', 'connectivities'}:
        T = adata.uns['neighbors']['connectivities'].copy()
        stored_graph = True
    elif which_graph in adata.uns.keys():
        T = adata.uns[which_graph].copy()
        stored_graph = True
    else:
        # transition_matrix receives perc itself, so no pruning is applied afterwards
        T = transition_matrix(adata, vkey=vkey, weight_indirect_neighbors=0,
                              n_neighbors=n_neighbors, perc=perc)
        stored_graph = False

    if stored_graph and perc is not None:
        # Drop all entries of a stored graph below the perc-th percentile
        threshold = np.percentile(T.data, perc)
        T.data[T.data < threshold] = 0
        T.eliminate_zeros()

    if groups is not None:
        if issparse(T):
            # .toarray() replaces the `.A` shorthand, which newer SciPy no longer provides
            T = T.toarray()
        # Zero the rows of all cells outside the selected groups
        T[~groups_to_bool(adata, groups, color)] = 0
        T = csr_matrix(T)
        T.eliminate_zeros()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        X_emb = adata.obsm['X_' + basis][:, get_components(components, basis)]
        edges = draw_networkx_edges(DiGraph(T) if arrows else Graph(T), X_emb,
                                    width=edge_width, edge_color=edge_color,
                                    arrowsize=arrowsize, ax=ax)
        if not arrows and not edges_on_top:
            # Push plain edges behind the scatter points
            edges.set_zorder(-2)
            edges.set_rasterized(settings._vector_friendly)

    savefig_or_show(dpi=dpi, save=save, show=show)
    if not show:
        return ax
def test_unreachable_pair():
    """Tests for an unreachable pair of nodes."""
    transitive = DiGraph([(0, 1), (0, 2), (1, 2)])
    reachable = is_reachable(transitive, 1, 0)
    assert not reachable
"""
#first function counts feedback loops
def dfs(graph, start, end):
    fringe = [(start, [])]
    while fringe:
        state, path = fringe.pop()
        if path and state == end:
            yield path
            continue
        for next_state in graph[state]:
            if next_state in path:
                continue
            fringe.append((next_state, path+[next_state]))

cycles = [[node]+path for node in dictionary_we_have_created for path in dfs(dictionary_we_have_created, node, node)]
print(len(cycles)) #feedback loops
"""
from networkx import NetworkXNoCycle

# this one counts all loops
DG = DiGraph(dictionary_we_have_created)
print(len(list(simple_cycles(DG))))

try:
    find_cycle(DG, orientation='original')
except NetworkXNoCycle:
    # No directed cycle is fine here; anything else is a real error and should surface
    pass

# With orientation='ignore' edge directions are disregarded when looking for a cycle
print(list(find_cycle(DG, orientation='ignore')))
def test_not_strongly_connected():
    """Tests for a tournament that is not strongly connected."""
    transitive_edges = [(0, 1), (0, 2), (1, 2)]
    tournament = DiGraph(transitive_edges)
    assert not is_strongly_connected(tournament)
def __init__(self, name):
    """Store the name, generate an id and create an empty directed graph.

    :param name: value stored as ``self.name``
    """
    self.name = name
    # id produced by generate_uuid with variant='uuid'
    self._id = generate_uuid(variant='uuid')
    # starts empty; nothing is added to it here
    self._graph = DiGraph()
def test_self_loops():
    """A tournament must have no self-loops."""
    tournament_edges = [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]
    G = DiGraph()
    G.add_edges_from(tournament_edges)
    # The extra self-loop is what must make the check fail
    G.add_edge(0, 0)
    assert not is_tournament(G)
from networkx import DiGraph
from networkx.algorithms.shortest_paths.generic import shortest_path
import re

# Directed graph of unit conversions: an edge a -> b carries the callable converting a value in a to b
CONVERSIONS = DiGraph()
# Every unit name known to the conversion table
CONVERSION_UNITS = set()


def addConversion(source, dest, multiplier):
    """Register a two-way conversion where one `source` equals `multiplier` `dest`.

    Both units are added to CONVERSION_UNITS; a unit whose name ends in 'b' or 'B'
    additionally registers its "<unit>ps" form there.
    """
    def to_dest(x):
        return x * float(multiplier)

    def to_source(x):
        return x / float(multiplier)

    CONVERSIONS.add_edge(source, dest, function=to_dest)
    CONVERSIONS.add_edge(dest, source, function=to_source)
    CONVERSION_UNITS.add(source)
    CONVERSION_UNITS.add(dest)
    for unit in (source, dest):
        if unit[-1] in ('b', 'B'):
            CONVERSION_UNITS.add(unit + "ps")


addConversion('R', 'B', 100)
addConversion('KB', 'B', 1000)
addConversion('MB', 'KB', 1000)
addConversion('GB', 'MB', 1000)
addConversion('TB', 'GB', 1000)
addConversion('KiB', 'B', 1024)
addConversion('MiB', 'B', 1048576)
addConversion('GiB', 'B', 1073741824)
def test_path_is_hamiltonian():
    """The returned path visits four nodes and every consecutive pair is an edge."""
    edges = [(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)]
    graph = DiGraph()
    graph.add_edges_from(edges)
    path = hamiltonian_path(graph)
    assert len(path) == 4
    # Each step of the path must follow an existing directed edge.
    for tail, head in zip(path, path[1:]):
        assert head in graph[tail]
def __generate_tgen_markov_model(privcount_tmodel_src_path, tmodel_key, tgen_tmodel_dst_path):
    """Turn one hidden Markov model from a JSON file into a GraphML file.

    The JSON document at ``privcount_tmodel_src_path`` is loaded and the entry
    under ``tmodel_key`` is used as the model.  States and observations become
    nodes (ids ``s<N>`` / ``o<N>``), start and state-to-state probabilities
    become ``transition`` edges, and state-to-observation probabilities become
    ``emission`` edges carrying a delay distribution.  The resulting graph is
    written to ``tgen_tmodel_dst_path`` with ``write_graphml``.
    """
    with open(privcount_tmodel_src_path, 'r') as src_file:
        hmm = json.load(src_file)[tmodel_key]

    graph = DiGraph()
    node_ids = {}  # converted name -> node id

    def register(node_id, kind, raw_name):
        # Add one node and remember its id under the converted name.
        label = __convert_privcount_key_to_tgen_key(raw_name)
        node_ids[label] = node_id
        graph.add_node(node_id, type=kind, name=label)

    def lookup(raw_name):
        # Node id previously registered for this (unconverted) name.
        return node_ids[__convert_privcount_key_to_tgen_key(raw_name)]

    # The synthetic start state is always s0; real states follow from s1.
    register('s{}'.format(0), 'state', "start")
    for index, state in enumerate(hmm['state_space'], start=1):
        register('s{}'.format(index), 'state', state)
    for index, observation in enumerate(hmm['observation_space']):
        register('o{}'.format(index), "observation", observation)

    # Transitions: edges between states, including those leaving "start".
    for state, prob in hmm['start_probability'].items():
        graph.add_edge(lookup("start"), lookup(state),
                       type='transition', weight=float(prob))
    for src_state, row in hmm['transition_probability'].items():
        for dst_state, prob in row.items():
            graph.add_edge(lookup(src_state), lookup(dst_state),
                           type='transition', weight=float(prob))

    # Emissions: edges from a state to an observation.
    for state, emitted in hmm['emission_probability'].items():
        for observation, params in emitted.items():
            # params format is [prob, lognorm_mu, lognorm_sigma, exp_lambda]
            src, dst = lookup(state), lookup(observation)
            graph.add_edge(src, dst, type='emission', weight=float(params[0]))
            edge = graph[src][dst]

            # The remaining parameters describe how long to wait after the
            # emission before making the next transition.
            if observation == 'F':
                # Terminal observation, so the delay does not matter.
                edge.update(distribution="uniform", param_low=0.0, param_high=0.0)
                continue

            lognorm_mu = float(params[1])
            lognorm_sigma = float(params[2])
            exp_lambda = float(params[3])
            if exp_lambda > 0.0:
                edge.update(distribution="exponential", param_rate=exp_lambda)
            else:
                edge.update(distribution="lognormal",
                            param_location=lognorm_mu,
                            param_scale=lognorm_sigma)

    write_graphml(graph, tgen_tmodel_dst_path)
*edge)['weight'] * count_of_subbags(edge[1]) else: return 1 #count_subbags += G.get_edge_data(*edge)['weight'] return count_subbags + 1 start = time() # bags, contain = load_input("input_test_7.txt") # bags, contain = load_input("input_test2_7.txt") bags, contain = load_input("input_7.txt") loading = time() # G = nx.DiGraph() G = DiGraph() G.add_nodes_from(bags) for i, con in enumerate(contain): G.add_edges_from([(bags[i], c[1], {"weight": int(c[0])}) for c in con]) # super_bags = nx.algorithms.dag.ancestors(G, "shiny gold") super_bags = ancestors(G, "shiny gold") # print(super_bags) print(f'Number of super bags: {len(super_bags)}') total_bags = count_of_subbags() print( f'Total number bags in shiny gold: {total_bags - 1}') # -1 for shiny gold end = time() print(f"loading input: {loading - start}, solving: {end - loading}")
def build_flux_graph(soln, raw, traced_element, path_save=None, overwrite=False,
                     i0=0, i1='eq', constV=False):
    """Build the directed flux graph of ``traced_element`` at time index ``i0``.

    Nodes are species names.  An edge ``source -> target`` carries:

    * ``flux``   -- total transfer rate of traced atoms from source to target,
    * ``member`` -- dict mapping ``str(reaction index)`` to that reaction's
      contribution to ``flux``,
    * ``1/flux`` -- reciprocal of ``flux``.

    :param soln: object exposing ``element_names``, ``n_reactions``,
        ``species(name).composition`` and ``reaction(i).reactants`` /
        ``reaction(i).products`` (dicts of species -> stoichiometric coefficient)
    :param raw: mapping whose ``'net_reaction_rate'`` entry is indexed as
        ``[i0, :]`` and must yield ``n_reactions`` values
    :param traced_element: str, name of the element to follow
    :param path_save: optional path of a JSON (node-link) file; when given, an
        existing file is loaded instead of recomputing (unless ``overwrite``),
        and the computed graph is written there
    :param overwrite: when ``False`` and ``path_save`` is readable, the saved
        graph is returned as-is
    :param i0: int, row of ``raw['net_reaction_rate']`` to use
    :param i1: accepted for interface compatibility; not used by this function
    :param constV: accepted for interface compatibility; not used by this function
    :return: the flux graph (networkx graph)
    :raises ValueError: if ``traced_element`` is not in ``soln.element_names``
    """
    element = soln.element_names
    species = soln.species
    reaction = soln.reaction
    n_rxn = soln.n_reactions

    # --------------------------------
    # check if results already exist, if so, load
    # --------------------------------
    if path_save is not None and overwrite is False:
        try:
            with open(path_save, 'r') as saved:
                data = json.load(saved)
        except IOError:
            # No readable cache file: fall through and compute the graph.
            pass
        else:
            return json_graph.node_link_graph(data)

    # --------------------------------
    # if not, then compute, and save
    # --------------------------------

    # check if traced_element is legal
    if traced_element not in element:
        # Raising a plain string is itself a TypeError in Python 3.
        raise ValueError('traced element ' + traced_element +
                         ' is not listed in mechanism')

    # Net reaction rates at the requested time index, one per reaction.
    # NOTE(review): the original comment said units are converted to mole/sec,
    # but no conversion happens here -- confirm the units of `raw`.
    rates = np.reshape(raw['net_reaction_rate'][i0, :], n_rxn)

    flux_graph = DiGraph()

    # -------------------------------------
    # adding edge from reactions
    # one edge may receive contributions from multiple reactions; the
    # contributors are stored in edge['member']
    # note though in .cti id_rxn starts from 1, in soln.reaction, id_rxn starts from 0
    for id_rxn in range(n_rxn):
        rxn = reaction(id_rxn)
        # Plain Python float: avoids int()/float() on 1-element arrays.
        rate = float(rates[id_rxn])
        direction = np.sign(rate)

        # sp_mu maps species -> net stoichiometric coefficient.  Work on a
        # copy so the reaction's own products mapping is never modified.
        sp_mu = dict(rxn.products)
        for sp, mu in rxn.reactants.items():
            if sp in sp_mu:
                sp_mu[sp] -= mu
            else:
                sp_mu[sp] = -mu

        # produced / consumed map species -> number of traced atoms
        # transferred when that species is produced / consumed.
        produced = {}
        consumed = {}
        for sp, net in sp_mu.items():
            atoms = species(sp).composition
            if traced_element in atoms:
                n = int(net * atoms[traced_element] * direction)
                if n > 0:
                    produced[sp] = abs(n)
                elif n < 0:
                    consumed[sp] = abs(n)

        # consider this reaction only when the traced element is transferred
        if not produced:
            continue

        n_sum = sum(produced.values())
        # The direction (source --> target) is already fixed by the sign
        # handling above, so only the magnitude of the rate is needed.
        magnitude = abs(rate)
        for target, n_target in produced.items():
            for source, n_source in consumed.items():
                dw = 1.0 * n_target * n_source / n_sum * magnitude

                if flux_graph.has_edge(source, target):
                    edge = flux_graph[source][target]
                    edge['flux'] += dw
                else:
                    # if this edge doesn't exist, create it
                    flux_graph.add_edge(source, target, flux=dw, member={})
                    edge = flux_graph[source][target]

                # Record the contribution and refresh the reciprocal for
                # every reaction, not only the one that created the edge.
                edge['member'][str(id_rxn)] = dw
                edge['1/flux'] = 1.0 / edge['flux']

    # -------------------------------------
    # save the graph using json, which is fast, and human-readable
    if path_save is not None:
        data = json_graph.node_link_data(flux_graph)
        with open(path_save, 'w') as out:
            json.dump(data, out)

    return flux_graph