Example #1

# Imports for this example (the excerpt omits them; reconstructed from usage).
# The hierarchical pieces are assumed to come from the
# sklearn-hierarchical-classification package; custom_h_fbeta, get_node_metrics,
# get_classification_report and save_user_report are project-local helpers
# that are not shown here.
import os
from collections import Counter

import pandas as pd
from networkx import DiGraph
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer
from sklearn.model_selection import GroupKFold, RandomizedSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn_hierarchical_classification.classifier import HierarchicalClassifier
from sklearn_hierarchical_classification.constants import ROOT
from sklearn_hierarchical_classification.metrics import (fill_ancestors,
                                                         h_fbeta_score,
                                                         multi_labeled)
from tqdm import tqdm
def main(argv):
    infile = argv[0]
    outdir = argv[1]

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Read data file and retain data only corresponding to 5 sleep states
    df = pd.read_csv(infile,
                     dtype={
                         'label': object,
                         'user': object,
                         'position': object,
                         'dataset': object
                     })
    sleep_states = ['Wake', 'NREM 1', 'NREM 2', 'NREM 3', 'REM']
    df = df[df['label'].isin(sleep_states)].reset_index(drop=True)
    df = df[df['dataset'] == 'UPenn'].reset_index(drop=True)
    print('... Number of data samples: %d' % len(df))
    ctr = Counter(df['label'])
    for cls in ctr:
        print('%s: %d (%0.2f%%)' % (cls, ctr[cls], ctr[cls] * 100.0 / len(df)))

    feat_cols = ['ENMO_mean','ENMO_std','ENMO_min','ENMO_max','ENMO_mad','ENMO_entropy1','ENMO_entropy2', 'ENMO_prevdiff', 'ENMO_nextdiff', \
                 'angz_mean','angz_std','angz_min','angz_max','angz_mad','angz_entropy1','angz_entropy2', 'angz_prevdiff', 'angz_nextdiff', \
                 'LIDS_mean','LIDS_std','LIDS_min','LIDS_max','LIDS_mad','LIDS_entropy1','LIDS_entropy2', 'LIDS_prevdiff', 'LIDS_nextdiff']

    X = df[feat_cols].values
    y = df['label']
    groups = df['user']

    # Class hierarchy for sleep stages
    class_hierarchy = {
        ROOT: {"Wake", "Sleep"},
        "Sleep": {"NREM", "REM"},
        "NREM": {"Light", "NREM 3"},
        "Light": {"NREM 1", "NREM 2"}
    }

    graph = DiGraph(class_hierarchy)

    outer_cv_splits = 5
    inner_cv_splits = 3
    factor = 10.0

    node_labels = ['Wake', 'Sleep', 'REM', 'NREM', 'NREM 3', 'Light',
                   'NREM 1', 'NREM 2', 'Overall']
    results = {
        node: {'precision': [], 'recall': [], 'fbeta': []}
        for node in node_labels
    }

    # Outer CV
    group_kfold = GroupKFold(n_splits=outer_cv_splits)
    out_fold = 0
    hierarchical_pred = []
    for train_indices, test_indices in group_kfold.split(X, y, groups):
        out_fold += 1
        print('Processing fold ' + str(out_fold))
        out_fold_X_train = X[train_indices, :]
        out_fold_X_test = X[test_indices, :]
        out_fold_y_train = y[train_indices]
        out_fold_y_test = y[test_indices]
        out_fold_users_test = groups[test_indices]

        # Create a pipeline with scaler and hierarchical classifier
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            (
                'clf',
                HierarchicalClassifier(
                    base_estimator=RandomForestClassifier(random_state=0,
                                                          n_estimators=100,
                                                          n_jobs=-1),
                    class_hierarchy=class_hierarchy,
                    prediction_depth='mlnp',
                    progress_wrapper=tqdm,
                    #stopping_criteria=0.7
                ))
        ])

        # Inner CV
        strat_kfold = StratifiedKFold(n_splits=inner_cv_splits,
                                      random_state=0,
                                      shuffle=True)

        custom_cv_indices = []
        for grp_train_idx, grp_test_idx in strat_kfold.split(
                out_fold_X_train, out_fold_y_train):
            custom_cv_indices.append((grp_train_idx, grp_test_idx))

        print('Training')
        search_params = {'clf__base_estimator__n_estimators':[50,100,200,300,500], \
             'clf__base_estimator__max_depth': [5,10,None]}
        cv_clf = RandomizedSearchCV(estimator=pipe, param_distributions=search_params, \
                           cv=custom_cv_indices, scoring=make_scorer(custom_h_fbeta,graph=graph), n_iter=5, \
                           n_jobs=-1, verbose=1)
        cv_clf.fit(out_fold_X_train, out_fold_y_train)
        print('Predicting')
        out_fold_y_pred = cv_clf.predict(out_fold_X_test)

        best_clf = cv_clf.best_estimator_

        # Demonstrate using our hierarchical metrics module with MLB wrapper
        with multi_labeled(out_fold_y_test, out_fold_y_pred, best_clf.named_steps['clf'].graph_) \
                                as (y_test_, y_pred_, graph_, classes_):
            fold_h_prec, fold_h_rec, fold_h_fbeta = h_fbeta_score(
                y_test_, y_pred_, graph_)
            results['Overall']['precision'].append(fold_h_prec)
            results['Overall']['recall'].append(fold_h_rec)
            results['Overall']['fbeta'].append(fold_h_fbeta)
            print("Fold %d: precision: %0.4f, recall: %0.4f, fbeta: %0.4f" %
                  (out_fold, fold_h_prec, fold_h_rec, fold_h_fbeta))

            y_test_ = fill_ancestors(y_test_, graph=graph_)
            y_pred_ = fill_ancestors(y_pred_, graph=graph_)

            hierarchical_pred.append(
                (out_fold_users_test, y_test_, y_pred_, classes_))

            # Per-node metrics for every class in the hierarchy
            for node in ['Wake', 'Sleep', 'REM', 'NREM', 'NREM 3',
                         'Light', 'NREM 1', 'NREM 2']:
                node_prec, node_rec, node_fbeta, _ = get_node_metrics(
                    y_test_, y_pred_, classes_, node)
                results[node]['precision'].append(node_prec)
                results[node]['recall'].append(node_rec)
                results[node]['fbeta'].append(node_fbeta)

    get_classification_report(results)
    save_user_report(hierarchical_pred,
                     os.path.join(outdir, 'hierarchical_results.csv'))
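
A note on get_node_metrics: it is a project-local helper that this excerpt does not show. Below is a minimal sketch of what it plausibly computes, assuming y_test_ and y_pred_ are the binary indicator matrices produced by multi_labeled and classes_ gives the matching column order; the name and signature come from the call sites above, the body is an assumption.

from sklearn.metrics import precision_recall_fscore_support

def get_node_metrics(y_test, y_pred, classes, node):
    # Select the indicator column that corresponds to this hierarchy node
    col = list(classes).index(node)
    # Binary precision/recall/F-beta over that column
    prec, rec, fbeta, support = precision_recall_fscore_support(
        y_test[:, col], y_pred[:, col], average='binary')
    return prec, rec, fbeta, support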
Example #2
def test_hamiltonian_empty_graph():
    path = hamiltonian_path(DiGraph())
    assert len(path) == 0
Example #3
def scroll(webdriver_path=driver_path, timeout=3, graph=None, search_ids=None):
    """
    Use a web driver to scrape a Medium search page: load the page, scroll to the bottom until all posts have been
    loaded, then gather the posts, their authors and publication dates so they can be turned into a graph.

    Parameters
    ----------
    webdriver_path (str)
        where the chrome web driver is stored for establishing the driver
    timeout (int)
        how many seconds the driver should wait for the page to complete the re-load when scrolling
    graph (DiGraph)
        graph to which collected nodes and edges are added
    search_ids (str or list)
        search term(s) to query Medium with

    Returns
    -------
    graph (DiGraph)
        the input graph augmented with author, post and search-term nodes
    """
    # Avoid a shared mutable default argument: create a fresh graph per call
    if graph is None:
        graph = DiGraph()
    # Driver is currently set for version 8.1 on windows
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(executable_path=webdriver_path, chrome_options=chrome_options)
    # Node and Edge containers which will be returned starting with the base site which is being collected
    index = ['mediumcom']
    if not graph.has_node('mediumcom'):
        graph.add_node('mediumcom', description='Site with blogs')
    # Start the driver on the url and the query if it exists.
    if not search_ids:
        search_ids = ['network%20graph%20visualization']
    elif isinstance(search_ids, list):
        # Create a search that consists of all the terms and put it at the beginning of the list
        if len(search_ids) > 1:
            search_ids.insert(0, '%20'.join(search_ids))
    else:
        search_ids = [search_ids]
    for search_id in search_ids:
        # Set the driver on the search_id in the query
        driver.get('https://medium.com/search?q=%s' % search_id)
        # Get the initial scroll height once the results page has loaded
        last_height = driver.execute_script("return document.body.scrollHeight")
        # Normalize the ID now that the url is set
        search_id = search_id.replace('%20', '_')
        if not graph.has_node(search_id):
            graph.add_node(search_id, description='Search term used to search blogs')
        while True:
            # Scroll down to bottom
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # Wait to load page
            logger.info(
                'Collected %d posts. Scrolling for more...' % len(driver.find_elements_by_class_name('postArticle')))
            time.sleep(timeout)
            # Calculate new scroll height and compare with last scroll height
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height or len(driver.find_elements_by_class_name('postArticle')) > 100:
                # If heights are the same it will exit the function
                break
            last_height = new_height
        # Collect all the posts to iterate through and assign to nodes and edges
        posts = driver.find_elements_by_class_name('postArticle')
        logger.info('Collected %s posts' % len(posts))
        # Go through each post and extract an author (a_id), the post (b_id) and then create the edges
        for post in posts:
            try:
                author = post.find_element_by_class_name('ds-link').text
                link = post.find_element_by_class_name('ds-link').get_attribute("href")
                date = post.find_element_by_tag_name('time').text
                title = post.find_element_by_tag_name('h3').text
                claps = post.find_element_by_class_name('multirecommend').text
                # Create the author node
                a_id = ''.join(e for e in author if e.isalnum()).lower()
                if a_id not in index:
                    graph.add_node(a_id, description=author, link=link)
                    index.append(a_id)
                # Create the article node
                b_id = ''.join(e for e in title if e.isalnum()).lower()
                if b_id not in index:
                    graph.add_node(b_id, description="%s by %s" % (title, author), link=link, count=claps, date=date)
                    index.append(b_id)
                if graph.has_node(a_id) and graph.has_node(b_id):
                    graph.add_edge(a_id, b_id, label='Posted')
                    graph.add_edge(b_id, 'mediumcom', label='PostedOn')
                    graph.add_edge(search_id, b_id, label='FromSearch')
            except scroll_errors.NoSuchElementException as error:
                logger.error("Scrolling %s" % error.msg)

    return graph
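
A compatibility note: the snippet targets Selenium 3. In Selenium 4 the find_elements_by_* helpers and the executable_path/chrome_options keywords were removed, so on a current install the equivalent calls would look roughly like this (a sketch, assuming the same driver_path as above; the scrolling logic itself is unchanged):

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

options = Options()
options.add_argument("--headless")
# Selenium 4 style: pass the driver path through a Service object
driver = webdriver.Chrome(service=Service(driver_path), options=options)
posts = driver.find_elements(By.CLASS_NAME, 'postArticle')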
Example #4
def test_is_strongly_connected():
    """Tests for a strongly connected tournament."""
    G = DiGraph([(0, 1), (1, 2), (2, 0)])
    assert is_strongly_connected(G)
Example #5
def test_is_tournament():
    G = DiGraph()
    G.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)])
    assert is_tournament(G)
Example #6
def test_tournament_matrix():
    np = pytest.importorskip("numpy")
    npt = pytest.importorskip("numpy.testing")
    G = DiGraph([(0, 1)])
    m = tournament_matrix(G)
    npt.assert_array_equal(m.todense(), np.array([[0, 1], [-1, 0]]))
Example #7
def test_same_node_is_reachable():
    """Tests that a node is always reachable from it."""
    # G is an arbitrary tournament on ten nodes.
    G = DiGraph(sorted(p) for p in combinations(range(10), 2))
    assert all(is_reachable(G, v, v) for v in G)
Example #8

def test_empty_graph():
    assert list(dependent_node_iterator(DiGraph())) == []
Example #9
def reduce_grid_brute(circuit: MultiCircuit, removed_br_idx):
    """
    Remove the first branch found to be removed.
    this function is meant to be called until it returns false
    Args:
        circuit: Circuit to modify in-place
        removed_br_idx: branch index

    Returns: Nothing
    """

    # form C
    m = len(circuit.branches)
    n = len(circuit.buses)
    buses_dict = {bus: i for i, bus in enumerate(circuit.buses)}
    C = lil_matrix((m, n), dtype=int)
    graph = DiGraph()

    # TODO: Fix the topology reduction with the GC example, see what is going on

    for i in range(len(circuit.branches)):
        # get the from and to bus indices
        f = buses_dict[circuit.branches[i].bus_from]
        t = buses_dict[circuit.branches[i].bus_to]
        graph.add_edge(f, t)
        C[i, f] = 1
        C[i, t] = -1

    C = csc_matrix(C)

    # get branch buses
    bus_f = circuit.branches[removed_br_idx].bus_from
    bus_t = circuit.branches[removed_br_idx].bus_to
    f = buses_dict[bus_f]
    t = buses_dict[bus_t]

    removed_bus = None
    removed_branch = None
    updated_bus = None
    updated_branches = list()

    # get the number of paths
    n_paths = len(list(all_simple_paths(graph, f, t)))

    # print('Deleting: ', circuit.branches[br_idx].name)

    if n_paths == 1:

        # get the branches that are connected to the bus f
        adjacent_br_idx = get_branches_of_bus(C, f)

        for k in adjacent_br_idx:

            # get the indices of the buses
            f2 = buses_dict[circuit.branches[k].bus_from]
            t2 = buses_dict[circuit.branches[k].bus_to]

            # re-assign the right bus
            if f2 == f:
                circuit.branches[k].bus_from = bus_t
            elif t2 == f:
                circuit.branches[k].bus_to = bus_t

            # copy the state of the removed branch
            circuit.branches[k].active = circuit.branches[
                removed_br_idx].active

            # remember the updated branches
            updated_branches.append(circuit.branches[k])

        # merge buses
        bus_t.merge(bus_f)
        updated_bus = bus_t

        # delete bus
        removed_bus = circuit.buses.pop(f)

        # remove the branch and that's it
        removed_branch = circuit.branches.pop(removed_br_idx)

    else:
        # remove the branch and that's it
        removed_branch = circuit.branches.pop(removed_br_idx)

    # return the removed branch and the possible removed bus
    return removed_branch, removed_bus, updated_bus, updated_branches
Example #10

    def test_random_topology_generation(self):
        # ### without given variables: ### #
        T, var_types = CS3m.generate_random_topology(n_covariates=4,
                                                     p=0.4,
                                                     n_treatments=2,
                                                     n_outcomes=2,
                                                     n_censoring=0,
                                                     given_vars=[],
                                                     p_hidden=0)
        # test output structure:
        self.assertEqual(T.shape[0],
                         T.shape[1],
                         msg="Graph is not square")
        self.assertEqual(T.shape[0],
                         8,
                         msg="Number of Graph variables {emp} "
                         "does not match its supposed number {sup}".format(
                             emp=T.shape[0], sup=8))
        self.assertEqual(T.shape[0], var_types.size)
        # test number of variables of each type matches:
        self.assertEqual(sum(var_types == "covariate"), 4)
        self.assertEqual(sum(var_types == "treatment"), 2)
        self.assertEqual(sum(var_types == "outcome"), 2)
        self.assertEqual(sum(var_types == "hidden"), 0)
        self.assertEqual(sum(var_types == "censor"), 0)
        # test that each treatment is coupled with one outcome:
        self.assertEqual(
            all(T.loc[var_types == "outcome", var_types == "treatment"].sum(
                axis=1) == np.array([1, 1])),
            True,
            msg=
            "each outcome variable does not have exactly one predecessor treatment variable"
        )

        # ### with hidden variables and censor variables: ### #
        T, var_types = CS3m.generate_random_topology(n_covariates=100,
                                                     p=0.4,
                                                     n_treatments=2,
                                                     n_outcomes=2,
                                                     n_censoring=2,
                                                     given_vars=[],
                                                     p_hidden=0.4)
        # test output structure:
        self.assertEqual(T.shape[0],
                         T.shape[1],
                         msg="Graph is not square")
        self.assertEqual(
            T.shape[0],
            106,
            msg=
            "Number of Graph variables {t} does not match its supposed number {s}"
            .format(t=T.shape[0], s=106))
        self.assertEqual(T.shape[0], var_types.size)
        # test number of variables of each type matches:
        self.assertEqual(sum(var_types == "censor"), 2)
        hist = var_types.value_counts()
        self.assertAlmostEqual(hist["hidden"] / 100.0, 0.4, delta=1e-2)

        # graph = nx.from_numpy_matrix(T.values.transpose(), create_using=nx.DiGraph())
        # ### with given variables: ### #
        X = pd.DataFrame(np.random.RandomState(0).normal(size=(4800, 5)))
        T, var_types = CS3m.generate_random_topology(n_covariates=4,
                                                     p=0.4,
                                                     n_treatments=2,
                                                     n_outcomes=2,
                                                     n_censoring=0,
                                                     given_vars=X.columns,
                                                     p_hidden=0)
        self.assertEqual(sum(var_types == "covariate"), 9)
        # test that given variable has no predecessors:
        np.testing.assert_array_equal(T.loc[X.columns, :].sum(axis="columns"),
                                      np.zeros(5))

        # Test for DAGness:
        from networkx import DiGraph, from_numpy_matrix, is_directed_acyclic_graph
        NUM_TESTS = 50
        for test in range(NUM_TESTS):
            n_cov = np.random.randint(low=10, high=100)
            p = np.random.rand()  # type: float
            n_tre_out = np.random.randint(low=1, high=4)
            n_cen = np.random.randint(low=0, high=n_tre_out)
            T, _ = CS3m.generate_random_topology(n_covariates=n_cov,
                                                 p=p,
                                                 n_treatments=n_tre_out,
                                                 n_outcomes=n_tre_out,
                                                 n_censoring=n_cen,
                                                 given_vars=[],
                                                 p_hidden=0)
            G = from_numpy_matrix(T.values.transpose(), create_using=DiGraph())
            res = is_directed_acyclic_graph(G)
            self.assertTrue(res)
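
Note that from_numpy_matrix was deprecated in NetworkX 2.x and removed in 3.0. On a current NetworkX the DAG check above could read as follows (a sketch of the replacement API):

from networkx import DiGraph, from_numpy_array, is_directed_acyclic_graph

# from_numpy_array is the modern replacement for from_numpy_matrix
G = from_numpy_array(T.values.transpose(), create_using=DiGraph)
assert is_directed_acyclic_graph(G)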
Example #11
    def __init__(self):
        super().__init__()
        self.conversion_graph = DiGraph()
Example #12

    def __init__(self,
                 func_classes: List[Type[IFunc]],
                 wired: List[Any] = None):
        """
        :param func_classes:
        :param wired: input, output
        """
        # map from function id to a tuple (idx of function, order of function (start from 1)).
        self.id2order = {}
        # map from idx of function to its order
        idx2order = {}
        # map from tuple (id, order) of function to its dataset preference
        self.preferences = {}

        for i, func_cls in enumerate(func_classes):
            if func_cls.id not in self.id2order:
                self.id2order[func_cls.id] = []
            self.id2order[func_cls.id].append(
                (i, len(self.id2order[func_cls.id]) + 1))
            idx2order[i] = len(self.id2order[func_cls.id])
            self.preferences[(func_cls.id, idx2order[i])] = {}

        wired = wired or []
        # mapping of wired from input to output
        self.wired = {}
        # inverse mapping of wired from output to all inputs
        self.inv_wired = {}
        # applying topological sort on func_classes to determine execution order based on wiring
        graph = DiGraph()
        graph.add_nodes_from(range(len(func_classes)))
        # mapping preferences of argtype "dataset" to determine backend for "dataset" outputs
        preference_roots, preference_graph = [], DiGraph()
        for i, o in wired:
            if i[1] is None:
                i[1] = self.get_func_order(i[0])
            if o[1] is None:
                o[1] = self.get_func_order(o[0])

            input_arg = func_classes[self.id2order[i[0]][i[1] -
                                                         1][0]].inputs[i[2]]
            output_arg = func_classes[self.id2order[o[0]][o[1] -
                                                          1][0]].outputs[o[2]]
            if input_arg != output_arg:
                raise ValidationError(
                    f"Incompatible ArgType while wiring {WiredIOArg.get_arg_name(i[0], i[1], i[2])} to {WiredIOArg.get_arg_name(o[0], o[1], o[2])}"
                )
            input_gname = (i[0], i[1], i[2])
            output_gname = (o[0], o[1], o[2])
            self.wired[input_gname] = output_gname
            if output_gname not in self.inv_wired:
                self.inv_wired[output_gname] = []
            self.inv_wired[output_gname].append(input_gname)
            graph.add_edge(self.id2order[o[0]][o[1] - 1][0],
                           self.id2order[i[0]][i[1] - 1][0])

            if output_arg.id == 'dataset':
                self.preferences[(o[0], o[1])][o[2]] = None
                node = (o[0], o[1], 'o', o[2])
                # if input_ref of "dataset" output is None, we take it as a new "dataset"
                if output_arg.input_ref is None:
                    preference_roots.append(node)
                elif output_arg.input_ref not in func_classes[self.id2order[
                        o[0]][o[1] - 1][0]].inputs:
                    raise ValidationError(
                        f"Invalid value for input_ref {output_arg.input_ref} of {output_gname} output dataset"
                    )
                elif func_classes[self.id2order[o[0]][o[1] - 1][0]].inputs[
                        output_arg.input_ref] != output_arg:
                    raise ValidationError(
                        f"Invalid ArgType for input_ref {output_arg.input_ref} of {output_gname} output dataset"
                    )
                else:
                    # adding dummy "internal" edges within the same adapter to link "dataset" output to its input_ref
                    preference_graph.add_edge(
                        (o[0], o[1], 'i', output_arg.input_ref),
                        node,
                        preference='n/a')
                preference_graph.add_edge(node, (i[0], i[1], 'i', i[2]),
                                          preference=input_arg.preference)

        self.func_classes = []
        self.idx2order = {}
        try:
            # reordering func_classes in topologically sorted order for execution
            for i in lexicographical_topological_sort(graph):
                self.func_classes.append(func_classes[i])
                # changing idx of functions to map to their new order
                self.idx2order[len(self.func_classes) - 1] = idx2order[i]

        except NetworkXUnfeasible:
            raise ValidationError("Pipeline is not a DAG")

        self.schema = {}
        for i, func_cls in enumerate(self.func_classes):
            for argname in func_cls.inputs:
                input_gname = (func_cls.id, self.idx2order[i], argname)
                if input_gname in self.wired:
                    continue
                argtype = func_cls.inputs[argname]
                self.schema[WiredIOArg.get_arg_name(
                    *input_gname)] = fields.Raw(
                        required=not argtype.optional,
                        validate=argtype.is_valid,
                        error_messages={
                            'validator_failed':
                            f"Invalid Argument type. Expected {argtype.id}"
                        })
        self.schema = Schema.from_dict(self.schema)

        # setting preferences for new "dataset" outputs
        for root in preference_roots:
            counter = Counter()
            # traversing subgraph from every new "dataset" as root and counting preferences
            for edge in bfs_edges(preference_graph, root):
                counter[preference_graph[edge[0]][edge[1]]['preference']] += 1
            preference = None
            if counter['graph'] > counter['array']:
                preference = 'graph'
            elif counter['array'] > counter['graph']:
                preference = 'array'
            self.preferences[(root[0], root[1])][root[3]] = preference
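
The cycle detection above relies on lexicographical_topological_sort raising NetworkXUnfeasible when the wiring graph is not a DAG. A minimal standalone illustration of that behavior:

from networkx import DiGraph, lexicographical_topological_sort
from networkx.exception import NetworkXUnfeasible

g = DiGraph([(0, 1), (1, 2)])
print(list(lexicographical_topological_sort(g)))  # [0, 1, 2]

g.add_edge(2, 0)  # introduce a cycle
try:
    list(lexicographical_topological_sort(g))
except NetworkXUnfeasible:
    print("not a DAG")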
Example #13

    def __init__(self, token_network_address: TokenNetworkAddress):
        """ Initializes a new TokenNetwork. """

        self.address = token_network_address
        self.channel_id_to_addresses: Dict[ChannelID, Tuple[Address, Address]] = dict()
        self.G = DiGraph()
Example #14

    local_graph_filepath = os.path.join(storage.local_dirpath, "graph.gpickle")
    gcs_graph_filepath = os.path.join(storage.gcs_dirpath, "graph.gpickle")

    if os.path.exists(local_graph_filepath) and not GRAPH_DESTRUCTIVE:
        print("LOADING GRAPH...")
        graph = read_gpickle(local_graph_filepath)
        print(type(graph), graph.number_of_nodes(), graph.number_of_edges())
    else:
        nodes_df = statuses_df.copy()
        nodes_df = nodes_df[["user_id", "screen_name", "rate", "bot"]]
        nodes_df.drop_duplicates(inplace=True)
        print(len(nodes_df))
        print(nodes_df.head())

        print("CREATING GRAPH...")
        graph = DiGraph()

        job.start()
        print("NODES...")
        # for each unique node in the list, add a node to the graph.
        for i, row in nodes_df.iterrows():
            graph.add_node(row["screen_name"],
                           user_id=row["user_id"],
                           rate=row["rate"],
                           bot=row["bot"])

            job.counter += 1
            if job.counter % GRAPH_BATCH_SIZE == 0:
                job.progress_report()
        job.end()
Example #15
def test_score_sequence_edge():
    G = DiGraph([(0, 1)])
    assert score_sequence(G) == [0, 1]
Example #16
def local_cfg(bbs: List[BasicBlock]) -> LocalGraph:
    """
    Construct a local graph from a list of basic blocks.

    Nodes and edges of the resulting graph will be decorated, respectively, with assembly labels and transition types,
    registered with the attribute names of `labels` and `kind`.

    This function works based on a few assumptions:

    - the basic blocks are provided in the same order they appear inside the original code fragment;
    - the first block is the entry-point;
    - all jumps are local;
    - all blocks with a final `RETURN` transition actually return control to whoever caused the PC to reach the EP.

    When these conditions are satisfied, a well-formed local graph is returned.

    :param bbs: the list of basic blocks of which the local graph is formed
    :return: a LocalGraph object representing the local graph
    """

    local_graph = DiGraph()

    local_symbol_table: MutableMapping[str, Hashable] = {}
    pending_jumps: List[Tuple[Hashable, str, Transition]] = []

    terminal_nodes = []
    calls = []

    parent_seq_block = None
    pending_call = None

    for bb in bbs:
        local_graph.add_node(bb.identifier,
                             labels=list(bb.labels),
                             block=bb.code)

        if parent_seq_block is not None:
            # Attach the current node to the sequence-wise previous one
            local_graph.add_edge(parent_seq_block,
                                 bb.identifier,
                                 kind=Transition.SEQ)
            parent_seq_block = None
        elif pending_call is not None:
            # Set the current node as the return point of an external procedure call
            calls.append(
                ProcedureCall(pending_call[0], pending_call[1], bb.identifier))
            pending_call = None

        # Embed the basic block's labels into the node
        local_symbol_table.update((lab, bb.identifier) for lab in bb.labels)

        outgoing_transition = bb.outgoing_flow[0]
        if outgoing_transition is Transition.RETURN:
            # The outgoing transition is a return-jump: add the node to the list of terminals.
            terminal_nodes.append(bb.identifier)
        elif outgoing_transition is Transition.CALL:
            # The outgoing transition is a procedure call: keep track of it so that the subsequent block will be set as
            # its confluence point.
            pending_call = bb.identifier, bb.outgoing_flow[1]
        else:
            if outgoing_transition is Transition.SEQ or outgoing_transition.branching:
                # In case of a sequential or branching transition, the subsequent basic block is to be attached to the
                # current one.
                parent_seq_block = bb.identifier

            if outgoing_transition.resolve_symbol:
                # In case of a jump, store its origin and symbolic destination for the coming one-pass resolution.
                pending_jumps.append(
                    (bb.identifier, bb.outgoing_flow[1], bb.outgoing_flow[0]))

    for jumper, dst, kind in pending_jumps:
        # Resolve the internal symbolic jumps and add the missing edges
        local_graph.add_edge(jumper, local_symbol_table[dst], kind=kind)

    # Transform recursive calls into internal call arcs
    # TODO re-implement with partitions or sets
    ci, ce = tee(calls)
    for cll in filter(lambda c: c.callee in local_symbol_table, ci):
        local_graph.add_edge(cll.caller,
                             cll.confluence_point,
                             kind=Transition.CALL,
                             callee=cll.callee)

    return LocalGraph([bbs[0].identifier], local_graph,
                      filter(lambda c: c.callee not in local_symbol_table, ce),
                      terminal_nodes)
Example #17
def test_score_sequence_triangle():
    G = DiGraph([(0, 1), (1, 2), (2, 0)])
    assert score_sequence(G) == [1, 1, 1]
Example #18
def exec_graph(
    cfg: LocalGraph,
    entry_point: Union[str, Hashable],
    ignore_calls: FrozenSet[str] = frozenset()
) -> DiGraph:
    """
    Given a local CFG and an entry-point, return the graph of the node visits performed by the execution flow.

    The procedure consists in a recursive, depth-first visit of sub-graphs, starting from the initial node and repeating
    itself for every `CALL` arc encountered. Given their nasty nature, recursive calls are not expanded; instead, they
    are represented by special nodes with IDs of the form `call{<call destination>, <unique ID>}`.

    The user can specify additional calls that mustn't be expanded.

    Different calls to the same procedure result in differently-labeled sub-graphs being attached, so the resulting
    graph is more a substantiation of the execution paths than a sub-graph of the original CFG. As a consequence, don't
    expect a one-to-one correspondence between the CFG's nodes and the one in the execution graph.

    Terminal-node reachability is guaranteed only if the graph is well formed and any external call reached by the
    execution flow has been internalized, unless it is explicitly set as ignored.

    :param cfg: a CFG description of some code
    :param entry_point: an entry-point specification for the CFG, either as a node ID or as a symbolic label
    :param ignore_calls: a set of calls that won't be expanded into sub-graphs
    :return: a directed graph representing the execution starting from the specified entry-point
    """

    # Get the entry-point ID
    source = (entry_point if entry_point in cfg.entry_point_ids
              else cfg.get_symbol_table()[entry_point])
    source_labels = cfg.graph.nodes[source]['labels']

    # If one of the entry-point's labels is in the ignore set, return a node summarizing the call
    if not ignore_calls.isdisjoint(source_labels):
        res = DiGraph()
        # The node will have a synthetic ID 'call{<call destination>, <unique ID>}', and will carry the original labels.
        res.add_node('call{' + str(source) + ', ' + generate_unique_node() +
                     '}',
                     labels=source_labels,
                     external=True)
        return res

    # Traverse the subtree rooted at the entry-point and collect the visited nodes
    visited_nodes = frozenset(dfs_preorder_nodes(cfg.graph, source))
    # Produce a view of the visited component
    visited_component: Graph = subgraph_view(cfg.graph,
                                             lambda n: n in visited_nodes)

    # Initialize the returned graph with the contents of the visited component
    res = DiGraph()
    res.update(visited_component)

    # Iterate over the CALL edges inside the visited component
    for edge in filter(
            lambda e: visited_component.edges[e]['kind'] == Transition.CALL,
            visited_component.edges):
        # Recursively compute the component of the called procedures
        nested_component = exec_graph(cfg,
                                      visited_component.edges[edge]['callee'],
                                      ignore_calls.union(source_labels))
        # Add the nested component to the result, avoiding ID clashes
        relabel_nodes(nested_component,
                      solve_graph_collision(res, nested_component), False)
        res.update(nested_component)

        # Take the root of the sub-component and its terminal nodes
        head = next(
            filter(lambda n: nested_component.in_degree(n) == 0,
                   nested_component.nodes))
        tail = filter(lambda n: nested_component.out_degree(n) == 0,
                      nested_component.nodes)

        # Substitute the original edge with call and return edges toward/from the sub-component
        res.remove_edge(*edge)
        res.add_edge(edge[0], head, kind=Transition.CALL)
        res.add_edges_from(zip(tail, repeat(edge[1])), kind=Transition.RETURN)

    return res
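
generate_unique_node was deprecated in NetworkX 2.6 and removed in 3.0. If this code needs to run on a current NetworkX, a drop-in stand-in for the synthetic-ID part could be the following (an assumption mirroring what the removed helper did, namely returning a fresh UUID string):

import uuid

def generate_unique_node() -> str:
    # Stand-in for the removed networkx.utils helper: a fresh UUID string
    return str(uuid.uuid4())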
Example #19
def test_reachable_pair():
    """Tests for a reachable pair of nodes."""
    G = DiGraph([(0, 1), (1, 2), (2, 0)])
    assert is_reachable(G, 0, 2)
Example #20
def velocity_graph(adata,
                   basis=None,
                   vkey='velocity',
                   which_graph='velocity',
                   n_neighbors=10,
                   arrows=None,
                   arrowsize=3,
                   alpha=.8,
                   perc=90,
                   edge_width=.2,
                   edge_color='grey',
                   edges_on_top=None,
                   color=None,
                   layer=None,
                   size=None,
                   groups=None,
                   components=None,
                   title=None,
                   dpi=None,
                   show=True,
                   save=None,
                   ax=None,
                   **kwargs):
    """\
    Plot of the velocity graph.

    Arguments
    ---------
    adata: :class:`~anndata.AnnData`
        Annotated data matrix.
    vkey: `str` (default: `'velocity'`)
        Key for the velocity information used to compute the transition matrix.
    which_graph: `'velocity'` or `'neighbors'` (default: `'velocity'`)
        Whether to show transitions from velocity graph or connectivities from neighbors graph.
    n_neighbors: `int` (default: 10)
        Number of neighbors to be included for generating connectivity / velocity graph.
    arrows: `bool` (default: `None`)
        Whether to display arrows instead of edges. Recommended to be used only on a cluster by setting groups parameter.
    arrowsize: `int` (default: 3)
        Size of the arrow heads.

    {scatter}

    Returns
    -------
        `matplotlib.axes.Axes` if `show==False`
    """
    basis = default_basis(adata) if basis is None else get_basis(adata, basis)
    kwargs.update({
        "basis": basis,
        "title": which_graph + ' graph' if title is None else title,
        "alpha": alpha,
        "components": components,
        "groups": groups,
        "dpi": dpi,
        "show": False,
        "save": None
    })
    ax = scatter(adata,
                 layer=layer,
                 color=color,
                 size=size,
                 ax=ax,
                 zorder=0,
                 **kwargs)

    from networkx import Graph, DiGraph
    if which_graph in {'neighbors', 'connectivities'}:
        T = adata.uns['neighbors']['connectivities'].copy()
        if perc is not None:
            threshold = np.percentile(T.data, perc)
            T.data[T.data < threshold] = 0
            T.eliminate_zeros()
    elif which_graph in adata.uns.keys():
        T = adata.uns[which_graph].copy()
        if perc is not None:
            threshold = np.percentile(T.data, perc)
            T.data[T.data < threshold] = 0
            T.eliminate_zeros()
    else:
        T = transition_matrix(adata,
                              vkey=vkey,
                              weight_indirect_neighbors=0,
                              n_neighbors=n_neighbors,
                              perc=perc)

    if groups is not None:
        if issparse(T): T = T.A
        T[~groups_to_bool(adata, groups, color)] = 0
        T = csr_matrix(T)
        T.eliminate_zeros()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        X_emb = adata.obsm['X_' + basis][:, get_components(components, basis)]
        edges = draw_networkx_edges(DiGraph(T) if arrows else Graph(T),
                                    X_emb,
                                    width=edge_width,
                                    edge_color=edge_color,
                                    arrowsize=arrowsize,
                                    ax=ax)
        if not arrows and not edges_on_top:
            edges.set_zorder(-2)
            edges.set_rasterized(settings._vector_friendly)

    savefig_or_show(dpi=dpi, save=save, show=show)
    if not show: return ax
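
A typical call, assuming velocities have already been computed on adata (e.g. with scvelo's tl.velocity and tl.velocity_graph; hedged, since the surrounding package is not part of the excerpt):

ax = velocity_graph(adata, basis='umap', which_graph='velocity', perc=90, show=False)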
Example #21
def test_unreachable_pair():
    """Tests for an unreachable pair of nodes."""
    G = DiGraph([(0, 1), (0, 2), (1, 2)])
    assert not is_reachable(G, 1, 0)
"""
#first function counts feedback loops

def dfs(graph, start, end):
    fringe = [(start, [])]
    while fringe:
        state, path = fringe.pop()
        if path and state == end:
            yield path
            continue
        for next_state in graph[state]:
            if next_state in path:
                continue
            fringe.append((next_state, path+[next_state]))


cycles = [[node]+path  for node in dictionary_we_have_created for path in dfs(dictionary_we_have_created, node, node)]
print(len(cycles)) #feedback loops
"""
# this one counts all loops

from networkx import DiGraph, find_cycle, simple_cycles
from networkx.exception import NetworkXNoCycle

DG = DiGraph(dictionary_we_have_created)
print(len(list(simple_cycles(DG))))

try:
    find_cycle(DG, orientation='original')
except NetworkXNoCycle:
    pass

print(list(find_cycle(DG, orientation='ignore')))
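
For a concrete run, dictionary_we_have_created is an adjacency mapping from each node to the list of its successors; with hypothetical data such as

dictionary_we_have_created = {
    'a': ['b'],
    'b': ['c', 'a'],  # a -> b -> a is a 2-cycle
    'c': ['a'],       # a -> b -> c -> a closes a 3-cycle
}

the simple_cycles count printed above would be 2.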
Example #23
def test_not_strongly_connected():
    """Tests for a tournament that is not strongly connected."""
    G = DiGraph([(0, 1), (0, 2), (1, 2)])
    assert not is_strongly_connected(G)
Example #24
    def __init__(self, name):
        self.name = name
        self._id = generate_uuid(variant='uuid')
        self._graph = DiGraph()
Example #25
def test_self_loops():
    """A tournament must have no self-loops."""
    G = DiGraph()
    G.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)])
    G.add_edge(0, 0)
    assert not is_tournament(G)
Example #26

from networkx import DiGraph
from networkx.algorithms.shortest_paths.generic import shortest_path
import re

CONVERSIONS = DiGraph()
CONVERSION_UNITS = set()


def addConversion(source, dest, multiplier):
    CONVERSIONS.add_edge(source,
                         dest,
                         function=lambda x: x * float(multiplier))
    CONVERSIONS.add_edge(dest,
                         source,
                         function=lambda x: x / float(multiplier))
    CONVERSION_UNITS.add(source)
    CONVERSION_UNITS.add(dest)

    for unit in source, dest:
        if unit[-1] == 'b' or unit[-1] == 'B':
            CONVERSION_UNITS.add(unit + "ps")


addConversion('R', 'B', 100)
addConversion('KB', 'B', 1000)
addConversion('MB', 'KB', 1000)
addConversion('GB', 'MB', 1000)
addConversion('TB', 'GB', 1000)
addConversion('KiB', 'B', 1024)
addConversion('MiB', 'B', 1048576)
addConversion('GiB', 'B', 1073741824)
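
The module imports shortest_path, but the conversion routine itself is not part of the excerpt. A sketch of how the graph would typically be used, chaining the per-edge conversion functions along the shortest path between two units (the helper name is hypothetical):

def convert(value, source, dest):
    # Walk the unit graph and apply each edge's conversion function in turn
    path = shortest_path(CONVERSIONS, source, dest)
    for u, v in zip(path, path[1:]):
        value = CONVERSIONS[u][v]['function'](value)
    return value

print(convert(3, 'GB', 'B'))  # 3000000000.0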
Example #27
def test_path_is_hamiltonian():
    G = DiGraph()
    G.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 0), (1, 3), (0, 2)])
    path = hamiltonian_path(G)
    assert len(path) == 4
    assert all(v in G[u] for u, v in zip(path, path[1:]))
Example #28

def __generate_tgen_markov_model(privcount_tmodel_src_path, tmodel_key,
                                 tgen_tmodel_dst_path):
    with open(privcount_tmodel_src_path, 'r') as privcount_tmodel_file:
        tmodel = json.load(privcount_tmodel_file)
        hmm = tmodel[tmodel_key]

    state_ctr = 0
    obs_ctr = 0
    name_to_id = {}

    G = DiGraph()

    id = 's{}'.format(state_ctr)
    name = __convert_privcount_key_to_tgen_key("start")
    name_to_id[name] = id
    state_ctr += 1
    G.add_node(id, type='state', name=name)

    # add the state nodes and the observations nodes
    for state in hmm['state_space']:
        id = 's{}'.format(state_ctr)
        name = __convert_privcount_key_to_tgen_key(state)
        name_to_id[name] = id
        state_ctr += 1
        G.add_node(id, type='state', name=name)

    for observation in hmm['observation_space']:
        id = 'o{}'.format(obs_ctr)
        name = __convert_privcount_key_to_tgen_key(observation)
        name_to_id[name] = id
        obs_ctr += 1
        G.add_node(id, type="observation", name=name)

    # edges between states are called transitions
    for state in hmm['start_probability']:
        srcid = name_to_id[__convert_privcount_key_to_tgen_key("start")]
        dstid = name_to_id[__convert_privcount_key_to_tgen_key(state)]
        p = float(hmm['start_probability'][state])
        G.add_edge(srcid, dstid, type='transition', weight=p)

    for srcstate in hmm['transition_probability']:
        for dststate in hmm['transition_probability'][srcstate]:
            srcid = name_to_id[__convert_privcount_key_to_tgen_key(srcstate)]
            dstid = name_to_id[__convert_privcount_key_to_tgen_key(dststate)]
            p = float(hmm['transition_probability'][srcstate][dststate])
            G.add_edge(srcid, dstid, type='transition', weight=p)

    # edges from states to observations are called emissions
    for state in hmm['emission_probability']:
        for observation in hmm['emission_probability'][state]:
            srcid = name_to_id[__convert_privcount_key_to_tgen_key(state)]
            dstid = name_to_id[__convert_privcount_key_to_tgen_key(
                observation)]

            # params format is [prob, lognorm_mu, lognorm_sigma, exp_lambda]
            params = hmm['emission_probability'][state][observation]
            p = float(params[0])

            G.add_edge(srcid, dstid, type='emission', weight=p)

            # after an emission happens, we have parameters to tell us how long to wait
            # until making the next transition

            if observation == 'F':
                # this observation is terminal, so the delay doesn't matter
                G[srcid][dstid]['distribution'] = "uniform"
                G[srcid][dstid]['param_low'] = 0.0
                G[srcid][dstid]['param_high'] = 0.0
            else:
                lognorm_mu = float(params[1])
                lognorm_sigma = float(params[2])
                exp_lambda = float(params[3])

                if exp_lambda > 0.0:
                    G[srcid][dstid]['distribution'] = "exponential"
                    G[srcid][dstid]['param_rate'] = exp_lambda
                else:
                    G[srcid][dstid]['distribution'] = "lognormal"
                    G[srcid][dstid]['param_location'] = lognorm_mu
                    G[srcid][dstid]['param_scale'] = lognorm_sigma

    write_graphml(G, tgen_tmodel_dst_path)
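
The written model can be read back for inspection with networkx's GraphML reader (a usage sketch):

from networkx import read_graphml

G = read_graphml(tgen_tmodel_dst_path)
print(G.number_of_nodes(), 'nodes,', G.number_of_edges(), 'edges')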
Example #29

from time import time

from networkx import DiGraph, ancestors

# load_input is a project-local parser for the puzzle input; it is not part
# of this excerpt. The head of count_of_subbags was cut off in the source;
# the first lines below are a reconstruction (an assumption) consistent with
# the calls further down.
def count_of_subbags(bag="shiny gold"):
    count_subbags = 0
    if G[bag]:
        for edge in G.out_edges(bag):
            count_subbags += G.get_edge_data(
                *edge)['weight'] * count_of_subbags(edge[1])
    else:
        return 1  #count_subbags += G.get_edge_data(*edge)['weight']

    return count_subbags + 1


start = time()
# bags, contain = load_input("input_test_7.txt")
# bags, contain = load_input("input_test2_7.txt")
bags, contain = load_input("input_7.txt")

loading = time()

# G = nx.DiGraph()
G = DiGraph()
G.add_nodes_from(bags)
for i, con in enumerate(contain):
    G.add_edges_from([(bags[i], c[1], {"weight": int(c[0])}) for c in con])

# super_bags = nx.algorithms.dag.ancestors(G, "shiny gold")
super_bags = ancestors(G, "shiny gold")
# print(super_bags)
print(f'Number of super bags: {len(super_bags)}')

total_bags = count_of_subbags()
print(
    f'Total number bags in shiny gold: {total_bags - 1}')  # -1 for shiny gold

end = time()
print(f"loading input: {loading - start}, solving: {end - loading}")
Example #30
def build_flux_graph(soln,
                     raw,
                     traced_element,
                     path_save=None,
                     overwrite=False,
                     i0=0,
                     i1='eq',
                     constV=False):
    """
	:param mechanism:        type = dict, keys include "species", "reaction", "element", etc
	:param raw:              type = dict, keys include "mole_fraction", "net_reaction_rate", etc
	:param traced_element:   type = str
	:param i0:               type = int, specifying the starting point of the considered interval of the raw data
	:param i1:               type = int or str, specifying the ending point of the considered interval of the raw data
	:return flux graph:      type = networkx object, will be also saved as a .json file,
	"""

    element = soln.element_names
    species = soln.species
    reaction = soln.reaction
    n_rxn = soln.n_reactions
    """ --------------------------------
	check if results already exist, if so, load
	-------------------------------- """

    if path_save is not None:
        if overwrite is False:
            try:
                data = json.load(open(path_save, 'r'))
                flux_graph = json_graph.node_link_graph(data)
                return flux_graph
            except IOError:
                pass
    """ --------------------------------
	if not, then compute, and save
	-------------------------------- """

    # ---------------------------------------------
    # check if traced_element is legal

    if traced_element not in element:
        raise ValueError('traced element ' + traced_element +
                         ' is not listed in mechanism')

    # ---------------------------------------------
    # find the reaction rate during the considered interval
    # unit will be converted to mole/sec

    rr = np.reshape(raw['net_reaction_rate'][i0, :], [n_rxn, 1])
    flux_graph = DiGraph()

    # -------------------------------------
    # adding edge from reactions
    # one edge may contribute from multiple reactions, the list of the contributors will be stored in edge['member']

    # note though in .cti id_rxn starts from 1, in soln.reaction, id_rxn starts from 0
    for id_rxn in range(n_rxn):

        # sp_mu is a dict, where key is species, val is net stoichiometric coefficient
        sp_mu = reaction(id_rxn).products
        for sp in reaction(id_rxn).reactants.keys():
            mu = reaction(id_rxn).reactants[sp]
            if sp in sp_mu.keys():
                sp_mu[sp] -= mu
            else:
                sp_mu[sp] = -mu

        # -----------------------
        # produced is a dict, where key is sp, val is number of traced atoms
        # being transferred when this sp is produced
        produced = {}
        consumed = {}

        for sp in sp_mu.keys():
            atoms = species(sp).composition
            if traced_element in atoms.keys():
                n = int(sp_mu[sp] * atoms[traced_element] *
                        np.sign(rr[id_rxn]))
                if n > 0:
                    produced[sp] = abs(n)
                elif n < 0:
                    consumed[sp] = abs(n)

        # -----------------------
        # consider this reaction only when traced element is transferred
        # note "if bool(consumed)" works the same way
        if bool(produced):
            n_sum = sum(produced.values())
            for target in produced.keys():
                for source in consumed.keys():

                    n_i2j = 1.0 * produced[target] * consumed[source] / n_sum

                    # note that the direction (source-->target) is already assured
                    # therefore we use abs(RR) here
                    dw = float(n_i2j * abs(rr[id_rxn]))

                    try:
                        flux_graph[source][target]['flux'] += dw
                    except KeyError:
                        # if this edge doesn't exist, create it
                        flux_graph.add_edge(source, target)
                        flux_graph[source][target]['flux'] = dw
                        flux_graph[source][target]['member'] = {}

                    flux_graph[source][target]['member'][str(id_rxn)] = dw
                    flux_graph[source][target][
                        '1/flux'] = 1.0 / flux_graph[source][target]['flux']

    # -------------------------------------
    # save the graph using json, which is fast, and human-readable

    if path_save is not None:
        data = json_graph.node_link_data(flux_graph)
        with open(path_save, 'w') as f:
            json.dump(data, f)
    #print 'graph saved as',path_save

    return flux_graph