Example #1
def pc_fisherz(data,
               threshold,
               skel_method,
               pc_depth=None,
               verbose=False,
               init_graph=None):
    import pcalg
    # from ci_test.ci_tests import ci_test_gauss
    from citestfz.ci_tests import ci_test_gauss

    # dm = np.array([data for nid, data in sorted(data.items())]).transpose()
    cm = np.corrcoef(data.T)
    args = {
        "indep_test_func": ci_test_gauss,
        "data_matrix": data.values,
        "corr_matrix": cm,
        "alpha": threshold,
        "method": skel_method,
        "verbose": verbose
    }
    if pc_depth is not None and pc_depth >= 0:
        args["max_reach"] = pc_depth
    if init_graph is not None:
        args["init_graph"] = init_graph
    (g, sep_set) = pcalg.estimate_skeleton(**args)
    g = pcalg.estimate_cpdag(skel_graph=g, sep_set=sep_set)
    return g
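
A minimal usage sketch for the wrapper above (the synthetic DataFrame, column names, and parameter values are hypothetical; it assumes numpy, pandas, pcalg, and the citestfz package are installed):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
x = rng.normal(size=1000)
y = 0.8 * x + rng.normal(size=1000)
z = 0.5 * y + rng.normal(size=1000)
df = pd.DataFrame({"x": x, "y": y, "z": z})

# alpha = 0.01; "stable" selects the order-independent skeleton search (see Example #13)
g = pc_fisherz(df, threshold=0.01, skel_method="stable")
print(g.edges())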
Example #2
def build_causal_graph(dm, labels, init_g):
    """
    Build causal graph with PC algorithm.
    """
    cm = np.corrcoef(dm.T)
    (G, sep_set) = pcalg.estimate_skeleton(indep_test_func=ci_test_fisher_z,
                                           data_matrix=dm,
                                           alpha=SIGNIFICANCE_LEVEL,
                                           corr_matrix=cm,
                                           init_graph=init_g)
    G = pcalg.estimate_cpdag(skel_graph=G, sep_set=sep_set)

    G = nx.relabel_nodes(G, labels)

    # Exclude nodes that have no path to "s-front-end_latency" for visualization
    remove_nodes = []
    undirected_G = G.to_undirected()
    for node in G.nodes():
        if not nx.has_path(undirected_G, node, ROOT_METRIC_NODE):
            remove_nodes.append(node)
            continue
        if re.match("^s-", node):
            color = "red"
        elif re.match("^c-", node):
            color = "blue"
        elif re.match("^m-", node):
            color = "purple"
        else:
            color = "green"
        G.nodes[node]["color"] = color
    G.remove_nodes_from(remove_nodes)
    print("Number of nodes: {}".format(G.number_of_nodes()))
    return G
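
Note that build_causal_graph relies on module-level names not shown in the snippet. A plausible sketch of their definitions (the exact values in the original module are unknown; ROOT_METRIC_NODE is taken from the comment in the function body):

SIGNIFICANCE_LEVEL = 0.05                 # alpha for the Fisher-z CI test (assumed value)
ROOT_METRIC_NODE = "s-front-end_latency"  # from the comment in the function body
# labels maps integer column indices to metric names whose prefixes ("s-", "c-", "m-")
# drive the node coloring, e.g. {0: "s-front-end_latency", 1: "c-user_cpu", ...}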
Example #3
def test_fixed_edges():
    '''
    The fixed edges shall appear in the skeleton
    '''
    data_matrix = np.array(bin_data).reshape((5000, 5))
    (graph, sep_set) = estimate_skeleton(indep_test_func=ci_test_bin,
                                         data_matrix=data_matrix,
                                         alpha=0.01)
    graph = estimate_cpdag(skel_graph=graph, sep_set=sep_set)
    assert not graph.has_edge(1, 2)

    fixed_edges = nx.DiGraph()
    fixed_edges.add_nodes_from(range(5))
    fixed_edges.add_edge(1, 2)
    with pytest.raises(ValueError):
        _ = estimate_skeleton(indep_test_func=ci_test_bin,
                              data_matrix=data_matrix,
                              alpha=0.01,
                              fixed_edges=((1, 2), ))
    with pytest.raises(ValueError):
        _ = estimate_skeleton(indep_test_func=ci_test_bin,
                              data_matrix=data_matrix,
                              alpha=0.01,
                              fixed_edges=nx.DiGraph({0: (1, )}))
    (graph, _) = estimate_skeleton(indep_test_func=ci_test_bin,
                                   data_matrix=data_matrix,
                                   alpha=0.01,
                                   fixed_edges=fixed_edges)
    assert graph.has_edge(1, 2), graph.edges
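
The two failing calls pin down the contract of fixed_edges in this pcalg implementation: it must be a networkx graph object (a bare tuple of pairs is rejected) whose node set matches the data columns (a graph containing only nodes 0 and 1 is rejected); only then is the fixed edge forced into the estimated skeleton.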
Example #4
def pc_gsq(d_dt, threshold):
    import pcalg
    from gsq.ci_tests import ci_test_bin

    dm = np.array([data for nid, data in sorted(d_dt.items())]).transpose()
    (g, sep_set) = pcalg.estimate_skeleton(indep_test_func=ci_test_bin, data_matrix=dm, alpha=threshold)
    g = pcalg.estimate_cpdag(skel_graph=g, sep_set=sep_set)
    return g
Example #5
def test_estimate_cpdag(indep_test_func, data_matrix, g_answer, alpha=0.01):
    '''
    estimate_cpdag should reveal the answer
    '''
    (graph, sep_set) = estimate_skeleton(indep_test_func=indep_test_func,
                                         data_matrix=data_matrix,
                                         alpha=alpha)
    graph = estimate_cpdag(skel_graph=graph, sep_set=sep_set)
    error_msg = 'True edges should be: %s' % (g_answer.edges(), )
    assert nx.is_isomorphic(graph, g_answer), error_msg
Example #6
def pc_pylib(nsdict, threshold):
    import pcalg
    from gsq.ci_tests import ci_test_bin

    dm = np.array([ns.get_values() for nid, ns
                   in sorted(nsdict.items())]).transpose()
    (g, sep_set) = pcalg.estimate_skeleton(indep_test_func=ci_test_bin,
                                           data_matrix=dm,
                                           alpha=threshold)
    g = pcalg.estimate_cpdag(skel_graph=g, sep_set=sep_set)
    return g
Example #7
def pc_fisherz(d_dt, threshold):
    import pcalg
    from ci_test.ci_tests import ci_test_gauss

    dm = np.array([data for nid, data in sorted(d_dt.items())]).transpose()
    cm = np.corrcoef(dm.T)
    (g, sep_set) = pcalg.estimate_skeleton(
        indep_test_func=ci_test_gauss, data_matrix=dm, alpha=threshold, corr_matrix=cm
    )
    g = pcalg.estimate_cpdag(skel_graph=g, sep_set=sep_set)
    return g
Example #8
def pc_pylib(nsdict, threshold):
    import pcalg
    from gsq.ci_tests import ci_test_bin

    dm = np.array([ns.get_values()
                   for nid, ns in sorted(nsdict.items())]).transpose()
    (g, sep_set) = pcalg.estimate_skeleton(indep_test_func=ci_test_bin,
                                           data_matrix=dm,
                                           alpha=threshold)
    g = pcalg.estimate_cpdag(skel_graph=g, sep_set=sep_set)
    return g
Example #9
def run_pc(data_orig, col_names=None):
    data = np.array([list(r) for r in data_orig])
    (skel_graph,
     sep_set) = pcalg.estimate_skeleton(indep_test_func=ci_tests.ci_test_dis,
                                        data_matrix=data,
                                        alpha=0.01)
    # gdir = nx.DiGraph()
    # gdir.add_nodes_from(g.nodes())
    # gdir.add_edges_from(g.edges())
    dag = pcalg.estimate_cpdag(skel_graph, sep_set)
    if col_names:
        name_map = {i: col_names[i] for i in range(len(dag.nodes()))}
        nx.relabel.relabel_nodes(dag, name_map, copy=False)
    return dag
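
A hypothetical invocation of run_pc (it assumes ci_tests refers to gsq.ci_tests and that the rows hold discrete values; the sample data and column names are made up):

rows = [(0, 1, 1), (1, 0, 1), (0, 0, 0), (1, 1, 0)] * 250  # third column is XOR of the first two
dag = run_pc(rows, col_names=["a", "b", "c"])
print(dag.edges())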
Example #10
def pc_gsq(d_dt, threshold, skel_method, pc_depth=None, verbose=False):
    import pcalg
    from gsq.ci_tests import ci_test_bin

    dm = np.array([data for nid, data in sorted(d_dt.items())]).transpose()
    args = {
        "indep_test_func": ci_test_bin,
        "data_matrix": dm,
        "alpha": threshold,
        "method": skel_method,
        "verbose": verbose
    }
    if pc_depth is not None and pc_depth >= 0:
        args["max_reach"] = pc_depth
    (g, sep_set) = pcalg.estimate_skeleton(**args)
    g = pcalg.estimate_cpdag(skel_graph=g, sep_set=sep_set)
    return g
Example #11
def pc_chen(indep_test_func, ts_data, p, alpha):
    dim = ts_data.shape[1]
    node_mapping, data_matrix = transform_ts(ts_data, p)
    corr_matrix = np.corrcoef(data_matrix, rowvar=False)

    adj_matrix = np.ones((data_matrix.shape[1], data_matrix.shape[1]))
    np.fill_diagonal(adj_matrix, 0)
    # from_numpy_matrix was removed in networkx 3.0; from_numpy_array is the replacement
    G = nx.from_numpy_array(adj_matrix)

    G, sep_sets = _estimate_skeleton(G,
                                     partial_corr_test,
                                     data_matrix,
                                     alpha,
                                     corr_matrix=corr_matrix)

    DG = G.to_directed()
    DG.remove_edges_from([(u, v) for (u, v) in DG.edges() if v >= dim])
    DAG = estimate_cpdag(DG, sep_sets)
    return nx.relabel_nodes(DAG, node_mapping)
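
Here transform_ts presumably unrolls the time series into lagged copies, so columns 0..dim-1 are the time-t variables and columns at indices >= dim are their lags. Removing every directed edge (u, v) with v >= dim before estimate_cpdag leaves only edges pointing into the current time slice, encoding the constraint that a variable cannot cause its own past.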
Example #12
def pc_gsq(data,
           threshold,
           skel_method,
           pc_depth=None,
           verbose=False,
           init_graph=None):
    import pcalg
    from gsq.ci_tests import ci_test_bin

    args = {
        "indep_test_func": ci_test_bin,
        "data_matrix": data.values,
        "alpha": threshold,
        "method": skel_method,
        "verbose": verbose
    }
    if pc_depth is not None and pc_depth >= 0:
        args["max_reach"] = pc_depth
    if init_graph is not None:
        args["init_graph"] = init_graph
    (g, sep_set) = pcalg.estimate_skeleton(**args)
    g = pcalg.estimate_cpdag(skel_graph=g, sep_set=sep_set)
    return g
Example #13
def estimate_dag(data,
                 threshold,
                 func,
                 skel_method="stable",
                 pc_depth=None,
                 verbose=False,
                 init_graph=None):

    import pcalg
    args = {
        "indep_test_func": func,
        "data_matrix": data.values,
        "alpha": threshold,
        "method": skel_method,
        "verbose": verbose
    }
    if pc_depth is not None and pc_depth >= 0:
        args["max_reach"] = pc_depth
    if init_graph is not None:
        args["init_graph"] = init_graph
    g, sep_set = pcalg.estimate_skeleton(**args)
    g = pcalg.estimate_cpdag(skel_graph=g, sep_set=sep_set)
    return g
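
estimate_dag generalizes the per-test wrappers above by taking the CI test function as a parameter. A minimal sketch with the binary G-square test (synthetic data; assumes pandas and the gsq package are installed):

import numpy as np
import pandas as pd
from gsq.ci_tests import ci_test_bin

rng = np.random.default_rng(1)
a = rng.integers(0, 2, size=2000)
b = a ^ (rng.random(2000) < 0.1)  # noisy copy of a, so an a-b edge is expected
df = pd.DataFrame({"a": a, "b": b.astype(int)})
g = estimate_dag(df, threshold=0.01, func=ci_test_bin)
print(g.edges())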
Example #14
def do_basic_crosstalk_detection(ds, number_of_regions, settings, confidence=0.95, verbosity=1, name=None,
                                 assume_independent_settings=True, filter_lengths=[]):
    """
    Implements crosstalk detection on multiqubit data (fine-grained data with entries for each experiment).

    Parameters
    ----------
    ds : pyGSTi DataSet or numpy array
        The multiqubit data to analyze. If this is a numpy array, it must contain time series data and
        must be 2-dimensional, with each entry a sequence of settings and measurement outcomes for each
        qubit region. A region is a set of one or more qubits; crosstalk is assessed between regions.
        The first n entries are the outcomes and the following entries are the settings.

    number_of_regions : int
        Number of regions in the experiment.

    settings : list
        List of length number_of_regions giving the number of settings for each qubit region.

    confidence : float, optional

    verbosity : int, optional

    name : str, optional

    assume_independent_settings : bool, optional
        If True, edges between setting variables are ignored when constructing the skeleton.

    filter_lengths : list, optional
        If non-empty, the dataset is filtered and the analysis is run only on sequences whose lengths
        appear in this list. This argument is used only when the dataset is passed in as a pyGSTi DataSet.

    Returns
    -------
    results : CrosstalkResults object
        The results of the crosstalk detection analysis. This contains the output skeleton graph and
        the DAG from the PC algorithm, indicating regions with detected crosstalk, plus all of the
        input information.

    """
    # -------------------------- #
    # Format and check the input #
    # -------------------------- #

    # This converts a DataSet to an array, as the code below uses arrays
    if type(ds) == _pygobjs.dataset.DataSet:

        opstr = ds.keys()[0]
        temp = ds.auxInfo[opstr]['settings']
        num_settings = len(temp)

        settings_shape = _np.shape(settings)
        # Check that settings is a list of length number_of_regions
        assert (len(settings_shape) == 1) and (settings_shape[0] == number_of_regions), \
            "settings should be a list of the same length as number_of_regions."

        dscopy = ds.copy_nonstatic()
        # filter out lengths not in filter_lengths
        if len(filter_lengths) > 0:
            for k in dscopy.keys():
                if len(k) not in filter_lengths:
                    dscopy.remove([k])

        dscopy.done_adding_data()

        # num columns = number of settings + number of regions (b/c we assume one outcome per region)
        num_columns = num_settings + number_of_regions

        num_data = len(dscopy.keys())

        data = []
        collect_settings = {key: [] for key in range(num_settings)}
        for row in range(num_data):
            opstr = dscopy.keys()[row]

            templine_set = [0] * num_settings
            settings_row = dscopy.auxInfo[opstr]['settings']

            for key in settings_row:
                if len(key) == 1:  # single region/qubit gate
                    templine_set[key[0]] = settings_row[key]
                    collect_settings[key[0]].append(settings_row[key])
                else:  # two-region/two-qubit gate
                    print("Two qubit gate, not sure what to do!!")  # TODO
                    return
            outcomes_row = dscopy[opstr]

            for outcome in outcomes_row:
                templine_out = [0] * number_of_regions

                if len(outcome[0]) == 1:
                    # outcomes labeled by bitstrings
                    for r in range(number_of_regions):
                        templine_out[r] = int(outcome[0][0][r])

                    num_rep = int(outcome[2])

                    templine_out.append(templine_set)
                    flattened_line = list(flatten(templine_out))
                else:
                    # outcomes labeled by tuples of bits
                    for r in range(number_of_regions):
                        templine_out[r] = int(outcome[0][1][0][r])  # templine_out[r] = int(outcome[0][r])
                        # print(templine_out[r])
                    num_rep = int(outcome[2])

                    templine_out.append(templine_set)
                    flattened_line = list(flatten(templine_out))

                for r in range(num_rep):
                    data.append(flattened_line)

        data = _np.asarray(data)

    # if the dataset is specified by a string, assume it's a filename with a saved numpy array
    elif type(ds) == str:
        data = _np.loadtxt(ds)
        data = data.astype(int)

        data_shape = _np.shape(data)
        settings_shape = _np.shape(settings)

        # Check that the input data is a 2D array
        assert len(data_shape) == 2, \
            "Input data format is incorrect! If the input is a numpy array it must be 2-dimensional."

        # Check that settings is a list of length number_of_regions
        assert (len(settings_shape) == 1) and (settings_shape[0] == number_of_regions), \
            "settings should be a list of the same length as number_of_regions."

        # The number of columns in the data must be consistent with the number of settings
        assert data_shape[1] == (sum(settings) + number_of_regions), \
            "Mismatch between the number of settings specified for each region and the number of columns in data."

        num_data = data_shape[0]
        num_columns = data_shape[1]

    # if neither a pyGSTi DataSet nor a string, assume a numpy array was passed in
    else:
        data_shape = _np.shape(ds)
        settings_shape = _np.shape(settings)

        # Check that the input data is a 2D array
        assert len(data_shape) == 2, \
            "Input data format is incorrect! If the input is a numpy array it must be 2-dimensional."

        # Check that settings is a list of length number_of_regions
        assert (len(settings_shape) == 1) and (settings_shape[0] == number_of_regions), \
            "settings should be a list of the same length as number_of_regions."

        # The number of columns in the data must be consistent with the number of settings
        assert data_shape[1] == (sum(settings) + number_of_regions), \
            "Mismatch between the number of settings specified for each region and the number of columns in data."

        data = ds

    data_shape = _np.shape(data)
    num_data = data_shape[0]
    num_columns = data_shape[1]

    # dump the array form of the dataset into a file for diagnostics
    _np.savetxt('dataset_dump.txt', data, fmt='%d')

    # --------------------------------------------------------- #
    # Prepare a results object, and store the input information #
    # --------------------------------------------------------- #

    # Initialize an empty results object.
    results = _obj.CrosstalkResults()

    # Records input information into the results object.
    results.name = name
    results.data = data
    if type(ds) == _pygobjs.dataset.DataSet:
        results.pygsti_ds = dscopy
    results.number_of_regions = number_of_regions
    results.settings = settings
    results.number_of_datapoints = num_data
    results.number_of_columns = num_columns
    results.confidence = confidence

    # ------------------------------------------------- #
    #     Calculate the causal graph skeleton           #
    # ------------------------------------------------- #

    if assume_independent_settings:
        # List edges between settings so that these can be ignored when constructing skeleton
        ignore_edges = []
        for set1 in range(number_of_regions, num_columns):
            for set2 in range(number_of_regions, num_columns):
                if set1 > set2:
                    ignore_edges.append((set1, set2))
    else:
        ignore_edges = []

    print("Calculating causal graph skeleton ...")
    (skel, sep_set) = pcalg.estimate_skeleton(ci_test_dis, data, 1 - confidence, ignore_edges)

    print("Calculating directed causal graph ...")
    g = pcalg.estimate_cpdag(skel_graph=skel, sep_set=sep_set)

    # Store skeleton and graph in results object
    results.skel = skel
    results.sep_set = sep_set
    results.graph = g

    # Calculate the column index for the first setting for each region
    setting_indices = {x: number_of_regions + sum(settings[:x]) for x in range(number_of_regions)}
    results.setting_indices = setting_indices

    node_labels = {}
    cnt = 0
    for col in range(num_columns):
        if col < number_of_regions:
            node_labels[cnt] = r'R$_{%d}$' % col
            cnt += 1
#            node_labels.append("$%d^O$" % col)
        else:
            for region in range(number_of_regions):
                if col in range(setting_indices[region],
                                (setting_indices[(region + 1)] if region < (number_of_regions - 1) else num_columns)):
                    break
            node_labels[cnt] = r'S$_{%d}^{(%d)}$' % (region, (col - setting_indices[region]))
            cnt += 1
            #node_labels.append("%d^S_{%d}$" % (region, (col-setting_indices[region]+1)))

    results.node_labels = node_labels

    # Generate crosstalk detected matrix and assign weight to each edge according to TVD variation in distribution of
    # destination variable when source variable is varied.
    print("Examining edges for crosstalk ...")

    cmatrix = _np.zeros((number_of_regions, number_of_regions))
    edge_weights = _np.zeros(len(g.edges()))
    is_edge_ct = _np.zeros(len(g.edges()))
    edge_tvds = {}
    source_levels_dict = {}
    max_tvds = {}
    median_tvds = {}
    max_tvd_explanations = {}

    def _setting_range(x):
        return range(
            setting_indices[x],
            setting_indices[x + 1] if x < (number_of_regions - 1) else num_columns
        )

    for idx, edge in enumerate(g.edges()):
        source = edge[0]
        dest = edge[1]

        if verbosity > 1:
            print("** Edge: ", edge, " **")

        # For each edge, decide if it represents crosstalk
        #   Crosstalk is:
        #       (1) an edge between outcomes on different regions
        #       (2) an edge between a region's outcome and a setting of another region

        # source and destination are results
        if source < number_of_regions and dest < number_of_regions:
            cmatrix[source, dest] = 1
            is_edge_ct[idx] = 1
            print("Crosstalk detected. Regions " + str(source) + " and " + str(dest))

        # source is a result, destination is a setting
        if source < number_of_regions and dest >= number_of_regions:
            # make sure the destination is not a setting for the source's own region
            if dest not in _setting_range(source):
                # search among regions to find the one this destination setting belongs to
                for region in range(number_of_regions):
                    if dest in _setting_range(region):
                        break
                cmatrix[source, region] = 1
                is_edge_ct[idx] = 1
                print("Crosstalk detected. Regions " + str(source) + " and " + str(region))

        # source is a setting, destination is a result
        if source >= number_of_regions and dest < number_of_regions:
            # make sure the source is not a setting for the destination's own region
            if source not in _setting_range(dest):
                # search among regions to find the one this source setting belongs to
                for region in range(number_of_regions):
                    if source in _setting_range(region):
                        break
                cmatrix[region, dest] = 1
                is_edge_ct[idx] = 1
                print("Crosstalk detected. Regions " + str(region) + " and " + str(dest))

        # For each edge in causal graph that represents crosstalk, calculate the TVD between distributions of dependent
        # variable when other variable is varied

        if is_edge_ct[idx] == 1:

            # the TVD calculation depends on what kind of crosstalk it is.

            # source and destination are results OR source is a result, destination is a setting
            if (source < number_of_regions and dest < number_of_regions) or \
               (source < number_of_regions and dest >= number_of_regions):
                source_levels, level_cnts = _np.unique(data[:, source], return_counts=True)
                num_levels = len(source_levels)

                if any(level_cnts < 10):
                    print((" ***   Warning: n<10 data points for some levels. "
                           "TVD calculations may have large error bars."))

                tvds = _np.zeros((num_levels, num_levels))
                calculated_tvds = []
                for i in range(num_levels):
                    for j in range(i):

                        marg1 = data[data[:, source] == source_levels[i], dest]
                        marg2 = data[data[:, source] == source_levels[j], dest]
                        n1, n2 = len(marg1), len(marg2)

                        marg1_levels, marg1_level_cnts = _np.unique(marg1, return_counts=True)
                        marg2_levels, marg2_level_cnts = _np.unique(marg2, return_counts=True)

                        #print(marg1_levels, marg1_level_cnts)
                        #print(marg2_levels, marg2_level_cnts)

                        tvd_sum = 0.0
                        for lidx, level in enumerate(marg1_levels):
                            temp = _np.where(marg2_levels == level)
                            if len(temp[0]) == 0:
                                tvd_sum += marg1_level_cnts[lidx] / n1
                            else:
                                tvd_sum += _np.fabs(marg1_level_cnts[lidx] / n1 - marg2_level_cnts[temp[0][0]] / n2)

                        tvds[i, j] = tvds[j, i] = tvd_sum / 2.0
                        calculated_tvds.append(tvds[i, j])

                edge_tvds[idx] = tvds
                source_levels_dict[idx] = source_levels
                max_tvds[idx] = _np.max(calculated_tvds)
                median_tvds[idx] = _np.median(calculated_tvds)

            # source is a setting, destination is a result
            else:
                source_levels, level_cnts = _np.unique(data[:, source], return_counts=True)
                num_levels = len(source_levels)

                if any(level_cnts < 10):
                    print((" ***   Warning: n<10 data points for some levels. "
                           "TVD calculations may have large error bars."))

                tvds = _np.zeros((num_levels, num_levels))
                max_dest_levels = _np.zeros((num_levels, num_levels))
                calculated_tvds = []
                for i in range(num_levels):
                    for j in range(i):

                        marg1 = data[data[:, source] == source_levels[i], ]
                        marg2 = data[data[:, source] == source_levels[j], ]

                        if(settings[dest] > 1):
                            print(('Region {} has more than one setting -- '
                                   'TVD code not implemented yet for this case').format(dest))
                            edge_tvds[idx] = tvds
                            source_levels_dict[idx] = source_levels
                        else:
                            dest_setting = setting_indices[dest]
                            dest_levels_i, dest_level_i_cnts = _np.unique(marg1[:, dest_setting], return_counts=True)
                            dest_levels_j, dest_level_j_cnts = _np.unique(marg2[:, dest_setting], return_counts=True)

                            common_dest_levels = list(set(dest_levels_i).intersection(dest_levels_j))

                            if common_dest_levels == []:
                                # No common settings on the destination regions for this combination of settings
                                # for the source region
                                # No sensible TVD here
                                tvds[i, j] = tvds[j, i] = -1
                            else:
                                max_tvd = 0
                                max_dest_level = 0
                                for dest_level in common_dest_levels:
                                    marg1d = marg1[marg1[:, dest_setting] == dest_level, dest]
                                    marg2d = marg2[marg2[:, dest_setting] == dest_level, dest]

                                    n1, n2 = len(marg1d), len(marg2d)

                                    marg1d_levels, marg1d_level_cnts = _np.unique(marg1d, return_counts=True)
                                    marg2d_levels, marg2d_level_cnts = _np.unique(marg2d, return_counts=True)

                                    #print(marg1_levels, marg1_level_cnts)
                                    #print(marg2_levels, marg2_level_cnts)

                                    tvd_sum = 0.0
                                    for lidx, level in enumerate(marg1d_levels):
                                        temp = _np.where(marg2d_levels == level)
                                        if len(temp[0]) == 0:
                                            tvd_sum += marg1d_level_cnts[lidx] / n1
                                        else:
                                            tvd_sum += _np.fabs(marg1d_level_cnts[lidx] / n1
                                                                - marg2d_level_cnts[temp[0][0]] / n2)

                                    if tvd_sum > max_tvd:
                                        max_tvd = tvd_sum
                                        max_dest_level = dest_level

                                tvds[i, j] = tvds[j, i] = max_tvd / 2.0
                                calculated_tvds.append(tvds[i, j])
                                max_dest_levels[i, j] = max_dest_levels[j, i] = max_dest_level

                edge_tvds[idx] = tvds
                source_levels_dict[idx] = source_levels
                max_tvds[idx] = _np.max(calculated_tvds)
                # median over the vector of calculated TVDs rather than the tvds matrix,
                # since the matrix may contain -1 entries that would skew the median
                median_tvds[idx] = _np.median(calculated_tvds)

                if max_tvds[idx] > 0:
                    i = _np.floor_divide(_np.argmax(tvds), num_levels)
                    j = _np.mod(_np.argmax(tvds), num_levels)

                    source_setting1 = source_levels[i]
                    source_setting2 = source_levels[j]
                    dest_setting = max_dest_levels[i, j]

                    # The following assumes each region is a single qubit -- need to generalize # TODO
                    source_qubit = source - results.number_of_regions
                    dest_qubit = dest

                    if results.pygsti_ds is None:
                        max_tvd_explanations[idx] = ("Max TVD = {}. Settings on source qubit: {}, {}. Setting on "
                                                     "destination qubit: {}").format(max_tvds[idx], source_setting1,
                                                                                     source_setting2, dest_setting)
                    else:
                        source_setting1_seq = 0
                        for key in results.pygsti_ds.keys():
                            if results.pygsti_ds.auxInfo[key]['settings'][(source_qubit,)] == source_setting1:
                                key_copy = key.copy(editable=True)
                                key_copy.delete_lines([i for i in range(key.number_of_lines()) if i != source_qubit])

                                source_setting1_seq = key_copy
                                break

                        source_setting2_seq = 0
                        for key in results.pygsti_ds.keys():
                            if results.pygsti_ds.auxInfo[key]['settings'][(source_qubit,)] == source_setting2:
                                key_copy = key.copy(editable=True)
                                key_copy.delete_lines([i for i in range(key.number_of_lines()) if i != source_qubit])

                                source_setting2_seq = key_copy
                                break

                        dest_seq = 0
                        for key in results.pygsti_ds.keys():
                            if results.pygsti_ds.auxInfo[key]['settings'][(dest_qubit,)] == dest_setting:
                                key_copy = key.copy(editable=True)
                                key_copy.delete_lines([i for i in range(key.number_of_lines()) if i != dest_qubit])
                                dest_seq = key_copy
                                break

                        for key in results.pygsti_ds.keys():
                            if (results.pygsti_ds.auxInfo[key]['settings'][(source_qubit,)] == source_setting1) and \
                               (results.pygsti_ds.auxInfo[key]['settings'][(dest_qubit,)] == dest_setting):
                                res1 = results.pygsti_ds[key]

                            if (results.pygsti_ds.auxInfo[key]['settings'][(source_qubit,)] == source_setting2) and \
                               (results.pygsti_ds.auxInfo[key]['settings'][(dest_qubit,)] == dest_setting):
                                res2 = results.pygsti_ds[key]

                        max_tvd_explanations[idx] = \
                            ("Max TVD = {}. Settings on source qubit: {}, {}. Setting on destination qubit: {}\n"
                             "    Sequences on source (qubit {})\n {}\n {}\n    Sequence on destination (qubit {})\n"
                             " {}\n"
                             "    Results when source={}, destination={}:\n"
                             " {}\n"
                             "    Results when source={}, destination={}:\n {}\n").format(
                                 max_tvds[idx], source_setting1, source_setting2, dest_setting, source_qubit,
                                 source_setting1_seq, source_setting2_seq, dest_qubit, dest_seq, source_setting1,
                                 dest_setting, res1, source_setting2, dest_setting, res2)
                else:
                    max_tvd_explanations[idx] = "Max TVD = 0. Experiment not rich enough to calculate TVD."

    results.cmatrix = cmatrix
    results.is_edge_ct = is_edge_ct
    results.crosstalk_detected = _np.sum(is_edge_ct) > 0
    results.edge_weights = edge_weights
    results.edge_tvds = edge_tvds
    results.max_tvds = max_tvds
    results.median_tvds = median_tvds
    results.max_tvd_explanations = max_tvd_explanations

    return results
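
The nested loops in both branches of the example above compute the empirical total variation distance between two marginal distributions, TVD(P, Q) = (1/2) * sum_x |P(x) - Q(x)|. A compact numpy equivalent of that inner computation (a hypothetical helper, not part of the original code):

import numpy as np

def empirical_tvd(sample1, sample2):
    # union of observed levels so both empirical PMFs share the same support
    levels = np.union1d(sample1, sample2)
    p = np.array([np.mean(sample1 == lv) for lv in levels])
    q = np.array([np.mean(sample2 == lv) for lv in levels])
    return 0.5 * np.abs(p - q).sum()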
Example #15
def do_basic_crosstalk_detection(ds,
                                 number_of_regions,
                                 settings,
                                 confidence=0.95,
                                 verbosity=1,
                                 name=None,
                                 assume_independent_settings=True):
    """
    Implements crosstalk detection on multiqubit data (fine-grained data with entries for each experiment).

    Parameters
    ----------
    ds : pyGSTi DataSet or numpy array
        The multiqubit data to analyze. If this is a numpy array, it must contain time series data and
        must be 2-dimensional, with each entry a sequence of settings and measurement outcomes for each
        qubit region. A region is a set of one or more qubits; crosstalk is assessed between regions.
        The first n entries are the outcomes and the following entries are the settings.

    number_of_regions : int
        Number of regions in the experiment.

    settings : list
        List of length number_of_regions giving the number of settings for each qubit region.

    confidence : float, optional

    verbosity : int, optional

    name : str, optional

    Returns
    -------
    results : CrosstalkResults object
        The results of the crosstalk detection analysis. This contains the output skeleton graph and
        the DAG from the PC algorithm, indicating regions with detected crosstalk, plus all of the
        input information.

    """
    # -------------------------- #
    # Format and check the input #
    # -------------------------- #

    if type(ds) != _pygobjs.dataset.DataSet:

        data_shape = _np.shape(ds)
        settings_shape = _np.shape(settings)

        # Check that the input data is a 2D array
        assert len(data_shape) == 2, \
            "Input data format is incorrect! If the input is a numpy array it must be 2-dimensional."

        # Check that settings is a list of length number_of_regions
        assert (len(settings_shape) == 1) and (settings_shape[0] == number_of_regions), \
            "settings should be a list of the same length as number_of_regions."

        # The number of columns in the data must be consistent with the number of settings
        assert data_shape[1] == (sum(settings) + number_of_regions), \
            "Mismatch between the number of settings specified for each region and the number of columns in data."

        data = ds
        num_data = data_shape[0]
        num_columns = data_shape[1]

    # This converts a DataSet to an array, as the code below uses arrays
    if type(ds) == _pygobjs.dataset.DataSet:

        opstr = ds.keys()[0]
        temp = ds.auxInfo[opstr]['settings']
        num_settings = len(temp)

        settings_shape = _np.shape(settings)
        # Check that settings is a list of length number_of_regions
        assert (len(settings_shape) == 1) and (settings_shape[0] == number_of_regions), \
            "settings should be a list of the same length as number_of_regions."

        # num columns = number of settings + number of regions (b/c we assume one outcome per region)
        num_columns = num_settings + number_of_regions

        num_data = len(ds.keys())

        data = []
        collect_settings = {key: [] for key in range(num_settings)}
        for row in range(num_data):
            opstr = ds.keys()[row]

            templine_set = [0] * num_settings
            settings_row = ds.auxInfo[opstr]['settings']

            for key in settings_row:
                if len(key) == 1:  # single region/qubit gate
                    templine_set[key[0]] = settings_row[key]
                    collect_settings[key[0]].append(settings_row[key])
                else:  # two-region/two-qubit gate
                    print("Two qubit gate, not sure what to do!!")  # TODO
                    return

            outcomes_row = ds[opstr]
            for outcome in outcomes_row:
                templine_out = [0] * number_of_regions

                for r in range(number_of_regions):
                    templine_out[r] = int(outcome[0][r])
                num_rep = int(outcome[2])

                templine_out.append(templine_set)
                flattened_line = list(flatten(templine_out))

                for r in range(num_rep):
                    data.append(flattened_line)

        data = _np.asarray(data)

    # --------------------------------------------------------- #
    # Prepare a results object, and store the input information #
    # --------------------------------------------------------- #

    # Initialize an empty results object.
    results = _obj.CrosstalkResults()

    # Records input information into the results object.
    results.name = name
    results.data = data
    results.number_of_regions = number_of_regions
    results.settings = settings
    results.number_of_datapoints = num_data
    results.number_of_columns = num_columns
    results.confidence = confidence

    # ------------------------------------------------- #
    #     Calculate the causal graph skeleton           #
    # ------------------------------------------------- #

    if assume_independent_settings:
        # List edges between settings so that these can be ignored when constructing skeleton
        ignore_edges = []
        for set1 in range(number_of_regions, num_columns):
            for set2 in range(number_of_regions, num_columns):
                if set1 > set2:
                    ignore_edges.append((set1, set2))
    else:
        ignore_edges = []

    print("Calculating causal graph skeleton ...")
    (skel, sep_set) = pcalg.estimate_skeleton(ci_test_dis, data,
                                              1 - confidence, ignore_edges)

    print("Calculating directed causal graph ...")
    g = pcalg.estimate_cpdag(skel_graph=skel, sep_set=sep_set)

    # Store skeleton and graph in results object
    results.skel = skel
    results.sep_set = sep_set
    results.graph = g

    # Calculate the column index for the first setting for each region
    setting_indices = {
        x: number_of_regions + sum(settings[:x])
        for x in range(number_of_regions)
    }
    results.setting_indices = setting_indices

    node_labels = {}
    cnt = 0
    for col in range(num_columns):
        if col < number_of_regions:
            node_labels[cnt] = r'R$_{%d}$' % col
            cnt += 1
#            node_labels.append("$%d^O$" % col)
        else:
            for region in range(number_of_regions):
                if col in range(setting_indices[region],
                                (setting_indices[(region + 1)] if region <
                                 (number_of_regions - 1) else num_columns)):
                    break
            node_labels[cnt] = r'S$_{%d}^{(%d)}$' % (region,
                                                     (col -
                                                      setting_indices[region]))
            cnt += 1
            #node_labels.append("%d^S_{%d}$" % (region, (col-setting_indices[region]+1)))

    results.node_labels = node_labels

    # Generate crosstalk detected matrix and assign weight to each edge according to TVD variation in distribution of
    # destination variable when source variable is varied.
    print("Examining edges for crosstalk ...")

    cmatrix = _np.zeros((number_of_regions, number_of_regions))
    edge_weights = _np.zeros(len(g.edges()))
    is_edge_ct = _np.zeros(len(g.edges()))
    edge_tvds = {}

    def _setting_range(x):
        return range(
            setting_indices[x], setting_indices[x + 1] if x <
            (number_of_regions - 1) else num_columns)

    for idx, edge in enumerate(g.edges()):
        source = edge[0]
        dest = edge[1]

        if verbosity > 1:
            print("** Edge: ", edge, " **")

        # For each edge, decide if it represents crosstalk
        #   Crosstalk is:
        #       (1) an edge between outcomes on different regions
        #       (2) an edge between a region's outcome and a setting of another region
        if source < number_of_regions and dest < number_of_regions:
            cmatrix[source, dest] = 1
            is_edge_ct[idx] = 1
            print("Crosstalk detected. Regions " + str(source) + " and " +
                  str(dest))

        if source < number_of_regions and dest >= number_of_regions:
            if dest not in _setting_range(source):
                for region in range(number_of_regions):
                    if dest in _setting_range(region):
                        break
                cmatrix[source, region] = 1
                is_edge_ct[idx] = 1
                print("Crosstalk detected. Regions " + str(source) + " and " +
                      str(region))

        if source >= number_of_regions and dest < number_of_regions:
            if source not in _setting_range(dest):
                for region in range(number_of_regions):
                    if source in _setting_range(region):
                        break
                cmatrix[region, dest] = 1
                is_edge_ct[idx] = 1
                print("Crosstalk detected. Regions " + str(region) + " and " +
                      str(dest))

        # For each edge in causal graph that represents crosstalk, calculate the TVD between distributions of dependent
        # variable when other variable is varied

        if is_edge_ct[idx] == 1:

            source_levels, level_cnts = _np.unique(data[:, source],
                                                   return_counts=True)
            num_levels = len(source_levels)

            if any(level_cnts < 10):
                print(
                    " ***   Warning: n<10 data points for some levels. TVD calculations may have large error bars."
                )

            tvds = _np.zeros((num_levels, num_levels))
            for i in range(num_levels):
                for j in range(i):

                    marg1 = data[data[:, source] == source_levels[i], dest]
                    marg2 = data[data[:, source] == source_levels[j], dest]
                    n1, n2 = len(marg1), len(marg2)

                    marg1_levels, marg1_level_cnts = _np.unique(
                        marg1, return_counts=True)
                    marg2_levels, marg2_level_cnts = _np.unique(
                        marg2, return_counts=True)

                    tvd_sum = 0.0
                    for lidx, level in enumerate(marg1_levels):
                        temp = _np.where(marg2_levels == level)
                        if len(temp[0]) == 0:
                            tvd_sum += marg1_level_cnts[lidx] / n1
                        else:
                            tvd_sum += _np.fabs(marg1_level_cnts[lidx] / n1 -
                                                marg2_level_cnts[temp[0][0]] /
                                                n2)

                    tvds[i, j] = tvds[j, i] = tvd_sum / 2.0

            edge_tvds[idx] = tvds

    results.cmatrix = cmatrix
    results.is_edge_ct = is_edge_ct
    results.crosstalk_detected = _np.sum(is_edge_ct) > 0
    results.edge_weights = edge_weights
    results.edge_tvds = edge_tvds

    return results
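
A hypothetical end-to-end call on synthetic data, with two single-qubit regions and one setting column per region (columns ordered as [outcome_0, outcome_1, setting_0, setting_1]; all values are made up for illustration, and the pcalg variant used here is assumed to accept ignore_edges positionally as in the snippet above):

import numpy as np

rng = np.random.default_rng(2)
n = 5000
s0 = rng.integers(0, 2, size=n)
s1 = rng.integers(0, 2, size=n)
o0 = s0 ^ (rng.random(n) < 0.05)        # region 0 outcome tracks its own setting
o1 = s1 ^ (s0 & (rng.random(n) < 0.5))  # region 1 outcome also leaks region 0's setting
data = np.column_stack([o0, o1, s0, s1]).astype(int)

results = do_basic_crosstalk_detection(data, number_of_regions=2, settings=[1, 1])
print(results.crosstalk_detected)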