Example 1
    def getCIGroups(local_data, ds_context=None, scope=None, alpha=0.001, families=None):
        """
        :param local_data: np array
        :param scope: a list of indices of the output variables
        :param alpha: threshold
        :param families: obsolete
        :return: np array of clustering

        This function takes an (output, conditional) tuple as input and returns independent groups.
        alpha is the cutoff parameter for connected components.
        BE CAREFUL WITH SPARSE DATA!
        """

        # data = preproc(local_data, ds_context, None, ohe)

        y, x = get_YX(local_data, ds_context.feature_size)

        # epsilon is assumed to be a small module-level constant; adding it keeps
        # p-values of exactly 0 (strong dependence) from being dropped as non-edges
        pvals = testRcoT(y, x) + epsilon

        pvals[pvals > alpha] = 0

        clusters = np.zeros(y.shape[1])
        for i, c in enumerate(connected_components(from_numpy_matrix(pvals))):
            clusters[list(c)] = i + 1

        return split_conditional_data_by_clusters(y,
                                                  x,
                                                  clusters,
                                                  scope,
                                                  rows=False)
Example 2
def getIndependentRDCGroups_py(data_slice,
                               threshold,
                               k=None,
                               s=1. / 6.,
                               non_linearity=numpy.sin,
                               n_jobs=1,
                               rand_gen=None):

    rdc_adjacency_matrix = rdc_test(data_slice,
                                    k=k,
                                    s=s,
                                    non_linearity=non_linearity,
                                    n_jobs=n_jobs,
                                    rand_gen=rand_gen)

    n_features = len(data_slice.cols)

    #
    # thresholding
    rdc_adjacency_matrix[rdc_adjacency_matrix < threshold] = 0
    #print("thresholding", rdc_adjacency_matrix)

    #
    # getting connected components
    result = numpy.zeros(n_features)
    for i, c in enumerate(connected_components(from_numpy_matrix(rdc_adjacency_matrix))):
        result[list(c)] = i + 1

    return result
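The thresholding plus connected-components pattern above recurs throughout these examples. A minimal, self-contained sketch of the idiom on made-up values (assuming networkx < 3.0, where from_numpy_matrix is still available):

import numpy
import networkx as nx

# hypothetical pairwise-dependency matrix over 4 features
adj = numpy.array([[0.0, 0.9, 0.1, 0.0],
                   [0.9, 0.0, 0.0, 0.1],
                   [0.1, 0.0, 0.0, 0.8],
                   [0.0, 0.1, 0.8, 0.0]])
adj[adj < 0.5] = 0  # thresholding: weak dependencies become non-edges

result = numpy.zeros(adj.shape[0])
for i, c in enumerate(nx.connected_components(nx.from_numpy_matrix(adj))):
    result[list(c)] = i + 1
print(result)  # [1. 1. 2. 2.] -> features {0, 1} and {2, 3} form two groups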
Example 3
import matplotlib.pyplot as mpl
import networkx as nx
import numpy as np
from networkx import from_numpy_matrix


def plotNetwork(path, corr):
    # Build an undirected graph from a labeled correlation matrix
    # (corr is a DataFrame; its index provides the node names).
    adj_matrix = corr
    constits_latest = corr.index
    # remove self-loops: zero out entries equal to 1 up to rounding error
    adj_matrix = np.where((adj_matrix <= 1.000001) & (adj_matrix >= 0.99999), 0, adj_matrix)
    # create undirected graph from adj_matrix
    graph = from_numpy_matrix(adj_matrix, parallel_edges=False, create_using=nx.Graph())
    # set node names to the tickers
    graph = nx.relabel.relabel_nodes(graph, dict(zip(range(len(constits_latest)), constits_latest)))
    pos_og = nx.circular_layout(graph, scale=2)
    pos = nx.circular_layout(graph, scale=1.7)

    # push label positions outward so they do not sit on top of the nodes
    for p in pos:
        if pos[p][1] > 1:
            pos[p][1] += 0.15
        if pos[p][1] < -1:
            pos[p][1] -= 0.15
        elif pos[p][0] < 0:
            pos[p][0] -= 0.3
        else:
            pos[p][0] += 0.3

    fig = mpl.figure(figsize=(5, 5))
    nx.draw(graph, pos_og, with_labels=False)
    nx.draw_networkx_labels(graph, pos)

    fig.savefig(path, dpi=300, transparent=True)
    mpl.clf()
    mpl.close()
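A hypothetical call, with a made-up 3 x 3 labeled correlation matrix (the ticker names are invented):

import numpy as np
import pandas as pd

labels = ['BTC', 'ETH', 'XRP']
corr = pd.DataFrame(np.array([[1.0, 0.8, 0.2],
                              [0.8, 1.0, 0.3],
                              [0.2, 0.3, 1.0]]),
                    index=labels, columns=labels)
plotNetwork('network.png', corr)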
Example 4
    def getCIGroups(local_data, ds_context=None, scope=None, alpha=0.001, families=None):
        """
        :param local_data: np array
        :param scope: a list of index to output variables
        :param alpha: threshold
        :param families: obsolete
        :return: np array of clustering

        This function take tuple (output, conditional) as input and returns independent groups
        alpha is the cutoff parameter for connected components
        BE CAREFUL WITH SPARSE DATA!
        """

        # ohe is assumed to be a module-level one-hot-encoding flag defined elsewhere
        data = preproc(local_data, ds_context, None, ohe)

        num_instance = data.shape[0]

        output_mask = np.zeros(data.shape, dtype=bool)  # todo check scope and node.scope again
        output_mask[:, np.arange(len(scope))] = True

        dataOut = data[output_mask].reshape(num_instance, -1)
        dataIn = data[~output_mask].reshape(num_instance, -1)

        assert len(dataIn) > 0
        assert len(dataOut) > 0

        pvals = testRcoT(dataOut, dataIn)

        pvals[pvals > alpha] = 0

        clusters = np.zeros(dataOut.shape[1])
        for i, c in enumerate(connected_components(from_numpy_matrix(pvals))):
            clusters[list(c)] = i + 1

        return split_conditional_data_by_clusters(local_data, clusters, scope, rows=False)
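The output/conditional mask split above can be seen in isolation. A minimal sketch on a made-up 3 x 4 array whose first two columns are the output variables:

import numpy as np

data = np.arange(12).reshape(3, 4)
scope = [0, 1]  # the first len(scope) columns are the output variables

output_mask = np.zeros(data.shape, dtype=bool)
output_mask[:, np.arange(len(scope))] = True

dataOut = data[output_mask].reshape(data.shape[0], -1)   # columns 0-1
dataIn = data[~output_mask].reshape(data.shape[0], -1)   # columns 2-3
print(dataOut.shape, dataIn.shape)  # (3, 2) (3, 2)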
Example 5
def read_sigle_data(data_dir,filename):

    temp = h5py.File(osp.join(data_dir, filename), 'r')

    # read edge and edge attribute
    pcorr = np.abs(temp['pcorr'][()])  # dataset[()] replaces the .value attribute removed from h5py
    # only keep the top 5% of edges (95th percentile threshold)
    th = np.percentile(pcorr.reshape(-1), 95)
    pcorr[pcorr < th] = 0  # set a threshold
    num_nodes = pcorr.shape[0]

    G = from_numpy_matrix(pcorr)
    A = nx.to_scipy_sparse_matrix(G)
    adj = A.tocoo()
    edge_att = np.zeros((len(adj.row)))
    for i in range(len(adj.row)):
        edge_att[i] = pcorr[adj.row[i], adj.col[i]]
    edge_index = np.stack([adj.row, adj.col])
    edge_index, edge_att = remove_self_loops(torch.from_numpy(edge_index).long(), torch.from_numpy(edge_att).float())
    edge_index, edge_att = coalesce(edge_index, edge_att, num_nodes,
                                    num_nodes)

    att = temp['corr'][()]

    return edge_att.data.numpy(), edge_index.data.numpy(), att, temp['indicator'][()], num_nodes
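A minimal sketch of the percentile thresholding and sparse edge extraction used above, on made-up data (assuming networkx < 3.0 for from_numpy_matrix and to_scipy_sparse_matrix):

import numpy as np
import networkx as nx

pcorr = np.abs(np.random.rand(10, 10))
th = np.percentile(pcorr.reshape(-1), 95)  # 95th percentile -> keep the top 5%
pcorr[pcorr < th] = 0

G = nx.from_numpy_matrix(pcorr)
adj = nx.to_scipy_sparse_matrix(G).tocoo()
edge_index = np.stack([adj.row, adj.col])  # COO edge list, shape (2, n_edges)
edge_att = pcorr[adj.row, adj.col]         # vectorized form of the per-edge loop above
print(edge_index.shape, edge_att.shape)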
Example 6
def process_single_data(index, use_gdc=False):
    # key to how we adapt to our model
    # read edge and edge attribute, partial correlation
    PTE_data = np.load("/home/wenhuicu/ImagePTE/pte_gcn/PRGNN_fMRI-main/PTE_parPearson_BCI-DNI.npz")
    NONPTE_data = np.load("/home/wenhuicu/ImagePTE/pte_gcn/PRGNN_fMRI-main/NONPTE_parPearson_BCI-DNI.npz")
    if index < PTE_data["conn_mat"].shape[0]:
        data = PTE_data
        new_index = index
        label = 1
    else:
        data = NONPTE_data
        new_index = index - PTE_data["conn_mat"].shape[0]
        label = 0

    pcorr = np.abs(data['partial_mat'][new_index])
    # only keep the top 5% of edges (95th percentile threshold)
    th = np.percentile(pcorr.reshape(-1), 95)
    pcorr[pcorr < th] = 0  # set a threshold

    num_nodes = pcorr.shape[0]
    G = from_numpy_matrix(pcorr)
    A = nx.to_scipy_sparse_matrix(G)
    adj = A.tocoo()
    edge_att = np.zeros(len(adj.row))
    for i in range(len(adj.row)):
        edge_att[i] = pcorr[adj.row[i], adj.col[i]]

    edge_index = np.stack([adj.row, adj.col])
    edge_index, edge_att = remove_self_loops(torch.from_numpy(edge_index), torch.from_numpy(edge_att))
    edge_index = edge_index.long()
    edge_index, edge_att = coalesce(edge_index, edge_att, num_nodes,
                                    num_nodes)

    att = data['conn_mat'][new_index]

    att_torch = torch.from_numpy(att).float()
    y_torch = torch.from_numpy(np.array(label)).long()  # classification

    data = Data(x=att_torch, edge_index=edge_index.long(), y=y_torch, edge_attr=edge_att)

    if use_gdc:
        '''
        Graph diffusion convolution (GDC), following
        https://papers.nips.cc/paper/2019/hash/23c894276a2c5a16470e6a31f4618d73-Abstract.html
        '''
        data.edge_attr = data.edge_attr.squeeze()
        gdc = GDC(self_loop_weight=1, normalization_in='sym',
                  normalization_out='col',
                  diffusion_kwargs=dict(method='ppr', alpha=0.2),
                  sparsification_kwargs=dict(method='topk', k=20,
                                             dim=0), exact=True)
        data = gdc(data)
        return data.edge_attr.data.numpy(), data.edge_index.data.numpy(), data.x.data.numpy(), data.y.data.item(), num_nodes

    else:
        return edge_att.data.numpy(), edge_index.data.numpy(), att, label, num_nodes
Example 7
import numpy as np
from networkx import Graph, from_numpy_matrix
from sklearn.metrics.pairwise import distance_metrics


def make_disk_graph(X, radius, metric='euclidean'):
    """Make a generalized disk graph, in which points whose distance is less
    than a certain radius are considered adjacent.

    Params:
        X: a 2D numpy array of shape (n_observations, n_features).
        radius: the radius of disks for adjacency.
        metric: string naming the metric. Options are given by
            sklearn.metrics.pairwise.distance_metrics. Default is 'euclidean'.

    Returns: a networkx simple Graph
    """
    metric = distance_metrics()[metric]
    dist = metric(X)
    # np.float was removed in NumPy 1.24; the builtin float is equivalent here
    adj = np.asarray(dist < radius, dtype=float)
    return from_numpy_matrix(adj, create_using=Graph)
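A hypothetical usage. Since every point is at distance 0 from itself, the diagonal of dist < radius is True, so from_numpy_matrix puts a self-loop on every node:

import numpy as np

X = np.array([[0.0, 0.0], [0.5, 0.0], [5.0, 5.0]])
G = make_disk_graph(X, radius=1.0)
print(sorted(G.edges()))  # [(0, 0), (0, 1), (1, 1), (2, 2)]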
Example 8
def process_single_data(index):
    # key to how we adapt to our model
    # read edge and edge attribute, partial correlation
    PTE_data = np.load("/home/wenhuicu/ImagePTE/pte_gcn/PRGNN_fMRI-main/PTE_parPearson_BCI-DNI_aug.npz")
    NONPTE_data = np.load("/home/wenhuicu/ImagePTE/pte_gcn/PRGNN_fMRI-main/NONPTE_parPearson_BCI-DNI_aug.npz")
    if index < PTE_data["conn_mat"].shape[0]:
        data = PTE_data
        new_index = index
    else:
        data = NONPTE_data
        new_index = index - PTE_data["conn_mat"].shape[0]

    pcorr = np.abs(data['partial_mat'][new_index])
    # only keep the top 5% of edges (95th percentile threshold)
    th = np.percentile(pcorr.reshape(-1), 95)
    pcorr[pcorr < th] = 0  # set a threshold
    num_nodes = pcorr.shape[0]


    G = from_numpy_matrix(pcorr)
    A = nx.to_scipy_sparse_matrix(G)
    adj = A.tocoo()
    edge_att = np.zeros((len(adj.row)))
    for i in range(len(adj.row)):
        edge_att[i] = pcorr[adj.row[i], adj.col[i]]
    edge_index = np.stack([adj.row, adj.col])
    edge_index, edge_att = remove_self_loops(torch.from_numpy(edge_index).long(), torch.from_numpy(edge_att).float())
    edge_index, edge_att = coalesce(edge_index, edge_att, num_nodes, num_nodes)

    # node attribute, Pearson correlation
    node_att = data["conn_mat"][new_index]
    pearson_corr = data["conn_mat"][new_index]
    mean_fmri = np.mean(data['features'][new_index], axis=-1, keepdims=True)
    std_fmri = np.std(data['features'][new_index], axis=-1, keepdims=True)
    # node_att = np.concatenate([pearson_corr, mean_fmri], axis=-1)

    return edge_att.data.numpy(), edge_index.data.numpy(), node_att, num_nodes


# read_data("")
    def load_from_numpy(self, np_adjacency_matrix):
        """
    	Load data from 2D numpy array interpreted as an Adjacency matrix into the Graph datatype of NetworkX

    	Parameters
    	----------
    		np_adjacency_matrix: np.array of shape(Nnodes, Nnodes,)
    		Adjacency matrix to be converted to graph

    	Returns
    	--------
    	    NetworkX graph with nodes labeled by indicy and directed weights given as per the np adjacency matrix
    	"""
        # self.graph = to_directed(from_numpy_matrix(np.array(np_adjacency_matrix)))
        graph = convert_matrix.from_numpy_matrix(np.array(np_adjacency_matrix),
                                                 create_using=DiGraph)
        self.nodes = len(graph.nodes)
        return graph
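The same conversion outside the class, as a quick sanity check (networkx < 3.0):

import numpy as np
import networkx as nx

A = np.array([[0, 2],
              [0, 0]])
g = nx.from_numpy_matrix(A, create_using=nx.DiGraph)
print(list(g.edges(data='weight')))  # [(0, 1, 2)]: one directed edge with weight 2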
Example 10
from networkx import Graph, from_numpy_matrix
from sklearn.metrics.pairwise import pairwise_kernels


def make_kernel_graph(X, metric='rbf', cutoff=0, **kwargs):
    """Make a weighted graph, using a pairwise kernel function for weights.

    Params:
        X: a 2D numpy array of shape (n_observations, n_features).
        metric: string or function, the metric to use when calculating the kernel.
            Options are given by sklearn.metrics.pairwise.pairwise_kernels.
            Default is 'rbf'.
        cutoff: float, optional kernel truncation value; entries below it
            are set to 0.
        **kwargs: passed to pairwise_kernels

    Returns: a networkx weighted Graph
    """
    kernel = pairwise_kernels(X, metric=metric, **kwargs)
    if cutoff:
        kernel[kernel < cutoff] = 0
    return from_numpy_matrix(kernel, create_using=Graph)
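A hypothetical call; gamma is an RBF parameter forwarded to pairwise_kernels via **kwargs:

import numpy as np

rng = np.random.default_rng(0)
X = rng.random((4, 3))
G = make_kernel_graph(X, metric='rbf', cutoff=0.5, gamma=1.0)
for u, v, w in G.edges(data='weight'):
    print(u, v, round(w, 3))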
Example 11
def getIndependentGDTGroups_py(data_slice,
                               threshold,
                               rand_gen=None):

    gdt_adjacency_matrix = pairwise_gdt(data_slice)

    n_features = len(data_slice.cols)

    #
    # thresholding
    gdt_adjacency_matrix[gdt_adjacency_matrix < threshold] = 0
    #print("thresholding", gdt_adjacency_matrix)

    #
    # getting connected components
    result = numpy.zeros(n_features)
    for i, c in enumerate(connected_components(from_numpy_matrix(gdt_adjacency_matrix))):
        result[list(c)] = i + 1

    return result
Example 12
def getIndependentGroupsStabilityTest(data, alpha=0.001):
    df = DataFrame(data,
                   columns=["V" + str(i) for i in range(1, data.shape[1] + 1)])

    # compute the stability test for each variable in parallel
    with Pool() as pool:
        pvals = pool.starmap(computePvals, zip(repeat(df), range(df.shape[1])))

    pvals = numpy.asarray(pvals)

    # symmetrize the p-value matrix (elementwise minimum of (i, j) and (j, i))
    # so that from_numpy_matrix below yields an undirected graph
    for i, j in zip(*numpy.tril_indices(pvals.shape[1])):
        pvals[i, j] = pvals[j, i] = min(pvals[i, j], pvals[j, i])

    pvals[numpy.diag_indices_from(pvals)] = 1

    pvals[pvals > alpha] = 0

    result = numpy.zeros(df.shape[1])
    for i, c in enumerate(connected_components(from_numpy_matrix(pvals))):
        result[list(c)] = i + 1

    return result
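The symmetrization step in isolation, on a made-up 3 x 3 p-value matrix:

import numpy

pvals = numpy.array([[0.00, 0.20, 0.70],
                     [0.05, 0.00, 0.90],
                     [0.30, 0.40, 0.00]])
for i, j in zip(*numpy.tril_indices(pvals.shape[1])):
    pvals[i, j] = pvals[j, i] = min(pvals[i, j], pvals[j, i])
print(pvals)  # symmetric: each off-diagonal pair now holds min(p_ij, p_ji)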
Example 13
def getIndependentRDCGroups_py(local_data,
                               threshold,
                               meta_types,
                               domains,
                               k=None,
                               s=1.0 / 6.0,
                               non_linearity=np.sin,
                               n_jobs=-2,
                               rand_gen=None):
    rdc_adjacency_matrix = rdc_test(local_data,
                                    meta_types,
                                    domains,
                                    k=k,
                                    s=s,
                                    non_linearity=non_linearity,
                                    n_jobs=n_jobs,
                                    rand_gen=rand_gen)

    #
    # The RDC test can return NaN (e.g. for constant, zero-variance columns);
    # treat those entries as "no dependency".
    rdc_adjacency_matrix[np.isnan(rdc_adjacency_matrix)] = 0
    n_features = local_data.shape[1]

    #
    # thresholding
    rdc_adjacency_matrix[rdc_adjacency_matrix < threshold] = 0
    # logger.info("thresholding %s", rdc_adjacency_matrix)

    #
    # getting connected components
    result = np.zeros(n_features)
    for i, c in enumerate(
            connected_components(from_numpy_matrix(rdc_adjacency_matrix))):
        result[list(c)] = i + 1

    return result
Example 14
        up_to_conjugation_elements[conjugation_class] = 0
    up_to_conjugation_elements[conjugation_class] += 1
graph_labelling = UpToConjugationGraphLabelling(up_to_conjugation_elements, elements_generator)
graph_covering = GraphCovering(graph, representation)

action2 = PGLGroupAction(pgl2)
representation2 = TransitiveActionUnitaryStandardRepresentation(action2, pgl2.get_pf().infinity())
graph_labelling2 = UpToConjugationGraphLabelling(conjugation_classes, pgl2)
graph_covering2 = GraphCovering(graph, representation2)

matching_polynomials = {}
characteristic_polynomials = {}

for labelling, weight in graph_labelling.weighted_labellings(graph):
    adjacency = graph_covering.adjacency(labelling).astype(int)
    lifted_graph = from_numpy_matrix(adjacency, create_using=nx.MultiGraph, parallel_edges=True)
    polynomial = get_matching_polynomial(lifted_graph)
    matching_polynomials[list(labelling.values())[0]] = (polynomial, weight)

for labelling, weight in graph_labelling2.weighted_labellings(graph):
    polynomial = graph_covering2.get_polynomial(labelling)
    characteristic_polynomials[list(labelling.values())[0]] = (polynomial, weight)


s = q+1
# s = sympy.symbols("s")
m = []

elements = list(characteristic_polynomials.keys())
weights = []
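One detail worth noting in the from_numpy_matrix call above: with create_using=nx.MultiGraph and parallel_edges=True, an integer entry k becomes k parallel edges rather than a single edge of weight k (hence the .astype(int)). A small check on a made-up matrix:

import numpy as np
import networkx as nx

A = np.array([[0, 2],
              [2, 0]])
g = nx.from_numpy_matrix(A, create_using=nx.MultiGraph, parallel_edges=True)
print(g.number_of_edges())  # 2: two parallel edges between nodes 0 and 1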
Example 15
        if final_mat.iloc[i, j] >= final_mat.iloc[j, i]:
            final_mat.iloc[i, j] += final_mat.iloc[j, i]
            final_mat.iloc[j, i] = 0
        else:
            final_mat.iloc[j, i] += final_mat.iloc[i, j]
            final_mat.iloc[i, j] = 0

print(final_mat)
for column in CONFIG.column_names:
    final_mat[column] = np.where(np.abs(final_mat[column]) < .5, 0, 1)

# Save final binary adjacency matrix
final_mat.to_csv("results/final_adjacency_matrix.csv", index=True)

# Draw the DAG
final_DAG = from_numpy_matrix(final_mat.to_numpy(), create_using=nx.DiGraph)
final_DAG = nx.relabel_nodes(
    final_DAG,
    dict(zip(list(range(CONFIG.data_variable_size)), CONFIG.column_names)))

final_DAG.remove_nodes_from(list(nx.isolates(final_DAG)))

nx.draw(
    final_DAG,
    node_color="lightcoral",
    node_size=75,
    font_size=3,
    width=0.5,
    arrowsize=4,
    with_labels=True,
    pos=nx.spring_layout(final_DAG),
)
Example 16
def get_translations(structure, structural_type='100'):
    assert structural_type in ['100', '110']

    metal = [
        Element.from_Z(z).symbol for z in set(structure.atomic_numbers)
        if Element.from_Z(z).is_metal or Element.from_Z(z).is_metalloid
    ]

    mul_structures, conn_components_, ab_indices = [], [], [0, 1, 2]
    conn_indices = [[2, 1, 1], [1, 2, 1], [1, 1, 2]]

    number_connected_components = [
        conn_comps_sci(adjacency_matrix(structure * i))[0]
        for i in conn_indices
    ]
    c_index = number_connected_components.index(
        max(number_connected_components))
    ab_indices.remove(c_index)

    extended_structure = structure * 3
    extended_components = list(
        conn_comps_netx(from_numpy_matrix(
            adjacency_matrix(extended_structure))))
    extended_sites = [[extended_structure[i] for i in components]
                      for components in extended_components]
    layers = [
        s for s in extended_sites
        if metal[0] in [site.specie.symbol for site in s]
    ]

    max_coords = [
        max([a.coords[c_index] for a in layer if a.specie.symbol == metal[0]])
        for layer in layers
    ]
    first_layer_index = max_coords.index(sorted(max_coords)[0])
    second_layer_index = max_coords.index(sorted(max_coords)[1])

    first_layer_coords = array([
        a.coords for a in layers[first_layer_index]
        if a.specie.symbol == metal[0]
    ])
    second_layer_coords = array([
        a.coords for a in layers[second_layer_index]
        if a.specie.symbol == metal[0]
    ])

    if structural_type == '110':
        first_layer_coords = first_layer_coords[
            first_layer_coords[:, c_index].argsort(
            )][:int(first_layer_coords.shape[0] / 2), :]
        second_layer_coords = second_layer_coords[
            second_layer_coords[:, c_index].argsort(
            )][:int(second_layer_coords.shape[0] / 2), :]

    a_axis = extended_structure.lattice.matrix[ab_indices][0]
    b_axis = extended_structure.lattice.matrix[ab_indices][1]
    perp = cross(a_axis / norm(a_axis), b_axis / norm(b_axis))

    dir_1 = sorted([
        c[0] - c[1] for c in combinations(first_layer_coords, 2)
        if abs(dot(c[0] - c[1], perp) / norm(c[0] - c[1]) / norm(perp)) < 0.07
    ],
                   key=norm)[0]
    m_dist = norm(dir_1)
    dir_1 = dir_1 / norm(dir_1)
    dir_2 = cross(perp / norm(perp), dir_1 / norm(dir_1))

    a_projections, b_projections = [], []

    for site_coords in first_layer_coords:
        nearest_site = second_layer_coords[KDTree(second_layer_coords).query(
            site_coords)[1]]
        a_projections.append(dot((site_coords - nearest_site), dir_1))
        b_projections.append(dot((site_coords - nearest_site), dir_2))

    a_translation = min([
        min(abs(m_dist - abs(p) % m_dist),
            abs(p) % m_dist) for p in a_projections
    ]) / m_dist

    if structural_type == '110':
        m_dist = m_dist * sqrt(2)

    b_translation = min([
        min(abs(m_dist - abs(p) % m_dist),
            abs(p) % m_dist) for p in b_projections
    ]) / m_dist

    return sorted([round(a_translation, 2), round(b_translation, 2)])
Example 17
def getIndependentRDCGroups_py(local_data,
                               threshold,
                               meta_types,
                               domains,
                               scope,
                               l_rfft=None,
                               is_pair=False,
                               k=None,
                               s=1.0 / 6.0,
                               non_linearity=np.sin,
                               n_jobs=-2,
                               rand_gen=None):
    # modified by zhongjie on 04.10.2019, by adding scope and keepComplexPairs
    rdc_adjacency_matrix = rdc_test(local_data,
                                    meta_types,
                                    domains,
                                    k=k,
                                    s=s,
                                    non_linearity=non_linearity,
                                    n_jobs=n_jobs,
                                    rand_gen=rand_gen)

    #
    # The RDC test can return NaN (e.g. for constant, zero-variance columns);
    # treat those entries as "no dependency".
    rdc_adjacency_matrix[np.isnan(rdc_adjacency_matrix)] = 0
    n_features = local_data.shape[1]

    #
    # Keep each real rFFT coefficient connected to its imaginary counterpart,
    # so that the split can be decided on the real coefficients alone: the
    # imag-real and imag-imag correlations may fall below the threshold, but
    # the explicit real-imag links added below keep each pair in one group.
    if l_rfft is not None:
        for s_real in scope:
            # select scope that belongs to the REAL part.
            if l_rfft - 1 > s_real % (l_rfft * 2) > 0:
                # keep the real and imag coefs connected
                index_real = scope.index(s_real)
                index_imag = scope.index(s_real + l_rfft)
                rdc_adjacency_matrix[index_real, index_imag] = 1
                rdc_adjacency_matrix[index_imag, index_real] = 1

    #
    # thresholding
    rdc_adjacency_matrix[rdc_adjacency_matrix < threshold] = 0
    # logger.info("thresholding %s", rdc_adjacency_matrix)

    #
    # getting connected components
    result = np.zeros(n_features)
    for i, c in enumerate(
            connected_components(from_numpy_matrix(rdc_adjacency_matrix))):
        result[list(c)] = i + 1

    return result
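The scope condition above can be checked in isolation. A sketch assuming l_rfft = 4 and a scope covering one block of eight rFFT coefficients, four real followed by four imaginary, so that real coefficient s pairs with s + l_rfft:

l_rfft = 4
scope = list(range(2 * l_rfft))

pairs = [(s, s + l_rfft) for s in scope
         if l_rfft - 1 > s % (l_rfft * 2) > 0]
print(pairs)  # [(1, 5), (2, 6)]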