Code Example #1
File: sampler_auta.py Project: WhiteReal/MIP
def construct_graph(p_p_g, a_a_g, p_a_g):
    # Canonicalize each edge set: sort, make the pp/aa edges undirected, drop self-loops.
    p_p_edges = p_p_g.edge_index
    p_p_edges = utils.sort_edge_index(p_p_edges)[0]
    p_p_edges = utils.to_undirected(p_p_edges)
    p_p_edges = utils.remove_self_loops(p_p_edges)[0]
    a_a_edges = a_a_g.edge_index
    a_a_edges = utils.sort_edge_index(a_a_edges)[0]
    a_a_edges = utils.to_undirected(a_a_edges)
    a_a_edges = utils.remove_self_loops(a_a_edges)[0]
    p_a_edges = p_a_g.edge_index
    p_a_edges = utils.sort_edge_index(p_a_edges)[0]
    p_a_edges = utils.remove_self_loops(p_a_edges)[0]
    paper_paper_graph = dgl.graph((p_p_edges[0], p_p_edges[1]), 'paper', 'pp')
    author_author_graph = dgl.graph((a_a_edges[0], a_a_edges[1]), 'author',
                                    'aa')
    paper_author_graph = dgl.bipartite(
        (p_a_edges[0], p_a_edges[1]),
        'paper',
        'pa',
        'author',
        num_nodes=(paper_paper_graph.number_of_nodes(),
                   author_author_graph.number_of_nodes()))
    author_paper_graph = dgl.bipartite(
        (p_a_edges[1], p_a_edges[0]),
        'author',
        'ap',
        'paper',
        num_nodes=(author_author_graph.number_of_nodes(),
                   paper_paper_graph.number_of_nodes()))
    hg = dgl.hetero_from_relations([
        author_author_graph, author_paper_graph, paper_author_graph,
        paper_paper_graph
    ])

    return hg
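
This snippet targets the pre-0.5 DGL API: dgl.bipartite and dgl.hetero_from_relations were removed in later DGL releases in favor of dgl.heterograph. A minimal sketch of the equivalent construction on newer DGL, with hypothetical placeholder edge tensors:

import dgl
import torch

pp = (torch.tensor([0, 1]), torch.tensor([1, 0]))   # paper-paper edges
aa = (torch.tensor([0]), torch.tensor([0]))         # author-author edges
pa = (torch.tensor([0, 1]), torch.tensor([0, 0]))   # paper-author edges
hg = dgl.heterograph({
    ('paper', 'pp', 'paper'): pp,
    ('author', 'aa', 'author'): aa,
    ('paper', 'pa', 'author'): pa,
    ('author', 'ap', 'paper'): (pa[1], pa[0]),
}, num_nodes_dict={'paper': 2, 'author': 1})
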
Code Example #2
import torch
from torch_geometric.utils import sort_edge_index


def test_sort_edge_index():
    edge_index = torch.tensor([[2, 1, 1, 0], [1, 2, 0, 1]])
    edge_attr = torch.tensor([[1], [2], [3], [4]])

    out = sort_edge_index(edge_index)
    assert out.tolist() == [[0, 1, 1, 2], [1, 0, 2, 1]]

    out = sort_edge_index(edge_index, edge_attr)
    assert out[0].tolist() == [[0, 1, 1, 2], [1, 0, 2, 1]]
    assert out[1].tolist() == [[4], [3], [2], [1]]
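
Note the two return conventions exercised here: without edge_attr, recent PyG versions return the sorted edge_index alone, while passing edge_attr yields a tuple. Several older snippets below unpack a pair unconditionally (edge_index, _ = sort_edge_index(edge_index)), which matches earlier PyG releases where the result was always a tuple. A self-contained sketch of both call shapes:

import torch
from torch_geometric.utils import sort_edge_index

edge_index = torch.tensor([[1, 0], [0, 1]])
edge_attr = torch.tensor([[1.0], [2.0]])
out = sort_edge_index(edge_index)                   # newer PyG: tensor only
out, attr = sort_edge_index(edge_index, edge_attr)  # with edge_attr: a tuple
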
Code Example #3
File: dbp15k.py Project: zhihy96/Multi-GNN
    def process_graph(self, triple_path, feature_path, embeddings):
        g1 = read_txt_array(triple_path, sep='\t', dtype=torch.long)
        subj, rel, obj = g1.t()

        # Map raw entity id -> stacked word embeddings of its (lowercased) name.
        x_dict = {}
        with open(feature_path, 'r') as f:
            for line in f:
                info = line.strip().split('\t')
                info = info if len(info) == 2 else info + ['**UNK**']
                seq = info[1].lower().split()
                hs = [embeddings.get(w, embeddings['**UNK**']) for w in seq]
                x_dict[int(info[0])] = torch.stack(hs, dim=0)

        idx = torch.tensor(list(x_dict.keys()))
        assoc = torch.full((idx.max().item() + 1, ), -1, dtype=torch.long)
        assoc[idx] = torch.arange(idx.size(0))

        subj, obj = assoc[subj], assoc[obj]
        edge_index = torch.stack([subj, obj], dim=0)
        edge_index, rel = sort_edge_index(edge_index, rel)

        xs = [None for _ in range(idx.size(0))]
        for i in x_dict.keys():
            xs[assoc[i]] = x_dict[i]
        x = torch.nn.utils.rnn.pad_sequence(xs, batch_first=True)

        return x, edge_index, rel, assoc
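
The assoc tensor above is a common re-indexing idiom: it maps raw, non-contiguous entity IDs onto a dense 0..N-1 range so they can be used directly as tensor indices. A self-contained sketch with hypothetical IDs:

import torch

raw_ids = torch.tensor([3, 7, 42])              # sparse, non-contiguous IDs
assoc = torch.full((raw_ids.max().item() + 1,), -1, dtype=torch.long)
assoc[raw_ids] = torch.arange(raw_ids.size(0))  # assoc[3]=0, assoc[7]=1, assoc[42]=2
print(assoc[torch.tensor([7, 42, 3])])          # tensor([1, 2, 0])
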
Code Example #4
File: Graph_AE_SAGE.py Project: gyhandy/Graph_AE
    def augment_adj(self, edge_index, edge_weight, num_nodes):
        # Sorting makes the sparse inputs valid for spspmm, which multiplies
        # the adjacency matrix by itself (two-hop connectivity).
        edge_index, edge_weight = sort_edge_index(edge_index, edge_weight,
                                                  num_nodes)
        edge_index, edge_weight = spspmm(edge_index, edge_weight, edge_index,
                                         edge_weight, num_nodes, num_nodes,
                                         num_nodes)
        # Note: `device` comes from the enclosing module scope, and the
        # computed edge_weight is discarded by this return.
        return edge_index.to(device)
Code Example #5
File: dbp15k.py Project: underreview/EASY
    def process_graph(self, triple_path, feature_path):
        g1 = read_txt_array(triple_path, sep='\t', dtype=torch.long)
        subj, rel, obj = g1.t()
        name_dict = {}
        with open(feature_path, 'r') as f:
            for line in f:
                info = line.strip().split('\t')
                info = info if len(info) == 2 else info + ['']
                seq_str = remove_punc(info[1]).strip()
                if seq_str == "":
                    seq_str = '<unk>'
                name_dict[int(info[0])] = seq_str

        idx = torch.tensor(list(name_dict.keys()))
        assoc = torch.full((idx.max().item() + 1,), -1, dtype=torch.long)
        assoc[idx] = torch.arange(idx.size(0))

        subj, obj = assoc[subj], assoc[obj]
        edge_index = torch.stack([subj, obj], dim=0)
        edge_index, rel = sort_edge_index(edge_index, rel)

        # xs = [None for _ in range(idx.size(0))]
        names = [None for _ in range(idx.size(0))]
        for i in name_dict.keys():
            names[assoc[i]] = name_dict[i]
        # x = torch.nn.utils.rnn.pad_sequence(xs, batch_first=True)

        return edge_index, rel, assoc, names
Code Example #6
File: UNet.py Project: gyhandy/Graph_AE
    def augment_adj(self, edge_index, edge_weight, num_nodes):
        # coalesce merges duplicate edges (summing their weights) before the
        # adjacency matrix is squared via spspmm.
        edge_index, edge_weight = coalesce(edge_index, edge_weight, num_nodes,
                                           num_nodes)
        edge_index, edge_weight = sort_edge_index(edge_index, edge_weight,
                                                  num_nodes)
        edge_index, edge_weight = spspmm(edge_index, edge_weight, edge_index,
                                         edge_weight, num_nodes, num_nodes,
                                         num_nodes)
        return edge_index, edge_weight
Code Example #7
    def augment_adj(self, edge_index, edge_weight, num_nodes):
        # Temporary self-loops turn A into A + I, so the sparse product below
        # covers one- and two-hop paths; the loops are stripped again afterwards.
        edge_index, edge_weight = add_self_loops(edge_index, edge_weight,
                                                 num_nodes=num_nodes)
        edge_index, edge_weight = sort_edge_index(edge_index, edge_weight,
                                                  num_nodes)
        edge_index, edge_weight = spspmm(edge_index, edge_weight, edge_index,
                                         edge_weight, num_nodes, num_nodes,
                                         num_nodes)
        edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
        return edge_index, edge_weight
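
Examples #4, #6, and #7 all follow the same pattern, which mirrors the augment_adj helper in PyG's GraphUNet: prepare the sparse inputs (sort, coalesce, or add self-loops), then square the adjacency matrix with torch_sparse.spspmm so each node reaches its two-hop neighborhood. A minimal sketch of the product on a 3-node path graph:

import torch
from torch_sparse import spspmm

# Path graph 0-1-2; A @ A adds the two-hop pairs (0, 2) and (2, 0).
edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
edge_weight = torch.ones(edge_index.size(1))
index2, weight2 = spspmm(edge_index, edge_weight, edge_index, edge_weight,
                         3, 3, 3)
print(index2)  # also includes diagonal (i, i) entries from back-and-forth hops
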
Code Example #8
from typing import List, Optional, Union

import torch
from torch import Tensor

from torch_geometric.utils import sort_edge_index
from torch_geometric.utils.num_nodes import maybe_num_nodes


def is_undirected(
    edge_index: Tensor,
    edge_attr: Optional[Union[Tensor, List[Tensor]]] = None,
    num_nodes: Optional[int] = None,
) -> bool:
    r"""Returns :obj:`True` if the graph given by :attr:`edge_index` is
    undirected.

    Args:
        edge_index (LongTensor): The edge indices.
        edge_attr (Tensor or List[Tensor], optional): Edge weights or multi-
            dimensional edge features.
            If given as a list, will check for equivalence in all its entries.
            (default: :obj:`None`)
        num_nodes (int, optional): The number of nodes, *i.e.*
            :obj:`max_val + 1` of :attr:`edge_index`. (default: :obj:`None`)

    :rtype: bool
    """
    num_nodes = maybe_num_nodes(edge_index, num_nodes)

    edge_attr = [] if edge_attr is None else edge_attr
    edge_attr = [edge_attr] if isinstance(edge_attr, Tensor) else edge_attr

    edge_index1, edge_attr1 = sort_edge_index(
        edge_index,
        edge_attr,
        num_nodes=num_nodes,
        sort_by_row=True,
    )
    edge_index2, edge_attr2 = sort_edge_index(
        edge_index1,
        edge_attr1,
        num_nodes=num_nodes,
        sort_by_row=False,
    )

    return (bool(torch.all(edge_index1[0] == edge_index2[1]))
            and bool(torch.all(edge_index1[1] == edge_index2[0])) and all([
                torch.all(e == e_T) for e, e_T in zip(edge_attr1, edge_attr2)
            ]))
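
A minimal usage sketch of the function above:

import torch
from torch_geometric.utils import is_undirected

edge_index = torch.tensor([[0, 1], [1, 0]])
print(is_undirected(edge_index))                # True: both directions present
print(is_undirected(torch.tensor([[0], [1]])))  # False: one-way edge only
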
Code Example #9
File: data_all.py Project: mujin2020/IJCAI2021-3458
def get_agreement_dist(edge_index: torch.Tensor,
                       y: torch.Tensor,
                       with_self_loops=True,
                       return_agree_dist_sum=False,
                       epsilon=1e-11) -> Union[List[torch.Tensor], Tuple[List, List]]:
    r"""
    :param edge_index: tensor the shape of which is [2, E]
    :param y: tensor the shape of which is [N]
    :param with_self_loops: add_self_loops if True
    :param return_agree_dist_sum: whether return the sum of agreement dist
    :param epsilon: small float number for stability.
    :return: Tensor list L the length of which is N.
        L[i] = tensor([..., a(y_j, y_i), ...]) for e_{ji} \in {E}
            - a(y_j, y_i) = 1 / L[i].sum() if y_j = y_i,
            - a(y_j, y_i) = 0 otherwise.
    """
    y = y.squeeze()
    num_nodes = y.size(0)

    # Add self-loops and sort by index
    if with_self_loops:
        edge_index, _ = remove_self_loops(edge_index)
        edge_index, _ = add_self_loops(edge_index,
                                       num_nodes=num_nodes)  # [2, E + N]
    edge_index, _ = sort_edge_index(edge_index, num_nodes=num_nodes)

    agree_dist_list = []
    agree_dist_sum_list = []
    for node_idx, label in enumerate(tqdm(y)):
        # Unpack the two rows: `neighbors` are the source nodes j of incoming edges j -> i.
        neighbors, _ = edge_index[:, edge_index[1] == node_idx]
        y_neighbors = y[neighbors]
        if len(label.size()) == 0:
            agree_dist = (y_neighbors == label).float()
        else:  # multi-label case
            agree_dist = (y_neighbors * label).float().sum(dim=1)

        if return_agree_dist_sum:
            agree_dist_sum_list.append(agree_dist.sum().item())

        if int(agree_dist.sum()) != 0:
            agree_dist[agree_dist == 0] = epsilon  # For KLD
            agree_dist = agree_dist / agree_dist.sum()
        else:
            agree_dist[:] = 1.0
            agree_dist = agree_dist / agree_dist.sum()

        agree_dist_list.append(agree_dist)

    if not return_agree_dist_sum:
        return agree_dist_list  # [N, #neighbors]
    else:
        return agree_dist_list, agree_dist_sum_list  # [N, #neighbors], [N]
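
A toy check of the distribution, assuming the function above is in scope together with its imports (torch, tqdm, and PyG edge utilities from a release where they return tuples, as the snippet expects): in a triangle where nodes 0 and 1 share a label, node 0's probability mass splits over its two label-0 in-neighbors (including the added self-loop).

import torch

edge_index = torch.tensor([[0, 1, 1, 2, 2, 0], [1, 0, 2, 1, 0, 2]])
y = torch.tensor([0, 0, 1])
dists = get_agreement_dist(edge_index, y)
print(dists[0])  # roughly [0.5, 0.5, epsilon]
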
Code Example #10
    def __call__(self, data: Union[Data, HeteroData]):
        for store in data.edge_stores:
            if 'edge_index' not in store:
                continue

            keys, values = [], []
            for key, value in store.items():
                if key == 'edge_index':
                    continue

                if store.is_edge_attr(key):
                    keys.append(key)
                    values.append(value)

            store.edge_index, values = sort_edge_index(store.edge_index,
                                                       values,
                                                       sort_by_row=False)

            for key, value in zip(keys, values):
                store[key] = value

            store.adj_t = SparseTensor(
                row=store.edge_index[1],
                col=store.edge_index[0],
                value=None if self.attr is None or self.attr not in store else
                store[self.attr],
                sparse_sizes=store.size()[::-1],
                is_sorted=True,
                trust_data=True)

            if self.remove_edge_index:
                del store['edge_index']
                if self.attr is not None and self.attr in store:
                    del store[self.attr]

            if self.fill_cache:  # Pre-process some important attributes.
                store.adj_t.storage.rowptr()
                store.adj_t.storage.csr2csc()

        return data
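
This __call__ matches PyG's ToSparseTensor transform: edges are sorted by destination (sort_by_row=False) so the resulting transposed SparseTensor is CSR-ordered. A minimal usage sketch, assuming torch_sparse is installed:

import torch
from torch_geometric.data import Data
from torch_geometric.transforms import ToSparseTensor

data = Data(edge_index=torch.tensor([[0, 1], [1, 2]]), num_nodes=3)
data = ToSparseTensor()(data)
print(data.adj_t)  # SparseTensor; edge_index has been removed by default
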
Code Example #11
def count_motifs(data, is_direct):
    if data.edge_index.shape[1] > 500000:
        return None
    edge_index = data.edge_index.clone()
    if is_direct:
        edge_index = to_undirected(data.edge_index)
    edge_index, _ = sort_edge_index(edge_index)
    edge_index = edge_index.numpy()
    k = pd.DataFrame(edge_index.T).reset_index(drop=True)
    # ORCA's input format expects "<num_nodes> <num_edges>" as the first line;
    # the rename writes those counts out as the CSV header.
    k.rename(columns={0: data.x.shape[0], 1: edge_index.shape[1]}, inplace=True)
    try:
        name = 'graph' + str(data.x.shape[0]) + str(data.x.shape[1]) + str(
            data.edge_index.shape[0]) + str(data.edge_index.shape[1])
        name1 = name + '.in'
        name2 = name + '.out'
        path = './' + name + '.in'
        k.to_csv(path, sep=' ', index=False)
        # Requires the external ORCA graphlet-counting binary in the working directory.
        os.system('./orca 4 ' + name1 + ' ' + name2)
        k = pd.read_csv('./' + name2, sep=' ', header=None)
    except Exception:
        return None
    return k.to_numpy()
Code Example #12
    def forward(self, x, edge_index, train_mask, is_debug=False):

        # Step 1: Class Distribution & Entropy Regularization
        cd = F.softmax(x, dim=-1)
        EPS = 1e-15
        entropy = -(cd * torch.log(cd + EPS)).sum(dim=-1)

        # Step 2: Compute a transition matrix: transP
        transP, sum_pipj = self.compute_transP(cd, edge_index)

        # Step 3: gamma
        with torch.no_grad():
            deg = degree(edge_index[0])
            deg[deg == 0] = 1
            cont_i = sum_pipj / deg

            gamma = self.beta + (1 - self.beta) * cont_i

        # Step 4: Aggregate features
        x = F.dropout(x, p=self.dropout, training=self.training)
        H = x

        for k in range(self.K):
            x = self.propagate(edge_index, x=x, transP=transP)

        x = (1 - gamma.unsqueeze(dim=-1)) * H + gamma.unsqueeze(dim=-1) * x

        if is_debug:
            debug_tensor = []
            with torch.no_grad():
                debug_tensor.append(sort_edge_index(edge_index, transP))
                debug_tensor.append(cd)
                debug_tensor.append(sum_pipj)
                debug_tensor.append(gamma)
        else:
            debug_tensor = None

        return x, entropy, debug_tensor
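
The zero-degree guard in Step 3 prevents division by zero for isolated nodes; degree from torch_geometric.utils counts how often each node appears as an edge source. A tiny sketch:

import torch
from torch_geometric.utils import degree

edge_index = torch.tensor([[0, 0, 1], [1, 2, 2]])
deg = degree(edge_index[0], num_nodes=4)  # tensor([2., 1., 0., 0.])
deg[deg == 0] = 1                         # guard isolated nodes before dividing
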
Code Example #13
def add_direct_edge(data):
    data = data.to('cpu')
    edge_index = data.edge_index.detach().clone()
    edge_index, _ = sort_edge_index(edge_index)
    edge_index = edge_index.numpy()
    k = pd.DataFrame(edge_index.T).reset_index(drop=True)
    k.rename(columns={0: data.x.shape[0], 1: edge_index.shape[1]}, inplace=True)
    k['1'] = data.edge_weight.detach().numpy()
    #print(k)
    #return
    try:
        name = 'edge' + str(data.x.shape[0]) + str(data.x.shape[1]) + str(
            data.edge_index.shape[0]) + str(data.edge_index.shape[1])
        name1 = name + '.in'
        name2 = name + '.out'
        path = './' + name + '.in'
        k.to_csv(path, sep=' ', index=False)
        # Requires the external './edge' binary next to the script.
        os.system('./edge 4 ' + name1 + ' ' + name2)
        k = pd.read_csv('./' + name2, sep=' ', header=None)
    except Exception:
        return None, None
    k = k.to_numpy()
    return k[:, 0:2], k[:, 2]
Code Example #14
File: model.py Project: wjwangppt/AutoGraph2020
    def generate_pyg_data(self, data):
        # get x feature table
        x = data['fea_table'].copy()
        df = data['edge_file']
        edges = df[['src_idx', 'dst_idx', 'edge_weight']]

        # get indices first
        train_indices = data['train_indices']
        if self.config.use_valid:
            train_indices, valid_indices = train_test_split(train_indices, test_size=0.2, shuffle=False)

        try:
            if x.shape[1] == 1:        # 0-dimensional feature
                x = x.set_index(keys="node_index")
                x = feat_engineering(
                    x,
                    edges=edges,
                    num_nodes=self.metadata["n_node"].iloc[0]
                )
            else:
                x_feat = x.drop('node_index', axis=1).to_numpy()
                conf_name = self.config.filename.split("/")[-1].split(".")[0]
                is_only_one_zero = not ((x_feat != 0) & (x_feat != 1)).any()
                logger.info("use {} config".format(conf_name))
                logger.info(
                    "feature only contains zero: {}, only one and zero: {}".format((x_feat == 0).all(), is_only_one_zero))

                if conf_name in self.citation_configs:  # Judge whether it is a citation graph
            # if True:
                    if is_only_one_zero:
                        logger.info("Normalize features")
                        normal_feat = feat_row_sum_inv_normalize(x_feat)
                        normal_df = pd.DataFrame(data=normal_feat)
                        normal_df["node_index"] = x["node_index"]
                        x = normal_df

                    pre_feat = prepredict(data, train_indices=train_indices, use_valid=self.config.use_valid, use_ohe=False)
                    x = x.set_index(keys="node_index")
                    x_index = x.index.tolist()
                    lpa_preds, lpa_train_acc = lpa_predict(data, n_class=self._n_class, train_indices=train_indices, use_valid=self.config.use_valid)
                    if not np.isnan(lpa_train_acc) and lpa_train_acc > 0.8:
                        logger.info("Use LPA predicts")
                        x = pd.concat([x, pre_feat, lpa_preds], axis=1).values[x_index]
                    else:
                        x = pd.concat([x, pre_feat], axis=1).values[x_index]
                else:
                    x = x.set_index(keys="node_index")
                    x = feat_engineering(
                        x,
                        edges=edges,
                        num_nodes=self.metadata["n_node"].iloc[0]
                    )
        except Exception as e:
            logger.error(e)
            if x.shape[1] == 0:
                x = np.zeros((x.shape[0], 64), dtype=np.float64)  # np.float alias removed in NumPy >= 1.24
            else:
                x = x.to_numpy()

        logger.info("x shape: {}".format(x.shape))
        node_index = torch.tensor(data['fea_table']['node_index'].to_numpy(), dtype=torch.long)
        x = torch.tensor(x, dtype=torch.float)

        # get edge_index, edge_weight
        edges = edges.to_numpy()
        edge_index = edges[:, :2].astype(np.int64)  # np.int alias removed in NumPy >= 1.24
        # transpose from [edge_num, 2] to [2, edge_num] which is required by PyG
        edge_index = torch.tensor(edge_index, dtype=torch.long).transpose(0, 1)
        edge_weight = edges[:, 2]
        edge_weight = torch.tensor(edge_weight, dtype=torch.float32)

        undirected = gtils.is_undirected(edge_index)

        edge_index, edge_weight = gtils.sort_edge_index(edge_index, edge_weight)
        logger.info(f"is undirected ? {undirected}")
        logger.info(f"edge index {edge_index.shape}, edge weight {edge_weight.shape}")

        # get train/test mask
        num_nodes = x.size(0)
        self._num_nodes = num_nodes
        y = torch.zeros(num_nodes, dtype=torch.long)
        inds = data['train_label'][['node_index']].to_numpy()
        train_y = data['train_label'][['label']].to_numpy()
        self.y_train = train_y
        y[inds] = torch.tensor(train_y, dtype=torch.long)

        # train_indices = data['train_indices']
        self._origin_graph_data_indices = copy.deepcopy(data['train_indices'])
        if self.config.use_valid:
            # train_indices, valid_indices = train_test_split(train_indices, test_size=0.2)
            # train_indices, valid_indices = train_test_split(train_indices, test_size=0.2, shuffle=False)
            self.y_train = data['train_label'].set_index('node_index').loc[train_indices][['label']].to_numpy()
        test_indices = data['test_indices']

        data = Data(x=x, node_index=node_index, edge_index=edge_index, y=y, edge_weight=edge_weight)

        data.num_nodes = num_nodes

        train_mask = torch.zeros(num_nodes, dtype=torch.bool)
        train_mask[train_indices] = 1
        data.train_indices = np.asarray(train_indices)
        data.train_mask = train_mask
        self._train_indices = np.asarray(train_indices)
        self._train_mask = train_mask

        if self.config.use_valid:
            valid_mask = torch.zeros(num_nodes, dtype=torch.bool)
            valid_mask[valid_indices] = 1
            data.valid_indices = valid_indices
            data.valid_mask = valid_mask
            self._valid_indices = valid_indices
            self._valid_mask = valid_mask

        self._test_mask = np.zeros(num_nodes, dtype=bool)  # np.bool alias removed in NumPy >= 1.24
        self._test_mask[test_indices] = True
        test_mask = torch.zeros(num_nodes, dtype=torch.bool)
        test_mask[test_indices] = 1
        data.test_mask = test_mask
        data.test_indices = np.asarray(test_indices)

        self._sampler = Sampler(data, self.metadata["n_edge"].iloc[0], self.device)

        return data
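
Stripped of the feature engineering and validation split, the core of generate_pyg_data is the standard PyG assembly pattern: tensors for features, edges, weights, and labels, plus boolean node masks. A condensed, self-contained sketch with hypothetical values:

import torch
from torch_geometric.data import Data

x = torch.randn(4, 16)                             # node features
edge_index = torch.tensor([[0, 1, 2], [1, 2, 3]])  # shape [2, num_edges]
edge_weight = torch.ones(edge_index.size(1))
y = torch.zeros(4, dtype=torch.long)

data = Data(x=x, edge_index=edge_index, y=y, edge_weight=edge_weight)
train_mask = torch.zeros(4, dtype=torch.bool)
train_mask[[0, 1]] = True                          # mark training nodes
data.train_mask = train_mask
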
Code Example #15
File: CustomDataSet.py Project: gyhandy/Graph_AE
    def process(self):
        data_list = []

        with open(self.root + '/objects.json', 'r') as f:
            objects = f.read()
        with open(self.root + '/relationships.json', 'r') as f:
            links = f.read()

        object_entry = json.loads(objects)
        link_entry = json.loads(links)
        word_dict = dict()

        count = 0
        num_samples = 5000
        feature_size = 500

        for i in tqdm(range(num_samples)):
            objs = object_entry[i]["objects"]
            for obj in objs:
                name = obj["names"][0]
                if name not in word_dict.keys():
                    word_dict[name] = count
                    count += 1

        embeds = nn.Embedding(len(word_dict), feature_size)
        # torch.autograd.Variable is a deprecated no-op wrapper on modern PyTorch.
        eb = embeds(Variable(torch.arange(0, len(word_dict)).long()))
        for i in tqdm(range(num_samples)):
            objs = object_entry[i]["objects"]
            id_dict = dict()
            node_list = []
            idx = 0
            if len(objs) == 0:
                continue
            for obj in objs:
                name = obj["names"][0]
                node_list.append(eb[word_dict[name]])
                id_dict[obj["object_id"]] = idx
                for j in obj["merged_object_ids"]:
                    id_dict[j] = idx
                idx += 1
            x = torch.stack(node_list)
            print()
            print(x.shape)

            from_list = []
            to_list = []
            links = link_entry[i]["relationships"]
            for link in links:
                v = link["object"]["object_id"]
                u = link["subject"]["object_id"]
                if v in id_dict.keys() and u in id_dict.keys():
                    from_list.append(id_dict[v])
                    to_list.append(id_dict[u])
            edge_index = torch.tensor([from_list, to_list], dtype=torch.long)
            edge_index, _ = sort_edge_index(edge_index, None, x.shape[0])
            if len(from_list) > 0:
                edge_index, _ = coalesce(edge_index, None, x.shape[0], x.shape[0])
                print(edge_index.shape)
                print()
                data = Data(x=x, edge_index=edge_index)
                data_list.append(data)

        print(len(word_dict))
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])
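
The coalesce call above (from torch_sparse) sorts and deduplicates edges in one step, so the preceding sort_edge_index is effectively subsumed when edges are present. A minimal sketch of the deduplication:

import torch
from torch_sparse import coalesce

edge_index = torch.tensor([[0, 0, 1], [1, 1, 0]])  # duplicate 0 -> 1 edge
edge_index, _ = coalesce(edge_index, None, 2, 2)
print(edge_index)  # tensor([[0, 1], [1, 0]])
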