Beispiel #1
0
    def setup_dataset(self):
        """
        Create the train/test edge split and the tensors used for training.

        Reads ``self.edges`` (keys ``"positive_edges"``, ``"negative_edges"``
        and ``"ncount"``), ``self.args`` (``test_size`` plus whatever
        ``setup_features`` consumes) and ``self.device``.

        Attributes set:
            positive_edges, negative_edges: training edges as ``torch.long``
                tensors on ``self.device``, transposed relative to the edge
                list (presumably shape ``(2, n_edges)`` -- TODO confirm).
            test_positive_edges, test_negative_edges: held-out edges exactly
                as returned by ``train_test_split``.
            ecount: number of training edges, positive plus negative.
            X: result of ``setup_features`` converted to a float tensor.
            y: ``torch.long`` targets: 0 for every positive training edge,
                1 for every negative training edge, followed by
                ``2 * ecount`` entries of class 2.
        """
        test_size = self.args.test_size
        train_positive, self.test_positive_edges = train_test_split(
            self.edges["positive_edges"], test_size=test_size)
        train_negative, self.test_negative_edges = train_test_split(
            self.edges["negative_edges"], test_size=test_size)

        # Same value as len(positive + negative) for list inputs, without
        # building the concatenated list just to measure it.
        positive_count = len(train_positive)
        negative_count = len(train_negative)
        self.ecount = positive_count + negative_count

        # Features are computed from the training edges only.
        self.X = setup_features(self.args,
                                train_positive,
                                train_negative,
                                self.edges["ncount"])

        self.positive_edges = torch.from_numpy(
            np.array(train_positive, dtype=np.int64).T
        ).type(torch.long).to(self.device)
        self.negative_edges = torch.from_numpy(
            np.array(train_negative, dtype=np.int64).T
        ).type(torch.long).to(self.device)

        # One label per edge, chosen by the edge's sign. Splitting the index
        # range at ecount / 2 is only correct when both classes contain the
        # same number of training edges, which a signed graph rarely has.
        # NOTE(review): assumes the consumer of self.y orders its rows as
        # positive edges, negative edges, then the class-2 rows -- confirm
        # against the loss computation.
        labels = np.array([0] * positive_count
                          + [1] * negative_count
                          + [2] * (self.ecount * 2))
        self.y = torch.from_numpy(labels).type(torch.LongTensor).to(self.device)
        self.X = torch.from_numpy(self.X).float().to(self.device)
Beispiel #2
0
    def setup_dataset(self):
        """
        Build edge tensors, node features and node labels for training.

        No train/test split is made in this method: every positive and
        negative edge found in ``self.edges`` is used.

        Reads ``self.edges``, ``self.args``, ``self.node_count``,
        ``self.node_labels``, ``self.device`` and, when
        ``self.args.subgraph_training`` is set, ``self.nodes_dict``.

        Attributes set:
            positive_edges, negative_edges: edges as ``torch.long`` tensors
                on ``self.device``, transposed relative to the edge list.
            ecount: ``len`` of the positive and negative edge collections
                added together.
            X: result of ``setup_features`` (optionally re-indexed for
                subgraph training) as a float tensor.
            y: ``torch.long`` node targets, 1 where the node label is -1 and
                0 otherwise.
        """
        positive_edge_list = self.edges["positive_edges"]
        negative_edge_list = self.edges["negative_edges"]

        self.ecount = len(positive_edge_list + negative_edge_list)
        features = setup_features(self.args, positive_edge_list,
                                  negative_edge_list, self.node_count)

        self.positive_edges = torch.from_numpy(
            np.array(positive_edge_list, dtype=np.int64).T
        ).type(torch.long).to(self.device)
        self.negative_edges = torch.from_numpy(
            np.array(negative_edge_list, dtype=np.int64).T
        ).type(torch.long).to(self.device)

        # Node-level targets: a label of -1 becomes class 1, anything else 0.
        node_targets = np.array(
            [1 if label == -1 else 0 for label in self.node_labels])
        self.y = torch.from_numpy(node_targets).type(torch.LongTensor).to(
            self.device)

        if self.args.subgraph_training:
            # Re-index the feature rows with the mapping stored under
            # 'subgraph_map_from_original_feature'.
            features = features[
                self.nodes_dict['subgraph_map_from_original_feature']]

        self.X = torch.from_numpy(features).float().to(self.device)
Beispiel #3
0
    def setup_dataset(self):
        """
        Split the signed edges into train/test parts and build the tensors
        consumed during training.

        Sets ``positive_edges``, ``negative_edges``, ``test_positive_edges``,
        ``test_negative_edges``, ``ecount``, ``X`` and ``y`` on the instance.
        """
        def as_edge_tensor(edge_list):
            # Transposed int64 copy of the edge list, moved to the device.
            transposed = np.array(edge_list, dtype=np.int64).T
            return torch.from_numpy(transposed).type(torch.long).to(self.device)

        # Node pairs joined by a positive edge, split into train and test.
        train_pos, self.test_positive_edges = train_test_split(
            self.edges["positive_edges"], test_size=self.args.test_size)
        # Same split for the negative edges.
        train_neg, self.test_negative_edges = train_test_split(
            self.edges["negative_edges"], test_size=self.args.test_size)

        # Number of training samples.
        self.ecount = len(train_pos + train_neg)

        # Node features from the training edges and the node count (original
        # note: an SVD decomposition of the adjacency matrix).
        node_features = setup_features(self.args,
                                       train_pos,
                                       train_neg,
                                       self.edges["ncount"])

        # Edge lists converted to torch tensors.
        self.positive_edges = as_edge_tensor(train_pos)
        self.negative_edges = as_edge_tensor(train_neg)

        # Targets come in three parts: class 0 for each positive edge, class 1
        # for each negative edge, and class 2 repeated twice the edge count
        # (original note: class 2 stands for unconnected pairs).
        positive_labels = [0] * len(self.positive_edges[0])
        negative_labels = [1] * len(self.negative_edges[0])
        unconnected_labels = [2] * (self.ecount * 2)
        targets = np.array(positive_labels + negative_labels + unconnected_labels)
        self.y = torch.from_numpy(targets).type(torch.LongTensor).to(self.device)

        # Input features (original note: shape [5881, 64] on the author's data).
        self.X = torch.from_numpy(node_features).float().to(self.device)
Beispiel #4
0
    def setup_dataset(self):
        """
        Prepare edge tensors, node features and node labels for training.

        Every edge in ``self.edges`` is used as-is; no train/test split is
        made in this method. Sets ``positive_edges``, ``negative_edges``,
        ``ecount``, ``X`` and ``y`` on the instance.
        """
        def to_edge_tensor(edge_list):
            # Transposed int64 copy of the edge list, moved to the device.
            edge_array = np.array(edge_list, dtype=np.int64).T
            return torch.from_numpy(edge_array).type(torch.long).to(self.device)

        raw_positive = self.edges["positive_edges"]
        raw_negative = self.edges["negative_edges"]
        self.ecount = len(raw_positive + raw_negative)

        # The node count is taken from the instance (original note:
        # "node_count is corrected").
        feature_matrix = setup_features(self.args, raw_positive,
                                        raw_negative, self.node_count)

        self.positive_edges = to_edge_tensor(raw_positive)
        self.negative_edges = to_edge_tensor(raw_negative)

        # self.y holds the node labels: Fraud is the positive class (1) and
        # Benign the negative class (0), per the original note. A node label
        # of -1 maps to 1, every other value to 0.
        node_targets = [1 if node_label == -1 else 0
                        for node_label in self.node_labels]
        self.y = torch.from_numpy(np.array(node_targets)).type(
            torch.LongTensor).to(self.device)

        self.X = torch.from_numpy(feature_matrix).float().to(self.device)
Beispiel #5
0
    def setup_dataset(self):
        """
        Load the pre-split train/test edges and build the training tensors.

        The split itself is not computed here: the ``'train'`` and ``'test'``
        entries of ``self.edges["positive_edges"]`` and
        ``self.edges["negative_edges"]`` are taken as given.

        Attributes set:
            positive_edges, negative_edges: training edges as ``torch.long``
                tensors on ``self.device``, transposed relative to the input.
            test_positive_edges, test_negative_edges: the ``'test'`` entries,
                unchanged.
            ecount: ``len`` of the positive and negative training edge
                collections added together.
            X: result of ``setup_features`` as a float tensor.
            y: ``torch.long`` targets: 0 per positive training edge, 1 per
                negative training edge, then twice the total edge count of
                class-2 entries.
        """
        positive_split = self.edges["positive_edges"]
        negative_split = self.edges["negative_edges"]
        self.positive_edges = positive_split['train']
        self.test_positive_edges = positive_split['test']
        self.negative_edges = negative_split['train']
        self.test_negative_edges = negative_split['test']

        # Left as in the original: the container type of the edges is not
        # visible here, so the `+` semantics are preserved as-is.
        self.ecount = len(self.positive_edges + self.negative_edges)

        self.X = setup_features(self.args, self.positive_edges,
                                self.negative_edges, self.edges["ncount"])

        self.positive_edges = torch.from_numpy(
            np.array(self.positive_edges,
                     dtype=np.int64).T).type(torch.long).to(self.device)

        self.negative_edges = torch.from_numpy(
            np.array(self.negative_edges,
                     dtype=np.int64).T).type(torch.long).to(self.device)

        # Edge counts are read from the second axis of the transposed tensors.
        positive_count = self.positive_edges.shape[1]
        negative_count = self.negative_edges.shape[1]

        # 0 per positive edge, 1 per negative edge, then two class-2 entries
        # for every training edge.
        labels = np.array([0] * positive_count
                          + [1] * negative_count
                          + [2] * 2 * (positive_count + negative_count))
        self.y = torch.from_numpy(labels).type(torch.LongTensor).to(
            self.device)
        self.X = torch.from_numpy(self.X).float().to(self.device)