def setup_dataset(self):
    """
    Split the signed edges into train/test parts and build training tensors.

    Reads ``self.edges`` (keys ``"positive_edges"``, ``"negative_edges"`` and
    ``"ncount"``), ``self.args`` and ``self.device``.

    Sets on ``self``:
        positive_edges, negative_edges: training edges as ``torch.long``
            tensors (the transposed edge arrays) on ``self.device``.
        test_positive_edges, test_negative_edges: held-out edges exactly as
            returned by ``train_test_split``.
        ecount: number of training edges (positive plus negative).
        X: float tensor built from the output of ``setup_features``.
        y: long tensor of class targets: 0 per positive training edge,
            1 per negative training edge, then ``2 * ecount`` entries of 2.
    """
    test_size = self.args.test_size
    self.positive_edges, self.test_positive_edges = train_test_split(
        self.edges["positive_edges"], test_size=test_size)
    self.negative_edges, self.test_negative_edges = train_test_split(
        self.edges["negative_edges"], test_size=test_size)

    # Count each class separately instead of concatenating the two edge
    # containers just to measure them; the counts are reused for the targets.
    positive_count = len(self.positive_edges)
    negative_count = len(self.negative_edges)
    self.ecount = positive_count + negative_count

    # Features are computed from the raw (pre-tensor) training edges.
    self.X = setup_features(self.args,
                            self.positive_edges,
                            self.negative_edges,
                            self.edges["ncount"])

    self.positive_edges = torch.from_numpy(
        np.array(self.positive_edges, dtype=np.int64).T
    ).type(torch.long).to(self.device)
    self.negative_edges = torch.from_numpy(
        np.array(self.negative_edges, dtype=np.int64).T
    ).type(torch.long).to(self.device)

    # Label by the real class sizes. Splitting the first ``ecount`` targets
    # at ``ecount / 2`` is only right when both classes have the same number
    # of training edges; otherwise some edges get the wrong class.
    # NOTE(review): assumes the consumer of ``y`` orders its rows as
    # positive edges, negative edges, then the class-2 rows -- confirm
    # against the loss code.
    targets = np.array([0] * positive_count
                       + [1] * negative_count
                       + [2] * (self.ecount * 2))
    self.y = torch.from_numpy(targets).type(torch.LongTensor).to(self.device)
    self.X = torch.from_numpy(self.X).float().to(self.device)
def setup_dataset(self):
    """
    Build the feature matrix, edge tensors and node targets from all edges.

    No train/test split is made here: the edge containers stored under
    ``self.edges`` are used as they are. The targets are 1 for every entry of
    ``self.node_labels`` equal to -1 and 0 otherwise. When
    ``self.args.subgraph_training`` compares equal to ``True``, the feature
    rows are re-indexed through
    ``self.nodes_dict['subgraph_map_from_original_feature']`` before being
    turned into a tensor.
    """
    def as_edge_tensor(edge_list):
        # Transposed int64 edge array as a long tensor on the target device.
        transposed = np.array(edge_list, dtype=np.int64).T
        return torch.from_numpy(transposed).type(torch.long).to(self.device)

    self.positive_edges = self.edges["positive_edges"]
    self.negative_edges = self.edges["negative_edges"]
    combined_edges = self.positive_edges + self.negative_edges
    self.ecount = len(combined_edges)

    # Features are derived from the raw edge containers, so this runs before
    # the edges are converted to tensors.
    self.X = setup_features(self.args,
                            self.positive_edges,
                            self.negative_edges,
                            self.node_count)

    self.positive_edges = as_edge_tensor(self.positive_edges)
    self.negative_edges = as_edge_tensor(self.negative_edges)

    # One target per node label: -1 maps to class 1, anything else to 0.
    node_targets = np.array(
        [1 if node_label == -1 else 0 for node_label in self.node_labels])
    self.y = torch.from_numpy(node_targets).type(torch.LongTensor).to(self.device)

    if self.args.subgraph_training == True:
        row_selector = self.nodes_dict['subgraph_map_from_original_feature']
        self.X = self.X[row_selector]
    self.X = torch.from_numpy(self.X).float().to(self.device)
def setup_dataset(self):
    """
    Split the edges into train/test parts and prepare features and targets.

    Positive and negative edges from ``self.edges`` are each split with
    ``train_test_split`` using ``self.args.test_size``. The training parts
    are converted to ``torch.long`` tensors on ``self.device``; the held-out
    parts are stored unchanged. ``self.y`` holds one 0 per positive training
    edge, one 1 per negative training edge, followed by ``2 * ecount``
    entries of 2.
    """
    def to_edge_tensor(edge_list):
        # Transposed int64 edge array as a long tensor on the target device.
        as_array = np.array(edge_list, dtype=np.int64).T
        return torch.from_numpy(as_array).type(torch.long).to(self.device)

    # Positive node/edge relations.
    train_pos, self.test_positive_edges = train_test_split(
        self.edges["positive_edges"], test_size=self.args.test_size)
    # Negative node/edge relations.
    train_neg, self.test_negative_edges = train_test_split(
        self.edges["negative_edges"], test_size=self.args.test_size)

    # Number of training samples.
    self.ecount = len(train_pos + train_neg)

    # Node features built from the training edges and the node count
    # (``self.edges["ncount"]``). The original author's note says these come
    # from an SVD of the adjacency matrix -- not visible here, confirm in
    # ``setup_features``.
    self.X = setup_features(self.args, train_pos, train_neg, self.edges["ncount"])

    # Convert both edge sets to torch tensors.
    self.positive_edges = to_edge_tensor(train_pos)
    self.negative_edges = to_edge_tensor(train_neg)

    # Targets come in three parts: class 0 and class 1 sized by the first row
    # of each edge tensor, then class 2 with twice the edge count (per the
    # original note, class 2 stands for unconnected pairs).
    positive_count = len(self.positive_edges[0])
    negative_count = len(self.negative_edges[0])
    label_list = [0] * positive_count + [1] * negative_count + [2] * (self.ecount * 2)
    self.y = torch.from_numpy(np.array(label_list)).type(torch.LongTensor).to(self.device)

    # Input features; the original author noted a [5881, 64] shape for their
    # dataset -- not verifiable from this code.
    self.X = torch.from_numpy(self.X).float().to(self.device)
def setup_dataset(self):
    """
    Prepare features, edge tensors and node targets without splitting edges.

    All positive and negative edges stored in ``self.edges`` are used
    directly. ``self.X`` is the float tensor of the ``setup_features`` output
    and ``self.y`` is a long tensor with 1 where the node label equals -1 and
    0 elsewhere.
    """
    positive_list = self.edges["positive_edges"]
    negative_list = self.edges["negative_edges"]
    self.ecount = len(positive_list + negative_list)

    # The node count comes from ``self.node_count`` (the original note calls
    # it the corrected node count) rather than from the edge dictionary.
    feature_matrix = setup_features(self.args, positive_list, negative_list,
                                    self.node_count)

    # Both edge sets go through the same conversion: transposed int64 array,
    # then long tensor, then move to the target device.
    self.positive_edges, self.negative_edges = (
        torch.from_numpy(np.array(edge_list, dtype=np.int64).T)
        .type(torch.long)
        .to(self.device)
        for edge_list in (positive_list, negative_list)
    )

    # Node targets: per the original author's note, Fraud is the positive
    # class and Benign the negative one; in code, label -1 maps to 1 and
    # everything else to 0.
    binary_labels = [1 if raw_label == -1 else 0 for raw_label in self.node_labels]
    self.y = torch.from_numpy(np.array(binary_labels)).type(torch.LongTensor).to(self.device)

    self.X = torch.from_numpy(feature_matrix).float().to(self.device)
def setup_dataset(self):
    """
    Load the pre-split train/test edges and build the training tensors.

    No splitting is done here: ``self.edges["positive_edges"]`` and
    ``self.edges["negative_edges"]`` are each expected to expose ``'train'``
    and ``'test'`` entries, which are used as they are.

    Sets on ``self``:
        positive_edges, negative_edges: the ``'train'`` edges as
            ``torch.long`` tensors (transposed edge arrays) on
            ``self.device``.
        test_positive_edges, test_negative_edges: the ``'test'`` entries,
            stored unchanged.
        ecount: length of the combined positive and negative training edges.
        X: float tensor built from the output of ``setup_features``.
        y: long tensor of class targets: 0 per positive training edge,
            1 per negative training edge, then twice their total of 2s.
    """
    positive_split = self.edges["positive_edges"]
    negative_split = self.edges["negative_edges"]
    self.positive_edges = positive_split['train']
    self.test_positive_edges = positive_split['test']
    self.negative_edges = negative_split['train']
    self.test_negative_edges = negative_split['test']

    # NOTE(review): ``+`` is list concatenation only if both containers are
    # lists -- confirm the container type produced by the data loader.
    self.ecount = len(self.positive_edges + self.negative_edges)

    # Features are computed from the raw (pre-tensor) training edges.
    self.X = setup_features(self.args,
                            self.positive_edges,
                            self.negative_edges,
                            self.edges["ncount"])

    self.positive_edges = torch.from_numpy(
        np.array(self.positive_edges, dtype=np.int64).T
    ).type(torch.long).to(self.device)
    self.negative_edges = torch.from_numpy(
        np.array(self.negative_edges, dtype=np.int64).T
    ).type(torch.long).to(self.device)

    # Size the targets from the edge tensors themselves (second dimension),
    # so they always match the edges that were actually converted.
    positive_count = self.positive_edges.shape[1]
    negative_count = self.negative_edges.shape[1]
    targets = np.array([0] * positive_count
                       + [1] * negative_count
                       + [2] * 2 * (positive_count + negative_count))
    self.y = torch.from_numpy(targets).type(torch.LongTensor).to(self.device)
    self.X = torch.from_numpy(self.X).float().to(self.device)