def main(): """ Parsing command line parameters, reading data, graph decomposition, fitting a ClusterGCN and scoring the model. """ args = parameter_parser() torch.manual_seed(args.seed) tab_printer(args) graph = graph_reader(args.edge_path) adj = adj_reader(graph) features = feature_reader(args.features_path) target = target_reader(args.target_path) clustering_machine = ClusteringMachine(args, graph, features, target, adj) clustering_machine.decompose() gcn_trainer = ClusterGCNTrainer(args, clustering_machine) gcn_trainer.train() gcn_trainer.test()
def main(): """ Parsing command line parameters, reading data, doing sparsification, fitting a GWNN and saving the logs. """ args = parameter_parser() tab_printer(args) graph = graph_reader(args.edge_path) features = feature_reader(args.features_path) target = target_reader(args.target_path) sparsifier = WaveletSparsifier(graph, args.scale, args.approximation_order, args.tolerance) sparsifier.calculate_all_wavelets() trainer = GWNNTrainer(args, sparsifier, features, target) trainer.fit() trainer.score() save_logs(args, trainer.logs)
def main(): """ Parsing command line parameters, reading data, fitting an NGCN and scoring the model. """ args = parameter_parser() torch.manual_seed(args.seed) tab_printer(args) graph = graph_reader(args.edge_path) features = feature_reader(args.features_path) target = target_reader(args.target_path) trainer = Trainer(args, graph, features, target, True) trainer.fit() if args.model == "mixhop": trainer.evaluate_architecture() args = trainer.reset_architecture() trainer = Trainer(args, graph, features, target, False) trainer.fit()
def load_data(self):
    print('loading data...')
    self.features, self.labels, self.idx_train, self.idx_val, self.idx_test \
        = feature_reader(dataset=self.dataset, scale=self.args.scale,
                         train_ratio=self.args.train_ratio,
                         feature_size=self.args.feature_size)
    self.n_features = self.features.shape[1]
    self.n_classes = self.labels.max().item() + 1
    self.edges = graph_reader(dataset=self.dataset)
    # Build a NetworkX graph from the edge list.
    self.G = nx.Graph()
    self.G.add_edges_from(self.edges)
    self.n = self.G.number_of_nodes()
    self.E = self.G.number_of_edges()  # total edges
    print('dataset loading finished')
    print('number of nodes:', self.n)
    print('number of edges:', self.E)
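# A caveat for the loader above: nx.Graph().add_edges_from only creates nodes
# that appear in at least one edge, so self.n can undercount labeled but
# isolated nodes. A small sketch of a safer builder, assuming node ids run
# from 0 to n_nodes - 1.
import networkx as nx


def build_graph(edges, n_nodes):
    """Build an undirected graph that also keeps isolated nodes."""
    G = nx.Graph()
    G.add_nodes_from(range(n_nodes))  # register every node id up front
    G.add_edges_from(edges)
    return G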
def load_data(self):
    self.features, self.labels, self.idx_train, self.idx_val, self.idx_test \
        = feature_reader(dataset=self.dataset, scale=self.args.scale,
                         train_ratio=self.args.train_ratio,
                         feature_size=self.args.feature_size)
    self.n_nodes = len(self.labels)
    self.n_features = self.features.shape[1]
    self.n_classes = self.labels.max().item() + 1
    self.edges = graph_reader(dataset=self.dataset)
    self.adj = self.build_adj_mat()
    # self.calculate_connectivity()
    if torch.cuda.is_available():
        self.features = self.features.cuda()
        self.adj = self.adj.cuda()
        self.labels = self.labels.cuda()
        if hasattr(self, 'prj'):
            self.prj = self.prj.cuda()
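# build_adj_mat is referenced above but not shown here. A minimal standalone
# sketch that turns the edge list into a symmetrically normalized sparse
# torch tensor, D^-1/2 (A + I) D^-1/2; only the name and its inputs come from
# the caller, the normalization choice is an assumption.
import numpy as np
import scipy.sparse as sp
import torch


def build_adj_mat(edges, n_nodes):
    """Hypothetical sketch: normalized adjacency with self-loops."""
    edges = np.asarray(edges)
    adj = sp.coo_matrix((np.ones(len(edges)), (edges[:, 0], edges[:, 1])),
                        shape=(n_nodes, n_nodes))
    adj = adj + adj.T               # symmetrize (assumes one row per undirected edge)
    adj = adj + sp.eye(n_nodes)     # self-loops keep every degree >= 1
    deg = np.asarray(adj.sum(axis=1)).flatten()
    d_inv_sqrt = sp.diags(np.power(deg, -0.5))
    adj = (d_inv_sqrt @ adj @ d_inv_sqrt).tocoo()
    indices = torch.LongTensor(np.vstack((adj.row, adj.col)))
    values = torch.FloatTensor(adj.data)
    return torch.sparse_coo_tensor(indices, values, adj.shape)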
def load_data(self):
    if self.dataset in ('reddit', 'flickr', 'ppi', 'ppi-large',
                        'cora', 'citeseer', 'pubmed'):
        self.features, self.features_train, self.labels, \
            self.idx_train, self.idx_val, self.idx_test \
            = feature_reader(dataset=self.dataset, scale=self.args.scale,
                             train_ratio=self.args.train_ratio,
                             feature_size=self.args.feature_size)
        if torch.cuda.is_available():
            self.features = self.features.cuda()
            self.features_train = self.features_train.cuda()
            self.labels = self.labels.cuda()
        self.n_nodes = len(self.labels)
        self.n_features = self.features.shape[1]
        # A single label column means single-label classification;
        # otherwise the label width is the number of binary tasks.
        self.multi_label = self.labels.shape[1]
        if self.multi_label == 1:
            self.n_classes = self.labels.max().item() + 1
        else:
            self.n_classes = self.multi_label
    elif self.dataset.startswith('twitch-train'):
        p = self.dataset.find('/')
        self.features, self.labels = feature_reader(dataset=f'twitch/{self.dataset[p+1:]}')
        self.n_nodes = len(self.labels)
        # Random 80/20 train/validation split over the nodes.
        self.n_nodes_1 = int(0.8 * self.n_nodes)
        self.n_nodes_2 = self.n_nodes - self.n_nodes_1
        self.idx_train = np.random.choice(self.n_nodes, self.n_nodes_1, replace=False)
        # Validation set: the complement of the sampled training indices.
        self.idx_val = np.asarray(list(set(range(self.n_nodes)) - set(self.idx_train)))
        self.features_train = self.features[self.idx_train]
        # Standardize all features with statistics fitted on training nodes only.
        scaler = StandardScaler()
        scaler.fit(self.features_train)
        self.features = torch.FloatTensor(scaler.transform(self.features))
        self.features_train = self.features[self.idx_train]
        if torch.cuda.is_available():
            self.features = self.features.cuda()
            self.features_train = self.features_train.cuda()
            self.labels = self.labels.cuda()
        self.n_features = 3170  # fixed width of the twitch feature matrices
        self.multi_label = 1
        self.n_classes = 2
    elif self.dataset.startswith('twitch'):
        # A name like 'twitch/<train>/<test>' selects two country graphs:
        # dataset1 for training, dataset2 for testing.
        p_0 = self.dataset.find('/')
        data_folder = self.dataset[:p_0]
        p = self.dataset.rfind('/') + 1
        self.dataset1 = self.dataset[:p - 1]
        self.dataset2 = f'{data_folder}/{self.dataset[p:]}'
        self.features_1, self.labels_1 = feature_reader(dataset=self.dataset1)
        self.features_2, self.labels_2 = feature_reader(dataset=self.dataset2)
        scaler = StandardScaler()
        scaler.fit(self.features_1)
        self.features_1 = torch.FloatTensor(scaler.transform(self.features_1))
        self.features_2 = torch.FloatTensor(scaler.transform(self.features_2))
        if torch.cuda.is_available():
            self.features_1 = self.features_1.cuda()
            self.features_2 = self.features_2.cuda()
            self.labels_1 = self.labels_1.cuda()
            self.labels_2 = self.labels_2.cuda()
        self.n_nodes_1 = len(self.labels_1)
        self.n_nodes_2 = len(self.labels_2)
        self.n_features = 3170  # fixed width of the twitch feature matrices
        self.multi_label = 1
        self.n_classes = 2
    elif self.dataset.startswith('deezer'):
        p_0 = self.dataset.find('/')
        data_folder = self.dataset[:p_0]
        p = self.dataset.rfind('/') + 1
        self.dataset1 = self.dataset[:p - 1]
        self.dataset2 = f'{data_folder}/{self.dataset[p:]}'
        self.labels_1 = feature_reader(dataset=self.dataset1)
        self.labels_2 = feature_reader(dataset=self.dataset2)
        if torch.cuda.is_available():
            self.labels_1 = self.labels_1.cuda()
            self.labels_2 = self.labels_2.cuda()
        self.n_nodes_1 = len(self.labels_1)
        self.n_nodes_2 = len(self.labels_2)
        self.n_classes = self.multi_label = 84
    else:
        raise NotImplementedError(f'dataset = {self.dataset} not implemented!')
    print(f'loading {self.dataset} features done!')
    # Pure feature-based baselines need no graph structure.
    if self.args.mode in ('mlp', 'lr'):
        return
    if self.dataset in ('reddit', 'flickr', 'ppi', 'ppi-large',
                        'cora', 'citeseer', 'pubmed'):
        self.adj_full = graph_reader(args=self.args, dataset=self.dataset,
                                     n_nodes=self.n_nodes)
        # Construct the training adjacency: the citation graphs train on the
        # full graph, the inductive datasets on the training-node subgraph.
        if self.dataset in ('cora', 'citeseer', 'pubmed'):
            self.adj_train = sp.csr_matrix.copy(self.adj_full)
        else:
            self.adj_train = self.adj_full[self.idx_train, :][:, self.idx_train]
        self.adj_ori = sp.csr_matrix.copy(self.adj_full)
    elif self.dataset.startswith('twitch-train'):
        p = self.dataset.find('/')
        self.adj_full = graph_reader(args=self.args,
                                     dataset=f'twitch/{self.dataset[p+1:]}',
                                     n_nodes=self.n_nodes)
        self.adj_train = self.adj_full[self.idx_train, :][:, self.idx_train]
        self.adj_ori = sp.csr_matrix.copy(self.adj_full)
    elif self.dataset.startswith('twitch'):
        self.adj_1 = graph_reader(args=self.args, dataset=self.dataset1,
                                  n_nodes=self.n_nodes_1)
        self.adj_2 = graph_reader(args=self.args, dataset=self.dataset2,
                                  n_nodes=self.n_nodes_2)
        self.adj_ori = sp.csr_matrix.copy(self.adj_2)
    elif self.dataset.startswith('deezer'):
        self.adj_1, self.features_1 = graph_reader(args=self.args,
                                                   dataset=self.dataset1,
                                                   n_nodes=self.n_nodes_1)
        self.adj_2, self.features_2 = graph_reader(args=self.args,
                                                   dataset=self.dataset2,
                                                   n_nodes=self.n_nodes_2)
        self.adj_ori = sp.csr_matrix.copy(self.adj_2)
        self.n_features = self.features_1.shape[-1]
        if torch.cuda.is_available():
            self.features_1 = self.features_1.cuda()
            self.features_2 = self.features_2.cuda()
    else:
        self.edges = graph_reader(args=self.args, dataset=self.dataset)
    # Alternative edge-sampling strategies, kept for reference:
    # self.construct_hop_dict()
    # self.exist_edges = random.sample(self.edges.tolist(), self.n_test)
    # self.nonexist_edges = random.sample(self.one_hop_edges, self.n_test)
    # self.nonexist_edges = random.sample(self.two_hop_edges, self.n_test)
    # self.nonexist_edges = random.sample(self.two_hop_edges + self.one_hop_edges, self.n_test)
    # self.nonexist_edges = []
    # cnt_nonexist = 0
    # while 1:
    #     u = np.random.choice(self.n_nodes)
    #     v = np.random.choice(self.n_nodes)
    #     if u != v and v not in self.edge_dict[u]:
    #         self.nonexist_edges.append((u, v))
    #         cnt_nonexist += 1
    #     if cnt_nonexist == self.n_test:
    #         break
    # self.labeler = Labeler(self.features, self.labels, self.n_classes,
    #                        self.idx_train, self.idx_val, self.idx_test)
    self.prepare_data()
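# The commented-out block above sketches how candidate non-edges were drawn.
# A compact, self-contained version of that rejection-sampling loop; the
# function name is hypothetical, and edge_dict is assumed to map each node
# to the set of its neighbors.
import numpy as np


def sample_nonexist_edges(n_nodes, edge_dict, n_test, rng=np.random):
    """Sample n_test node pairs that are not connected in edge_dict."""
    nonexist = []
    while len(nonexist) < n_test:
        u = rng.choice(n_nodes)
        v = rng.choice(n_nodes)
        if u != v and v not in edge_dict[u]:
            nonexist.append((u, v))
    return nonexist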
"""Model runner.""" import os from asne import ASNE from utils import graph_reader, feature_reader, parse_args, tab_printer os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" def run_asne(args, graph, features): """ Fitting an ASNE model and saving the embedding. :param args: Arguments object. :param graph: NetworkX graph. :param features: Features in a dictionary. """ tab_printer(args) model = ASNE(args, graph, features) model.train() model.save_embedding() if __name__ == "__main__": args = parse_args() graph = graph_reader(args.edge_path) features = feature_reader(args.features_path) run_asne(args, graph, features)