def load_data(args):
    dataset = args.input
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    if dataset in ['cora', 'citeseer', 'pubmed']:
        dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        return data, num_features, num_classes
    elif dataset == 'corafull':
        dataset = CoraFull(path)
    elif dataset in ['cs', 'physics']:
        dataset = Coauthor(path, name=dataset)
    elif dataset in ['computers', 'photo']:
        dataset = Amazon(path, name=dataset)
    elif dataset == 'reddit':
        dataset = Reddit(path)
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        return data, num_features, num_classes
    num_features = dataset.num_features
    num_classes = dataset.num_classes
    data = dataset[0]
    data.train_mask, data.val_mask, data.test_mask = generate_split(data, num_classes)
    return data, num_features, num_classes
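# Illustrative call only (not part of the original snippet): load_data expects an object with an
# `input` attribute, e.g. the argparse namespace of the surrounding script, and generate_split is
# the project's own helper for datasets that ship without predefined masks.
from argparse import Namespace

data, num_features, num_classes = load_data(Namespace(input='corafull'))
print(num_features, num_classes, int(data.train_mask.sum()))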
def _load_data_small_graphs(self):
    # Load the chosen data set and fill the model runner with its configuration and labels.
    # dataSetName, BOW and num_classes are expected to be defined at module level.
    data_transform = None
    print("loading data")
    self._data_path = './DataSets/{}'.format(dataSetName)
    if dataSetName == "CoraFull":
        self._data_set = CoraFull(self._data_path)
    elif dataSetName in {"CS", "Physics"}:
        self._data_set = Coauthor(self._data_path, dataSetName)
    else:
        self._data_set = Planetoid(self._data_path, dataSetName, data_transform)
    self._data_set.data.to(self._device)
    self._data = self._data_set[0]
    self._labels = self._data.y
    self._num_classes = self._data_set.num_classes
    self._g = self.create_graph()
    if BOW:
        self._X = self._data.x
        self.in_features = self._data.num_features
    else:
        # 2k-dimensional input vectors (k = number of classes)
        self._X = None
        self.in_features = num_classes * 2
        self._num_classes = num_classes
def load_dataset(dataset):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    if dataset in ['cora', 'citeseer', 'pubmed']:
        dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        data.adj = torch.zeros((data.x.size(0), data.x.size(0)))
        col, row = data.edge_index
        data.adj[col, row] = 1
        return data, num_features, num_classes
    elif dataset == 'reddit':
        dataset = Reddit(path)
    elif dataset == 'corafull':
        dataset = CoraFull(path)
    num_features = dataset.num_features
    num_classes = dataset.num_classes
    data = dataset[0]
    data.train_mask, data.val_mask, data.test_mask = generate_split(data, num_classes)
    data.adj = torch.zeros((data.x.size(0), data.x.size(0)))  # size(0), not size[0]
    col, row = data.edge_index
    data.adj[col, row] = 1
    return data, num_features, num_classes
def load_dataset(name):
    if name in ["Cora", "CiteSeer", "PubMed"]:
        dataset = Planetoid(root='./data/' + name, name=name)
    elif name == "CoraFull":
        dataset = CoraFull(root='./data/' + name)
    elif name in ["Computers", "Photo"]:
        dataset = Amazon(root='./data/' + name, name=name)
    elif name in ["CS", "Physics"]:
        dataset = Coauthor(root='./data/' + name, name=name)
    else:
        exit("wrong dataset")
    return dataset
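# A minimal usage sketch (added for illustration, not from the original source): downloads
# CoraFull into ./data/CoraFull via the helper above and prints a few dataset statistics.
dataset = load_dataset("CoraFull")
data = dataset[0]
print(dataset.num_classes, dataset.num_features, data.num_nodes, data.num_edges)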
def load_data(self):
    data_name = self._params['data_name']
    if self._params['net'] in {'combined', 'symmetric', 'asymmetric', 'combined_gcn'}:
        self._data_path = './data/{}'.format(data_name)
        gnx = nx.read_gpickle("./data/{}/gnx.pkl".format(data_name))
        bow = pickle.load(open("./data/{}/content.pkl".format(data_name), "rb"))
        nodes = sorted(gnx.nodes)
        node_index = {node: i for i, node in enumerate(nodes)}  # renamed from `dict` to avoid shadowing the builtin
        x = torch.Tensor(np.vstack([bow[node] for node in nodes])).to(self._device)
        y = torch.LongTensor([gnx.nodes[node]['label'] for node in nodes]).to(self._device)
        edges = torch.LongTensor(np.vstack([[node_index[e[0]] for e in gnx.edges],
                                            [node_index[e[1]] for e in gnx.edges]])).to(self._device)
        self._data = Data(x=x, edge_index=edges, y=y)
        self._num_features = x.shape[1]
        self._num_classes = len(gnx.graph['node_labels'])

        # Adjacency matrices
        adj = nx.adjacency_matrix(gnx, nodelist=nodes).astype(np.float32)
        if self._params['net'] == 'symmetric':
            self._adj = handle_matrix_symmetric(adj)
            self._adj = sparse_mx_to_torch_sparse_tensor(self._adj).to_dense().to(self._device)
        else:
            self._adj = handle_matrix_concat(adj, should_normalize=True)
            self._adj = sparse_mx_to_torch_sparse_tensor(self._adj).to_dense().to(self._device)
        return self._data

    data_transform = T.NormalizeFeatures() if self._params['norm'] else None
    self._data_path = './DataSets/{}'.format(data_name)
    if data_name == "CoraFull":
        self._data_set = CoraFull(self._data_path)
    elif data_name in {"CS", "Physics"}:
        self._data_set = Coauthor(self._data_path, data_name)
    else:
        self._data_set = Planetoid(self._data_path, data_name, data_transform)
    self._data_set.data.to(self._device)
    self._data = self._data_set[0]
    # self._data = self._data_set.data
    self._num_features = self._data_set.num_features
    self._num_classes = self._data_set.num_classes
    return self._data
def load_pyg_dataset(dataset_name, root='dataset/'):
    from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

    source, name = dataset_name.split('-', maxsplit=1)
    assert source in ['ogbn', 'pyg', 'custom']
    if source == 'ogbn':
        dataset = PygNodePropPredDataset(name=dataset_name, root=root)
        return dataset, dataset.get_idx_split(), Evaluator(dataset_name)
    elif source == 'pyg':
        from torch_geometric.datasets import KarateClub, CoraFull
        if name == "karate":
            dataset = KarateClub()
        elif name == "cora":
            dataset = CoraFull(root)
        else:
            raise Exception("Dataset not recognized")
        num_nodes = dataset[0].x.shape[0]
        num_train = int(num_nodes * 0.8)
        num_val = int(num_nodes * 0.1)
        perm = np.arange(num_nodes, dtype=int)
        np.random.shuffle(perm)
        split_idx = {
            'train': perm[:num_train],
            'valid': perm[num_train:num_train + num_val],
            'test': perm[num_train + num_val:]
        }
        return dataset, split_idx, Evaluator('ogbn-arxiv')
    elif source == "custom":
        from dataset import registry
        dataset = registry[name]()
        split_idx = {
            'train': dataset[0].idx_train,
            'valid': dataset[0].idx_val,
            'test': dataset[0].idx_test
        }
        return dataset, split_idx, CustomEvaluator()
    else:
        raise Exception("Dataset not recognized")
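# Hedged usage sketch (not from the source repo): 'pyg-cora' maps to CoraFull in the helper above
# and receives a random 80/10/10 node split; numpy is assumed to be imported as np in that module.
dataset, split_idx, evaluator = load_pyg_dataset('pyg-cora', root='dataset/')
print(len(split_idx['train']), len(split_idx['valid']), len(split_idx['test']))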
def load_data(dataset_name):
    """
    Loads the required data set and normalizes features.
    Implemented data sets are any of type Planetoid and Reddit.

    :param dataset_name: Name of data set
    :return: Tuple of dataset and extracted graph
    """
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset_name)
    if dataset_name == 'cora_full':
        dataset = CoraFull(path, T.NormalizeFeatures())
    elif dataset_name.lower() == 'coauthor':
        dataset = Coauthor(path, 'Physics', T.NormalizeFeatures())
    elif dataset_name.lower() == 'reddit':
        dataset = Reddit(path, T.NormalizeFeatures())
    elif dataset_name.lower() == 'amazon':
        dataset = Amazon(path)  # note: Amazon also expects a name ('Computers' or 'Photo')
    else:
        dataset = Planetoid(path, dataset_name, T.NormalizeFeatures())
    print(f"Loading data set {dataset_name} from: ", path)
    data = dataset[0]  # Extract graph
    return dataset, data
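# Illustrative call of the loader above (assumes the '../data' directory layout from the snippet):
dataset, graph = load_data('cora_full')
print(dataset.num_classes, graph.num_nodes, graph.num_edges)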
    'dataset': [],
    'run': [],
    'n_epochs': [],
    'method': [],
    'accuracy': [],
    'mean': [],
    'std_dev': []
}

for dataset_name in ['PubMed', 'Cora']:  # CoraFull omitted for this run
    # define data
    # dataset_name = 'Cora'  # 'PubMed', 'CoraFull'
    path = osp.join(os.getcwd(), '..', 'data', dataset_name)
    if dataset_name == 'CoraFull':
        dataset = CoraFull(path, T.NormalizeFeatures())
    else:
        dataset = Planetoid(path, dataset_name, T.NormalizeFeatures())
    data = dataset[0]
    data.batch = None
    data.adj = to_dense_adj(data.edge_index)

    # data
    training_fraction = 0.05
    if dataset_name == 'CoraFull':
        data.test_mask = torch.empty(size=torch.Size([data.x.shape[0]]), dtype=torch.bool)
        data.val_mask = torch.empty(size=torch.Size([data.x.shape[0]]), dtype=torch.bool)
        data.train_mask = torch.empty(size=torch.Size([data.x.shape[0]]), dtype=torch.bool)
pr = parser.parse_args()
label_ids = defaultdict(list)
if pr.net == 1:
    print("Data Cora")
    _data = Planetoid(root="./pcora", name="Cora")
elif pr.net == 2:
    print("Data CiteSeer")
    _data = Planetoid(root="./pciteseer", name="Citeseer")
elif pr.net == 3:
    print("Data Pubmed")
    _data = Planetoid(root="./ppubmed", name="Pubmed")
elif pr.net == 4:
    print("Data CoraFull")
    _data = CoraFull("./Corafull")
elif pr.net == 5:
    print("Data Coauthor CS")
    _data = Coauthor("./CS", "CS")
elif pr.net == 6:
    print("Data Coauthor Physics")
    _data = Coauthor("./Physics", "Physics")
elif pr.net == 7:
    print("Data Amazon Computer")
    _data = Amazon("./Computer", "Computers")
elif pr.net == 8:
    print("Data Amazon Photos")
    _data = Amazon("./Photo", "Photo")
# _data = Coauthor("./Physics", "Physics")
# _data = Coauthor("./CS", "CS")
from torch_geometric.datasets import Planetoid, CoraFull

for dataset_name in ['Cora', 'PubMed', 'CoraFull']:
    print(dataset_name)
    if dataset_name == 'CoraFull':
        dataset = CoraFull(root='/tmp/CoraFull')
    elif dataset_name == 'PubMed':
        dataset = Planetoid(root='/tmp/PubMed', name=dataset_name)
    else:
        dataset = Planetoid(root='/tmp/Cora', name=dataset_name)
    print("num classes=", dataset.num_classes)
    data = dataset[0]
    print("num nodes=", data.num_nodes)
    print("num edges=", data.num_edges / 2)
    print("num features=", dataset.num_node_features)
def TrainingNet(dataset, modelName, params, num_pre_epochs, num_epochs, NumCutoff,
                optimizerName, MonteSize, savepath):
    Batch_size = int(params[0])
    root = '/git/data/GraphData/' + dataset
    TestAccs = []
    for Monte_iter in range(MonteSize):
        # Data
        start_epoch = 0  # start from epoch 0 or last checkpoint epoch
        NewNetworkSizeAdjust = []
        WeightsDynamicsEvolution = []

        # model
        if dataset == 'Cora' or dataset == 'Citeseer' or dataset == 'Pubmed':
            datasetroot = Planetoid(root=root, name=dataset, transform=T.NormalizeFeatures()).shuffle()
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            [net, model_to_save] = ModelAndSave(dataset, modelName, datasetroot, params, num_epochs)
            criterion = nn.CrossEntropyLoss()
        elif dataset == "CoraFull":
            datasetroot = CoraFull(root=root, transform=T.NormalizeFeatures()).shuffle()
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            [net, model_to_save] = ModelAndSave(dataset, modelName, datasetroot, params, num_epochs)
        elif dataset == "Amazon":
            datasetroot = Amazon(root, "Photo", transform=None, pre_transform=None)
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            testloader = DataListLoader(datasetroot, batch_size=100, shuffle=False)
            [net, model_to_save] = ModelAndSave(dataset, modelName, datasetroot, params, num_epochs)
        elif dataset == 'ENZYMES' or dataset == 'MUTAG':
            datasetroot = TUDataset(root, name=dataset, use_node_attr=True)
            Num = len(datasetroot) // 10
            global train_dataset, test_dataset
            train_dataset = datasetroot[:Num]
            test_dataset = datasetroot[Num:]
            trainloader = DataLoader(train_dataset, batch_size=Batch_size)
            testloader = DataLoader(test_dataset, batch_size=60)
            [net, model_to_save] = ModelAndSave(dataset, modelName, datasetroot, params, num_epochs)
        elif dataset == "PPI":
            train_dataset = PPI(root, split='train')
            test_dataset = PPI(root, split='test')
            trainloader = DataLoader(train_dataset, batch_size=Batch_size, shuffle=True)
            testloader = DataLoader(test_dataset, batch_size=1, shuffle=False)
            [net, model_to_save] = ModelAndSave(dataset, modelName, train_dataset, params, num_epochs)
            criterion = torch.nn.BCEWithLogitsLoss()
        elif dataset == "Reddit":
            datasetroot = Reddit(root)
            trainloader = DataListLoader(datasetroot, batch_size=1, shuffle=True)
            testloader = DataListLoader(datasetroot, batch_size=2, shuffle=False)
            [net, model_to_save] = ModelAndSave(dataset, modelName, datasetroot, params, num_epochs)
            criterion = torch.nn.BCEWithLogitsLoss()
        elif dataset == 'MNIST':
            datasetroot = MNISTSuperpixels(root=root, transform=T.Cartesian())
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            testloader = DataListLoader(datasetroot, batch_size=100, shuffle=False)
            [net, model_to_save] = ModelAndSave(dataset, modelName, datasetroot, params, num_epochs)
        elif dataset == 'CIFAR10':
            pass
        else:
            raise Exception("Input wrong dataset!!")

        FileName = "{}-{}-param_{}_{}_{}_{}-monte_{}".format(
            dataset, modelName, params[0], params[1], params[2], params[3], Monte_iter)
        print('Let\'s use', torch.cuda.device_count(), 'GPUs!')
        global device
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        optimizer = optim.Adam(net.parameters(), lr=params[3], betas=(0.9, 0.999),
                               eps=1e-08, weight_decay=0, amsgrad=False)
        criterion = nn.CrossEntropyLoss()
        net = net.to(device)
        # cudnn.benchmark = True
        logging('dataset:{}, Batch size: {}, Number of layers:{} ConCoeff: {}, LR:{}, MonteSize:{}'
                .format(dataset, params[0], params[1], params[2], params[3], Monte_iter))
        mark = "{}/{}Convergence/DiagElement-{}".format(savepath, dataset, FileName)

        PreTrainConvergence, PreTestConvergence, PreTestAcc = TrainPart(
            modelName, datasetroot, start_epoch, num_pre_epochs, trainloader, testloader,
            net, optimizer, criterion, NumCutoff, mark, False, model_to_save)
        print('dataset: {}, model name: {}, Number of epochs: {}, Pre-train error is: {}, '
              'Pre-test error is: {}, test acc is {}'.format(
                  dataset, modelName, num_pre_epochs, PreTrainConvergence[-1],
                  PreTestConvergence[-1], PreTestAcc[-1]))

        NewNetworksize, NewNetworkWeight = RetainNetworkSize(net, params[2])[0:2]
        NetworkInfo = [NewNetworksize[0:-1], NewNetworkWeight]
        OptimizedNet = ChooseModel(modelName, datasetroot, NetworkInfo)
        NewNetworksize.insert(0, datasetroot.num_features)
        NewNetworkSizeAdjust.append(NewNetworksize[0:-1])
        print(NewNetworkSizeAdjust)

        # OptimizedNet.apply(init_weights)
        # OptimizedNet = DataParallel(OptimizedNet)
        OptimizedNet = OptimizedNet.to(device)
        cudnn.benchmark = True

        criterionNew = nn.CrossEntropyLoss()
        if optimizerName == "SGD":
            optimizerNew = getattr(optim, optimizerName)(
                OptimizedNet.parameters(), lr=params[3], momentum=0.9, weight_decay=5e-4)
        elif optimizerName == "Adam":
            optimizerNew = getattr(optim, optimizerName)(
                OptimizedNet.parameters(), lr=params[3], betas=(0.9, 0.999), eps=1e-08,
                weight_decay=5e-4, amsgrad=False)

        TrainConvergence, TestConvergence, TestAcc = TrainPart(
            modelName, datasetroot, start_epoch, num_epochs, trainloader, testloader,
            OptimizedNet, optimizerNew, criterionNew, NumCutoff, mark, True, model_to_save)

        np.save("{}/{}Convergence/TrainConvergence-{}".format(savepath, dataset, FileName),
                TrainConvergence)
        np.save("{}/{}Convergence/TestConvergence-{}".format(savepath, dataset, FileName),
                TestConvergence)
        np.save("{}/{}Convergence/NewNetworkSizeAdjust-{}".format(savepath, dataset, FileName),
                NewNetworkSizeAdjust)
        # np.save(savepath + 'TestConvergence-' + FileName, TestConvergence)
        # torch.cuda.empty_cache()
        print('dataset: {}, model name:{}, resized network size is {}, Number of epochs:{}, '
              'Train error is: {}, Test error is: {}, test acc is {}\n'.format(
                  dataset, modelName, NewNetworksize[0:-1], num_epochs,
                  TrainConvergence[-1], TestConvergence[-1], TestAcc[-1]))
        TestAccs.append(TestAcc)
        np.save("{}/{}Convergence/MeanTestAccs-{}".format(savepath, dataset, FileName), TestAccs)
        print("The change of test error is:{}".format(TestAccs))
        print_nvidia_useage()
def get_small_dataset(dataset_name, normalize_attributes=False, add_self_loops=False,
                      remove_isolated_nodes=False, make_undirected=False,
                      graph_availability=None, seed=0, create_adjacency_lists=True):
    """
    Get the pytorch_geometric.data.Data object associated with the specified dataset name.

    :param dataset_name: str => One of the datasets mentioned below.
    :param normalize_attributes: Whether the attributes for each node should be normalized to sum to 1.
    :param add_self_loops: Add self loops to the input Graph.
    :param remove_isolated_nodes: Remove isolated nodes.
    :param make_undirected: Make the Graph undirected.
    :param graph_availability: Either inductive or transductive. If transductive, all the graph
                               nodes are available during training. Otherwise, only training split
                               nodes are available.
    :param seed: The random seed to use while splitting into train/val/test splits.
    :param create_adjacency_lists: Whether to process and store adjacency lists that can be used
                                   for efficient r-radius neighborhood sampling.
    :return: A pytorch_geometric.data.Data object for that dataset.
    """
    assert dataset_name in {
        'amazon-computers', 'amazon-photo', 'citeseer', 'coauthor-cs', 'coauthor-physics',
        'cora', 'cora-full', 'ppi', 'pubmed', 'reddit'
    }
    assert graph_availability in {'inductive', 'transductive'}

    # Compose transforms that should be applied.
    transforms = []
    if normalize_attributes:
        transforms.append(NormalizeFeatures())
    if remove_isolated_nodes:
        transforms.append(RemoveIsolatedNodes())
    if add_self_loops:
        transforms.append(AddSelfLoops())
    transforms = Compose(transforms) if transforms else None

    # Load the specified dataset and apply transforms.
    root_dir = '/tmp/{dir}'.format(dir=dataset_name)
    processed_dir = os.path.join(root_dir, dataset_name, 'processed')

    # Remove any previously pre-processed data, so pytorch_geometric can pre-process it again.
    if os.path.exists(processed_dir) and os.path.isdir(processed_dir):
        shutil.rmtree(processed_dir)

    data = None

    def split_function(y):
        return _get_train_val_test_masks(y.shape[0], y, 0.2, 0.2, seed)

    if dataset_name in ['citeseer', 'cora', 'pubmed']:
        data = Planetoid(root=root_dir, name=dataset_name, pre_transform=transforms, split='full').data
        if seed != 0:
            data.train_mask, data.val_mask, data.test_mask = split_function(data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'cora-full':
        data = CoraFull(root=root_dir, pre_transform=transforms).data
        data.train_mask, data.val_mask, data.test_mask = split_function(data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'amazon-computers':
        data = Amazon(root=root_dir, name='Computers', pre_transform=transforms).data
        data.train_mask, data.val_mask, data.test_mask = split_function(data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'amazon-photo':
        data = Amazon(root=root_dir, name='Photo', pre_transform=transforms).data
        data.train_mask, data.val_mask, data.test_mask = split_function(data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'coauthor-cs':
        data = Coauthor(root=root_dir, name='CS', pre_transform=transforms).data
        data.train_mask, data.val_mask, data.test_mask = split_function(data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'coauthor-physics':
        data = Coauthor(root=root_dir, name='Physics', pre_transform=transforms).data
        data.train_mask, data.val_mask, data.test_mask = split_function(data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'reddit':
        data = Reddit(root=root_dir, pre_transform=transforms).data
        if seed != 0:
            data.train_mask, data.val_mask, data.test_mask = split_function(data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'ppi':
        data = SimpleNamespace()
        data.graphs = []
        for split in ['train', 'val', 'test']:
            split_data = PPI(root=root_dir, split=split, pre_transform=transforms)
            x_idxs = split_data.slices['x'].numpy()
            edge_idxs = split_data.slices['edge_index'].numpy()
            split_data = split_data.data
            for x_start, x_end, e_start, e_end in zip(x_idxs, x_idxs[1:], edge_idxs, edge_idxs[1:]):
                graph = Data(split_data.x[x_start:x_end],
                             split_data.edge_index[:, e_start:e_end],
                             y=split_data.y[x_start:x_end])
                graph.num_nodes = int(x_end - x_start)
                graph.split = split
                all_true = torch.ones(graph.num_nodes).bool()
                all_false = torch.zeros(graph.num_nodes).bool()
                graph.train_mask = all_true if split == 'train' else all_false
                graph.val_mask = all_true if split == 'val' else all_false
                graph.test_mask = all_true if split == 'test' else all_false
                data.graphs.append(graph)
        if seed != 0:
            temp_random = random.Random(seed)
            val_graphs = temp_random.sample(range(len(data.graphs)), 2)
            test_candidates = [graph_idx for graph_idx in range(len(data.graphs))
                               if graph_idx not in val_graphs]
            test_graphs = temp_random.sample(test_candidates, 2)
            for graph_idx, graph in enumerate(data.graphs):
                all_true = torch.ones(graph.num_nodes).bool()
                all_false = torch.zeros(graph.num_nodes).bool()
                graph.split = ('test' if graph_idx in test_graphs
                               else 'val' if graph_idx in val_graphs else 'train')
                graph.train_mask = all_true if graph.split == 'train' else all_false
                graph.val_mask = all_true if graph.split == 'val' else all_false
                graph.test_mask = all_true if graph.split == 'test' else all_false

    if make_undirected:
        for graph in data.graphs:
            graph.edge_index = to_undirected(graph.edge_index, graph.num_nodes)

    LOG.info(f'Downloaded and transformed {len(data.graphs)} graph(s).')

    # Populate adjacency lists for efficient k-neighborhood sampling.
    # Only retain edges coming into a node and reverse the edges for the purpose of adjacency lists.
    LOG.info('Processing adjacency lists and degree information.')
    for graph in data.graphs:
        train_in_degrees = np.zeros(graph.num_nodes, dtype=np.int64)
        val_in_degrees = np.zeros(graph.num_nodes, dtype=np.int64)
        test_in_degrees = np.zeros(graph.num_nodes, dtype=np.int64)
        adjacency_lists = defaultdict(list)
        not_val_test_mask = (~graph.val_mask & ~graph.test_mask).numpy()
        val_mask = graph.val_mask.numpy()
        test_mask = graph.test_mask.numpy()

        if create_adjacency_lists:
            num_edges = graph.edge_index[0].shape[0]
            sources, dests = graph.edge_index[0].numpy(), graph.edge_index[1].numpy()
            for source, dest in tqdm(zip(sources, dests), total=num_edges, leave=False):
                if not_val_test_mask[dest] and not_val_test_mask[source]:
                    train_in_degrees[dest] += 1
                    val_in_degrees[dest] += 1
                elif val_mask[dest] and not test_mask[source]:
                    val_in_degrees[dest] += 1
                test_in_degrees[dest] += 1
                adjacency_lists[dest].append(source)

        graph.adjacency_lists = dict(adjacency_lists)
        graph.train_in_degrees = torch.from_numpy(train_in_degrees).long()
        graph.val_in_degrees = torch.from_numpy(val_in_degrees).long()
        graph.test_in_degrees = torch.from_numpy(test_in_degrees).long()
        if graph_availability == 'transductive':
            graph.train_in_degrees = data.test_in_degrees
            graph.val_in_degrees = data.test_in_degrees
        graph.graph_availability = graph_availability

        # To accumulate any neighborhood perturbations to the graph.
        graph.perturbed_neighborhoods = defaultdict(set)
        graph.added_nodes = defaultdict(set)
        graph.modified_degrees = {}

        # For small datasets, cache the neighborhoods for all nodes for at least 3 different radii queries.
        graph.use_cache = True
        graph.neighborhood_cache = NeighborhoodCache(graph.num_nodes * 3)

        graph.train_mask_original = graph.train_mask
        graph.val_mask_original = graph.val_mask
        graph.test_mask_original = graph.test_mask
        graph.train_mask = torch.ones(graph.num_nodes).bool() & ~graph.val_mask & ~graph.test_mask

    return data
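# Illustrative call only (names follow the function above; _get_train_val_test_masks,
# NeighborhoodCache and LOG are assumed to be defined elsewhere in the same module):
data = get_small_dataset('cora-full', normalize_attributes=True,
                         graph_availability='transductive', seed=42)
graph = data.graphs[0]
print(graph.train_mask.sum().item(), graph.val_mask.sum().item(), graph.test_mask.sum().item())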
def TrainingNet(dataset, modelName, params, num_pre_epochs, num_epochs, NumCutoff,
                optimizerName, LinkPredictionMethod, MonteSize, savepath):
    Batch_size = params[0]
    VectorPairs = params[4]
    StartTopoCoeffi = params[5]
    WeightCorrectionCoeffi = params[6]
    interval = params[7]
    root = '/git/data/GraphData/' + dataset
    TestAccs = []
    for Monte_iter in range(MonteSize):
        # Data
        NewNetworkSizeAdjust = []
        WeightsDynamicsEvolution = []
        trainValRatio = [0.2, 0.4]

        # model
        if dataset == 'Cora' or dataset == 'Citeseer' or dataset == 'Pubmed':
            datasetroot = Planetoid(root=root, name=dataset, transform=T.NormalizeFeatures()).shuffle()
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            """
            train_mask, val_mask, test_mask = DataSampler(trainValRatio, datasetroot.data.num_nodes)
            DataMask = {}
            DataMask['train_mask'] = train_mask
            DataMask['val_mask'] = val_mask
            DataMask['test_mask'] = test_mask
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            """
            num_features = datasetroot.num_features
            num_classes = datasetroot.num_classes
            criterion = nn.CrossEntropyLoss()
        elif dataset == "CoraFull":
            datasetroot = CoraFull(root=root, transform=T.NormalizeFeatures()).shuffle()
            """
            train_mask, val_mask, test_mask = DataSampler(trainValRatio, datasetroot.data.num_nodes)
            DataMask = {}
            DataMask['train_mask'] = train_mask
            DataMask['val_mask'] = val_mask
            DataMask['test_mask'] = test_mask
            """
            criterion = nn.CrossEntropyLoss()
            num_features = datasetroot.num_features
            num_classes = datasetroot.num_classes
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=False)
        elif dataset == 'ENZYMES' or dataset == 'MUTAG':
            datasetroot = TUDataset(root, name=dataset, use_node_attr=True)
            trainloader = DataLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            num_features = datasetroot.num_features
            num_classes = datasetroot.num_classes
        elif dataset == "PPI":
            train_dataset = PPI(root, split='train')
            val_dataset = PPI(root, split='val')
            test_dataset = PPI(root, split='test')
            trainloader = DataListLoader(train_dataset, batch_size=Batch_size, shuffle=True)
            valloader = DataListLoader(val_dataset, batch_size=100, shuffle=False)
            testloader = DataListLoader(test_dataset, batch_size=100, shuffle=False)
            num_classes = train_dataset.num_classes
            num_features = train_dataset.num_features
            criterion = torch.nn.BCEWithLogitsLoss()
        elif dataset == "Reddit":
            datasetroot = Reddit(root)
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            testloader = DataListLoader(datasetroot, batch_size=2, shuffle=False)
        elif dataset == "Amazon":
            datasetroot = Amazon(root, "Photo", transform=None, pre_transform=None)
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            testloader = DataListLoader(datasetroot, batch_size=100, shuffle=False)
        elif dataset == 'MNIST':
            datasetroot = MNISTSuperpixels(root=root, transform=T.Cartesian())
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            testloader = DataListLoader(datasetroot, batch_size=100, shuffle=False)
        elif dataset == 'CIFAR10':
            pass
        else:
            raise Exception("Input wrong dataset!!")

        width = ContractionLayerCoefficients(num_features, *params[1:3])
        net = ChooseModel(modelName, num_features, num_classes, width)
        FileName = "{}-{}-param_{}_{}_{}_{}-monte_{}".format(
            dataset, modelName, interval, WeightCorrectionCoeffi, StartTopoCoeffi, VectorPairs, Monte_iter)
        print('Let\'s use', torch.cuda.device_count(), 'GPUs!')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        criterion = criterion.to(device)
        net = DataParallel(net)
        net = net.to(device)
        optimizer = getattr(optim, optimizerName)(net.parameters(), lr=params[3],
                                                  momentum=0.9, weight_decay=5e-4)
        model_to_save = './checkpoint/{}-{}-param_{}_{}_{}_{}-ckpt.pth'.format(
            dataset, modelName, params[0], params[1], params[5], params[4])
        if resume == "True" and os.path.exists(model_to_save):
            [net, optimizer, TrainConvergence, TestConvergence, Acc] = ResumeModel(net, optimizer, model_to_save)
            start_epoch = len(TrainConvergence)
        else:
            start_epoch = 0  # start from epoch 0 or last checkpoint epoch

        # cudnn.benchmark = True
        logging('dataset:{}, Batch size: {}, Number of layers:{} ConCoeff: {}, LR:{}, MonteSize:{}'.format(
            dataset, params[0], params[1], params[2], params[3], Monte_iter))
        mark = "{}{}Convergence/DiagElement-{}".format(savepath, dataset, FileName)
        markweights = "{}{}Convergence/WeightChanges-{}".format(savepath, dataset, FileName)

        PreTrainConvergence, PreTestConvergence, PreAcc = TrainPart(
            start_epoch, num_pre_epochs, num_classes, trainloader, net, optimizer, criterion,
            NumCutoff, LinkPredictionMethod, VectorPairs, WeightCorrectionCoeffi, StartTopoCoeffi,
            mark, markweights, model_to_save, False)
        print('dataset: {}, model name:{}, epoch:{}, Pre-train error:{}; Pre-test error:{}; test acc:{}'.format(
            dataset, modelName, num_pre_epochs, PreTrainConvergence[-1], PreTestConvergence[-1], PreAcc))

        NewNetworksize = RetainNetworkSize(net, params[2])
        OptimizedNet = ChooseModel(modelName, num_features, num_classes, NewNetworksize[0:-1])
        NewNetworksize.insert(0, num_features)
        NewNetworkSizeAdjust.append(NewNetworksize[0:-1])
        print(NewNetworkSizeAdjust)

        # OptimizedNet.apply(init_weights)
        OptimizedNet = DataParallel(OptimizedNet)
        OptimizedNet = OptimizedNet.to(device)
        cudnn.benchmark = True

        # Begin pre-training
        if optimizerName == "SGD":
            optimizerNew = getattr(optim, optimizerName)(
                OptimizedNet.parameters(), lr=params[3], momentum=0.9, weight_decay=5e-4)
        elif optimizerName == "Adam":
            optimizerNew = getattr(optim, optimizerName)(
                OptimizedNet.parameters(), lr=params[3], betas=(0.9, 0.999), eps=1e-08,
                weight_decay=5e-4, amsgrad=False)

        TrainConvergence, TestConvergence, TestAcc = TrainPart(
            start_epoch, num_epochs, datasetroot.num_classes, trainloader, OptimizedNet,
            optimizerNew, criterion, NumCutoff, LinkPredictionMethod, VectorPairs,
            WeightCorrectionCoeffi, StartTopoCoeffi, mark, markweights, model_to_save, True)

        np.save("{}/{}Convergence/AlgebraicConectivityTrainConvergence-{}".format(savepath, dataset, FileName),
                TrainConvergence)
        np.save("{}/{}Convergence/AlgebraicConectivityTestConvergence-{}".format(savepath, dataset, FileName),
                TestConvergence)
        # np.save("{}/{}Convergence/NewNetworkSizeAdjust-{}".format(savepath, dataset, FileName), NewNetworkSizeAdjust)
        # torch.cuda.empty_cache()
        print('dataset: {}, model name:{}, resized network size: {}, Number of epochs:{}, '
              'train error: {}, test error: {}, test acc:{}\n'.format(
                  dataset, modelName, NewNetworksize[0:-1], num_epochs,
                  TrainConvergence[-1], TestConvergence[-1], TestAcc))
        TestAccs.append(TestAcc)  # append before saving, so the saved array is not None
        np.save("{}/{}Convergence/AlgebraicConectivityMeanTestAccs-{}".format(savepath, dataset, FileName),
                TestAccs)
        print_nvidia_useage()