def load_dataset():
    """Load raw datasets.

    Dispatches on ``cfg.dataset.format``: registered custom loaders are tried
    first (a loader signals a miss by returning None), then the built-in
    PyG / networkx / OGB loaders.

    :return: a list of networkx/deepsnap graphs, plus additional info if
        needed (the OGB branch also returns the official split indices)
    :raises ValueError: on an unknown format or unsupported OGB dataset name
    """
    # Renamed from `format` to avoid shadowing the builtin.
    data_format = cfg.dataset.format
    name = cfg.dataset.name
    # dataset_dir = '{}/{}'.format(cfg.dataset.dir, name)
    dataset_dir = cfg.dataset.dir
    # Try to load customized data format
    for func in register.loader_dict.values():
        graphs = func(data_format, name, dataset_dir)
        if graphs is not None:
            return graphs
    # Load from Pytorch Geometric dataset
    if data_format == 'PyG':
        graphs = load_pyg(name, dataset_dir)
    # Load from networkx formatted data
    # todo: clean nx dataloader
    elif data_format == 'nx':
        graphs = load_nx(name, dataset_dir)
    # Load from OGB formatted data
    elif data_format == 'OGB':
        if name == 'ogbg-molhiv':
            dataset = PygGraphPropPredDataset(name=name)
            graphs = GraphDataset.pyg_to_graphs(dataset)
            # Note this is only used for custom splits from OGB
            split_idx = dataset.get_idx_split()
            return graphs, split_idx
        # BUG FIX: previously any other OGB name fell through to an
        # undefined-variable NameError; fail with a clear message instead.
        raise ValueError('Unsupported OGB dataset: {}'.format(name))
    else:
        raise ValueError('Unknown data format: {}'.format(data_format))
    return graphs
def get_molhiv():
    """Return the official (train, valid, test) splits of ogbg-molhiv plus
    the maximum number of nodes over all graphs in the dataset."""
    root = osp.dirname(osp.realpath(__file__))
    dataset = PygGraphPropPredDataset(name='ogbg-molhiv', root=root)
    splits = dataset.get_idx_split()
    max_num_nodes = torch.tensor(dataset.data.num_nodes).max().item()
    train, valid, test = (dataset[splits[key]]
                          for key in ("train", "valid", "test"))
    return train, valid, test, max_num_nodes
def main():
    """Evaluate a pretrained DeeperGCN checkpoint on the train/valid/test
    splits of an ogbg-mol dataset and print the per-split metric."""
    args = ArgsInit().args
    # Resolve the device: --use_gpu is honoured only when CUDA is present.
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device('cpu')
    dataset = PygGraphPropPredDataset(name=args.dataset)
    args.num_tasks = dataset.num_tasks
    print(args)
    if args.feature == 'simple':
        print('using simple feature')
        # only retain the top two node/edge features
        dataset.data.x = dataset.data.x[:, :2]
        dataset.data.edge_attr = dataset.data.edge_attr[:, :2]
    split_idx = dataset.get_idx_split()
    evaluator = Evaluator(args.dataset)
    # Evaluation only, so no loader shuffles.
    loaders = {
        split: DataLoader(dataset[split_idx[split]],
                          batch_size=args.batch_size,
                          shuffle=False,
                          num_workers=args.num_workers)
        for split in ("train", "valid", "test")
    }
    model = DeeperGCN(args)
    checkpoint = torch.load(args.model_load_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    metric = dataset.eval_metric
    scores = {
        label: eval(model, device, loaders[split], evaluator)[metric]
        for label, split in (("Train", "train"), ("Validation", "valid"),
                             ("Test", "test"))
    }
    print(scores)
    model.print_params(final=True)
def setup(self, stage: Optional[str] = None):
    """Load data. Set variables: self.data_train, self.data_val, self.data_test."""
    # Skip if any split is already populated (setup may run more than once).
    if self.data_train or self.data_val or self.data_test:
        return
    dataset = PygGraphPropPredDataset(name="ogbg-molpcba",
                                      root=self.data_dir,
                                      transform=self.transform)
    idx = dataset.get_idx_split()
    self.data_train = dataset[idx["train"]]
    self.data_val = dataset[idx["valid"]]
    self.data_test = dataset[idx["test"]]
def __init__(self, train):
    """Wrap the ogbg-molhiv dataset, exposing either the official train
    split or the official test split.

    :param train: truthy to expose the train split, falsy for the test split
    """
    super(Mol_pred_DNN_dataset, self).__init__()
    self.train = train
    dataset_name = 'ogbg-molhiv'
    mol_origin_dataset = PygGraphPropPredDataset(name=dataset_name)
    split_idx = mol_origin_dataset.get_idx_split()
    # Idiomatic truthiness test (was `== True`); also dropped an unused
    # Evaluator instance that was constructed and never referenced.
    if self.train:
        self.mol_origin_dataset = mol_origin_dataset[split_idx["train"]]
    else:
        self.mol_origin_dataset = mol_origin_dataset[split_idx["test"]]
def mol_pred_GNN_prepare(batch_size=50):
    """Build DataLoaders for ogbg-molhiv.

    :param batch_size: batch size for both loaders
    :return: (train_loader, test_loader)

    NOTE(review): the validation split is never returned; if early stopping
    is needed, a valid loader must be added — confirm this is intentional.
    Dead locals (an unused Evaluator and an unused valid loader) removed.
    """
    dataset_name = 'ogbg-molhiv'
    dataset = PygGraphPropPredDataset(name=dataset_name)
    split_idx = dataset.get_idx_split()
    train_loader = DataLoader(dataset[split_idx["train"]],
                              batch_size=batch_size,
                              shuffle=True)
    test_loader = DataLoader(dataset[split_idx["test"]],
                             batch_size=batch_size,
                             shuffle=False)
    return train_loader, test_loader
def main():
    """Evaluate a pretrained DeeperGCN checkpoint on the ogbg-ppa splits
    and print per-split accuracy."""
    args = ArgsInit().args
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device('cpu')
    # Node features: either constant zeros, or aggregated edge features.
    if args.not_extract_node_feature:
        transform = add_zeros
    else:
        transform = partial(extract_node_feature, reduce=args.aggr)
    dataset = PygGraphPropPredDataset(name=args.dataset, transform=transform)
    args.num_tasks = dataset.num_classes
    evaluator = Evaluator(args.dataset)
    split_idx = dataset.get_idx_split()
    loaders = {
        split: DataLoader(dataset[split_idx[split]],
                          batch_size=args.batch_size,
                          shuffle=False,
                          num_workers=args.num_workers)
        for split in ("train", "valid", "test")
    }
    print(args)
    model = DeeperGCN(args)
    model.load_state_dict(torch.load(args.model_load_path)['model_state_dict'])
    model.to(device)
    accuracies = {
        label: eval(model, device, loaders[split], evaluator)
        for label, split in (("Train", "train"), ("Validation", "valid"),
                             ("Test", "test"))
    }
    print(accuracies)
    model.print_params(final=True)
def mol_data(root, dataset, batch_size=32, num_workers=4):
    """Return a dict of train/valid/test DataLoaders for an ogbg-mol*
    dataset; only the train loader shuffles."""
    pyg_dataset = PygGraphPropPredDataset(name=f"ogbg-mol{dataset}", root=root)
    split_idx = pyg_dataset.get_idx_split()
    return {
        split: DataLoader(
            pyg_dataset[split_idx[split]],
            batch_size=batch_size,
            shuffle=split == "train",
            num_workers=num_workers,
        )
        for split in ("train", "valid", "test")
    }
def train_dataloader(self):
    """Build, cache and return the DataLoader for the ogbg-molhiv
    train split (shuffled)."""
    dataset = PygGraphPropPredDataset(name='ogbg-molhiv')
    train_split = dataset.get_idx_split()["train"]
    data = dataset[train_split]
    loader = DataLoader(data,
                        batch_size=self.configuration["batch_size"],
                        shuffle=True,
                        num_workers=self.configuration["num_workers"])
    # Cache for later inspection by other methods.
    self._train_data = data
    self._train_loader = loader
    return loader
def val_dataloader(self):
    """Build, cache and return the DataLoader for the ogbg-molhiv
    validation split (not shuffled)."""
    dataset = PygGraphPropPredDataset(name='ogbg-molhiv')
    valid_split = dataset.get_idx_split()["valid"]
    data = dataset[valid_split]
    loader = DataLoader(data,
                        batch_size=self.configuration["batch_size"],
                        shuffle=False,
                        num_workers=self.configuration["num_workers"])
    # Cache for later inspection by other methods.
    self._validation_data = data
    self._validation_loader = loader
    return loader
def __init__(self, version=None, root_dir='data', download=False, split_scheme='official'):
    """WILDS wrapper around the ogbg-molpcba dataset.

    :param version: must be None — versioning is delegated to the ogb package
    :param root_dir: directory where the ogb dataset is stored/downloaded
    :param split_scheme: 'official' (alias of 'scaffold')
    :raises ValueError: if a version is supplied
    """
    self._version = version
    if version is not None:
        raise ValueError(
            'Versioning for OGB-MolPCBA is handled through the OGB package. Please set version=none.'
        )
    # internally call ogb package
    self.ogb_dataset = PygGraphPropPredDataset(name='ogbg-molpcba', root=root_dir)
    # set variables
    self._data_dir = self.ogb_dataset.root
    if split_scheme == 'official':
        split_scheme = 'scaffold'
    self._split_scheme = split_scheme
    # although the task is binary classification, the prediction target
    # contains nan values, thus we need float
    self._y_type = 'float'
    self._y_size = self.ogb_dataset.num_tasks
    self._n_classes = self.ogb_dataset.__num_classes__
    self._split_array = torch.zeros(len(self.ogb_dataset)).long()
    split_idx = self.ogb_dataset.get_idx_split()
    self._split_array[split_idx['train']] = 0
    self._split_array[split_idx['valid']] = 1
    self._split_array[split_idx['test']] = 2
    self._y_array = self.ogb_dataset.data.y
    self._metadata_fields = ['scaffold']
    metadata_file_path = os.path.join(self.ogb_dataset.root, 'raw',
                                      'scaffold_group.npy')
    if not os.path.exists(metadata_file_path):
        download_url(
            'https://snap.stanford.edu/ogb/data/misc/ogbg_molpcba/scaffold_group.npy',
            os.path.join(self.ogb_dataset.root, 'raw'))
    self._metadata_array = torch.from_numpy(
        np.load(metadata_file_path)).reshape(-1, 1).long()
    # BUG FIX: lexicographic string comparison is wrong for versions
    # (e.g. '1.10.0' < '1.7.0'); compare the parsed (major, minor) tuple.
    pyg_version = tuple(
        int(part) for part in torch_geometric.__version__.split('.')[:2])
    if pyg_version >= (1, 7):
        self._collate = PyGCollater(follow_batch=[], exclude_keys=[])
    else:
        self._collate = PyGCollater(follow_batch=[])
    self._metric = Evaluator('ogbg-molpcba')
    super().__init__(root_dir, download, split_scheme)
def load_graphs(ogb_name):
    """Load an OGB graph-prediction dataset, returning the target dimension
    and materialized split lists. Training graphs without edges are dropped."""
    dataset = PygGraphPropPredDataset(ogb_name, root='data', transform=preproc)
    out_dim = dataset[0].y.shape[1]
    idx = dataset.get_idx_split()
    print("Preprocessing Graphs...")
    # Filter edgeless graphs while iterating the (tqdm-wrapped) train split.
    train_graphs = [g for g in tqdm(dataset[idx["train"]]) if g.num_edges > 0]
    valid_graphs = list(dataset[idx["valid"]])
    test_graphs = list(dataset[idx["test"]])
    return out_dim, train_graphs, valid_graphs, test_graphs
def __init__(self, path):
    """Ensure ogbg-molpcba exists under `path`, then initialize the wrapper
    and record its evaluation metric and loss as class attributes."""
    name = "ogbg-molpcba"
    # Instantiating the OGB dataset triggers download/processing if needed.
    PygGraphPropPredDataset(name=name, root=path)
    super(OGBGmolpcbaDataset, self).__init__(name, path)
    setattr(OGBGmolpcbaDataset, "metric", "AP")
    setattr(OGBGmolpcbaDataset, "loss", "binary_cross_entropy_with_logits")
def __init__(self, path):
    """Ensure ogbg-molhiv exists under `path`, then initialize the wrapper
    and record its evaluation metric and loss as class attributes."""
    name = "ogbg-molhiv"
    # Instantiating the OGB dataset triggers download/processing if needed.
    PygGraphPropPredDataset(name=name, root=path)
    super(OGBGmolhivDataset, self).__init__(name, path)
    setattr(OGBGmolhivDataset, "metric", "ROC-AUC")
    setattr(OGBGmolhivDataset, "loss", "BCEWithLogitsLoss")
def __init__(self, args=None):
    """Initialize the ogbg-molpcba dataset, downloading it first when the
    expected data directory is missing."""
    name = "ogbg-molpcba"
    data_path = osp.join(osp.dirname(osp.realpath(__file__)), "../..",
                         "data", name)
    if not osp.exists(data_path):
        # Trigger the OGB download/extraction into data_path.
        PygGraphPropPredDataset(name, data_path)
    super(OGBMolpcbaDataset, self).__init__(data_path, name)
def get_loader(self, args):
    """Return (train, valid, test) DataLoaders built from the official
    split indices; only the train loader shuffles."""
    split_index = self.get_idx_split()
    root = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data")
    dataset = PygGraphPropPredDataset(self.name, root)
    loaders = tuple(
        DataLoader(dataset[split_index[split]],
                   batch_size=args.batch_size,
                   shuffle=(split == "train"))
        for split in ("train", "valid", "test"))
    return loaders
def __init__(self, path):
    """Ensure ogbg-code exists under `path`, then initialize the wrapper
    and record its evaluation metric and loss as class attributes."""
    name = "ogbg-code"
    # Instantiating the OGB dataset triggers download/processing if needed.
    PygGraphPropPredDataset(name=name, root=path)
    super(OGBGcodeDataset, self).__init__(name, path)
    setattr(OGBGcodeDataset, "metric", "F1 score")
    setattr(OGBGcodeDataset, "loss", "cross_entropy")
def __init__(self, path):
    """Ensure ogbg-ppa exists under `path`, then initialize the wrapper
    and record its evaluation metric and loss as class attributes."""
    name = "ogbg-ppa"
    # Instantiating the OGB dataset triggers download/processing if needed.
    PygGraphPropPredDataset(name=name, root=path)
    super(OGBGppaDataset, self).__init__(name, path)
    setattr(OGBGppaDataset, "metric", "Accuracy")
    setattr(OGBGppaDataset, "loss", "CrossEntropyLoss")
def __init__(self):
    """Initialize the ogbg-code dataset, downloading it first when the
    expected data directory is missing."""
    name = "ogbg-code"
    data_path = osp.join(osp.dirname(osp.realpath(__file__)), "../..",
                         "data", name)
    if not osp.exists(data_path):
        # Trigger the OGB download/extraction into data_path.
        PygGraphPropPredDataset(name, data_path)
    super(OGBCodeDataset, self).__init__(data_path, name)
def load_data(self):
    """Load the OGB dataset named by args.data, record task statistics on
    args, print a summary line, and return the dataset."""
    dataset = PygGraphPropPredDataset(name=self.args.data)
    self.args.task_type = dataset.task_type
    self.args.num_features = dataset.num_features
    # NOTE: num_classes is populated from num_tasks, mirroring OGB's
    # multi-task binary setup.
    self.args.num_classes = dataset.num_tasks
    self.args.avg_num_nodes = np.ceil(
        np.mean([data.num_nodes for data in dataset]))
    print('# %s: [Task]-%s [FEATURES]-%d [NUM_CLASSES]-%d [AVG_NODES]-%d' %
          (dataset, self.args.task_type, self.args.num_features,
           self.args.num_classes, self.args.avg_num_nodes))
    return dataset
def load_ogb(name, dataset_dir):
    """Load an OGB dataset and attach split information as dataset attributes.

    Dispatches on the name prefix: 'ogbn' (node-level), 'ogbg' (graph-level)
    or 'ogbl' (link-level).

    :param name: OGB dataset name (e.g. 'ogbn-arxiv', 'ogbg-molhiv')
    :param dataset_dir: root directory for the downloaded dataset
    :return: the PyG dataset with split masks/indices attached
    :raises ValueError: if the name does not match a known OGB prefix
    """
    if name[:4] == 'ogbn':
        dataset = PygNodePropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_idx_split()
        split_names = ['train_mask', 'val_mask', 'test_mask']
        for i, key in enumerate(splits.keys()):
            mask = index2mask(splits[key], size=dataset.data.y.shape[0])
            set_dataset_attr(dataset, split_names[i], mask, len(mask))
        # Node-level graphs are treated as undirected.
        edge_index = to_undirected(dataset.data.edge_index)
        set_dataset_attr(dataset, 'edge_index', edge_index,
                         edge_index.shape[1])
    elif name[:4] == 'ogbg':
        dataset = PygGraphPropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_idx_split()
        split_names = [
            'train_graph_index', 'val_graph_index', 'test_graph_index'
        ]
        for i, key in enumerate(splits.keys()):
            # renamed from `id` to avoid shadowing the builtin
            graph_index = splits[key]
            set_dataset_attr(dataset, split_names[i], graph_index,
                             len(graph_index))
    elif name[:4] == 'ogbl':
        dataset = PygLinkPropPredDataset(name=name, root=dataset_dir)
        splits = dataset.get_edge_split()
        edge_pos = splits['train']['edge'].T
        if cfg.dataset.resample_negative:
            set_dataset_attr(dataset, 'train_pos_edge_index', edge_pos,
                             edge_pos.shape[1])
            # todo: applying transform for negative sampling is very slow
            dataset.transform = neg_sampling_transform
        else:
            edge_neg = negative_sampling(edge_index=edge_pos,
                                         num_nodes=dataset.data.num_nodes[0],
                                         num_neg_samples=edge_pos.shape[1])
            edge_all = torch.cat([edge_pos, edge_neg], dim=-1)
            label = get_link_label(edge_pos, edge_neg)
            set_dataset_attr(dataset, 'train_edge_index', edge_all,
                             edge_all.shape[1])
            set_dataset_attr(dataset, 'train_edge_label', label, len(label))
        # valid/test splits share identical handling; fold the duplicated
        # code into one loop.
        for split, prefix in (('valid', 'val'), ('test', 'test')):
            edge_pos = splits[split]['edge'].T
            edge_neg = splits[split]['edge_neg'].T
            edge_all = torch.cat([edge_pos, edge_neg], dim=-1)
            label = get_link_label(edge_pos, edge_neg)
            set_dataset_attr(dataset, prefix + '_edge_index', edge_all,
                             edge_all.shape[1])
            set_dataset_attr(dataset, prefix + '_edge_label', label,
                             len(label))
    else:
        # BUG FIX: the format placeholder was never filled in.
        raise ValueError('OGB dataset: {} non-exist'.format(name))
    return dataset
def __init__(self, root_dir='data', download=False, split_scheme='official'):
    """WILDS-style wrapper around the ogbg-molpcba dataset.

    :param root_dir: directory where the ogb dataset is stored/downloaded
    :param split_scheme: 'official' (alias of 'scaffold')
    """
    # internally call ogb package
    self.ogb_dataset = PygGraphPropPredDataset(name='ogbg-molpcba', root=root_dir)
    # set variables
    self._dataset_name = 'ogbg-molpcba'
    self._data_dir = self.ogb_dataset.root
    if split_scheme == 'official':
        split_scheme = 'scaffold'
    self._split_scheme = split_scheme
    # although the task is binary classification, the prediction target
    # contains nan values, thus we need float
    self._y_type = 'float'
    self._y_size = self.ogb_dataset.num_tasks
    self._n_classes = self.ogb_dataset.__num_classes__
    self._split_array = torch.zeros(len(self.ogb_dataset)).long()
    split_idx = self.ogb_dataset.get_idx_split()
    self._split_array[split_idx['train']] = 0
    self._split_array[split_idx['valid']] = 1
    self._split_array[split_idx['test']] = 2
    self._y_array = self.ogb_dataset.data.y
    self._metadata_fields = ['scaffold']
    metadata_file_path = os.path.join(self.ogb_dataset.root, 'raw',
                                      'scaffold_group.npy')
    if not os.path.exists(metadata_file_path):
        # BUG FIX: the URL here was an empty string, so the download could
        # never succeed; use the scaffold-group file published by OGB (the
        # same URL as the versioned wrapper elsewhere in this file).
        download_url(
            'https://snap.stanford.edu/ogb/data/misc/ogbg_molpcba/scaffold_group.npy',
            os.path.join(self.ogb_dataset.root, 'raw'))
    self._metadata_array = torch.from_numpy(
        np.load(metadata_file_path)).reshape(-1, 1).long()
    self._collate = PyGCollater(follow_batch=[])
    self._metric = Evaluator('ogbg-molpcba')
    super().__init__(root_dir, download, split_scheme)
def get_loader(self, args):
    """Return (train, valid, test) DataLoaders built from the official
    split indices; only the train loader shuffles."""
    split_index = self.get_idx_split()
    dataset = PygGraphPropPredDataset(self.name, osp.join("data", self.name))
    loaders = tuple(
        DataLoader(dataset[split_index[split]],
                   batch_size=args.batch_size,
                   shuffle=(split == "train"))
        for split in ("train", "valid", "test"))
    return loaders
def code_data(
    root,
    batch_size=128,
    num_vocab=VOCAB_SIZE,
    seq_len=SEQ_LEN,
    use_old_code_dataset=False,
):
    """Build DataLoaders for ogbg-code/ogbg-code2 and return them together
    with the index-to-token vocabulary."""
    name = "ogbg-code" if use_old_code_dataset else "ogbg-code2"
    dataset = PygGraphPropPredDataset(name, root=root)
    split_idx = dataset.get_idx_split()
    # The vocabulary is derived from training targets only.
    train_targets = [dataset.data.y[i] for i in split_idx["train"]]
    vocab2idx, idx2vocab = get_vocab_mapping(train_targets, num_vocab)
    # Augment edges and encode targets on the fly.
    dataset.transform = transforms.Compose(
        [augment_edge, lambda data: encode_y_to_arr(data, vocab2idx, seq_len)])
    loaders = {
        split: DataLoader(
            dataset[split_idx[split]],
            batch_size=batch_size,
            shuffle=split == "train",
            num_workers=2,
        )
        for split in ("train", "valid", "test")
    }
    return loaders, idx2vocab
def load_ogb_data(path, name, degree_as_tag):
    """Load an OGB graph dataset as a list of lightweight Graph namedtuples.

    Splits and preprocessing follow https://github.com/snap-stanford/ogb.
    """
    def add_zeros(data):
        # ogbg-ppa has no node features; substitute all-zero integer features.
        data.x = torch.zeros(data.num_nodes, dtype=torch.long)
        return data

    transform = add_zeros if name == 'ogbg-ppa' else None
    print('Applying transform {} to dataset {}.'.format(transform, name))
    dataset = PygGraphPropPredDataset(name=name, root=path, transform=transform)
    Graph = namedtuple('Graph',
                       ['node_features', 'edge_mat', 'edge_features', 'label'])
    graph_list = [
        Graph(datum.x, datum.edge_index, datum.edge_attr, datum.y)
        for datum in dataset
    ]
    num_classes = (dataset.num_classes
                   if name == 'ogbg-ppa' else dataset.num_tasks)
    return graph_list, num_classes
def main():
    """Train a GNN baseline on ogbg-ppi, tracking accuracy curves and
    optionally saving the best-epoch results to a file."""
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on ogbg-ppi data with Pytorch Geometrics')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn', type=str, default='gin-virtual',
        help=
        'GNN gin, gin-virtual, or gcn, or gcn-virtual (default: gin-virtual)')
    parser.add_argument('--drop_ratio', type=float, default=0.5,
                        help='dropout ratio (default: 0.5)')
    parser.add_argument(
        '--num_layer', type=int, default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim', type=int, default=300,
        help='dimensionality of hidden units in GNNs (default: 300)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers', type=int, default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--dataset', type=str, default="ogbg-ppi",
                        help='dataset name (default: ogbg-ppi)')
    parser.add_argument('--filename', type=str, default="",
                        help='filename to output result (default: )')
    args = parser.parse_args()

    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")

    ### automatic dataloading and splitting
    dataset = PygGraphPropPredDataset(name=args.dataset, transform=add_zeros)
    split_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = Evaluator(args.dataset)
    loaders = {
        split: DataLoader(dataset[split_idx[split]],
                          batch_size=args.batch_size,
                          shuffle=split == "train",
                          num_workers=args.num_workers)
        for split in ("train", "valid", "test")
    }

    # Model family and virtual-node flag are encoded in the --gnn choice.
    gnn_configs = {
        'gin': ('gin', False),
        'gin-virtual': ('gin', True),
        'gcn': ('gcn', False),
        'gcn-virtual': ('gcn', True),
    }
    if args.gnn not in gnn_configs:
        raise ValueError('Invalid GNN type')
    gnn_type, virtual_node = gnn_configs[args.gnn]
    model = GNN(gnn_type=gnn_type,
                num_class=37,
                emb_dim=args.emb_dim,
                drop_ratio=args.drop_ratio,
                virtual_node=virtual_node).to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_curve, valid_curve, test_curve = [], [], []
    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train(model, device, loaders["train"], optimizer)

        print('Evaluating...')
        train_perf = eval(model, device, loaders["train"], evaluator)
        valid_perf = eval(model, device, loaders["valid"], evaluator)
        test_perf = eval(model, device, loaders["test"], evaluator)
        print({
            'Train': train_perf,
            'Validation': valid_perf,
            'Test': test_perf
        })
        train_curve.append(train_perf['acc'])
        valid_curve.append(valid_perf['acc'])
        test_curve.append(test_perf['acc'])

    # Report the epoch with the best validation accuracy.
    best_val_epoch = np.argmax(np.array(valid_curve))
    best_train = max(train_curve)
    print('Finished training!')
    print('Best validation score: {}'.format(valid_curve[best_val_epoch]))
    print('Test score: {}'.format(test_curve[best_val_epoch]))

    if args.filename:
        torch.save(
            {
                'Val': valid_curve[best_val_epoch],
                'Test': test_curve[best_val_epoch],
                'Train': train_curve[best_val_epoch],
                'BestTrain': best_train
            }, args.filename)
def prepare_data(self):
    """Download data if needed. This method is called only from a single GPU.
    Do not use it to assign state (self.x = y).
    The pre-transform is applied before the dataset is cached on disk."""
    PygGraphPropPredDataset(
        name="ogbg-molpcba",
        root=self.data_dir,
        pre_transform=self.pre_transform,
    )
from ogb.graphproppred import PygGraphPropPredDataset
import os

# Export the official OGB train/valid/test splits of several graph-property
# datasets to plain-text index files (one index per line), following the
# `10fold_idx` directory layout.
root_folder = '/vol/deform/gbouritsas/datasets/'
datasets = ['ogbg-molpcba', 'ogbg-molhiv', 'ogbg-ppa']
for name in datasets:
    dataset = PygGraphPropPredDataset(name=name,
                                      root=os.path.join(
                                          root_folder, 'ogb',
                                          '{}'.format(name)))
    split_idx = dataset.get_idx_split()
    # Iterate a tuple (deterministic order) instead of a set literal.
    for split_name in ('train', 'valid', 'test'):
        idxs = split_idx[split_name]
        # BUG FIX: the original used `is not 'valid'` — identity comparison
        # against a string literal, which is implementation-dependent and a
        # SyntaxWarning on Python >= 3.8. Use equality, and write to a new
        # variable instead of rebinding the loop variable.
        out_name = split_name if split_name != 'valid' else 'val'
        save_folder = os.path.join(root_folder, 'ogb', '{}'.format(name),
                                   '10fold_idx')
        if not os.path.exists(save_folder):
            os.makedirs(save_folder)
        with open(os.path.join(save_folder, '{}_idx-0.txt'.format(out_name)),
                  'w') as handle:
            for idx in idxs:
                handle.write('{}\n'.format(idx))
# Third-party imports for the EGC example: OGB atom encoder, torch layers,
# LR scheduling, and PyTorch Geometric data/convolution utilities.
from ogb.graphproppred.mol_encoder import AtomEncoder
from torch.nn import BatchNorm1d, Linear, ReLU, Sequential
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch_geometric.transforms as T
from torch_geometric.loader import DataLoader
from torch_geometric.nn import EGConv, global_mean_pool

# Command-line flag toggling between the single-aggregator (EGC-S) and
# multi-aggregator (EGC-M) model variants.
parser = argparse.ArgumentParser()
parser.add_argument('--use_multi_aggregators', action='store_true',
                    help='Switch between EGC-S and EGC-M')
args = parser.parse_args()

# Load ogbg-molhiv with adjacency stored as a SparseTensor; the
# pre-transform runs once, before the processed dataset is cached on disk.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'OGB')
dataset = OGBG('ogbg-molhiv', path, pre_transform=T.ToSparseTensor())
evaluator = Evaluator('ogbg-molhiv')

# Official scaffold split indices from OGB.
split_idx = dataset.get_idx_split()
train_dataset = dataset[split_idx['train']]
val_dataset = dataset[split_idx['valid']]
test_dataset = dataset[split_idx['test']]

# Only the training loader shuffles; evaluation loaders use larger batches.
train_loader = DataLoader(train_dataset, batch_size=32, num_workers=4,
                          shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=256)
test_loader = DataLoader(test_dataset, batch_size=256)
def main():
    """Train DeeperGCN (with FLAG adversarial training) on an ogbg-mol
    dataset, logging per-epoch metrics and tracking the best results."""
    args = ArgsInit().save_exp()
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device('cpu')
    sub_dir = 'BS_{}-NF_{}'.format(args.batch_size, args.feature)
    dataset = PygGraphPropPredDataset(name=args.dataset)
    args.num_tasks = dataset.num_tasks
    logging.info('%s' % args)
    if args.feature == 'simple':
        print('using simple feature')
        # only retain the top two node/edge features
        dataset.data.x = dataset.data.x[:, :2]
        dataset.data.edge_attr = dataset.data.edge_attr[:, :2]
    evaluator = Evaluator(args.dataset)
    split_idx = dataset.get_idx_split()
    loaders = {
        split: DataLoader(dataset[split_idx[split]],
                          batch_size=args.batch_size,
                          shuffle=split == "train",
                          num_workers=args.num_workers)
        for split in ("train", "valid", "test")
    }
    model = DeeperGCN(args).to(device)
    logging.info(model)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    results = {
        'highest_valid': 0,
        'final_train': 0,
        'final_test': 0,
        'highest_train': 0
    }
    start_time = time.time()
    metric = dataset.eval_metric
    for epoch in range(1, args.epochs + 1):
        logging.info("=====Epoch {}".format(epoch))
        logging.info('Training...')
        # FLAG adversarial training variant of the plain train() loop.
        epoch_loss = train_flag(model, device, loaders["train"], optimizer,
                                dataset.task_type, args)
        logging.info('Evaluating...')
        train_result = eval(model, device, loaders["train"],
                            evaluator)[metric]
        valid_result = eval(model, device, loaders["valid"],
                            evaluator)[metric]
        test_result = eval(model, device, loaders["test"], evaluator)[metric]
        logging.info({
            'Train': train_result,
            'Validation': valid_result,
            'Test': test_result
        })
        model.print_params(epoch=epoch)
        if train_result > results['highest_train']:
            results['highest_train'] = train_result
        # Track the test/train results at the best-validation epoch.
        if valid_result > results['highest_valid']:
            results['highest_valid'] = valid_result
            results['final_train'] = train_result
            results['final_test'] = test_result
    logging.info("%s" % results)
    total_time = time.time() - start_time
    logging.info('Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(total_time))))