def model():
    # type: () -> GraphConvPredictor
    mlp = MLP(out_dim=class_num, hidden_dim=n_unit)
    ggnn = GGNN(out_dim=out_dim, hidden_channels=n_unit,
                n_edge_types=n_edge_types)
    return GraphConvPredictor(ggnn, mlp)
def test_mlp(self):
    batch_size = 2
    input_dim = 16
    out_dim = 4
    model = MLP(out_dim=out_dim)
    data = numpy.random.rand(batch_size, input_dim).astype(numpy.float32)
    y_actual = cuda.to_cpu(model(data).data)
    assert y_actual.shape == (batch_size, out_dim)
def __init__(self, out_channels, nn=None):
    # type: (int, chainer.Link) -> None
    super(EdgeNet, self).__init__()
    if nn is None:
        # Imported here to avoid a circular reference.
        from chainer_chemistry.models.mlp import MLP
        nn = MLP(out_dim=out_channels ** 2, hidden_dim=16)
    if not isinstance(nn, chainer.Link):
        raise ValueError('nn {} must be chainer.Link'.format(nn))
    with self.init_scope():
        self.nn_layer_in = nn
        # The same link cannot be registered twice inside init_scope, and
        # the in/out edge networks should not share weights; register an
        # independent copy (requires `import copy` at module level).
        self.nn_layer_out = copy.deepcopy(nn)
    self.out_channels = out_channels
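# Illustrative usage sketch for EdgeNet (not from the repository; the names
# below are hypothetical). The out_dim=out_channels ** 2 default suggests the
# nn output is reshaped into an (out_channels, out_channels) message matrix
# per edge, as in the MPNN edge network.
from chainer_chemistry.models.mlp import MLP

edge_net_default = EdgeNet(out_channels=8)  # builds the default MLP internally
edge_net_custom = EdgeNet(
    out_channels=8, nn=MLP(out_dim=8 ** 2, hidden_dim=32))
# A non-Link argument is rejected:
#     EdgeNet(out_channels=8, nn=lambda x: x)  # raises ValueError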
def __init__(self, in_channels=None, hidden_channels=16, out_channels=None,
             dropout_ratio=0.5, n_layers=2, **kwargs):
    # Imported here to avoid a circular reference.
    from chainer_chemistry.models.mlp import MLP
    if out_channels is None:
        out_channels = hidden_channels
    super(GINSparseUpdate, self).__init__()
    with self.init_scope():
        self.mlp = MLP(
            out_dim=out_channels,
            hidden_dim=hidden_channels,
            n_layers=n_layers,
            activation=functions.relu
        )
    self.dropout_ratio = dropout_ratio
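# A small sketch (hypothetical call sites, not from the repository): when
# out_channels is omitted, GINSparseUpdate falls back to hidden_channels, so
# these two updates construct identical internal MLPs with out_dim=32.
update_default = GINSparseUpdate(hidden_channels=32)
update_explicit = GINSparseUpdate(hidden_channels=32, out_channels=32)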
def set_up_predictor(
        method,  # type: str
        n_unit,  # type: int
        conv_layers,  # type: int
        class_num,  # type: int
        label_scaler=None,  # type: Optional[chainer.Link]
        postprocess_fn=None,  # type: Optional[chainer.FunctionNode]
        conv_kwargs=None  # type: Optional[Dict[str, Any]]
):
    # type: (...) -> GraphConvPredictor
    """Set up the predictor, consisting of a GCN and an MLP.

    Args:
        method (str): Method name.
        n_unit (int): Number of hidden units.
        conv_layers (int): Number of convolutional layers for the graph
            convolution network.
        class_num (int): Number of output classes.
        label_scaler (chainer.Link or None): Scaler link.
        postprocess_fn (chainer.FunctionNode or None): Postprocess function
            for prediction.
        conv_kwargs (dict): Keyword args for the graph convolution model.
    """
    mlp = MLP(out_dim=class_num, hidden_dim=n_unit)  # type: Optional[MLP]
    if conv_kwargs is None:
        conv_kwargs = {}

    if method == 'nfp':
        print('Set up NFP predictor...')
        conv = NFP(out_dim=n_unit, hidden_channels=n_unit,
                   n_update_layers=conv_layers, **conv_kwargs)
    elif method == 'ggnn':
        print('Set up GGNN predictor...')
        conv = GGNN(out_dim=n_unit, hidden_channels=n_unit,
                    n_update_layers=conv_layers, **conv_kwargs)
    elif method == 'schnet':
        print('Set up SchNet predictor...')
        # SchNet predicts class_num values directly; no trailing MLP is used.
        conv = SchNet(out_dim=class_num, hidden_channels=n_unit,
                      n_update_layers=conv_layers, **conv_kwargs)
        mlp = None
    elif method == 'weavenet':
        print('Set up WeaveNet predictor...')
        conv = WeaveNet(hidden_dim=n_unit, **conv_kwargs)
    elif method == 'rsgcn':
        print('Set up RSGCN predictor...')
        conv = RSGCN(out_dim=n_unit, hidden_channels=n_unit,
                     n_update_layers=conv_layers, **conv_kwargs)
    elif method == 'relgcn':
        print('Set up Relational GCN predictor...')
        num_edge_type = 4
        conv = RelGCN(out_dim=n_unit, n_edge_types=num_edge_type,
                      scale_adj=True, **conv_kwargs)
    elif method == 'relgat':
        print('Set up Relational GAT predictor...')
        conv = RelGAT(out_dim=n_unit, hidden_channels=n_unit,
                      n_update_layers=conv_layers, **conv_kwargs)
    elif method == 'gin':
        print('Set up GIN predictor...')
        conv = GIN(out_dim=n_unit, hidden_channels=n_unit,
                   n_update_layers=conv_layers, **conv_kwargs)
    elif method == 'nfp_gwm':
        print('Set up NFP_GWM predictor...')
        conv = NFP_GWM(out_dim=n_unit, hidden_channels=n_unit,
                       n_update_layers=conv_layers, **conv_kwargs)
    elif method == 'ggnn_gwm':
        print('Set up GGNN_GWM predictor...')
        conv = GGNN_GWM(out_dim=n_unit, hidden_channels=n_unit,
                        n_update_layers=conv_layers, **conv_kwargs)
    elif method == 'rsgcn_gwm':
        print('Set up RSGCN_GWM predictor...')
        conv = RSGCN_GWM(out_dim=n_unit, hidden_channels=n_unit,
                         n_update_layers=conv_layers, **conv_kwargs)
    elif method == 'gin_gwm':
        print('Set up GIN_GWM predictor...')
        conv = GIN_GWM(out_dim=n_unit, hidden_channels=n_unit,
                       n_update_layers=conv_layers, **conv_kwargs)
    else:
        raise ValueError('[ERROR] Invalid method: {}'.format(method))

    predictor = GraphConvPredictor(conv, mlp, label_scaler, postprocess_fn)
    return predictor
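# A hedged usage sketch for set_up_predictor, mirroring the call in main()
# below; the argument values are illustrative only.
predictor = set_up_predictor('ggnn', n_unit=16, conv_layers=4, class_num=1)
# For method='schnet' the returned GraphConvPredictor carries mlp=None,
# because SchNet already outputs class_num values itself.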
def main():
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data
    n_unit = args.unit_num
    conv_layers = args.conv_layers

    if method == 'node2vec':
        # Recover the node2vec hyperparameters (r, p, q) encoded in the last
        # three hyphen-separated fields of the model filename.
        fname = args.modelpath.split('/')[-1].rsplit('.', 1)[0]
        r, p, q = fname.split('-')[-3:]
        r, p, q = int(r[1:]), float(p[1:]), float(q[1:])
        print(args.modelpath)
        print(f"r={r}, p={p}, q={q}")

    task_type = molnet_default_config[dataset_name]['task_type']
    model_filename = {
        'classification': 'classifier.pkl',
        'regression': 'regressor.pkl'
    }

    print('Using dataset: {}...'.format(dataset_name))

    # Set up the cache directory and the number of output classes.
    if args.label:
        labels = args.label
        if method == 'node2vec':
            cache_dir = os.path.join(
                args.datadir,
                '{}_{}_r{}_p{}_q{}_{}'.format(dataset_name, method,
                                              r, p, q, labels))
        else:
            cache_dir = os.path.join(
                args.datadir, '{}_{}_{}'.format(dataset_name, method, labels))
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        if method == 'node2vec':
            cache_dir = os.path.join(
                args.datadir,
                '{}_{}_r{}_p{}_q{}_all'.format(dataset_name, method, r, p, q))
        else:
            cache_dir = os.path.join(
                args.datadir, '{}_{}_all'.format(dataset_name, method))
        class_num = len(molnet_default_config[args.dataset]['tasks'])

    # Load the train and validation parts of the dataset.
    filenames = [dataset_part_filename(p, num_data)
                 for p in ['train', 'valid']]
    paths = [os.path.join(cache_dir, f) for f in filenames]
    if all([os.path.exists(path) for path in paths]):
        dataset_parts = []
        for path in paths:
            print('Loading cached dataset from {}.'.format(path))
            dataset_parts.append(NumpyTupleDataset.load(path))
    else:
        dataset_parts = download_entire_dataset(dataset_name, num_data,
                                                labels, method, cache_dir,
                                                modelpath=args.modelpath)

    # Scale the label values, if necessary.
    scaler = None
    if args.scale == 'standardize':
        if task_type == 'regression':
            print('Applying standard scaling to the labels.')
            scaler, dataset_parts = fit_scaler(dataset_parts)
        else:
            print('Label scaling is not available for classification tasks.')
    else:
        print('No label scaling was selected.')
    train, valid = dataset_parts[0], dataset_parts[1]

    # Set up the predictor.
    if method == 'node2vec':
        predictor = MLP(class_num, n_unit)
    else:
        predictor = set_up_predictor(method, n_unit, conv_layers, class_num,
                                     label_scaler=scaler)

    # Set up the iterators.
    train_iter = iterators.SerialIterator(train, args.batchsize)
    valid_iter = iterators.SerialIterator(valid, args.batchsize,
                                          repeat=False, shuffle=False)

    # Load metrics for the current dataset.
    metrics = molnet_default_config[dataset_name]['metrics']
    metrics_fun = {k: v for k, v in metrics.items()
                   if isinstance(v, types.FunctionType)}
    loss_fun = molnet_default_config[dataset_name]['loss']

    device = chainer.get_device(args.device)
    if task_type == 'regression':
        model = Regressor(predictor, lossfun=loss_fun,
                          metrics_fun=metrics_fun, device=device)
    elif task_type == 'classification':
        model = Classifier(predictor, lossfun=loss_fun,
                           metrics_fun=metrics_fun, device=device)
    else:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    # Set up the optimizer.
    optimizer = optimizers.Adam(0.0005)
    optimizer.setup(model)

    # Save model-related output to this directory.
    if not os.path.exists(args.out):
        os.makedirs(args.out)
    save_json(os.path.join(args.out, 'args.json'), vars(args))
    model_dir = os.path.join(args.out, os.path.basename(cache_dir))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Save the scaler so that predictions can be un-scaled later.
    if args.scale == 'standardize' and task_type == 'regression':
        pkl.dump(scaler, open(os.path.join(cache_dir,
                                           'standardize_scaler.pkl'), 'wb'))

    # Set up the updater.
    if method == 'node2vec':
        converter = converter_method_dict['nfp']  # concat_mols
    else:
        converter = converter_method_dict[method]
    updater = training.StandardUpdater(train_iter, optimizer, device=device,
                                       converter=converter)

    # Set up the trainer.
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=model_dir)
    trainer.extend(
        E.Evaluator(valid_iter, model, device=device, converter=converter))
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())

    # TODO: consider go/no-go of the following block
    # (i) more reporting for validation/evaluation
    # (ii) best validation score snapshot
    if task_type == 'regression':
        metric_name_list = list(metrics.keys())
        if 'RMSE' in metric_name_list:
            trainer.extend(E.snapshot_object(
                model, "best_val_" + model_filename[task_type]),
                trigger=training.triggers.MinValueTrigger(
                    'validation/main/RMSE'))
        elif 'MAE' in metric_name_list:
            trainer.extend(E.snapshot_object(
                model, "best_val_" + model_filename[task_type]),
                trigger=training.triggers.MinValueTrigger(
                    'validation/main/MAE'))
        else:
            print("[WARNING] No validation metric defined?")
    elif task_type == 'classification':
        train_eval_iter = iterators.SerialIterator(
            train, args.batchsize, repeat=False, shuffle=False)
        if dataset_name in ['muv', 'pcba']:
            trainer.extend(PRCAUCEvaluator(
                train_eval_iter, predictor, eval_func=predictor,
                device=device, converter=converter, name='train',
                pos_labels=1, ignore_labels=-1, raise_value_error=False))
            # The extension name 'validation' is already used by `Evaluator`,
            # so the name 'val' is used instead.
            trainer.extend(PRCAUCEvaluator(
                valid_iter, predictor, eval_func=predictor,
                device=device, converter=converter, name='val',
                pos_labels=1, ignore_labels=-1, raise_value_error=False))
            trainer.extend(E.snapshot_object(
                model, "best_val_" + model_filename[task_type]),
                trigger=training.triggers.MaxValueTrigger('val/main/prc_auc'))
        else:
            trainer.extend(ROCAUCEvaluator(
                train_eval_iter, predictor, eval_func=predictor,
                device=device, converter=converter, name='train',
                pos_labels=1, ignore_labels=-1, raise_value_error=False))
            # The extension name 'validation' is already used by `Evaluator`,
            # so the name 'val' is used instead.
            trainer.extend(ROCAUCEvaluator(
                valid_iter, predictor, eval_func=predictor,
                device=device, converter=converter, name='val',
                pos_labels=1, ignore_labels=-1, raise_value_error=False))
            trainer.extend(E.snapshot_object(
                model, "best_val_" + model_filename[task_type]),
                trigger=training.triggers.MaxValueTrigger('val/main/roc_auc'))
    else:
        raise NotImplementedError(
            'Not implemented task_type = {}'.format(task_type))

    trainer.extend(AutoPrintReport())
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Save the model's parameters.
    model_path = os.path.join(model_dir, model_filename[task_type])
    print('Saving the trained model to {}...'.format(model_path))
    model.save_pickle(model_path, protocol=args.protocol)
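# Worked example of the node2vec filename convention parsed at the top of
# main(): the last three hyphen-separated fields of the model path stem carry
# r, p and q, each prefixed by its letter. The filename below is illustrative,
# not taken from the source.
fname = 'node2vec-r10-p1.0-q2.0.model'.split('/')[-1].rsplit('.', 1)[0]
r, p, q = fname.split('-')[-3:]
r, p, q = int(r[1:]), float(p[1:]), float(q[1:])
assert (r, p, q) == (10, 1.0, 2.0)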
def test_mlp_assert_raises():
    with pytest.raises(ValueError):
        MLP(out_dim=out_dim, n_layers=-1)
def model():
    return MLP(out_dim=out_dim)