Example #1
def model():
    # type: () -> GraphConvPredictor
    # Readout MLP that maps graph-level features to class scores.
    mlp = MLP(out_dim=class_num, hidden_dim=n_unit)
    # Gated Graph Neural Network used as the graph convolution.
    ggnn = GGNN(out_dim=out_dim,
                hidden_channels=n_unit,
                n_edge_types=n_edge_types)
    return GraphConvPredictor(ggnn, mlp)
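
A minimal usage sketch for this factory. The input shapes are illustrative, the free names (class_num, n_unit, out_dim, n_edge_types) are assumed to be module-level constants from the original test, and the adjacency array follows the GGNN convention of one slice per edge type:

import numpy

# Hypothetical batch: 2 molecules with 5 atoms each.
atoms = numpy.random.randint(0, 10, size=(2, 5)).astype(numpy.int32)
adjs = numpy.random.rand(2, n_edge_types, 5, 5).astype(numpy.float32)

predictor = model()
out = predictor(atoms, adjs)  # expected shape: (2, class_num)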
Example #2
    def test_mlp(self):
        batch_size = 2
        input_dim = 16
        out_dim = 4

        # The input dimension is not passed to MLP; it is inferred lazily at
        # the first forward pass.
        model = MLP(out_dim=out_dim)
        data = numpy.random.rand(batch_size, input_dim).astype(numpy.float32)
        y_actual = cuda.to_cpu(model(data).data)
        assert y_actual.shape == (batch_size, out_dim)
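
A hypothetical variation making the remaining knobs explicit; hidden_dim, n_layers, and activation are the MLP parameters seen elsewhere in these examples:

from chainer import functions

model = MLP(out_dim=4, hidden_dim=16, n_layers=2,
            activation=functions.relu)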
Example #3
    def __init__(self, out_channels, nn=None):
        # type: (int, chainer.Link) -> None
        import copy

        super(EdgeNet, self).__init__()
        if nn is None:
            # Local import to avoid a circular reference.
            from chainer_chemistry.models.mlp import MLP
            nn = MLP(out_dim=out_channels**2, hidden_dim=16)
        if not isinstance(nn, chainer.Link):
            raise ValueError('nn {} must be chainer.Link'.format(nn))
        with self.init_scope():
            self.nn_layer_in = nn
            # Chainer cannot register the same Link instance under two names,
            # so the output branch gets an independent deep copy.
            self.nn_layer_out = copy.deepcopy(nn)
        self.out_channels = out_channels
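
A hypothetical construction sketch covering both accepted forms of nn; the sizes are illustrative:

from chainer_chemistry.models.mlp import MLP

edge_net = EdgeNet(out_channels=8)  # builds the default MLP internally

custom_nn = MLP(out_dim=8 ** 2, hidden_dim=32)
edge_net_custom = EdgeNet(out_channels=8, nn=custom_nn)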
Example #4
    def __init__(self, in_channels=None, hidden_channels=16, out_channels=None,
                 dropout_ratio=0.5, n_layers=2, **kwargs):
        # Local import to avoid a circular reference.
        from chainer_chemistry.models.mlp import MLP

        if out_channels is None:
            out_channels = hidden_channels
        super(GINSparseUpdate, self).__init__()
        with self.init_scope():
            # GIN update step: an MLP applied to the aggregated node features.
            self.mlp = MLP(
                out_dim=out_channels, hidden_dim=hidden_channels,
                n_layers=n_layers, activation=functions.relu
            )
        self.dropout_ratio = dropout_ratio
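
A hypothetical instantiation relying only on the defaults shown above; note that out_channels falls back to hidden_channels when omitted:

update = GINSparseUpdate(hidden_channels=32)  # out_channels defaults to 32
update_wide = GINSparseUpdate(hidden_channels=32, out_channels=64, n_layers=3)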
Example #5
def set_up_predictor(
    method,  # type: str
    n_unit,  # type: int
    conv_layers,  # type: int
    class_num,  # type: int
    label_scaler=None,  # type: Optional[chainer.Link]
    postprocess_fn=None,  # type: Optional[chainer.FunctionNode]
    conv_kwargs=None  # type: Optional[Dict[str, Any]]
):
    # type: (...) -> GraphConvPredictor
    """Set up the predictor, consisting of a GCN and a MLP.

    Args:
        method (str): Method name.
        n_unit (int): Number of hidden units.
        conv_layers (int): Number of convolutional layers for the graph
            convolution network.
        class_num (int): Number of output classes.
        label_scaler (chainer.Link or None): scaler link
        postprocess_fn (chainer.FunctionNode or None):
            postprocess function for prediction.
        conv_kwargs (dict): keyword args for GraphConvolution model.
    """
    mlp = MLP(out_dim=class_num, hidden_dim=n_unit)  # type: Optional[MLP]
    if conv_kwargs is None:
        conv_kwargs = {}

    if method == 'nfp':
        print('Set up NFP predictor...')
        conv = NFP(out_dim=n_unit,
                   hidden_channels=n_unit,
                   n_update_layers=conv_layers,
                   **conv_kwargs)
    elif method == 'ggnn':
        print('Set up GGNN predictor...')
        conv = GGNN(out_dim=n_unit,
                    hidden_channels=n_unit,
                    n_update_layers=conv_layers,
                    **conv_kwargs)
    elif method == 'schnet':
        print('Set up SchNet predictor...')
        conv = SchNet(out_dim=class_num,
                      hidden_channels=n_unit,
                      n_update_layers=conv_layers,
                      **conv_kwargs)
        mlp = None
    elif method == 'weavenet':
        print('Set up WeaveNet predictor...')
        conv = WeaveNet(hidden_dim=n_unit, **conv_kwargs)
    elif method == 'rsgcn':
        print('Set up RSGCN predictor...')
        conv = RSGCN(out_dim=n_unit,
                     hidden_channels=n_unit,
                     n_update_layers=conv_layers,
                     **conv_kwargs)
    elif method == 'relgcn':
        print('Set up Relational GCN predictor...')
        num_edge_type = 4
        conv = RelGCN(out_dim=n_unit,
                      n_edge_types=num_edge_type,
                      scale_adj=True,
                      **conv_kwargs)
    elif method == 'relgat':
        print('Set up Relational GAT predictor...')
        conv = RelGAT(out_dim=n_unit,
                      hidden_channels=n_unit,
                      n_update_layers=conv_layers,
                      **conv_kwargs)
    elif method == 'gin':
        print('Set up GIN predictor...')
        conv = GIN(out_dim=n_unit,
                   hidden_channels=n_unit,
                   n_update_layers=conv_layers,
                   **conv_kwargs)
    elif method == 'nfp_gwm':
        print('Set up NFP_GWM predictor...')
        conv = NFP_GWM(out_dim=n_unit,
                       hidden_channels=n_unit,
                       n_update_layers=conv_layers,
                       **conv_kwargs)
    elif method == 'ggnn_gwm':
        print('Set up GGNN_GWM predictor...')
        conv = GGNN_GWM(out_dim=n_unit,
                        hidden_channels=n_unit,
                        n_update_layers=conv_layers,
                        **conv_kwargs)
    elif method == 'rsgcn_gwm':
        print('Set up RSGCN_GWM predictor...')
        conv = RSGCN_GWM(out_dim=n_unit,
                         hidden_channels=n_unit,
                         n_update_layers=conv_layers,
                         **conv_kwargs)
    elif method == 'gin_gwm':
        print('Set up GIN_GWM predictor...')
        conv = GIN_GWM(out_dim=n_unit,
                       hidden_channels=n_unit,
                       n_update_layers=conv_layers,
                       **conv_kwargs)
    else:
        raise ValueError('[ERROR] Invalid method: {}'.format(method))

    predictor = GraphConvPredictor(conv, mlp, label_scaler, postprocess_fn)
    return predictor
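
A minimal call sketch; the argument values are illustrative:

predictor = set_up_predictor(
    method='ggnn', n_unit=16, conv_layers=4, class_num=1)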
Example #6
def main():
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data
    n_unit = args.unit_num
    conv_layers = args.conv_layers

    if method == 'node2vec':
        # Recover the node2vec hyperparameters r, p, q encoded in the last
        # three dash-separated tokens of the model filename.
        fname = args.modelpath.split('/')[-1].rsplit('.', 1)[0]
        r, p, q = fname.split('-')[-3:]
        r, p, q = int(r[1:]), float(p[1:]), float(q[1:])
        print(args.modelpath)
        print(f"r={r}, p={p}, q={q}")

    task_type = molnet_default_config[dataset_name]['task_type']
    model_filename = {
        'classification': 'classifier.pkl',
        'regression': 'regressor.pkl'
    }

    print('Using dataset: {}...'.format(dataset_name))

    # Build the cache directory path and determine the number of output classes.
    if args.label:
        labels = args.label
        if method == 'node2vec':
            cache_dir = os.path.join(
                args.datadir,
                '{}_{}_r{}_p{}_q{}_{}'.format(dataset_name, method, r, p, q,
                                              labels))
        else:
            cache_dir = os.path.join(
                args.datadir, '{}_{}_{}'.format(dataset_name, method, labels))
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        if method == 'node2vec':
            cache_dir = os.path.join(
                args.datadir,
                '{}_{}_r{}_p{}_q{}_all'.format(dataset_name, method, r, p, q))
        else:
            cache_dir = os.path.join(args.datadir,
                                     '{}_{}_all'.format(dataset_name, method))
        class_num = len(molnet_default_config[args.dataset]['tasks'])

    # Load the train and validation parts of the dataset.
    filenames = [
        dataset_part_filename(p, num_data) for p in ['train', 'valid']
    ]

    paths = [os.path.join(cache_dir, f) for f in filenames]
    if all([os.path.exists(path) for path in paths]):
        dataset_parts = []
        for path in paths:
            print('Loading cached dataset from {}.'.format(path))
            dataset_parts.append(NumpyTupleDataset.load(path))
    else:
        dataset_parts = download_entire_dataset(dataset_name,
                                                num_data,
                                                labels,
                                                method,
                                                cache_dir,
                                                modelpath=args.modelpath)

    # Scale the label values, if necessary.
    scaler = None
    if args.scale == 'standardize':
        if task_type == 'regression':
            print('Applying standard scaling to the labels.')
            scaler, dataset_parts = fit_scaler(dataset_parts)
        else:
            print('Label scaling is not available for classification tasks.')
    else:
        print('No label scaling was selected.')
    train, valid = dataset_parts[0], dataset_parts[1]

    # Set up the predictor.
    if method == 'node2vec':
        predictor = MLP(class_num, n_unit)
    else:
        predictor = set_up_predictor(method,
                                     n_unit,
                                     conv_layers,
                                     class_num,
                                     label_scaler=scaler)

    # Set up the iterators.
    train_iter = iterators.SerialIterator(train, args.batchsize)
    valid_iter = iterators.SerialIterator(valid,
                                          args.batchsize,
                                          repeat=False,
                                          shuffle=False)

    # Load metrics for the current dataset.
    metrics = molnet_default_config[dataset_name]['metrics']
    metrics_fun = {
        k: v
        for k, v in metrics.items() if isinstance(v, types.FunctionType)
    }
    loss_fun = molnet_default_config[dataset_name]['loss']

    device = chainer.get_device(args.device)
    if task_type == 'regression':
        model = Regressor(predictor,
                          lossfun=loss_fun,
                          metrics_fun=metrics_fun,
                          device=device)
    elif task_type == 'classification':
        model = Classifier(predictor,
                           lossfun=loss_fun,
                           metrics_fun=metrics_fun,
                           device=device)
    else:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    # Set up the optimizer.
    optimizer = optimizers.Adam(0.0005)
    optimizer.setup(model)

    # Save model-related output to this directory.
    if not os.path.exists(args.out):
        os.makedirs(args.out)
    save_json(os.path.join(args.out, 'args.json'), vars(args))
    model_dir = os.path.join(args.out, os.path.basename(cache_dir))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # Save the label scaler alongside the cached dataset.
    if args.scale == 'standardize' and task_type == 'regression':
        with open(os.path.join(cache_dir, 'standardize_scaler.pkl'), 'wb') as f:
            pkl.dump(scaler, f)
    # Set up the updater.
    if method == 'node2vec':
        converter = converter_method_dict['nfp']  # concat_mols
    else:
        converter = converter_method_dict[method]
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=device,
                                       converter=converter)

    # Set up the trainer.
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=model_dir)
    trainer.extend(
        E.Evaluator(valid_iter, model, device=device, converter=converter))
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())

    # TODO: consider go/no-go of the following block
    # (i) more reporting for validation/evaluation
    # (ii) best validation score snapshot
    if task_type == 'regression':
        metric_name_list = list(metrics.keys())
        if 'RMSE' in metric_name_list:
            trainer.extend(E.snapshot_object(
                model, "best_val_" + model_filename[task_type]),
                           trigger=training.triggers.MinValueTrigger(
                               'validation/main/RMSE'))
        elif 'MAE' in metric_name_list:
            trainer.extend(E.snapshot_object(
                model, "best_val_" + model_filename[task_type]),
                           trigger=training.triggers.MinValueTrigger(
                               'validation/main/MAE'))
        else:
            print("[WARNING] No validation metric defined?")

    elif task_type == 'classification':
        train_eval_iter = iterators.SerialIterator(train,
                                                   args.batchsize,
                                                   repeat=False,
                                                   shuffle=False)
        if dataset_name in ['muv', 'pcba']:
            trainer.extend(
                PRCAUCEvaluator(train_eval_iter,
                                predictor,
                                eval_func=predictor,
                                device=device,
                                converter=converter,
                                name='train',
                                pos_labels=1,
                                ignore_labels=-1,
                                raise_value_error=False))
            # The extension name 'validation' is already taken by `Evaluator`,
            # so the name 'val' is used instead.
            trainer.extend(
                PRCAUCEvaluator(valid_iter,
                                predictor,
                                eval_func=predictor,
                                device=device,
                                converter=converter,
                                name='val',
                                pos_labels=1,
                                ignore_labels=-1,
                                raise_value_error=False))

            trainer.extend(
                E.snapshot_object(model,
                                  "best_val_" + model_filename[task_type]),
                trigger=training.triggers.MaxValueTrigger('val/main/prc_auc'))
        else:
            trainer.extend(
                ROCAUCEvaluator(train_eval_iter,
                                predictor,
                                eval_func=predictor,
                                device=device,
                                converter=converter,
                                name='train',
                                pos_labels=1,
                                ignore_labels=-1,
                                raise_value_error=False))
            # The extension name 'validation' is already taken by `Evaluator`,
            # so the name 'val' is used instead.
            trainer.extend(
                ROCAUCEvaluator(valid_iter,
                                predictor,
                                eval_func=predictor,
                                device=device,
                                converter=converter,
                                name='val',
                                pos_labels=1,
                                ignore_labels=-1,
                                raise_value_error=False))

            trainer.extend(
                E.snapshot_object(model,
                                  "best_val_" + model_filename[task_type]),
                trigger=training.triggers.MaxValueTrigger('val/main/roc_auc'))

    else:
        raise NotImplementedError(
            'Not implemented task_type = {}'.format(task_type))

    trainer.extend(AutoPrintReport())
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Save the model's parameters.
    model_path = os.path.join(model_dir, model_filename[task_type])
    print('Saving the trained model to {}...'.format(model_path))
    model.save_pickle(model_path, protocol=args.protocol)
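
Since main() ends by pickling the trained model, the natural counterpart is reloading it for inference. A minimal sketch for a regression run, assuming chainer_chemistry's Regressor.load_pickle and a model_dir matching the one built above:

from chainer_chemistry.models.prediction import Regressor

# Hypothetical reload of the artifact written at the end of main().
model = Regressor.load_pickle(
    os.path.join(model_dir, 'regressor.pkl'), device=-1)  # -1: CPU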
Example #7
import pytest

def test_mlp_assert_raises():
    # A non-positive n_layers must be rejected at construction time.
    with pytest.raises(ValueError):
        MLP(out_dim=out_dim, n_layers=-1)
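
For contrast, a hypothetical valid construction (illustrative values, not the module constant used above):

mlp = MLP(out_dim=4, n_layers=2)  # positive n_layers constructs normally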
Example #8
def model():
    # Minimal fixture: an MLP configured only with its required out_dim.
    return MLP(out_dim=out_dim)