# imports needed to run this snippet; the project-local modules near the
# bottom (`util` and the `GNN` model class) are assumptions about this
# repository's layout and may need adjusting
import pickle
import time

import chainer
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.metrics
from chainer import training
from chainer.training import extensions

import util              # project-local data utilities (path assumed)
from gnn import GNN      # project-local GNN model class (path assumed)


def approach_2(args):
    if args.model_name == './pretrained_models/sum_sum_binary.model':
        # change the default model for regression
        args.model_name = "./pretrained_models/sum_sum_regression.model"

    device = chainer.get_device(args.device)

    # load the trained model, model attributes and graph dataset
    with open('{}_stat'.format(args.model_name), mode='rb') as f:
        model_args = pickle.load(f)

    dataset = util.GraphData("mixed", model_args.feat_init_method,
                             "Regression", args.data_num, device)
    # convert tuple dataset to graphs and targets
    graphs, targets = dataset.converter(dataset, device)

    model = GNN(model_args.num_layers, model_args.num_mlp_layers,
                dataset.graphs[0].node_features.shape[1],
                model_args.hidden_dim,
                dataset.graphs[0].node_features.shape[1],
                model_args.final_dropout, model_args.graph_pooling_type,
                model_args.neighbor_pooling_type, model_args.task_type)
    chainer.serializers.load_npz(args.model_name, model)

    model.to_device(device)
    device.use()

    print('\n--- Prediction by approach2 ---')

    # predict treewidth
    eval_st = time.time()
    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        raw_output = model(graphs)
        prediction = np.round(raw_output.array).reshape(len(raw_output))
    eval_time = time.time() - eval_st

    # report timing and error statistics
    print('# of graphs\t{0}'.format(str(len(graphs)).rjust(5)))
    print('Execution time\t{0}'.format(eval_time))
    print('Execution time per graph\t{0}'.format(eval_time / len(graphs)))
    print('Mean Absolute Error\t{0}'.format(
        sklearn.metrics.mean_absolute_error(targets, prediction)))
    print('Max Error\t{0}'.format(
        sklearn.metrics.max_error(targets, prediction)))

    # output the scatter plot
    sns.set_style("whitegrid", {'grid.linestyle': '--'})

    df = pd.DataFrame({"Real Value": targets, "Predict Value": prediction})
    g = sns.jointplot(x="Real Value", y="Predict Value", data=df)
    g.ax_joint.plot([49, 1], [49, 1], ':k')  # y = x reference line
    # Please create this output directory before running this code.
    plt.savefig('./{0}/Approach2/scatter.png'.format(args.out))
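
    # The nested function below is an Optuna objective for tuning the
    # regression model's hyperparameters; it closes over `dataset` loaded
    # above and is not invoked by approach_2 itself (see the sketch after
    # its body for how a study could drive it).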
    def objective(trial):
        num_layers = trial.suggest_int('num_layers', 1, 10)
        num_mlp_layers = trial.suggest_int('num_mlp_layers', 1, 10)
        hidden_dim = trial.suggest_int('hidden_dim', 16, 128)
        final_dropout = trial.suggest_uniform('final_dropout', 0, 0.5)
        graph_pooling_type = trial.suggest_categorical('graph_pooling_type', ['max', 'average', 'sum'])
        neighbor_pooling_type = trial.suggest_categorical('neighbor_pooling_type', ['max', 'average', 'sum'])
        batchsize = trial.suggest_int('batchsize', 16, 128)

        device = chainer.get_device(0)
        # Regression model built with the sampled hyperparameters
        model = GNN(num_layers, num_mlp_layers, dataset.graphs[0].node_features.shape[1],
                    hidden_dim, dataset.graphs[0].node_features.shape[1], final_dropout,
                    graph_pooling_type, neighbor_pooling_type, "Regression")

        # send the model to the selected device
        model.to_device(device)
        device.use()

        # Setup an optimizer
        optimizer = chainer.optimizers.Adam()
        optimizer.setup(model)

        # split the dataset into training and test sets (90% / 10%)
        train, test = chainer.datasets.split_dataset_random(dataset, int(len(dataset) * 0.9))
        train_iter = chainer.iterators.SerialIterator(train, batchsize)
        test_iter = chainer.iterators.SerialIterator(test, batchsize, repeat=False, shuffle=False)

        # Set up a trainer
        updater = training.updaters.StandardUpdater(train_iter, optimizer, device=device, converter=dataset.converter)
        trainer = training.Trainer(updater, (300, 'epoch'), out="result/hypara/regression")

        # Evaluate the model with the test dataset for each epoch
        trainer.extend(extensions.Evaluator(test_iter, model, device=device, converter=dataset.converter))

        # Write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport(filename='log_{}.dat'.format(trial.number)))

        # Plot the training and validation curves for each trial
        trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss_{}.png'.format(trial.number)))
        trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy_{}.png'.format(trial.number)))

        # Run the training
        trainer.run()

        # optionally save the trained model for this trial
        # chainer.serializers.save_npz('./result/hypara/regression/{0}.model'.format(trial.number), model)

        # evaluate on the test split and return the mean squared error
        graphs, target = dataset.converter(test, device)
        with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
            y_pred = model(graphs)
        y_pred.to_cpu()
        y_pred = y_pred.array
        target = chainer.cuda.to_cpu(target)

        try:
            value = sklearn.metrics.mean_squared_error(target, y_pred)
        except ValueError:
            # e.g. the model produced NaNs; penalize this trial with a large error
            value = 10e9

        return value
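
    # NOTE: `objective` above is never called by approach_2. A minimal,
    # hypothetical Optuna driver for it (assuming the `optuna` package is
    # available) could look like:
    #
    #     study = optuna.create_study(direction='minimize')
    #     study.optimize(objective, n_trials=100)
    #     print('Best hyperparameters:', study.best_params)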