예제 #1
0
def main():
    """Evaluate a pickled model on the test dataset and save the scores.

    Reads the command-line arguments via ``parse_arguments()``, loads the
    cached test dataset (falling back to ``download_entire_dataset`` when no
    cache file exists), loads the pickled ``Classifier`` or ``Regressor``
    from ``<args.in_dir>/<cache directory name>`` and runs an ``Evaluator``
    over the test dataset. The result is written to ``eval_result.json`` in
    the model directory. For classification tasks a ``ROCAUCEvaluator`` is
    run as well and its result is written to ``rocauc_result.json``.

    Raises:
        ValueError: If the task type configured for the dataset is neither
            ``'classification'`` nor ``'regression'``.
    """
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data

    # The cache directory name encodes dataset, method and the selected
    # label ("all" when no label was given).
    if args.label:
        labels = args.label
        cache_dir = os.path.join(
            'input', '{}_{}_{}'.format(dataset_name, method, labels))
    else:
        labels = None
        cache_dir = os.path.join('input',
                                 '{}_{}_all'.format(dataset_name, method))

    # Load the cached dataset.
    filename = dataset_part_filename('test', num_data)
    path = os.path.join(cache_dir, filename)
    if os.path.exists(path):
        print('Loading cached dataset from {}.'.format(path))
        test = NumpyTupleDataset.load(path)
    else:
        _, _, test = download_entire_dataset(dataset_name, num_data, labels,
                                             method, cache_dir)

    # Model-related data is stored in this directory.
    model_dir = os.path.join(args.in_dir, os.path.basename(cache_dir))

    model_filename = {
        'classification': 'classifier.pkl',
        'regression': 'regressor.pkl'
    }
    task_type = molnet_default_config[dataset_name]['task_type']

    # Validate before indexing ``model_filename``: otherwise an unknown task
    # type surfaces as a bare KeyError and this descriptive error is never
    # reached.
    if task_type not in model_filename:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    model_path = os.path.join(model_dir, model_filename[task_type])
    print("model_path=" + model_path)
    print('Loading model weights from {}...'.format(model_path))

    # ``task_type`` is one of the two known values from here on.
    if task_type == 'classification':
        model = Classifier.load_pickle(model_path, device=args.gpu)
    else:
        model = Regressor.load_pickle(model_path, device=args.gpu)

    # Re-loading the best-validation score snapshot is currently disabled:
    # serializers.load_npz(os.path.join(
    #     model_dir, "best_val_" + model_filename[task_type]), model)

    # Run an evaluator on the test dataset.
    print('Evaluating...')
    test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(test_iterator,
                            model,
                            converter=concat_mols,
                            device=args.gpu)()
    print('Evaluation result: ', eval_result)

    # Add more stats.
    if task_type == 'regression':
        # Convert the reported values to native Python floats.
        eval_result = {k: float(v) for k, v in eval_result.items()}
    else:
        # The Classifier is not equipped with a ROC-AUC evaluation function,
        # so a separate ROC-AUC evaluator is run here.
        rocauc_result = ROCAUCEvaluator(test_iterator,
                                        model,
                                        converter=concat_mols,
                                        device=args.gpu,
                                        eval_func=model.predictor,
                                        name='test',
                                        ignore_labels=-1)()
        print('ROCAUC Evaluation result: ', rocauc_result)
        save_json(os.path.join(model_dir, 'rocauc_result.json'), rocauc_result)

    # Save the evaluation results.
    save_json(os.path.join(model_dir, 'eval_result.json'), eval_result)
예제 #2
0
def main():
    """Evaluate the best-validation snapshot of a model on the test dataset.

    Reads the command-line arguments via ``parse_arguments()``, loads the
    cached test dataset (falling back to ``download_entire_dataset`` when no
    cache file exists), loads the pickled ``Classifier`` or ``Regressor``
    from ``<args.in_dir>/<cache directory name>`` and then overwrites its
    parameters with the ``best_val_*`` snapshot via ``serializers.load_npz``.
    An ``Evaluator`` is run over the test dataset and its result is written
    to ``eval_result.json`` in the model directory. Task-specific results
    are additionally written to ``eval_result.json`` in ``args.in_dir``.

    Raises:
        ValueError: If the task type configured for the dataset is neither
            ``'classification'`` nor ``'regression'``.
    """
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data

    # The cache directory name encodes dataset, method and the selected
    # label ("all" when no label was given).
    if args.label:
        labels = args.label
        cache_dir = os.path.join(
            'input', '{}_{}_{}'.format(dataset_name, method, labels))
    else:
        labels = None
        cache_dir = os.path.join('input',
                                 '{}_{}_all'.format(dataset_name, method))

    # Load the cached dataset.
    filename = dataset_part_filename('test', num_data)
    path = os.path.join(cache_dir, filename)
    if os.path.exists(path):
        print('Loading cached dataset from {}.'.format(path))
        test = NumpyTupleDataset.load(path)
    else:
        _, _, test = download_entire_dataset(dataset_name, num_data, labels,
                                             method, cache_dir)

    # Loading the standard scaler parameters is currently disabled:
    # if args.scale == 'standardize':
    #     scaler_path = os.path.join(args.in_dir, 'scaler.pkl')
    #     print('Loading scaler parameters from {}.'.format(scaler_path))
    #     with open(scaler_path, mode='rb') as f:
    #         scaler = pickle.load(f)
    # else:
    #     print('No standard scaling was selected.')
    #     scaler = None

    # Model-related data is stored in this directory.
    model_dir = os.path.join(args.in_dir, os.path.basename(cache_dir))

    model_filename = {
        'classification': 'classifier.pkl',
        'regression': 'regressor.pkl'
    }
    task_type = molnet_default_config[dataset_name]['task_type']

    # Validate before indexing ``model_filename``: otherwise an unknown task
    # type surfaces as a bare KeyError and this descriptive error is never
    # reached.
    if task_type not in model_filename:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    model_path = os.path.join(model_dir, model_filename[task_type])
    print("model_path=" + model_path)
    print('Loading model weights from {}...'.format(model_path))

    # ``task_type`` is one of the two known values from here on.
    if task_type == 'classification':
        model = Classifier.load_pickle(model_path, device=args.gpu)
    else:
        model = Regressor.load_pickle(model_path, device=args.gpu)

    # Proposed by Ishiguro
    # TODO: consider go/no-go with the following modification
    # Re-load the best-validation score snapshot
    serializers.load_npz(
        os.path.join(model_dir, "best_val_" + model_filename[task_type]),
        model)

    # Replacing the default predictor with one that scales the output labels
    # is currently disabled:
    # scaled_predictor = ScaledGraphConvPredictor(model.predictor)
    # scaled_predictor.scaler = scaler
    # model.predictor = scaled_predictor

    # Run an evaluator on the test dataset.
    print('Evaluating...')
    test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(test_iterator,
                            model,
                            converter=concat_mols,
                            device=args.gpu)()
    print('Evaluation result: ', eval_result)

    # Proposed by Ishiguro: add more stats
    # TODO: consider go/no-go with the following modification
    if task_type == 'regression':
        # Convert the reported values to native Python floats.
        eval_result = {k: float(v) for k, v in eval_result.items()}

        save_json(os.path.join(args.in_dir, 'eval_result.json'), eval_result)
    else:
        # The Classifier is not equipped with a ROC-AUC evaluation function,
        # so a separate ROC-AUC evaluator is run here.
        rocauc_result = ROCAUCEvaluator(test_iterator,
                                        model,
                                        converter=concat_mols,
                                        device=args.gpu,
                                        eval_func=model.predictor,
                                        name='test',
                                        ignore_labels=-1)()
        print('ROCAUC Evaluation result: ', rocauc_result)
        # NOTE(review): the ROC-AUC result is stored under the name
        # 'eval_result.json' in args.in_dir, while the plain evaluator
        # result goes to model_dir below -- confirm this naming is intended.
        save_json(os.path.join(args.in_dir, 'eval_result.json'), rocauc_result)

    # Save the evaluation results.
    save_json(os.path.join(model_dir, 'eval_result.json'), eval_result)