def check_invalid_key(self, gpu, label_key):
    link = Classifier(links.Linear(10, 3), label_key=label_key)
    if gpu:
        link.to_gpu()
    x = chainer.Variable(link.xp.asarray(self.x))
    with pytest.raises(ValueError):
        link(x)
def check_predict_proba(self, device):
    clf = Classifier(self.predictor, device=device)
    actual_y = clf.predict_proba(self.x)
    assert actual_y.shape == (3, 2)
    assert actual_y.dtype == numpy.float32
    assert numpy.alltrue(0 <= actual_y)
    assert numpy.alltrue(actual_y <= 1.)
    actual_t = numpy.argmax(actual_y, axis=1)
    assert numpy.alltrue(actual_t == self.t)
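# A minimal usage sketch of `predict_proba`, mirroring the test above. The
# two-class linear predictor and the random input are illustrative
# assumptions; by default `predict_proba` applies a softmax-style
# postprocess, so each output row lies in [0, 1].
import numpy
import chainer.links as L
from chainer_chemistry.models.prediction import Classifier

predictor = L.Linear(10, 2)  # toy predictor: 10 features -> 2 classes
clf = Classifier(predictor)
x = numpy.random.uniform(-1, 1, (3, 10)).astype(numpy.float32)
proba = clf.predict_proba(x)   # shape (3, 2), probabilities per class
labels = proba.argmax(axis=1)  # hard labels, as `predict` would return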
def check_call(self, gpu, label_key, args, kwargs, model_args, model_kwargs,
               metrics_fun, compute_metrics):
    init_kwargs = {'label_key': label_key}
    if metrics_fun is not None:
        init_kwargs['metrics_fun'] = metrics_fun
    link = Classifier(chainer.Link(), **init_kwargs)

    if gpu:
        xp = cuda.cupy
        link.to_gpu()
    else:
        xp = numpy

    link.compute_metrics = compute_metrics
    y = chainer.Variable(self.y)
    link.predictor = mock.MagicMock(return_value=y)

    loss = link(*args, **kwargs)
    link.predictor.assert_called_with(*model_args, **model_kwargs)

    assert hasattr(link, 'y')
    assert link.y is not None
    assert hasattr(link, 'loss')
    xp.testing.assert_allclose(link.loss.data, loss.data)
    assert hasattr(link, 'metrics')
    if compute_metrics:
        assert link.metrics is not None
    else:
        assert link.metrics is None
def test_report_key(self, metrics_fun, compute_metrics):
    repo = chainer.Reporter()
    link = Classifier(predictor=DummyPredictor(), metrics_fun=metrics_fun)
    link.compute_metrics = compute_metrics
    repo.add_observer('target', link)
    with repo:
        observation = {}
        with reporter.report_scope(observation):
            link(self.x, self.t)

    actual_keys = set(observation.keys())
    if compute_metrics:
        if metrics_fun is None:
            assert set(['target/loss']) == actual_keys
        elif isinstance(metrics_fun, dict):
            assert set(['target/loss', 'target/user_key']) == actual_keys
        elif callable(metrics_fun):
            assert set(['target/loss', 'target/accuracy']) == actual_keys
        else:
            raise TypeError('Unexpected metrics_fun type: {}'
                            .format(type(metrics_fun)))
    else:
        assert set(['target/loss']) == actual_keys
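# Hedged sketch of how `metrics_fun` determines the reported keys, as
# exercised by the test above. `DummyPredictor` is the stand-in predictor
# from the surrounding test module; key names follow chainer.Reporter's
# '<observer>/<metric>' convention.
import chainer.functions as F
from chainer_chemistry.models.prediction import Classifier

# A bare callable is reported under the default name 'accuracy':
clf_callable = Classifier(DummyPredictor(), metrics_fun=F.accuracy)
# -> observation keys: 'target/loss', 'target/accuracy'

# A dict maps a chosen report name to each metric function:
clf_dict = Classifier(DummyPredictor(), metrics_fun={'user_key': F.accuracy})
# -> observation keys: 'target/loss', 'target/user_key'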
def main():
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data
    n_unit = args.unit_num
    conv_layers = args.conv_layers

    task_type = molnet_default_config[dataset_name]['task_type']
    model_filename = {
        'classification': 'classifier.pkl',
        'regression': 'regressor.pkl'
    }

    print('Using dataset: {}...'.format(dataset_name))

    if args.label:
        labels = args.label
        cache_dir = os.path.join(
            'input', '{}_{}_{}'.format(dataset_name, method, labels))
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        cache_dir = os.path.join('input',
                                 '{}_{}_all'.format(dataset_name, method))
        class_num = len(molnet_default_config[args.dataset]['tasks'])

    # Load the train and validation parts of the dataset.
    filenames = [dataset_part_filename(p, num_data)
                 for p in ['train', 'valid']]
    paths = [os.path.join(cache_dir, f) for f in filenames]
    if all([os.path.exists(path) for path in paths]):
        dataset_parts = []
        for path in paths:
            print('Loading cached dataset from {}.'.format(path))
            dataset_parts.append(NumpyTupleDataset.load(path))
    else:
        dataset_parts = download_entire_dataset(dataset_name, num_data,
                                                labels, method, cache_dir)
    train, valid = dataset_parts[0], dataset_parts[1]

    # # Scale the label values, if necessary.
    # if args.scale == 'standardize':
    #     if task_type == 'regression':
    #         print('Applying standard scaling to the labels.')
    #         datasets, scaler = standardize_dataset_labels(datasets)
    #     else:
    #         print('Label scaling is not available for classification tasks.')
    # else:
    #     print('No label scaling was selected.')
    #     scaler = None

    # Set up the predictor.
    predictor = set_up_predictor(method, n_unit, conv_layers, class_num)

    # Set up the iterators.
    train_iter = iterators.SerialIterator(train, args.batchsize)
    valid_iter = iterators.SerialIterator(valid, args.batchsize, repeat=False,
                                          shuffle=False)

    # Load metrics for the current dataset.
    metrics = molnet_default_config[dataset_name]['metrics']
    metrics_fun = {k: v for k, v in metrics.items()
                   if isinstance(v, types.FunctionType)}
    loss_fun = molnet_default_config[dataset_name]['loss']

    if task_type == 'regression':
        model = Regressor(predictor, lossfun=loss_fun,
                          metrics_fun=metrics_fun, device=args.gpu)
        # TODO: Use standard scaler for regression task
    elif task_type == 'classification':
        model = Classifier(predictor, lossfun=loss_fun,
                           metrics_fun=metrics_fun, device=args.gpu)
    else:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    # Set up the optimizer.
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # Save model-related output to this directory.
    model_dir = os.path.join(args.out, os.path.basename(cache_dir))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Set up the updater.
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu,
                                       converter=concat_mols)

    # Set up the trainer.
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=model_dir)
    trainer.extend(E.Evaluator(valid_iter, model, device=args.gpu,
                               converter=concat_mols))
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())

    # Report various metrics.
    print_report_targets = ['epoch', 'main/loss', 'validation/main/loss']
    for metric_name, metric_fun in metrics.items():
        if isinstance(metric_fun, types.FunctionType):
            print_report_targets.append('main/' + metric_name)
            print_report_targets.append('validation/main/' + metric_name)
        elif issubclass(metric_fun, BatchEvaluator):
            trainer.extend(metric_fun(valid_iter, model, device=args.gpu,
                                      eval_func=predictor,
                                      converter=concat_mols, name='val',
                                      raise_value_error=False))
            print_report_targets.append('val/main/' + metric_name)
        else:
            raise TypeError('{} is not a supported metrics function.'.format(
                type(metric_fun)))
    print_report_targets.append('elapsed_time')

    trainer.extend(E.PrintReport(print_report_targets))
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Save the model's parameters.
    model_path = os.path.join(model_dir, model_filename[task_type])
    print('Saving the trained model to {}...'.format(model_path))
    model.save_pickle(model_path, protocol=args.protocol)
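# Hedged follow-up sketch: reloading the pickle written by `save_pickle`
# above for inference. `load_pickle` is a static method (see the evaluation
# scripts below); the concrete path and CPU device here are illustrative
# assumptions based on the default --out and cache naming.
from chainer_chemistry.models.prediction import Classifier

model = Classifier.load_pickle('result/bbbp_nfp_all/classifier.pkl',
                               device=-1)  # -1 selects CPU
# `model.predictor` is the trained graph-convolution network.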
def main():
    method_list = ['nfp', 'ggnn', 'schnet', 'weavenet', 'rsgcn']
    dataset_names = list(molnet_default_config.keys())

    parser = argparse.ArgumentParser(description='molnet example')
    parser.add_argument('--method', '-m', type=str, choices=method_list,
                        default='nfp')
    parser.add_argument('--label', '-l', type=str, default='',
                        help='target label for regression, empty string '
                        'means to predict all properties at once')
    parser.add_argument('--conv-layers', '-c', type=int, default=4)
    parser.add_argument('--batchsize', '-b', type=int, default=32)
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--out', '-o', type=str, default='result')
    parser.add_argument('--epoch', '-e', type=int, default=20)
    parser.add_argument('--unit-num', '-u', type=int, default=16)
    parser.add_argument('--dataset', '-d', type=str, choices=dataset_names,
                        default='bbbp')
    parser.add_argument('--protocol', type=int, default=2)
    parser.add_argument('--model-filename', type=str, default='regressor.pkl')
    parser.add_argument('--num-data', type=int, default=-1,
                        help='Number of data to be parsed from parser. '
                        '-1 indicates to parse all data.')
    args = parser.parse_args()

    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data
    n_unit = args.unit_num
    conv_layers = args.conv_layers

    print('Use {} dataset'.format(dataset_name))

    if args.label:
        labels = args.label
        cache_dir = os.path.join(
            'input', '{}_{}_{}'.format(dataset_name, method, labels))
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        cache_dir = os.path.join('input',
                                 '{}_{}_all'.format(dataset_name, method))
        class_num = len(molnet_default_config[args.dataset]['tasks'])

    # Dataset preparation
    def get_dataset_paths(cache_dir, num_data):
        filepaths = []
        for filetype in ['train', 'valid', 'test']:
            filename = filetype + '_data'
            if num_data >= 0:
                filename += '_' + str(num_data)
            filename += '.npz'
            filepaths.append(os.path.join(cache_dir, filename))
        return filepaths

    filepaths = get_dataset_paths(cache_dir, num_data)
    if all([os.path.exists(fpath) for fpath in filepaths]):
        datasets = []
        for fpath in filepaths:
            print('load from cache {}'.format(fpath))
            datasets.append(NumpyTupleDataset.load(fpath))
    else:
        print('preprocessing dataset...')
        preprocessor = preprocess_method_dict[method]()
        # Only parse the first `num_data` entries when num_data >= 0.
        target_index = numpy.arange(num_data) if num_data >= 0 else None
        datasets = D.molnet.get_molnet_dataset(dataset_name, preprocessor,
                                               labels=labels,
                                               target_index=target_index)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        datasets = datasets['dataset']
        for i, fpath in enumerate(filepaths):
            NumpyTupleDataset.save(fpath, datasets[i])

    train, val, _ = datasets

    # Network
    if method == 'nfp':
        print('Train NFP model...')
        predictor = GraphConvPredictor(
            NFP(out_dim=n_unit, hidden_dim=n_unit, n_layers=conv_layers),
            MLP(out_dim=class_num, hidden_dim=n_unit))
    elif method == 'ggnn':
        print('Train GGNN model...')
        predictor = GraphConvPredictor(
            GGNN(out_dim=n_unit, hidden_dim=n_unit, n_layers=conv_layers),
            MLP(out_dim=class_num, hidden_dim=n_unit))
    elif method == 'schnet':
        print('Train SchNet model...')
        predictor = GraphConvPredictor(
            SchNet(out_dim=class_num, hidden_dim=n_unit,
                   n_layers=conv_layers),
            None)
    elif method == 'weavenet':
        print('Train WeaveNet model...')
        n_atom = 20
        n_sub_layer = 1
        weave_channels = [50] * conv_layers
        predictor = GraphConvPredictor(
            WeaveNet(weave_channels=weave_channels, hidden_dim=n_unit,
                     n_sub_layer=n_sub_layer, n_atom=n_atom),
            MLP(out_dim=class_num, hidden_dim=n_unit))
    elif method == 'rsgcn':
        print('Train RSGCN model...')
        predictor = GraphConvPredictor(
            RSGCN(out_dim=n_unit, hidden_dim=n_unit, n_layers=conv_layers),
            MLP(out_dim=class_num, hidden_dim=n_unit))
    else:
        raise ValueError('Invalid method {}'.format(method))

    train_iter = iterators.SerialIterator(train, args.batchsize)
    val_iter = iterators.SerialIterator(val, args.batchsize, repeat=False,
                                        shuffle=False)

    metrics_fun = molnet_default_config[dataset_name]['metrics']
    loss_fun = molnet_default_config[dataset_name]['loss']
    task_type = molnet_default_config[dataset_name]['task_type']
    if task_type == 'regression':
        model = Regressor(predictor, lossfun=loss_fun,
                          metrics_fun=metrics_fun, device=args.gpu)
        # TODO(nakago): Use standard scaler for regression task
    elif task_type == 'classification':
        model = Classifier(predictor, lossfun=loss_fun,
                           metrics_fun=metrics_fun, device=args.gpu)
    else:
        raise NotImplementedError(
            'Not implemented task_type = {}'.format(task_type))

    optimizer = optimizers.Adam()
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu,
                                       converter=concat_mols)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(
        E.Evaluator(val_iter, model, device=args.gpu, converter=concat_mols))
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())

    print_report_targets = ['epoch', 'main/loss', 'validation/main/loss']
    if metrics_fun is not None and isinstance(metrics_fun, dict):
        for m_k in metrics_fun.keys():
            print_report_targets.append('main/' + m_k)
            print_report_targets.append('validation/main/' + m_k)
    if task_type == 'classification':
        # Evaluation for train data takes time, skip for now.
        # trainer.extend(ROCAUCEvaluator(
        #     train_iter, model, device=args.gpu, eval_func=predictor,
        #     converter=concat_mols, name='train', raise_value_error=False))
        # print_report_targets.append('train/main/roc_auc')
        trainer.extend(ROCAUCEvaluator(
            val_iter, model, device=args.gpu, eval_func=predictor,
            converter=concat_mols, name='val', raise_value_error=False))
        print_report_targets.append('val/main/roc_auc')
    print_report_targets.append('elapsed_time')
    trainer.extend(E.PrintReport(print_report_targets))
    trainer.extend(E.ProgressBar())
    trainer.run()

    # --- save model ---
    model.save_pickle(os.path.join(args.out, args.model_filename),
                      protocol=args.protocol)
def test_predict_gpu(self):
    clf = Classifier(self.predictor, device=0)
    actual_t = clf.predict(self.x)
    assert numpy.alltrue(actual_t == self.t)
def main():
    parser = argparse.ArgumentParser(
        description='Predict with a trained model.')
    parser.add_argument('--in-dir', '-i', type=str, default='result',
                        help='Path to the result directory of the training '
                        'script.')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='batch size')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID to use. Negative value indicates '
                        'not to use GPU and to run the code on CPU.')
    parser.add_argument('--model-filename', type=str,
                        default='classifier.pkl',
                        help='file name for pickled model')
    parser.add_argument('--num-data', type=int, default=-1,
                        help='Number of data to be parsed from parser. '
                        '-1 indicates to parse all data.')
    args = parser.parse_args()

    with open(os.path.join(args.in_dir, 'config.json'), 'r') as i:
        config = json.loads(i.read())

    method = config['method']
    labels = config['labels']
    _, test, _ = data.load_dataset(method, labels, num_data=args.num_data)
    y_test = test.get_datasets()[-1]

    # Load pretrained model
    clf = Classifier.load_pickle(
        os.path.join(args.in_dir, args.model_filename),
        device=args.gpu)  # type: Classifier

    # --- predict ---
    print('Predicting...')

    # We need to feed only input features `x` to `predict`/`predict_proba`.
    # This converter extracts only inputs (x1, x2, ...) from the features,
    # which consist of input `x` and label `t` (x1, x2, ..., t).
    def extract_inputs(batch, device=None):
        return concat_mols(batch, device=device)[:-1]

    def postprocess_pred(x):
        x_array = cuda.to_cpu(x.data)
        return numpy.where(x_array > 0, 1, 0)

    y_pred = clf.predict(test, converter=extract_inputs,
                         postprocess_fn=postprocess_pred)
    y_proba = clf.predict_proba(test, converter=extract_inputs,
                                postprocess_fn=F.sigmoid)

    # `predict` method returns the prediction label (0: non-toxic, 1: toxic)
    print('y_pred.shape = {}, y_pred[:5, 0] = {}'
          .format(y_pred.shape, y_pred[:5, 0]))
    # `predict_proba` method returns the probability to be toxic
    print('y_proba.shape = {}, y_proba[:5, 0] = {}'
          .format(y_proba.shape, y_proba[:5, 0]))
    # --- predict end ---

    if y_pred.ndim == 1:
        y_pred = y_pred[:, None]

    if y_pred.shape != y_test.shape:
        raise RuntimeError('The shape of the prediction result array and '
                           'that of the ground truth array do not match. '
                           'Contents of the input directory may be corrupted '
                           'or modified.')

    statistics = []
    for t, p in six.moves.zip(y_test.T, y_pred.T):
        idx = t != -1
        n_correct = (t[idx] == p[idx]).sum()
        n_total = len(t[idx])
        accuracy = float(n_correct) / n_total
        statistics.append([n_correct, n_total, accuracy])

    print('{:>6} {:>8} {:>8} {:>8}'
          .format('TaskID', 'Correct', 'Total', 'Accuracy'))
    for idx, (n_correct, n_total, accuracy) in enumerate(statistics):
        print('task{:>2} {:>8} {:>8} {:>8.4f}'
              .format(idx, n_correct, n_total, accuracy))

    prediction_result_file = 'prediction.npz'
    print('Save prediction result to {}'.format(prediction_result_file))
    numpy.savez_compressed(prediction_result_file, y_pred)

    # --- evaluate ---
    # To calc loss/accuracy, we can use `Evaluator`, `ROCAUCEvaluator`
    print('Evaluating...')
    test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(test_iterator, clf, converter=concat_mols,
                            device=args.gpu)()
    print('Evaluation result: ', eval_result)
    rocauc_result = ROCAUCEvaluator(
        test_iterator, clf, converter=concat_mols, device=args.gpu,
        eval_func=clf.predictor, name='test', ignore_labels=-1)()
    print('ROCAUC Evaluation result: ', rocauc_result)
    with open(os.path.join(args.in_dir, 'eval_result.json'), 'w') as f:
        json.dump(rocauc_result, f)
def main():
    args = parse_arguments()
    generate_drug_list = True if args.generate_drug_list == 'True' else False

    if args.label:
        labels = args.label
        # class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        return np.asarray(label_list, dtype=np.int32)

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess test dataset...')
    preprocessor = preprocess_method_dict['ggnn']()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    test_dict = parser.parse(args.test_datafile,
                             return_smiles_pair_original=True)
    test = test_dict['dataset']
    # test_smiles_pairs = test_dict['smiles_pair_original']

    from chainer.iterators import SerialIterator
    test_iter = SerialIterator(test, 64, repeat=False, shuffle=False)

    out = os.path.join('output', args.out)
    model_path = os.path.join(out, args.model_filename)
    # `load_pickle` is a static method; call it on the class to get an
    # instance.
    print('model_path: {}'.format(model_path))
    from chainer_chemistry.models.prediction import Classifier
    model = Classifier.load_pickle(model_path, args.gpu)

    if args.gpu >= 0:
        model.to_gpu(args.gpu)
    else:
        model.to_cpu()

    snapshot_path = os.path.join(out, args.snapshot)
    try:
        chainer.serializers.load_npz(snapshot_path, model)
    except KeyError as e:
        print(e)

    def eval_func(atoms_1, adj_1, atoms_2, adj_2):
        sample = [
            (atoms_1, adj_1),
            (atoms_2, adj_2),
        ]
        sample = concat_mols(sample)
        atoms_1, adj_1 = sample[0]
        atoms_2, adj_2 = sample[1]
        print(atoms_1, adj_1)
        print('shape 1:', atoms_1.shape, adj_1.shape)
        print('shape 2:', atoms_2.shape, adj_2.shape)
        pred, _ = model.predictor.predict(atoms_1, adj_1, atoms_2, adj_2)
        return pred

    evaluator = MyEvaluator(
        test_iter, model, converter=concat_mols, device=args.gpu,
        eval_func=model.predictor.predict,
        # eval_func=eval_func,
        # mediate_func=models.predictor.mediate_output,
        name='test', ignore_labels=-1)
    e1_total, e2_total = evaluator.generate_representations()
    y_total, t_total = evaluator.generate_y_and_t()

    test_filename = os.path.basename(args.test_datafile).split('.')[0]

    dst_repre_filename = test_filename + '_e.csv'
    dst_repre_filepath = os.path.join(out, dst_repre_filename)
    add_representations(args.test_datafile, dst_repre_filepath,
                        e1_total, e2_total,
                        generate_drug_list=generate_drug_list)

    dst_filename = test_filename + '_e_y.csv'
    dst_filepath = os.path.join(out, dst_filename)
    add_reprensentations_and_y(args.test_datafile, dst_filepath,
                               e1_total, e2_total, y_total)

    perf_dict = dict()
    for metric in ['roc_auc', 'prc_auc', 'accuracy', 'precision',
                   'recall', 'f1']:
        # Note: `compuate` is the method name as defined on MyEvaluator.
        result = evaluator.compuate(metric=metric)
        perf_dict[metric] = result
        print('{}: {}'.format(metric, result))
    with open(os.path.join(ROOT_PATH, 'eval_result.json'), 'w') as f:
        json.dump(perf_dict, f)
def main():
    parser = argparse.ArgumentParser(
        description='Imbalanced MNIST classification')
    parser.add_argument('--eval-mode', type=int, default=1,
                        help='Evaluation mode. '
                        '0: only binary_accuracy is calculated. '
                        '1: binary_accuracy and ROC-AUC score are calculated')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='batch size')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID to use. Negative value indicates '
                        'not to use GPU and to run the code on CPU.')
    parser.add_argument('--out', '-o', type=str, default='result',
                        help='path to output directory')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='number of epochs')
    parser.add_argument('--resume', '-r', type=str, default='',
                        help='path to a trainer snapshot')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--protocol', type=int, default=2,
                        help='protocol version for pickle')
    parser.add_argument('--model-filename', type=str,
                        default='classifier.pkl',
                        help='file name for pickled model')
    parser.add_argument('--updater-type', type=str, default='standard')
    parser.add_argument('--sampling-size', type=int, default=32)
    parser.add_argument('--optimizer-type', type=str, default='Adam')
    parser.add_argument('--alpha', type=str, default='0.001')
    args = parser.parse_args()

    # Dataset preparation
    train, train_val, val = get_binary_imbalanced_data()

    train_iter = iterators.SerialIterator(train, args.batchsize)
    val_iter = iterators.SerialIterator(val, args.batchsize, repeat=False,
                                        shuffle=False)

    model = LeNet(n_class=1, binary=True)
    classifier = Classifier(model, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=F.binary_accuracy, device=args.gpu)

    if args.optimizer_type == 'Adam':
        optimizer = optimizers.Adam()
    else:
        optimizer = optimizers.SGD(lr=1e-3)
    optimizer.setup(classifier)

    updater_type = args.updater_type
    if updater_type == 'standard':
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=args.gpu)
    elif updater_type == 'proposed':
        updater = Proposed(train_iter, optimizer, device=args.gpu,
                           sampling_size=args.sampling_size)
    elif updater_type == 'LRE':
        x, t = chainer.dataset.concat_examples(train)
        train_val_iter = iterators.SerialIterator(train_val, len(train_val))
        updater = LRE({'main': train_iter, 'val': train_val_iter},
                      optimizer, device=args.gpu, alpha=args.alpha)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(E.Evaluator(val_iter, classifier, device=args.gpu))
    trainer.extend(E.LogReport())

    eval_mode = args.eval_mode
    if eval_mode == 0:
        trainer.extend(E.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
            'validation/main/accuracy', 'elapsed_time']))
    elif eval_mode == 1:
        train_eval_iter = iterators.SerialIterator(train, args.batchsize,
                                                   repeat=False,
                                                   shuffle=False)
        trainer.extend(ROCAUCEvaluator(
            train_eval_iter, classifier, eval_func=model, device=args.gpu,
            name='train', pos_labels=1, ignore_labels=-1,
            raise_value_error=False))
        # extension name='validation' is already used by `Evaluator`,
        # so the extension name `val` is used instead.
        trainer.extend(ROCAUCEvaluator(
            val_iter, classifier, eval_func=model, device=args.gpu,
            name='val', pos_labels=1, ignore_labels=-1))
        trainer.extend(E.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'train/main/roc_auc',
            'validation/main/loss', 'validation/main/accuracy',
            'val/main/roc_auc', 'elapsed_time']))
    else:
        raise ValueError('Invalid eval_mode {}'.format(eval_mode))

    trainer.extend(E.ProgressBar(update_interval=10))
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(E.snapshot(), trigger=(frequency, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
    trainer.run()

    classifier.save_pickle(os.path.join(args.out, args.model_filename),
                           protocol=args.protocol)
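# Hedged follow-up sketch: restoring the classifier saved above and scoring
# new images. The path mirrors the script's defaults; the zero batch is a
# placeholder for real MNIST inputs, and sigmoid converts the single logit
# into P(label == 1), matching the sigmoid cross-entropy training loss.
import numpy
import chainer.functions as F
from chainer_chemistry.models.prediction import Classifier

clf = Classifier.load_pickle('result/classifier.pkl', device=-1)
x = numpy.zeros((4, 1, 28, 28), dtype=numpy.float32)  # dummy MNIST batch
proba = clf.predict_proba(x, postprocess_fn=F.sigmoid)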
def main():
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data

    if args.label:
        labels = args.label
        cache_dir = os.path.join(
            'input', '{}_{}_{}'.format(dataset_name, method, labels))
    else:
        labels = None
        cache_dir = os.path.join('input',
                                 '{}_{}_all'.format(dataset_name, method))

    # Load the cached dataset.
    filename = dataset_part_filename('test', num_data)
    path = os.path.join(cache_dir, filename)
    if os.path.exists(path):
        print('Loading cached dataset from {}.'.format(path))
        test = NumpyTupleDataset.load(path)
    else:
        _, _, test = download_entire_dataset(dataset_name, num_data, labels,
                                             method, cache_dir)

    # Model-related data is stored in this directory.
    model_dir = os.path.join(args.in_dir, os.path.basename(cache_dir))

    model_filename = {
        'classification': 'classifier.pkl',
        'regression': 'regressor.pkl'
    }
    task_type = molnet_default_config[dataset_name]['task_type']
    model_path = os.path.join(model_dir, model_filename[task_type])
    print('model_path = {}'.format(model_path))
    print('Loading model weights from {}...'.format(model_path))

    if task_type == 'classification':
        model = Classifier.load_pickle(model_path, device=args.gpu)
    elif task_type == 'regression':
        model = Regressor.load_pickle(model_path, device=args.gpu)
    else:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    # Re-load the best-validation score snapshot
    # serializers.load_npz(os.path.join(
    #     model_dir, 'best_val_' + model_filename[task_type]), model)

    # Run an evaluator on the test dataset.
    print('Evaluating...')
    test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(test_iterator, model, converter=concat_mols,
                            device=args.gpu)()
    print('Evaluation result: ', eval_result)

    # Add more stats
    if task_type == 'regression':
        # Convert to native Python values for JSON serialization.
        for k, v in eval_result.items():
            eval_result[k] = float(v)
    elif task_type == 'classification':
        # `Classifier` is not equipped with a ROC-AUC evaluation function,
        # so a separate ROC-AUC evaluator is used here.
        rocauc_result = ROCAUCEvaluator(
            test_iterator, model, converter=concat_mols, device=args.gpu,
            eval_func=model.predictor, name='test', ignore_labels=-1)()
        print('ROCAUC Evaluation result: ', rocauc_result)
        save_json(os.path.join(model_dir, 'rocauc_result.json'),
                  rocauc_result)
    else:
        print('[WARNING] unknown task_type {}.'.format(task_type))

    # Save the evaluation results.
    save_json(os.path.join(model_dir, 'eval_result.json'), eval_result)
def main():
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data

    if args.label:
        labels = args.label
        cache_dir = os.path.join(
            'input', '{}_{}_{}'.format(dataset_name, method, labels))
    else:
        labels = None
        cache_dir = os.path.join('input',
                                 '{}_{}_all'.format(dataset_name, method))

    # Load the cached dataset.
    filename = dataset_part_filename('test', num_data)
    path = os.path.join(cache_dir, filename)
    if os.path.exists(path):
        print('Loading cached dataset from {}.'.format(path))
        test = NumpyTupleDataset.load(path)
    else:
        _, _, test = download_entire_dataset(dataset_name, num_data, labels,
                                             method, cache_dir)

    # # Load the standard scaler parameters, if necessary.
    # if args.scale == 'standardize':
    #     scaler_path = os.path.join(args.in_dir, 'scaler.pkl')
    #     print('Loading scaler parameters from {}.'.format(scaler_path))
    #     with open(scaler_path, mode='rb') as f:
    #         scaler = pickle.load(f)
    # else:
    #     print('No standard scaling was selected.')
    #     scaler = None

    # Model-related data is stored in this directory.
    model_dir = os.path.join(args.in_dir, os.path.basename(cache_dir))

    model_filename = {
        'classification': 'classifier.pkl',
        'regression': 'regressor.pkl'
    }
    task_type = molnet_default_config[dataset_name]['task_type']
    model_path = os.path.join(model_dir, model_filename[task_type])
    print('Loading model weights from {}...'.format(model_path))

    if task_type == 'classification':
        model = Classifier.load_pickle(model_path, device=args.gpu)
    elif task_type == 'regression':
        model = Regressor.load_pickle(model_path, device=args.gpu)
    else:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    # # Replace the default predictor with one that scales the output labels.
    # scaled_predictor = ScaledGraphConvPredictor(model.predictor)
    # scaled_predictor.scaler = scaler
    # model.predictor = scaled_predictor

    # Run an evaluator on the test dataset.
    print('Evaluating...')
    test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(test_iterator, model, converter=concat_mols,
                            device=args.gpu)()
    print('Evaluation result: ', eval_result)

    # Save the evaluation results.
    with open(os.path.join(model_dir, 'eval_result.json'), 'w') as f:
        json.dump(eval_result, f)
def main():
    # Supported preprocessing/network list
    method_list = ['nfp', 'ggnn', 'schnet', 'weavenet', 'rsgcn', 'relgcn',
                   'relgat']
    label_names = D.get_tox21_label_names()
    iterator_type = ['serial', 'balanced']

    parser = argparse.ArgumentParser(
        description='Multitask Learning with Tox21.')
    parser.add_argument('--method', '-m', type=str, choices=method_list,
                        default='nfp', help='graph convolution model to use '
                        'as a predictor.')
    parser.add_argument('--label', '-l', type=str, choices=label_names,
                        default='', help='target label for logistic '
                        'regression. Use all labels if this option '
                        'is not specified.')
    parser.add_argument('--iterator-type', type=str, choices=iterator_type,
                        default='serial', help='iterator type. If `balanced` '
                        'is specified, data is sampled to take the same '
                        'number of positive/negative labels during training.')
    parser.add_argument('--eval-mode', type=int, default=1,
                        help='Evaluation mode. '
                        '0: only binary_accuracy is calculated. '
                        '1: binary_accuracy and ROC-AUC score are calculated')
    parser.add_argument('--conv-layers', '-c', type=int, default=4,
                        help='number of convolution layers')
    parser.add_argument('--batchsize', '-b', type=int, default=32,
                        help='batch size')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID to use. Negative value indicates '
                        'not to use GPU and to run the code on CPU.')
    parser.add_argument('--out', '-o', type=str, default='result',
                        help='path to output directory')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='number of epochs')
    parser.add_argument('--unit-num', '-u', type=int, default=16,
                        help='number of units in one layer of the model')
    parser.add_argument('--resume', '-r', type=str, default='',
                        help='path to a trainer snapshot')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--protocol', type=int, default=2,
                        help='protocol version for pickle')
    parser.add_argument('--model-filename', type=str,
                        default='classifier.pkl',
                        help='file name for pickled model')
    parser.add_argument('--num-data', type=int, default=-1,
                        help='Number of data to be parsed from parser. '
                        '-1 indicates to parse all data.')
    args = parser.parse_args()

    method = args.method
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        class_num = len(label_names)

    # Dataset preparation
    train, val, _ = data.load_dataset(method, labels, num_data=args.num_data)

    # Network
    predictor_ = predictor.build_predictor(
        method, args.unit_num, args.conv_layers, class_num)

    iterator_type = args.iterator_type
    if iterator_type == 'serial':
        train_iter = I.SerialIterator(train, args.batchsize)
    elif iterator_type == 'balanced':
        if class_num > 1:
            raise ValueError('BalancedSerialIterator can be used with only '
                             'one-label classification, please specify the '
                             'label to be predicted by the --label option.')
        train_iter = BalancedSerialIterator(
            train, args.batchsize, train.features[:, -1], ignore_labels=-1)
        train_iter.show_label_stats()
    else:
        raise ValueError('Invalid iterator type {}'.format(iterator_type))
    val_iter = I.SerialIterator(val, args.batchsize, repeat=False,
                                shuffle=False)

    classifier = Classifier(predictor_, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=F.binary_accuracy, device=args.gpu)

    optimizer = O.Adam()
    optimizer.setup(classifier)

    updater = training.StandardUpdater(
        train_iter, optimizer, device=args.gpu, converter=concat_mols)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(E.Evaluator(val_iter, classifier, device=args.gpu,
                               converter=concat_mols))
    trainer.extend(E.LogReport())

    eval_mode = args.eval_mode
    if eval_mode == 0:
        trainer.extend(E.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
            'validation/main/accuracy', 'elapsed_time']))
    elif eval_mode == 1:
        train_eval_iter = I.SerialIterator(train, args.batchsize,
                                           repeat=False, shuffle=False)
        trainer.extend(ROCAUCEvaluator(
            train_eval_iter, classifier, eval_func=predictor_,
            device=args.gpu, converter=concat_mols, name='train',
            pos_labels=1, ignore_labels=-1, raise_value_error=False))
        # extension name='validation' is already used by `Evaluator`,
        # so the extension name `val` is used instead.
        trainer.extend(ROCAUCEvaluator(
            val_iter, classifier, eval_func=predictor_,
            device=args.gpu, converter=concat_mols, name='val',
            pos_labels=1, ignore_labels=-1))
        trainer.extend(E.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'train/main/roc_auc',
            'validation/main/loss', 'validation/main/accuracy',
            'val/main/roc_auc', 'elapsed_time']))
    else:
        raise ValueError('Invalid eval_mode {}'.format(eval_mode))

    trainer.extend(E.ProgressBar(update_interval=10))
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(E.snapshot(), trigger=(frequency, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    config = {'method': args.method,
              'conv_layers': args.conv_layers,
              'unit_num': args.unit_num,
              'labels': args.label}
    with open(os.path.join(args.out, 'config.json'), 'w') as o:
        o.write(json.dumps(config))

    classifier.save_pickle(os.path.join(args.out, args.model_filename),
                           protocol=args.protocol)
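# Hedged usage sketch for the training script above. The file name
# `train_tox21.py` is an assumption, but the flags match the parser defined
# in `main()`. Training on a single label is what enables the balanced
# iterator:
#
#   python train_tox21.py --method nfp --label NR-AR \
#       --iterator-type balanced --epoch 10 --gpu 0
#
# The run writes `config.json` and the pickled classifier into --out, which
# the prediction script below reads back.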
def main():
    label_names = D.get_tox21_label_names()

    parser = argparse.ArgumentParser(
        description='Predict with a trained model.')
    parser.add_argument('--in-dir', '-i', type=str, default='result',
                        help='Path to the result directory of the training '
                        'script.')
    parser.add_argument('--trainer-snapshot', '-s', type=str, default='',
                        help='Path to the snapshot file of the Chainer '
                        'trainer from which serialized model parameters '
                        'are extracted. If it is not specified, this '
                        'script searches the training result directory '
                        'for the latest snapshot, assuming that '
                        'the naming convention of snapshot files is '
                        '`snapshot_iter_N` where N is the number of '
                        'iterations, which is the default configuration '
                        'of Chainer.')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='batch size')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID to use. Negative value indicates '
                        'not to use GPU and to run the code on CPU.')
    args = parser.parse_args()

    with open(os.path.join(args.in_dir, 'config.json'), 'r') as i:
        config = json.loads(i.read())

    method = config['method']
    labels = config['labels']
    if labels:
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        class_num = len(label_names)

    _, test, _ = data.load_dataset(method, labels)
    y_test = test.get_datasets()[-1]

    # Load pretrained model
    predictor_ = predictor.build_predictor(method, config['unit_num'],
                                           config['conv_layers'], class_num)
    snapshot_file = args.trainer_snapshot
    if not snapshot_file:
        snapshot_file = _find_latest_snapshot(args.in_dir)
    print('Loading pretrained model parameters from {}'
          .format(snapshot_file))
    chainer.serializers.load_npz(snapshot_file, predictor_,
                                 'updater/model:main/predictor/')

    clf = Classifier(predictor=predictor_, device=args.gpu,
                     lossfun=F.sigmoid_cross_entropy,
                     metrics_fun=F.binary_accuracy)

    # --- predict ---
    print('Predicting...')

    # We need to feed only input features `x` to `predict`/`predict_proba`.
    # This converter extracts only inputs (x1, x2, ...) from the features,
    # which consist of input `x` and label `t` (x1, x2, ..., t).
    def extract_inputs(batch, device=None):
        return concat_mols(batch, device=device)[:-1]

    def postprocess_pred(x):
        x_array = cuda.to_cpu(x.data)
        return numpy.where(x_array > 0, 1, 0)

    y_pred = clf.predict(test, converter=extract_inputs,
                         postprocess_fn=postprocess_pred)
    y_proba = clf.predict_proba(test, converter=extract_inputs,
                                postprocess_fn=F.sigmoid)

    # `predict` method returns the prediction label (0: non-toxic, 1: toxic)
    print('y_pred.shape = {}, y_pred[:5, 0] = {}'.format(
        y_pred.shape, y_pred[:5, 0]))
    # `predict_proba` method returns the probability to be toxic
    print('y_proba.shape = {}, y_proba[:5, 0] = {}'.format(
        y_proba.shape, y_proba[:5, 0]))
    # --- predict end ---

    if y_pred.ndim == 1:
        y_pred = y_pred[:, None]

    if y_pred.shape != y_test.shape:
        raise RuntimeError('The shape of the prediction result array and '
                           'that of the ground truth array do not match. '
                           'Contents of the input directory may be corrupted '
                           'or modified.')

    statistics = []
    for t, p in six.moves.zip(y_test.T, y_pred.T):
        idx = t != -1
        n_correct = (t[idx] == p[idx]).sum()
        n_total = len(t[idx])
        accuracy = float(n_correct) / n_total
        statistics.append([n_correct, n_total, accuracy])

    print('{:>6} {:>8} {:>8} {:>8}'.format('TaskID', 'Correct', 'Total',
                                           'Accuracy'))
    for idx, (n_correct, n_total, accuracy) in enumerate(statistics):
        print('task{:>2} {:>8} {:>8} {:>8.4f}'.format(idx, n_correct,
                                                      n_total, accuracy))

    prediction_result_file = 'prediction.npz'
    print('Save prediction result to {}'.format(prediction_result_file))
    numpy.savez_compressed(prediction_result_file, y_pred)

    # --- evaluate ---
    # To calc loss/accuracy, we can use `Evaluator`, `ROCAUCEvaluator`
    print('Evaluating...')
    test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(test_iterator, clf, converter=concat_mols,
                            device=args.gpu)()
    print('Evaluation result: ', eval_result)
    rocauc_result = ROCAUCEvaluator(
        test_iterator, clf, converter=concat_mols, device=args.gpu,
        eval_func=predictor_, name='test', ignore_labels=-1)()
    print('ROCAUC Evaluation result: ', rocauc_result)
def main():
    # Supported preprocessing/network list
    method_list = ['nfp', 'ggnn', 'schnet', 'weavenet', 'rsgcn', 'relgcn',
                   'relgat']
    label_names = D.get_tox21_label_names()
    iterator_type = ['serial', 'balanced']

    parser = argparse.ArgumentParser(
        description='Multitask Learning with Tox21.')
    parser.add_argument('--method', '-m', type=str, choices=method_list,
                        default='nfp', help='graph convolution model to use '
                        'as a predictor.')
    parser.add_argument('--label', '-l', type=str, choices=label_names,
                        default='', help='target label for logistic '
                        'regression. Use all labels if this option '
                        'is not specified.')
    parser.add_argument('--iterator-type', type=str, choices=iterator_type,
                        default='serial', help='iterator type. If `balanced` '
                        'is specified, data is sampled to take the same '
                        'number of positive/negative labels during training.')
    parser.add_argument('--eval-mode', type=int, default=1,
                        help='Evaluation mode. '
                        '0: only binary_accuracy is calculated. '
                        '1: binary_accuracy and ROC-AUC score are calculated')
    parser.add_argument('--conv-layers', '-c', type=int, default=4,
                        help='number of convolution layers')
    parser.add_argument('--batchsize', '-b', type=int, default=32,
                        help='batch size')
    parser.add_argument(
        '--device', type=str, default='-1',
        help='Device specifier. Either ChainerX device specifier or an '
        'integer. If non-negative integer, CuPy arrays with specified '
        'device id are used. If negative integer, NumPy arrays are used')
    parser.add_argument('--out', '-o', type=str, default='result',
                        help='path to output directory')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='number of epochs')
    parser.add_argument('--unit-num', '-u', type=int, default=16,
                        help='number of units in one layer of the model')
    parser.add_argument('--resume', '-r', type=str, default='',
                        help='path to a trainer snapshot')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--protocol', type=int, default=2,
                        help='protocol version for pickle')
    parser.add_argument('--model-filename', type=str,
                        default='classifier.pkl',
                        help='file name for pickled model')
    parser.add_argument('--num-data', type=int, default=-1,
                        help='Number of data to be parsed from parser. '
                        '-1 indicates to parse all data.')
    args = parser.parse_args()

    method = args.method
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        class_num = len(label_names)

    # Dataset preparation
    train, val, _ = data.load_dataset(method, labels, num_data=args.num_data)

    # Network
    predictor_ = set_up_predictor(method, args.unit_num, args.conv_layers,
                                  class_num)

    iterator_type = args.iterator_type
    if iterator_type == 'serial':
        train_iter = I.SerialIterator(train, args.batchsize)
    elif iterator_type == 'balanced':
        if class_num > 1:
            raise ValueError('BalancedSerialIterator can be used with only '
                             'one-label classification, please specify the '
                             'label to be predicted by the --label option.')
        train_iter = BalancedSerialIterator(train, args.batchsize,
                                            train.features[:, -1],
                                            ignore_labels=-1)
        train_iter.show_label_stats()
    else:
        raise ValueError('Invalid iterator type {}'.format(iterator_type))

    device = chainer.get_device(args.device)
    classifier = Classifier(predictor_, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=F.binary_accuracy, device=device)

    extensions_list = []
    eval_mode = args.eval_mode
    if eval_mode == 1:
        train_eval_iter = I.SerialIterator(train, args.batchsize,
                                           repeat=False, shuffle=False)
        extensions_list.append(ROCAUCEvaluator(
            train_eval_iter, classifier, eval_func=predictor_,
            device=device, converter=concat_mols, name='train',
            pos_labels=1, ignore_labels=-1, raise_value_error=False))
        # extension name='validation' is already used by `Evaluator`,
        # so the extension name `val` is used instead.
        val_iter = I.SerialIterator(val, args.batchsize, repeat=False,
                                    shuffle=False)
        extensions_list.append(ROCAUCEvaluator(
            val_iter, classifier, eval_func=predictor_,
            device=device, converter=concat_mols, name='val',
            pos_labels=1, ignore_labels=-1))

    run_train(classifier, train_iter, valid=val,
              batch_size=args.batchsize, epoch=args.epoch, out=args.out,
              device=device, converter=concat_mols,
              extensions_list=extensions_list, resume_path=args.resume)
    # frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    # trainer.extend(E.snapshot(), trigger=(frequency, 'epoch'))
    # trainer.run()

    config = {'method': args.method,
              'conv_layers': args.conv_layers,
              'unit_num': args.unit_num,
              'labels': args.label}
    save_json(os.path.join(args.out, 'config.json'), config)

    classifier.save_pickle(os.path.join(args.out, args.model_filename),
                           protocol=args.protocol)
def main():
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data

    if args.label:
        labels = args.label
        cache_dir = os.path.join(
            'input', '{}_{}_{}'.format(dataset_name, method, labels))
    else:
        labels = None
        cache_dir = os.path.join('input',
                                 '{}_{}_all'.format(dataset_name, method))

    # Load the cached dataset.
    filename = dataset_part_filename('test', num_data)
    path = os.path.join(cache_dir, filename)
    if os.path.exists(path):
        print('Loading cached dataset from {}.'.format(path))
        test = NumpyTupleDataset.load(path)
    else:
        _, _, test = download_entire_dataset(dataset_name, num_data, labels,
                                             method, cache_dir)

    # # Load the standard scaler parameters, if necessary.
    # if args.scale == 'standardize':
    #     scaler_path = os.path.join(args.in_dir, 'scaler.pkl')
    #     print('Loading scaler parameters from {}.'.format(scaler_path))
    #     with open(scaler_path, mode='rb') as f:
    #         scaler = pickle.load(f)
    # else:
    #     print('No standard scaling was selected.')
    #     scaler = None

    # Model-related data is stored in this directory.
    model_dir = os.path.join(args.in_dir, os.path.basename(cache_dir))

    model_filename = {
        'classification': 'classifier.pkl',
        'regression': 'regressor.pkl'
    }
    task_type = molnet_default_config[dataset_name]['task_type']
    model_path = os.path.join(model_dir, model_filename[task_type])
    print('model_path = {}'.format(model_path))
    print('Loading model weights from {}...'.format(model_path))

    if task_type == 'classification':
        model = Classifier.load_pickle(model_path, device=args.gpu)
    elif task_type == 'regression':
        model = Regressor.load_pickle(model_path, device=args.gpu)
    else:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    # Proposed by Ishiguro
    # TODO: consider go/no-go with the following modification
    # Re-load the best-validation score snapshot
    serializers.load_npz(os.path.join(
        model_dir, 'best_val_' + model_filename[task_type]), model)

    # # Replace the default predictor with one that scales the output labels.
    # scaled_predictor = ScaledGraphConvPredictor(model.predictor)
    # scaled_predictor.scaler = scaler
    # model.predictor = scaled_predictor

    # Run an evaluator on the test dataset.
    print('Evaluating...')
    test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False)
    eval_result = Evaluator(test_iterator, model, converter=concat_mols,
                            device=args.gpu)()
    print('Evaluation result: ', eval_result)

    # Proposed by Ishiguro: add more stats
    # TODO: consider go/no-go with the following modification
    if task_type == 'regression':
        # Convert to native Python values for JSON serialization.
        for k, v in eval_result.items():
            eval_result[k] = float(v)
        save_json(os.path.join(args.in_dir, 'eval_result.json'), eval_result)
    elif task_type == 'classification':
        # `Classifier` is not equipped with a ROC-AUC evaluation function,
        # so a separate ROC-AUC evaluator is used here.
        rocauc_result = ROCAUCEvaluator(
            test_iterator, model, converter=concat_mols, device=args.gpu,
            eval_func=model.predictor, name='test', ignore_labels=-1)()
        print('ROCAUC Evaluation result: ', rocauc_result)
        save_json(os.path.join(args.in_dir, 'eval_result.json'),
                  rocauc_result)
    else:
        pass

    # Save the evaluation results.
    save_json(os.path.join(model_dir, 'eval_result.json'), eval_result)
def setup_class(cls):
    cls.link = Classifier(links.Linear(10, 3))
    cls.x = numpy.random.uniform(-1, 1, (5, 10)).astype(numpy.float32)
def main():
    # Supported preprocessing/network list
    method_list = ['nfp', 'ggnn', 'schnet', 'weavenet', 'rsgcn', 'attention']
    label_names = D.get_tox21_label_names()
    iterator_type = ['serial', 'balanced']

    parser = argparse.ArgumentParser(
        description='Multitask Learning with Tox21.')
    parser.add_argument('--method', '-m', type=str, choices=method_list,
                        default='nfp', help='graph convolution model to use '
                        'as a predictor.')
    parser.add_argument('--label', '-l', type=str, choices=label_names,
                        default='', help='target label for logistic '
                        'regression. Use all labels if this option '
                        'is not specified.')
    parser.add_argument('--iterator-type', type=str, choices=iterator_type,
                        default='serial', help='iterator type. If `balanced` '
                        'is specified, data is sampled to take the same '
                        'number of positive/negative labels during training.')
    parser.add_argument('--eval-mode', type=int, default=1,
                        help='Evaluation mode. '
                        '0: only binary_accuracy is calculated. '
                        '1: binary_accuracy and ROC-AUC score are calculated')
    parser.add_argument('--conv-layers', '-c', type=int, default=4,
                        help='number of convolution layers')
    parser.add_argument('--batchsize', '-b', type=int, default=32,
                        help='batch size')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID to use. Negative value indicates '
                        'not to use GPU and to run the code on CPU.')
    parser.add_argument('--out', '-o', type=str, default='result',
                        help='path to output directory')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='number of epochs')
    parser.add_argument('--unit-num', '-u', type=int, default=16,
                        help='number of units in one layer of the model')
    parser.add_argument('--resume', '-r', type=str, default='',
                        help='path to a trainer snapshot')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--protocol', type=int, default=2,
                        help='protocol version for pickle')
    parser.add_argument('--model-filename', type=str,
                        default='classifier.pkl',
                        help='file name for pickled model')
    parser.add_argument('--num-data', type=int, default=-1,
                        help='Number of data to be parsed from parser. '
                        '-1 indicates to parse all data.')
    args = parser.parse_args()

    method = args.method
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        class_num = len(label_names)

    # Dataset preparation
    print('labels : ', labels)
    print('num_data : ', args.num_data)
    train, val, _ = data.load_dataset(method, labels, num_data=args.num_data)

    # Network
    predictor_ = predictor.build_predictor(method, args.unit_num,
                                           args.conv_layers, class_num)

    iterator_type = args.iterator_type
    if iterator_type == 'serial':
        train_iter = I.SerialIterator(train, args.batchsize)
    elif iterator_type == 'balanced':
        if class_num > 1:
            raise ValueError('BalancedSerialIterator can be used with only '
                             'one-label classification, please specify the '
                             'label to be predicted by the --label option.')
        train_iter = BalancedSerialIterator(train, args.batchsize,
                                            train.features[:, -1],
                                            ignore_labels=-1)
        train_iter.show_label_stats()
    else:
        raise ValueError('Invalid iterator type {}'.format(iterator_type))
    val_iter = I.SerialIterator(val, args.batchsize, repeat=False,
                                shuffle=False)

    classifier = Classifier(predictor_, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=F.binary_accuracy, device=args.gpu)

    optimizer = O.Adam()
    optimizer.setup(classifier)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu,
                                       converter=concat_mols)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(E.Evaluator(val_iter, classifier, device=args.gpu,
                               converter=concat_mols))
    trainer.extend(E.LogReport())

    eval_mode = args.eval_mode
    if eval_mode == 0:
        trainer.extend(E.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
            'validation/main/accuracy', 'elapsed_time']))
    elif eval_mode == 1:
        train_eval_iter = I.SerialIterator(train, args.batchsize,
                                           repeat=False, shuffle=False)
        trainer.extend(ROCAUCEvaluator(
            train_eval_iter, classifier, eval_func=predictor_,
            device=args.gpu, converter=concat_mols, name='train',
            pos_labels=1, ignore_labels=-1, raise_value_error=False))
        # extension name='validation' is already used by `Evaluator`,
        # so the extension name `val` is used instead.
        trainer.extend(ROCAUCEvaluator(
            val_iter, classifier, eval_func=predictor_,
            device=args.gpu, converter=concat_mols, name='val',
            pos_labels=1, ignore_labels=-1))
        trainer.extend(E.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'train/main/roc_auc',
            'validation/main/loss', 'validation/main/accuracy',
            'val/main/roc_auc', 'elapsed_time']))
    else:
        raise ValueError('Invalid eval_mode {}'.format(eval_mode))

    trainer.extend(E.ProgressBar(update_interval=10))
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(E.snapshot(), trigger=(frequency, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    config = {'method': args.method,
              'conv_layers': args.conv_layers,
              'unit_num': args.unit_num,
              'labels': args.label}
    with open(os.path.join(args.out, 'config.json'), 'w') as o:
        o.write(json.dumps(config))

    classifier.save_pickle(os.path.join(args.out, args.model_filename),
                           protocol=args.protocol)
def test_invalid_label_key_type(self):
    with pytest.raises(TypeError):
        Classifier(links.Linear(10, 3), label_key=None)
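For context on what this test guards: label_key selects which element of the input batch is treated as the target, and it must be an int or a str, so None raises TypeError. A minimal sketch of the two accepted forms (the link and shapes are illustrative only):

from chainer import links
from chainer_chemistry.models import Classifier

# int: positional index into the argument tuple, e.g. link(x, t).
clf_pos = Classifier(links.Linear(10, 3), label_key=-1)
# str: keyword name of the target argument, e.g. link(x=..., t=...).
clf_kw = Classifier(links.Linear(10, 3), label_key='t')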
def main():
    args = parse_arguments()
    generate_drug_list = True if args.generate_drug_list == 'True' else False

    if args.label:
        labels = args.label
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation.
    def postprocess_label(label_list):
        return np.asarray(label_list, dtype=np.int32)

    # Apply a preprocessor to the dataset.
    logging.info('Preprocessing test dataset...')
    preprocessor = preprocess_method_dict['ggnn']()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    test_dict = parser.parse(args.test_datafile,
                             return_smiles_pair_original=True)
    test = test_dict['dataset']
    # test_smiles_pairs = test_dict['smiles_pair_original']

    from chainer.iterators import SerialIterator
    test_iter = SerialIterator(test, 1, repeat=False, shuffle=False)

    out = os.path.join('output', args.out)
    model_path = os.path.join(out, args.model_filename)
    # `load_pickle` is a static method; call it on the class to get an
    # instance.
    print('model_path: {}'.format(model_path))
    model = Classifier.load_pickle(model_path, args.gpu)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
    else:
        model.to_cpu()

    snapshot_path = os.path.join(out, args.snapshot)
    try:
        chainer.serializers.load_npz(snapshot_path, model)
    except KeyError as e:
        print(e)

    evaluator = MyEvaluator(test_iter, model, converter=concat_mols,
                            device=args.gpu,
                            eval_func=model.predictor.predict,
                            # mediate_func=models.predictor.mediate_output,
                            name='test', ignore_labels=-1)

    e1_total, e2_total = evaluator.generate_representations()
    y_total, t_total = evaluator.generate_y_and_t()

    test_filename = os.path.basename(args.test_datafile).split('.')[0]
    dst_repre_filename = test_filename + '_e.csv'
    dst_repre_filepath = os.path.join(out, dst_repre_filename)
    add_representations(args.test_datafile, dst_repre_filepath,
                        e1_total, e2_total,
                        generate_drug_list=generate_drug_list)

    dst_filename = test_filename + '_e_y.csv'
    dst_filepath = os.path.join(out, dst_filename)
    add_reprensentations_and_y(args.test_datafile, dst_filepath,
                               e1_total, e2_total, y_total)

    perf_dict = dict()
    for metric in ['roc_auc', 'prc_auc', 'accuracy', 'precision',
                   'recall', 'f1']:
        result = evaluator.compuate(metric=metric)
        perf_dict[metric] = result
        print('{}: {}'.format(metric, result))
    with open(os.path.join(ROOT_PATH, 'eval_result.json'), 'w') as f:
        json.dump(perf_dict, f)

    def eval_func(atoms_1, adj_1, atoms_2, adj_2, label):
        sample = [
            (atoms_1, adj_1),
            (atoms_2, adj_2),
        ]
        sample = concat_mols(sample)
        atoms_1, adj_1 = sample[0]
        atoms_2, adj_2 = sample[1]
        pred, _ = model.predictor.predict(atoms_1, adj_1, atoms_2, adj_2)
        return pred

    # 1. Set up the saliency calculator.
    calculator = IntegratedGradientsCalculator(
        model.predictor, steps=5, eval_fun=eval_func,
        target_extractor=VariableMonitorLinkHook(
            model.predictor.graph_conv.embed, timing='post'),
        device=args.gpu)
    M = 1
    # 2. Compute the saliency samples.
    # saliency_samples_vanilla = calculator.compute(
    #     test, M=1, converter=concat_mols)
    # saliency_samples_smooth = calculator.compute(
    #     test, M=M, converter=concat_mols,
    #     noise_sampler=GaussianNoiseSampler())
    saliency_samples_bayes = calculator.compute(
        test, M=M, converter=concat_mols, train=True)

    visualizer = SmilesVisualizer()
    smiles = list(pd.read_csv(args.test_datafile, index_col=0)['smiles_2'])
    # from IPython.display import display, HTML

    def sv_visualize(i, ratio, method, view):
        # saliency_vanilla = calculator.aggregate(
        #     saliency_samples_vanilla, ch_axis=3, method=method)
        # saliency_smooth = calculator.aggregate(
        #     saliency_samples_smooth, ch_axis=3, method=method)
        saliency_bayes = calculator.aggregate(
            saliency_samples_bayes, ch_axis=3, method=method)
        scaler = abs_max_scaler
        if view == 'view':
            # svg_vanilla = visualizer.visualize(
            #     saliency_vanilla[i], smiles[i], visualize_ratio=ratio,
            #     scaler=scaler)
            # svg_smooth = visualizer.visualize(
            #     saliency_smooth[i], smiles[i], visualize_ratio=ratio,
            #     scaler=scaler)
            svg_bayes = visualizer.visualize(
                saliency_bayes[i], smiles[i], visualize_ratio=ratio,
                scaler=scaler)
            # display(svg_bayes)
        elif view == 'save':
            if not os.path.exists('results'):
                os.makedirs('results')
            # visualizer.visualize(
            #     saliency_vanilla[i], smiles[i], visualize_ratio=ratio,
            #     scaler=scaler,
            #     save_filepath='results/{}_vanilla.png'.format(i))
            # visualizer.visualize(
            #     saliency_smooth[i], smiles[i], visualize_ratio=ratio,
            #     scaler=scaler,
            #     save_filepath='results/{}_smooth.png'.format(i))
            visualizer.visualize(
                saliency_bayes[i], smiles[i], visualize_ratio=ratio,
                scaler=scaler,
                save_filepath='results/{}_bayes.svg'.format(i))
            print('saved {}-th result!'.format(i))
        else:
            print(view, 'not supported')

    sv_visualize(i=2, ratio=0.7, method='raw', view='save')
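The helper above is called once with method='raw'; the aggregate step of the saliency module also accepts magnitude-based reductions. A short usage sketch, with the 'abs' method name assumed from this version of the saliency calculator:

# Save absolute-magnitude saliency maps for the first three test pairs.
for i in range(3):
    sv_visualize(i=i, ratio=0.7, method='abs', view='save')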
def test_predict_cpu(self):
    clf = Classifier(self.predictor)
    actual_t = clf.predict(self.x)
    assert actual_t.shape == (3,)
    assert actual_t.dtype == numpy.int32
    assert numpy.alltrue(actual_t == self.t)
def main():
    args = parse_arguments()

    # Set up some useful variables that will be used later on.
    dataset_name = args.dataset
    method = args.method
    num_data = args.num_data
    n_unit = args.unit_num
    conv_layers = args.conv_layers

    task_type = molnet_default_config[dataset_name]['task_type']
    model_filename = {
        'classification': 'classifier.pkl',
        'regression': 'regressor.pkl'
    }

    print('Using dataset: {}...'.format(dataset_name))

    if args.label:
        labels = args.label
        cache_dir = os.path.join(
            'input', '{}_{}_{}'.format(dataset_name, method, labels))
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        labels = None
        cache_dir = os.path.join(
            'input', '{}_{}_all'.format(dataset_name, method))
        class_num = len(molnet_default_config[args.dataset]['tasks'])

    # Load the train and validation parts of the dataset.
    filenames = [dataset_part_filename(p, num_data)
                 for p in ['train', 'valid']]
    paths = [os.path.join(cache_dir, f) for f in filenames]
    if all(os.path.exists(path) for path in paths):
        dataset_parts = []
        for path in paths:
            print('Loading cached dataset from {}.'.format(path))
            dataset_parts.append(NumpyTupleDataset.load(path))
    else:
        dataset_parts = download_entire_dataset(dataset_name, num_data,
                                                labels, method, cache_dir)
    train, valid = dataset_parts[0], dataset_parts[1]

    # Scale the label values, if necessary.
    scaler = None
    if args.scale == 'standardize':
        if task_type == 'regression':
            print('Applying standard scaling to the labels.')
            scaler = fit_scaler(dataset_parts)
        else:
            print('Label scaling is not available for classification tasks.')
    else:
        print('No label scaling was selected.')

    # Set up the predictor.
    predictor = set_up_predictor(method, n_unit, conv_layers, class_num,
                                 label_scaler=scaler)

    # Set up the iterators.
    train_iter = iterators.SerialIterator(train, args.batchsize)
    valid_iter = iterators.SerialIterator(valid, args.batchsize,
                                          repeat=False, shuffle=False)

    # Load metrics for the current dataset.
    metrics = molnet_default_config[dataset_name]['metrics']
    metrics_fun = {k: v for k, v in metrics.items()
                   if isinstance(v, types.FunctionType)}
    loss_fun = molnet_default_config[dataset_name]['loss']

    device = chainer.get_device(args.device)
    if task_type == 'regression':
        model = Regressor(predictor, lossfun=loss_fun,
                          metrics_fun=metrics_fun, device=device)
    elif task_type == 'classification':
        model = Classifier(predictor, lossfun=loss_fun,
                           metrics_fun=metrics_fun, device=device)
    else:
        raise ValueError('Invalid task type ({}) encountered when processing '
                         'dataset ({}).'.format(task_type, dataset_name))

    # Set up the optimizer.
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # Save model-related output to this directory.
    model_dir = os.path.join(args.out, os.path.basename(cache_dir))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Set up the updater.
    updater = training.StandardUpdater(train_iter, optimizer, device=device,
                                       converter=concat_mols)

    # Set up the trainer.
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=model_dir)
    trainer.extend(E.Evaluator(valid_iter, model, device=device,
                               converter=concat_mols))
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())

    # TODO: consider go/no-go of the following block
    # (i) more reporting for validation/evaluation
    # (ii) best validation score snapshot
    # if task_type == 'regression':
    #     metric_name_list = list(metrics.keys())
    #     if 'RMSE' in metric_name_list:
    #         trainer.extend(E.snapshot_object(
    #             model, 'best_val_' + model_filename[task_type]),
    #             trigger=training.triggers.MinValueTrigger(
    #                 'validation/main/RMSE'))
    #     elif 'MAE' in metric_name_list:
    #         trainer.extend(E.snapshot_object(
    #             model, 'best_val_' + model_filename[task_type]),
    #             trigger=training.triggers.MinValueTrigger(
    #                 'validation/main/MAE'))
    #     else:
    #         print('[WARNING] No validation metric defined?')
    # elif task_type == 'classification':
    #     train_eval_iter = iterators.SerialIterator(
    #         train, args.batchsize, repeat=False, shuffle=False)
    #     trainer.extend(ROCAUCEvaluator(
    #         train_eval_iter, predictor, eval_func=predictor,
    #         device=args.gpu, converter=concat_mols, name='train',
    #         pos_labels=1, ignore_labels=-1, raise_value_error=False))
    #     # The extension name 'validation' is already used by `Evaluator`,
    #     # so the extension name 'val' is used instead.
    #     trainer.extend(ROCAUCEvaluator(
    #         valid_iter, predictor, eval_func=predictor,
    #         device=args.gpu, converter=concat_mols, name='val',
    #         pos_labels=1, ignore_labels=-1, raise_value_error=False))
    #     trainer.extend(E.snapshot_object(
    #         model, 'best_val_' + model_filename[task_type]),
    #         trigger=training.triggers.MaxValueTrigger('val/main/roc_auc'))
    # else:
    #     raise NotImplementedError(
    #         'Not implemented task_type = {}'.format(task_type))

    trainer.extend(AutoPrintReport())
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Save the model's parameters.
    model_path = os.path.join(model_dir, model_filename[task_type])
    print('Saving the trained model to {}...'.format(model_path))
    model.save_pickle(model_path, protocol=args.protocol)
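If the commented-out block above is ever enabled, its regression half reduces to one snapshot_object extension per metric, keyed on a minimum-value trigger. A minimal sketch of the RMSE case, assuming the metric is reported under 'validation/main/RMSE' as in the commented code:

# Keep only the model whose validation RMSE is the best seen so far.
trainer.extend(
    E.snapshot_object(model, 'best_val_' + model_filename[task_type]),
    trigger=training.triggers.MinValueTrigger('validation/main/RMSE'))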
def main():
    # Parse the arguments.
    args = parse_arguments()
    args.out = os.path.join(args.out, args.method)
    save_args(args, args.out)

    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression
    # task.
    def postprocess_label_float(label_list):
        return numpy.asarray(label_list, dtype=numpy.float32)

    def postprocess_label_int(label_list):
        return numpy.asarray(label_list, dtype=numpy.int64)

    # Apply a preprocessor to the dataset.
    if args.train:
        # Training data.
        fn, ext = os.path.splitext(args.train)
        if ext == '.npz':
            print('Loading training dataset...')
            train = NumpyTupleDataset.load(args.train)
        else:
            print('Preprocessing training dataset...')
            preprocessor = preprocess_method_dict[args.method]()
            if args.classification:
                parser = CSVFileParser(
                    preprocessor, postprocess_label=postprocess_label_int,
                    labels=labels, smiles_col='SMILES')
            else:
                parser = CSVFileParser(
                    preprocessor, postprocess_label=postprocess_label_float,
                    labels=labels, smiles_col='SMILES')
            train = parser.parse(args.train)['dataset']
            NumpyTupleDataset.save(
                os.path.join(args.out, os.path.split(fn)[1]), train)

        # Scale the label values, if necessary.
        if args.scale == 'standardize':
            scaler = StandardScaler()
            scaler.fit(train.get_datasets()[-1])
        else:
            scaler = None

    # Test data.
    fn, ext = os.path.splitext(args.val)
    if ext == '.npz':
        print('Loading test dataset...')
        test = NumpyTupleDataset.load(args.val)
    else:
        print('Preprocessing test dataset...')
        preprocessor = preprocess_method_dict[args.method]()
        if args.classification:
            parser = CSVFileParser(
                preprocessor, postprocess_label=postprocess_label_int,
                labels=labels, smiles_col='SMILES')
        else:
            parser = CSVFileParser(
                preprocessor, postprocess_label=postprocess_label_float,
                labels=labels, smiles_col='SMILES')
        test = parser.parse(args.val)['dataset']
        NumpyTupleDataset.save(
            os.path.join(args.out, os.path.split(fn)[1]), test)

    # Set up the model.
    device = chainer.get_device(args.device)
    converter = converter_method_dict[args.method]
    metrics_fun = {'mae': F.mean_absolute_error, 'rmse': rmse}

    if args.classification:
        if args.load_model:
            model = Classifier.load_pickle(args.load_model, device=device)
            print('model file loaded: ', args.load_model)
        else:
            predictor = set_up_predictor(args.method, args.unit_num,
                                         args.conv_layers, class_num)
            model = Classifier(predictor, lossfun=F.sigmoid_cross_entropy,
                               metrics_fun=F.binary_accuracy, device=device)
    else:
        if args.load_model:
            model = Regressor.load_pickle(args.load_model, device=device)
            print('model file loaded: ', args.load_model)
        else:
            predictor = set_up_predictor(
                args.method + args.method_suffix, args.unit_num,
                args.conv_layers, class_num, label_scaler=scaler)
            model = Regressor(predictor, lossfun=F.mean_squared_error,
                              metrics_fun=metrics_fun, device=device)

    if args.train:
        if args.balanced_iter:
            train = BalancedSerialIterator(train, args.batchsize,
                                           train.features[:, -1],
                                           ignore_labels=-1)
            train.show_label_stats()

        print('Training...')
        log_keys = ['main/mae', 'main/rmse', 'validation/main/mae',
                    'validation/main/rmse', 'validation/main/roc_auc']
        extensions_list = [extensions.PlotReport(
            log_keys, 'iteration', trigger=(100, 'iteration'),
            file_name='loss.png')]
        if args.eval_roc and args.classification:
            extensions_list.append(ROCAUCEvaluator(
                test, model, eval_func=predictor, device=device,
                converter=converter, name='validation', pos_labels=1,
                ignore_labels=-1, raise_value_error=False))

        save_json(os.path.join(args.out, 'args.json'), vars(args))
        run_train(model, train, valid=test, batch_size=args.batchsize,
                  epoch=args.epoch, out=args.out,
                  extensions_list=extensions_list, device=device,
                  converter=converter)  # , resume_path=args.resume

        # Save the model's parameters.
        model_path = os.path.join(args.out, args.model_filename)
        print('Saving the trained model to {}...'.format(model_path))
        if hasattr(model.predictor.graph_conv, 'reset_state'):
            model.predictor.graph_conv.reset_state()
        model.save_pickle(model_path, protocol=args.protocol)

    # Prediction.
    it = SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    result = []
    for batch in it:
        in_arrays = convert._call_converter(converter, batch, device)
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            if isinstance(in_arrays, tuple):
                res = model(*in_arrays)
            elif isinstance(in_arrays, dict):
                res = model(**in_arrays)
            else:
                res = model(in_arrays)
        # `model.y` caches the predictor output of the last call; move it
        # to the host before saving (works for both CPU and GPU arrays,
        # unlike the cupy-only `.get()`).
        result.extend(chainer.backends.cuda.to_cpu(model.y.array))
    numpy.savetxt(os.path.join(args.out, 'result.csv'), numpy.array(result))

    eval_result = Evaluator(it, model, converter=converter, device=device)()
    print('Evaluation result: ', eval_result)
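The prediction loop above reaches into the private helper convert._call_converter; the same loop can be written against public Chainer APIs only, since converters such as concat_mols take (batch, device) directly. A minimal sketch, assuming `model` and a converter as set up in main() above; the helper name predict_all is hypothetical:

import chainer
from chainer.backends import cuda
from chainer.iterators import SerialIterator


def predict_all(model, dataset, converter, batchsize=128, device=-1):
    """Run the model over a dataset and return host-side predictions."""
    it = SerialIterator(dataset, batchsize, repeat=False, shuffle=False)
    results = []
    for batch in it:
        # Public converters accept the batch and target device directly.
        in_arrays = converter(batch, device)
        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            model(*in_arrays)
        # model.y caches the predictor output of the last forward pass.
        results.extend(cuda.to_cpu(model.y.array))
    return results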