def deploy(name): try: build_predictor(name) return Response( 'The model {} has been deployed correctly'.format(name), status=200) except Exception as e: message = 'There was a problem deploying model {} error = {}'.format( name, str(e)) raise ServiceException(message)
def main(): label_names = D.get_tox21_label_names() parser = argparse.ArgumentParser( description='Inference with a trained model.') parser.add_argument('--in-dir', '-i', type=str, default='result', help='path result directory of training') parser.add_argument('--batchsize', '-b', type=int, default=128, help='batch size') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID to use. Negative value indicates ' 'not to use GPU and to run the code in CPU.') args = parser.parse_args() with open(os.path.join(args.in_dir, 'config.json'), 'r') as i: config = json.loads(i.read()) method = config['method'] if method == 'schnet': raise ValueError('Currently SchNet does not support prediction.') labels = config['labels'] if labels: class_num = len(labels) if isinstance(labels, list) else 1 else: class_num = len(label_names) _, _, test = data.load_dataset(method, labels) predictor_ = predictor.build_predictor(method, config['unit_num'], config['conv_layers'], class_num) if args.gpu >= 0: chainer.cuda.get_device_from_id(args.gpu).use() predictor_.to_gpu() inference_loop = predictor.InferenceLoop(predictor_) y_pred = inference_loop.inference(test) numpy.save('prediction.npy', y_pred)
def main(): label_names = D.get_tox21_label_names() parser = argparse.ArgumentParser( description='Inference with a trained model.') parser.add_argument('--in-dir', '-i', type=str, default='result', help='Path to the result directory of the training ' 'script.') parser.add_argument('--trainer-snapshot', '-s', type=str, default='', help='Path to the snapshot file of the Chainer ' 'trainer from which serialized model parameters ' 'are extracted. If it is not specified, this ' 'script searches the training result directory ' 'for the latest snapshot, assuming that ' 'the naming convension of snapshot files is ' '`snapshot_iter_N` where N is the number of ' 'iterations, which is the default configuration ' 'of Chainer.') parser.add_argument('--batchsize', '-b', type=int, default=128, help='batch size') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID to use. Negative value indicates ' 'not to use GPU and to run the code in CPU.') args = parser.parse_args() with open(os.path.join(args.in_dir, 'config.json'), 'r') as i: config = json.loads(i.read()) method = config['method'] if method == 'schnet': raise ValueError('Currently SchNet does not support prediction.') labels = config['labels'] if labels: class_num = len(labels) if isinstance(labels, list) else 1 else: class_num = len(label_names) _, test, _ = data.load_dataset(method, labels) test = test.get_datasets() X_test = D.NumpyTupleDataset(*test[:-1]) y_test = test[-1] # Load pretrained model predictor_ = predictor.build_predictor( method, config['unit_num'], config['conv_layers'], class_num) snapshot_file = args.trainer_snapshot if not snapshot_file: snapshot_file = _find_latest_snapshot(args.in_dir) print('Loading pretrained model parameters from {}'.format(snapshot_file)) chainer.serializers.load_npz(snapshot_file, predictor_, 'updater/model:main/predictor/') if args.gpu >= 0: chainer.cuda.get_device_from_id(args.gpu).use() predictor_.to_gpu() inference_loop = predictor.InferenceLoop(predictor_) y_pred = inference_loop.inference(X_test) if y_pred.shape != y_test.shape: raise RuntimeError('The shape of the prediction result array and ' 'that of the ground truth array do not match. ' 'Contents of the input directory may be corrupted ' 'or modified.') if y_pred.ndim == 1: y_pred = y_pred[:, None] y_test = y_test[:, None] statistics = [] for t, p in six.moves.zip(y_test.T, y_pred.T): idx = t != -1 n_correct = (t[idx] == p[idx]).sum() n_total = len(t[idx]) accuracy = float(n_correct) / n_total statistics.append([n_correct, n_total, accuracy]) print('{:>6} {:>8} {:>8} {:>8}' .format('TaskID', 'Correct', 'Total', 'Accuracy')) for idx, (n_correct, n_total, accuracy) in enumerate(statistics): print('task{:>2} {:>8} {:>8} {:>8.4f}' .format(idx, n_correct, n_total, accuracy)) prediction_result_file = 'prediction.npz' print('Save prediction result to {}'.format(prediction_result_file)) numpy.savez_compressed(prediction_result_file, y_pred)
def main(): # Supported preprocessing/network list method_list = ['nfp', 'ggnn', 'schnet', 'weavenet', 'rsgcn', 'attention'] label_names = D.get_tox21_label_names() iterator_type = ['serial', 'balanced'] parser = argparse.ArgumentParser( description='Multitask Learning with Tox21.') parser.add_argument('--method', '-m', type=str, choices=method_list, default='nfp', help='graph convolution model to use ' 'as a predictor.') parser.add_argument('--label', '-l', type=str, choices=label_names, default='', help='target label for logistic ' 'regression. Use all labels if this option ' 'is not specified.') parser.add_argument('--iterator-type', type=str, choices=iterator_type, default='serial', help='iterator type. If `balanced` ' 'is specified, data is sampled to take same number of' 'positive/negative labels during training.') parser.add_argument('--eval-mode', type=int, default=1, help='Evaluation mode.' '0: only binary_accuracy is calculated.' '1: binary_accuracy and ROC-AUC score is calculated') parser.add_argument('--conv-layers', '-c', type=int, default=4, help='number of convolution layers') parser.add_argument('--batchsize', '-b', type=int, default=32, help='batch size') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID to use. Negative value indicates ' 'not to use GPU and to run the code in CPU.') parser.add_argument('--out', '-o', type=str, default='result', help='path to output directory') parser.add_argument('--epoch', '-e', type=int, default=10, help='number of epochs') parser.add_argument('--unit-num', '-u', type=int, default=16, help='number of units in one layer of the model') parser.add_argument('--resume', '-r', type=str, default='', help='path to a trainer snapshot') parser.add_argument('--frequency', '-f', type=int, default=-1, help='Frequency of taking a snapshot') parser.add_argument('--protocol', type=int, default=2, help='protocol version for pickle') parser.add_argument('--model-filename', type=str, default='classifier.pkl', help='file name for pickled model') parser.add_argument('--num-data', type=int, default=-1, help='Number of data to be parsed from parser.' '-1 indicates to parse all data.') args = parser.parse_args() method = args.method if args.label: labels = args.label class_num = len(labels) if isinstance(labels, list) else 1 else: labels = None class_num = len(label_names) # Dataset preparation print("labels : ", labels) print("num_data : ", args.num_data) train, val, _ = data.load_dataset(method, labels, num_data=args.num_data) #train data : 11757 #len(train[0]) : 3 #len(train[0][0]) : 34 #len(train[0][1]) : 34 #len(train[0][1][0]) : 34 #len(train[0][1]) : 12 ? #import pdb;pdb.set_trace() #Network predictor_ = predictor.build_predictor(method, args.unit_num, args.conv_layers, class_num) iterator_type = args.iterator_type if iterator_type == 'serial': train_iter = I.SerialIterator(train, args.batchsize) elif iterator_type == 'balanced': if class_num > 1: raise ValueError('BalancedSerialIterator can be used with only one' 'label classification, please specify label to' 'be predicted by --label option.') train_iter = BalancedSerialIterator(train, args.batchsize, train.features[:, -1], ignore_labels=-1) train_iter.show_label_stats() else: raise ValueError('Invalid iterator type {}'.format(iterator_type)) val_iter = I.SerialIterator(val, args.batchsize, repeat=False, shuffle=False) classifier = Classifier(predictor_, lossfun=F.sigmoid_cross_entropy, metrics_fun=F.binary_accuracy, device=args.gpu) optimizer = O.Adam() optimizer.setup(classifier) updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu, converter=concat_mols) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) trainer.extend( E.Evaluator(val_iter, classifier, device=args.gpu, converter=concat_mols)) trainer.extend(E.LogReport()) eval_mode = args.eval_mode if eval_mode == 0: trainer.extend( E.PrintReport([ 'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy', 'elapsed_time' ])) elif eval_mode == 1: train_eval_iter = I.SerialIterator(train, args.batchsize, repeat=False, shuffle=False) trainer.extend( ROCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor_, device=args.gpu, converter=concat_mols, name='train', pos_labels=1, ignore_labels=-1, raise_value_error=False)) # extension name='validation' is already used by `Evaluator`, # instead extension name `val` is used. trainer.extend( ROCAUCEvaluator(val_iter, classifier, eval_func=predictor_, device=args.gpu, converter=concat_mols, name='val', pos_labels=1, ignore_labels=-1)) trainer.extend( E.PrintReport([ 'epoch', 'main/loss', 'main/accuracy', 'train/main/roc_auc', 'validation/main/loss', 'validation/main/accuracy', 'val/main/roc_auc', 'elapsed_time' ])) else: raise ValueError('Invalid accfun_mode {}'.format(eval_mode)) trainer.extend(E.ProgressBar(update_interval=10)) frequency = args.epoch if args.frequency == -1 else max(1, args.frequency) trainer.extend(E.snapshot(), trigger=(frequency, 'epoch')) if args.resume: chainer.serializers.load_npz(args.resume, trainer) trainer.run() config = { 'method': args.method, 'conv_layers': args.conv_layers, 'unit_num': args.unit_num, 'labels': args.label } with open(os.path.join(args.out, 'config.json'), 'w') as o: o.write(json.dumps(config)) classifier.save_pickle(os.path.join(args.out, args.model_filename), protocol=args.protocol)
def main(): # Supported preprocessing/network list method_list = ['nfp', 'ggnn', 'schnet', 'weavenet', 'rsgcn'] label_names = D.get_tox21_label_names() iterator_type = ['serial', 'balanced'] parser = argparse.ArgumentParser( description='Multitask Learning with Tox21.') parser.add_argument('--method', '-m', type=str, choices=method_list, default='nfp', help='graph convolution model to use ' 'as a predictor.') parser.add_argument('--label', '-l', type=str, choices=label_names, default='', help='target label for logistic ' 'regression. Use all labels if this option ' 'is not specified.') parser.add_argument('--iterator-type', type=str, choices=iterator_type, default='serial', help='iterator type. If `balanced` ' 'is specified, data is sampled to take same number of' 'positive/negative labels during training.') parser.add_argument('--conv-layers', '-c', type=int, default=4, help='number of convolution layers') parser.add_argument('--batchsize', '-b', type=int, default=128, help='batch size') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID to use. Negative value indicates ' 'not to use GPU and to run the code in CPU.') parser.add_argument('--out', '-o', type=str, default='result', help='path to output directory') parser.add_argument('--epoch', '-e', type=int, default=10, help='number of epochs') parser.add_argument('--unit-num', '-u', type=int, default=16, help='number of units in one layer of the model') parser.add_argument('--resume', '-r', type=str, default='', help='path to a trainer snapshot') parser.add_argument('--frequency', '-f', type=int, default=-1, help='Frequency of taking a snapshot') args = parser.parse_args() method = args.method if args.label: labels = args.label class_num = len(labels) if isinstance(labels, list) else 1 else: labels = None class_num = len(label_names) # Dataset preparation train, val, _ = data.load_dataset(method, labels) # Network predictor_ = predictor.build_predictor(method, args.unit_num, args.conv_layers, class_num) iterator_type = args.iterator_type if iterator_type == 'serial': train_iter = I.SerialIterator(train, args.batchsize) elif iterator_type == 'balanced': if class_num > 1: raise ValueError('BalancedSerialIterator can be used with only one' 'label classification, please specify label to' 'be predicted by --label option.') train_iter = BalancedSerialIterator(train, args.batchsize, train.features[:, -1], ignore_labels=-1) train_iter.show_label_stats() else: raise ValueError('Invalid iterator type {}'.format(iterator_type)) val_iter = I.SerialIterator(val, args.batchsize, repeat=False, shuffle=False) classifier = L.Classifier(predictor_, lossfun=F.sigmoid_cross_entropy, accfun=F.binary_accuracy) if args.gpu >= 0: chainer.cuda.get_device_from_id(args.gpu).use() classifier.to_gpu() optimizer = O.Adam() optimizer.setup(classifier) updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu, converter=concat_mols) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) trainer.extend( E.Evaluator(val_iter, classifier, device=args.gpu, converter=concat_mols)) trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch')) trainer.extend(E.LogReport()) trainer.extend( E.PrintReport([ 'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy', 'elapsed_time' ])) trainer.extend(E.ProgressBar(update_interval=10)) frequency = args.epoch if args.frequency == -1 else max(1, args.frequency) trainer.extend(E.snapshot(), trigger=(frequency, 'epoch')) if args.resume: chainer.serializers.load_npz(args.resume, trainer) trainer.run() config = { 'method': args.method, 'conv_layers': args.conv_layers, 'unit_num': args.unit_num, 'labels': args.label } with open(os.path.join(args.out, 'config.json'), 'w') as o: o.write(json.dumps(config))
def main(): # Supported preprocessing/network list method_list = ['nfp', 'ggnn', 'schnet', 'weavenet', 'rsgcn', 'relgcn', 'relgat'] label_names = D.get_tox21_label_names() iterator_type = ['serial', 'balanced'] parser = argparse.ArgumentParser( description='Multitask Learning with Tox21.') parser.add_argument('--method', '-m', type=str, choices=method_list, default='nfp', help='graph convolution model to use ' 'as a predictor.') parser.add_argument('--label', '-l', type=str, choices=label_names, default='', help='target label for logistic ' 'regression. Use all labels if this option ' 'is not specified.') parser.add_argument('--iterator-type', type=str, choices=iterator_type, default='serial', help='iterator type. If `balanced` ' 'is specified, data is sampled to take same number of' 'positive/negative labels during training.') parser.add_argument('--eval-mode', type=int, default=1, help='Evaluation mode.' '0: only binary_accuracy is calculated.' '1: binary_accuracy and ROC-AUC score is calculated') parser.add_argument('--conv-layers', '-c', type=int, default=4, help='number of convolution layers') parser.add_argument('--batchsize', '-b', type=int, default=32, help='batch size') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID to use. Negative value indicates ' 'not to use GPU and to run the code in CPU.') parser.add_argument('--out', '-o', type=str, default='result', help='path to output directory') parser.add_argument('--epoch', '-e', type=int, default=10, help='number of epochs') parser.add_argument('--unit-num', '-u', type=int, default=16, help='number of units in one layer of the model') parser.add_argument('--resume', '-r', type=str, default='', help='path to a trainer snapshot') parser.add_argument('--frequency', '-f', type=int, default=-1, help='Frequency of taking a snapshot') parser.add_argument('--protocol', type=int, default=2, help='protocol version for pickle') parser.add_argument('--model-filename', type=str, default='classifier.pkl', help='file name for pickled model') parser.add_argument('--num-data', type=int, default=-1, help='Number of data to be parsed from parser.' '-1 indicates to parse all data.') args = parser.parse_args() method = args.method if args.label: labels = args.label class_num = len(labels) if isinstance(labels, list) else 1 else: labels = None class_num = len(label_names) # Dataset preparation train, val, _ = data.load_dataset(method, labels, num_data=args.num_data) # Network predictor_ = predictor.build_predictor( method, args.unit_num, args.conv_layers, class_num) iterator_type = args.iterator_type if iterator_type == 'serial': train_iter = I.SerialIterator(train, args.batchsize) elif iterator_type == 'balanced': if class_num > 1: raise ValueError('BalancedSerialIterator can be used with only one' 'label classification, please specify label to' 'be predicted by --label option.') train_iter = BalancedSerialIterator( train, args.batchsize, train.features[:, -1], ignore_labels=-1) train_iter.show_label_stats() else: raise ValueError('Invalid iterator type {}'.format(iterator_type)) val_iter = I.SerialIterator(val, args.batchsize, repeat=False, shuffle=False) classifier = Classifier(predictor_, lossfun=F.sigmoid_cross_entropy, metrics_fun=F.binary_accuracy, device=args.gpu) optimizer = O.Adam() optimizer.setup(classifier) updater = training.StandardUpdater( train_iter, optimizer, device=args.gpu, converter=concat_mols) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) trainer.extend(E.Evaluator(val_iter, classifier, device=args.gpu, converter=concat_mols)) trainer.extend(E.LogReport()) eval_mode = args.eval_mode if eval_mode == 0: trainer.extend(E.PrintReport([ 'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy', 'elapsed_time'])) elif eval_mode == 1: train_eval_iter = I.SerialIterator(train, args.batchsize, repeat=False, shuffle=False) trainer.extend(ROCAUCEvaluator( train_eval_iter, classifier, eval_func=predictor_, device=args.gpu, converter=concat_mols, name='train', pos_labels=1, ignore_labels=-1, raise_value_error=False)) # extension name='validation' is already used by `Evaluator`, # instead extension name `val` is used. trainer.extend(ROCAUCEvaluator( val_iter, classifier, eval_func=predictor_, device=args.gpu, converter=concat_mols, name='val', pos_labels=1, ignore_labels=-1)) trainer.extend(E.PrintReport([ 'epoch', 'main/loss', 'main/accuracy', 'train/main/roc_auc', 'validation/main/loss', 'validation/main/accuracy', 'val/main/roc_auc', 'elapsed_time'])) else: raise ValueError('Invalid accfun_mode {}'.format(eval_mode)) trainer.extend(E.ProgressBar(update_interval=10)) frequency = args.epoch if args.frequency == -1 else max(1, args.frequency) trainer.extend(E.snapshot(), trigger=(frequency, 'epoch')) if args.resume: chainer.serializers.load_npz(args.resume, trainer) trainer.run() config = {'method': args.method, 'conv_layers': args.conv_layers, 'unit_num': args.unit_num, 'labels': args.label} with open(os.path.join(args.out, 'config.json'), 'w') as o: o.write(json.dumps(config)) classifier.save_pickle(os.path.join(args.out, args.model_filename), protocol=args.protocol)
def main(): label_names = D.get_tox21_label_names() parser = argparse.ArgumentParser( description='Predict with a trained model.') parser.add_argument('--in-dir', '-i', type=str, default='result', help='Path to the result directory of the training ' 'script.') parser.add_argument('--trainer-snapshot', '-s', type=str, default='', help='Path to the snapshot file of the Chainer ' 'trainer from which serialized model parameters ' 'are extracted. If it is not specified, this ' 'script searches the training result directory ' 'for the latest snapshot, assuming that ' 'the naming convension of snapshot files is ' '`snapshot_iter_N` where N is the number of ' 'iterations, which is the default configuration ' 'of Chainer.') parser.add_argument('--batchsize', '-b', type=int, default=128, help='batch size') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID to use. Negative value indicates ' 'not to use GPU and to run the code in CPU.') args = parser.parse_args() with open(os.path.join(args.in_dir, 'config.json'), 'r') as i: config = json.loads(i.read()) method = config['method'] labels = config['labels'] if labels: class_num = len(labels) if isinstance(labels, list) else 1 else: class_num = len(label_names) _, test, _ = data.load_dataset(method, labels) y_test = test.get_datasets()[-1] # Load pretrained model predictor_ = predictor.build_predictor(method, config['unit_num'], config['conv_layers'], class_num) snapshot_file = args.trainer_snapshot if not snapshot_file: snapshot_file = _find_latest_snapshot(args.in_dir) print('Loading pretrained model parameters from {}'.format(snapshot_file)) chainer.serializers.load_npz(snapshot_file, predictor_, 'updater/model:main/predictor/') clf = Classifier(predictor=predictor_, device=args.gpu, lossfun=F.sigmoid_cross_entropy, metrics_fun=F.binary_accuracy) # ---- predict --- print('Predicting...') # We need to feed only input features `x` to `predict`/`predict_proba`. # This converter extracts only inputs (x1, x2, ...) from the features which # consist of input `x` and label `t` (x1, x2, ..., t). def extract_inputs(batch, device=None): return concat_mols(batch, device=device)[:-1] def postprocess_pred(x): x_array = cuda.to_cpu(x.data) return numpy.where(x_array > 0, 1, 0) y_pred = clf.predict(test, converter=extract_inputs, postprocess_fn=postprocess_pred) y_proba = clf.predict_proba(test, converter=extract_inputs, postprocess_fn=F.sigmoid) # `predict` method returns the prediction label (0: non-toxic, 1:toxic) print('y_pread.shape = {}, y_pred[:5, 0] = {}'.format( y_pred.shape, y_pred[:5, 0])) # `predict_proba` method returns the probability to be toxic print('y_proba.shape = {}, y_proba[:5, 0] = {}'.format( y_proba.shape, y_proba[:5, 0])) # --- predict end --- if y_pred.ndim == 1: y_pred = y_pred[:, None] if y_pred.shape != y_test.shape: raise RuntimeError('The shape of the prediction result array and ' 'that of the ground truth array do not match. ' 'Contents of the input directory may be corrupted ' 'or modified.') statistics = [] for t, p in six.moves.zip(y_test.T, y_pred.T): idx = t != -1 n_correct = (t[idx] == p[idx]).sum() n_total = len(t[idx]) accuracy = float(n_correct) / n_total statistics.append([n_correct, n_total, accuracy]) print('{:>6} {:>8} {:>8} {:>8}'.format('TaskID', 'Correct', 'Total', 'Accuracy')) for idx, (n_correct, n_total, accuracy) in enumerate(statistics): print('task{:>2} {:>8} {:>8} {:>8.4f}'.format(idx, n_correct, n_total, accuracy)) prediction_result_file = 'prediction.npz' print('Save prediction result to {}'.format(prediction_result_file)) numpy.savez_compressed(prediction_result_file, y_pred) # --- evaluate --- # To calc loss/accuracy, we can use `Evaluator`, `ROCAUCEvaluator` print('Evaluating...') test_iterator = SerialIterator(test, 16, repeat=False, shuffle=False) eval_result = Evaluator(test_iterator, clf, converter=concat_mols, device=args.gpu)() print('Evaluation result: ', eval_result) rocauc_result = ROCAUCEvaluator(test_iterator, clf, converter=concat_mols, device=args.gpu, eval_func=predictor_, name='test', ignore_labels=-1)() print('ROCAUC Evaluation result: ', rocauc_result)
from predictor import build_predictor import unittest import json from model import PassengerSchema predictor = build_predictor('random_forest_classifier') schema = PassengerSchema() def get_passenger_from_file(file): with open(file) as json_file: data = json.load(json_file) return schema.load(data) class PredictionTest(unittest.TestCase): def test_survive_1(self): """ Testing model prediction with a passenger that survive """ passenger = get_passenger_from_file('test/passenger1.json') prediction = predictor.predict(passenger.to_dict()) self.assertEqual(1, prediction) def test_survive_2(self): """ Testing model prediction with a passenger that survive """ passenger = get_passenger_from_file('test/passenger2.json') prediction = predictor.predict(passenger.to_dict()) self.assertEqual(1, prediction)
def main(): # Supported preprocessing/network list method_list = ['nfp', 'ggnn', 'schnet'] label_names = D.get_tox21_label_names() parser = argparse.ArgumentParser( description='Multitask Learning with Tox21.') parser.add_argument('--method', '-m', type=str, choices=method_list, default='nfp', help='graph convolution model to use ' 'as a predictor.') parser.add_argument('--label', '-l', type=str, choices=label_names, default='', help='target label for logistic ' 'regression. Use all labels if this option ' 'is not specified.') parser.add_argument('--conv-layers', '-c', type=int, default=4, help='number of convolution layers') parser.add_argument('--batchsize', '-b', type=int, default=128, help='batch size') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID to use. Negative value indicates ' 'not to use GPU and to run the code in CPU.') parser.add_argument('--out', '-o', type=str, default='result', help='path to output directory') parser.add_argument('--epoch', '-e', type=int, default=10, help='number of epochs') parser.add_argument('--unit-num', '-u', type=int, default=16, help='number of units in one layer of the model') args = parser.parse_args() method = args.method if args.label: labels = args.label class_num = len(labels) if isinstance(labels, list) else 1 else: labels = None class_num = len(label_names) # Dataset preparation train, val, _ = data.load_dataset(method, labels) # Network predictor_ = predictor.build_predictor(method, args.unit_num, args.conv_layers, class_num) train_iter = I.SerialIterator(train, args.batchsize) val_iter = I.SerialIterator(val, args.batchsize, repeat=False, shuffle=False) classifier = L.Classifier(predictor_, lossfun=F.sigmoid_cross_entropy, accfun=F.binary_accuracy) if args.gpu >= 0: chainer.cuda.get_device_from_id(args.gpu).use() classifier.to_gpu() optimizer = O.Adam() optimizer.setup(classifier) updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu, converter=concat_mols) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out) trainer.extend( E.Evaluator(val_iter, classifier, device=args.gpu, converter=concat_mols)) trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch')) trainer.extend(E.LogReport()) trainer.extend( E.PrintReport([ 'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy', 'elapsed_time' ])) trainer.extend(E.ProgressBar(update_interval=10)) trainer.run() config = { 'method': args.method, 'conv_layers': args.conv_layers, 'unit_num': args.unit_num, 'labels': args.label } with open(os.path.join(args.out, 'config.json'), 'w') as o: o.write(json.dumps(config))