def main():
    """Evaluate a trained drug-pair classifier on a test CSV.

    Loads a pickled ``Classifier`` plus a training snapshot, runs a custom
    evaluator over the test set, writes per-pair representations and
    predictions to CSV, and dumps the computed metrics to ``eval_result.json``.
    Relies on names imported at module level (np, os, logging, chainer, json,
    parse_arguments, preprocess_method_dict, CSVFileParserForPair,
    concat_mols, MyEvaluator, add_representations, add_reprensentations_and_y,
    ROOT_PATH).
    """
    args = parse_arguments()
    # argparse delivers the flag as a string, so compare against 'True'
    # rather than relying on truthiness.
    generate_drug_list = True if args.generate_drug_list == 'True' else False
    if args.label:
        labels = args.label
        # class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        # Labels are cast to int32 as expected by the sigmoid cross-entropy
        # loss used elsewhere in this project.
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess test dataset...')
    # NOTE(review): the preprocessor is hard-coded to 'ggnn' here, unlike the
    # training scripts which use args.method — confirm this is intentional.
    preprocessor = preprocess_method_dict['ggnn']()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    test_dict = parser.parse(args.test_datafile,
                             return_smiles_pair_original=True)
    test = test_dict['dataset']
    # test_smiles_pairs = test_dict['smiles_pair_original']
    from chainer.iterators import SerialIterator
    # Fixed evaluation batch size of 64; no repeat/shuffle for a single pass.
    test_iter = SerialIterator(test, 64, repeat=False, shuffle=False)

    out = 'output' + '/' + args.out
    model_path = os.path.join(out, args.model_filename)
    # `load_pickle` is static method, call from Class to get an instance
    print('model_path: {}'.format(model_path))
    from chainer_chemistry.models.prediction import Classifier
    model = Classifier.load_pickle(model_path, args.gpu)
    # Place the model on the requested device before loading the snapshot.
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
    else:
        model.to_cpu()
    snapshot_path = os.path.join(out, args.snapshot)
    try:
        # Overwrite the pickled weights with the (possibly later) snapshot.
        chainer.serializers.load_npz(snapshot_path, model)
    except KeyError as e:
        # A KeyError here means the snapshot layout did not match the model;
        # evaluation proceeds with the pickled weights.
        print(e)

    # Debug variant of the evaluation function; currently unused (the
    # commented-out eval_func kwarg below would enable it).
    def eval_func(atoms_1, adj_1, atoms_2, adj_2):
        sample = [
            (atoms_1, adj_1),
            (atoms_2, adj_2),
        ]
        sample = concat_mols(sample)
        atoms_1, adj_1 = sample[0]
        atoms_2, adj_2 = sample[1]
        print(atoms_1, adj_1)
        print('shape 1:', atoms_1.shape, adj_1.shape)
        print('shape 2:', atoms_2.shape, adj_2.shape)
        pred, _ = model.predictor.predict(atoms_1, adj_1, atoms_2, adj_2)
        return pred

    evaluator = MyEvaluator(
        test_iter, model, converter=concat_mols,
        device=args.gpu,
        eval_func=model.predictor.predict,
        # eval_func=eval_func,
        # mediate_func=models.predictor.mediate_output,
        name='test', ignore_labels=-1)
    # Per-drug learned representations for each side of the pair.
    e1_total, e2_total = evaluator.generate_representations()
    # Predictions and ground-truth targets.
    y_total, t_total = evaluator.generate_y_and_t()
    # print('test_datafile: {}'.format(args.test_datafile))
    test_filename = os.path.basename(args.test_datafile).split('.')[0]
    # print('test_filename: {}'.format(test_filename))
    # Write representations next to the original rows (…_e.csv).
    dst_repre_filename = test_filename + '_e' + '.csv'
    dst_repre_filepath = os.path.join(out, dst_repre_filename)
    add_representations(args.test_datafile, dst_repre_filepath,
                        e1_total, e2_total,
                        generate_drug_list=generate_drug_list)
    # Write representations plus predictions (…_e_y.csv).
    # NOTE(review): `add_reprensentations_and_y` looks misspelled but must
    # match the helper's actual name in this project — confirm before renaming.
    dst_filename = test_filename + '_e_y' + '.csv'
    dst_filepath = os.path.join(out, dst_filename)
    add_reprensentations_and_y(args.test_datafile, dst_filepath,
                               e1_total, e2_total, y_total)
    perf_dict = dict()
    for metric in ['roc_auc', 'prc_auc', 'accuracy',
                   'precision', 'recall', 'f1']:
        # NOTE(review): `compuate` looks like a typo for `compute`, but it
        # must match MyEvaluator's API — confirm against that class.
        result = evaluator.compuate(metric=metric)
        perf_dict[metric] = result
        print('{}: {}'.format(metric, result))
    with open(os.path.join(ROOT_PATH, 'eval_result.json'), 'w') as f:
        json.dump(perf_dict, f)
def main():
    """Evaluate a trained pair classifier and visualize saliency.

    Same evaluation pipeline as the sibling script (metrics + representation
    CSVs + eval_result.json), followed by an integrated-gradients saliency
    computation over the test molecules and an SVG visualization of one
    example. Batch size is 1 because saliency is computed per sample.
    """
    args = parse_arguments()
    # argparse delivers the flag as a string, so compare against 'True'.
    generate_drug_list = True if args.generate_drug_list == 'True' else False
    if args.label:
        labels = args.label
        # class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        # int32 labels, matching the loss used in the training scripts.
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess test dataset...')
    # NOTE(review): preprocessor hard-coded to 'ggnn' (training uses
    # args.method) — confirm intentional.
    preprocessor = preprocess_method_dict['ggnn']()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    test_dict = parser.parse(args.test_datafile,
                             return_smiles_pair_original=True)
    test = test_dict['dataset']
    # test_smiles_pairs = test_dict['smiles_pair_original']
    from chainer.iterators import SerialIterator
    # Batch size 1: saliency computation below operates sample-by-sample.
    test_iter = SerialIterator(test, 1, repeat=False, shuffle=False)

    out = 'output' + '/' + args.out
    model_path = os.path.join(out, args.model_filename)
    # `load_pickle` is static method, call from Class to get an instance
    print('model_path: {}'.format(model_path))
    model = Classifier.load_pickle(model_path, args.gpu)
    # Place the model on the requested device before loading the snapshot.
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
    else:
        model.to_cpu()
    snapshot_path = os.path.join(out, args.snapshot)
    try:
        # Overwrite the pickled weights with the snapshot's weights.
        chainer.serializers.load_npz(snapshot_path, model)
    except KeyError as e:
        # Snapshot layout mismatch: continue with the pickled weights.
        print(e)

    evaluator = MyEvaluator(
        test_iter, model, converter=concat_mols,
        device=args.gpu,
        eval_func=model.predictor.predict,
        # mediate_func=models.predictor.mediate_output,
        name='test', ignore_labels=-1)
    # Per-drug learned representations for each side of the pair.
    e1_total, e2_total = evaluator.generate_representations()
    # Predictions and ground-truth targets.
    y_total, t_total = evaluator.generate_y_and_t()
    # print('test_datafile: {}'.format(args.test_datafile))
    test_filename = os.path.basename(args.test_datafile).split('.')[0]
    # print('test_filename: {}'.format(test_filename))
    dst_repre_filename = test_filename + '_e' + '.csv'
    dst_repre_filepath = os.path.join(out, dst_repre_filename)
    add_representations(args.test_datafile, dst_repre_filepath,
                        e1_total, e2_total,
                        generate_drug_list=generate_drug_list)
    # NOTE(review): `add_reprensentations_and_y` looks misspelled but must
    # match the helper's actual name in this project.
    dst_filename = test_filename + '_e_y' + '.csv'
    dst_filepath = os.path.join(out, dst_filename)
    add_reprensentations_and_y(args.test_datafile, dst_filepath,
                               e1_total, e2_total, y_total)
    perf_dict = dict()
    for metric in ['roc_auc', 'prc_auc', 'accuracy',
                   'precision', 'recall', 'f1']:
        # NOTE(review): `compuate` must match MyEvaluator's API — confirm.
        result = evaluator.compuate(metric=metric)
        perf_dict[metric] = result
        print('{}: {}'.format(metric, result))
    with open(os.path.join(ROOT_PATH, 'eval_result.json'), 'w') as f:
        json.dump(perf_dict, f)

    # Wrapper called by the saliency calculator; receives one pair (plus its
    # label, which is ignored) and returns the predictor's raw prediction.
    def eval_func(atoms_1, adj_1, atoms_2, adj_2, label):
        sample = [
            (atoms_1, adj_1),
            (atoms_2, adj_2),
        ]
        sample = concat_mols(sample)
        atoms_1, adj_1 = sample[0]
        atoms_2, adj_2 = sample[1]
        print(atoms_1, adj_1)
        print('shape 1:', atoms_1.shape, adj_1.shape)
        print('shape 2:', atoms_2.shape, adj_2.shape)
        pred, _ = model.predictor.predict(atoms_1, adj_1, atoms_2, adj_2)
        return pred

    # Saliency is taken at the output of the graph-conv embedding layer
    # (timing='post'), with 5 interpolation steps for integrated gradients.
    calculator = IntegratedGradientsCalculator(
        model.predictor, steps=5, eval_fun=eval_func,
        target_extractor=VariableMonitorLinkHook(
            model.predictor.graph_conv.embed, timing='post'),
        device=args.gpu)
    # Number of Monte-Carlo samples per input for the bayes variant below.
    M = 1
    # 2. compute
    # saliency_samples_vanilla = calculator.compute(
    #     test, M=1, converter=concat_mols)
    # saliency_samples_smooth = calculator.compute(
    #     test, M=M, converter=concat_mols, noise_sampler=GaussianNoiseSampler())
    # train=True keeps stochastic layers (e.g. dropout) active, giving the
    # "bayes" flavor of saliency sampling.
    saliency_samples_bayes = calculator.compute(test, M=M,
                                                converter=concat_mols,
                                                train=True)

    visualizer = SmilesVisualizer()
    # Visualization uses the second drug's SMILES column only.
    smiles = list(pd.read_csv(args.test_datafile, index_col=0)['smiles_2'])
    # from IPython.display import display, HTML

    def sv_visualize(i, ratio, method, view):
        """Aggregate saliency and either display ('view') or save ('save') it
        for the i-th test molecule."""
        # saliency_vanilla = calculator.aggregate(
        #     saliency_samples_vanilla, ch_axis=3, method=method)
        # saliency_smooth = calculator.aggregate(
        #     saliency_samples_smooth, ch_axis=3, method=method)
        saliency_bayes = calculator.aggregate(saliency_samples_bayes,
                                              ch_axis=3, method=method)
        scaler = abs_max_scaler
        if view == 'view':
            # svg_vanilla = visualizer.visualize(saliency_vanilla[i], smiles[i], visualize_ratio=ratio, scaler=scaler)
            # svg_smooth = visualizer.visualize(saliency_smooth[i], smiles[i], visualize_ratio=ratio, scaler=scaler)
            svg_bayes = visualizer.visualize(saliency_bayes[i], smiles[i],
                                             visualize_ratio=ratio,
                                             scaler=scaler)
            # display(svg_bayes)
        elif view == 'save':
            if not os.path.exists('results'):
                os.makedirs('results')
            # visualizer.visualize(saliency_vanilla[i], smiles[i], visualize_ratio=ratio, scaler=scaler,
            #                      save_filepath='results/{}_vanilla.png'.format(i))
            # visualizer.visualize(saliency_smooth[i], smiles[i], visualize_ratio=ratio, scaler=scaler,
            #                      save_filepath='results/{}_smooth.png'.format(i))
            visualizer.visualize(
                saliency_bayes[i], smiles[i], visualize_ratio=ratio,
                scaler=scaler, save_filepath='results/{}_bayes.svg'.format(i))
            print('saved {}-th result!'.format(i))
        else:
            print(view, 'not supported')

    # Hard-coded demo: save the saliency SVG for test molecule index 2.
    sv_visualize(i=2, ratio=0.7, method='raw', view='save')
def main():
    """Train a pair classifier with a random train/validation split.

    Parses one CSV, splits it by args.train_data_ratio, trains with Adam and
    sigmoid cross-entropy, reports ROC/PRC AUC on both splits, and pickles
    the trained classifier.
    """
    # Parse the arguments.
    args = parse_arguments()
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        # int32 labels for the sigmoid cross-entropy loss below.
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    print('Preprocessing dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    dataset = parser.parse(args.datafile)['dataset']

    # Split the dataset into training and validation.
    train_data_size = int(len(dataset) * args.train_data_ratio)
    train, val = split_dataset_random(dataset, train_data_size, args.seed)

    # Set up the predictor.
    predictor = set_up_predictor(args.method, args.unit_num,
                                 args.conv_layers, class_num)

    # Set up the iterator.
    train_iter = SerialIterator(train, args.batchsize)
    val_iter = SerialIterator(val, args.batchsize,
                              repeat=False, shuffle=False)

    # Set up the regressor.
    # NOTE(review): metric_fun is defined but never passed to Classifier
    # below (F.accuracy is passed directly) — dead local kept as a record of
    # the intended metric set.
    metric_fun = {
        'accuracy': F.accuracy,
        # 'precision': F.precision,
        # 'recall': F.recall,
        # 'F1-score': F.f1_score,
    }
    classifier = Classifier(predictor, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=F.accuracy, device=args.gpu)

    # Set up the optimizer.
    optimizer = optimizers.Adam()
    optimizer.setup(classifier)

    # Set up the updater.
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu,
                                       converter=concat_mols)
    # updater = training.ParallelUpdater(train_iter, optimizer, devices={'main': 0, 'second': 1},
    #                                    converter=concat_mols)

    # Set up the trainer.
    print('Training...')
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        E.Evaluator(val_iter, classifier, device=args.gpu,
                    converter=concat_mols))
    # Separate non-repeating iterator so training-set metrics see each
    # example exactly once per evaluation pass.
    train_eval_iter = SerialIterator(train, args.batchsize,
                                     repeat=False, shuffle=False)
    trainer.extend(
        ROCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='train_roc', pos_labels=1, ignore_labels=-1,
                        raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        ROCAUCEvaluator(val_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='val_roc', pos_labels=1, ignore_labels=-1))
    trainer.extend(
        PRCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='train_prc', pos_labels=1, ignore_labels=-1,
                        raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        PRCAUCEvaluator(val_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='val_prc', pos_labels=1, ignore_labels=-1))
    trainer.extend(
        E.PrintReport([
            'epoch', 'main/loss', 'main/accuracy',
            # 'train_roc/main/roc_auc', 'train_prc/main/prc_auc',
            'validation/main/loss', 'validation/main/accuracy',
            # 'val_roc/main/roc_auc', 'val_prc/main/prc_auc',
            'elapsed_time'
        ]))
    # One snapshot at the very end of training.
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.run()

    # Save the regressor's parameters.
    model_path = os.path.join(args.out, args.model_filename)
    print('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args.protocol)
def main():
    """Train a pair classifier with full configuration surface.

    Supports optional data augmentation, single- or dual-GPU updaters,
    gradient clipping / L1 / L2 hooks, early stopping on validation loss,
    several learning-rate decay schedules, a battery of train/val evaluators
    (accuracy, ROC AUC, PRC AUC, F1), snapshot-based resume, and finally
    pickles the trained classifier.
    """
    # Parse the arguments.
    args = parse_arguments()
    # Boolean-ish CLI flags arrive as strings; only the literal 'False'
    # disables them.
    augment = False if args.augment == 'False' else True
    multi_gpu = False if args.multi_gpu == 'False' else True
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        # int32 labels for the sigmoid cross-entropy loss below.
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and test dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    train = parser.parse(args.train_datafile)['dataset']
    valid = parser.parse(args.valid_datafile)['dataset']

    if augment:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)

    num_train = train.get_datasets()[0].shape[0]
    num_valid = valid.get_datasets()[0].shape[0]
    logging.info('Train/test split: {}/{}'.format(num_train, num_valid))

    # Comma-separated hidden dims for the interaction network, e.g. "32,16".
    if len(args.net_hidden_dims):
        net_hidden_dims = tuple([int(net_hidden_dim) for net_hidden_dim
                                 in args.net_hidden_dims.split(',')])
    else:
        net_hidden_dims = ()
    # Normalize the assorted string/flag options into real booleans/None.
    fp_attention = True if args.fp_attention else False
    update_attention = True if args.update_attention else False
    weight_tying = False if args.weight_tying == 'False' else True
    attention_tying = False if args.attention_tying == 'False' else True
    fp_batch_normalization = True if args.fp_bn == 'True' else False
    layer_aggregator = None if args.layer_aggregator == '' else args.layer_aggregator
    context = False if args.context == 'False' else True
    output_activation = functions.relu if args.output_activation == 'relu' else None

    # NOTE(review): the kwarg `weight_typing` (vs. local `weight_tying`)
    # must match set_up_predictor's actual signature — confirm before
    # "fixing" the spelling.
    predictor = set_up_predictor(method=args.method,
                                 fp_hidden_dim=args.fp_hidden_dim,
                                 fp_out_dim=args.fp_out_dim,
                                 conv_layers=args.conv_layers,
                                 concat_hidden=args.concat_hidden,
                                 layer_aggregator=layer_aggregator,
                                 fp_dropout_rate=args.fp_dropout_rate,
                                 fp_batch_normalization=fp_batch_normalization,
                                 net_hidden_dims=net_hidden_dims,
                                 class_num=class_num,
                                 sim_method=args.sim_method,
                                 fp_attention=fp_attention,
                                 weight_typing=weight_tying,
                                 attention_tying=attention_tying,
                                 update_attention=update_attention,
                                 context=context,
                                 context_layers=args.context_layers,
                                 context_dropout=args.context_dropout,
                                 message_function=args.message_function,
                                 readout_function=args.readout_function,
                                 num_timesteps=args.num_timesteps,
                                 num_output_hidden_layers=args.num_output_hidden_layers,
                                 output_hidden_dim=args.output_hidden_dim,
                                 output_activation=output_activation,
                                 symmetric=args.symmetric
                                 )

    train_iter = SerialIterator(train, args.batchsize)
    test_iter = SerialIterator(valid, args.batchsize,
                               repeat=False, shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=metrics_fun, device=args.gpu)

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args.learning_rate,
                                weight_decay_rate=args.weight_decay_rate)
    # optimizer = optimizers.Adam()
    # optimizer = optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(classifier)
    # add regularization
    if args.max_norm > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(threshold=args.max_norm))
    if args.l2_rate > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.l2_rate))
    if args.l1_rate > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args.l1_rate))

    # Set up the updater.
    if multi_gpu:
        # Hard-coded two-device layout: GPU 0 main, GPU 1 secondary.
        logging.info('Using multiple GPUs')
        updater = training.ParallelUpdater(train_iter, optimizer,
                                           devices={'main': 0, 'second': 1},
                                           converter=concat_mols)
    else:
        logging.info('Using single GPU')
        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=args.gpu,
                                           converter=concat_mols)

    # Set up the trainer.
    logging.info('Training...')
    # add stop_trigger parameter
    # NOTE: `patients` (not `patience`) is Chainer's actual keyword for
    # EarlyStoppingTrigger; training stops after 30 stagnant checks or
    # 500 epochs, whichever comes first.
    early_stop = triggers.EarlyStoppingTrigger(
        monitor='validation/main/loss', patients=30,
        max_trigger=(500, 'epoch'))
    out = 'output' + '/' + args.out
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)
    # trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(E.Evaluator(test_iter, classifier,
                               device=args.gpu, converter=concat_mols))
    # Non-repeating iterator so training-set metrics do a single clean pass.
    train_eval_iter = SerialIterator(train, args.batchsize,
                                     repeat=False, shuffle=False)
    trainer.extend(AccuracyEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_acc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(AccuracyEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_acc',
        pos_labels=1, ignore_labels=-1))
    trainer.extend(ROCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_roc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(ROCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_roc',
        pos_labels=1, ignore_labels=-1))
    trainer.extend(PRCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_prc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(PRCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_prc',
        pos_labels=1, ignore_labels=-1))
    trainer.extend(F1Evaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_f',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(F1Evaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_f',
        pos_labels=1, ignore_labels=-1))

    # apply shift strategy to learning rate every 10 epochs
    # trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate), trigger=(10, 'epoch'))
    # Three preset decay schedules differing only in epoch milestones.
    if args.exp_shift_strategy == 1:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [10, 20, 30, 40, 50, 60], 'epoch'))
    elif args.exp_shift_strategy == 2:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30], 'epoch'))
    elif args.exp_shift_strategy == 3:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')
    # # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss', 'train_acc/main/accuracy', 'train_roc/main/roc_auc',
        'train_prc/main/prc_auc',
        # 'train_p/main/precision', 'train_r/main/recall',
        'train_f/main/f1',
        'validation/main/loss', 'val_acc/main/accuracy',
        'val_roc/main/roc_auc', 'val_prc/main/prc_auc',
        # 'val_p/main/precision', 'val_r/main/recall',
        'val_f/main/f1',
        'lr',
        'elapsed_time']
    trainer.extend(E.PrintReport(entries=entries))
    # change from 10 to 2 on Mar. 1 2019
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(E.PlotReport(['main/loss', 'validation/main/loss'],
                                'epoch', file_name='loss.png'))
    trainer.extend(E.PlotReport(['train_acc/main/accuracy',
                                 'val_acc/main/accuracy'],
                                'epoch', file_name='accuracy.png'))

    if args.resume:
        # Resume trainer state (optimizer, epoch counters, extensions)
        # from a previous snapshot.
        resume_path = os.path.join(out, args.resume)
        logging.info('Resume training according to snapshot in {}'.format(
            resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the regressor's parameters.
    model_path = os.path.join(out, args.model_filename)
    logging.info('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args.protocol)
def main():
    """Train a pair classifier from a single CSV with a random split.

    Like the richer training script but with a reduced predictor
    configuration: Adam with a fixed milestone LR decay, accuracy / ROC AUC /
    PRC AUC / F1 evaluators on both splits, snapshot-based resume, and a
    final pickle of the trained classifier.
    """
    # Parse the arguments.
    args = parse_arguments()
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        # int32 labels for the sigmoid cross-entropy loss below.
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    print('Preprocessing dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    dataset = parser.parse(args.datafile)['dataset']

    # Split the dataset into training and validation.
    train_data_size = int(len(dataset) * args.train_data_ratio)
    train, val = split_dataset_random(dataset, train_data_size, args.seed)

    # Set up the predictor.
    # def set_up_predictor(method, fp_hidden_dim, fp_out_dim, conv_layers, net_hidden_num, class_num, net_layers):
    # predictor = set_up_predictor(args.method, args.unit_num,
    #                              args.conv_layers, class_num)
    # Comma-separated hidden dims for the interaction network, e.g. "32,16".
    if len(args.net_hidden_dims):
        net_hidden_dims = tuple([
            int(net_hidden_dim)
            for net_hidden_dim in args.net_hidden_dims.split(',')
        ])
    else:
        net_hidden_dims = ()
    predictor = set_up_predictor(method=args.method,
                                 fp_hidden_dim=args.fp_hidden_dim,
                                 fp_out_dim=args.fp_out_dim,
                                 conv_layers=args.conv_layers,
                                 concat_hidden=args.concat_hidden,
                                 fp_dropout_rate=args.fp_dropout_rate,
                                 net_hidden_dims=net_hidden_dims,
                                 class_num=class_num,
                                 sim_method=args.sim_method)

    # Set up the iterator.
    train_iter = SerialIterator(train, args.batchsize)
    val_iter = SerialIterator(val, args.batchsize,
                              repeat=False, shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=metrics_fun, device=args.gpu)

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args.learning_rate,
                                weight_decay_rate=args.weight_decay_rate)
    # optimizer = optimizers.Adam()
    # optimizer = optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(classifier)

    # Set up the updater.
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu,
                                       converter=concat_mols)

    # Set up the trainer.
    print('Training...')
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        E.Evaluator(val_iter, classifier, device=args.gpu,
                    converter=concat_mols))
    # Non-repeating iterator so training-set metrics do a single clean pass.
    train_eval_iter = SerialIterator(train, args.batchsize,
                                     repeat=False, shuffle=False)
    trainer.extend(
        AccuracyEvaluator(train_eval_iter, classifier, eval_func=predictor,
                          device=args.gpu, converter=concat_mols,
                          name='train_acc', pos_labels=1, ignore_labels=-1,
                          raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        AccuracyEvaluator(val_iter, classifier, eval_func=predictor,
                          device=args.gpu, converter=concat_mols,
                          name='val_acc', pos_labels=1, ignore_labels=-1))
    trainer.extend(
        ROCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='train_roc', pos_labels=1, ignore_labels=-1,
                        raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        ROCAUCEvaluator(val_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='val_roc', pos_labels=1, ignore_labels=-1))
    trainer.extend(
        PRCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='train_prc', pos_labels=1, ignore_labels=-1,
                        raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        PRCAUCEvaluator(val_iter, classifier, eval_func=predictor,
                        device=args.gpu, converter=concat_mols,
                        name='val_prc', pos_labels=1, ignore_labels=-1))
    # trainer.extend(PrecisionEvaluator(
    #     train_eval_iter, classifier, eval_func=predictor,
    #     device=args.gpu, converter=concat_mols, name='train_p',
    #     pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # # extension name='validation' is already used by `Evaluator`,
    # # instead extension name `val` is used.
    # trainer.extend(PrecisionEvaluator(
    #     val_iter, classifier, eval_func=predictor,
    #     device=args.gpu, converter=concat_mols, name='val_p',
    #     pos_labels=1, ignore_labels=-1))
    #
    # trainer.extend(RecallEvaluator(
    #     train_eval_iter, classifier, eval_func=predictor,
    #     device=args.gpu, converter=concat_mols, name='train_r',
    #     pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # # extension name='validation' is already used by `Evaluator`,
    # # instead extension name `val` is used.
    # trainer.extend(RecallEvaluator(
    #     val_iter, classifier, eval_func=predictor,
    #     device=args.gpu, converter=concat_mols, name='val_r',
    #     pos_labels=1, ignore_labels=-1))
    trainer.extend(
        F1Evaluator(train_eval_iter, classifier, eval_func=predictor,
                    device=args.gpu, converter=concat_mols,
                    name='train_f', pos_labels=1, ignore_labels=-1,
                    raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(
        F1Evaluator(val_iter, classifier, eval_func=predictor,
                    device=args.gpu, converter=concat_mols,
                    name='val_f', pos_labels=1, ignore_labels=-1))

    # apply shift strategy to learning rate every 10 epochs
    # trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate), trigger=(10, 'epoch'))
    # Decay Adam's alpha at fixed epoch milestones.
    trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                   trigger=triggers.ManualScheduleTrigger(
                       [10, 20, 30, 40, 50], 'epoch'))
    # # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss', 'train_acc/main/accuracy', 'train_roc/main/roc_auc',
        'train_prc/main/prc_auc',
        # 'train_p/main/precision', 'train_r/main/recall',
        'train_f/main/f1',
        'validation/main/loss', 'val_acc/main/accuracy',
        'val_roc/main/roc_auc', 'val_prc/main/prc_auc',
        # 'val_p/main/precision', 'val_r/main/recall',
        'val_f/main/f1',
        'lr',
        'elapsed_time'
    ]
    trainer.extend(E.PrintReport(entries=entries))
    # One snapshot at the very end of training.
    trainer.extend(E.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())

    if args.resume:
        # Resume full trainer state from a previous snapshot.
        resume_path = os.path.join(args.out, args.resume)
        logging.info(
            'Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the regressor's parameters.
    model_path = os.path.join(args.out, args.model_filename)
    print('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args.protocol)