    help='output file for the predictions, in the ISBI challenge format (default=stdout).')
parser.add_argument(
    '--output_metrics', type=str,
    help='output file for the evaluation metrics, in text format (default=stderr).')
parser.add_argument(
    '--pool_by_id', type=str, default='none',
    help='pool answers of contiguous identical ids: none (default), avg, max, xtrm')
FLAGS = parser.parse_args()
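
# For reference, the "ISBI challenge format" written by the prediction main()
# further below is one CSV row per image id:
#   <image_id>,<melanoma_score>,<keratosis_score>
# e.g. ISIC_0000000,0.73,0.12 (made-up values).  Contiguous rows with the same
# id are optionally pooled according to --pool_by_id.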
def main():
    valid_svm_methods = ['RBF', 'LINEAR_DUAL', 'LINEAR_PRIMAL']
    if FLAGS.svm_method not in valid_svm_methods:
        print('--svm_method must be one of ', ', '.join(valid_svm_methods),
              file=sys.stderr)
        sys.exit(1)
    SVM_LINEAR = FLAGS.svm_method == 'LINEAR_DUAL' or FLAGS.svm_method == 'LINEAR_PRIMAL'
    SVM_DUAL = FLAGS.svm_method == 'LINEAR_DUAL'
    SVM_MAX_ITER = FLAGS.max_iter_svm
    HYPER_MAX_ITER = FLAGS.max_iter_hyper
    HYPER_JOBS = FLAGS.jobs

    valid_preprocesses = ['PCA', 'PCA_WHITEN', 'Z_SCORE', 'NONE']
    if FLAGS.preprocess not in valid_preprocesses:
        print('--preprocess must be one of ', ' '.join(valid_preprocesses),
              file=sys.stderr)
        sys.exit(1)

    first = start = su.print_and_time('Reading training data...', file=sys.stderr)
    ids, labels, features = su.read_pickled_data(FLAGS.input_training)
    start = su.print_and_time('', past=start, file=sys.stderr)

    num_samples = len(ids)
    min_gamma = np.floor(np.log2(1.0 / num_samples)) - 4.0
    max_gamma = min(3.0, min_gamma + 32.0)
    scale_gamma = max_gamma - min_gamma
    print('\tSamples: ', num_samples, file=sys.stderr)
    if not SVM_LINEAR:
        print('\tGamma: ', min_gamma, min_gamma + scale_gamma, file=sys.stderr)

    start = su.print_and_time('Training preprocessor...', file=sys.stderr)
    if FLAGS.preprocess == 'PCA':
        preprocessor = sk.decomposition.PCA(copy=False, whiten=False)
    elif FLAGS.preprocess == 'PCA_WHITEN':
        preprocessor = sk.decomposition.PCA(copy=False, whiten=True)
    elif FLAGS.preprocess == 'Z_SCORE':
        preprocessor = sk.preprocessing.StandardScaler(copy=False)
    elif FLAGS.preprocess == 'NONE':
        # func=None implies identity function
        preprocessor = sk.preprocessing.FunctionTransformer(
            func=None, inverse_func=None, validate=False, accept_sparse=False,
            pass_y=False, kw_args=None, inv_kw_args=None)
    else:
        assert False, '(bug) Invalid value for FLAGS.preprocess: %s' % FLAGS.preprocess
    features = preprocessor.fit_transform(features)

    group_msg = 'ungrouped' if FLAGS.no_group else 'grouped'
    start = su.print_and_time(
        '====================\nTraining melanoma classifier (%s)...\n' % group_msg,
        past=start, file=sys.stderr)
    classifier, tuning = su.new_classifier(
        linear=SVM_LINEAR, dual=SVM_DUAL, max_iter=SVM_MAX_ITER,
        min_gamma=min_gamma, scale_gamma=scale_gamma)
    classifier_m = su.hyperoptimizer(
        classifier, tuning, max_iter=HYPER_MAX_ITER, n_jobs=HYPER_JOBS,
        group=not FLAGS.no_group)
    classifier_m.fit(features, (labels == 1).astype(np.int),
                     groups=None if FLAGS.no_group else ids)
    print('Best params:', classifier_m.best_params_, file=sys.stderr)
    print('...', classifier_m.best_params_, end='', file=sys.stderr)

    start = su.print_and_time(
        '====================\nTraining keratosis classifier (%s)...\n' % group_msg,
        past=start, file=sys.stderr)
    classifier, tuning = su.new_classifier(
        linear=SVM_LINEAR, dual=SVM_DUAL, max_iter=SVM_MAX_ITER,
        min_gamma=min_gamma, scale_gamma=scale_gamma)
    classifier_k = su.hyperoptimizer(
        classifier, tuning, max_iter=HYPER_MAX_ITER, n_jobs=HYPER_JOBS,
        group=not FLAGS.no_group)
    classifier_k.fit(features, (labels == 2).astype(np.int),
                     groups=None if FLAGS.no_group else ids)
    print('Best params:', classifier_k.best_params_, file=sys.stderr)
    print('...', classifier_k.best_params_, end='', file=sys.stderr)

    start = su.print_and_time('====================\nWriting model...', past=start,
                              file=sys.stderr)
    model_file = open(FLAGS.output_model, 'wb')
    pickle.dump(preprocessor, model_file)
    pickle.dump(classifier_m, model_file)
    pickle.dump(classifier_k, model_file)
    pickle.dump(FLAGS, model_file)
    model_file.close()

    print('\n Total time ', end='', file=sys.stderr)
    _ = su.print_and_time('Done!\n', past=first, file=sys.stderr)
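
# Note on the model file written by the training main() above: it is four
# pickles appended to one stream, so a consumer must unpickle them in the same
# order.  A minimal reader sketch ('model.pkl' is a placeholder path):
#
#   with open('model.pkl', 'rb') as f:
#       preprocessor = pickle.load(f)   # fitted preprocessor (PCA / scaler / identity)
#       classifier_m = pickle.load(f)   # melanoma classifier
#       classifier_k = pickle.load(f)   # keratosis classifier
#       train_flags = pickle.load(f)    # argparse Namespace used for training
#
# The prediction main() below loads only the first three objects and leaves
# the stored FLAGS unread.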
def main():
    first = start = su.print_and_time('Reading trained model...', file=sys.stderr)
    model_file = open(FLAGS.input_model, 'rb')
    preprocessor = pickle.load(model_file)
    classifier_m = pickle.load(model_file)
    classifier_k = pickle.load(model_file)
    model_file.close()

    start = su.print_and_time('Reading test data...', past=start, file=sys.stderr)
    image_ids, labels, features = su.read_pickled_data(FLAGS.input_test)
    num_samples = len(image_ids)

    start = su.print_and_time('Preprocessing test data...', file=sys.stderr)
    features = preprocessor.transform(features)

    # "Probabilities" should come between quotes here:
    # only if the scores are true logits will the probabilities be consistent.
    def probability_from_logits(logits):
        odds = np.exp(logits)
        return odds / (odds + 1.0)

    def logits_from_probability(prob):
        with np.errstate(divide='ignore'):
            odds = prob / (1.0 - prob)
            return np.log(odds)

    def extreme_probability(prob):
        return prob[np.argmax(np.abs(logits_from_probability(prob)))]

    start = su.print_and_time('Predicting test data...\n', past=start, file=sys.stderr)
    predictions_m = probability_from_logits(classifier_m.decision_function(features))
    predictions_k = probability_from_logits(classifier_k.decision_function(features))

    outfile = open(FLAGS.output_file, 'wt') if FLAGS.output_file else sys.stdout
    if FLAGS.pool_by_id == 'none':
        for i in range(num_samples):
            print(image_ids[i], predictions_m[i], predictions_k[i], sep=',', file=outfile)
    else:
        previous_id = None

        def print_result():
            if FLAGS.pool_by_id == 'avg':
                print(previous_id, np.mean(all_m), np.mean(all_k), sep=',', file=outfile)
            elif FLAGS.pool_by_id == 'max':
                print(previous_id, np.amax(all_m), np.amax(all_k), sep=',', file=outfile)
            elif FLAGS.pool_by_id == 'xtrm':
                print(previous_id, extreme_probability(all_m), extreme_probability(all_k),
                      sep=',', file=outfile)
            else:
                raise ValueError('Invalid value for FLAGS.pool_by_id: %s' %
                                 FLAGS.pool_by_id)

        for i in range(num_samples):
            if image_ids[i] != previous_id:
                if previous_id is not None:
                    print_result()
                previous_id = image_ids[i]
                all_m = np.asarray([predictions_m[i]])
                all_k = np.asarray([predictions_k[i]])
            else:
                all_m = np.concatenate((all_m, np.asarray([predictions_m[i]])))
                all_k = np.concatenate((all_k, np.asarray([predictions_k[i]])))
        if previous_id is not None:
            print_result()

    metfile = open(FLAGS.metrics_file, 'wt') if FLAGS.metrics_file else sys.stderr
    try:
        accs = []
        aucs = []
        mAPs = []
        for j, scores_j in [[1, predictions_m], [2, predictions_k]]:
            labels_j = (labels == j).astype(np.int)
            acc = sk.metrics.accuracy_score(labels, scores_j.astype(np.int))
            print('Acc: ', acc, file=metfile)
            accs.append(acc)
            auc = sk.metrics.roc_auc_score(labels_j, scores_j)
            aucs.append(auc)
            print('AUC[%d]: ' % j, auc, file=metfile)
            mAP = sk.metrics.average_precision_score(labels_j, scores_j)
            mAPs.append(mAP)
            print('mAP[%d]: ' % j, mAP, file=metfile)
        print('Acc_avg: ', sum(accs) / 2.0, file=metfile)
        print('AUC_avg: ', sum(aucs) / 2.0, file=metfile)
        print('mAP_avg: ', sum(mAPs) / 2.0, file=metfile)
    except ValueError:
        pass

    print('\n Total time ', end='', file=sys.stderr)
    _ = su.print_and_time('Done!\n', past=first, file=sys.stderr)
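
# Sanity-check sketch for the helpers defined inside the prediction main()
# above (values are made up): probability_from_logits() is the logistic
# sigmoid, logits_from_probability() is its inverse, and extreme_probability()
# returns the score whose logit has the largest magnitude, i.e. the pooled
# prediction farthest from 0.5 in either direction.
#
#   probability_from_logits(np.array([0.0]))         # -> array([0.5])
#   extreme_probability(np.array([0.4, 0.9, 0.55]))  # -> 0.9 (largest |logit|)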