def fit(cnf, predict, per_patient, features_file, n_iter, blend_cnf, test_dir):
    """Fit a blending estimator over extracted feature runs.

    Trains one estimator per feature "run" for ``n_iter`` iterations and
    averages the per-run predictions.  In evaluation mode (``predict`` falsy)
    it reports kappa and a confusion matrix on a held-out split; in predict
    mode it writes a Kaggle-style submission CSV.

    Parameters
    ----------
    cnf : str
        Path/name of the config module loaded via ``util.load_module``.
    predict : bool
        True -> produce a test-set submission; False -> cross-validate.
    per_patient : bool
        If True, reshape features so both eyes of a patient are combined.
    features_file : str or None
        Single feature file; when None, runs are read from ``blend_cnf``.
    n_iter : int
        Number of fit/predict repetitions to average over.
    blend_cnf : str
        YAML file describing the feature runs to blend.
    test_dir : str or None
        Override for the test image directory (falls back to config).
    """
    config = util.load_module(cnf).config
    image_files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(image_files)
    # Column vector of float labels; estimators below expect 2-D targets.
    labels = data.get_labels(names).astype(np.float32)[:, np.newaxis]

    if features_file is not None:
        runs = {'run': [features_file]}
    else:
        # FIX: use safe_load (yaml.load without a Loader is unsafe and
        # deprecated) and close the config file deterministically.
        with open(blend_cnf) as f:
            runs = data.parse_blend_config(yaml.safe_load(f))

    # One scaler per run so train/test are standardized consistently.
    scalers = {run: StandardScaler() for run in runs}

    tr, te = data.split_indices(image_files, labels)

    y_preds = []
    for i in range(n_iter):
        print("iteration {} / {}".format(i + 1, n_iter))
        for run, files in runs.items():
            print("fitting features for run {}".format(run))
            X = data.load_features(files)
            X = scalers[run].fit_transform(X)
            X = data.per_patient_reshape(X) if per_patient else X
            # No eval split when predicting: train on everything.
            est = get_estimator(X.shape[1], image_files, labels,
                                eval_size=0.0 if predict else 0.1)
            est.fit(X, labels)
            if not predict:
                # Evaluate the running mean of all predictions so far.
                y_pred = est.predict(X[te]).ravel()
                y_preds.append(y_pred)
                y_pred = np.mean(y_preds, axis=0)
                y_pred = np.clip(np.round(y_pred).astype(int),
                                 np.min(labels), np.max(labels))
                print("kappa after run {}, iter {}: {}".format(
                    run, i, util.kappa(labels[te], y_pred)))
                print("confusion matrix")
                print(confusion_matrix(labels[te], y_pred))
            else:
                # Transform (not fit!) test features with the train scaler.
                X = data.load_features(files, test=True)
                X = scalers[run].transform(X)
                X = data.per_patient_reshape(X) if per_patient else X
                y_pred = est.predict(X).ravel()
                y_preds.append(y_pred)

    if predict:
        # Average all runs/iterations, round to integer levels, clip to the
        # label range seen in training.
        y_pred = np.mean(y_preds, axis=0)
        y_pred = np.clip(np.round(y_pred),
                         np.min(labels), np.max(labels)).astype(int)
        submission_filename = util.get_submission_filename()
        image_files = data.get_image_files(test_dir or config.get('test_dir'))
        names = data.get_names(image_files)
        image_column = pd.Series(names, name='image')
        level_column = pd.Series(y_pred, name='level')
        predictions = pd.concat([image_column, level_column], axis=1)
        print("tail of predictions file")
        print(predictions.tail())
        predictions.to_csv(submission_filename, index=False)
        # FIX: the original format string was broken across a line break
        # inside the literal ("saved \npredictions to {}").
        print("saved predictions to {}".format(submission_filename))
def fit(cnf, predict, per_patient, features_file, n_iter, blend_cnf, test_dir):
    """Fit a blending estimator over extracted feature runs.

    NOTE(review): this is a second definition of ``fit`` in the same module
    and shadows the earlier one — confirm which copy is intended and delete
    the other.

    Trains one estimator per feature "run" for ``n_iter`` iterations,
    averaging per-run predictions.  Evaluation mode (``predict`` falsy)
    prints kappa and a confusion matrix on a held-out split; predict mode
    writes a submission CSV.

    Parameters
    ----------
    cnf : str
        Config module path/name for ``util.load_module``.
    predict : bool
        True -> build a test-set submission; False -> cross-validate.
    per_patient : bool
        If True, reshape features to combine both eyes per patient.
    features_file : str or None
        Single feature file; when None, runs come from ``blend_cnf``.
    n_iter : int
        Number of fit/predict repetitions to average.
    blend_cnf : str
        YAML file describing the feature runs to blend.
    test_dir : str or None
        Override for the test image directory (falls back to config).
    """
    config = util.load_module(cnf).config
    image_files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(image_files)
    # 2-D column vector of float labels for the estimator.
    labels = data.get_labels(names).astype(np.float32)[:, np.newaxis]

    if features_file is not None:
        runs = {'run': [features_file]}
    else:
        # FIX: yaml.load without a Loader is unsafe/deprecated; also close
        # the file deterministically instead of leaking the handle.
        with open(blend_cnf) as f:
            runs = data.parse_blend_config(yaml.safe_load(f))

    # One scaler per run so test features reuse the training statistics.
    scalers = {run: StandardScaler() for run in runs}

    tr, te = data.split_indices(image_files, labels)

    y_preds = []
    for i in range(n_iter):
        print("iteration {} / {}".format(i + 1, n_iter))
        # FIX: dict is not mutated inside the loop, so the defensive
        # list(...) copy around runs.items() was unnecessary.
        for run, files in runs.items():
            print("fitting features for run {}".format(run))
            X = data.load_features(files)
            X = scalers[run].fit_transform(X)
            X = data.per_patient_reshape(X) if per_patient else X
            # Train on all data when predicting (no eval holdout).
            est = get_estimator(X.shape[1], image_files, labels,
                                eval_size=0.0 if predict else 0.1)
            est.fit(X, labels)
            if not predict:
                # Score the running mean of all predictions made so far.
                y_pred = est.predict(X[te]).ravel()
                y_preds.append(y_pred)
                y_pred = np.mean(y_preds, axis=0)
                y_pred = np.clip(np.round(y_pred).astype(int),
                                 np.min(labels), np.max(labels))
                print("kappa after run {}, iter {}: {}".format(
                    run, i, util.kappa(labels[te], y_pred)))
                print("confusion matrix")
                print(confusion_matrix(labels[te], y_pred))
            else:
                # transform (not fit_transform): keep train-time scaling.
                X = data.load_features(files, test=True)
                X = scalers[run].transform(X)
                X = data.per_patient_reshape(X) if per_patient else X
                y_pred = est.predict(X).ravel()
                y_preds.append(y_pred)

    if predict:
        # Average, round to integer levels, clip to the training label range.
        y_pred = np.mean(y_preds, axis=0)
        y_pred = np.clip(np.round(y_pred),
                         np.min(labels), np.max(labels)).astype(int)
        submission_filename = util.get_submission_filename()
        image_files = data.get_image_files(test_dir or config.get('test_dir'))
        names = data.get_names(image_files)
        image_column = pd.Series(names, name='image')
        level_column = pd.Series(y_pred, name='level')
        predictions = pd.concat([image_column, level_column], axis=1)
        print("tail of predictions file")
        print(predictions.tail())
        predictions.to_csv(submission_filename, index=False)
        print("saved predictions to {}".format(submission_filename))
def split(X, y, eval_size):
    """Split features ``X`` and labels ``y`` into train and eval parts.

    Returns ``(X_train, X_eval, y_train, y_eval)``.  When ``eval_size`` is
    falsy, the eval parts are empty slices of the same type as the inputs.

    NOTE(review): ``files`` and ``labels`` are neither parameters nor locals
    here — presumably module-level globals set elsewhere in this file; if
    not, the ``eval_size`` branch raises NameError.  Also note the split
    indices are derived from ``files``/``labels`` rather than the ``X``/``y``
    actually being sliced — TODO confirm these always correspond.
    """
    if eval_size:
        tr, te = data.split_indices(files, labels, eval_size)
        return X[tr], X[te], y[tr], y[te]
    else:
        # Degenerate split: everything is training data; X[len(X):] and
        # y[len(y):] are empty slices standing in for the eval set.
        return X, X[len(X):], y, y[len(y):]
# NOTE(review): the commented-out line below is a truncated duplicate of fit() defined above — dead code; remove once confirmed unused.
#def fit(cnf, predict, per_patient, features_file, n_iter, blend_cnf, test_dir): config = util.load_module(cnf).config image_files = data.get_image_files(config.get('train_dir')) names = data.get_names(image_files) labels = data.get_labels(names).astype(np.float32)[:, np.newaxis] if features_file is not None: runs = {'run': [features_file]} else: runs = data.parse_blend_config(yaml.load(open(blend_cnf))) scalers = {run: StandardScaler() for run in runs} tr, te = data.split_indices(image_files, labels) y_preds = [] for i in range(n_iter): print("iteration {} / {}".format(i + 1, n_iter)) for run, files in runs.items(): print("fitting features for run {}".format(run)) X = data.load_features(files) X = scalers[run].fit_transform(X) X = data.per_patient_reshape(X) if per_patient else X est = get_estimator(X.shape[1], image_files, labels, eval_size=0.0 if predict else 0.1) est.fit(X, labels) if not predict: y_pred = est.predict(X[te]).ravel() y_preds.append(y_pred)