def fit(cnf, predict, per_patient, features_file, n_iter, blend_cnf, test_dir):
    """Fit one estimator per feature run and blend their predictions.

    In evaluation mode (``predict`` falsy) each estimator is scored on the
    ``te`` indices returned by ``data.split_indices`` and the running kappa
    and confusion matrix are printed. In prediction mode (``predict`` truthy)
    the test features are predicted instead and the averaged, rounded result
    is written to a CSV submission file.

    Args:
        cnf: Passed to ``util.load_module``; the ``config`` attribute of the
            result supplies ``train_dir`` and ``test_dir``.
        predict: Truthy to predict the test set and write a submission,
            falsy to evaluate on the held-out indices.
        per_patient: If truthy, features are passed through
            ``data.per_patient_reshape`` before fitting/predicting.
        features_file: If not ``None``, the only feature file used (as a
            single run named ``'run'``); otherwise ``blend_cnf`` is read.
        n_iter: Number of times every run is fitted.
        blend_cnf: Path of the YAML blend configuration; only read when
            ``features_file`` is ``None``.
        test_dir: Directory with the test images; falls back to
            ``config.get('test_dir')`` when falsy.
    """
    config = util.load_module(cnf).config
    image_files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(image_files)
    # Labels become a float32 column vector.
    labels = data.get_labels(names).astype(np.float32)[:, np.newaxis]

    if features_file is not None:
        runs = {'run': [features_file]}
    else:
        # The context manager guarantees the config file handle is closed.
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # consider yaml.safe_load if blend configs may be untrusted.
        with open(blend_cnf) as blend_file:
            runs = data.parse_blend_config(yaml.load(blend_file))

    scalers = {run: StandardScaler() for run in runs}

    # Only the held-out indices are used below.
    _, te = data.split_indices(image_files, labels)

    # Collects one prediction vector per (iteration, run); the blend is the
    # mean over everything collected so far.
    y_preds = []
    for i in range(n_iter):
        print("iteration {} / {}".format(i + 1, n_iter))
        for run, files in runs.items():
            print("fitting features for run {}".format(run))
            X = data.load_features(files)
            X = scalers[run].fit_transform(X)
            X = data.per_patient_reshape(X) if per_patient else X
            est = get_estimator(X.shape[1], image_files, labels,
                                eval_size=0.0 if predict else 0.1)
            est.fit(X, labels)
            if not predict:
                y_pred = est.predict(X[te]).ravel()
                y_preds.append(y_pred)
                y_pred = np.mean(y_preds, axis=0)
                y_pred = np.clip(np.round(y_pred).astype(int),
                                 np.min(labels), np.max(labels))
                print("kappa after run {}, iter {}: {}".format(
                    run, i, util.kappa(labels[te], y_pred)))
                print("confusion matrix")
                print(confusion_matrix(labels[te], y_pred))
            else:
                X = data.load_features(files, test=True)
                # Reuse the scaler fitted on the training features.
                X = scalers[run].transform(X)
                X = data.per_patient_reshape(X) if per_patient else X
                y_pred = est.predict(X).ravel()
                y_preds.append(y_pred)

    if predict:
        y_pred = np.mean(y_preds, axis=0)
        # Round to the nearest level and keep it inside the label range.
        y_pred = np.clip(np.round(y_pred),
                         np.min(labels), np.max(labels)).astype(int)

        submission_filename = util.get_submission_filename()
        image_files = data.get_image_files(test_dir or config.get('test_dir'))
        names = data.get_names(image_files)
        image_column = pd.Series(names, name='image')
        level_column = pd.Series(y_pred, name='level')
        predictions = pd.concat([image_column, level_column], axis=1)

        print("tail of predictions file")
        print(predictions.tail())

        predictions.to_csv(submission_filename, index=False)
        print("saved predictions to {}".format(submission_filename))
def fit(cnf, predict, per_patient, features_file, n_iter, blend_cnf, test_dir):
    """Fit one estimator per feature run and blend their predictions.

    With ``predict`` falsy, every fitted estimator is evaluated on the ``te``
    indices from ``data.split_indices`` and the kappa score and confusion
    matrix of the running blend are printed. With ``predict`` truthy, the
    test features are predicted and the averaged, rounded predictions are
    saved as a CSV submission.

    Args:
        cnf: Passed to ``util.load_module``; the ``config`` attribute of the
            result supplies ``train_dir`` and ``test_dir``.
        predict: Truthy to predict the test set and write a submission,
            falsy to evaluate on the held-out indices.
        per_patient: If truthy, features are passed through
            ``data.per_patient_reshape`` before fitting/predicting.
        features_file: If not ``None``, the only feature file used (as a
            single run named ``'run'``); otherwise ``blend_cnf`` is read.
        n_iter: Number of times every run is fitted.
        blend_cnf: Path of the YAML blend configuration; only read when
            ``features_file`` is ``None``.
        test_dir: Directory with the test images; falls back to
            ``config.get('test_dir')`` when falsy.
    """
    config = util.load_module(cnf).config
    image_files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(image_files)
    # Labels become a float32 column vector.
    labels = data.get_labels(names).astype(np.float32)[:, np.newaxis]

    if features_file is not None:
        runs = {'run': [features_file]}
    else:
        # Close the config file deterministically instead of leaking it.
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # consider yaml.safe_load if blend configs may be untrusted.
        with open(blend_cnf) as blend_file:
            runs = data.parse_blend_config(yaml.load(blend_file))

    scalers = {run: StandardScaler() for run in runs}

    # Only the held-out indices are used below.
    _, te = data.split_indices(image_files, labels)

    # One prediction vector is appended per (iteration, run); the blend is
    # the mean over everything appended so far.
    y_preds = []
    for i in range(n_iter):
        print("iteration {} / {}".format(i + 1, n_iter))
        # `runs` is not modified inside the loop, so no snapshot is needed.
        for run, files in runs.items():
            print("fitting features for run {}".format(run))
            X = data.load_features(files)
            X = scalers[run].fit_transform(X)
            X = data.per_patient_reshape(X) if per_patient else X
            est = get_estimator(X.shape[1], image_files, labels,
                                eval_size=0.0 if predict else 0.1)
            est.fit(X, labels)
            if not predict:
                y_pred = est.predict(X[te]).ravel()
                y_preds.append(y_pred)
                y_pred = np.mean(y_preds, axis=0)
                y_pred = np.clip(np.round(y_pred).astype(int),
                                 np.min(labels), np.max(labels))
                print("kappa after run {}, iter {}: {}".format(
                    run, i, util.kappa(labels[te], y_pred)))
                print("confusion matrix")
                print(confusion_matrix(labels[te], y_pred))
            else:
                X = data.load_features(files, test=True)
                # Reuse the scaler fitted on the training features.
                X = scalers[run].transform(X)
                X = data.per_patient_reshape(X) if per_patient else X
                y_pred = est.predict(X).ravel()
                y_preds.append(y_pred)

    if predict:
        y_pred = np.mean(y_preds, axis=0)
        # Round to the nearest level and keep it inside the label range.
        y_pred = np.clip(np.round(y_pred),
                         np.min(labels), np.max(labels)).astype(int)

        submission_filename = util.get_submission_filename()
        image_files = data.get_image_files(test_dir or config.get('test_dir'))
        names = data.get_names(image_files)
        image_column = pd.Series(names, name='image')
        level_column = pd.Series(y_pred, name='level')
        predictions = pd.concat([image_column, level_column], axis=1)

        print("tail of predictions file")
        print(predictions.tail())

        predictions.to_csv(submission_filename, index=False)
        print("saved predictions to {}".format(submission_filename))
def main(cnf, classes, weights_from, predict):
    """Train the network, or load saved weights and write a submission file.

    NOTE(review): the indentation of this function was lost in the file as
    stored. The nesting below (weights are only loaded in predict mode) is a
    reconstruction -- confirm it against the intended behaviour.

    Args:
        cnf: Passed to ``util.load_module``; the ``config`` attribute of the
            result supplies ``train_dir``, ``test_dir`` and ``weights_file``.
        classes: Converted with ``int`` and stored in ``data.classes`` before
            the labels are loaded.
        weights_from: Weights source to load in predict mode; when ``None``
            ``config.weights_file`` is used instead.
        predict: Truthy to predict the test images and save a CSV, falsy to
            fit the network on the training images.
    """
    config = util.load_module(cnf).config
    files = data.get_image_files(config.get('train_dir'))
    names = [int(x) for x in data.get_names(files)]
    # Module-level setting on `data`; set before get_labels is called.
    data.classes = int(classes)
    labels = data.get_labels(names)
    net = create_net(config)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(files.shape)
    print(labels.shape)

    if predict:
        if weights_from is None:
            weights_from = config.weights_file
        else:
            weights_from = str(weights_from)
        print(weights_from)
        try:
            net.load_params_from(weights_from)
        except IOError:
            # Best effort: carry on with the network's current parameters.
            print("couldn't load weights starting from scratch")
        else:
            print("loaded weights from {}".format(weights_from))

    if not predict:
        print("fitting ...")
        net.fit(files, labels)
        return

    print("predicting ...")
    test_files = data.get_image_files(config.get('test_dir'))
    y_pred = net.predict(test_files).transpose()
    print(y_pred)
    # Round the predictions and keep them inside the training label range.
    y_pred = np.clip(np.round(y_pred),
                     np.min(labels), np.max(labels)).astype(int)

    submission_filename = util.get_submission_filename()
    # Reuse the listing made for prediction instead of re-reading test_dir.
    names = data.get_names(test_files)
    image_column = pd.Series(names, name='photo_id')
    # DataFrame.apply calls string_submit once per column of y_pred.
    level_column = pd.DataFrame(y_pred).apply(string_submit)
    predictions = pd.concat([image_column, level_column], axis=1)

    print("tail of predictions file")
    print(predictions.tail())

    predictions.columns = ['photo_id', 'labels']
    predictions.to_csv(submission_filename, index=False)
    print("saved predictions to {}".format(submission_filename))
def main(cnf, classes, weights_from, predict):
    """Fit the network on the training images, or predict the test images.

    NOTE(review): the indentation of this function was lost in the file as
    stored. The nesting below (weights are only loaded in predict mode) is a
    reconstruction -- confirm it against the intended behaviour.

    Args:
        cnf: Passed to ``util.load_module``; the ``config`` attribute of the
            result supplies ``train_dir``, ``test_dir`` and ``weights_file``.
        classes: Converted with ``int`` and stored in ``data.classes`` before
            the labels are loaded.
        weights_from: Weights source to load in predict mode; when ``None``
            ``config.weights_file`` is used instead.
        predict: Truthy to predict the test images and save a CSV, falsy to
            fit the network on the training images.
    """
    config = util.load_module(cnf).config
    files = data.get_image_files(config.get('train_dir'))
    names = [int(x) for x in data.get_names(files)]
    # Module-level setting on `data`; set before get_labels is called.
    data.classes = int(classes)
    labels = data.get_labels(names)
    net = create_net(config)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(files.shape)
    print(labels.shape)

    if predict:
        if weights_from is None:
            weights_from = config.weights_file
        else:
            weights_from = str(weights_from)
        print(weights_from)
        try:
            net.load_params_from(weights_from)
        except IOError:
            # Best effort: carry on with the network's current parameters.
            print("couldn't load weights starting from scratch")
        else:
            print("loaded weights from {}".format(weights_from))

    if not predict:
        print("fitting ...")
        net.fit(files, labels)
        return

    print("predicting ...")
    test_files = data.get_image_files(config.get('test_dir'))
    y_pred = net.predict(test_files).transpose()
    print(y_pred)
    # Round the predictions and keep them inside the training label range.
    y_pred = np.clip(np.round(y_pred),
                     np.min(labels), np.max(labels)).astype(int)

    submission_filename = util.get_submission_filename()
    # Reuse the listing made for prediction instead of re-reading test_dir.
    names = data.get_names(test_files)
    image_column = pd.Series(names, name='photo_id')
    # DataFrame.apply calls string_submit once per column of y_pred.
    level_column = pd.DataFrame(y_pred).apply(string_submit)
    predictions = pd.concat([image_column, level_column], axis=1)

    print("tail of predictions file")
    print(predictions.tail())

    predictions.columns = ['photo_id', 'labels']
    predictions.to_csv(submission_filename, index=False)
    print("saved predictions to {}".format(submission_filename))
print("kappa after run {}, iter {}: {}".format( run, i, util.kappa(labels[te], y_pred))) print("confusion matrix") print(confusion_matrix(labels[te], y_pred)) else: X = data.load_features(files, test=True) X = np.nan_to_num(X) X = scalers[run].transform(X) X = data.per_patient_reshape(X) if per_patient else X y_pred = est.predict(X).ravel() y_preds.append(y_pred) if predict: y_pred = np.mean(y_preds, axis=0) y_pred = np.clip(np.round(y_pred), np.min(labels), np.max(labels)).astype(int) submission_filename = util.get_submission_filename() image_files = data.get_image_files(test_dir or config.get('test_dir')) names = data.get_names(image_files) image_column = pd.Series(names, name='image') level_column = pd.Series(y_pred, name='level') predictions = pd.concat([image_column, level_column], axis=1) print("tail of predictions file") print(predictions.tail()) predictions.to_csv(submission_filename, index=False) print("saved predictions to {}".format(submission_filename))