Ejemplo n.º 1
0
def fit(cnf, predict, per_patient, features_file, n_iter, blend_cnf, test_dir):
    """Fit a blending estimator on extracted features and evaluate or predict.

    For each of ``n_iter`` iterations and each feature "run", the features
    are loaded, standardized, optionally reshaped per patient, and a fresh
    estimator from ``get_estimator`` is fit on them.

    Args:
        cnf: Config module reference passed to ``util.load_module``.
        predict: When truthy, fit with ``eval_size=0.0``, predict on the test
            features and write a submission CSV. Otherwise fit with
            ``eval_size=0.1`` and print kappa and a confusion matrix for the
            held-out indices returned by ``data.split_indices``.
        per_patient: When truthy, features go through
            ``data.per_patient_reshape`` before fitting/predicting.
        features_file: Single features file to use; when ``None`` the runs
            are read from the YAML file ``blend_cnf``.
        n_iter: Number of fit/predict rounds whose predictions are averaged.
        blend_cnf: Path to the YAML blend configuration (only read when
            ``features_file`` is ``None``).
        test_dir: Test image directory; falls back to the configured
            ``test_dir`` when falsy.
    """
    config = util.load_module(cnf).config
    image_files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(image_files)
    # Column vector of float labels, shape (n, 1).
    labels = data.get_labels(names).astype(np.float32)[:, np.newaxis]

    if features_file is not None:
        runs = {'run': [features_file]}
    else:
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # switch to yaml.safe_load if the blend config only holds plain data.
        with open(blend_cnf) as blend_file:
            runs = data.parse_blend_config(yaml.load(blend_file))

    # One scaler per run so the test features of a run are transformed with
    # the statistics fitted on that run's training features.
    scalers = {run: StandardScaler() for run in runs}

    # Only the held-out indices are needed for the evaluation below.
    _, te = data.split_indices(image_files, labels)

    # The valid label range does not change between iterations.
    label_min, label_max = np.min(labels), np.max(labels)

    y_preds = []
    for i in range(n_iter):
        print("iteration {} / {}".format(i + 1, n_iter))
        for run, files in runs.items():
            print("fitting features for run {}".format(run))
            X = data.load_features(files)
            X = scalers[run].fit_transform(X)
            X = data.per_patient_reshape(X) if per_patient else X
            est = get_estimator(X.shape[1], image_files, labels,
                                eval_size=0.0 if predict else 0.1)
            est.fit(X, labels)
            if not predict:
                y_pred = est.predict(X[te]).ravel()
                y_preds.append(y_pred)
                # Score the running average of every prediction so far.
                y_pred = np.mean(y_preds, axis=0)
                y_pred = np.clip(np.round(y_pred).astype(int),
                                 label_min, label_max)
                print("kappa after run {}, iter {}: {}".format(
                    run, i, util.kappa(labels[te], y_pred)))
                print("confusion matrix")
                print(confusion_matrix(labels[te], y_pred))
            else:
                X = data.load_features(files, test=True)
                X = scalers[run].transform(X)
                X = data.per_patient_reshape(X) if per_patient else X
                y_pred = est.predict(X).ravel()
                y_preds.append(y_pred)

    if predict:
        # Blend by averaging over all runs and iterations, then snap to the
        # nearest valid integer label.
        y_pred = np.mean(y_preds, axis=0)
        y_pred = np.clip(np.round(y_pred),
                         label_min, label_max).astype(int)
        submission_filename = util.get_submission_filename()
        image_files = data.get_image_files(test_dir or config.get('test_dir'))
        names = data.get_names(image_files)
        image_column = pd.Series(names, name='image')
        level_column = pd.Series(y_pred, name='level')
        predictions = pd.concat([image_column, level_column], axis=1)

        print("tail of predictions file")
        print(predictions.tail())

        predictions.to_csv(submission_filename, index=False)
        print("saved predictions to {}".format(submission_filename))
def main(cnf, classes, weights_from, predict):
    """Train the network, or load weights and write a predictions CSV.

    Args:
        cnf: Config module reference passed to ``util.load_module``.
        classes: Number of classes; stored as ``data.classes`` (cast to int).
        weights_from: Path of the weights to load when predicting; falls
            back to ``config.weights_file`` when ``None``.
        predict: When falsy the net is fit on the training files. When
            truthy weights are loaded, the test files are predicted and a
            CSV with columns ``photo_id`` and ``labels`` is written.
    """
    config = util.load_module(cnf).config
    files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(files)
    names = [int(x) for x in names]
    data.classes = int(classes)
    labels = data.get_labels(names)
    net = create_net(config)

    print(files.shape)
    print(labels.shape)

    if not predict:
        print("fitting ...")
        net.fit(files, labels)
        return

    if weights_from is None:
        weights_from = config.weights_file
    else:
        weights_from = str(weights_from)
    print(weights_from)
    try:
        net.load_params_from(weights_from)
        print("loaded weights from {}".format(weights_from))
    except IOError:
        # NOTE(review): prediction continues with whatever parameters the
        # freshly created net has; consider failing hard here instead.
        print("couldn't load weights starting from scratch")

    print("predicting ...")
    test_files = data.get_image_files(config.get('test_dir'))
    y_pred = net.predict(test_files)
    y_pred = y_pred.transpose()
    print(y_pred)
    y_pred = np.clip(np.round(y_pred),
                     np.min(labels), np.max(labels)).astype(int)
    submission_filename = util.get_submission_filename()
    # Reuse the listing that was predicted on so names and predictions
    # cannot get out of step.
    names = data.get_names(test_files)
    image_column = pd.Series(names, name='photo_id')
    # After the transpose each DataFrame column holds one file's predictions
    # (assuming net.predict returns one row per file -- TODO confirm), so the
    # default column-wise apply collapses every file into a single value.
    level_column = pd.DataFrame(y_pred)
    level_column = level_column.apply(string_submit)
    predictions = pd.concat([image_column, level_column], axis=1)
    print("tail of predictions file")
    print(predictions.tail())
    predictions.columns = ['photo_id', 'labels']
    predictions.to_csv(submission_filename, index=False)
    print("saved predictions to {}".format(submission_filename))
Ejemplo n.º 3
0
def transform(cnf, n_iter, skip, test, train, weights_from, test_dir):
    """Extract network features under repeated transforms and save them.

    For every requested split (train and/or test) the net's ``transform``
    output is accumulated over the quasirandom transforms. The running mean
    and sample standard deviation are saved through ``config.save_features``
    and ``config.save_std`` every 5th iteration, or after every iteration
    when ``n_iter`` is below 5.

    Args:
        cnf: Config module reference passed to ``util.load_module``.
        n_iter: Number of transforms to build and average over.
        skip: Forwarded to ``tta.build_quasirandom_transforms`` and to the
            save calls.
        test: When truthy, process the test directory.
        train: When truthy, process the configured training directory.
        weights_from: Weights path; ``config.weights_file`` when ``None``.
        test_dir: Explicit test directory; also enables the test split.
    """
    config = util.load_module(cnf).config

    # Split name -> image directory for each split that was asked for.
    runs = {}
    if train:
        runs["train"] = config.get("train_dir")
    if test or test_dir:
        runs["test"] = test_dir or config.get("test_dir")

    net = nn.create_net(config)

    weights_path = (config.weights_file if weights_from is None
                    else str(weights_from))
    net.load_params_from(weights_path)
    print("loaded weights from {}".format(weights_path))

    # A single pass uses the no-augmentation parameters without color noise.
    if n_iter > 1:
        color_sigma = config.cnf["sigma"]
        aug_kwargs = config.cnf["aug_params"]
    else:
        color_sigma = 0.0
        aug_kwargs = data.no_augmentation_params
    tfs, color_vecs = tta.build_quasirandom_transforms(
        n_iter, skip=skip, color_sigma=color_sigma, **aug_kwargs
    )

    for run, directory in sorted(runs.items(), reverse=True):

        print("extracting features for files in {}".format(directory))
        started = time.time()

        files = data.get_image_files(directory)
        is_test = run == "test"

        # Running sum and sum of squares of the features over transforms.
        feat_sum = feat_sq_sum = None

        for count, (tfm, cvec) in enumerate(zip(tfs, color_vecs), start=1):

            print("{} transform iter {}".format(run, count))

            X = net.transform(files, transform=tfm, color_vec=cvec)
            if feat_sum is None:
                feat_sum = X
                feat_sq_sum = X ** 2
            else:
                feat_sum += X
                feat_sq_sum += X ** 2

            print("took {:6.1f} seconds".format(time.time() - started))

            if not (count % 5 == 0 or n_iter < 5):
                continue

            # Sample std from the running sums; at count == 1 the
            # denominator is zero, so the result is not finite.
            std = np.sqrt(
                (feat_sq_sum - feat_sum ** 2 / count) / (count - 1))
            config.save_features(feat_sum / count, count, skip=skip,
                                 test=is_test)
            config.save_std(std, count, skip=skip, test=is_test)
            print("saved {} iterations".format(count))
def main(directory):
    """Print the batch-averaged SVD of the pixel second-moment matrix.

    Images under ``directory`` are loaded in batches of 1000 through
    ``data.load_augment(f, 256, 256)``. For each batch the pixels are
    flattened to rows of 3 values, the 3x3 matrix ``X.T @ X / n`` is
    decomposed with SVD, and the left singular vectors and the square roots
    of the singular values are averaged over batches and printed.

    Args:
        directory: Directory whose image files are analysed.
    """
    filenames = data.get_image_files(directory)

    bs = 1000

    Us, evs = [], []
    # Step by batch size so no empty trailing batch is produced when the
    # file count is an exact multiple of ``bs`` (or zero); an empty batch
    # cannot be transposed below.
    for start in range(0, len(filenames), bs):
        batch = filenames[start:start + bs]
        images = np.array([data.load_augment(f, 256, 256) for f in batch])
        # Moves axis 1 last before flattening to (n_pixels, 3); assumes
        # load_augment returns channel-first arrays -- TODO confirm.
        X = images.transpose(0, 2, 3, 1).reshape(-1, 3)
        # No mean is subtracted here, so this is the raw second moment.
        cov = np.dot(X.T, X) / X.shape[0]
        U, S, V = np.linalg.svd(cov)
        ev = np.sqrt(S)
        Us.append(U)
        evs.append(ev)

    if not Us:
        print("no image files found in {}".format(directory))
        return

    # NOTE(review): batches are weighted equally even if the last one is
    # smaller, and singular-vector signs are not aligned before averaging.
    print('U')
    print(np.mean(Us, axis=0))
    print('eigenvalues')
    print(np.mean(evs, axis=0))
Ejemplo n.º 5
0
def main(cnf, weights_from):
    """Fit the network on the training images, warm-starting if possible.

    Args:
        cnf: Config module reference passed to ``util.load_module``.
        weights_from: Path of initial weights; ``config.weights_file`` is
            used when ``None``. A failed load (``IOError``) is reported and
            training proceeds with the net as created.
    """
    config = util.load_module(cnf).config

    weights_from = (config.weights_file if weights_from is None
                    else str(weights_from))

    train_files = data.get_image_files(config.get('train_dir'))
    train_names = data.get_names(train_files)
    targets = data.get_labels(train_names).astype(np.float32)

    net = create_net(config)

    # Best effort: missing weights are not fatal.
    try:
        net.load_params_from(weights_from)
        print("loaded weights from {}".format(weights_from))
    except IOError:
        print("couldn't load weights starting from scratch")

    print("fitting ...")
    net.fit(train_files, targets)
Ejemplo n.º 6
0
def _l2_normalize_rows(X):
    """Divide every row of ``X`` by its Euclidean norm."""
    row_norms = np.linalg.norm(X, axis=1)
    return np.divide(X.T, row_norms).T


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, NaN if it is undefined."""
    if denominator == 0:
        return float('nan')
    return float(numerator) / float(denominator)


def fit(cnf, exp_run_folder, classifier, features_file, n_iter, blend_cnf,
        test_dir, fold):
    """Fit/evaluate a classifier on extracted features for one CV fold.

    Writes ``best_estimator_fold_<fold>.txt`` (when a classifier is fit) and
    ``ranked_list_fold_<fold>.csv`` into ``exp_run_folder``, prints accuracy,
    confusion matrix, AUC and average precision, and appends one summary row
    to ``results.csv`` in the working directory.

    Args:
        cnf: Config module reference passed to ``util.load_module``.
        exp_run_folder: Experiment folder for inputs (``data/features``) and
            outputs.
        classifier: Classifier identifier forwarded to ``estimator``. When
            ``None`` the loaded test features are used directly as scores.
        features_file: Single features file; when ``None`` the runs are read
            from the YAML file ``blend_cnf``.
        n_iter: Number of rounds; only the last round's predictions are kept.
        blend_cnf: Path to the YAML blend configuration.
        test_dir: Test image directory; falls back to the configured
            ``test_dir`` when falsy.
        fold: Fold id of the form ``"<f0>x<f1>"``; ``f1`` selects the
            training half and the other half is used for testing.
    """
    config = util.load_module(cnf).config
    # 'fold' is used to change the directories for weights_best,
    # weights_epoch and weights_final.
    config.cnf['fold'] = fold
    config.cnf['exp_run_folder'] = exp_run_folder

    protocol = data.settings['protocol']
    multiclass = protocol == 'protocol3'
    label_file = 'folds/' + protocol + '.csv'

    # NOTE(review): yaml.load can construct arbitrary Python objects; switch
    # to yaml.safe_load if these files only hold plain data.
    with open('folds/' + protocol + '.yml') as folds_file:
        folds = yaml.load(folds_file)
    f0, f1 = fold.split('x')
    train_list = folds['Fold_' + f0][int(f1) - 1]
    test_list = folds['Fold_' + f0][0 if f1 == '2' else 1]

    image_files = data.get_image_files(config.get('train_dir'), train_list)
    names = data.get_names(image_files)
    labels = data.get_labels(
        names, label_file=label_file).astype(np.int32)[:, np.newaxis]

    if features_file is not None:
        runs = {'run': [features_file]}
    else:
        with open(blend_cnf) as blend_file:
            blend = yaml.load(blend_file)
        runs = {
            run: [
                os.path.join(exp_run_folder + '/data/features', f)
                for f in files
            ]
            for run, files in blend.items()
        }

    for iteration in range(n_iter):
        print("iteration {} / {}".format(iteration + 1, n_iter))
        for run, files in runs.items():
            files = [
                f.replace('f0xf1.npy', '{}.npy'.format(fold)) for f in files
            ]

            if classifier is None:
                X_test = data.load_features(files, test=True)
                if not multiclass:
                    # Use the score of the positive class (column 1).
                    y_proba = [row[1] for row in X_test]
                    y_pred = np.clip(np.round(y_proba), 0, 1).astype(int)
                else:
                    # NOTE(review): this branch previously referenced an
                    # undefined estimator and always raised NameError. By
                    # analogy with the binary branch the loaded features are
                    # treated as per-class scores -- confirm they have one
                    # column per class (0, 1, 2).
                    y_proba = np.asarray(X_test)
                    y_pred = np.argmax(y_proba, axis=1)
            else:
                print("fitting features for run {}".format(run))
                X_train = _l2_normalize_rows(data.load_features(files))
                est = estimator(protocol,
                                classifier,
                                X_train.shape[1],
                                image_files,
                                X_train,
                                labels,
                                run,
                                fold,
                                eval_size=0.1)
                est_path = (exp_run_folder +
                            "/best_estimator_fold_{}.txt".format(fold))
                with open(est_path, "w") as est_file:
                    est_file.write(str(est))
                X_test = _l2_normalize_rows(
                    data.load_features(files, test=True))
                if not multiclass:
                    y_pred = est.predict(X_test).ravel()
                    # Use the score of the positive class (column 1).
                    y_proba = list(est.predict_proba(X_test)[:, 1])
                else:
                    y_pred_binary = est.predict(X_test)
                    binarizer = preprocessing.LabelBinarizer().fit([0, 1, 2])
                    y_pred = binarizer.inverse_transform(y_pred_binary)
                    y_proba = est.predict_proba(X_test)

    # From here on everything refers to the held-out half of the fold.
    image_files = data.get_image_files(test_dir or config.get('test_dir'),
                                       test_list)
    names = data.get_names(image_files)
    labels = data.get_labels(
        names, label_file=label_file).astype(np.int32)[:, np.newaxis]

    image_column = pd.Series(names, name='image')
    labels_column = pd.Series(np.squeeze(labels), name='true')

    level_column = pd.Series(y_pred, name='pred')
    if not multiclass:
        proba_column = pd.Series(y_proba, name='proba')
        predictions = pd.concat(
            [image_column, labels_column, level_column, proba_column], axis=1)
    else:
        proba_label_0 = pd.Series(y_proba[:, 0], name='proba_label_0')
        proba_label_1 = pd.Series(y_proba[:, 1], name='proba_label_1')
        proba_label_2 = pd.Series(y_proba[:, 2], name='proba_label_2')
        predictions = pd.concat([
            image_column, labels_column, level_column, proba_label_0,
            proba_label_1, proba_label_2
        ],
                                axis=1)

    predictions.to_csv(exp_run_folder +
                       "/ranked_list_fold_{}.csv".format(fold),
                       sep=';')

    print("tail of predictions")
    print(predictions.tail())
    n_correct = sum(1 for l, y in zip(labels.ravel(), y_pred) if l == y)
    acc = n_correct / float(len(labels))
    print("accuracy: {}".format(acc))
    c_matrix = confusion_matrix(labels, y_pred)
    print("confusion matrix")
    print(c_matrix)

    if not multiclass:
        auc = calc_auc(y_proba, labels, exp_run_folder, classifier, fold)
        print("AUC: {}".format(auc))
        average_precision = average_precision_score(labels, y_proba)
        print("average precision: {}".format(average_precision))
        # Rows of the confusion matrix are true classes, columns predicted.
        tn, fp = c_matrix[0][0], c_matrix[0][1]
        fn, tp = c_matrix[1][0], c_matrix[1][1]
        print("sensitivity: {}".format(_safe_ratio(tp, tp + fn)))
        print("specificity: {}".format(_safe_ratio(tn, tn + fp)))
    else:
        y_test = label_binarize(labels, classes=[0, 1, 2])
        auc = roc_auc_score(y_test, y_proba, average='macro')
        print("AUC: {}".format(auc))
        average_precision = average_precision_score(y_test,
                                                    y_proba,
                                                    average="macro")
        print("mean average precision: {}".format(average_precision))

    results = pd.concat([
        pd.Series(exp_run_folder, name='folder'),
        pd.Series(fold, name='fold'),
        pd.Series(auc, name='auc'),
        pd.Series(average_precision, name='ap'),
        pd.Series(acc, name='acc')
    ],
                        axis=1)
    with open('results.csv', 'a') as f:
        results.to_csv(f, header=False)
Ejemplo n.º 7
0
def transform(cnf, n_iter, skip, test, train, weights_from,  test_dir):
    """Extract network features under repeated transforms and save them.

    The batch sizes in the loaded config are overridden to 128. For every
    requested split the net's ``transform`` output is accumulated over the
    quasirandom transforms; the running mean and sample standard deviation
    are saved through ``config.save_features`` / ``config.save_std`` every
    50th iteration.

    Args:
        cnf: Config module reference passed to ``util.load_module``.
        n_iter: Number of transforms to build and average over.
        skip: Forwarded to ``tta.build_quasirandom_transforms`` and to the
            save calls.
        test: When truthy, process the test directory.
        train: When truthy, process the configured training directory.
        weights_from: Weights path; ``config.weights_file`` when ``None``.
        test_dir: Explicit test directory; also enables the test split.
    """
    config = util.load_module(cnf).config

    config.cnf['batch_size_train'] = 128
    config.cnf['batch_size_test'] = 128

    runs = {}
    if train:
        runs['train'] = config.get('train_dir')
    if test or test_dir:
        runs['test'] = test_dir or config.get('test_dir')

    net = nn.create_net(config)

    if weights_from is None:
        net.load_params_from(config.weights_file)
        print("loaded weights from {}".format(config.weights_file))
    else:
        weights_from = str(weights_from)
        net.load_params_from(weights_from)
        print("loaded weights from {}".format(weights_from))

    # A single pass uses the no-augmentation parameters without color noise.
    if n_iter > 1:
        tfs, color_vecs = tta.build_quasirandom_transforms(
                n_iter, skip=skip, color_sigma=config.cnf['sigma'],
                **config.cnf['aug_params'])
    else:
        tfs, color_vecs = tta.build_quasirandom_transforms(
                n_iter, skip=skip, color_sigma=0.0,
                **data.no_augmentation_params)

    for run, directory in sorted(runs.items(), reverse=True):

        print("extracting features for files in {}".format(directory))
        tic = time.time()

        files = data.get_image_files(directory)

        # Running sum and sum of squares of the features over transforms.
        Xs, Xs2 = None, None

        for i, (tf, color_vec) in enumerate(zip(tfs, color_vecs), start=1):

            print("{} transform iter {}".format(run, i))

            X = net.transform(files, transform=tf, color_vec=color_vec)
            if Xs is None:
                Xs = X
                Xs2 = X**2
            else:
                Xs += X
                Xs2 += X**2

            print('took {:6.1f} seconds'.format(time.time() - tic))
            # NOTE(review): nothing is saved unless the iteration count
            # reaches a multiple of 50, so runs with n_iter < 50 produce no
            # output files -- confirm this cadence is intended.
            if i % 50 == 0:
                # The std is only needed when saving; computing it here
                # avoids discarded work and the division by zero that the
                # former ``n_iter < 5`` path hit at i == 1.
                std = np.sqrt((Xs2 - Xs**2 / i) / (i - 1))
                config.save_features(Xs / i, i, skip=skip,
                                     test=True if run == 'test' else False)
                config.save_std(std, i, skip=skip,
                               test=True if run == 'test' else False)
                print('saved {} iterations'.format(i))