Example #1
0
def fit(cnf, predict, per_patient, features_file, n_iter, blend_cnf, test_dir):
    """Fit blend estimator(s) on extracted features and either report
    validation kappa or write a test-set submission CSV.

    cnf -- path to a config module, loaded via util.load_module.
    predict -- True: predict on the test set and write a submission file;
        False: evaluate on the held-out validation indices.
    per_patient -- when True, reshape features via data.per_patient_reshape.
    features_file -- single feature file to use; when None, runs are read
        from the blend_cnf YAML.
    n_iter -- number of fit/predict iterations averaged together.
    blend_cnf -- YAML file describing feature runs.
    test_dir -- optional override of the configured test image directory.
    """
    config = util.load_module(cnf).config
    image_files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(image_files)
    # labels as a float32 column vector, shape (n, 1)
    labels = data.get_labels(names).astype(np.float32)[:, np.newaxis]

    if features_file is not None:
        runs = {'run': [features_file]}
    else:
        # NOTE(review): yaml.load without an explicit Loader is unsafe on
        # untrusted input — consider yaml.safe_load; the file handle is
        # also never closed explicitly.
        runs = data.parse_blend_config(yaml.load(open(blend_cnf)))

    # one scaler per run so each feature set is standardized independently
    scalers = {run: StandardScaler() for run in runs}

    # tr is unused below; only the validation indices te are consulted
    tr, te = data.split_indices(image_files, labels)

    y_preds = []
    for i in range(n_iter):
        print("iteration {} / {}".format(i + 1, n_iter))
        for run, files in runs.items():
            print("fitting features for run {}".format(run))
            X = data.load_features(files)
            X = scalers[run].fit_transform(X)
            X = data.per_patient_reshape(X) if per_patient else X
            est = get_estimator(X.shape[1], image_files, labels,
                                eval_size=0.0 if predict else 0.1)
            # NOTE(review): fits on all rows, including the te indices
            # evaluated below — presumably the estimator holds out its own
            # split via eval_size; confirm there is no leakage.
            est.fit(X, labels)
            if not predict:
                y_pred = est.predict(X[te]).ravel()
                y_preds.append(y_pred)
                # running mean over every prediction gathered so far
                y_pred = np.mean(y_preds, axis=0)
                y_pred = np.clip(np.round(y_pred).astype(int),
                                 np.min(labels), np.max(labels))
                print("kappa after run {}, iter {}: {}".format(
                    run, i, util.kappa(labels[te], y_pred)))
                print("confusion matrix")
                print(confusion_matrix(labels[te], y_pred))
            else:
                X = data.load_features(files, test=True)
                # reuse the scaler fitted on training features
                X = scalers[run].transform(X)
                X = data.per_patient_reshape(X) if per_patient else X
                y_pred = est.predict(X).ravel()
                y_preds.append(y_pred)

    if predict:
        # average over iterations/runs, round, clamp to observed label range
        y_pred = np.mean(y_preds, axis=0)
        y_pred = np.clip(np.round(y_pred),
                         np.min(labels), np.max(labels)).astype(int)
        submission_filename = util.get_submission_filename()
        image_files = data.get_image_files(test_dir or config.get('test_dir'))
        names = data.get_names(image_files)
        image_column = pd.Series(names, name='image')
        level_column = pd.Series(y_pred, name='level')
        predictions = pd.concat([image_column, level_column], axis=1)

        print("tail of predictions file")
        print(predictions.tail())

        predictions.to_csv(submission_filename, index=False)
        print("saved predictions to {}".format(submission_filename))
Example #2
0
def fit(cnf, predict, per_patient, features_file, n_iter, blend_cnf, test_dir):
    """Fit blend estimator(s) on extracted features and either report
    validation kappa or write a test-set submission CSV.

    cnf -- path to a config module, loaded via util.load_module.
    predict -- True: predict on the test set and write a submission file;
        False: evaluate on the held-out validation indices.
    per_patient -- when True, reshape features via data.per_patient_reshape.
    features_file -- single feature file to use; when None, runs are read
        from the blend_cnf YAML.
    n_iter -- number of fit/predict iterations averaged together.
    blend_cnf -- YAML file describing feature runs.
    test_dir -- optional override of the configured test image directory.
    """
    config = util.load_module(cnf).config
    image_files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(image_files)
    # labels as a float32 column vector, shape (n, 1)
    labels = data.get_labels(names).astype(np.float32)[:, np.newaxis]

    if features_file is not None:
        runs = {'run': [features_file]}
    else:
        # 'with' closes the config file deterministically (it was left
        # open before).  NOTE(review): yaml.load without an explicit
        # Loader is unsafe on untrusted input — consider yaml.safe_load.
        with open(blend_cnf) as blend_file:
            runs = data.parse_blend_config(yaml.load(blend_file))

    # one scaler per run so each feature set is standardized independently
    scalers = {run: StandardScaler() for run in runs}

    # tr is unused below; only the validation indices te are consulted
    tr, te = data.split_indices(image_files, labels)

    y_preds = []
    for i in range(n_iter):
        print("iteration {} / {}".format(i + 1, n_iter))
        # iterate the dict view directly; materializing it with list()
        # was unnecessary since runs is never mutated inside the loop
        for run, files in runs.items():
            print("fitting features for run {}".format(run))
            X = data.load_features(files)
            X = scalers[run].fit_transform(X)
            X = data.per_patient_reshape(X) if per_patient else X
            est = get_estimator(X.shape[1], image_files, labels,
                                eval_size=0.0 if predict else 0.1)
            est.fit(X, labels)
            if not predict:
                y_pred = est.predict(X[te]).ravel()
                y_preds.append(y_pred)
                # running mean over every prediction gathered so far
                y_pred = np.mean(y_preds, axis=0)
                y_pred = np.clip(np.round(y_pred).astype(int),
                                 np.min(labels), np.max(labels))
                print("kappa after run {}, iter {}: {}".format(
                    run, i, util.kappa(labels[te], y_pred)))
                print("confusion matrix")
                print(confusion_matrix(labels[te], y_pred))
            else:
                X = data.load_features(files, test=True)
                # reuse the scaler fitted on training features
                X = scalers[run].transform(X)
                X = data.per_patient_reshape(X) if per_patient else X
                y_pred = est.predict(X).ravel()
                y_preds.append(y_pred)

    if predict:
        # average over iterations/runs, round, clamp to observed label range
        y_pred = np.mean(y_preds, axis=0)
        y_pred = np.clip(np.round(y_pred),
                         np.min(labels), np.max(labels)).astype(int)
        submission_filename = util.get_submission_filename()
        image_files = data.get_image_files(test_dir or config.get('test_dir'))
        names = data.get_names(image_files)
        image_column = pd.Series(names, name='image')
        level_column = pd.Series(y_pred, name='level')
        predictions = pd.concat([image_column, level_column], axis=1)

        print("tail of predictions file")
        print(predictions.tail())

        predictions.to_csv(submission_filename, index=False)
        print("saved predictions to {}".format(submission_filename))
Example #3
0
 def process_post(self, post):
     """
     Process post received from the message queue.

     For each tracked person whose names match the normalized,
     French-language post text, the post id is pushed onto that
     person's per-period list and the person's stats are updated.
     When at least one person matched, the post itself is stored
     and indexed under the current period.

     post -- decoded message payload; must contain a 'text' entry.
     """
     # is this a post matching one or more persons?
     post_add = False
     text = data.normalize(post['text']).lower()
     # NOTE(review): first_person is reset here but never read in this
     # method — presumably consumed elsewhere; confirm.
     self.first_person = None
     # check post language
     if data.get_text_language(text) == 'fr':
         for person in self.persons:
             names = data.get_names(person)
             if data.check_names(names, text, person['words']) == 1:
                 # one more post for this person
                 if not post_add:
                     post_add = True
                     # get next post id (allocated once per matching post)
                     post_id = self.db.incr('nextPostId')
                 # add post to person's posts list
                 key = 'person:%d:posts:%d' % (person['id'],
                         self.stats_last_update)
                 self.db.rpush(key, post_id)
                 # update stats for this person
                 self.update_person_stats(person)
         if post_add:
             # add post to db
             self.db.set_post(int(post_id),
                 json.dumps(post))
             # add post id to current hour
             key = 'posts:%d' % (self.stats_last_update)
             self.db.rpush(key, post_id)
     else:
         # NOTE(review): the message says 'english word' but the condition
         # only checks that the language is not French — confirm intent.
         logging.debug('found english word in %s', text)
Example #4
0
def main(cnf, weights_from):
    """Train the configured network on the training images.

    cnf -- path to the config module.
    weights_from -- optional weights file to resume from; defaults to the
        config's weights_file when None.

    Leftover debug output ("Checkpoint N" prints, dumps of the full
    files/labels arrays) and commented-out code were removed; the
    functional behavior — load config, resolve weights, build net,
    best-effort resume, fit — is unchanged.
    """
    config = util.load_module(cnf).config

    if weights_from is None:
        weights_from = config.weights_file
    else:
        weights_from = str(weights_from)

    files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(files)
    labels = data.get_labels(names).astype(np.float32)

    net = create_net(config)

    # resuming is best-effort: a missing weights file starts from scratch
    try:
        net.load_params_from(weights_from)
        print("loaded weights from {}".format(weights_from))
    except IOError:
        print("couldn't load weights starting from scratch")

    print("fitting ...")
    net.fit(files, labels)
Example #5
0
def main(cnf, weights_from):
    """Train the configured network and report the elapsed fitting time.

    cnf -- path to the config module.
    weights_from -- optional weights file to resume from; defaults to the
        config's weights_file when None.
    """
    config = util.load_module(cnf).config

    if weights_from is None:
        weights_from = config.weights_file
    else:
        weights_from = str(weights_from)

    files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(files)
    labels = data.get_labels(names).astype(np.float32)

    net = create_net(config)

    # resuming is best-effort: a missing weights file starts from scratch
    try:
        net.load_params_from(weights_from)
        print("loaded weights from {}".format(weights_from))
    except IOError:
        print("couldn't load weights starting from scratch")
    print("Shape of files: {}".format(files.shape))
    print("Shape of labels: {}".format(labels.shape))
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # so the elapsed measurement cannot be skewed by clock adjustments
    start = time.perf_counter()
    print("fitting ...")
    net.fit(files, labels)
    end = time.perf_counter()
    print("Time elapsed for fitting: {}".format(end - start))
def main(cnf, classes, weights_from, predict):
    """Train the net, or load weights and write a test-set submission.

    cnf -- path to the config module.
    classes -- number of label classes; stored into data.classes.
    weights_from -- optional weights file; defaults to config.weights_file.
    predict -- True: load weights and predict the test set; False: fit.

    Fixed: the original used Python 2 print statements and mixed
    tab/space indentation, which is a SyntaxError under Python 3; the
    logic is otherwise unchanged.
    """
    config = util.load_module(cnf).config
    files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(files)
    names = [int(x) for x in names]
    data.classes = int(classes)
    labels = data.get_labels(names)
    net = create_net(config)

    print(files.shape)
    print(labels.shape)
    if predict:
        if weights_from is None:
            weights_from = config.weights_file
        else:
            weights_from = str(weights_from)
        print(weights_from)
        # resuming is best-effort: a missing weights file starts fresh
        try:
            net.load_params_from(weights_from)
            print("loaded weights from {}".format(weights_from))
        except IOError:
            print("couldn't load weights starting from scratch")
    if not predict:
        print("fitting ...")
        net.fit(files, labels)
    else:
        print("predicting ...")
        test_files = data.get_image_files(config.get('test_dir'))
        y_pred = net.predict(test_files)
        y_pred = y_pred.transpose()
        print(y_pred)
        # round and clamp predictions to the observed label range
        y_pred = np.clip(np.round(y_pred),
                         np.min(labels), np.max(labels)).astype(int)
        submission_filename = util.get_submission_filename()
        image_files = data.get_image_files(config.get('test_dir'))
        names = data.get_names(image_files)
        image_column = pd.Series(names, name='photo_id')
        level_column = pd.DataFrame(y_pred)
        level_column = level_column.apply(lambda x: string_submit(x))
        predictions = pd.concat([image_column, level_column], axis=1)
        print("tail of predictions file")
        print(predictions.tail())
        predictions.columns = ['photo_id', 'labels']
        predictions.to_csv(submission_filename, index=False)
        print("saved predictions to {}".format(submission_filename))
def main(cnf, classes, weights_from, predict):
    """Train the net, or load weights and write a test-set submission.

    cnf -- path to the config module.
    classes -- number of label classes; stored into data.classes.
    weights_from -- optional weights file; defaults to config.weights_file.
    predict -- True: load weights and predict the test set; False: fit.

    Fixed: four remaining Python 2 print statements (shape, weights path
    and prediction dumps) were converted to print() calls so the function
    is valid Python 3 like the rest of the file; logic is unchanged.
    """
    config = util.load_module(cnf).config
    files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(files)
    names = [int(x) for x in names]
    data.classes = int(classes)
    labels = data.get_labels(names)
    net = create_net(config)

    print(files.shape)
    print(labels.shape)
    if predict:
        if weights_from is None:
            weights_from = config.weights_file
        else:
            weights_from = str(weights_from)
        print(weights_from)
        # resuming is best-effort: a missing weights file starts fresh
        try:
            net.load_params_from(weights_from)
            print("loaded weights from {}".format(weights_from))
        except IOError:
            print("couldn't load weights starting from scratch")
    if not predict:
        print("fitting ...")
        net.fit(files, labels)
    else:
        print("predicting ...")
        test_files = data.get_image_files(config.get('test_dir'))
        y_pred = net.predict(test_files)
        y_pred = y_pred.transpose()
        print(y_pred)
        # round and clamp predictions to the observed label range
        y_pred = np.clip(np.round(y_pred), np.min(labels),
                         np.max(labels)).astype(int)
        submission_filename = util.get_submission_filename()
        image_files = data.get_image_files(config.get('test_dir'))
        names = data.get_names(image_files)
        image_column = pd.Series(names, name='photo_id')
        level_column = pd.DataFrame(y_pred)
        level_column = level_column.apply(lambda x: string_submit(x))
        predictions = pd.concat([image_column, level_column], axis=1)
        print("tail of predictions file")
        print(predictions.tail())
        predictions.columns = ['photo_id', 'labels']
        predictions.to_csv(submission_filename, index=False)
        print("saved predictions to {}".format(submission_filename))
Example #8
0
def main(directory, convert_directory, test, crop_size, extension):
    """Resize every jpeg/tiff under *directory* into *convert_directory*,
    carrying the trainLabels.csv label mapping along to the workers.

    directory -- source image tree.
    convert_directory -- output directory (created if missing).
    test -- when True, interactively preview level-1 conversions first.
    crop_size -- target crop size passed to convert().
    extension -- output file extension passed to the workers.
    """
    # create the output directory; ignore the error if it already exists
    try:
        os.mkdir(convert_directory)
    except OSError:
        pass

    filenames = [os.path.join(dp, f) for dp, dn, fn in os.walk(directory)
                 for f in fn if f.endswith('jpeg') or f.endswith('tiff')]
    filenames = sorted(filenames)

    if test:
        names = data.get_names(filenames)
        y = data.get_labels(names)
        for f, level in zip(filenames, y):
            if level == 1:
                # show converted vs. original and wait for keypress
                try:
                    img = convert(f, crop_size)
                    img.show()
                    Image.open(f).show()
                    real_raw_input = vars(__builtins__).get('raw_input', input)
                    real_raw_input('enter for next')
                except KeyboardInterrupt:
                    exit(0)

    print("Resizing images in {} to {}, this takes a while."
          "".format(directory, convert_directory))

    n = len(filenames)
    # process in batches, sometimes weird things happen with Pool on my machine
    batchsize = 500
    batches = n // batchsize + 1
    pool = Pool(N_PROC)

    # read the image -> label mapping, skipping the header row; 'with'
    # guarantees the handle is closed (the original bound the open file to
    # a name shadowing the stdlib csv module and closed it manually)
    label = {}
    with open('trainLabels.csv') as label_file:
        for line in label_file.readlines()[1:]:
            line = line.rstrip('\n')
            cols = line.split(',')
            label[cols[0]] = cols[1]

    args = []
    for f in filenames:
        args.append((convert, (directory, convert_directory, f, crop_size,
                               extension), label))

    for i in range(batches):
        print("batch {:>2} / {}".format(i + 1, batches))
        pool.map(process, args[i * batchsize: (i + 1) * batchsize])

    pool.close()

    print('done')
Example #9
0
def main(directory, convert_directory, test, crop_size, extension):
    """Resize every jpeg/tiff under *directory* into *convert_directory*
    using a worker pool, processing the files in fixed-size batches.

    directory -- source image tree.
    convert_directory -- output directory (created if missing).
    test -- when True, interactively preview level-1 conversions first.
    crop_size -- target crop size passed to convert().
    extension -- output file extension passed to the workers.
    """
    # create the output directory; ignore the error if it already exists
    try:
        os.mkdir(convert_directory)
    except OSError:
        pass

    # collect every jpeg/tiff below the source tree, in sorted order
    filenames = sorted(
        os.path.join(parent, name)
        for parent, _, entries in os.walk(directory)
        for name in entries
        if name.endswith('jpeg') or name.endswith('tiff')
    )

    if test:
        names = data.get_names(filenames)
        y = data.get_labels(names)
        for f, level in zip(filenames, y):
            if level != 1:
                continue
            # show the converted and original image, then wait for a key
            try:
                img = convert(f, crop_size)
                img.show()
                Image.open(f).show()
                real_raw_input = vars(__builtins__).get('raw_input',input)
                real_raw_input('enter for next')
            except KeyboardInterrupt:
                exit(0)

    print("Resizing images in {} to {}, this takes a while."
          "".format(directory, convert_directory))

    n = len(filenames)
    # process in batches, sometimes weird things happen with Pool on my machine
    batchsize = 500
    batches = n // batchsize + 1
    pool = Pool(N_PROC)

    args = [(convert, (directory, convert_directory, f, crop_size, extension))
            for f in filenames]

    for i in range(batches):
        print("batch {:>2} / {}".format(i + 1, batches))
        pool.map(process, args[i * batchsize: (i + 1) * batchsize])

    pool.close()

    print('done')
Example #10
0
def main(cnf, weights_from, fold, exp_run_folder, train_retina):
    """Train a network on one cross-validation fold.

    cnf -- path to the config module.
    weights_from -- optional weights file to resume from; defaults to the
        config's weights_file when None.
    fold -- fold id such as '1x2' (outer fold x inner split).
    exp_run_folder -- experiment folder, stored into the config.
    train_retina -- when equal to 'train_retina', trains on the whole
        train_dir instead of a fold-specific file list.
    """
    config = util.load_module(cnf).config
    config.cnf[
        'fold'] = fold  # <-- used to change the directories for weights_best, weights_epoch and weights_final
    config.cnf['exp_run_folder'] = exp_run_folder
    protocol = data.settings['protocol']

    if train_retina != 'train_retina':
        # NOTE(review): yaml.load without an explicit Loader is unsafe on
        # untrusted input — consider yaml.safe_load; the handle is also
        # never closed explicitly.
        folds = yaml.load(open('folds/' + protocol + '.yml'))
        # e.g. '1x2' -> outer fold '1', inner split index 2 (1-based)
        f0, f1 = fold.split('x')
        train_list = folds['Fold_' + f0][int(f1) - 1]
        files = data.get_image_files(config.get('train_dir'), train_list)
    else:
        files = data.get_image_files(config.get('train_dir'))

    if weights_from is None:
        weights_from = config.weights_file
    else:
        weights_from = str(weights_from)

    names = data.get_names(files)
    labels = data.get_labels(names, label_file='folds/' + protocol +
                             '.csv').astype(np.int32)
    net = nn.create_net(config)

    # resuming is best-effort: a missing weights file starts from scratch
    try:
        net.load_params_from(weights_from)
        print("loaded weights from {}".format(weights_from))
    except IOError:
        print("couldn't load weights, starting from scratch")

    #Print layerinfo
    # NOTE(review): _get_greeting/_get_layer_info_conv are private nolearn
    # APIs and may break across library versions.
    print("## Layer information")
    import nolearn
    layer_info = nolearn.lasagne.PrintLayerInfo()
    print(layer_info._get_greeting(net))
    layer_info, legend = layer_info._get_layer_info_conv(net)
    print(layer_info)
    print(legend)
    print("fitting ...")
    net.fit(files, labels)
Example #11
0
def main(cnf, weights_from):
    """Train the configured network, optionally resuming from weights.

    cnf -- path to the config module.
    weights_from -- optional weights file to resume from; defaults to the
        config's weights_file when None.
    """
    config = util.load_module(cnf).config

    # fall back to the config's default weights file when none was given
    weights_from = (config.weights_file if weights_from is None
                    else str(weights_from))

    files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(files)
    labels = data.get_labels(names).astype(np.float32)

    net = create_net(config)

    # resuming is best-effort: a missing weights file starts from scratch
    try:
        net.load_params_from(weights_from)
        print("loaded weights from {}".format(weights_from))
    except IOError:
        print("couldn't load weights starting from scratch")

    print("fitting ...")
    net.fit(files, labels)
Example #12
0
def build(cnf, weights_from):
    """Prepare a network and its training data without fitting.

    cnf -- path to the config module.
    weights_from -- optional weights file to resume from; defaults to the
        config's weights_file when None.

    Returns (net, files, names, labels) so the caller can fit or inspect.
    """
    config = util.load_module(cnf).config

    # fall back to the config's default weights file when none was given
    weights_from = (config.weights_file if weights_from is None
                    else str(weights_from))

    files = data.get_image_files(config.get('train_dir'))
    names = data.get_names(files)
    labels = data.get_labels(names).astype(np.float32)

    net = create_net(config)

    # resuming is best-effort: a missing weights file starts from scratch
    try:
        net.load_params_from(weights_from)
        print("loaded weights from {}".format(weights_from))
    except IOError:
        print("couldn't load weights starting from scratch")

    print("fitting ...")
    return net, files, names, labels
Example #13
0
import argparse
import numpy as np
import tensorflow as tf

from model import ANN
import data

# command-line interface: choose one of the datasets known to the data
# module and how many training iterations to run
parser = argparse.ArgumentParser(description='Visualize ANN')
parser.add_argument('-d',
                    '--dataset',
                    type=str,
                    default='mnist',
                    choices=data.get_names())
parser.add_argument('--num_iter', type=int, default=5000)
args = parser.parse_args()

# NOTE(review): np and tf are imported but unused in this snippet —
# presumably needed further down or by the imported modules; confirm.
dataset = data.init_dataset(name=args.dataset)

model = ANN(dataset.shape)

model.train(dataset.tr_data, dataset.tr_labels, num_iter=args.num_iter)
Example #14
0
and the final loss for each minibatch is calculated from this average.
CrossEntropyLoss is used because this is a categorization task, since we're
predicting the next letter of the name given previous letters.
"""

import torch
import torch.nn as nn
import torch.optim as optim
import data
import helpers
from NamesRNN import NamesRNN
from hyperparameters import hps
import random

model = NamesRNN()
names = data.get_names(hps['filename'])

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=hps['learning_rate'])

batch_size = hps['batch_size']

for t in range(hps['epochs']):
    batch = random.sample(names, batch_size)
    # keep losses from each batch to be averaged later
    batch_losses = []
    for name in batch:
        xs, ys = helpers.name_to_xy(name)

        # keep losses from each sequence (name) to be averaged later
        seq_losses = []
Example #15
0
#@click.option('--test_dir', default=None, show_default=True,
#              help="Override directory with test set images.")
cnf = 'configs/c_512_5x5_32.py'
predict = True
per_patient = True
features_file = None
n_iter =3 
blend_cnf = 'blend.yml'
test_dir = None


#def fit(cnf, predict, per_patient, features_file, n_iter, blend_cnf, test_dir):

config = util.load_module(cnf).config
image_files = data.get_image_files(config.get('train_dir'))
names = data.get_names(image_files)
labels = data.get_labels(names).astype(np.float32)[:, np.newaxis]

if features_file is not None:
    runs = {'run': [features_file]}
else:
    runs = data.parse_blend_config(yaml.load(open(blend_cnf)))

scalers = {run: StandardScaler() for run in runs}

tr, te = data.split_indices(image_files, labels)

y_preds = []
for i in range(n_iter):
    print("iteration {} / {}".format(i + 1, n_iter))
    for run, files in runs.items():
Example #16
0
def fit(cnf, exp_run_folder, classifier, features_file, n_iter, blend_cnf,
        test_dir, fold):
    """Blend features for one cross-validation fold, write a ranked test
    list and append evaluation metrics (accuracy, AUC, average precision)
    to results.csv.

    cnf -- path to the config module.
    exp_run_folder -- experiment folder holding features and outputs.
    classifier -- estimator name; when None the loaded features are
        treated directly as scores.
    features_file -- single feature file; when None runs come from blend_cnf.
    n_iter -- number of blending iterations.
    blend_cnf -- YAML describing feature files per run.
    test_dir -- optional override of the configured test image directory.
    fold -- fold id such as '1x2'.

    Fixed: the accuracy computation used a Python 2 tuple-unpacking
    lambda inside len(filter(...)), which is a SyntaxError on Python 3;
    the best-estimator file handle was never closed; inner loop indices
    shadowed the outer iteration index.
    """
    config = util.load_module(cnf).config
    config.cnf[
        'fold'] = fold  # <-- used to change the directories for weights_best, weights_epoch and weights_final
    config.cnf['exp_run_folder'] = exp_run_folder

    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input — consider yaml.safe_load.
    folds = yaml.load(open('folds/' + data.settings['protocol'] + '.yml'))
    f0, f1 = fold.split('x')
    train_list = folds['Fold_' + f0][int(f1) - 1]
    test_list = folds['Fold_' + f0][0 if f1 == '2' else 1]

    image_files = data.get_image_files(config.get('train_dir'), train_list)
    names = data.get_names(image_files)
    labels = data.get_labels(names,
                             label_file='folds/' + data.settings['protocol'] +
                             '.csv').astype(np.int32)[:, np.newaxis]

    if features_file is not None:
        runs = {'run': [features_file]}
    else:
        runs = {
            run: [
                os.path.join(exp_run_folder + '/data/features', f)
                for f in files
            ]
            for run, files in yaml.load(open(blend_cnf)).items()
        }

    # one scaler per run (currently unused below; kept for interface parity)
    scalers = {run: StandardScaler() for run in runs}

    for i in range(n_iter):
        print("iteration {} / {}".format(i + 1, n_iter))
        for run, files in runs.items():
            # substitute the generic fold placeholder with the actual fold
            files = [
                f.replace('f0xf1.npy', '{}.npy'.format(fold)) for f in files
            ]

            if classifier is None:
                # features are already scores; use them directly
                X_test = data.load_features(files, test=True)
                if data.settings['protocol'] != 'protocol3':
                    y_pred_proba = X_test
                    y_proba = []
                    for j in range(0, len(X_test)):
                        y_proba.append(
                            y_pred_proba[j][1])  #using score from the positive
                    y_pred = np.clip(np.round(y_proba), 0, 1).astype(int)
                else:
                    # NOTE(review): est and X are undefined on this path —
                    # this branch raises NameError if ever taken; confirm
                    # protocol3 is never combined with classifier=None.
                    y_pred_proba = est.predict_proba(X)
            else:
                print("fitting features for run {}".format(run))
                X_train = data.load_features(files)
                # L2-normalize each sample (row)
                l2Norm = np.linalg.norm(X_train, axis=1)
                X_train = np.divide(X_train.T, l2Norm).T
                est = estimator(data.settings['protocol'],
                                classifier,
                                X_train.shape[1],
                                image_files,
                                X_train,
                                labels,
                                run,
                                fold,
                                eval_size=0.1)
                # record the selected estimator; 'with' closes the handle
                with open(exp_run_folder +
                          "/best_estimator_fold_{}.txt".format(fold),
                          "w") as est_file:
                    est_file.write(str(est))
                X_test = data.load_features(files, test=True)
                l2Norm = np.linalg.norm(X_test, axis=1)
                X_test = np.divide(X_test.T, l2Norm).T
                if data.settings['protocol'] != 'protocol3':
                    y_pred = est.predict(X_test).ravel()
                    y_pred_proba = est.predict_proba(X_test).ravel()
                    y_proba = []
                    # raveled (n, 2) proba -> every second entry is the
                    # positive-class score
                    for j in range(0, 2 * len(X_test), 2):
                        y_proba.append(
                            y_pred_proba[j +
                                         1])  #using score from the positive
                else:
                    y_pred_binary = est.predict(X_test)
                    y_pred = preprocessing.LabelBinarizer().fit([0, 1, 2])
                    y_pred = y_pred.inverse_transform(y_pred_binary)
                    y_proba = est.predict_proba(X_test)

    image_files = data.get_image_files(test_dir or config.get('test_dir'),
                                       test_list)
    names = data.get_names(image_files)
    labels = data.get_labels(
        names, label_file='folds/' + data.settings['protocol'] +
        '.csv').astype(np.int32)[:, np.newaxis]  # , per_patient=per_patient

    image_column = pd.Series(names, name='image')
    labels_column = pd.Series(np.squeeze(labels), name='true')

    level_column = pd.Series(y_pred, name='pred')
    if data.settings['protocol'] != 'protocol3':
        proba_column = pd.Series(y_proba, name='proba')
        predictions = pd.concat(
            [image_column, labels_column, level_column, proba_column], axis=1)
    else:
        proba_label_0 = pd.Series(y_proba[:, 0], name='proba_label_0')
        proba_label_1 = pd.Series(y_proba[:, 1], name='proba_label_1')
        proba_label_2 = pd.Series(y_proba[:, 2], name='proba_label_2')
        predictions = pd.concat([
            image_column, labels_column, level_column, proba_label_0,
            proba_label_1, proba_label_2
        ],
                                axis=1)

    predictions.to_csv(exp_run_folder +
                       "/ranked_list_fold_{}.csv".format(fold),
                       sep=';')

    print("tail of predictions")
    print(predictions.tail())
    # fraction of exact matches (the original used a Python 2
    # tuple-unpacking lambda with filter(), invalid on Python 3)
    acc = sum(1 for l, y in zip(labels, y_pred)
              if l == y) / float(len(labels))
    print("accuracy: {}".format(acc))
    print("confusion matrix")
    print(confusion_matrix(labels, y_pred))

    if data.settings['protocol'] != 'protocol3':
        auc = calc_auc(y_proba, labels, exp_run_folder, classifier, fold)
        print("AUC: {}".format(auc))
        average_precision = average_precision_score(labels, y_proba)
        print("average precision: {}".format(average_precision))
        c_matrix = confusion_matrix(labels, y_pred)
        print("sensitivity: {}".format(c_matrix[1][1] /
                                       (c_matrix[1][1] + c_matrix[0][1])))
        print("specificity: {}".format(c_matrix[0][0] /
                                       (c_matrix[0][0] + c_matrix[1][0])))
    else:
        y_test = label_binarize(labels, classes=[0, 1, 2])
        auc = roc_auc_score(y_test, y_proba, average='macro')
        print("AUC: {}".format(auc))
        average_precision = average_precision_score(y_test,
                                                    y_proba,
                                                    average="macro")
        print("mean average precision: {}".format(average_precision))

    results = pd.concat([
        pd.Series(exp_run_folder, name='folder'),
        pd.Series(fold, name='fold'),
        pd.Series(auc, name='auc'),
        pd.Series(average_precision, name='ap'),
        pd.Series(acc, name='acc')
    ],
                        axis=1)
    with open('results.csv', 'a') as f:
        results.to_csv(f, header=False)