Example #1
def mpd_hiss(client, args):
    logging.info("Connecting to MPD...")
    client.connect(args.host, args.port)
    logging.debug("Connected.")

    if args.password is not None:
        try:
            logging.debug("Authenticating...")
            client.password(args.password)
            logging.debug("Authenticated.")
        except mpd.CommandError as e:
            raise AuthError(e)

    last_status = client.status()

    icon_cache = {
        'last_dir': None,
        'last_image': None,
        'default': growl_icon,
    }

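    # Block on MPD's idle command until the "player" subsystem changes, then
    # compare the previous and current status to decide whether to notify.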
    while True:
        client.send_idle("player")
        client.fetch_idle()

        status = client.status()
        started_playing = (last_status["state"] != "play"
                           and status["state"] == "play")
        last_songid = last_status.get("songid", None)
        songid = status.get("songid", None)
        track_changed = songid not in (None, last_songid)

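        # Notify when playback (re)starts or when the song id changes; pause
        # and stop transitions are ignored.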
        if started_playing or track_changed:
            song = client.currentsong()
            icon = album_art(icon_cache,
                             get_album_dir(song.get("file"), args.album_art),
                             args.scale_icons)

            song_data = {
                "artist": song.get("artist", "Unknown artist"),
                "title": (song.get("title") or basename(song.get("file"))
                          or "Unknown track"),
                "album": song.get("album", ""),
                "duration": hms(int(song.get("time", 0)))
            }
            logging.info("Sending Now Playing notification for "
                         "{artist} - [{album}] {title}.".format(**song_data))
            description = args.description_format.format(**song_data)
            notify(title=args.title_format.format(**song_data),
                   description=description.rstrip("\n"),
                   icon=icon)
        last_status = status
Example #2
def mpd_hiss(client, args):
    logging.info("Connecting to MPD...")
    client.connect(args.host, args.port)
    logging.debug("Connected.")

    if args.password is not None:
        try:
            logging.debug("Authenticating...")
            client.password(args.password)
            logging.debug("Authenticated.")
        except mpd.CommandError as e:
            raise AuthError(e)

    last_status = client.status()

    icon_cache = {
        'last_dir': None,
        'last_image': None,
        'default': growl_icon,
    }

    while True:
        client.send_idle("player")
        client.fetch_idle()

        status = client.status()
        started_playing = (last_status["state"] != "play"
                           and status["state"] == "play")
        last_songid = last_status.get("songid", None)
        songid = status.get("songid", None)
        track_changed = songid not in (None, last_songid)

        if started_playing or track_changed:
            song = client.currentsong()
            icon = album_art(icon_cache, get_album_dir(song.get("file"),
                                                       args.album_art),
                             args.scale_icons)

            song_data = {
                "artist": song.get("artist", "Unknown artist"),
                "title": (song.get("title") or basename(song.get("file"))
                          or "Unknown track"),
                "album": song.get("album", ""),
                "duration": hms(int(song.get("time", 0)))
            }
            logging.info("Sending Now Playing notification for "
                "{artist} - [{album}] {title}.".format(**song_data))
            description = args.description_format.format(**song_data)
            notify(title=args.title_format.format(**song_data),
                   description=description.rstrip("\n"),
                   icon=icon)
        last_status = status
Example #3
def main():
    def training(num_batches, batch_size, x_train, label_train, mask_train):
        for i in range(num_batches):
            idx = range(i * batch_size, (i + 1) * batch_size)
            x_batch = x_train[idx]
            y_batch = label_train[idx]
            mask_batch = mask_train[idx]
            loss, out, batch_norm = train(x_batch, y_batch, mask_batch)
            norms.append(batch_norm)
            preds.append(out)
            losses.append(loss)

        predictions = np.concatenate(preds, axis=0)
        loss_train = np.mean(losses)
        all_losses_train.append(loss_train)
        acc_train = utils.proteins_acc(predictions,
                                       label_train[0:num_batches * batch_size],
                                       mask_train[0:num_batches * batch_size])

        print 'acc_train: ', acc_train
        all_accuracy_train.append(acc_train)
        mean_norm = np.mean(norms)
        all_mean_norm.append(mean_norm)
        print "  average training loss: %.5f" % loss_train
        print "  average training accuracy: %.5f" % acc_train
        print "  average norm: %.5f" % mean_norm

    def testing(num_batches, batch_size, X, y, mask):
        for i in range(num_batches):
            idx = range(i * batch_size, (i + 1) * batch_size)
            x_batch = X[idx]
            y_batch = y[idx]
            mask_batch = mask[idx]
            loss, out = evaluate(x_batch, y_batch, mask_batch)
            preds.append(out)
            losses.append(loss)
        predictions = np.concatenate(preds, axis=0)
        loss_eval = np.mean(losses)
        all_losses.append(loss_eval)

        acc_eval = utils.proteins_acc(predictions, y, mask)
        all_accuracy.append(acc_eval)

        print("Average evaluation loss ({}): {:.5f}".format(subset, loss_eval))
        print("Average evaluation accuracy ({}): {:.5f}".format(
            subset, acc_eval))
        return i

    global momentum_schedule, momentum, i
    sym_y = T.imatrix('target_output')
    sym_mask = T.matrix('mask')
    sym_x = T.tensor3()

    tol = 1e-5
    num_epochs = config.epochs
    batch_size = config.batch_size

    print("Building network ...")
    # DEBUG #
    l_in, l_out = config.build_model()
    # DEBUG #
    all_layers = las.layers.get_all_layers(l_out)
    num_params = las.layers.count_params(l_out)
    print("  number of parameters: %d" % num_params)
    print("  layer output shapes:")
    # output for debugging (names and dimensions) # InputLayer(None, 700, 42)
    for layer in all_layers:
        name = string.ljust(layer.__class__.__name__, 32)
        print("    %s %s" % (name, las.layers.get_output_shape(layer)))

    print("Creating cost function")
    # lasagne.layers.get_output produces a variable for the output of the net
    out_train = las.layers.get_output(l_out, sym_x, deterministic=False)
    print 'out_train: ', out_train

    print("Creating eval function")
    out_eval = las.layers.get_output(l_out, sym_x, deterministic=True)
    probs_flat = out_train.reshape((-1, num_classes))
    print("probs_flat: ", probs_flat)
    lambda_reg = config.lambda_reg
    params = las.layers.get_all_params(l_out, regularizable=True)

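    # L2 penalty over all regularizable parameters; it is added below to a
    # cross-entropy in which masked-out positions do not contribute.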
    reg_term = sum(T.sum(p**2) for p in params)
    cost = T.nnet.categorical_crossentropy(T.clip(probs_flat, tol, 1 - tol),
                                           sym_y.flatten())
    print 'cost: ', cost
    cost = T.sum(
        cost * sym_mask.flatten()) / T.sum(sym_mask) + lambda_reg * reg_term
    print 'cost_2: ', cost

    # Retrieve all parameters from the network
    all_params = las.layers.get_all_params(l_out, trainable=True)

    # Compute SGD updates for training
    print("Computing updates ...")
    if hasattr(config, 'learning_rate_schedule'):
        learning_rate_schedule = config.learning_rate_schedule  # Import learning rate schedule
    # else:
    #     learning_rate_schedule = {0: config.learning_rate}
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))
    all_grads = T.grad(cost, all_params)
    cut_norm = config.cut_grad

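    # Rescale the gradients so their joint norm stays below cut_norm; the
    # rescaled gradients (stored in `updates` here) feed the optimizer, and
    # norm_calc is the pre-clipping norm, tracked per batch for monitoring.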
    updates, norm_calc = las.updates.total_norm_constraint(all_grads,
                                                           max_norm=cut_norm,
                                                           return_norm=True)

    if optimizer == "rmsprop":
        updates = las.updates.rmsprop(updates, all_params, learning_rate)
    else:
        sys.exit("please choose <rmsprop> in configfile")

    # Theano functions for training and computing cost
    print "config.batch_size %d" % batch_size
    print "data.num_classes %d" % num_classes
    if hasattr(config, 'build_model'):
        print("has build model")
    print("Compiling train ...")

    # Use this for training (see deterministic = False above)
    train = theano.function([sym_x, sym_y, sym_mask],
                            [cost, out_train, norm_calc],
                            updates=updates)

    print("Compiling eval ...")
    # use this for eval (deterministic = True + no updates)
    evaluate = theano.function([sym_x, sym_y, sym_mask], [cost, out_eval])

    # Start timers
    start_time = time.time()
    prev_time = start_time
    all_losses_train = []
    all_accuracy_train = []
    all_losses_eval_train = []
    all_losses_eval_valid = []
    all_losses_eval_test = []
    all_accuracy_eval_train = []
    all_accuracy_eval_valid = []
    all_accuracy_eval_test = []
    all_mean_norm = []

    import Data_Manipulator
    (x_train, x_valid, label_train, label_valid, mask_train, mask_valid,
     num_seq_train) = Data_Manipulator.get_train()
    # print("y shape")
    # print(label_valid.shape)
    # print("x_test shape")
    # print(x_valid.shape)

    # Start training
    for epoch in range(num_epochs):
        if (epoch % 10) == 0:
            print "Epoch %d of %d" % (epoch + 1, num_epochs)
        if epoch in learning_rate_schedule:
            lr = np.float32(learning_rate_schedule[epoch])
            print "  setting learning rate to %.7f" % lr
            learning_rate.set_value(lr)

        # print "Shuffling data"
        seq_names = np.arange(0, num_seq_train)
        np.random.shuffle(seq_names)
        x_train = x_train[seq_names]
        label_train = label_train[seq_names]
        mask_train = mask_train[seq_names]
        num_batches = num_seq_train // batch_size  # integer division
        losses = []
        preds = []
        norms = []

        training(num_batches, batch_size, x_train, label_train, mask_train)

        sets = [('valid', x_valid, label_valid, mask_valid,
                 all_losses_eval_valid, all_accuracy_eval_valid)]
        for subset, X, y, mask, all_losses, all_accuracy in sets:
            print "  validating: %s loss" % subset
            preds = []
            num_batches = np.size(X, axis=0) // config.batch_size
            testing(num_batches, batch_size, X, y, mask)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = (time_since_start * (num_epochs - epoch - 1) /
                         float(epoch + 1))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start),
                                             time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left),
                                                  eta_str)

        if (epoch >= config.start_saving_at) and ((epoch % config.save_every)
                                                  == 0):
            print "  saving parameters and metadata"
            with open((metadata_path + "-%d" % epoch + ".pkl"), 'w') as f:
                pickle.dump(
                    {
                        'config_name': config_name,
                        'param_values': las.layers.get_all_param_values(l_out),
                        'losses_train': all_losses_train,
                        'accuracy_train': all_accuracy_train,
                        'losses_eval_train': all_losses_eval_train,
                        'losses_eval_valid': all_losses_eval_valid,
                        'losses_eval_test': all_losses_eval_test,
                        'accuracy_eval_valid': all_accuracy_eval_valid,
                        'accuracy_eval_train': all_accuracy_eval_train,
                        'accuracy_eval_test': all_accuracy_eval_test,
                        'mean_norm': all_mean_norm,
                        'time_since_start': time_since_start
                    }, f, pickle.HIGHEST_PROTOCOL)

            print "  stored in %s" % metadata_path
Example #4
    res_df.metric_eval_valid.max(),
    res_df.metric_eval_valid.iloc[-1]
)

model_arch += '\nBEST/LAST ACC TRAIN: %.2f - %.2f.\n' % (
    res_df.acc_eval_train.max() * 100,
    res_df.acc_eval_train.iloc[-1] * 100
)

model_arch += 'BEST/LAST ACC VALID: %.2f - %.2f.\n' % (
    res_df.acc_eval_valid.max() * 100,
    res_df.acc_eval_valid.iloc[-1] * 100
)

model_arch += '\nTOTAL TRAINING TIME: %s' % \
              hms(model_data['time_since_start'])

#print model_arch
train_conf_mat, hist_rater_a, \
        hist_rater_b, train_nom, \
        train_denom = model_data['metric_extra_eval_train'][-1]
valid_conf_mat, hist_rater_a, \
        hist_rater_b, valid_nom, \
        valid_denom = model_data['metric_extra_eval_valid'][-1]
# Normalised train confusion matrix (with argmax decoding).
#print train_conf_mat / train_conf_mat.sum()
# Normalised validation confusion matrix (with argmax decoding).
#print valid_conf_mat / valid_conf_mat.sum()
chunk_size = model_data['chunk_size'] * 2
batch_size = model_data['batch_size']
Example #5
        for obj_idx, obj_name in enumerate(config().order_objectives):
            valid_mean = np.mean(tmp_losses_valid[obj_name])
            losses_eval_valid[obj_name] = valid_mean
            means.append(valid_mean)
            print obj_name, valid_mean
        print 'Sum of mean losses:', sum(means) 


        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
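        # ETA: scale the elapsed time by the ratio of remaining chunks to
        # chunks completed so far.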
        est_time_left = time_since_start * (config().max_nchunks - chunk_idx + 1.) / (chunk_idx + 1. - start_chunk_idx)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    if ((chunk_idx + 1) % config().save_every) == 0:
        print
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks)
        print 'Saving metadata, parameters'

        with open(metadata_path, 'w') as f:
            pickle.dump({
                'configuration_file': config_name,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'chunks_since_start': chunk_idx,
                'losses_eval_train': losses_eval_train,
Example #6
            prefix_test=img_dir,
            transfo_params=transfos,
            paired_transfos=paired_transfos,
        )

    all_outputs = []
    for i in range(tta_times):
        start_time = time.time()

        print("\t\t\tTTA %i OF %i...\n\n" % (i + 1, tta_times))
        pred = do_pred(test_gen)
        all_outputs.append(pred)

        time_since_start = time.time() - start_time
        print("\nOne TTA iteration took %s.\n" % \
              hms(time_since_start))
        print("Estimated %s to go...\n\n" % \
              hms((tta_times - (i + 1)) * time_since_start))

    print("\n\nDone doing TTA predictions! Ensembling ...\n")
    if tta_ensemble_method == 'mean':
        outputs = np.mean(np.asarray(all_outputs), axis=0)
    elif tta_ensemble_method == 'log_mean':
        outputs = np.mean(np.log(1e-5 + np.asarray(all_outputs)), axis=0)

    subm_fn = 'subm/' + "%s--%s[%s][%s][%i][%s].csv" % \
                        (model_data['model_id'],
                         model_data['configuration'],
                         dataset,
                         tta_transfos,
                         tta_times,
Example #7
            valid_mean = np.mean(tmp_losses_valid[obj_name])
            losses_eval_valid[obj_name] = valid_mean
            means.append(valid_mean)
            print obj_name, valid_mean
        print 'Sum of mean losses:', sum(means)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (config().max_nchunks - chunk_idx +
                                            1.) / (chunk_idx + 1. -
                                                   start_chunk_idx)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start),
                                             time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left),
                                                  eta_str)
        print

    if ((chunk_idx + 1) % config().save_every) == 0:
        print
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks)
        print 'Saving metadata, parameters'

        with open(metadata_path, 'w') as f:
            pickle.dump(
                {
                    'configuration_file': config_name,
                    'git_revision_hash': utils.get_git_revision_hash(),
Example #8
def predict_slice_model(expid, outfile, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    _check_slicemodel(input_layers)

    # Print the architecture
    _print_architecture(top_layer)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems()
    }
    idx = T.lscalar('idx')

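    # Each compiled call evaluates batch `idx` of the chunk currently stored in
    # the shared variables; the "sunny" input is sliced with its own batch size.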
    givens = dict()

    for key in input_layers.keys():
        if key=="sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    iter_test = theano.function([idx], network_outputs + theano_printer.get_the_stuff_to_print(),
                                 givens=givens, on_unused_input="ignore",
                                 # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                 )

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = range(1, num_chunks+1)

    create_test_gen = partial(config().create_test_gen,
                              required_input_keys = xs_shared.keys(),
                              required_output_keys = ["patients", "slices"],
                              )

    print "Generate predictions with this model"
    start_time = time.time()
    prev_time = start_time


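    # One entry per patient and slice; rows of 600-value predictions are
    # accumulated here and averaged into a single distribution afterwards.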
    predictions = [{"patient": i+1,
                    "slices": {
                        slice_id: {
                            "systole": np.zeros((0,600)),
                            "diastole": np.zeros((0,600))
                        } for slice_id in data_loader.get_slice_ids_for_patient(i+1)
                    }
                   } for i in xrange(NUM_PATIENTS)]


    # Loop over data and generate predictions
    for e, test_data in izip(itertools.count(start=1), buffering.buffered_gen_threaded(create_test_gen())):
        print "  load testing data onto GPU"

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])


        patient_ids = test_data["output"]["patients"]
        slice_ids = test_data["output"]["slices"]
        print "  patients:", " ".join(map(str, patient_ids))
        print "  chunk %d/%d" % (e, num_chunks)

        for b in xrange(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {output_layers.keys()[i]: network_outputs[i] for i in xrange(len(output_layers))}
            kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype('float64'), kaggle_diastoles.astype('float64')
            for idx, (patient_id, slice_id) in enumerate(
                    zip(patient_ids[b*config().batch_size:(b+1)*config().batch_size],
                        slice_ids[b*config().batch_size:(b+1)*config().batch_size])):
                if patient_id != 0:
                    index = patient_id-1
                    patient_data = predictions[index]
                    assert patient_id==patient_data["patient"]
                    patient_slice_data = patient_data["slices"][slice_id]
                    patient_slice_data["systole"] =  np.concatenate((patient_slice_data["systole"],  kaggle_systoles[idx:idx+1,:]),axis=0)
                    patient_slice_data["diastole"] = np.concatenate((patient_slice_data["diastole"], kaggle_diastoles[idx:idx+1,:]),axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    # Average predictions
    already_printed = False
    for prediction in predictions:
        for prediction_slice_id in prediction["slices"]:
            prediction_slice = prediction["slices"][prediction_slice_id]
            if prediction_slice["systole"].size>0 and prediction_slice["diastole"].size>0:
                average_method =  getattr(config(), 'tta_average_method', partial(np.mean, axis=0))
                prediction_slice["systole_average"] = average_method(prediction_slice["systole"])
                prediction_slice["diastole_average"] = average_method(prediction_slice["diastole"])
                try:
                    test_if_valid_distribution(prediction_slice["systole_average"])
                    test_if_valid_distribution(prediction_slice["diastole_average"])
                except:
                    if not already_printed:
                        print "WARNING: These distributions are not distributions"
                        already_printed = True
                    prediction_slice["systole_average"] = make_monotone_distribution(prediction_slice["systole_average"])
                    prediction_slice["diastole_average"] = make_monotone_distribution(prediction_slice["diastole_average"])


    print "Calculating training and validation set scores for reference"
    # Add CRPS scores to the predictions
    # Iterate over train and validation sets
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                      (train_patients_indices,  "train")]:
        # Iterate over patients in the set
        for patient in patient_ids:
            prediction = predictions[patient-1]
            # Iterate over the slices
            for slice_id in prediction["slices"]:
                prediction_slice = prediction["slices"][slice_id]
                if "systole_average" in prediction_slice:
                    assert patient == regular_labels[patient-1, 0]
                    error_sys = CRSP(prediction_slice["systole_average"], regular_labels[patient-1, 1])
                    prediction_slice["systole_CRPS"] = error_sys
                    prediction_slice["target_systole"] = regular_labels[patient-1, 1]
                    error_dia = CRSP(prediction_slice["diastole_average"], regular_labels[patient-1, 2])
                    prediction_slice["diastole_CRPS"] = error_dia
                    prediction_slice["target_diastole"] = regular_labels[patient-1, 2]
                    prediction_slice["CRPS"] = 0.5 * error_sys + 0.5 * error_dia


    print "dumping prediction file to %s" % outfile
    with open(outfile, 'w') as f:
        pickle.dump({
                        'metadata_path': metadata_path,
                        'configuration_file': config().__name__,
                        'git_revision_hash': utils.get_git_revision_hash(),
                        'experiment_id': expid,
                        'time_since_start': time_since_start,
                        'param_values': lasagne.layers.get_all_param_values(top_layer),
                        'predictions_per_slice': predictions,
                    }, f, pickle.HIGHEST_PROTOCOL)
    print "prediction file dumped"


    return
Example #9
File: roi.py Project: thesby/dsb3
def extract_rois(expid):
    metadata_path = MODEL_PATH + "%s.pkl" % config.model.__name__
    assert os.path.exists(metadata_path)
    prediction_path = MODEL_PREDICTIONS_PATH + "%s.pkl" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print "  %s" % metadata_path
    print "To generate"
    print "  %s" % prediction_path

    print "Build model"

    interface_layers = config.model.build_model(image_size=config.patch_shape)
    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    for old_key, new_key in config.replace_input_tags.items():
        input_layers[new_key] = input_layers.pop(old_key)

    # merge all output layers into a fictional dummy layer which is not actually used
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )

    # get all the trainable parameters from the model
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    # Count all the parameters we are actually optimizing, and visualize what the model looks like.
    print string.ljust("  layer output shapes:", 26),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"
    def comma_seperator(v):
        return '{:,.0f}'.format(v)
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 22)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(comma_seperator(num_param), 10)
        num_size = string.ljust(comma_seperator(np.prod(layer.output_shape[1:])), 10)
        print "    %s %s %s %s" % (name, num_param, num_size, layer.output_shape)
    num_params = sum([np.prod(p.get_value().shape) for p in all_params])
    print "  number of parameters: %d" % num_params

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    idx = T.lscalar('idx')

    givens = dict()

    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx*config.batch_size:(idx+1)*config.batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    print "Compiling..."
    iter_test = theano.function([idx],
                                network_outputs + theano_printer.get_the_stuff_to_print(),
                                givens=givens, on_unused_input="ignore",
                                # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                )

    print "Preparing dataloaders"
    config.data_loader.prepare()

    print "Load model parameters"
    metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, metadata['param_values'])

    start_time, prev_time = None, None

    import multiprocessing as mp
    jobs = []

    for set in [VALIDATION, TRAIN, TEST]:
        set_indices = config.data_loader.indices[set]
        for _i, sample_id in enumerate(set_indices):

            if start_time is None:
                start_time = time.time()
                prev_time = start_time
            print "sample_id", sample_id, _i+1, "/", len(set_indices), "in", set

            filenametag = input_layers.keys()[0].split(":")[0] + ":patient_id"
            data = config.data_loader.load_sample(sample_id,
                                                  input_layers.keys()+config.extra_tags+[filenametag],{})

            patient_id = data["input"][filenametag]
            print patient_id
            seg_shape = output_layers["predicted_segmentation"].output_shape[1:]
            patch_gen = patch_generator(data, seg_shape, input_layers.keys()[0].split(":")[0]+":")
            t0 = time.time()
            preds = []
            patches = []
            for patch_idx, patch in enumerate(patch_gen):
                for key in xs_shared:
                    xs_shared[key].set_value(patch[key][None,:])

                print " patch_generator", time.time() - t0

                t0 = time.time()
                th_result = iter_test(0)
                print " iter_test", time.time()-t0

                predictions = th_result[:len(network_outputs)]

                preds.append(predictions[0][0])
                patches.append(patch[xs_shared.keys()[0]])
                t0 = time.time()

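            # Stitch the per-patch predictions back together into a single
            # segmentation for this patient.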
            pred = glue_patches(preds)

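            # Either hand nodule extraction off to a small pool of background
            # processes (at most a few alive at once) or run it inline.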
            if not config.plot and config.multiprocess:
                jobs = [job for job in jobs if job.is_alive()]
                if len(jobs) >= 3:
                    # print "waiting", len(jobs)
                    jobs[0].join()
                    del jobs[0]
                jobs.append(mp.Process(target=extract_nodules, args=((pred, patient_id, expid),) ) )
                jobs[-1].daemon=True
                jobs[-1].start()
            else:
                rois = extract_nodules((pred, patient_id, expid))
                print "patient", patient_id, len(rois), "nodules"

            now = time.time()
            time_since_start = now - start_time
            time_since_prev = now - prev_time
            prev_time = now
            print "  %s since start (+%.2f s)" % (utils.hms(time_since_start), time_since_prev)

            if config.plot:
                plot_segmentation_and_nodules(patches, rois, pred, patient_id)

    for job in jobs: job.join()
    return
Example #10
            prefix_test=img_dir,
            transfo_params=transfos,
            paired_transfos=paired_transfos,
        )

    all_outputs = []
    for i in xrange(tta_times):
        start_time = time.time()

        print "\t\t\tTTA %i OF %i...\n\n" % (i + 1, tta_times)
        pred = do_pred(test_gen)
        all_outputs.append(pred)

        time_since_start = time.time() - start_time
        print "\nOne TTA iteration took %s.\n" % \
              hms(time_since_start)
        print "Estimated %s to go...\n\n" % \
              hms((tta_times - (i + 1)) * time_since_start)

    print "\n\nDone doing TTA predictions! Ensembling ...\n"
    if tta_ensemble_method == 'mean':
        outputs = np.mean(np.asarray(all_outputs), axis=0)
    elif tta_ensemble_method == 'log_mean':
        outputs = np.mean(np.log(1e-5 + np.asarray(all_outputs)), axis=0)

    subm_fn = 'subm/' + "%s--%s[%s][%s][%i][%s].csv" % \
                        (model_data['model_id'],
                         model_data['configuration'],
                         dataset,
                         tta_transfos,
                         tta_times,
Example #11
def train_model(expid):
    """
    This function trains the model, and will use the name expid to store and report the results
    :param expid: the name
    :return:
    """
    metadata_path = MODEL_PATH + "%s.pkl" % expid

    # Running without fast_run is very slow, but might be better for debugging.
    # Make sure you don't leave a slower mode on accidentally!
    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Build model"
    # Get the input and output layers of our model
    interface_layers = config.build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]

    # merge all output layers into a fictional dummy layer which is not actually used
    top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values())
    # get all the trainable parameters from the model
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    # do not train beyond the layers in cutoff_gradients. Remove all their parameters from the optimization process
    if "cutoff_gradients" in interface_layers:
        submodel_params = [
            param for value in interface_layers["cutoff_gradients"]
            for param in lasagne.layers.get_all_params(value)
        ]
        all_params = [p for p in all_params if p not in submodel_params]

    # some parameters might already be pretrained! Load their values from the requested configuration name.
    if "pretrained" in interface_layers:
        for config_name, layers_dict in interface_layers[
                "pretrained"].iteritems():
            pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name
            pretrained_resume_metadata = np.load(pretrained_metadata_path)
            pretrained_top_layer = lasagne.layers.MergeLayer(
                incomings=layers_dict.values())
            lasagne.layers.set_all_param_values(
                pretrained_top_layer,
                pretrained_resume_metadata['param_values'])

    # Count all the parameters we are actually optimizing, and visualize what the model looks like.

    print string.ljust("  layer output shapes:", 26),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"

    def comma_seperator(v):
        return '{:,.0f}'.format(v)

    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 22)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(comma_seperator(num_param), 10)
        num_size = string.ljust(
            comma_seperator(np.prod(layer.output_shape[1:])), 10)
        print "    %s %s %s %s" % (name, num_param, num_size,
                                   layer.output_shape)

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])
    print "  number of parameters:", comma_seperator(num_params)

    # Build all the objectives requested by the configuration
    objectives = config.build_objectives(interface_layers)

    train_losses_theano = {
        key: ob.get_loss()
        for key, ob in objectives["train"].iteritems()
    }

    validate_losses_theano = {
        key: ob.get_loss(deterministic=True)
        for key, ob in objectives["validate"].iteritems()
    }

    # Create the Theano variables necessary to interface with the models
    # the input:
    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape),
                                        dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    # the output:
    ys_shared = {
        key: lasagne.utils.shared_empty(dim=target_var.ndim,
                                        dtype=target_var.dtype)
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    # Set up the learning rate schedule
    learning_rate_schedule = config.learning_rate_schedule
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))

    # We only work on one batch at a time within our chunk. Set up the Theano code which does this.
    # idx is the index of the batch we are currently processing within our chunk of data.
    idx = T.lscalar('idx')

    givens = dict()
    for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                   objectives["validate"].iteritems()):
        for (key, target_var) in ob.target_vars.iteritems():
            givens[target_var] = ys_shared[key][idx *
                                                config.batch_size:(idx + 1) *
                                                config.batch_size]

    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx *
                                                config.batch_size:(idx + 1) *
                                                config.batch_size]

    # sum over the losses of the objective we optimize. We will optimize this sum (either minimize or maximize)
    # sum makes the learning rate independent of batch size!
    if hasattr(config, "dont_sum_losses") and config.dont_sum_losses:
        train_loss_theano = T.mean(train_losses_theano["objective"])
    else:
        train_loss_theano = T.sum(train_losses_theano["objective"]) * (
            -1 if objectives["train"]["objective"].optimize == MAXIMIZE else 1)

    # build the update step for Theano
    updates = config.build_updates(train_loss_theano, all_params,
                                   learning_rate)

    if hasattr(config, "print_gradnorm") and config.print_gradnorm:
        all_grads = theano.grad(train_loss_theano,
                                all_params,
                                disconnected_inputs='warn')
        grad_norm = T.sqrt(T.sum([(g**2).sum() for g in all_grads]) + 1e-9)
        grad_norm.name = "grad_norm"
        theano_printer.print_me_this("  grad norm", grad_norm)
        # train_losses_theano["grad_norm"] = grad_norm

    # Compile the Theano function of your model+objective
    print "Compiling..."
    iter_train = theano.function(
        [idx],
        train_losses_theano.values() + theano_printer.get_the_stuff_to_print(),
        givens=givens,
        on_unused_input="ignore",
        updates=updates,
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    if hasattr(config, "print_gradnorm") and config.print_gradnorm:
        del theano_printer._stuff_to_print[-1]

    # For validation, we also like to have something which returns the output of our model without the objective
    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer,
                                         deterministic=True)
        for network_output_layer in output_layers.values()
    ]
    iter_predict = theano.function([idx],
                                   network_outputs +
                                   theano_printer.get_the_stuff_to_print(),
                                   givens=givens,
                                   on_unused_input="ignore")

    # The data loader will need to know which kinds of data it actually needs to load
    # collect all the necessary tags for the model.
    required_input = {
        key: l_in.output_shape
        for (key, l_in) in input_layers.iteritems()
    }
    required_output = {
        key: None  # size is not needed
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    # The data loaders need to prepare before they should start
    # This is usually where the data is loaded from disk onto memory
    print "Preparing dataloaders"
    config.training_data.prepare()
    for validation_data in config.validation_data.values():
        validation_data.prepare()

    print "Will train for %s epochs" % config.training_data.epochs

    # If this is the second time we run this configuration, we might need to load the results of the previous
    # optimization. Check if this is the case, and load the parameters and stuff. If not, start from zero.
    if config.restart_from_save and os.path.isfile(metadata_path):
        print "Load model parameters for resuming"
        resume_metadata = np.load(metadata_path)
        lasagne.layers.set_all_param_values(top_layer,
                                            resume_metadata['param_values'])
        start_chunk_idx = resume_metadata['chunks_since_start'] + 1

        # set lr to the correct value
        current_lr = np.float32(
            utils.current_learning_rate(learning_rate_schedule,
                                        start_chunk_idx))
        print "  setting learning rate to %.7f" % current_lr
        learning_rate.set_value(current_lr)
        losses = resume_metadata['losses']
        config.training_data.skip_first_chunks(start_chunk_idx)
    else:
        start_chunk_idx = 0
        losses = dict()
        losses[TRAINING] = dict()
        losses[VALIDATION] = dict()
        for loss_name in train_losses_theano.keys():
            losses[TRAINING][loss_name] = list()

        for dataset_name in config.validation_data.keys():
            losses[VALIDATION][dataset_name] = dict()
            for loss_name in validate_losses_theano.keys():
                losses[VALIDATION][dataset_name][loss_name] = list()

    # Make a data generator which returns preprocessed chunks of data which are fed to the model
    # Note that this is a generator object! It is a special kind of iterator.
    chunk_size = config.batches_per_chunk * config.batch_size
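    # A chunk is the block of samples loaded into the shared variables in one
    # go; it is processed batch by batch on the device.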

    # Weight normalization
    if hasattr(config, "init_weight_norm") and not config.restart_from_save:
        theano_printer._stuff_to_print = []
        from theano_utils.weight_norm import train_weight_norm
        train_weight_norm(config, output_layers, all_layers, idx, givens,
                          xs_shared, chunk_size, required_input,
                          required_output)

    training_data_generator = buffering.buffered_gen_threaded(
        config.training_data.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output=required_output,
        ))

    # Estimate the number of batches we will train for.
    chunks_train_idcs = itertools.count(start_chunk_idx)
    if config.training_data.epochs:
        num_chunks_train = int(1.0 * config.training_data.epochs *
                               config.training_data.number_of_samples /
                               (config.batch_size * config.batches_per_chunk))
    else:
        num_chunks_train = None

    # Start the timer objects
    start_time, prev_time = None, None
    print "Loading first chunks"
    data_load_time = Timer()
    gpu_time = Timer()

    #========================#
    # This is the train loop #
    #========================#
    data_load_time.start()
    for e, train_data in izip(chunks_train_idcs, training_data_generator):
        data_load_time.stop()
        if start_time is None:
            start_time = time.time()
            prev_time = start_time

        print
        if num_chunks_train:
            print "Chunk %d/%d" % (e + 1, num_chunks_train)
        else:
            print "Chunk %d" % (e + 1)
        print "=============="
        print "  %s" % config.__name__

        # Estimate the current epoch we are at
        epoch = (1.0 * config.batch_size * config.batches_per_chunk * (e + 1) /
                 config.training_data.number_of_samples)
        if epoch >= 0.1:
            print "  Epoch %.1f/%s" % (epoch, str(config.training_data.epochs))
        else:
            print "  Epoch %.0e/%s" % (epoch, str(config.training_data.epochs))

        # for debugging the data loader, it might be useful to dump everything it loaded and analyze it.
        if config.dump_network_loaded_data:
            pickle.dump(train_data,
                        open("data_loader_dump_train_%d.pkl" % e, "wb"))

        # Update the learning rate according to the schedule for the current epoch
        for key, rate in learning_rate_schedule.iteritems():
            if epoch >= key:
                lr = np.float32(rate)
                learning_rate.set_value(lr)
        print "  learning rate %.0e" % lr

        # Move this data from the data loader onto the Theano variables
        for key in xs_shared:
            xs_shared[key].set_value(train_data["input"][key])

        for key in ys_shared:
            if key not in train_data["output"]:
                raise Exception(
                    "You forgot to add key %s to OUTPUT_DATA_SIZE_TYPE in your data loader"
                    % key)
            ys_shared[key].set_value(train_data["output"][key])

        # loop over all the batches in one chunk, and keep the losses
        chunk_losses = np.zeros((len(train_losses_theano), 0))
        for b in xrange(config.batches_per_chunk):
            gpu_time.start()
            th_result = iter_train(b)
            gpu_time.stop()

            resulting_losses = np.stack(th_result[:len(train_losses_theano)],
                                        axis=0)

            # these are not needed anyway, just to make Theano call the print function
            # stuff_to_print = th_result[-len(theano_printer.get_the_stuff_to_print()):]
            # print resulting_losses.shape, chunk_losses.shape
            chunk_losses = np.concatenate((chunk_losses, resulting_losses),
                                          axis=1)

        # check if we found NaN's. When there are NaN's we might as well exit.
        utils.detect_nans(chunk_losses, xs_shared, ys_shared, all_params)

        # Average our losses, and print them.
        mean_train_loss = np.mean(chunk_losses, axis=1)
        for loss_name, loss in zip(train_losses_theano.keys(),
                                   mean_train_loss):
            losses[TRAINING][loss_name].append(loss)
            print string.rjust(loss_name + ":", 15), "%.6f" % loss

        # Now, we will do validation. We do this about every config.epochs_per_validation epochs.
        # We also always validate at the end of every training!
        validate_every = max(
            int((config.epochs_per_validation *
                 config.training_data.number_of_samples) /
                (config.batch_size * config.batches_per_chunk)), 1)

        if ((e + 1) % validate_every) == 0 or (num_chunks_train
                                               and e + 1 >= num_chunks_train):
            print
            print "  Validating "

            # We might test on multiple datasets, such as the Train set, Validation set, ...
            for dataset_name, dataset_generator in config.validation_data.iteritems():

                # Start loading the validation data!
                validation_chunk_generator = dataset_generator.generate_batch(
                    chunk_size=chunk_size,
                    required_input=required_input,
                    required_output=required_output,
                )

                print "  %s (%d/%d samples)" % (
                    dataset_name,
                    dataset_generator.number_of_samples_in_iterator,
                    dataset_generator.number_of_samples)
                print "  -----------------------"

                # If there are no validation samples, don't bother validating.
                if dataset_generator.number_of_samples == 0:
                    continue

                validation_predictions = None

                # Keep the labels of the validation data for later.
                output_keys_to_store = set()
                losses_to_store = dict()
                for key, ob in objectives["validate"].iteritems():
                    if ob.mean_over_samples:
                        losses_to_store[key] = None
                    else:
                        output_keys_to_store.add(ob.target_key)
                chunk_labels = {k: None for k in output_keys_to_store}
                store_network_output = (len(output_keys_to_store) > 0)
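                # Objectives that cannot simply be averaged per sample need the
                # raw network outputs and labels kept for a single global pass.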

                # loop over all validation data chunks
                data_load_time.start()
                for validation_data in buffering.buffered_gen_threaded(
                        validation_chunk_generator):
                    data_load_time.stop()
                    num_batches_chunk_eval = config.batches_per_chunk

                    # set the validation data to the required Theano variables. Note, there is no
                    # use setting the output variables, as we do not have labels of the validation set!
                    for key in xs_shared:
                        xs_shared[key].set_value(validation_data["input"][key])

                    # store all the output keys required for finding the validation error
                    for key in output_keys_to_store:
                        new_data = validation_data["output"][
                            key][:validation_data["valid_samples"]]

                        if chunk_labels[key] is None:
                            chunk_labels[key] = new_data
                        else:
                            chunk_labels[key] = np.concatenate(
                                (chunk_labels[key], new_data), axis=0)

                    # loop over the batches of one chunk, and keep the predictions
                    chunk_predictions = None
                    for b in xrange(num_batches_chunk_eval):
                        gpu_time.start()
                        th_result = iter_predict(b)
                        gpu_time.stop()
                        resulting_predictions = np.stack(
                            th_result[:len(network_outputs)], axis=0)
                        assert len(
                            network_outputs
                        ) == 1, "Multiple outputs not implemented yet"
                        if chunk_predictions is None:
                            chunk_predictions = resulting_predictions
                        else:
                            chunk_predictions = np.concatenate(
                                (chunk_predictions, resulting_predictions),
                                axis=1)

                    # Check for NaN's. Panic if there are NaN's during validation.
                    utils.detect_nans(chunk_predictions, xs_shared, ys_shared,
                                      all_params)

                    # add the predictions of this chunk, to the global predictions (if needed)
                    if chunk_predictions is not None:
                        chunk_predictions = chunk_predictions[:validation_data[
                            VALID_SAMPLES]]
                        if store_network_output:
                            if validation_predictions is None:
                                validation_predictions = chunk_predictions
                            else:
                                validation_predictions = np.concatenate(
                                    (validation_predictions,
                                     chunk_predictions),
                                    axis=1)

                    # if you can calculate the losses per chunk, and take the mean afterwards, do that.
                    for key, ob in objectives["validate"].iteritems():
                        if ob.mean_over_samples:
                            new_losses = []
                            for i in xrange(validation_data[VALID_SAMPLES]):
                                loss = ob.get_loss_from_lists(
                                    chunk_predictions[0, i:i + 1],
                                    validation_data["output"][
                                        ob.target_key][i:i + 1])
                                new_losses.append(loss)

                            new_losses = np.array(new_losses)
                            if losses_to_store[key] is None:
                                losses_to_store[key] = new_losses
                            else:
                                losses_to_store[key] = np.concatenate(
                                    (losses_to_store[key], new_losses), axis=0)

                    data_load_time.start()
                data_load_time.stop()

                # Compare the predictions with the actual labels and print them.
                for key, ob in objectives["validate"].iteritems():
                    if ob.mean_over_samples:
                        loss = np.mean(losses_to_store[key])
                    else:
                        loss = ob.get_loss_from_lists(
                            validation_predictions[0, :],
                            chunk_labels[ob.target_key])
                    losses[VALIDATION][dataset_name][key].append(loss)
                    print string.rjust(key + ":", 17), "%.6f" % loss
                print

        # Good, we did one chunk. Let us check how much time this took us. Print out some stats.
        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        # This is the most useful stat of all! Keep this number low, and your total optimization time will be low too.
        print "  on average %dms per training sample" % (
            1000. * time_since_start /
            ((e + 1 - start_chunk_idx) * config.batch_size *
             config.batches_per_chunk))
        print "  %s since start (+%.2f s)" % (utils.hms(time_since_start),
                                              time_since_prev)
        print "  %s waiting on gpu vs %s waiting for data" % (gpu_time,
                                                              data_load_time)
        try:
            if num_chunks_train:  # only if we ever stop running
                est_time_left = time_since_start * (
                    float(num_chunks_train - (e + 1 - start_chunk_idx)) /
                    float(e + 1 - start_chunk_idx))
                eta = datetime.datetime.now() + datetime.timedelta(
                    seconds=est_time_left)
                eta_str = eta.strftime("%c")
                print "  estimated %s to go" % utils.hms(est_time_left)
                print "  (ETA: %s)" % eta_str
                if hasattr(config, "print_mean_chunks"):
                    avg_train = losses[TRAINING]["objective"]
                    n = min(len(avg_train), config.print_mean_chunks)
                    avg_train = avg_train[-n:]
                    print "  mean loss last %i chunks: %.3f" % (
                        n, np.mean(avg_train))
        except OverflowError:
            # Shit happens
            print "  This will take really long, like REALLY long."
        if hasattr(config, "print_score_every_chunk") and config.print_score_every_chunk\
                and len(losses[VALIDATION]["training set"]["objective"]) > 0:
            print "  train: best %.3f latest %.3f, valid: best %.3f latest %.3f " % (
                np.min(losses[VALIDATION]["training set"]["objective"]),
                losses[VALIDATION]["training set"]["objective"][-1],
                np.min(losses[VALIDATION]["validation set"]["objective"]),
                losses[VALIDATION]["validation set"]["objective"][-1])

        # Save the data every config.save_every_chunks chunks. Or at the end of the training.
        # We should switch this to config.save_every_epochs at some point, for consistency.
        if ((e + 1) % config.save_every_chunks) == 0 or (
                num_chunks_train and e + 1 >= num_chunks_train):
            print
            print "Saving metadata, parameters"

            with open(metadata_path, 'w') as f:
                pickle.dump(
                    {
                        'metadata_path': metadata_path,
                        'configuration_file': config.__name__,
                        'git_revision_hash': utils.get_git_revision_hash(),
                        'experiment_id': expid,
                        'chunks_since_start': e,
                        'losses': losses,
                        'time_since_start': time_since_start,
                        'param_values': lasagne.layers.get_all_param_values(top_layer)
                    }, f, pickle.HIGHEST_PROTOCOL)

            print "  saved to %s" % metadata_path
            print

        # Reset the timers for the next round. This needs to happen here, because at the end of the
        # big for loop we already want to fetch a chunk for the next iteration (the data iterator
        # is an argument of the for loop).
        gpu_time.reset()
        data_load_time.reset()
        data_load_time.start()

    return
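
The timing block above boils down to a per-sample cost plus a linear extrapolation of the remaining time. A self-contained sketch of that arithmetic (hms and estimate_eta below are illustrative helpers, not names taken from the original code):

import datetime


def hms(seconds):
    # Format a duration as H:MM:SS (assumed behaviour of utils.hms).
    seconds = int(seconds)
    return "%d:%02d:%02d" % (seconds // 3600, (seconds % 3600) // 60, seconds % 60)


def estimate_eta(time_since_start, chunks_done, chunks_total):
    # Linear extrapolation: future chunks are assumed to take as long as past ones on average.
    est_time_left = time_since_start * float(chunks_total - chunks_done) / float(chunks_done)
    return est_time_left, datetime.datetime.now() + datetime.timedelta(seconds=est_time_left)


if __name__ == "__main__":
    left, eta = estimate_eta(time_since_start=120.0, chunks_done=10, chunks_total=100)
    print("estimated %s to go (ETA: %s)" % (hms(left), eta.strftime("%c")))
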
Example #12
0
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print "  %s" % metadata_path
    print "To generate"
    print "  %s" % prediction_path
    print "  %s" % submission_path

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    all_layers = lasagne.layers.get_all_layers(top_layer)
    num_params = lasagne.layers.count_params(top_layer)
    print "  number of parameters: %d" % num_params
    print string.ljust("  layer output shapes:",36),
    print string.ljust("#params:",10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print "    %s %s %s" % (name,  num_param, layer.output_shape)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems()
    }
    idx = T.lscalar('idx')

    givens = dict()

    for key in input_layers.keys():
        if key=="sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer, deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    iter_test = theano.function([idx], network_outputs + theano_printer.get_the_stuff_to_print(),
                                 givens=givens, on_unused_input="ignore",
                                 # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                 )

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = range(1, num_chunks+1)

    data_loader.filter_patient_folders()

    create_test_gen = partial(config().create_test_gen,
                              required_input_keys = xs_shared.keys(),
                              required_output_keys = ["patients", "classification_correction_function"],
                              )

    print "Generate predictions with this model"
    start_time = time.time()
    prev_time = start_time


    predictions = [{"patient": i+1,
                    "systole": np.zeros((0,600)),
                    "diastole": np.zeros((0,600))
                    } for i in xrange(NUM_PATIENTS)]


    for e, test_data in izip(itertools.count(start=1), buffering.buffered_gen_threaded(create_test_gen())):
        print "  load testing data onto GPU"

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])


        patient_ids = test_data["output"]["patients"]
        classification_correction = test_data["output"]["classification_correction_function"]
        print "  patients:", " ".join(map(str, patient_ids))
        print "  chunk %d/%d" % (e, num_chunks)

        for b in xrange(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = {output_layers.keys()[i]: network_outputs[i] for i in xrange(len(output_layers))}
            kaggle_systoles, kaggle_diastoles = config().postprocess(network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype('float64'), kaggle_diastoles.astype('float64')
            for idx, patient_id in enumerate(patient_ids[b*config().batch_size:(b+1)*config().batch_size]):
                if patient_id != 0:
                    index = patient_id-1
                    patient_data = predictions[index]
                    assert patient_id==patient_data["patient"]

                    kaggle_systole = kaggle_systoles[idx:idx+1,:]
                    kaggle_diastole = kaggle_diastoles[idx:idx+1,:]
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    kaggle_systole = classification_correction[b*config().batch_size + idx](kaggle_systole)
                    kaggle_diastole = classification_correction[b*config().batch_size + idx](kaggle_diastole)
                    assert np.isfinite(kaggle_systole).all() and np.isfinite(kaggle_diastole).all()
                    patient_data["systole"] = np.concatenate((patient_data["systole"], kaggle_systole), axis=0)
                    patient_data["diastole"] = np.concatenate((patient_data["diastole"], kaggle_diastole), axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

    already_printed = False
    for prediction in predictions:
        if prediction["systole"].size>0 and prediction["diastole"].size>0:
            average_method =  getattr(config(), 'tta_average_method', partial(np.mean, axis=0))
            prediction["systole_average"] = average_method(prediction["systole"])
            prediction["diastole_average"] = average_method(prediction["diastole"])
            try:
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])
            except:
                if not already_printed:
                    print "WARNING: These distributions are not distributions"
                    already_printed = True
                prediction["systole_average"] = make_monotone_distribution(prediction["systole_average"])
                prediction["diastole_average"] = make_monotone_distribution(prediction["diastole_average"])
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])


    print "Calculating training and validation set scores for reference"

    validation_dict = {}
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                      (train_patients_indices,  "train")]:
        errors = []
        for patient in patient_ids:
            prediction = predictions[patient-1]
            if "systole_average" in prediction:
                assert patient == regular_labels[patient-1, 0]
                error = CRSP(prediction["systole_average"], regular_labels[patient-1, 1])
                errors.append(error)
                error = CRSP(prediction["diastole_average"], regular_labels[patient-1, 2])
                errors.append(error)
        if len(errors)>0:
            errors = np.array(errors)
            estimated_CRSP = np.mean(errors)
            print "  %s kaggle loss: %f" % (string.rjust(set_name, 12), estimated_CRSP)
            validation_dict[set_name] = estimated_CRSP
        else:
            print "  %s kaggle loss: not calculated" % (string.rjust(set_name, 12))


    print "dumping prediction file to %s" % prediction_path
    with open(prediction_path, 'w') as f:
        pickle.dump({
                        'metadata_path': metadata_path,
                        'prediction_path': prediction_path,
                        'submission_path': submission_path,
                        'configuration_file': config().__name__,
                        'git_revision_hash': utils.get_git_revision_hash(),
                        'experiment_id': expid,
                        'time_since_start': time_since_start,
                        'param_values': lasagne.layers.get_all_param_values(top_layer),
                        'predictions': predictions,
                        'validation_errors': validation_dict,
                    }, f, pickle.HIGHEST_PROTOCOL)
    print "prediction file dumped"

    print "dumping submission file to %s" % submission_path
    with open(submission_path, 'w') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(['Id'] + ['P%d'%i for i in xrange(600)])
        for prediction in predictions:
            # the submission only has patients 501 to 700
            if prediction["patient"] in data_loader.test_patients_indices:
                if "diastole_average" not in prediction or "systole_average" not in prediction:
                    raise Exception("Not all test-set patients were predicted")
                csvwriter.writerow(["%d_Diastole" % prediction["patient"]] + ["%.18f" % p for p in prediction["diastole_average"].flatten()])
                csvwriter.writerow(["%d_Systole" % prediction["patient"]] + ["%.18f" % p for p in prediction["systole_average"].flatten()])
    print "submission file dumped"

    return
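
Example #12 relies on make_monotone_distribution, test_if_valid_distribution and CRSP, which are not listed here. Assuming each prediction row is a 600-bin cumulative distribution (values in [0, 1], non-decreasing) and that CRSP follows the competition's continuous ranked probability score, compatible helpers might look roughly like this sketch (not the repository's actual implementation):

import numpy as np


def test_if_valid_distribution(cdf, tol=1e-6):
    # A cumulative distribution must stay inside [0, 1] and be non-decreasing.
    cdf = np.asarray(cdf, dtype='float64')
    assert np.all(cdf >= -tol) and np.all(cdf <= 1.0 + tol), "values outside [0, 1]"
    assert np.all(np.diff(cdf) >= -tol), "distribution is not monotone"


def make_monotone_distribution(cdf):
    # Clip to [0, 1] and enforce monotonicity with a running maximum.
    cdf = np.clip(np.asarray(cdf, dtype='float64'), 0.0, 1.0)
    return np.maximum.accumulate(cdf)


def CRSP(predicted_cdf, true_volume):
    # Mean squared difference between the predicted CDF and a step function at the true volume.
    predicted_cdf = np.asarray(predicted_cdf, dtype='float64').flatten()
    heaviside = (np.arange(len(predicted_cdf)) >= true_volume).astype('float64')
    return np.mean((predicted_cdf - heaviside) ** 2)
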
Example #13
0
def analyze_model(expid, path_to_function, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    analysis_path = ANALYSIS_PATH + "%s/" % expid
    if not os.path.exists(analysis_path):
        os.mkdir(analysis_path)

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print "  %s" % metadata_path
    print "To generate"
    print "  %s" % analysis_path

    interface_layers = config.build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values())
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    if "cutoff_gradients" in interface_layers:
        submodel_params = [
            param for value in interface_layers["cutoff_gradients"]
            for param in lasagne.layers.get_all_params(value)
        ]
        all_params = [p for p in all_params if p not in submodel_params]

    if "pretrained" in interface_layers:
        for config_name, layers_dict in interface_layers[
                "pretrained"].iteritems():
            pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name.split(
                '.')[1]
            pretrained_resume_metadata = np.load(pretrained_metadata_path)
            pretrained_top_layer = lasagne.layers.MergeLayer(
                incomings=layers_dict.values())
            lasagne.layers.set_all_param_values(
                pretrained_top_layer,
                pretrained_resume_metadata['param_values'])

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])

    print string.ljust("  layer output shapes:", 34),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"

    def comma_seperator(v):
        return '{:,.0f}'.format(v)

    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 30)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(comma_seperator(num_param), 10)
        num_size = string.ljust(
            comma_seperator(np.prod(layer.output_shape[1:])), 10)
        print "    %s %s %s %s" % (name, num_param, num_size,
                                   layer.output_shape)
    print "  number of parameters:", comma_seperator(num_params)

    objectives = config.build_objectives(interface_layers)

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape),
                                        dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    ys_shared = {
        key: lasagne.utils.shared_empty(dim=target_var.ndim,
                                        dtype=target_var.dtype)
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    idx = T.lscalar('idx')

    givens = dict()

    for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                   objectives["validate"].iteritems()):
        for (key, target_var) in ob.target_vars.iteritems():
            givens[target_var] = ys_shared[key][idx *
                                                config.batch_size:(idx + 1) *
                                                config.batch_size]

    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx *
                                                config.batch_size:(idx + 1) *
                                                config.batch_size]

    print "Compiling..."
    outputs = [
        lasagne.layers.helper.get_output(interface, deterministic=True)
        for interface in interface_layers["outputs"].values()
    ]

    iter_validate = theano.function([idx],
                                    outputs +
                                    theano_printer.get_the_stuff_to_print(),
                                    givens=givens,
                                    on_unused_input="ignore")

    required_input = {
        key: l_in.output_shape
        for (key, l_in) in input_layers.iteritems()
    }
    required_output = {
        key: None  # size is not needed
        for (_, ob) in itertools.chain(objectives["train"].iteritems(),
                                       objectives["validate"].iteritems())
        for (key, target_var) in ob.target_vars.iteritems()
    }

    print "Preparing dataloaders"
    config.training_data.prepare()
    for validation_data in config.validation_data.values():
        validation_data.prepare()

    chunk_size = config.batches_per_chunk * config.batch_size

    training_data_generator = buffering.buffered_gen_threaded(
        config.training_data.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output=required_output,
        ))

    print "Will train for %s epochs" % config.training_data.epochs

    if os.path.isfile(metadata_path):
        print "Load model parameters for resuming"
        resume_metadata = np.load(metadata_path)
        lasagne.layers.set_all_param_values(top_layer,
                                            resume_metadata['param_values'])
    else:
        raise Exception("No previous parameters found!")

    start_time, prev_time = None, None
    print "Loading first chunks"
    data_load_time = Timer()
    gpu_time = Timer()

    data_load_time.start()
    for dataset_name, dataset_generator in config.validation_data.iteritems():
        data_load_time.stop()
        if start_time is None:
            start_time = time.time()
            prev_time = start_time

        validation_chunk_generator = dataset_generator.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output=required_output,
        )

        print "  %s (%d/%d samples)" % (
            dataset_name, dataset_generator.number_of_samples_in_iterator,
            dataset_generator.number_of_samples)
        print "  -----------------------"

        data_load_time.start()
        for validation_data in buffering.buffered_gen_threaded(
                validation_chunk_generator):
            data_load_time.stop()
            num_batches_chunk_eval = config.batches_per_chunk

            for key in xs_shared:
                xs_shared[key].set_value(validation_data["input"][key])

            for key in ys_shared:
                ys_shared[key].set_value(validation_data["output"][key])

            idx = 0
            for b in xrange(num_batches_chunk_eval):
                gpu_time.start()
                th_result = iter_validate(b)
                gpu_time.stop()

                for idx_ex in xrange(config.batch_size):
                    # Create all the kwargs to analyze for each test run
                    kwargs = {}
                    for key in xs_shared.keys():
                        kwargs[key] = validation_data["input"][key][idx +
                                                                    idx_ex]

                    for key in ys_shared.keys():
                        kwargs[key] = validation_data["output"][key][idx +
                                                                     idx_ex]

                    for index, key in enumerate(
                            interface_layers["outputs"].keys()):
                        kwargs[key] = th_result[index][idx_ex]

                    id = validation_data[IDS][idx + idx_ex]
                    if id is not None:
                        # Dynamically load the required analysis function
                        importable = path_to_importable_string(
                            path_to_function)
                        analysis_module = importlib.import_module(importable)
                        analysis_module.analyze(id=id,
                                                analysis_path=analysis_path,
                                                **kwargs)

                idx += config.batch_size

            data_load_time.start()
        data_load_time.stop()
        print

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        print "  %s since start (+%.2f s)" % (utils.hms(time_since_start),
                                              time_since_prev)
        print "  (%s waiting on gpu vs %s waiting for data)" % (gpu_time,
                                                                data_load_time)
        gpu_time.reset()
        data_load_time.reset()
        data_load_time.start()

    return
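
The Timer objects above (gpu_time, data_load_time) are started, stopped, reset and printed with %s, but the class itself is not shown. A minimal compatible accumulator could be as simple as the following (an illustration, not necessarily the project's actual utility):

import time


class Timer(object):
    # Accumulates elapsed wall-clock time over repeated start/stop cycles.
    def __init__(self):
        self.total = 0.0
        self._started_at = None

    def start(self):
        self._started_at = time.time()

    def stop(self):
        if self._started_at is not None:
            self.total += time.time() - self._started_at
            self._started_at = None

    def reset(self):
        self.total = 0.0
        self._started_at = None

    def __str__(self):
        return "%.2fs" % self.total
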
Example #14
0
    all_accuracy_train.append(acc_train)
    auc_train = utils.auc(predictions, labels)
    all_auc_train.append(auc_train)
    if 1 == 1:
        print "  average training loss: %.5f" % loss_train
        print "  average training accuracy: %.5f" % acc_train
        print "  average auc: %.5f" % auc_train

    now = time.time()
    time_since_start = now - start_time
    time_since_prev = now - prev_time
    prev_time = now
    #	est_time_left = time_since_start * num_epochs
    #	eta = datetime.now() + timedelta(seconds=est_time_left)
    #	eta_str = eta.strftime("%c")
    print "  %s since start (%.2f s)" % (utils.hms(time_since_start),
                                         time_since_prev)
    #	print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
    print

    if (epoch >= config.start_saving_at) and ((epoch % config.save_every)
                                              == 0):
        print "  saving parameters and metadata"
        with open((metadata_path + "-%d" % (epoch) + ".pkl"), 'w') as f:
            pickle.dump(
                {
                    'config_name': config_name,
                    'param_values': nn.layers.get_all_param_values(l_out),
                    'losses_train': all_losses_train,
                    'accuracy_train': all_accuracy_train,
                    'auc_train': all_auc_train,
                                       cont_denom])
            del outputs

    now = time.time()
    time_since_start = now - start_time
    time_since_prev = now - prev_time
    prev_time = now
    est_time_left = time_since_start * \
        ((num_chunks_train - (e + 1)) /
         float(e + 1 - chunks_train_ids[0]))
    eta = datetime.datetime.now() + \
        datetime.timedelta(seconds=est_time_left)
    eta_str = eta.strftime("%c")

    print "  %s since start (%.2f s)" % (
        hms(time_since_start),
        time_since_prev
    )
    print "  estimated %s to go (ETA: %s)\n" % (
        hms(est_time_left),
        eta_str
    )

    # Save after every validate.
    if (((e + 1) % save_every) == 0 or
        ((e + 1) % validate_every) == 0 or
            ((e + 1) == num_chunks_train)):
        print "\nSaving model ..."

        with open(dump_path, 'w') as f:
            pickle.dump({
Example #16
0
            tmp_losses_valid.append(l_valid)

        # calculate validation loss across validation set
        valid_loss = np.mean(tmp_losses_valid)
        # TODO: taking mean is not correct if chunks have different sizes!!!
        print('Validation loss: ', valid_loss)
        losses_eval_valid.append(valid_loss)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (config().max_nchunks - chunk_idx + 1.) / (chunk_idx + 1. - start_chunk_idx)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print("  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev))
        print("  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str))
        print()

    if ((chunk_idx + 1) % config().save_every) == 0:
        print()
        print('Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks))
        print('Saving metadata, parameters')

        with open(metadata_path, 'w') as f:
            pickle.dump({
                'configuration_file': config_name,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'chunks_since_start': chunk_idx,
                'losses_eval_train': losses_eval_train,
Example #17
0
	auc_train = utils.auc(predictions, labels)
	all_auc_train.append(auc_train)
	if 1==1:
		print "  average training loss: %.5f" % loss_train
		print "  average training accuracy: %.5f" % acc_train
		print "  average auc: %.5f" % auc_train


	now = time.time()
	time_since_start = now - start_time
	time_since_prev = now - prev_time
	prev_time = now
#	est_time_left = time_since_start * num_epochs
#	eta = datetime.now() + timedelta(seconds=est_time_left)
#	eta_str = eta.strftime("%c")
	print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
#	print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
	print

	if (epoch >= config.start_saving_at) and ((epoch % config.save_every) == 0):
		print "  saving parameters and metadata"
		with open((metadata_path + "-%d" % (epoch) + ".pkl"), 'w') as f:
			pickle.dump({
			'config_name': config_name,
			'param_values': nn.layers.get_all_param_values(l_out),
			'losses_train': all_losses_train,
			'accuracy_train': all_accuracy_train,
			'auc_train': all_auc_train,
			'accuracy_eval_valid': all_accuracy_eval_valid,
			'accuracy_eval_train': all_accuracy_eval_train,
			'auc_eval_train': all_auc_eval_train,
Example #18
0
                cont_denom
            ])
            del outputs

    now = time.time()
    time_since_start = now - start_time
    time_since_prev = now - prev_time
    prev_time = now
    est_time_left = time_since_start * \
        ((num_chunks_train - (e + 1)) /
         float(e + 1 - chunks_train_ids[0]))
    eta = datetime.datetime.now() + \
        datetime.timedelta(seconds=est_time_left)
    eta_str = eta.strftime("%c")

    print "  %s since start (%.2f s)" % (hms(time_since_start),
                                         time_since_prev)
    print "  estimated %s to go (ETA: %s)\n" % (hms(est_time_left), eta_str)

    # Save after every validate.
    if (((e + 1) % save_every) == 0 or ((e + 1) % validate_every) == 0
            or ((e + 1) == num_chunks_train)):
        print "\nSaving model ..."

        with open(dump_path, 'w') as f:
            pickle.dump(
                {
                    'configuration': model.config_name,
                    'model_id': model_id,
                    'chunks_since_start': e,
                    'time_since_start': time_since_start,
Example #19
0
def train_model(expid):
    metadata_path = MODEL_PATH + "%s.pkl" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    data_loader.filter_patient_folders()

    print "Build model"
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=output_layers.values()
    )
    all_layers = lasagne.layers.get_all_layers(top_layer)

    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)
    if "cutoff_gradients" in interface_layers:
        submodel_params = [param for value in interface_layers["cutoff_gradients"] for param in lasagne.layers.get_all_params(value)]
        all_params = [p for p in all_params if p not in submodel_params]

    if "pretrained" in interface_layers:
        for config_name, layers_dict in interface_layers["pretrained"].iteritems():
            pretrained_metadata_path = MODEL_PATH + "%s.pkl" % config_name.split('.')[1]
            pretrained_resume_metadata = np.load(pretrained_metadata_path)
            pretrained_top_layer = lasagne.layers.MergeLayer(
                incomings = layers_dict.values()
            )
            lasagne.layers.set_all_param_values(pretrained_top_layer, pretrained_resume_metadata['param_values'])

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])

    print string.ljust("  layer output shapes:",36),
    print string.ljust("#params:",10),
    print string.ljust("#data:",10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(int(num_param).__str__(), 10)
        num_size = string.ljust(np.prod(layer.output_shape[1:]).__str__(), 10)
        print "    %s %s %s %s" % (name,  num_param, num_size, layer.output_shape)
    print "  number of parameters: %d" % num_params

    obj = config().build_objective(interface_layers)

    train_loss_theano = obj.get_loss()
    kaggle_loss_theano = obj.get_kaggle_loss()
    segmentation_loss_theano = obj.get_segmentation_loss()

    validation_other_losses = collections.OrderedDict()
    validation_train_loss = obj.get_loss(average=False, deterministic=True, validation=True, other_losses=validation_other_losses)
    validation_kaggle_loss = obj.get_kaggle_loss(average=False, deterministic=True, validation=True)
    validation_segmentation_loss = obj.get_segmentation_loss(average=False, deterministic=True, validation=True)


    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape), dtype='float32') for (key, l_in) in input_layers.iteritems()
    }

    # Contains the target_vars of the objective, not the output layers' desired values.
    # There can be more output layers than are strictly required for the objective,
    # e.g. for debugging.

    ys_shared = {
        key: lasagne.utils.shared_empty(dim=target_var.ndim, dtype='float32') for (key, target_var) in obj.target_vars.iteritems()
    }

    learning_rate_schedule = config().learning_rate_schedule

    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))
    idx = T.lscalar('idx')

    givens = dict()
    for key in obj.target_vars.keys():
        if key=="segmentation":
            givens[obj.target_vars[key]] = ys_shared[key][idx*config().sunny_batch_size : (idx+1)*config().sunny_batch_size]
        else:
            givens[obj.target_vars[key]] = ys_shared[key][idx*config().batch_size : (idx+1)*config().batch_size]

    for key in input_layers.keys():
        if key=="sunny":
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().sunny_batch_size:(idx+1)*config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][idx*config().batch_size:(idx+1)*config().batch_size]

    updates = config().build_updates(train_loss_theano, all_params, learning_rate)

    #grad_norm = T.sqrt(T.sum([(g**2).sum() for g in theano.grad(train_loss_theano, all_params)]))
    #theano_printer.print_me_this("Grad norm", grad_norm)

    iter_train = theano.function([idx], [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano] + theano_printer.get_the_stuff_to_print(),
                                 givens=givens, on_unused_input="ignore", updates=updates,
                                 # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                 )
    iter_validate = theano.function([idx], [validation_train_loss, validation_kaggle_loss, validation_segmentation_loss] + [v for _, v in validation_other_losses.items()] + theano_printer.get_the_stuff_to_print(),
                                    givens=givens, on_unused_input="ignore")

    num_chunks_train = int(config().num_epochs_train * NUM_TRAIN_PATIENTS / (config().batch_size * config().batches_per_chunk))
    print "Will train for %d chunks" % num_chunks_train
    if config().restart_from_save and os.path.isfile(metadata_path):
        print "Load model parameters for resuming"
        resume_metadata = np.load(metadata_path)
        lasagne.layers.set_all_param_values(top_layer, resume_metadata['param_values'])
        start_chunk_idx = resume_metadata['chunks_since_start'] + 1
        chunks_train_idcs = range(start_chunk_idx, num_chunks_train)

        # set lr to the correct value
        current_lr = np.float32(utils.current_learning_rate(learning_rate_schedule, start_chunk_idx))
        print "  setting learning rate to %.7f" % current_lr
        learning_rate.set_value(current_lr)
        losses_train = resume_metadata['losses_train']
        losses_eval_valid = resume_metadata['losses_eval_valid']
        losses_eval_train = resume_metadata['losses_eval_train']
        losses_eval_valid_kaggle = [] #resume_metadata['losses_eval_valid_kaggle']
        losses_eval_train_kaggle = [] #resume_metadata['losses_eval_train_kaggle']
    else:
        chunks_train_idcs = range(num_chunks_train)
        losses_train = []
        losses_eval_valid = []
        losses_eval_train = []
        losses_eval_valid_kaggle = []
        losses_eval_train_kaggle = []


    create_train_gen = partial(config().create_train_gen,
                               required_input_keys = xs_shared.keys(),
                               required_output_keys = ys_shared.keys()# + ["patients"],
                               )


    create_eval_valid_gen = partial(config().create_eval_valid_gen,
                                   required_input_keys = xs_shared.keys(),
                                   required_output_keys = ys_shared.keys()# + ["patients"]
                                   )

    create_eval_train_gen = partial(config().create_eval_train_gen,
                                   required_input_keys = xs_shared.keys(),
                                   required_output_keys = ys_shared.keys()
                                   )

    print "Train model"
    start_time = time.time()
    prev_time = start_time

    num_batches_chunk = config().batches_per_chunk


    for e, train_data in izip(chunks_train_idcs, buffering.buffered_gen_threaded(create_train_gen())):
        print "Chunk %d/%d" % (e + 1, num_chunks_train)
        epoch = (1.0 * config().batch_size * config().batches_per_chunk * (e+1) / NUM_TRAIN_PATIENTS)
        print "  Epoch %.1f" % epoch

        for key in sorted(learning_rate_schedule.keys()):
            if epoch >= key:
                lr = np.float32(learning_rate_schedule[key])
                learning_rate.set_value(lr)
        print "  learning rate %.7f" % lr

        if config().dump_network_loaded_data:
            pickle.dump(train_data, open("data_loader_dump_train_%d.pkl"%e, "wb"))

        for key in xs_shared:
            xs_shared[key].set_value(train_data["input"][key])

        for key in ys_shared:
            ys_shared[key].set_value(train_data["output"][key])

        #print "train:", sorted(train_data["output"]["patients"])
        losses = []
        kaggle_losses = []
        segmentation_losses = []
        for b in xrange(num_batches_chunk):
            iter_result = iter_train(b)

            loss, kaggle_loss, segmentation_loss = tuple(iter_result[:3])
            utils.detect_nans(loss, xs_shared, ys_shared, all_params)
 
            losses.append(loss)
            kaggle_losses.append(kaggle_loss)
            segmentation_losses.append(segmentation_loss)

        mean_train_loss = np.mean(losses)
        print "  mean training loss:\t\t%.6f" % mean_train_loss
        losses_train.append(mean_train_loss)

        print "  mean kaggle loss:\t\t%.6f" % np.mean(kaggle_losses)
        print "  mean segment loss:\t\t%.6f" % np.mean(segmentation_losses)

        if ((e + 1) % config().validate_every) == 0:
            print
            print "Validating"
            if config().validate_train_set:
                subsets = ["validation", "train"]
                gens = [create_eval_valid_gen, create_eval_train_gen]
                losses_eval = [losses_eval_valid, losses_eval_train]
                losses_kaggle = [losses_eval_valid_kaggle, losses_eval_train_kaggle]
            else:
                subsets = ["validation"]
                gens = [create_eval_valid_gen]
                losses_eval = [losses_eval_valid]
                losses_kaggle = [losses_eval_valid_kaggle]

            for subset, create_gen, losses_validation, losses_kgl in zip(subsets, gens, losses_eval, losses_kaggle):

                vld_losses = []
                vld_kaggle_losses = []
                vld_segmentation_losses = []
                vld_other_losses = {k:[] for k,_ in validation_other_losses.items()}
                print "  %s set (%d samples)" % (subset, get_number_of_validation_samples(set=subset))

                for validation_data in buffering.buffered_gen_threaded(create_gen()):
                    num_batches_chunk_eval = config().batches_per_chunk

                    if config().dump_network_loaded_data:
                        pickle.dump(validation_data, open("data_loader_dump_valid_%d.pkl"%e, "wb"))

                    for key in xs_shared:
                        xs_shared[key].set_value(validation_data["input"][key])

                    for key in ys_shared:
                        ys_shared[key].set_value(validation_data["output"][key])

                    #print "validate:", validation_data["output"]["patients"]

                    for b in xrange(num_batches_chunk_eval):
                        losses = tuple(iter_validate(b)[:3+len(validation_other_losses)])
                        loss, kaggle_loss, segmentation_loss = losses[:3]
                        other_losses = losses[3:]
                        vld_losses.extend(loss)
                        vld_kaggle_losses.extend(kaggle_loss)
                        vld_segmentation_losses.extend(segmentation_loss)
                        for k, other_loss in zip(validation_other_losses, other_losses):
                            vld_other_losses[k].extend(other_loss)

                vld_losses = np.array(vld_losses)
                vld_kaggle_losses = np.array(vld_kaggle_losses)
                vld_segmentation_losses = np.array(vld_segmentation_losses)
                for k in validation_other_losses:
                    vld_other_losses[k] = np.array(vld_other_losses[k])

                # now select only the relevant section to average
                sunny_len = get_lenght_of_set(name="sunny", set=subset)
                regular_len = get_lenght_of_set(name="regular", set=subset)
                num_valid_samples = get_number_of_validation_samples(set=subset)

                #print losses[:num_valid_samples]
                #print kaggle_losses[:regular_len]
                #print segmentation_losses[:sunny_len]
                loss_to_save = obj.compute_average(vld_losses[:num_valid_samples])
                print "  mean training loss:\t\t%.6f" % loss_to_save
                print "  mean kaggle loss:\t\t%.6f"   % np.mean(vld_kaggle_losses[:regular_len])
                print "  mean segment loss:\t\t%.6f"  % np.mean(vld_segmentation_losses[:sunny_len])
                # print "    acc:\t%.2f%%" % (acc * 100)
                for k, v in vld_other_losses.items():
                    print "  mean %s loss:\t\t%.6f"  % (k, obj.compute_average(v[:num_valid_samples], loss_name=k))
                print

                losses_validation.append(loss_to_save)

                kaggle_to_save = np.mean(vld_kaggle_losses[:regular_len])
                losses_kgl.append(kaggle_to_save)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (float(num_chunks_train - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

        if ((e + 1) % config().save_every) == 0:
            print
            print "Saving metadata, parameters"

            with open(metadata_path, 'w') as f:
                pickle.dump({
                    'metadata_path': metadata_path,
                    'configuration_file': config().__name__,
                    'git_revision_hash': utils.get_git_revision_hash(),
                    'experiment_id': expid,
                    'chunks_since_start': e,
                    'losses_train': losses_train,
                    'losses_eval_train': losses_eval_train,
                    'losses_eval_train_kaggle': losses_eval_train_kaggle,
                    'losses_eval_valid': losses_eval_valid,
                    'losses_eval_valid_kaggle': losses_eval_valid_kaggle,
                    'time_since_start': time_since_start,
                    'param_values': lasagne.layers.get_all_param_values(top_layer)
                }, f, pickle.HIGHEST_PROTOCOL)

            print "  saved to %s" % metadata_path
            print

    # store all known outputs from last batch:
    if config().take_a_dump:
        all_theano_variables = [train_loss_theano, kaggle_loss_theano, segmentation_loss_theano] + theano_printer.get_the_stuff_to_print()
        for layer in all_layers[:-1]:
            all_theano_variables.append(lasagne.layers.helper.get_output(layer))

        iter_train = theano.function([idx], all_theano_variables,
                                     givens=givens, on_unused_input="ignore", updates=updates,
                                     # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
                                     )
        train_data["intermediates"] = iter_train(0)
        pickle.dump(train_data, open(metadata_path + "-dump", "wb"))

    return
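
The resume branch above calls utils.current_learning_rate(learning_rate_schedule, start_chunk_idx), and the training loop applies the rate of the last schedule key that has been reached. A sketch of such a step-schedule lookup, inferred only from how it is used here (the real utility may differ):

import numpy as np


def current_learning_rate(schedule, idx):
    # Step schedule: return the value of the largest key that is <= idx.
    # Example: {0: 1e-3, 50: 1e-4, 80: 1e-5} yields 1e-4 for idx in [50, 80).
    rate = schedule[0]
    for key in sorted(schedule.keys()):
        if idx >= key:
            rate = schedule[key]
    return np.float32(rate)


print(current_learning_rate({0: 1e-3, 50: 1e-4, 80: 1e-5}, 65))  # ~1e-4
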
Example #20
0
def main():
    sym_y = T.imatrix('target_output')
    sym_mask = T.matrix('mask')
    sym_x = T.tensor3()

    TOL = 1e-5
    num_epochs = config.epochs
    batch_size = config.batch_size

    #### DATA ####
    #    print "@@@@TESTING@@@@"
    #    l_in = nn.layers.InputLayer(shape=(None, 700, 42))
    #    l_dim_a = nn.layers.DimshuffleLayer(
    #        l_in, (0,2,1))
    #    l_conv_a = nn.layers.Conv1DLayer(
    #        incoming=l_dim_a, num_filters=42, border_mode='same',
    #        filter_size=3, stride=1, nonlinearity=nn.nonlinearities.rectify)
    #    l_dim_b = nn.layers.DimshuffleLayer(
    #        l_conv_a, (0,2,1))
    #    out = nn.layers.get_output(l_dim_b, sym_x)
    #    testvar = np.ones((128, 700, 42)).astype('float32')
    #    print "@@@@EVAL@@@@"
    #    john = out.eval({sym_x: testvar})
    #    print("Johns shape")
    #    print(john.shape)

    print("Building network ...")
    ##########################DEBUG##########################
    l_in, l_out = config.build_model()

    ##########################DEBUG##########################
    all_layers = nn.layers.get_all_layers(l_out)
    num_params = nn.layers.count_params(l_out)
    print("  number of parameters: %d" % num_params)
    print("  layer output shapes:")
    for layer in all_layers:
        name = layer.__class__.__name__
        print("    %s %s" % (name, nn.layers.get_output_shape(layer)))
    print("Creating cost function")
    # lasagne.layers.get_output produces a variable for the output of the net
    out_train = nn.layers.get_output(
        l_out, sym_x, deterministic=False)

    #    testvar = np.ones((128, 700, 42)).astype('float32')
    #    john = out_train.eval({sym_x: testvar})
    #    print("@@@@@JOHN@@@@@")
    #    print(john.shape)
    #    print(john.reshape((-1, num_classes)).shape)

    print("Creating eval function")
    out_eval = nn.layers.get_output(
        l_out, sym_x, deterministic=True)

    probs_flat = out_train.reshape((-1, num_classes))

    lambda_reg = config.lambda_reg
    all_params = nn.layers.get_all_params(l_out)

    for i, p in enumerate(all_params):
        if p.ndim == 3:
            values = p.get_value()
            if side == 'right':
                values[..., int(values.shape[2] / 2.0 - 0.5):] = 0
                p.set_value(values)
                all_params[i] = p[..., : int(values.shape[2] / 2.0 - 0.5)]
            else:
                values[..., : int(values.shape[2] / 2.0 + 0.5)] = 0
                p.set_value(values)
                all_params[i] = p[..., int(values.shape[2] / 2.0 + 0.5):]

    params = [el for el in all_params if el.name == "W" or el.name == "gamma"]

    reg_term = sum(T.sum(p ** 2) for p in params)
    cost = T.nnet.categorical_crossentropy(T.clip(probs_flat, TOL, 1 - TOL), sym_y.flatten())
    cost = T.sum(cost * sym_mask.flatten()) / T.sum(sym_mask) + lambda_reg * reg_term

    # Retrieve all parameters from the network
    all_params = [el for el in all_params if el.name == "W" or el.name == "gamma" or el.name == "beta"]

    # Setting the weights
    if hasattr(config, 'set_weights'):
        nn.layers.set_all_param_values(l_out, config.set_weights())
    # Compute SGD updates for training
    print("Computing updates ...")
    if hasattr(config, 'learning_rate_schedule'):
        learning_rate_schedule = config.learning_rate_schedule  # Import learning rate schedule
    else:
        learning_rate_schedule = {0: config.learning_rate}
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))

    all_grads = T.grad(cost, all_params)

    cut_norm = config.cut_grad
    updates, norm_calc = nn.updates.total_norm_constraint(all_grads, max_norm=cut_norm, return_norm=True)

    if optimizer == "rmsprop":
        updates = nn.updates.rmsprop(updates, all_params, learning_rate)
    elif optimizer == "adadelta":
        updates = nn.updates.adadelta(updates, all_params, learning_rate)
    elif optimizer == "adagrad":
        updates = nn.updates.adagrad(updates, all_params, learning_rate)
    elif optimizer == "nag":
        momentum_schedule = config.momentum_schedule
        momentum = theano.shared(np.float32(momentum_schedule[0]))
        updates = nn.updates.nesterov_momentum(updates, all_params, learning_rate, momentum)
    else:
        sys.exit("please choose either <rmsprop/adagrad/adadelta/nag> in configfile")

    # Theano functions for training and computing cost
    print ("config.batch_size %d" % batch_size)
    print ("data.num_classes %d" % num_classes)
    if hasattr(config, 'build_model'):
        print("has build model")
    print("Compiling train ...")
    # Use this for training (see deterministic = False above)
    train = theano.function(
        [sym_x, sym_y, sym_mask], [cost, out_train, norm_calc], updates=updates)

    print("Compiling eval ...")
    # use this for eval (deterministic = True + no updates)
    eval = theano.function([sym_x, sym_y, sym_mask], [cost, out_eval])

    # Start timers
    start_time = time.time()
    prev_time = start_time

    all_losses_train = []
    all_accuracy_train = []
    all_losses_eval_train = []
    all_losses_eval_valid = []
    all_losses_eval_test = []
    all_accuracy_eval_train = []
    all_accuracy_eval_valid = []
    all_accuracy_eval_test = []
    all_mean_norm = []

    import data
    X_train, X_valid, y_train, y_valid, mask_train, mask_valid, num_seq_train \
        = data.get_train()
    X_train, X_valid = X_train[..., 21:], X_valid[..., 21:]  # Only train with pssm scores

    print("y shape")
    print(y_valid.shape)
    print("X shape")
    print(X_valid.shape)
    # Start training
    for i in range(y_train.shape[0]):
        for j in range(y_train.shape[1]):
            if y_train[i][j] == 5:
                y_train[i][j] = 1
            else:
                y_train[i][j] = 0

    for i in range(y_valid.shape[0]):
        for j in range(y_valid.shape[1]):
            if y_valid[i][j] == 5:
                y_valid[i][j] = 1
            else:
                y_valid[i][j] = 0

    for epoch in range(num_epochs):

        if (epoch % 10) == 0:
            print ("Epoch %d of %d" % (epoch + 1, num_epochs))

        if epoch in learning_rate_schedule:
            lr = np.float32(learning_rate_schedule[epoch])
            print ("  setting learning rate to %.7f" % lr)
            learning_rate.set_value(lr)
        if optimizer == "nag":
            if epoch in momentum_schedule:
                mu = np.float32(momentum_schedule[epoch])
                print ("  setting learning rate to %.7f" % mu)
                momentum.set_value(mu)
        #        print "Shuffling data"
        seq_names = np.arange(0, num_seq_train)
        np.random.shuffle(seq_names)
        X_train = X_train[seq_names]
        y_train = y_train[seq_names]
        mask_train = mask_train[seq_names]

        num_batches = num_seq_train // batch_size
        losses = []
        preds = []
        norms = []
        for i in range(num_batches):
            idx = range(i * batch_size, (i + 1) * batch_size)
            x_batch = X_train[idx]
            y_batch = y_train[idx]
            mask_batch = mask_train[idx]
            loss, out, batch_norm = train(x_batch, y_batch, mask_batch)
            #            print(batch_norm)
            norms.append(batch_norm)
            preds.append(out)
            losses.append(loss)

        #            if ((i+1) % config.write_every_batch == 0) | (i == 0):
        #                if i == 0:
        #                    start_place = 0
        #                else:
        #                    start_place = i-config.write_every_batch
        #                print "Batch %d of %d" % (i + 1, num_batches)
        #                print "  curbatch training loss: %.5f" % np.mean(losses[start_place:(i+1)])
        #                print "  curbatch training acc: %.5f" % np.mean(accuracy[start_place:(i+1)])
        predictions = np.concatenate(preds, axis=0)
        loss_train = np.mean(losses)
        all_losses_train.append(loss_train)

        acc_train = utils.proteins_acc(predictions, y_train[0:num_batches * batch_size],
                                       mask_train[0:num_batches * batch_size])
        all_accuracy_train.append(acc_train)

        mean_norm = np.mean(norms)
        all_mean_norm.append(mean_norm)

        if 1 == 1:
            print ("  average training loss: %.5f" % loss_train)
            print ("  average training accuracy: %.5f" % acc_train)
            print ("  average norm: %.5f" % mean_norm)

            sets = [  # ('train', X_train, y_train, mask_train, all_losses_eval_train, all_accuracy_eval_train),
                ('valid', X_valid, y_valid, mask_valid, all_losses_eval_valid, all_accuracy_eval_valid)]
            for subset, X, y, mask, all_losses, all_accuracy in sets:
                print ("  validating: %s loss" % subset)
                preds = []
                losses = []  # fresh list so evaluation losses are not mixed with the training-batch losses above
                num_batches = np.size(X, axis=0) // config.batch_size
                for i in range(num_batches):  ## +1 to get the "rest"
                    #                    print(i)
                    idx = range(i * batch_size, (i + 1) * batch_size)
                    x_batch = X[idx]
                    y_batch = y[idx]
                    mask_batch = mask[idx]
                    loss, out = eval(x_batch, y_batch, mask_batch)
                    preds.append(out)
                    #                    acc = utils.proteins_acc(out, y_batch, mask_batch)
                    losses.append(loss)
                #                    accuracy.append(acc)
                predictions = np.concatenate(preds, axis=0)
                #                print "  pred"
                #                print(predictions.shape)
                #                print(predictions.dtype)
                loss_eval = np.mean(losses)
                all_losses.append(loss_eval)

                #                acc_eval = np.mean(accuracy)
                acc_eval = utils.proteins_acc(predictions, y, mask)
                all_accuracy.append(acc_eval)

                print ("  average evaluation loss (%s): %.5f" % (subset, loss_eval))
                print ("  average evaluation accuracy (%s): %.5f" % (subset, acc_eval))

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_prev * (num_epochs - epoch)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print ("  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev))
        print ("  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str))
        print()

        if (epoch >= config.start_saving_at) and ((epoch % config.save_every) == 0):
            print ("  saving parameters and metadata")
            with open((metadata_path + side + "-%d" % (epoch) + ".pkl"), 'wb') as f:
                pickle.dump({
                    'config_name': config_name,
                    'param_values': nn.layers.get_all_param_values(l_out),
                    'losses_train': all_losses_train,
                    'accuracy_train': all_accuracy_train,
                    'losses_eval_train': all_losses_eval_train,
                    'losses_eval_valid': all_losses_eval_valid,
                    'losses_eval_test': all_losses_eval_test,
                    'accuracy_eval_valid': all_accuracy_eval_valid,
                    'accuracy_eval_train': all_accuracy_eval_train,
                    'accuracy_eval_test': all_accuracy_eval_test,
                    'mean_norm': all_mean_norm,
                    'time_since_start': time_since_start,
                    'i': i,
                }, f, pickle.HIGHEST_PROTOCOL)

            print ("  stored in %s" % metadata_path)

    print()
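
The cost in Example #20 is a masked categorical cross-entropy: per-position losses are weighted by sym_mask so that padded timesteps contribute nothing, and the sum is normalized by the number of unmasked positions (the L2 term aside). A small NumPy sketch of the masking part only, with illustrative names:

import numpy as np

TOL = 1e-5


def masked_crossentropy(probs_flat, targets_flat, mask_flat):
    # probs_flat: (N, num_classes) predicted probabilities
    # targets_flat: (N,) integer class labels
    # mask_flat: (N,) 1.0 for real positions, 0.0 for padding
    p = np.clip(probs_flat, TOL, 1 - TOL)
    nll = -np.log(p[np.arange(len(targets_flat)), targets_flat])
    return np.sum(nll * mask_flat) / np.sum(mask_flat)


probs = np.array([[0.9, 0.1], [0.2, 0.8], [0.5, 0.5]])
targets = np.array([0, 1, 0])
mask = np.array([1.0, 1.0, 0.0])  # third position is padding
print(masked_crossentropy(probs, targets, mask))  # mean of -log(0.9) and -log(0.8)
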
Example #21
0
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = INTERMEDIATE_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print("WARNING: not running in fast mode!")

    print("Using")
    print("  %s" % metadata_path)
    print("To generate")
    print("  %s" % prediction_path)
    print("  %s" % submission_path)

    print("Build model")
    interface_layers = config().build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(
        incomings=list(output_layers.values()))
    all_layers = lasagne.layers.get_all_layers(top_layer)
    num_params = lasagne.layers.count_params(top_layer)
    print("  number of parameters: %d" % num_params)
    print(string.ljust("  layer output shapes:", 36), end=' ')
    print(string.ljust("#params:", 10), end=' ')
    print("output shape:")
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 32)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(num_param.__str__(), 10)
        print("    %s %s %s" % (name, num_param, layer.output_shape))

    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape),
                                        dtype='float32')
        for (key, l_in) in input_layers.items()
    }
    idx = T.lscalar('idx')

    givens = dict()

    for key in list(input_layers.keys()):
        if key == "sunny":
            givens[input_layers[key].input_var] = xs_shared[key][
                idx * config().sunny_batch_size:(idx + 1) * config().sunny_batch_size]
        else:
            givens[input_layers[key].input_var] = xs_shared[key][
                idx * config().batch_size:(idx + 1) * config().batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer,
                                         deterministic=True)
        for network_output_layer in list(output_layers.values())
    ]

    iter_test = theano.function(
        [idx],
        network_outputs + theano_printer.get_the_stuff_to_print(),
        givens=givens,
        on_unused_input="ignore",
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    print("Load model parameters for resuming")
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer,
                                        resume_metadata['param_values'])
    num_batches_chunk = config().batches_per_chunk
    num_batches = get_number_of_test_batches()
    num_chunks = int(np.ceil(num_batches / float(config().batches_per_chunk)))

    chunks_train_idcs = list(range(1, num_chunks + 1))

    data_loader.filter_patient_folders()

    create_test_gen = partial(
        config().create_test_gen,
        required_input_keys=list(xs_shared.keys()),
        required_output_keys=[
            "patients", "classification_correction_function"
        ],
    )

    print("Generate predictions with this model")
    start_time = time.time()
    prev_time = start_time

    predictions = [{
        "patient": i + 1,
        "systole": np.zeros((0, 600)),
        "diastole": np.zeros((0, 600))
    } for i in range(NUM_PATIENTS)]

    for e, test_data in zip(itertools.count(start=1),
                            buffering.buffered_gen_threaded(
                                create_test_gen())):
        print("  load testing data onto GPU")

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])

        patient_ids = test_data["output"]["patients"]
        classification_correction = test_data["output"][
            "classification_correction_function"]
        print("  patients:", " ".join(map(str, patient_ids)))
        print("  chunk %d/%d" % (e, num_chunks))

        for b in range(num_batches_chunk):
            iter_result = iter_test(b)
            network_outputs = tuple(iter_result[:len(output_layers)])
            network_outputs_dict = dict(
                zip(output_layers.keys(), network_outputs))
            kaggle_systoles, kaggle_diastoles = config().postprocess(
                network_outputs_dict)
            kaggle_systoles, kaggle_diastoles = kaggle_systoles.astype(
                'float64'), kaggle_diastoles.astype('float64')
            # `sample_idx` is the position of the sample inside the current
            # batch; it is distinct from the symbolic `idx` defined above
            for sample_idx, patient_id in enumerate(
                    patient_ids[b * config().batch_size:(b + 1) *
                                config().batch_size]):
                if patient_id != 0:
                    index = patient_id - 1
                    patient_data = predictions[index]
                    assert patient_id == patient_data["patient"]

                    kaggle_systole = kaggle_systoles[sample_idx:sample_idx + 1, :]
                    kaggle_diastole = kaggle_diastoles[sample_idx:sample_idx + 1, :]
                    assert (np.isfinite(kaggle_systole).all()
                            and np.isfinite(kaggle_diastole).all())
                    correction = classification_correction[
                        b * config().batch_size + sample_idx]
                    kaggle_systole = correction(kaggle_systole)
                    kaggle_diastole = correction(kaggle_diastole)
                    assert (np.isfinite(kaggle_systole).all()
                            and np.isfinite(kaggle_diastole).all())
                    patient_data["systole"] = np.concatenate(
                        (patient_data["systole"], kaggle_systole), axis=0)
                    patient_data["diastole"] = np.concatenate(
                        (patient_data["diastole"], kaggle_diastole), axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (
            float(num_chunks - (e + 1)) / float(e + 1 - chunks_train_idcs[0]))
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print("  %s since start (%.2f s)" %
              (utils.hms(time_since_start), time_since_prev))
        print("  estimated %s to go (ETA: %s)" %
              (utils.hms(est_time_left), eta_str))
        print()

    already_printed = False
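    # average the stacked predictions per patient and repair any averaged
    # distribution that fails the validity check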
    for prediction in predictions:
        if prediction["systole"].size > 0 and prediction["diastole"].size > 0:
            average_method = getattr(config(), 'tta_average_method',
                                     partial(np.mean, axis=0))
            prediction["systole_average"] = average_method(
                prediction["systole"])
            prediction["diastole_average"] = average_method(
                prediction["diastole"])
            try:
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])
            except Exception:
                if not already_printed:
                    print("WARNING: some averaged predictions are not valid "
                          "distributions; enforcing monotonicity")
                    already_printed = True
                prediction["systole_average"] = make_monotone_distribution(
                    prediction["systole_average"])
                prediction["diastole_average"] = make_monotone_distribution(
                    prediction["diastole_average"])
                test_if_valid_distribution(prediction["systole_average"])
                test_if_valid_distribution(prediction["diastole_average"])

    print("Calculating training and validation set scores for reference")

    validation_dict = {}
    for patient_ids, set_name in [(validation_patients_indices, "validation"),
                                  (train_patients_indices, "train")]:
        errors = []
        for patient in patient_ids:
            prediction = predictions[patient - 1]
            if "systole_average" in prediction:
                assert patient == regular_labels[patient - 1, 0]
                error = CRSP(prediction["systole_average"],
                             regular_labels[patient - 1, 1])
                errors.append(error)
                error = CRSP(prediction["diastole_average"],
                             regular_labels[patient - 1, 2])
                errors.append(error)
        if len(errors) > 0:
            errors = np.array(errors)
            estimated_CRSP = np.mean(errors)
            print("  %s kaggle loss: %f" %
                  (string.rjust(set_name, 12), estimated_CRSP))
            validation_dict[set_name] = estimated_CRSP
        else:
            print("  %s kaggle loss: not calculated" %
                  (string.rjust(set_name, 12)))

    print("dumping prediction file to %s" % prediction_path)
    with open(prediction_path, 'wb') as f:
        pickle.dump(
            {
                'metadata_path': metadata_path,
                'prediction_path': prediction_path,
                'submission_path': submission_path,
                'configuration_file': config().__name__,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'time_since_start': time_since_start,
                'param_values': lasagne.layers.get_all_param_values(top_layer),
                'predictions': predictions,
                'validation_errors': validation_dict,
            }, f, pickle.HIGHEST_PROTOCOL)
    print("prediction file dumped")

    print("dumping submission file to %s" % submission_path)
    with open(submission_path, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile,
                               delimiter=',',
                               quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        csvwriter.writerow(['Id'] + ['P%d' % i for i in range(600)])
        for prediction in predictions:
            # the submission only has patients 501 to 700
            if prediction["patient"] in data_loader.test_patients_indices:
                if "diastole_average" not in prediction or "systole_average" not in prediction:
                    raise Exception("Not all test-set patients were predicted")
                csvwriter.writerow(["%d_Diastole" % prediction["patient"]] + [
                    "%.18f" % p
                    for p in prediction["diastole_average"].flatten()
                ])
                csvwriter.writerow(["%d_Systole" % prediction["patient"]] + [
                    "%.18f" % p
                    for p in prediction["systole_average"].flatten()
                ])
    print("submission file dumped")

    return
Example #22
0
            losses_eval_valid[obj_name] = valid_mean
            means.append(valid_mean)
            print(obj_name, valid_mean)
        print('Sum of mean losses:', sum(means))

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (config().max_nchunks - chunk_idx +
                                            1.) / (chunk_idx + 1. -
                                                   start_chunk_idx)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print("  %s since start (%.2f s)" %
              (utils.hms(time_since_start), time_since_prev))
        print("  estimated %s to go (ETA: %s)" %
              (utils.hms(est_time_left), eta_str))
        print()

    if ((chunk_idx + 1) % config().save_every) == 0:
        print()
        print('Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks))
        print('Saving metadata, parameters')

        with open(metadata_path, 'wb') as f:
            pickle.dump(
                {
                    'configuration_file': config_name,
                    'git_revision_hash': utils.get_git_revision_hash(),
                    'experiment_id': expid,
Example #23
0
def main():
    sym_y = T.imatrix('target_output')
    sym_mask = T.matrix('mask')
    sym_x = T.tensor3()

    TOL = 1e-5
    num_epochs = config.epochs
    batch_size = config.batch_size

#### DATA ####
#    print "@@@@TESTING@@@@"
#    l_in = nn.layers.InputLayer(shape=(None, 700, 42))
#    l_dim_a = nn.layers.DimshuffleLayer(
#        l_in, (0,2,1))
#    l_conv_a = nn.layers.Conv1DLayer(
#        incoming=l_dim_a, num_filters=42, border_mode='same',
#        filter_size=3, stride=1, nonlinearity=nn.nonlinearities.rectify)
#    l_dim_b = nn.layers.DimshuffleLayer(
#        l_conv_a, (0,2,1))
#    out = nn.layers.get_output(l_dim_b, sym_x)
#    testvar = np.ones((128, 700, 42)).astype('float32')
#    print "@@@@EVAL@@@@"
#    john = out.eval({sym_x: testvar})
#    print("Johns shape")
#    print(john.shape)


    print("Building network ...")
    l_in, l_out = config.build_model()

    all_layers = nn.layers.get_all_layers(l_out)
    num_params = nn.layers.count_params(l_out)
    print("  number of parameters: %d" % num_params)
    print("  layer output shapes:")
    for layer in all_layers:
        name = string.ljust(layer.__class__.__name__, 32)
        print("    %s %s" % (name, nn.layers.get_output_shape(layer)))
    print("Creating cost function")
    # lasagne.layers.get_output produces a variable for the output of the net
    out_train = nn.layers.get_output(
        l_out, sym_x, mask=sym_mask, deterministic=False)

#    testvar = np.ones((128, 700, 42)).astype('float32')
#    john = out_train.eval({sym_x: testvar})
#    print("@@@@@JOHN@@@@@")
#    print(john.shape)
#    print(john.reshape((-1, num_classes)).shape)

    out_eval = nn.layers.get_output(
        l_out, sym_x, mask=sym_mask, deterministic=True)

    probs_flat = out_train.reshape((-1, num_classes))

    lambda_reg = config.lambda_reg
    params = nn.layers.get_all_params(l_out, regularizable=True)
    reg_term = sum(T.sum(p**2) for p in params)
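    # cost: categorical cross-entropy over the flattened outputs, averaged over
    # unmasked positions, plus the L2 penalty scaled by lambda_reg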
    cost = T.nnet.categorical_crossentropy(T.clip(probs_flat, TOL, 1-TOL), sym_y.flatten())
    cost = T.sum(cost*sym_mask.flatten()) / T.sum(sym_mask) + lambda_reg * reg_term

    # Retrieve all parameters from the network
    all_params = nn.layers.get_all_params(l_out, trainable=True)
    # Setting the weights
    if hasattr(config, 'set_weights'):
        nn.layers.set_all_param_values(l_out, config.set_weights())
    # Compute SGD updates for training
    print("Computing updates ...")
    if hasattr(config, 'learning_rate_schedule'):
        learning_rate_schedule = config.learning_rate_schedule              # Import learning rate schedule
    else:
        learning_rate_schedule = { 0: config.learning_rate }
    learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))

    all_grads = T.grad(cost, all_params)

    # clip the global gradient norm before handing the gradients to the optimizer
    cut_norm = config.cut_grad
    clipped_grads, norm_calc = nn.updates.total_norm_constraint(
        all_grads, max_norm=cut_norm, return_norm=True)

    if optimizer == "rmsprop":
        updates = nn.updates.rmsprop(clipped_grads, all_params, learning_rate)
    elif optimizer == "adadelta":
        updates = nn.updates.adadelta(clipped_grads, all_params, learning_rate)
    elif optimizer == "adagrad":
        updates = nn.updates.adagrad(clipped_grads, all_params, learning_rate)
    elif optimizer == "nag":
        momentum_schedule = config.momentum_schedule
        momentum = theano.shared(np.float32(momentum_schedule[0]))
        updates = nn.updates.nesterov_momentum(clipped_grads, all_params,
                                               learning_rate, momentum)
    else:
        sys.exit("please choose either <rmsprop/adagrad/adadelta/nag> in configfile")
            
    # Theano functions for training and computing cost
    print "config.batch_size %d" %batch_size
    print "data.num_classes %d" %num_classes
    if hasattr(config, 'build_model'):
        print("has build model")
    print("Compiling functions ...")
    # Use this for training (see deterministic = False above)
    train = theano.function(
        [sym_x, sym_y, sym_mask], [cost, out_train, norm_calc], updates=updates)

    # use this for eval (deterministic = True + no updates)
    eval = theano.function([sym_x, sym_y, sym_mask], [cost, out_eval])

    # Start timers
    start_time = time.time()
    prev_time = start_time

    all_losses_train = []
    all_accuracy_train = []
    all_losses_eval_train = []
    all_losses_eval_valid = []
    all_losses_eval_test = []
    all_accuracy_eval_train = []
    all_accuracy_eval_valid = []
    all_accuracy_eval_test = []
    all_mean_norm = []


    import data
    X_train = data.X_train
    X_valid = data.X_valid
    X_test = data.X_test
    y_train = data.labels_train
    y_valid = data.labels_valid
    y_test = data.labels_test
    mask_train = data.mask_train
    mask_valid = data.mask_valid
    mask_test = data.mask_test
    print("y shape")
    print(y_valid.shape)
    print("X shape")
    print(X_valid.shape)
    # Start training
    
    if config.batch_norm:
        # deterministic pass with collect=True, used to gather the batch-norm
        # statistics over the training set before each evaluation
        collect_out = nn.layers.get_output(l_out, sym_x, deterministic=True,
                                           collect=True)
        f_collect = theano.function([sym_x], [collect_out])

    for epoch in range(num_epochs):

        if (epoch % 10) == 0:
            print "Epoch %d of %d" % (epoch + 1, num_epochs)

        if epoch in learning_rate_schedule:
            lr = np.float32(learning_rate_schedule[epoch])
            print "  setting learning rate to %.7f" % lr
            learning_rate.set_value(lr)
        if optimizer == "nag":
            if epoch in momentum_schedule:
                mu = np.float32(momentum_schedule[epoch])
                print "  setting learning rate to %.7f" % mu
                momentum.set_value(mu)
        print "Shuffling data"
        seq_names = np.arange(0,data.num_seq_train)
        np.random.shuffle(seq_names)     
        X_train = X_train[seq_names]
        y_train = y_train[seq_names]
        mask_train = mask_train[seq_names]

        num_batches = data.num_seq_train // batch_size
        losses = []
        preds = []
        norms = []
        for i in range(num_batches):
            idx = range(i*batch_size, (i+1)*batch_size)
            x_batch = X_train[idx]
            y_batch = y_train[idx]
            mask_batch = mask_train[idx]
            loss, out, batch_norm = train(x_batch, y_batch, mask_batch)
            print(batch_norm)
            norms.append(batch_norm)
            preds.append(out)
            losses.append(loss)

#            if ((i+1) % config.write_every_batch == 0) | (i == 0):
#                if i == 0:
#                    start_place = 0
#                else:
#                    start_place = i-config.write_every_batch
#                print "Batch %d of %d" % (i + 1, num_batches)
#                print "  curbatch training loss: %.5f" % np.mean(losses[start_place:(i+1)])
#                print "  curbatch training acc: %.5f" % np.mean(accuracy[start_place:(i+1)])
        predictions = np.concatenate(preds, axis = 0)
        loss_train = np.mean(losses)
        all_losses_train.append(loss_train)

        acc_train = utils.proteins_acc(predictions, y_train[0:num_batches*batch_size], mask_train[0:num_batches*batch_size])
        all_accuracy_train.append(acc_train)

        mean_norm = np.mean(norms)
        all_mean_norm.append(mean_norm)

        if 1==1:
            print "  average training loss: %.5f" % loss_train
            print "  average training accuracy: %.5f" % acc_train
            print "  average norm: %.5f" % mean_norm

        if 1==1:#(i + 1) % config.validate_every == 0:
            if config.batch_norm:
                _ = f_collect(X_train)
            sets = [#('train', X_train, y_train, mask_train, all_losses_eval_train, all_accuracy_eval_train),
                    ('valid', X_valid, y_valid, mask_valid, all_losses_eval_valid, all_accuracy_eval_valid),
                    ('test', X_test, y_test, mask_test, all_losses_eval_test, all_accuracy_eval_test)]
            for subset, X, y, mask, all_losses, all_accuracy in sets:
                print "  validating: %s loss" % subset
                preds = []
                losses = []
                num_batches = np.size(X,axis=0) // config.batch_size
                for i in range(num_batches):  # note: the trailing partial batch is not evaluated
                    print(i)
                    idx = range(i*batch_size, (i+1)*batch_size)
                    x_batch = X[idx]
                    y_batch = y[idx]
                    mask_batch = mask[idx]
                    loss, out = eval(x_batch, y_batch, mask_batch)
                    preds.append(out)
#                    acc = utils.proteins_acc(out, y_batch, mask_batch)
                    losses.append(loss)
#                    accuracy.append(acc)
                predictions = np.concatenate(preds, axis = 0)
                print "  pred"
                print(predictions.shape)
                print(predictions.dtype)
                loss_eval = np.mean(losses)
                all_losses.append(loss_eval)
                
#                acc_eval = np.mean(accuracy)
                acc_eval = utils.proteins_acc(
                    predictions, y[0:num_batches * batch_size],
                    mask[0:num_batches * batch_size])
                all_accuracy.append(acc_eval)

#                print "  average evaluation loss (%s): %.5f" % (subset, loss_eval)
                print "  average evaluation accuracy (%s): %.5f" % (subset, acc_eval)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (num_epochs - epoch - 1) / (epoch + 1)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

        if (epoch >= config.start_saving_at) and ((epoch % config.save_every) == 0):
            print "  saving parameters and metadata"
            with open(metadata_path + "-%d.pkl" % epoch, 'wb') as f:
                pickle.dump({
                        'config_name': config_name,
                        'param_values': nn.layers.get_all_param_values(l_out),
                        'losses_train': all_losses_train,
                        'accuracy_train': all_accuracy_train,
                        'losses_eval_train': all_losses_eval_train,
                        'losses_eval_valid': all_losses_eval_valid,
                        'losses_eval_test': all_losses_eval_test,
                        'accuracy_eval_valid': all_accuracy_eval_valid,
                        'accuracy_eval_train': all_accuracy_eval_train,
                        'accuracy_eval_test': all_accuracy_eval_test,
                        'mean_norm': all_mean_norm,
                        'time_since_start': time_since_start,
                        'i': i,
                    }, f, pickle.HIGHEST_PROTOCOL)

            print "  stored in %s" % metadata_path

    print
Example #24
0
                                       cont_denom])
            del outputs

    now = time.time()
    time_since_start = now - start_time
    time_since_prev = now - prev_time
    prev_time = now
    est_time_left = time_since_start * \
        ((num_chunks_train - (e + 1)) /
         float(e + 1 - chunks_train_ids[0]))
    eta = datetime.datetime.now() + \
        datetime.timedelta(seconds=est_time_left)
    eta_str = eta.strftime("%c")

    print("  %s since start (%.2f s)" % (
        hms(time_since_start),
        time_since_prev
    ))
    print("  estimated %s to go (ETA: %s)\n" % (
        hms(est_time_left),
        eta_str
    ))

    # Save after every validate.
    if (((e + 1) % save_every) == 0 or
        ((e + 1) % validate_every) == 0 or
            ((e + 1) == num_chunks_train)):
        print("\nSaving model ...")

        with open(dump_path, 'wb') as f:
            pickle.dump({
Example #25
0
def predict_model(expid, mfile=None):
    metadata_path = MODEL_PATH + "%s.pkl" % (expid if not mfile else mfile)
    prediction_path = MODEL_PREDICTIONS_PATH + "%s.pkl" % expid
    submission_path = SUBMISSION_PATH + "%s.csv" % expid

    if theano.config.optimizer != "fast_run":
        print "WARNING: not running in fast mode!"

    print "Using"
    print "  %s" % metadata_path
    print "To generate"
    print "  %s" % prediction_path

    print "Build model"
    interface_layers = config.build_model()

    output_layers = interface_layers["outputs"]
    input_layers = interface_layers["inputs"]
    top_layer = lasagne.layers.MergeLayer(incomings=output_layers.values())
    all_layers = lasagne.layers.get_all_layers(top_layer)
    all_params = lasagne.layers.get_all_params(top_layer, trainable=True)

    num_params = sum([np.prod(p.get_value().shape) for p in all_params])

    print string.ljust("  layer output shapes:", 34),
    print string.ljust("#params:", 10),
    print string.ljust("#data:", 10),
    print "output shape:"
    for layer in all_layers[:-1]:
        name = string.ljust(layer.__class__.__name__, 30)
        num_param = sum(
            [np.prod(p.get_value().shape) for p in layer.get_params()])
        num_param = string.ljust(int(num_param).__str__(), 10)
        num_size = string.ljust(np.prod(layer.output_shape[1:]).__str__(), 10)
        print "    %s %s %s %s" % (name, num_param, num_size,
                                   layer.output_shape)
    print "  number of parameters: %d" % num_params

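    # shared buffers that hold one chunk of test data on the GPU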
    xs_shared = {
        key: lasagne.utils.shared_empty(dim=len(l_in.output_shape),
                                        dtype='float32')
        for (key, l_in) in input_layers.iteritems()
    }

    idx = T.lscalar('idx')

    givens = dict()

    for (key, l_in) in input_layers.iteritems():
        givens[l_in.input_var] = xs_shared[key][idx *
                                                config.batch_size:(idx + 1) *
                                                config.batch_size]

    network_outputs = [
        lasagne.layers.helper.get_output(network_output_layer,
                                         deterministic=True)
        for network_output_layer in output_layers.values()
    ]

    print "Compiling..."
    iter_test = theano.function(
        [idx],
        network_outputs + theano_printer.get_the_stuff_to_print(),
        givens=givens,
        on_unused_input="ignore",
        # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
    )

    required_input = {
        key: l_in.output_shape
        for (key, l_in) in input_layers.iteritems()
    }

    print "Preparing dataloaders"
    config.test_data.prepare()
    chunk_size = config.batches_per_chunk * config.batch_size

    test_data_generator = buffering.buffered_gen_threaded(
        config.test_data.generate_batch(
            chunk_size=chunk_size,
            required_input=required_input,
            required_output={},
        ))

    print "Load model parameters for resuming"
    resume_metadata = np.load(metadata_path)
    lasagne.layers.set_all_param_values(top_layer,
                                        resume_metadata['param_values'])

    chunks_test_idcs = itertools.count(0)
    num_chunks_test = math.ceil(1.0 * config.test_data.epochs *
                                config.test_data.number_of_samples /
                                (config.batch_size * config.batches_per_chunk))

    start_time, prev_time = None, None
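    # predictions are collected per sample id, keyed by output layer name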
    all_predictions = dict()

    print "Loading first chunks"
    for e, test_data in izip(chunks_test_idcs, test_data_generator):

        if start_time is None:
            start_time = time.time()
            prev_time = start_time
        print

        print "Chunk %d/%d" % (e + 1, num_chunks_test)
        print "=============="

        if config.dump_network_loaded_data:
            pickle.dump(test_data,
                        open("data_loader_dump_test_%d.pkl" % e, "wb"))

        for key in xs_shared:
            xs_shared[key].set_value(test_data["input"][key])

        sample_ids = test_data[IDS]

        for b in xrange(config.batches_per_chunk):
            th_result = iter_test(b)

            predictions = th_result[:len(network_outputs)]

            for output_idx, key in enumerate(output_layers.keys()):
                for sample_idx in xrange(b * config.batch_size,
                                         (b + 1) * config.batch_size):
                    prediction_pos = sample_idx % config.batch_size
                    sample_id = sample_ids[sample_idx]
                    if sample_id is not None:
                        if sample_id not in all_predictions:
                            all_predictions[sample_id] = dict()
                        if key not in all_predictions[sample_id]:
                            all_predictions[sample_id][key] = predictions[
                                output_idx][prediction_pos]
                        else:
                            all_predictions[sample_id][key] = np.concatenate(
                                (all_predictions[sample_id][key],
                                 predictions[output_idx][prediction_pos]),
                                axis=0)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        print "  %s since start (+%.2f s)" % (utils.hms(time_since_start),
                                              time_since_prev)
        try:
            if num_chunks_test:
                est_time_left = time_since_start * (float(num_chunks_test -
                                                          (e + 1)) /
                                                    float(e + 1))
                eta = datetime.datetime.now() + datetime.timedelta(
                    seconds=est_time_left)
                eta_str = eta.strftime("%c")
                print "  estimated %s to go" % utils.hms(est_time_left)
                print "  (ETA: %s)" % eta_str
        except OverflowError:
            print "  This will take really long, like REALLY long."

        print "  %dms per testing sample" % (1000. * time_since_start / (
            (e + 1) * config.batch_size * config.batches_per_chunk))

    with open(prediction_path, 'wb') as f:
        pickle.dump(
            {
                'metadata_path': metadata_path,
                'prediction_path': prediction_path,
                'configuration_file': config.__name__,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'predictions': all_predictions,
            }, f, pickle.HIGHEST_PROTOCOL)

    print "  saved to %s" % prediction_path
    print

    return