            loss = compute_loss(b)
            losses.append(loss)

        mean_valid_loss = np.sqrt(np.mean(losses))
        print "  mean validation loss (RMSE):\t\t%.6f" % mean_valid_loss
        losses_valid.append(mean_valid_loss)

    now = time.time()
    time_since_start = now - start_time
    time_since_prev = now - prev_time
    prev_time = now
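    # ETA by linear extrapolation: elapsed time scaled by (remaining chunks / completed chunks)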
    est_time_left = time_since_start * (float(NUM_CHUNKS -
                                              (e + 1)) / float(e + 1))
    eta = datetime.now() + timedelta(seconds=est_time_left)
    eta_str = eta.strftime("%c")
    print "  %s since start (%.2f s)" % (load_data.hms(time_since_start),
                                         time_since_prev)
    print "  estimated %s to go (ETA: %s)" % (load_data.hms(est_time_left),
                                              eta_str)
    print

del chunk_data, xs_chunk, x_chunk, y_chunk, xs_valid, x_valid  # memory cleanup

print "Compute predictions on validation set for analysis in batches"
predictions_list = []
for b in xrange(num_batches_valid):
    # if b % 1000 == 0:
    #     print "  batch %d/%d" % (b + 1, num_batches_valid)

    predictions = compute_output(b)
    predictions_list.append(predictions)
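
# --- the remainder of this excerpt comes from a chunk-wise, augmentation-averaged
# --- prediction variant; the enclosing loop over validation chunks is not shown here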
            predictions = compute_output(b)
            predictions_chunk_list.append(predictions)

        predictions_chunk = np.vstack(predictions_chunk_list)
        predictions_chunk = predictions_chunk[:chunk_length]  # cut off zeros / padding

        print("  compute average over transforms")
        predictions_chunk_avg = predictions_chunk.reshape(
            -1, len(augmentation_transforms), 37).mean(1)

        predictions_list.append(predictions_chunk_avg)

        time_since_start = time.time() - start_time
        print("  %s since start" % load_data.hms(time_since_start))

    all_predictions = np.vstack(predictions_list)

    print("Write predictions to %s" % target_path_valid)
    load_data.save_gz(target_path_valid, all_predictions)

    print("Evaluate")
    rmse_valid = analysis['losses_valid'][-1]
    rmse_augmented = np.sqrt(np.mean((y_valid - all_predictions)**2))
    print("  MSE (last iteration):\t%.6f" % rmse_valid)
    print("  MSE (augmented):\t%.6f" % rmse_augmented)

if DO_TEST:
    print()
    print("TEST SET")
            if b % 1000 == 0:
                print("  batch %d/%d" % (b + 1, num_batches_chunk))

            predictions = compute_output(b)
            predictions_chunk_list.append(predictions)

        predictions_chunk = np.vstack(predictions_chunk_list)
        predictions_chunk = predictions_chunk[:chunk_length]  # cut off zeros / padding

        print "  compute average over transforms"
        predictions_chunk_avg = predictions_chunk.reshape(-1, len(augmentation_transforms), 37).mean(1)

        predictions_list.append(predictions_chunk_avg)

        time_since_start = time.time() - start_time
        print "  %s since start" % load_data.hms(time_since_start)


    all_predictions = np.vstack(predictions_list)
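    # note: even inside the DO_TEST branch this excerpt still writes to target_path_valid and
    # evaluates against y_valid, presumably carried over from the validation section above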

    print "Write predictions to %s" % target_path_valid
    load_data.save_gz(target_path_valid, all_predictions)

    print "Evaluate"
    rmse_valid = analysis['losses_valid'][-1]
    rmse_augmented = np.sqrt(np.mean((y_valid - all_predictions)**2))
    print "  MSE (last iteration):\t%.6f" % rmse_valid
    print "  MSE (augmented):\t%.6f" % rmse_augmented


Example no. 4
def main(
    model="resnet",
    mode=params.mode,
    num_chunks=params.num_chunks,
    chunk_size=params.chunk_size,
    input_sizes=params.input_sizes,
    batch_size=params.batch_size,
    nbands=params.nbands,
    model_name=params.model_name,
):
    #create a model (neural network)
    multi_model = call_model(params, model=model)
    print("Model loaded: {}".format(model), flush=True)

    if mode == "train":
        #create a csv logger that will store the history of the .fit function into a .csv file
        with open(params.full_path_of_history, 'w',
                  newline='') as history_file:
            writer = csv.writer(history_file)
            writer.writerow(["chunk", "loss", "binary_accuracy"])

            optimizer = optimizers.Adam(lr=params.learning_rate)
            multi_model.compile(
                optimizer=optimizer,
                loss="binary_crossentropy",
                metrics=[metrics.binary_accuracy],
            )

            if nbands == 3:
                augmented_data_gen_pos = ra.realtime_augmented_data_gen_pos_col(
                    params=params,
                    num_chunks=params.num_chunks,
                    chunk_size=chunk_size,
                    target_sizes=input_sizes,
                    augmentation_params=params.default_augmentation_params,
                )
                augmented_data_gen_neg = ra.realtime_augmented_data_gen_neg_col(
                    params=params,
                    num_chunks=params.num_chunks,
                    chunk_size=chunk_size,
                    target_sizes=input_sizes,
                    augmentation_params=params.default_augmentation_params,
                )

            else:
                augmented_data_gen_pos = ra.realtime_augmented_data_gen_pos(
                    params=params,
                    range_min=params.range_min,
                    range_max=params.range_max,
                    num_chunks=params.num_chunks,
                    chunk_size=chunk_size,
                    target_sizes=input_sizes,
                    normalize=params.normalize,
                    resize=params.resize,
                    augmentation_params=params.default_augmentation_params,
                )
                augmented_data_gen_neg = ra.realtime_augmented_data_gen_neg(
                    params=params,
                    num_chunks=params.num_chunks,
                    chunk_size=chunk_size,
                    target_sizes=input_sizes,
                    normalize=params.normalize,
                    resize=params.resize,
                    augmentation_params=params.default_augmentation_params,
                )

            train_gen_neg = load_data.buffered_gen_mp(
                augmented_data_gen_neg, buffer_size=params.buffer_size)
            train_gen_pos = load_data.buffered_gen_mp(
                augmented_data_gen_pos, buffer_size=params.buffer_size)
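            # buffered_gen_mp presumably prefetches chunks from the augmentation generators
            # in a background process, so data loading/augmentation overlaps with training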

            loss_per_chunk = []
            bin_acc_per_chunk = []
            actual_begin_time = time.time()
            try:
                for chunk in range(params.num_chunks):
                    start_time = time.time()
                    chunk_data_pos, chunk_length = next(train_gen_pos)
                    y_train_pos = chunk_data_pos.pop()
                    X_train_pos = chunk_data_pos

                    chunk_data_neg, _ = next(train_gen_neg)
                    y_train_neg = chunk_data_neg.pop()
                    X_train_neg = chunk_data_neg

                    if False:  # debug: set to True to view some of the positive/negative images
                        imgs = chunk_data_pos[0]
                        imgs = np.squeeze(imgs)
                        for img in imgs:
                            plt.imshow(img / 255.0)
                            plt.show()

                    X_train = np.concatenate((X_train_pos[0], X_train_neg[0]))
                    y_train = np.concatenate((y_train_pos, y_train_neg))
                    y_train = y_train.astype(np.int32)
                    y_train = np.expand_dims(y_train, axis=1)
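                    # X_train / y_train now hold the concatenated positive and negative examples
                    # of this chunk; labels are int32 with shape (N, 1) for binary crossentropy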
                    batches = 0
                    start_chunk_processing_time = time.time()
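                    # train on this chunk in minibatches; each batch is scaled to [0, 1] and the
                    # (presumably precomputed) mean image params.avg_img is subtracted before fitting.
                    # iterate_minibatches() is not defined in this excerpt; a sketch of a compatible
                    # helper is given after the example.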
                    for batch in iterate_minibatches(X_train,
                                                     y_train,
                                                     batch_size,
                                                     shuffle=True):
                        X_batch, y_batch = batch

                        history = multi_model.fit(
                            X_batch / 255.0 - params.avg_img, y_batch)
                        batches += 1
                    print("Chunck neural net time: {0:.3f} seconds".format(
                        time.time() - start_chunk_processing_time),
                          flush=True)
                    #write results to csv for later use
                    writer.writerow([
                        str(chunk),
                        str(history.history["loss"][0]),
                        str(history.history["binary_accuracy"][0])
                    ])

                    #store loss and accuracy in list
                    loss_per_chunk.append(history.history["loss"][0])
                    bin_acc_per_chunk.append(
                        history.history["binary_accuracy"][0])

                    # plot loss and accuracy on interval
                    if chunk % params.chunk_plot_interval == 0:
                        save_loss_and_acc_figure(loss_per_chunk,
                                                 bin_acc_per_chunk, params)

                    #empty the train data
                    X_train = None
                    y_train = None
                    print("Chunck {}/{} has been trained".format(
                        chunk + 1, params.num_chunks),
                          flush=True)

            except KeyboardInterrupt:
                multi_model.save_weights(params.full_path_of_weights)
                print("interrupted by KEYBOARD!", flush=True)
                print("saved weights to: {}".format(
                    params.full_path_of_weights),
                      flush=True)
            end_time = time.time()

            multi_model.save_weights(params.full_path_of_weights)
            print("\nSaved weights to: {}".format(params.full_path_of_weights),
                  flush=True)
            print("\nSaved results to: {}".format(params.full_path_of_history),
                  flush=True)
            final_time = end_time - actual_begin_time
            print("\nTotal time employed ",
                  load_data.hms(final_time),
                  flush=True)

    if mode == "predict":
        if nbands == 3:
            augmented_data_gen_test_fixed = ra.realtime_fixed_augmented_data_test_col(
                params=params,
                target_sizes=input_sizes)  # ,normalize=normalize)
        else:
            augmented_data_gen_test_fixed = ra.realtime_fixed_augmented_data_test(
                params=params, target_sizes=input_sizes)

        #load a trained model
        multi_model.load_weights(params.full_path_predict_weights)

        predictions = []
        test_batches = 0
        if params.augm_pred:  # apply test-time augmentation: average predictions over the original image and its flipped variants
            start_time = time.time()
            for e, (chunk_data_test, chunk_length_test
                    ) in enumerate(augmented_data_gen_test_fixed):
                X_test = chunk_data_test
                X_test = X_test[0]
                X_test = X_test / 255.0 - params.avg_img
                pred1 = multi_model.predict(X_test)
                pred2 = multi_model.predict(
                    np.array([np.flipud(image) for image in X_test]))
                pred3 = multi_model.predict(
                    np.array([np.fliplr(np.flipud(image))
                              for image in X_test]))
                pred4 = multi_model.predict(
                    np.array([np.fliplr(image) for image in X_test]))
                preds = np.mean([pred1, pred2, pred3, pred4], axis=0)
                preds = preds.tolist()
                predictions = predictions + preds
                print("done with predict chunk: {}".format(e), flush=True)
        else:
            for e, (chunk_data_test, chunk_length_test
                    ) in enumerate(augmented_data_gen_test_fixed):
                X_test = chunk_data_test
                X_test = X_test[0]
                X_test = X_test / 255.0 - params.avg_img
                pred1 = multi_model.predict(X_test)
                preds = pred1.tolist()
                predictions = predictions + preds

        with open("pred_" + params.model_name + ".pkl", "wb") as f:
            pickle.dump([[ra.test_data], [predictions]], f,
                        pickle.HIGHEST_PROTOCOL)
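        # the pickle presumably pairs the test-data listing (ra.test_data) with the
        # predictions so that images and scores can be matched up later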

        objects = []
        with (open("pred_" + params.model_name + ".pkl", "rb")) as openfile:
            while True:
                try:
                    objects.append(pickle.load(openfile))
                except EOFError:
                    break

        f = open("pred_my_model.csv", "w")
        x = str(objects[0])
        f.write(x)
        f.write("\n")
        f.close()
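
# iterate_minibatches() is called in Example no. 4 but not defined in the excerpt.
# Below is a minimal sketch of a compatible helper, assuming X and y are NumPy arrays
# with matching first dimensions (the exact behaviour of the original is an assumption):
import numpy as np


def iterate_minibatches(X, y, batch_size, shuffle=False):
    """Yield (X_batch, y_batch) slices of at most batch_size samples."""
    indices = np.arange(len(X))
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, len(X), batch_size):
        idx = indices[start:start + batch_size]
        yield X[idx], y[idx]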

Example no. 5
            losses.append(loss)

        mean_valid_loss = np.sqrt(np.mean(losses))
        print "  mean validation loss (RMSE):\t\t%.6f" % mean_valid_loss
        losses_valid.append(mean_valid_loss)

        layers.dump_params(l6, e=e)
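        # dump_params presumably checkpoints the current network parameters (reached via the
        # top layer l6), tagged with the chunk counter e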

    now = time.time()
    time_since_start = now - start_time
    time_since_prev = now - prev_time
    prev_time = now
    est_time_left = time_since_start * (float(NUM_CHUNKS - (e + 1)) / float(e + 1))
    eta = datetime.now() + timedelta(seconds=est_time_left)
    eta_str = eta.strftime("%c")
    print "  %s since start (%.2f s)" % (load_data.hms(time_since_start), time_since_prev)
    print "  estimated %s to go (ETA: %s)" % (load_data.hms(est_time_left), eta_str)
    print


del chunk_data, xs_chunk, x_chunk, y_chunk, xs_valid, x_valid # memory cleanup


print "Compute predictions on validation set for analysis in batches"
predictions_list = []
for b in xrange(num_batches_valid):
    # if b % 1000 == 0:
    #     print "  batch %d/%d" % (b + 1, num_batches_valid)

    predictions = compute_output(b)
    predictions_list.append(predictions)
Example no. 6
            losses.append(loss)

        mean_valid_loss = np.sqrt(np.mean(losses))
        print("  mean validation loss (RMSE):\t\t%.6f" % mean_valid_loss)
        losses_valid.append(mean_valid_loss)

    now = time.time()
    time_since_start = now - start_time
    time_since_prev = now - prev_time
    prev_time = now
    est_time_left = time_since_start * (float(NUM_CHUNKS -
                                              (e + 1)) / float(e + 1))
    eta = datetime.now() + timedelta(seconds=est_time_left)
    eta_str = eta.strftime("%c")
    print("  %s since start (%.2f s)" %
          (load_data.hms(time_since_start), time_since_prev))
    print("  estimated %s to go (ETA: %s)" %
          (load_data.hms(est_time_left), eta_str))
    print()

del chunk_data, xs_chunk, x_chunk, y_chunk, xs_valid, x_valid  # memory cleanup

print("Compute predictions on validation set for analysis in batches")
predictions_list = []
for b in range(num_batches_valid):
    # if b % 1000 == 0:
    #     print "  batch %d/%d" % (b + 1, num_batches_valid)

    predictions = compute_output(b)
    predictions_list.append(predictions)

Example no. 7
            losses.append(loss)

        mean_valid_loss = np.sqrt(np.mean(losses))
        print("  mean validation loss (RMSE):\t\t%.6f" % mean_valid_loss)
        losses_valid.append(mean_valid_loss)

        layers.dump_params(l6, e=e)

    now = time.time()
    time_since_start = now - start_time
    time_since_prev = now - prev_time
    prev_time = now
    est_time_left = time_since_start * (float(NUM_CHUNKS - (e + 1)) / float(e + 1))
    eta = datetime.now() + timedelta(seconds=est_time_left)
    eta_str = eta.strftime("%c")
    print("  %s since start (%.2f s)" % (load_data.hms(time_since_start), time_since_prev))
    print("  estimated %s to go (ETA: %s)" % (load_data.hms(est_time_left), eta_str))
    print()


del chunk_data, xs_chunk, x_chunk, y_chunk, xs_valid, x_valid # memory cleanup


print("Compute predictions on validation set for analysis in batches")
predictions_list = []
for b in range(num_batches_valid):
    # if b % 1000 == 0:
    #     print "  batch %d/%d" % (b + 1, num_batches_valid)

    predictions = compute_output(b)
    predictions_list.append(predictions)