def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
    print ("Loading data definitions...")
    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl'))

    # frames_source = frames_source[:: 2]
    frames = np.zeros(shape=((len(frames_source),) + IMG_SIZE))

    j = 1
    for i in range(1, len(frames_source)):
        filename = "frame_" + str(j) + ".png"
        im_file = os.path.join(DATA_DIR, filename)
        try:
            frame = cv2.imread(im_file, cv2.IMREAD_COLOR)
            frames[i] = (frame.astype(np.float32) - 127.5) / 127.5
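            # cv2.imread returns None for a missing file, so .astype raises
            # AttributeError, which the handler below reports (the frame
            # stays all-zero). Subtracting and dividing by 127.5 maps pixels
            # to [-1, 1], matching the *127.5 + 127.5 de-normalisation used
            # when generated images are saved later.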
            # j = j + 2
            j = j + 1
        except AttributeError as e:
            print(im_file)
            print(e)

    # Build video progressions
    videos_list = []
    start_frame_index = 0
    end_frame_index = VIDEO_LENGTH
    while (end_frame_index <= len(frames_source)):
        frame_list = frames_source[start_frame_index:end_frame_index]
        if (len(set(frame_list)) == 1):
            videos_list.append((start_frame_index, end_frame_index))
            start_frame_index = start_frame_index + 1
            end_frame_index = end_frame_index + 1
        else:
            start_frame_index = end_frame_index - 1
            end_frame_index = start_frame_index + VIDEO_LENGTH
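    # Each (start, end) pair is a VIDEO_LENGTH-frame window drawn entirely
    # from one source clip; windows straddling a clip boundary are dropped
    # by restarting the scan at the boundary.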

    videos_list = np.asarray(videos_list, dtype=np.int32)
    n_videos = videos_list.shape[0]

    if SHUFFLE:
        # Shuffle video windows to aid generalization
        videos_list = np.random.permutation(videos_list)

    if CLASSIFIER:
        # Load labels into categorical one-hot vectors
        actions = ['moving slow', 'slowing down', 'standing', 'speeding up', 'moving fast', 'fake']
        print ("Loading annotations...")
        action_classes = hkl.load(os.path.join(DATA_DIR, 'annotations_train_128.hkl'))
        action_nums = []
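        # Each annotation is a comma-separated string whose fields after the
        # first two are 'Key:value' pairs; the 'Driver' value is mapped to
        # its index in the actions list.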
        for i in range(len(action_classes)):
            action_dict = dict(ele.split(':') for ele in action_classes[i].split(', ')[2:])
            action_nums.append(actions.index(str(action_dict['Driver'])))
        # action_nums = action_nums[::2]
        action_cats = np.asarray(to_categorical(action_nums, len(actions)))

    # Setup validation
    val_frames_source = hkl.load(os.path.join(VAL_DATA_DIR, 'sources_val_128.hkl'))
    # val_frames_source = val_frames_source[:: 2]
    val_frames = np.zeros(shape=((len(val_frames_source),) + IMG_SIZE))

    j = 1
    for i in range(1, len(val_frames_source)):
        filename = "frame_" + str(j) + ".png"
        im_file = os.path.join(VAL_DATA_DIR, filename)
        try:
            val_frame = cv2.imread(im_file, cv2.IMREAD_COLOR)
            val_frames[i] = (val_frame.astype(np.float32) - 127.5) / 127.5
            # j = j + 2
            j = j + 1
        except AttributeError as e:
            print(im_file)
            print(e)

    val_videos_list = []
    start_frame_index = 0
    end_frame_index = VIDEO_LENGTH
    while (end_frame_index <= len(val_frames_source)):
        val_frame_list = val_frames_source[start_frame_index:end_frame_index]
        if (len(set(val_frame_list)) == 1):
            val_videos_list.append((start_frame_index, end_frame_index))
            start_frame_index = start_frame_index + VIDEO_LENGTH
            end_frame_index = end_frame_index + VIDEO_LENGTH
        else:
            start_frame_index = end_frame_index - 1
            end_frame_index = start_frame_index + VIDEO_LENGTH

    val_videos_list = np.asarray(val_videos_list, dtype=np.int32)
    n_val_videos = val_videos_list.shape[0]

    if CLASSIFIER:
        # Load validation labels into categorical one-hot vectors
        val_action_classes = hkl.load(os.path.join(VAL_DATA_DIR, 'annotations_val_128.hkl'))
        val_action_nums = []
        for i in range(len(val_action_classes)):
            val_action_dict = dict(ele.split(':') for ele in val_action_classes[i].split(', ')[2:])
            val_action_nums.append(actions.index(str(val_action_dict['Driver'])))
        # val_action_nums = val_action_nums[::2]
        val_action_cats = to_categorical(val_action_nums, len(actions))

    # Build the Spatio-temporal Autoencoder
    print ("Creating models...")
    encoder = encoder_model()
    decoder = decoder_model()

    intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output)
    mask_gen_1 = Sequential()
    mask_gen_1.add(encoder)
    mask_gen_1.add(intermediate_decoder)
    mask_gen_1.compile(loss='mean_squared_error', optimizer=OPTIM_G)

    autoencoder = autoencoder_model(encoder, decoder)

    if CLASSIFIER:
        # classifier = classifier_model()
        classifier = conv_classifier_model()
        action_predictor = action_model(encoder, decoder, classifier)
        action_predictor.compile(loss=['mse', 'categorical_crossentropy'],
                                 loss_weights=LOSS_WEIGHTS,
                                 optimizer=OPTIM_G,
                                 metrics=['accuracy'])
        # action_predictor.compile(loss='categorical_crossentropy',
        #                          optimizer=OPTIM_G,
        #                          metrics=['accuracy'])

        set_trainability(classifier, True)
        classifier.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=OPTIM_D)
        run_utilities(encoder, decoder, autoencoder, classifier, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS)
        print (action_predictor.summary())

    else:
        run_utilities(encoder, decoder, autoencoder, "None", ENC_WEIGHTS, DEC_WEIGHTS, "None")

    autoencoder.compile(loss="mean_squared_error", optimizer=OPTIM_A)

    NB_ITERATIONS = int(n_videos/BATCH_SIZE)
    # NB_ITERATIONS = 1
    NB_VAL_ITERATIONS = int(n_val_videos/BATCH_SIZE)

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR, histogram_freq=0, write_graph=False, write_images=False)
    TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False)
    LRS = lrs_callback.LearningRateScheduler(schedule=schedule)
    LRS.set_model(autoencoder)


    print ("Beginning Training...")
    # Begin Training
    for epoch in range(NB_EPOCHS_AUTOENCODER):
        print("\n\nEpoch ", epoch)
        loss = []
        val_loss = []

        # Set learning rate every epoch
        LRS.on_epoch_begin(epoch=epoch)
        lr = K.get_value(autoencoder.optimizer.lr)
        print ("Learning rate: " + str(lr))

        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            X, y = load_X_y(videos_list, index, frames, [])
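            # First 10 frames condition the model, last 10 are the target;
            # the hard-coded 10s correspond to the VIDEO_LENGTH/2 split used
            # in the later examples.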
            X_train = X[:, 0 : 10]
            y_train = X[:, 10 :]
            loss.append(autoencoder.train_on_batch(X_train, y_train))

            arrow = int(index / (NB_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS-1) + "  " +
                         "loss: " + str(loss[len(loss)-1]) +
                         "\t    [" + "{0}>".format("="*(arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            predicted_images = autoencoder.predict(X_train, verbose=0)
            orig_image, truth_image, pred_image = combine_images(X_train, y_train, predicted_images)
            pred_image = pred_image * 127.5 + 127.5
            orig_image = orig_image * 127.5 + 127.5
            truth_image = truth_image * 127.5 + 127.5
            if epoch == 0 :
                cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_orig.png"), orig_image)
                cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_truth.png"), truth_image)
            cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_pred.png"), pred_image)

        # Run over validation data
        for index in range(NB_VAL_ITERATIONS):
            X, y = load_X_y(val_videos_list, index, val_frames, [])
            X_train = X[:, 0 : 10]
            y_train = X[:, 10 :]
            val_loss.append(autoencoder.test_on_batch(X_train, y_train))

            arrow = int(index / (NB_VAL_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" + str(NB_VAL_ITERATIONS-1) + "  " +
                         "val_loss: " + str(val_loss[len(val_loss)-1]) +
                         "\t    [" + "{0}>".format("="*(arrow)))
            stdout.flush()

        # Aggregate and log the losses after the epoch
        avg_loss = sum(loss)/len(loss)
        avg_val_loss = sum(val_loss) / len(val_loss)
        logs = {'loss': avg_loss, 'val_loss' : avg_val_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"d_loss\":%f};\n" % (epoch, avg_loss))

        print("\nAvg loss: " + str(avg_loss) + " Avg val loss: " + str(avg_val_loss))

        # Save model weights per epoch to file
        encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True)

        # Save predicted mask per epoch
        predicted_attn = mask_gen_1.predict(X_train, verbose=0)
        a_pred = np.reshape(predicted_attn, newshape=(10, 10, 16, 16, 1))
        np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_gen1_' + str(epoch) + '.npy'), a_pred)

        # predicted_attn = mask_gen_2.predict(X_train, verbose=0)
        # a_pred = np.reshape(predicted_attn, newshape=(10, 10, 128, 128, 1))
        # np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_gen2_' + str(epoch) + '.npy'), a_pred)

    # Adversarial phase: train the classifier against the action predictor
    if CLASSIFIER:
        # exp_memory = ExperienceMemory(memory_length=100)
        print ("Training Classifier...")
        # Setup fake labels
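        # Generated frames are all assigned the final class, 'fake'; real
        # futures keep their annotated driver action.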
        y_fake_classes = []
        for k in range(BATCH_SIZE):
            class_nums = (len(actions) - 1) * np.ones(shape=(int(VIDEO_LENGTH / 2), 1), dtype=np.float32)
            y_fake_classes.append(to_categorical(class_nums, len(actions)))

        y_fake_classes = np.asarray(y_fake_classes)

        for epoch in range(NB_EPOCHS_CLASS):
            print("\n\nEpoch ", epoch)
            c_loss = []
            a_loss = []
            val_c_loss = []
            val_a_loss = []

            # # Set learning rate every epoch
            # LRS.on_epoch_begin(epoch=epoch)
            lr = K.get_value(autoencoder.optimizer.lr)
            print ("Learning rate: " + str(lr))
            print ("a_loss_metrics: " + str(action_predictor.metrics_names))
            print ("c_loss_metrics: " + str(classifier.metrics_names))

            for index in range(NB_ITERATIONS):
                # Load a training batch
                X, y = load_X_y(videos_list, index, frames, action_cats)
                X_train = X[:, 0 : int(VIDEO_LENGTH/2)]
                y_true_imgs = X[:, int(VIDEO_LENGTH/2) :]
                y_true_classes = y[:, int(VIDEO_LENGTH/2) :]

                # Debug: inspect the future class targets
                # print (y_true_classes.shape)
                # print (y_true_classes)

                # Train classifier
                y_fake_imgs = autoencoder.predict(X_train, verbose=0)

                # Use new names for the classifier's real+fake batch so the
                # loader's y is preserved for y_orig_classes below
                X_cla = np.concatenate((y_true_imgs, y_fake_imgs), axis=0)
                y_cla = np.concatenate((y_true_classes, y_fake_classes), axis=0)
                for j in range(C_TRAIN_RATIO):
                    c_loss.append(classifier.train_on_batch(X_cla, y_cla))

                # Train action_predictor
                set_trainability(classifier, False)
                for j in range(A_TRAIN_RATIO):
                    a_loss.append(action_predictor.train_on_batch(X_train, [y_true_imgs, y_true_classes]))
                    # a_loss.append(action_predictor.train_on_batch(X_train, y_true_classes))

                set_trainability(classifier, True)

                arrow = int(index / (NB_ITERATIONS / 30))
                stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS-1) + "  " +
                             "a_loss: " + str([ a_loss[len(a_loss) - 1][j]  for j in [0, -1]]) + "  " +
                             "c_loss: " + str(c_loss[len(c_loss) - 1]) + "  " +
                             "\t    [" + "{0}>".format("="*(arrow)))
                stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file
                # predicted_images = autoencoder.predict(X_train)
                predicted_images, predicted_classes = action_predictor.predict(X_train, verbose=0)
                # predicted_classes = action_predictor.predict(X_train, verbose=0)
                orig_image, truth_image, pred_image = combine_images(X_train, y_true_imgs, predicted_images)
                pred_image = pred_image * 127.5 + 127.5
                orig_image = orig_image * 127.5 + 127.5
                truth_image = truth_image * 127.5 + 127.5
                font = cv2.FONT_HERSHEY_SIMPLEX
                if epoch == 0 :
                    y_orig_classes = y[:, 0: int(VIDEO_LENGTH / 2)]
                    # Add labels as text to the image
                    for k in range(BATCH_SIZE):
                        for j in range(int(VIDEO_LENGTH/2)):
                            class_num_past = np.argmax(y_orig_classes[k, j])
                            class_num_futr = np.argmax(y_true_classes[k, j])
                            cv2.putText(orig_image, actions[class_num_past],
                                        (2 + j*(128), 120 + k*128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
                            cv2.putText(truth_image, actions[class_num_futr],
                                        (2 + j*(128), 120 + k*128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
                    cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) +
                                             "_cla_orig.png"), orig_image)
                    cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) +
                                             "_cla_truth.png"), truth_image)

                # Add labels as text to the image
                for k in range(BATCH_SIZE):
                    for j in range(int(VIDEO_LENGTH / 2)):
                        class_num = np.argmax(predicted_classes[k, j])
                        class_num_futr = np.argmax(y_true_classes[k, j])
                        cv2.putText(pred_image, actions[class_num],
                                    (2 + j * (128), 120 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
                        cv2.putText(pred_image, actions[class_num_futr],
                                    (2 + j * (128), 105 + k * 128), font, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
                cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_pred.png"), pred_image)

            # Run over validation data
            print ('')
            for index in range(NB_VAL_ITERATIONS):
                X, y = load_X_y(val_videos_list, index, val_frames, val_action_cats)
                X_train = X[:, 0: int(VIDEO_LENGTH / 2)]
                y_classes = y[:, 0: int(VIDEO_LENGTH / 2)]
                y_imgs = X[:, int(VIDEO_LENGTH / 2):]

                val_c_loss.append(classifier.test_on_batch(X_train, y_classes))
                val_a_loss.append(action_predictor.test_on_batch(X_train, [y_imgs, y_classes]))
                # val_a_loss.append(action_predictor.test_on_batch(X_train, y_classes))

                arrow = int(index / (NB_VAL_ITERATIONS / 40))
                stdout.write("\rIter: " + str(index) + "/" + str(NB_VAL_ITERATIONS - 1) + "  " +
                             "val_a_loss: " + str([val_a_loss[len(val_a_loss) - 1][j] for j in [0, -1]]) + "  " +
                             "val_c_loss: " + str(val_c_loss[len(val_c_loss) - 1]) )
                stdout.flush()


            predicted_attn = mask_gen_1.predict(X_train, verbose=0)
            a_pred = np.reshape(predicted_attn, newshape=(10, 10, 16, 16, 1))
            np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_cla_gen1_' + str(epoch) + '.npy'), a_pred)

            # Aggregate and log the losses after the epoch
            avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0)
            avg_val_c_loss = np.mean(np.asarray(val_c_loss, dtype=np.float32), axis=0)
            avg_a_loss = np.mean(np.asarray(a_loss, dtype=np.float32), axis=0)
            avg_val_a_loss = np.mean(np.asarray(val_a_loss, dtype=np.float32), axis=0)

            loss_values = np.asarray(avg_c_loss.tolist() + avg_val_c_loss.tolist()
                                     + avg_a_loss.tolist() + avg_val_a_loss.tolist(), dtype=np.float32)
            c_loss_keys = ['c_' + metric for metric in classifier.metrics_names]
            a_loss_keys = ['a_' + metric for metric in action_predictor.metrics_names]
            val_c_loss_keys = ['c_val_' + metric for metric in classifier.metrics_names]
            val_a_loss_keys = ['a_val_' + metric for metric in action_predictor.metrics_names]

            loss_keys = c_loss_keys + val_c_loss_keys + \
                        a_loss_keys + val_a_loss_keys
            logs = dict(zip(loss_keys, loss_values))

            TC_cla.on_epoch_end(epoch, logs)

            # Log the losses
            with open(os.path.join(LOG_DIR, 'losses_aae.json'), 'a') as log_file:
                log_file.write("{\"epoch\":%d, %s;\n" % (epoch, logs))

            print("\nAvg c_loss: " + str(avg_c_loss) +
                  " Avg val_c_loss: " + str(avg_val_c_loss) +
                  "\nAvg a_loss: " + str(avg_a_loss[0]) +
                  " Avg val_a_loss: " + str(avg_val_a_loss[0]))

            # Save model weights per epoch to file
            encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_cla_epoch_'+str(epoch)+'.h5'), True)
            decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_cla_epoch_' + str(epoch) + '.h5'), True)
            classifier.save_weights(os.path.join(CHECKPOINT_DIR, 'classifier_cla_epoch_' + str(epoch) + '.h5'), True)

    # End TensorBoard Callback
    TC.on_train_end('_')
    TC_cla.on_train_end('_')
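
# set_trainability() is used above to freeze the classifier during the
# adversarial update but is not defined on this page. A minimal sketch of
# the usual Keras idiom (the repository's helper may differ):
def set_trainability(model, trainable):
    # Toggle the model and each of its layers so that models compiled
    # afterwards honour the new flag
    model.trainable = trainable
    for layer in model.layers:
        layer.trainable = trainable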

# Example #2
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS):
    print ("Loading data definitions...")
    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_208.hkl'))
    videos_list = get_video_lists(frames_source=frames_source, stride=4)
    n_videos = videos_list.shape[0]

    # Setup test
    test_frames_source = hkl.load(os.path.join(TEST_DATA_DIR, 'sources_test_208.hkl'))
    test_videos_list = get_video_lists(frames_source=test_frames_source, stride=(int(VIDEO_LENGTH/2)))
    n_test_videos = test_videos_list.shape[0]

    if RAM_DECIMATE:
        frames = load_to_RAM(frames_source=frames_source)

    if SHUFFLE:
        # Shuffle video windows to aid generalization
        videos_list = np.random.permutation(videos_list)

    # Build the Spatio-temporal Autoencoder
    print ("Creating models...")
    encoder = encoder_model()
    print (encoder.summary())

    decoder = decoder_model()
    autoencoder = autoencoder_model(encoder, decoder)
    autoencoder.compile(loss="mean_absolute_error", optimizer=OPTIM_A)

    # Build attention layer output
    # intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output)
    # mask_gen_1 = Sequential()
    # mask_gen_1.add(encoder)
    # mask_gen_1.add(intermediate_decoder)
    # mask_gen_1.compile(loss='mean_squared_error', optimizer=OPTIM_A)

    run_utilities(encoder, decoder, autoencoder, ENC_WEIGHTS, DEC_WEIGHTS)

    NB_ITERATIONS = int(n_videos/BATCH_SIZE)
    # NB_ITERATIONS = 5
    NB_TEST_ITERATIONS = int(n_test_videos / BATCH_SIZE)

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR, histogram_freq=0, write_graph=False, write_images=False)
    LRS = lrs_callback.LearningRateScheduler(schedule=schedule)
    LRS.set_model(autoencoder)

    print ("Beginning Training...")
    # Begin Training
    for epoch in range(NB_EPOCHS_AUTOENCODER):
        print("\n\nEpoch ", epoch)
        loss = []
        test_loss = []

        # Set learning rate every epoch
        LRS.on_epoch_begin(epoch=epoch)
        lr = K.get_value(autoencoder.optimizer.lr)
        print ("Learning rate: " + str(lr))

        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            if RAM_DECIMATE:
                X = load_X_RAM(videos_list, index, frames)
            else:
                X = load_X(videos_list, index, DATA_DIR, IMG_SIZE)
            X_train = X[:, 0 : int(VIDEO_LENGTH/2)]
            y_train = X[:, int(VIDEO_LENGTH/2) :]
            loss.append(autoencoder.train_on_batch(X_train, y_train))

            arrow = int(index / (NB_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS-1) + "  " +
                         "loss: " + str(loss[len(loss)-1]) +
                         "\t    [" + "{0}>".format("="*(arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            predicted_images = autoencoder.predict(X_train, verbose=0)
            full_seq = np.concatenate((X_train, y_train), axis=1)
            truth_seq = arrange_images(full_seq)
            pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1))

            truth_seq = truth_seq * 127.5 + 127.5
            pred_seq = pred_seq * 127.5 + 127.5

            cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_truth.png"), truth_seq)
            cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_pred.png"), pred_seq)

        # Run over test data
        print ('')
        for index in range(NB_TEST_ITERATIONS):
            X = load_X(test_videos_list, index, TEST_DATA_DIR, IMG_SIZE)
            X_train = X[:, 0: int(VIDEO_LENGTH / 2)]
            y_train = X[:, int(VIDEO_LENGTH / 2):]
            test_loss.append(autoencoder.test_on_batch(X_train, y_train))

            arrow = int(index / (NB_TEST_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) + "  " +
                         "test_loss: " + str(test_loss[len(test_loss) - 1]) +
                         "\t    [" + "{0}>".format("=" * (arrow)))
            stdout.flush()

        # Aggregate and log the losses after the epoch
        avg_loss = sum(loss)/len(loss)
        avg_test_loss = sum(test_loss) / len(test_loss)
        logs = {'loss': avg_loss, 'test_loss': avg_test_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"loss\":%f, \"test_loss\":%f};\n" % (epoch, avg_loss, avg_test_loss))

            print("\nAvg loss: " + str(avg_loss) + " Avg test loss: " + str(avg_test_loss))

        # Save model weights per epoch to file
        encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True)
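
# get_video_lists() is called above but not defined on this page. It factors
# out the window-building loop of Example #1; this sketch is consistent with
# the call sites here (stride generalizes the fixed +1/+VIDEO_LENGTH steps)
# and assumes the module's VIDEO_LENGTH global and numpy import. The real
# helper also takes a frame_skip argument in Example #4, omitted here:
def get_video_lists(frames_source, stride):
    videos_list = []
    start, end = 0, VIDEO_LENGTH
    while end <= len(frames_source):
        window = frames_source[start:end]
        if len(set(window)) == 1:
            # Window lies entirely within one source clip: keep it
            videos_list.append((start, end))
            start += stride
            end += stride
        else:
            # Window straddles a clip boundary: restart at the boundary
            start = end - 1
            end = start + VIDEO_LENGTH
    return np.asarray(videos_list, dtype=np.int32)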

# Example #3
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS):
    print("Loading data...")
    mnist = np.load(os.path.join(DATA_DIR, 'mnist_test_seq.npy'))
    mnist = np.expand_dims(mnist, axis=4)
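    # mnist_test_seq.npy is the standard Moving MNIST tensor, shaped
    # (20 frames, 10000 sequences, 64, 64); expand_dims appends the single
    # channel axis.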

    # Build the Spatio-temporal Autoencoder
    print("Creating models...")
    encoder = encoder_model()
    decoder = decoder_model()
    autoencoder = autoencoder_model(encoder, decoder)

    run_utilities(encoder, decoder, autoencoder, ENC_WEIGHTS, DEC_WEIGHTS)

    autoencoder.compile(loss='mean_squared_error', optimizer=OPTIM)

    NB_ITERATIONS = int(mnist.shape[1] / BATCH_SIZE)

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR,
                                 histogram_freq=0,
                                 write_graph=False,
                                 write_images=False)
    # LRS = lrs_callback.LearningRateScheduler(schedule=schedule)
    # LRS.set_model(autoencoder)

    print("Beginning Training...")
    # Begin Training
    for epoch in range(NB_EPOCHS):
        print("\n\nEpoch ", epoch)
        loss = []

        # Set learning rate every epoch
        # LRS.on_epoch_begin(epoch=epoch)
        lr = K.get_value(autoencoder.optimizer.lr)
        print("Learning rate: " + str(lr))

        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            X_train = np.zeros(shape=(BATCH_SIZE, int(VIDEO_LENGTH / 2), 64, 64, 1))
            y_train = np.zeros(shape=(BATCH_SIZE, int(VIDEO_LENGTH / 2), 64, 64, 1))
            for i in range(BATCH_SIZE):
                X_train[i] = mnist[0:int(VIDEO_LENGTH / 2), index + i]
                y_train[i] = mnist[int(VIDEO_LENGTH / 2):VIDEO_LENGTH, index + i]

            X_train = (X_train.astype(np.float32) - 127.5) / 127.5
            y_train = (y_train.astype(np.float32) - 127.5) / 127.5

            loss.append(autoencoder.train_on_batch(X_train, y_train))

            arrow = int(index / (NB_ITERATIONS / 40))
            stdout.write("\rIteration: " + str(index) + "/" +
                         str(NB_ITERATIONS - 1) + "  " + "loss: " +
                         str(loss[len(loss) - 1]) + "\t    [" +
                         "{0}>".format("=" * (arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            predicted_images = autoencoder.predict(X_train, verbose=0)
            orig_image, truth_image, pred_image = combine_images(
                X_train, y_train, predicted_images)
            pred_image = pred_image * 127.5 + 127.5
            orig_image = orig_image * 127.5 + 127.5
            truth_image = truth_image * 127.5 + 127.5
            if epoch == 0:
                cv2.imwrite(
                    os.path.join(GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_orig.png"),
                    orig_image)
                cv2.imwrite(
                    os.path.join(GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_truth.png"),
                    truth_image)
            cv2.imwrite(
                os.path.join(GEN_IMAGES_DIR,
                             str(epoch) + "_" + str(index) + "_pred.png"),
                pred_image)

        # Aggregate and log the losses after the epoch
        avg_loss = sum(loss) / len(loss)
        logs = {'loss': avg_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"d_loss\":%f};\n" %
                           (epoch, avg_loss))

        print("\nAvg loss: " + str(avg_loss))

        # Save model weights per epoch to file
        encoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'encoder_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'decoder_epoch_' + str(epoch) + '.h5'), True)

    # End TensorBoard Callback
    TC.on_train_end('_')
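
# combine_images() is not defined on this page. From its usage it returns
# three tiled grids (inputs, ground truth, predictions), one row per batch
# item and one column per frame, still in [-1, 1]. A sketch under those
# assumptions:
def combine_images(X, y, preds):
    def tile(seq_batch):
        # (batch, time, H, W, C) -> one (batch*H, time*W, C) grid
        b, t, h, w, c = seq_batch.shape
        return seq_batch.transpose(0, 2, 1, 3, 4).reshape(b * h, t * w, c)
    return tile(X), tile(y), tile(preds)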

# Example #4
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
    print("Loading data definitions.")

    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_208.hkl'))
    videos_list_1 = get_video_lists(frames_source=frames_source,
                                    stride=8,
                                    frame_skip=0)
    videos_list_2 = get_video_lists(frames_source=frames_source,
                                    stride=8,
                                    frame_skip=1)
    videos_list = np.concatenate((videos_list_1, videos_list_2), axis=0)

    # Load actions from annotations
    action_labels = hkl.load(
        os.path.join(DATA_DIR, 'annotations_train_208.hkl'))
    ped_action_classes, ped_class_count = get_action_classes(
        action_labels=action_labels)
    print("Training Stats: " + str(ped_class_count))

    if RAM_DECIMATE:
        frames = load_to_RAM(frames_source=frames_source)

    if SHUFFLE:
        # Shuffle images to aid generalization
        videos_list = np.random.permutation(videos_list)

    # Setup test
    test_frames_source = hkl.load(
        os.path.join(TEST_DATA_DIR, 'sources_test_208.hkl'))
    test_videos_list = get_video_lists(frames_source=test_frames_source,
                                       stride=8,
                                       frame_skip=0)

    # Load test action annotations
    test_action_labels = hkl.load(
        os.path.join(TEST_DATA_DIR, 'annotations_test_208.hkl'))
    test_ped_action_classes, test_ped_class_count = get_action_classes(
        test_action_labels)
    print("Test Stats: " + str(test_ped_class_count))

    # Build the Spatio-temporal Autoencoder
    print("Creating models.")
    encoder = encoder_model()
    decoder = decoder_model()

    # Build stacked classifier
    if CLASSIFIER:
        classifier = ensemble_c3d()
        # classifier.compile(loss="binary_crossentropy",
        #                    optimizer=OPTIM_C,
        #                    metrics=['accuracy'])
        run_utilities(encoder, decoder, classifier, ENC_WEIGHTS, DEC_WEIGHTS,
                      CLA_WEIGHTS)
        sclassifier = stacked_classifier_model(encoder, decoder, classifier)
        sclassifier.compile(loss="binary_crossentropy",
                            optimizer=OPTIM_C,
                            metrics=['accuracy'])
        print(sclassifier.summary())

    if not CLASSIFIER:
        autoencoder = autoencoder_model(encoder, decoder)
        autoencoder.compile(loss="mean_absolute_error", optimizer=OPTIM_A)
        run_utilities(encoder, decoder, 'classifier', ENC_WEIGHTS, DEC_WEIGHTS,
                      CLA_WEIGHTS)

    n_videos = videos_list.shape[0]
    n_test_videos = test_videos_list.shape[0]
    NB_ITERATIONS = int(n_videos / BATCH_SIZE)
    # NB_ITERATIONS = 1
    NB_TEST_ITERATIONS = int(n_test_videos / BATCH_SIZE)
    # NB_TEST_ITERATIONS = 1

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR,
                                 histogram_freq=0,
                                 write_graph=False,
                                 write_images=False)
    TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR,
                                     histogram_freq=0,
                                     write_graph=False,
                                     write_images=False)
    if not CLASSIFIER:
        LRS_auto = lrs_callback.LearningRateScheduler(schedule=auto_schedule)
        LRS_auto.set_model(autoencoder)

    LC_auto = coeff_callback.CoeffCallback(schedule=coeff_schedule)

    if CLASSIFIER:
        LRS_cla = lrs_callback.LearningRateScheduler(schedule=cla_schedule)
        LRS_cla.set_model(sclassifier)

    print("Beginning Training.")
    # Begin training; this autoencoder phase assumes CLASSIFIER is False
    # (autoencoder and LRS_auto are only built in that branch)
    for epoch in range(NB_EPOCHS_AUTOENCODER):
        print("\n\nEpoch ", epoch)
        loss = []
        test_loss = []

        # Set learning rate every epoch
        LRS_auto.on_epoch_begin(epoch=epoch)
        lr = K.get_value(autoencoder.optimizer.lr)
        print("Learning rate: " + str(lr))

        LC_auto.on_epoch_begin(epoch=epoch)
        print("Loss Coefficients: " + str(LAMBDA))

        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            if RAM_DECIMATE:
                X, y = load_X_y_RAM(videos_list, index, frames, [])
            else:
                X, y = load_X_y(videos_list, index, DATA_DIR, [])
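            # Conditioning frames are fed most-recent-first (np.flip on the
            # time axis), a common seq2seq encoder trick; the test loop
            # below mirrors this.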
            X_train = np.flip(X[:, 0:int(VIDEO_LENGTH / 2)], axis=1)
            y_train = X[:, int(VIDEO_LENGTH / 2):]
            loss.append(autoencoder.train_on_batch(X_train, y_train))

            arrow = int(index / (NB_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" +
                         str(NB_ITERATIONS - 1) + "  " + "loss: " +
                         str(loss[len(loss) - 1]) + "\t    [" +
                         "{0}>".format("=" * (arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            predicted_images = autoencoder.predict(X_train, verbose=0)
            full_seq = np.concatenate((X_train, y_train), axis=1)
            truth_seq = arrange_images(full_seq)
            pred_seq = arrange_images(
                np.concatenate((X_train, predicted_images), axis=1))

            truth_seq = truth_seq * 127.5 + 127.5
            pred_seq = pred_seq * 127.5 + 127.5

            if epoch == 0:
                cv2.imwrite(
                    os.path.join(GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_truth.png"),
                    truth_seq)
            cv2.imwrite(
                os.path.join(GEN_IMAGES_DIR,
                             str(epoch) + "_" + str(index) + "_pred.png"),
                pred_seq)

        # Run over test data
        print('')
        for index in range(NB_TEST_ITERATIONS):
            X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, [])
            X_test = np.flip(X[:, 0:int(VIDEO_LENGTH / 2)], axis=1)
            y_test = X[:, int(VIDEO_LENGTH / 2):]
            test_loss.append(autoencoder.test_on_batch(X_test, y_test))

            arrow = int(index / (NB_TEST_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" +
                         str(NB_TEST_ITERATIONS - 1) + "  " + "test_loss: " +
                         str(test_loss[len(test_loss) - 1]) + "\t    [" +
                         "{0}>".format("=" * (arrow)))
            stdout.flush()

        # Aggregate and log the losses after the epoch
        avg_loss = sum(loss) / len(loss)
        avg_test_loss = sum(test_loss) / len(test_loss)
        logs = {'loss': avg_loss, 'test_loss': avg_test_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"loss\":%f, \"test_loss\":%f};\n" %
                           (epoch, avg_loss, avg_test_loss))

        print("\nAvg loss: " + str(avg_loss) + " Avg test loss: " +
              str(avg_test_loss))

        # Save model weights per epoch to file
        encoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'encoder_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'decoder_epoch_' + str(epoch) + '.h5'), True)

    # Train Classifier
    if CLASSIFIER:
        print("Training Classifier...")
        for epoch in range(NB_EPOCHS_CLASS):
            print("\n\nEpoch ", epoch)
            c_loss = []
            test_c_loss = []

            # # Set learning rate every epoch
            LRS_cla.on_epoch_begin(epoch=epoch)
            lr = K.get_value(sclassifier.optimizer.lr)
            y_train_pred = []
            y_train_true = []
            print("Learning rate: " + str(lr))
            print("c_loss_metrics: " + str(sclassifier.metrics_names))

            for index in range(NB_ITERATIONS):
                # Load a training batch
                if RAM_DECIMATE:
                    X, y = load_X_y_RAM(videos_list, index, frames,
                                        ped_action_classes)
                else:
                    X, y = load_X_y(videos_list, index, DATA_DIR,
                                    ped_action_classes)

                X_train = np.flip(X[:, 0:int(VIDEO_LENGTH / 2)], axis=1)
                # X_train = X[:, 0: int(VIDEO_LENGTH / 2)]
                y_true_class = y[:, CLASS_TARGET_INDEX]
                y_true_imgs = X[:, int(VIDEO_LENGTH / 2):]

                c_loss.append(sclassifier.train_on_batch(
                    X_train, y_true_class))

                y_train_true.extend(y_true_class)
                y_train_pred.extend(sclassifier.predict(X_train, verbose=0))

                arrow = int(index / (NB_ITERATIONS / 30))
                stdout.write("\rIter: " + str(index) + "/" +
                             str(NB_ITERATIONS - 1) + "  " + "c_loss: " +
                             str([c_loss[len(c_loss) - 1][j]
                                  for j in [0, 1]]) + "  " + "\t    [" +
                             "{0}>".format("=" * (arrow)))
                stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file; autoencoder_model() reuses
                # the shared, already-trained encoder/decoder weights
                generator = autoencoder_model(encoder, decoder)
                predicted_images = generator.predict(X_train, verbose=0)
                ped_pred_class = sclassifier.predict(X_train, verbose=0)
                pred_seq = arrange_images(
                    np.concatenate((X_train, predicted_images), axis=1))
                pred_seq = pred_seq * 127.5 + 127.5

                truth_image = arrange_images(y_true_imgs)
                truth_image = truth_image * 127.5 + 127.5
                font = cv2.FONT_HERSHEY_SIMPLEX
                y_orig_classes = y[:, 0:int(VIDEO_LENGTH / 2)]
                y_true_classes = y[:, int(VIDEO_LENGTH / 2):]

                # Add labels as text to the image
                for k in range(BATCH_SIZE):
                    for j in range(int(VIDEO_LENGTH / 2)):
                        # class_num_past = np.argmax(y_orig_classes[k, j])
                        # class_num_futr = np.argmax(y_true_classes[k, j])
                        # class_num_y = np.argmax(ped_pred_class[k])
                        # label_true = simple_ped_set[class_num_futr]
                        # label_orig = simple_ped_set[class_num_past]
                        # label_pred = simple_ped_set[class_num_y]
                        #
                        # label_true = str(y_orig_classes[k, j])
                        # label_pred = str([round(float(i), 2) for i in ped_pred_class[k]])

                        if (y_orig_classes[k, j] > 0.5):
                            label_orig = "crossing"
                        else:
                            label_orig = "not crossing"

                        if (y_true_classes[k][0] > 0.5):
                            label_true = "crossing"
                        else:
                            label_true = "not crossing"

                        if (ped_pred_class[k][0] > 0.5):
                            label_pred = "crossing"
                        else:
                            label_pred = "not crossing"

                        cv2.putText(pred_seq, label_orig,
                                    (2 + j * (208), 114 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)
                        cv2.putText(pred_seq, label_pred,
                                    (2 + (j + 16) * (208), 114 + k * 128),
                                    font, 0.5, (255, 255, 255), 1, cv2.LINE_AA)
                        cv2.putText(pred_seq, 'truth: ' + label_true,
                                    (2 + (j + 16) * (208), 94 + k * 128), font,
                                    0.5, (255, 255, 255), 1, cv2.LINE_AA)
                        cv2.putText(truth_image, label_true,
                                    (2 + j * (208), 114 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)

                cv2.imwrite(
                    os.path.join(
                        CLA_GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_cla_pred.png"),
                    pred_seq)
                cv2.imwrite(
                    os.path.join(
                        CLA_GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_cla_truth.png"),
                    truth_image)

            # Run over test data
            print('')
            y_test_pred = []
            y_test_true = []
            for index in range(NB_TEST_ITERATIONS):
                X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR,
                                test_ped_action_classes)
                X_test = X[:, 0:int(VIDEO_LENGTH / 2)]
                y_true_class = y[:, CLASS_TARGET_INDEX]
                y_true_imgs = X[:, int(VIDEO_LENGTH / 2):]

                test_c_loss.append(
                    sclassifier.test_on_batch(X_test, y_true_class))
                y_test_true.extend(y_true_class)
                y_test_pred.extend(sclassifier.predict(X_test, verbose=0))

                arrow = int(index / (NB_TEST_ITERATIONS / 40))
                stdout.write(
                    "\rIter: " + str(index) + "/" +
                    str(NB_TEST_ITERATIONS - 1) + "  " + "test_c_loss: " +
                    str([test_c_loss[len(test_c_loss) - 1][j]
                         for j in [0, 1]]))
                stdout.flush()

            # Save generated images to file
            generator = autoencoder_model(encoder, decoder)
            test_predicted_images = generator.predict(X_test)
            test_ped_pred_class = sclassifier.predict(X_test, verbose=0)
            orig_image = arrange_images(X_test)
            truth_image = arrange_images(y_true_imgs)
            pred_image = arrange_images(test_predicted_images)
            pred_image = pred_image * 127.5 + 127.5
            orig_image = orig_image * 127.5 + 127.5
            truth_image = truth_image * 127.5 + 127.5
            font = cv2.FONT_HERSHEY_SIMPLEX
            if epoch == 0:
                y_orig_classes = y[:, 0:int(VIDEO_LENGTH / 2)]
                y_true_classes = y[:, int(VIDEO_LENGTH / 2):]
                # Add labels as text to the image
                for k in range(BATCH_SIZE):
                    for j in range(int(VIDEO_LENGTH / 2)):
                        # class_num_past = np.argmax(y_orig_classes[k, j])
                        # class_num_futr = np.argmax(y_true_classes[k, j])
                        if (y_orig_classes[k, j] > 0.5):
                            label_orig = "crossing"
                        else:
                            label_orig = "not crossing"

                        if (y_true_classes[k][0] > 0.5):
                            label_true = "crossing"
                        else:
                            label_true = "not crossing"

                        cv2.putText(orig_image, label_orig,
                                    (2 + j * (208), 114 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)
                        cv2.putText(truth_image, label_true,
                                    (2 + j * (208), 114 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)
                cv2.imwrite(
                    os.path.join(
                        CLA_GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_cla_test_orig.png"),
                    orig_image)
                cv2.imwrite(
                    os.path.join(
                        CLA_GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_cla_test_truth.png"),
                    truth_image)

            # Add labels as text to the image
            for k in range(BATCH_SIZE):
                # class_num_y = np.argmax(test_ped_pred_class[k])
                if (test_ped_pred_class[k][0] > 0.5):
                    label_pred = "crossing"
                else:
                    label_pred = "not crossing"

                for j in range(int(VIDEO_LENGTH / 2)):
                    cv2.putText(pred_image, label_pred,
                                (2 + j * (208), 114 + k * 128), font, 0.5,
                                (255, 255, 255), 1, cv2.LINE_AA)
            cv2.imwrite(
                os.path.join(
                    CLA_GEN_IMAGES_DIR,
                    str(epoch) + "_" + str(index) + "_cla_test_pred.png"),
                pred_image)

            # Aggregate and log the losses after the epoch
            avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0)
            avg_test_c_loss = np.mean(np.asarray(test_c_loss,
                                                 dtype=np.float32),
                                      axis=0)

            # Calculate Precision and Recall scores
            train_prec, train_rec, train_fbeta, train_support = get_sklearn_metrics(
                np.asarray(y_train_true),
                np.asarray(y_train_pred),
                avg='micro')
            test_prec, test_rec, test_fbeta, test_support = get_sklearn_metrics(
                np.asarray(y_test_true), np.asarray(y_test_pred), avg='micro')
            loss_values = np.asarray(
                avg_c_loss.tolist() + [train_prec.tolist()] +
                [train_rec.tolist()] + avg_test_c_loss.tolist() +
                [test_prec.tolist()] + [test_rec.tolist()],
                dtype=np.float32)
            # loss_values = np.asarray(avg_c_loss.tolist() + train_prec.tolist() +
            #                          train_rec.tolist() +
            #                          avg_test_c_loss.tolist() + test_prec.tolist() +
            #                          test_rec.tolist(), dtype=np.float32)
            precs = ['prec_' + action for action in simple_ped_set]
            recs = ['rec_' + action for action in simple_ped_set]
            c_loss_keys = [
                'c_' + metric
                for metric in sclassifier.metrics_names + precs + recs
            ]
            test_c_loss_keys = [
                'c_test_' + metric
                for metric in sclassifier.metrics_names + precs + recs
            ]

            loss_keys = c_loss_keys + test_c_loss_keys
            logs = dict(zip(loss_keys, loss_values))

            TC_cla.on_epoch_end(epoch, logs)

            # Log the losses
            with open(os.path.join(LOG_DIR, 'losses_cla.json'),
                      'a') as log_file:
                log_file.write("{\"epoch\":%d, %s;\n" % (epoch, logs))

            print("\nAvg c_loss: " + str(avg_c_loss) + " Avg test_c_loss: " +
                  str(avg_test_c_loss))

            prec, recall, fbeta, support = get_sklearn_metrics(
                np.asarray(y_train_true),
                np.asarray(y_train_pred),
                avg='weighted')
            print("Train Prec: %.2f, Recall: %.2f, Fbeta: %.2f" %
                  (prec, recall, fbeta))
            prec, recall, fbeta, support = get_sklearn_metrics(
                np.asarray(y_test_true),
                np.asarray(y_test_pred),
                avg='weighted')
            print("Test Prec: %.2f, Recall: %.2f, Fbeta: %.2f" %
                  (prec, recall, fbeta))

            # Save model weights per epoch to file
            encoder.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'encoder_cla_epoch_' + str(epoch) + '.h5'), True)
            decoder.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'decoder_cla_epoch_' + str(epoch) + '.h5'), True)
            classifier.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'classifier_cla_epoch_' + str(epoch) + '.h5'),
                True)
        print(
            get_classification_report(np.asarray(y_train_true),
                                      np.asarray(y_train_pred)))
        print(
            get_classification_report(np.asarray(y_test_true),
                                      np.asarray(y_test_pred)))
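
# arrange_images() (used by Examples #2, #4 and #5) tiles one
# (batch, time, H, W, C) array into a single image, one row per batch item,
# which matches how the labels above are drawn at x = j*frame_width,
# y ~ k*frame_height. A sketch under that assumption:
def arrange_images(seq_batch):
    b, t, h, w, c = seq_batch.shape
    return seq_batch.transpose(0, 2, 1, 3, 4).reshape(b * h, t * w, c)

# Example #5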
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
    print("Loading data definitions.")

    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl'))
    videos_list = get_video_lists(frames_source=frames_source, stride=1)

    # Load actions from annotations
    action_labels = hkl.load(os.path.join(DATA_DIR, 'annotations_train_128.hkl'))
    ped_action_classes, ped_class_count = get_action_classes(action_labels=action_labels)
    print("Training Stats: " + str(ped_class_count))

    if RAM_DECIMATE:
        frames = load_to_RAM(frames_source=frames_source)

    if SHUFFLE:
        # Shuffle images to aid generalization
        videos_list = np.random.permutation(videos_list)

    # Setup test
    test_frames_source = hkl.load(os.path.join(TEST_DATA_DIR, 'sources_test_128.hkl'))
    test_videos_list = get_video_lists(frames_source=test_frames_source, stride=8)
    # Load test action annotations
    test_action_labels = hkl.load(os.path.join(TEST_DATA_DIR, 'annotations_test_128.hkl'))
    test_ped_action_classes, test_ped_class_count = get_action_classes(test_action_labels)
    print("Test Stats: " + str(test_ped_class_count))

    videos_list = subsample_videos(videos_list=videos_list, ped_action_labels=ped_action_classes)

    # Build the Spatio-temporal Autoencoder
    print("Creating models.")
    encoder = encoder_model()
    decoder = decoder_model()

    print(encoder.summary())
    print(decoder.summary())

    # Build attention layer output
    intermediate_decoder = Model(inputs=decoder.layers[0].input, outputs=decoder.layers[10].output)
    mask_gen = Sequential()
    mask_gen.add(encoder)
    mask_gen.add(intermediate_decoder)
    mask_gen.compile(loss='mean_absolute_error', optimizer=OPTIM_A)

    autoencoder = autoencoder_model(encoder, decoder)
    autoencoder.compile(loss="mean_absolute_error", optimizer=OPTIM_A)

    # Build stacked classifier
    if CLASSIFIER:
        classifier = pretrained_c3d()
        classifier.compile(loss="binary_crossentropy",
                           optimizer=OPTIM_C,
                           metrics=['accuracy'])
        sclassifier = stacked_classifier_model(encoder, decoder, classifier)
        sclassifier.compile(loss=["mean_absolute_error", "binary_crossentropy"],
                            optimizer=OPTIM_C,
                            loss_weights=LOSS_WEIGHTS,
                            metrics=['accuracy'])
        print(sclassifier.summary())

    if CLASSIFIER:
        run_utilities(encoder, decoder, autoencoder, classifier, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS)
    else:
        run_utilities(encoder, decoder, autoencoder, 'classifier', ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS)

    n_videos = videos_list.shape[0]
    n_test_videos = test_videos_list.shape[0]
    NB_ITERATIONS = int(n_videos / BATCH_SIZE)
    # NB_ITERATIONS = 1
    NB_TEST_ITERATIONS = int(n_test_videos / BATCH_SIZE)
    # NB_TEST_ITERATIONS = 1

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR, histogram_freq=0, write_graph=False, write_images=False)
    TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False)
    LRS_auto = lrs_callback.LearningRateScheduler(schedule=auto_schedule)
    LRS_auto.set_model(autoencoder)

    if CLASSIFIER:
        LRS_clas = lrs_callback.LearningRateScheduler(schedule=clas_schedule)
        LRS_clas.set_model(sclassifier)

    print("Beginning Training.")
    # Begin Training
    for epoch in range(NB_EPOCHS_AUTOENCODER):
        print("\n\nEpoch ", epoch)
        loss = []
        test_loss = []

        # Set learning rate every epoch
        LRS_auto.on_epoch_begin(epoch=epoch)
        lr = K.get_value(autoencoder.optimizer.lr)
        print("Learning rate: " + str(lr))

        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            if RAM_DECIMATE:
                X, y = load_X_y_RAM(videos_list, index, frames, [])
            else:
                X, y = load_X_y(videos_list, index, DATA_DIR, [])

            X_train = X[:, 0: int(VIDEO_LENGTH / 2)]
            y_train = X[:, int(VIDEO_LENGTH / 2):]
            loss.append(autoencoder.train_on_batch(X_train, y_train))

            arrow = int(index / (NB_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + "  " +
                         "loss: " + str(loss[len(loss) - 1]) +
                         "\t    [" + "{0}>".format("=" * (arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            predicted_images = autoencoder.predict(X_train, verbose=0)
            orig_image = arrange_images(X_train)
            truth_image = arrange_images(y_train)
            pred_image = arrange_images(predicted_images)
            orig_image = orig_image * 127.5 + 127.5
            pred_image = pred_image * 127.5 + 127.5
            truth_image = truth_image * 127.5 + 127.5
            if epoch == 0:
                cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_orig.png"), orig_image)
                cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_truth.png"), truth_image)
            cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_pred.png"), pred_image)

        # Run over validation data
        print('')
        for index in range(NB_TEST_ITERATIONS):
            X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, [])
            X_train = X[:, 0: int(VIDEO_LENGTH / 2)]
            y_train = X[:, int(VIDEO_LENGTH / 2):]
            test_loss.append(autoencoder.test_on_batch(X_train, y_train))

            arrow = int(index / (NB_TEST_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) + "  " +
                         "test_loss: " + str(test_loss[len(test_loss) - 1]) +
                         "\t    [" + "{0}>".format("=" * (arrow)))
            stdout.flush()

        # Aggregate and log the losses after the epoch
        avg_loss = sum(loss) / len(loss)
        avg_test_loss = sum(test_loss) / len(test_loss)
        logs = {'loss': avg_loss, 'test_loss': avg_test_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"loss\":%f, \"test_loss\":%f};\n" % (epoch, avg_loss, avg_test_loss))

        print("\nAvg loss: " + str(avg_loss) + " Avg test loss: " + str(avg_test_loss))

        # Save model weights per epoch to file
        encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_epoch_' + str(epoch) + '.h5'), True)

        # Save predicted mask per epoch
        # predicted_attn = mask_gen.predict(X_train, verbose=0)
        # a_pred = np.reshape(predicted_attn, newshape=(BATCH_SIZE, 16, 14, 14, 1))
        # np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_gen1_' + str(epoch) + '.npy'), a_pred)

    # Train Classifier
    if CLASSIFIER:
        print("Training Classifier...")
        for epoch in range(NB_EPOCHS_CLASS):
            print("\n\nEpoch ", epoch)
            c_loss = []
            test_c_loss = []

            # # Set learning rate every epoch
            LRS_clas.on_epoch_begin(epoch=epoch)
            lr = K.get_value(sclassifier.optimizer.lr)
            print("Learning rate: " + str(lr))
            print("c_loss_metrics: " + str(sclassifier.metrics_names))

            for index in range(NB_ITERATIONS):
                # Load a training batch
                if RAM_DECIMATE:
                    X, y = load_X_y_RAM(videos_list, index, frames, ped_action_classes)
                else:
                    X, y = load_X_y(videos_list, index, DATA_DIR, ped_action_classes)

                X_train = X[:, 0: int(VIDEO_LENGTH / 2)]
                y_true_class = y[:, CLASS_TARGET_INDEX]
                y_true_imgs = X[:, int(VIDEO_LENGTH / 2):]

                c_loss.append(sclassifier.train_on_batch(X_train, [y_true_imgs, y_true_class]))

                arrow = int(index / (NB_ITERATIONS / 30))
                stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + "  " +
                             "c_loss: " + str([ c_loss[len(c_loss) - 1][j]  for j in [0, 1, 2, 3, 4]]) + "  " +
                             "\t    [" + "{0}>".format("=" * (arrow)))
                stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file
                predicted_images, ped_pred_class = sclassifier.predict(X_train, verbose=0)
                pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1))
                pred_seq = pred_seq * 127.5 + 127.5

                truth_image = arrange_images(y_true_imgs)
                truth_image = truth_image * 127.5 + 127.5
                font = cv2.FONT_HERSHEY_SIMPLEX
                y_orig_classes = y[:, 0: int(VIDEO_LENGTH / 2)]
                y_true_classes = y[:, int(VIDEO_LENGTH / 2):]
                # Add labels as text to the image

                for k in range(BATCH_SIZE):
                    for j in range(int(VIDEO_LENGTH / 2)):
                        class_num_past = np.argmax(y_orig_classes[k, j])
                        class_num_futr = np.argmax(y_true_classes[k, j])
                        class_num_y = np.argmax(ped_pred_class[k])
                        cv2.putText(pred_seq, simple_ped_set[class_num_past],
                                    (2 + j * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                    cv2.LINE_AA)
                        try:
                            cv2.putText(pred_seq, simple_ped_set[class_num_y],
                                        (2 + (j + 16) * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                        cv2.LINE_AA)
                        except IndexError as e:
                            print(class_num_y)
                            print(e)

                        cv2.putText(pred_seq, 'truth: ' + simple_ped_set[class_num_futr],
                                    (2 + (j + 16) * (128), 94 + k * 128), font, 0.5, (255, 255, 255), 1,
                                    cv2.LINE_AA)
                        cv2.putText(truth_image, simple_ped_set[class_num_futr],
                                    (2 + j * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                    cv2.LINE_AA)

                cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_pred.png"), pred_seq)
                cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_truth.png"), truth_image)

            # Run over test data
            print('')
            for index in range(NB_TEST_ITERATIONS):
                X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR, test_ped_action_classes)
                X_test = X[:, 0: int(VIDEO_LENGTH / 2)]
                y_true_class = y[:, CLASS_TARGET_INDEX]
                y_true_imgs = X[:, int(VIDEO_LENGTH / 2):]

                test_c_loss.append(sclassifier.test_on_batch(X_test, [y_true_imgs, y_true_class]))

                arrow = int(index / (NB_TEST_ITERATIONS / 40))
                stdout.write("\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) + "  " +
                             "test_c_loss: " + str([test_c_loss[len(test_c_loss) - 1][j] for j in [0, 1, 2, 3, 4]]))
                stdout.flush()

            # Save generated images to file
            test_predicted_images, test_ped_pred_class = sclassifier.predict(X_test, verbose=0)
            pred_seq = arrange_images(np.concatenate((X_test, test_predicted_images), axis=1))
            pred_seq = pred_seq * 127.5 + 127.5

            truth_image = arrange_images(y_true_imgs)
            truth_image = truth_image * 127.5 + 127.5
            font = cv2.FONT_HERSHEY_SIMPLEX
            y_orig_classes = y[:, 0: int(VIDEO_LENGTH / 2)]
            y_true_classes = y[:, int(VIDEO_LENGTH / 2):]
            # Add labels as text to the image

            for k in range(BATCH_SIZE):
                for j in range(int(VIDEO_LENGTH / 2)):
                    class_num_past = np.argmax(y_orig_classes[k, j])
                    class_num_futr = np.argmax(y_true_classes[k, j])
                    class_num_y = np.argmax(test_ped_pred_class[k])
                    cv2.putText(pred_seq, simple_ped_set[class_num_past],
                                (2 + j * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                cv2.LINE_AA)
                    cv2.putText(pred_seq, simple_ped_set[class_num_y],
                                (2 + (j + 16) * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                cv2.LINE_AA)
                    cv2.putText(pred_seq, 'truth: ' + simple_ped_set[class_num_futr],
                                (2 + (j + 16) * (128), 94 + k * 128), font, 0.5, (255, 255, 255), 1,
                                cv2.LINE_AA)
                    cv2.putText(truth_image, simple_ped_set[class_num_futr],
                                (2 + j * (128), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                cv2.LINE_AA)

            cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(index) + "_cla_test_pred.png"), pred_seq)
            cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(index) + "_cla_test_truth.png"), truth_image)

            # predicted_attn = mask_gen.predict(X_train, verbose=0)
            # a_pred = np.reshape(predicted_attn, newshape=(BATCH_SIZE, 16, 16, 16, 1))
            # np.save(os.path.join(ATTN_WEIGHTS_DIR, 'attention_weights_cla_' + str(epoch) + '.npy'), a_pred)

            # then after each epoch/iteration
            avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0)
            avg_test_c_loss = np.mean(np.asarray(test_c_loss, dtype=np.float32), axis=0)

            loss_values = np.asarray(avg_c_loss.tolist() + avg_test_c_loss.tolist(), dtype=np.float32)
            c_loss_keys = ['c_' + metric for metric in sclassifier.metrics_names]
            test_c_loss_keys = ['c_test_' + metric for metric in sclassifier.metrics_names]

            loss_keys = c_loss_keys + test_c_loss_keys
            logs = dict(zip(loss_keys, loss_values))

            TC_cla.on_epoch_end(epoch, logs)

            # Log the losses
            with open(os.path.join(LOG_DIR, 'losses_cla.json'), 'a') as log_file:
                log_file.write("{\"epoch\":%d, %s;\n" % (epoch, logs))

            print("\nAvg c_loss: " + str(avg_c_loss) +
                  " Avg test_c_loss: " + str(avg_test_c_loss))

            # Save model weights per epoch to file
            encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_cla_epoch_' + str(epoch) + '.h5'), True)
            decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_cla_epoch_' + str(epoch) + '.h5'), True)
            classifier.save_weights(os.path.join(CHECKPOINT_DIR, 'classifier_cla_epoch_' + str(epoch) + '.h5'),
                                    True)
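
# `schedule` (and the per-task variants such as `cla_schedule`) are passed to
# lrs_callback.LearningRateScheduler throughout these examples but are not
# shown in this listing. A minimal sketch, assuming the standard Keras
# contract (epoch in, learning rate out); the step-decay policy below is
# purely an assumption:
def schedule(epoch):
    # Hypothetical step decay: cut the learning rate 10x every 10 epochs.
    if epoch < 10:
        return 0.001
    elif epoch < 20:
        return 0.0001
    return 0.00001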
Example #6
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, DIS_WEIGHTS):
    print("Loading data definitions...")
    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl'))

    # Build video progressions
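    # Slide a VIDEO_LENGTH-frame window over the source ids: a window is a
    # valid clip only if all of its frames come from the same source video
    # (len(set(frame_list)) == 1); otherwise the window jumps past the video
    # boundary before continuing.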
    videos_list = []
    start_frame_index = 1
    end_frame_index = VIDEO_LENGTH + 1
    while (end_frame_index <= len(frames_source)):
        frame_list = frames_source[start_frame_index:end_frame_index]
        if (len(set(frame_list)) == 1):
            videos_list.append(range(start_frame_index, end_frame_index))
            start_frame_index = start_frame_index + 1
            end_frame_index = end_frame_index + 1
        else:
            start_frame_index = end_frame_index - 1
            end_frame_index = start_frame_index + VIDEO_LENGTH

    videos_list = np.asarray(videos_list, dtype=np.int32)
    n_videos = videos_list.shape[0]

    if SHUFFLE:
        # Shuffle images to aid generalization
        videos_list = np.random.permutation(videos_list)

    # Build the Spatio-temporal Autoencoder
    print("Creating models...")
    encoder = encoder_model()
    decoder = decoder_model()

    intermediate_decoder = Model(inputs=decoder.layers[0].input,
                                 outputs=decoder.layers[10].output)
    mask_gen = Sequential()
    mask_gen.add(encoder)
    mask_gen.add(intermediate_decoder)
    mask_gen.compile(loss='mean_squared_error', optimizer=OPTIM_G)

    autoencoder = autoencoder_model(encoder, decoder)

    if ADVERSARIAL:
        discriminator = discriminator_model()
        aae = aae_model(autoencoder, discriminator)
        aae.compile(loss='binary_crossentropy', optimizer=OPTIM_G)
        set_trainability(discriminator, True)
        discriminator.compile(loss='binary_crossentropy', optimizer=OPTIM_D)
        run_utilities(encoder, decoder, autoencoder, discriminator,
                      ENC_WEIGHTS, DEC_WEIGHTS, DIS_WEIGHTS)
    else:
        run_utilities(encoder, decoder, autoencoder, 'None', ENC_WEIGHTS,
                      DEC_WEIGHTS, 'None')

    autoencoder.compile(loss=mse_kld_loss, optimizer=OPTIM_A)
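    # mse_kld_loss is defined elsewhere in this file; it presumably combines
    # a pixel-wise MSE reconstruction term with a KL-divergence regularizer
    # on the latent code, but its exact form is not shown in this listing.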

    NB_ITERATIONS = int(n_videos / BATCH_SIZE)

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR,
                                 histogram_freq=0,
                                 write_graph=False,
                                 write_images=False)
    LRS = lrs_callback.LearningRateScheduler(schedule=schedule)
    LRS.set_model(autoencoder)

    print("Beginning Training...")
    # Begin Training
    for epoch in range(NB_EPOCHS_AUTOENCODER):
        print("\n\nEpoch ", epoch)
        loss = []

        # Set learning rate every epoch
        LRS.on_epoch_begin(epoch=epoch)
        lr = K.get_value(autoencoder.optimizer.lr)
        print("Learning rate: " + str(lr))

        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            X = load_X(videos_list, index, DATA_DIR)
            X_train = X[:, 0:int(VIDEO_LENGTH / 2)]
            y_train = X[:, int(VIDEO_LENGTH / 2):]
            loss.append(autoencoder.train_on_batch(X_train, y_train))

            arrow = int(index / (NB_ITERATIONS / 40))
            stdout.write("\rIteration: " + str(index) + "/" +
                         str(NB_ITERATIONS - 1) + "  " + "loss: " +
                         str(loss[len(loss) - 1]) + "\t    [" +
                         "{0}>".format("=" * (arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            predicted_images = autoencoder.predict(X_train, verbose=0)
            orig_image, truth_image, pred_image = combine_images(
                X_train, y_train, predicted_images)
            pred_image = pred_image * 127.5 + 127.5
            orig_image = orig_image * 127.5 + 127.5
            truth_image = truth_image * 127.5 + 127.5
            if epoch == 0:
                cv2.imwrite(
                    os.path.join(GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_orig.png"),
                    orig_image)
                cv2.imwrite(
                    os.path.join(GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_truth.png"),
                    truth_image)
            cv2.imwrite(
                os.path.join(GEN_IMAGES_DIR,
                             str(epoch) + "_" + str(index) + "_pred.png"),
                pred_image)

        # then after each epoch/iteration
        avg_loss = sum(loss) / len(loss)
        logs = {'loss': avg_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"d_loss\":%f};\n" %
                           (epoch, avg_loss))

        print("\nAvg loss: " + str(avg_loss))

        # Save predicted mask per epoch
        predicted_attn_1 = mask_gen.predict(X_train, verbose=0)
        a_pred_1 = np.reshape(predicted_attn_1, newshape=(10, 10, 16, 16, 1))
        np.save(
            os.path.join(TEST_RESULTS_DIR,
                         'attention_weights_gen1_' + str(epoch) + '.npy'),
            a_pred_1)

        # Save model weights per epoch to file
        encoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'encoder_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'decoder_epoch_' + str(epoch) + '.h5'), True)

    # Train AAE
    if ADVERSARIAL:
        exp_memory = ExperienceMemory(memory_length=100)
        for epoch in range(NB_EPOCHS_AAE):
            print("\n\nEpoch ", epoch)
            g_loss = []
            d_loss = []
            # a_loss = []

            # # Set learning rate every epoch
            # LRS.on_epoch_begin(epoch=epoch)
            lr = K.get_value(autoencoder.optimizer.lr)
            print("Learning rate: " + str(lr))

            for index in range(NB_ITERATIONS):
                # Train Autoencoder
                X = load_X(videos_list, index, DATA_DIR)
                X_train = X[:, 0:int(VIDEO_LENGTH / 2)]
                y_train = X[:, int(VIDEO_LENGTH / 2):]

                future_images = autoencoder.predict(X_train, verbose=0)
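                # Mix freshly generated fakes with older ones from the replay
                # buffer so the discriminator also sees past generator output.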
                trainable_fakes = exp_memory.get_trainable_fakes(
                    current_gens=future_images, exp_window_size=5)

                # Train Discriminator on future images (y_train, not X_train)
                X = np.concatenate((y_train, trainable_fakes))
                y = np.concatenate(
                    (np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.int32),
                     np.zeros(shape=(BATCH_SIZE, 10, 1), dtype=np.int32)),
                    axis=0)
                d_loss.append(discriminator.train_on_batch(X, y))

                # Train AAE
                set_trainability(discriminator, False)
                y = np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.int32)
                g_loss.append(aae.train_on_batch(X_train, y))
                set_trainability(discriminator, True)

                # # Train Autoencoder
                # a_loss.append(autoencoder.train_on_batch(X_train, y_train))

                arrow = int(index / (NB_ITERATIONS / 30))
                stdout.write("\rIteration: " + str(index) + "/" +
                             str(NB_ITERATIONS - 1) + "  " + "g_loss: " +
                             str(g_loss[len(g_loss) - 1]) + "  " + "d_loss: " +
                             str(d_loss[len(d_loss) - 1]) + "\t    [" +
                             "{0}>".format("=" * (arrow)))
                stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file
                predicted_images = autoencoder.predict(X_train, verbose=0)
                orig_image, truth_image, pred_image = combine_images(
                    X_train, y_train, predicted_images)
                pred_image = pred_image * 127.5 + 127.5
                orig_image = orig_image * 127.5 + 127.5
                truth_image = truth_image * 127.5 + 127.5
                if epoch == 0:
                    cv2.imwrite(
                        os.path.join(
                            GEN_IMAGES_DIR,
                            str(epoch) + "_" + str(index) + "_aae_orig.png"),
                        orig_image)
                    cv2.imwrite(
                        os.path.join(
                            GEN_IMAGES_DIR,
                            str(epoch) + "_" + str(index) + "_aae_truth.png"),
                        truth_image)
                cv2.imwrite(
                    os.path.join(
                        GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_aae_pred.png"),
                    pred_image)

                predicted_attn_1 = mask_gen.predict(X_train, verbose=0)
                a_pred_1 = np.reshape(predicted_attn_1,
                                      newshape=(10, 10, 16, 16, 1))
                np.save(
                    os.path.join(
                        TEST_RESULTS_DIR,
                        'attention_weights_gen1_' + str(epoch) + '.npy'),
                    a_pred_1)

            # then after each epoch/iteration
            # avg_a_loss = sum(a_loss) / len(a_loss)
            avg_g_loss = sum(g_loss) / len(g_loss)
            avg_d_loss = sum(d_loss) / len(d_loss)
            logs = {'g_loss': avg_g_loss, 'd_loss': avg_d_loss}
            TC.on_epoch_end(epoch, logs)

            # Log the losses
            with open(os.path.join(LOG_DIR, 'losses_aae.json'),
                      'a') as log_file:
                log_file.write(
                    "{\"epoch\":%d, \"g_loss\":%f, \"d_loss\":%f};\n" %
                    (epoch, avg_g_loss, avg_d_loss))

            print("\nAvg g_loss: " + str(avg_g_loss) + "  Avg d_loss: " +
                  str(avg_d_loss))

            # Save model weights per epoch to file
            encoder.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'encoder_aae_epoch_' + str(epoch) + '.h5'), True)
            decoder.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'decoder_aae_epoch_' + str(epoch) + '.h5'), True)
            discriminator.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'discriminator_aae_epoch_' + str(epoch) + '.h5'),
                True)

    # End TensorBoard Callback
    TC.on_train_end('_')
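
# `ExperienceMemory` is not defined in this listing. A minimal sketch of an
# experience-replay buffer for the discriminator, assuming that
# get_trainable_fakes() swaps a few past fakes into the current batch; the
# class and method names come from the example above, everything else is an
# assumption:
import numpy as np

class ExperienceMemory(object):
    def __init__(self, memory_length=100):
        self.memory_length = memory_length
        self.memory = []

    def get_trainable_fakes(self, current_gens, exp_window_size):
        # Replace the first few current fakes with randomly drawn past ones,
        # then remember the new generations for future batches.
        batch = np.copy(current_gens)
        if self.memory:
            n = min(exp_window_size, len(self.memory))
            picks = np.random.choice(len(self.memory), size=n, replace=False)
            for i, p in enumerate(picks):
                batch[i] = self.memory[p]
        self.memory.extend(list(current_gens))
        self.memory = self.memory[-self.memory_length:]
        return batch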
Example #7
def train(BATCH_SIZE, GEN_WEIGHTS, DISC_WEIGHTS):
    print("Loading data...")
    X_train = hkl.load(os.path.join(DATA_DIR, 'X_train.hkl'))
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5

    if SHUFFLE:
        # Shuffle images to aid generalization
        X_train = np.random.permutation(X_train)

    print("Creating models...")
    # Create the Generator and Discriminator models
    generator = generator_model()
    discriminator = discriminator_model()

    # Create the full GAN model with discriminator non-trainable
    GAN = gan_model(generator, discriminator)
    g_optim = G_OPTIM
    d_optim = D_OPTIM

    generator.compile(loss='binary_crossentropy', optimizer='sgd')
    GAN.compile(loss='binary_crossentropy', optimizer=g_optim)
    set_trainability(discriminator, True)
    discriminator.compile(loss='binary_crossentropy', optimizer=d_optim)

    if PRINT_MODEL_SUMMARY:
        print(generator.summary())
        print(discriminator.summary())
        print(GAN.summary())
        # exit(0)

    # Save model to file
    if SAVE_MODEL:
        print("Saving models to file...")
        model_json = generator.to_json()
        with open(os.path.join(MODEL_DIR, "generator.json"), "w") as json_file:
            json_file.write(model_json)
        plot_model(generator,
                   to_file=os.path.join(MODEL_DIR, 'generator.png'),
                   show_shapes=True)

        model_json = discriminator.to_json()
        with open(os.path.join(MODEL_DIR, "discriminator.json"),
                  "w") as json_file:
            json_file.write(model_json)
        plot_model(discriminator,
                   to_file=os.path.join(MODEL_DIR, 'discriminator.png'),
                   show_shapes=True)

        model_json = GAN.to_json()
        with open(os.path.join(MODEL_DIR, "GAN.json"), "w") as json_file:
            json_file.write(model_json)
        plot_model(GAN,
                   to_file=os.path.join(MODEL_DIR, 'GAN.png'),
                   show_shapes=True)

    if GEN_WEIGHTS != "None":
        print("Pre-loading generator with weights...")
        load_weights(GEN_WEIGHTS, generator)
    if DISC_WEIGHTS != "None":
        print("Pre-loading discriminator with weights...")
        load_weights(DISC_WEIGHTS, discriminator)

    NB_ITERATIONS = int(X_train.shape[0] / BATCH_SIZE)

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR,
                                 histogram_freq=0,
                                 write_graph=False,
                                 write_images=False)
    # TC.set_model(generator, discriminator)

    noise = np.zeros((BATCH_SIZE, 100), dtype=np.float32)

    print("Beginning Training...")
    # Begin Training
    for epoch in range(NB_EPOCHS):
        print("\n\nEpoch ", epoch)
        g_loss = []
        d_loss = []
        for index in range(NB_ITERATIONS):

            # Generate images
            for i in range(BATCH_SIZE):
                noise[i, :] = np.random.normal(0, 1, 100)
            image_batch = X_train[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]
            generated_images = generator.predict(noise, verbose=0)

            # Save a few sample generated frames from this batch to disk
            for i in range(1, 5):
                image = generated_images[i] * 127.5 + 127.5
                cv2.imwrite(
                    os.path.join(
                        GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_" + str(i) +
                        ".png"), image)

            # Train Discriminator
            X = np.concatenate((image_batch, generated_images))
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE
            d_loss.append(discriminator.train_on_batch(X, y))
            # print("Epoch %d Batch %d d_loss : %f" % (epoch, index, d_loss))

            # Train GAN
            for i in range(BATCH_SIZE):
                noise[i, :] = np.random.normal(0, 1, 100)
            set_trainability(discriminator, False)
            g_loss.append(GAN.train_on_batch(noise, [1] * BATCH_SIZE))
            set_trainability(discriminator, True)

            # Train GAN as to keep gen_loss lower than d_loss
            while (g_loss[len(g_loss) - 1] > d_loss[len(d_loss) - 1]):
                # Train GAN
                for i in range(BATCH_SIZE):
                    noise[i, :] = np.random.normal(0, 1, 100)
                set_trainability(discriminator, False)
                g_loss.append(GAN.train_on_batch(noise, [1] * BATCH_SIZE))
                set_trainability(discriminator, True)

            arrow = int(index / 10)
            stdout.write("\rIteration: " + str(index) + "/" +
                         str(NB_ITERATIONS - 1) + "  " + "g_loss: " +
                         str(g_loss[len(g_loss) - 1]) + "\t    " + "d_loss: " +
                         str(d_loss[len(d_loss) - 1]) + "\t    [" +
                         "{0}>".format("=" * (arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            image = combine_images(generated_images)
            image = image * 127.5 + 127.5
            # Image.fromarray(image.astype(np.uint8)).save(str(epoch) + "_" + str(index) + ".png")
            cv2.imwrite(
                os.path.join(GEN_IMAGES_DIR,
                             str(epoch) + "_" + str(index) + ".png"), image)

        # then after each epoch/iteration
        avg_g_loss = sum(g_loss) / len(g_loss)
        avg_d_loss = sum(d_loss) / len(d_loss)
        logs = {'g_loss': avg_g_loss, 'd_loss': avg_d_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"g_loss\":%f, \"d_loss\":%f};\n" %
                           (epoch, avg_g_loss, avg_d_loss))

        # Save model weights per epoch to file
        generator.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'generator_epoch_' + str(epoch) + '.h5'), True)
        discriminator.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'discriminator_epoch_' + str(epoch) + '.h5'), True)

    # End TensorBoard Callback
    TC.on_train_end('_')
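
# `set_trainability` is used by every adversarial example here but not shown.
# A minimal sketch of the usual Keras freeze/unfreeze helper (assumed):
def set_trainability(model, trainable=False):
    # Toggle the trainable flag on the model and all of its layers; Keras
    # only picks the change up when the affected model is (re)compiled, which
    # is why the stacked GAN is compiled while the discriminator is frozen
    # and the discriminator is compiled while it is trainable.
    model.trainable = trainable
    for layer in model.layers:
        layer.trainable = trainable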
Example #8
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, DIS_WEIGHTS):
    print("Loading data...")
    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl'))

    # Build video progressions
    videos_list = []
    start_frame_index = 1
    end_frame_index = VIDEO_LENGTH + 1
    while (end_frame_index <= len(frames_source)):
        frame_list = frames_source[start_frame_index:end_frame_index]
        if (len(set(frame_list)) == 1):
            videos_list.append(range(start_frame_index, end_frame_index))
            start_frame_index = start_frame_index + 1
            end_frame_index = end_frame_index + 1
        else:
            start_frame_index = end_frame_index - 1
            end_frame_index = start_frame_index + VIDEO_LENGTH

    videos_list = np.asarray(videos_list, dtype=np.int32)
    n_videos = videos_list.shape[0]

    if SHUFFLE:
        # Shuffle images to aid generalization
        videos_list = np.random.permutation(videos_list)

    # Build the Spatio-temporal Autoencoder
    print("Creating models...")
    encoder = encoder_model()
    decoder = decoder_model()
    discriminator = discriminator_model()
    autoencoder = autoencoder_model(encoder, decoder)
    da = da_model(autoencoder, discriminator)

    run_utilities(encoder, decoder, autoencoder, discriminator, ENC_WEIGHTS,
                  DEC_WEIGHTS, DIS_WEIGHTS)

    da.compile(loss='binary_crossentropy', optimizer=OPTIM_A)
    set_trainability(discriminator, True)
    discriminator.compile(loss='binary_crossentropy', optimizer=OPTIM_D)
    NB_ITERATIONS = int(n_videos / BATCH_SIZE)

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR,
                                 histogram_freq=0,
                                 write_graph=False,
                                 write_images=False)
    # LRS = lrs_callback.LearningRateScheduler(schedule=schedule)
    # LRS.set_model(autoencoder)

    print("Beginning Training...")
    # Begin Training
    for epoch in range(NB_EPOCHS):
        print("\n\nEpoch ", epoch)
        a_loss = []
        d_loss = []
        # Set learning rate every epoch
        # LRS.on_epoch_begin(epoch=epoch)
        # lr = K.get_value(autoencoder.optimizer.lr)
        # print ("Learning rate: " + str(lr))

        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            X = load_X(videos_list, index, DATA_DIR)
            X_train = X[:, 0:int(VIDEO_LENGTH / 2)]
            y_train = X[:, int(VIDEO_LENGTH / 2):]

            # Adversarially training a pre-trained autoencoder
            future_images = autoencoder.predict(X_train, verbose=0)

            # Train Discriminator on future images (y_train, not X_train)
            X = np.concatenate((y_train, future_images))
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE
            d_loss.append(discriminator.train_on_batch(X, y))

            # Train Discriminative Autoencoder
            set_trainability(discriminator, False)
            a_loss.append(da.train_on_batch(X_train, [1] * BATCH_SIZE))
            set_trainability(discriminator, True)

            arrow = int(index / (NB_ITERATIONS / 40))
            stdout.write("\rIteration: " + str(index) + "/" +
                         str(NB_ITERATIONS - 1) + "  " + "a_loss: " +
                         str(a_loss[len(a_loss) - 1]) + "\t    " + "d_loss: " +
                         str(d_loss[len(d_loss) - 1]) + "\t    [" +
                         "{0}>".format("=" * (arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            generated_images = autoencoder.predict(X_train, verbose=0)
            orig_image, image, truth_image = combine_images(
                generated_images, X_train, y_train)
            image = image * 127.5 + 127.5
            orig_image = orig_image * 127.5 + 127.5
            truth_image = truth_image * 127.5 + 127.5
            if epoch == 0:
                cv2.imwrite(
                    os.path.join(GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_orig.png"),
                    orig_image)
                cv2.imwrite(
                    os.path.join(GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_truth.png"),
                    truth_image)
            cv2.imwrite(
                os.path.join(GEN_IMAGES_DIR,
                             str(epoch) + "_" + str(index) + ".png"), image)

        # then after each epoch/iteration
        avg_a_loss = sum(a_loss) / len(a_loss)
        avg_d_loss = sum(d_loss) / len(d_loss)
        logs = {'a_loss': avg_a_loss, 'd_loss': avg_d_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"a_loss\":%f, \"d_loss\":%f};\n" %
                           (epoch, avg_a_loss, avg_d_loss))

        print("\nAvg a_loss: " + str(avg_a_loss) + "  Avg d_loss: " +
              str(avg_d_loss))

        # Save model weights per epoch to file
        encoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'encoder_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'decoder_epoch_' + str(epoch) + '.h5'), True)
        discriminator.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'discriminator_epoch_' + str(epoch) + '.h5'), True)

    # End TensorBoard Callback
    TC.on_train_end('_')
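
# `da_model` (and the analogous `aae_model` in Example #6) are not shown in
# this listing. A minimal sketch of the usual stacked generator-discriminator
# model, assuming the Sequential API used elsewhere in these examples:
from keras.models import Sequential

def da_model(autoencoder, discriminator):
    # Feed the autoencoder's predicted future frames into the discriminator;
    # the caller freezes the discriminator via set_trainability() before each
    # generator update so only the autoencoder weights change.
    model = Sequential()
    model.add(autoencoder)
    model.add(discriminator)
    return model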
Example #9
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
    print("Loading data definitions.")

    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_208.hkl'))
    # videos_list_1 = get_video_lists(frames_source=frames_source, stride=8, frame_skip=0)
    videos_list = get_video_lists(frames_source=frames_source, stride=8, frame_skip=0)
    # videos_list_2 = get_video_lists(frames_source=frames_source, stride=8, frame_skip=1)
    # videos_list = np.concatenate((videos_list_1, videos_list_2), axis=0)

    # Load actions from annotations
    action_labels = hkl.load(os.path.join(DATA_DIR, 'annotations_train_208.hkl'))
    ped_action_classes, ped_class_count = get_action_classes(action_labels=action_labels)
    print("Training Stats: " + str(ped_class_count))

    if RAM_DECIMATE:
        frames = load_to_RAM(frames_source=frames_source)

    if SHUFFLE:
        # Shuffle images to aid generalization
        videos_list = np.random.permutation(videos_list)

    # Setup test
    val_frames_source = hkl.load(os.path.join(VAL_DATA_DIR, 'sources_val_208.hkl'))
    val_videos_list = get_video_lists(frames_source=val_frames_source, stride=8, frame_skip=0)

    # Load test action annotations
    val_action_labels = hkl.load(os.path.join(VAL_DATA_DIR, 'annotations_val_208.hkl'))
    val_ped_action_classes, val_ped_class_count = get_action_classes(val_action_labels)
    print("Val Stats: " + str(val_ped_class_count))

    # Build the Spatio-temporal Autoencoder
    print ("Creating models.")
    encoder = encoder_model()
    decoder = decoder_model()

    # Build stacked classifier
    classifier = ensemble_c3d()
    run_utilities(encoder, decoder, classifier, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS)
    sclassifier = stacked_classifier_model(encoder, decoder, classifier)
    sclassifier.compile(loss=["binary_crossentropy"],
                        optimizer=OPTIM_C,
                        metrics=['accuracy'])
    print(sclassifier.summary())

    n_videos = videos_list.shape[0]
    n_val_videos = val_videos_list.shape[0]
    NB_ITERATIONS = int(n_videos/BATCH_SIZE)
    # NB_ITERATIONS = 1
    NB_VAL_ITERATIONS = int(n_val_videos/BATCH_SIZE)
    # NB_VAL_ITERATIONS = 1

    # Setup TensorBoard Callback
    TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR, histogram_freq=0, write_graph=False, write_images=False)
    LRS_cla = lrs_callback.LearningRateScheduler(schedule=cla_schedule)
    LRS_cla.set_model(sclassifier)

    print ("Beginning Training.")
    # Begin Training

    # Train Classifier
    print("Training Classifier...")
    for epoch in range(1, NB_EPOCHS_CLASS+1):
        print("\n\nEpoch ", epoch)
        c_loss = []
        val_c_loss = []

        # Set learning rate every epoch
        LRS_cla.on_epoch_begin(epoch=epoch)
        lr = K.get_value(sclassifier.optimizer.lr)

        y_train_pred = []
        y_train_true = []
        print("Learning rate: " + str(lr))
        print("c_loss_metrics: " + str(sclassifier.metrics_names))

        for index in range(NB_ITERATIONS):
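            # With RAM_DECIMATE, batches are sliced from the frame array that
            # load_to_RAM() preloaded; otherwise each clip is read from disk
            # under DATA_DIR.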
            if RAM_DECIMATE:
                X, y = load_X_y_RAM(videos_list, index, frames, ped_action_classes)
            else:
                X, y = load_X_y(videos_list, index, DATA_DIR, ped_action_classes)

            if REV:
                X_train = np.flip(X[:, 0: int(VIDEO_LENGTH / 2)], axis=1)
            else:
                X_train = X[:, 0: int(VIDEO_LENGTH / 2)]
            y_true_class = y[:, CLASS_TARGET_INDEX]
            y_true_imgs = X[:, int(VIDEO_LENGTH / 2):]

            c_loss.append(sclassifier.train_on_batch(X_train, y_true_class))

            y_train_true.extend(y_true_class)
            y_train_pred.extend(sclassifier.predict(X_train, verbose=0))

            arrow = int(index / (NB_ITERATIONS / 30))
            stdout.write("\rIter: " + str(index) + "/" + str(NB_ITERATIONS - 1) + "  " +
                         "c_loss: " + str([ c_loss[len(c_loss) - 1][j]  for j in [0, 1]]) + "  " +
                         "\t    [" + "{0}>".format("=" * (arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            z, res = encoder.predict(X_train)
            predicted_images = decoder.predict([z, res])
            ped_pred_class = sclassifier.predict(X_train, verbose=0)
            pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1))
            pred_seq = pred_seq * 127.5 + 127.5

            truth_image = arrange_images(y_true_imgs)
            truth_image = truth_image * 127.5 + 127.5
            font = cv2.FONT_HERSHEY_SIMPLEX
            y_orig_classes = y[:, 0: int(VIDEO_LENGTH / 2)]
            y_true_classes = y[:, int(VIDEO_LENGTH / 2):]

            # Add labels as text to the image
            for k in range(BATCH_SIZE):
                for j in range(int(VIDEO_LENGTH / 2)):
                    if y_orig_classes[k, j] > 0.5:
                        label_orig = "crossing"
                    else:
                        label_orig = "not crossing"

                    if y_true_classes[k][j] > 0.5:
                        label_true = "crossing"
                    else:
                        label_true = "not crossing"

                    if ped_pred_class[k][0] > 0.5:
                        label_pred = "crossing"
                    else:
                        label_pred = "not crossing"

                    cv2.putText(pred_seq, label_orig,
                                (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                cv2.LINE_AA)
                    cv2.putText(pred_seq, label_pred,
                                (2 + (j + 16) * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                cv2.LINE_AA)
                    cv2.putText(pred_seq, 'truth: ' + label_true,
                                (2 + (j + 16) * (208), 94 + k * 128), font, 0.5, (255, 255, 255), 1,
                                cv2.LINE_AA)
                    cv2.putText(truth_image, label_true,
                                (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                cv2.LINE_AA)

            cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_pred.png"), pred_seq)
            cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_truth.png"), truth_image)

        # Run over validation data
        print('')
        y_val_pred = []
        y_val_true = []
        for index in range(NB_VAL_ITERATIONS):
            X, y = load_X_y(val_videos_list, index, VAL_DATA_DIR, val_ped_action_classes)
            if REV:
                X_val = np.flip(X[:, 0: int(VIDEO_LENGTH / 2)], axis=1)
            else:
                X_val = X[:, 0: int(VIDEO_LENGTH / 2)]
            y_true_class = y[:, CLASS_TARGET_INDEX]
            y_true_imgs = X[:, int(VIDEO_LENGTH / 2):]

            val_c_loss.append(sclassifier.test_on_batch(X_val, y_true_class))
            y_val_true.extend(y_true_class)
            y_val_pred.extend(sclassifier.predict(X_val, verbose=0))

            arrow = int(index / (NB_VAL_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" + str(NB_VAL_ITERATIONS - 1) + "  " +
                         "val_c_loss: " +  str([ val_c_loss[len(val_c_loss) - 1][j] for j in [0, 1]]))
            stdout.flush()

        # Save generated images to file
        z, res = encoder.predict(X_val)
        val_predicted_images = decoder.predict([z, res])
        val_ped_pred_class = sclassifier.predict(X_val, verbose=0)
        orig_image = arrange_images(X_val)
        truth_image = arrange_images(y_true_imgs)
        pred_image = arrange_images(val_predicted_images)
        pred_image = pred_image * 127.5 + 127.5
        orig_image = orig_image * 127.5 + 127.5
        truth_image = truth_image * 127.5 + 127.5
        font = cv2.FONT_HERSHEY_SIMPLEX
        if epoch == 1:
            y_orig_classes = y[:, 0: int(VIDEO_LENGTH / 2)]
            y_true_classes = y[:, int(VIDEO_LENGTH / 2):]
            # Add labels as text to the image
            for k in range(BATCH_SIZE):
                for j in range(int(VIDEO_LENGTH / 2)):
                    if (y_orig_classes[k, j] > 0.5):
                        label_orig = "crossing"
                    else:
                        label_orig = "not crossing"

                    if (y_true_classes[k][j] > 0.5):
                        label_true = "crossing"
                    else:
                        label_true = "not crossing"

                    cv2.putText(orig_image, label_orig,
                                (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                cv2.LINE_AA)
                    cv2.putText(truth_image, label_true,
                                (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                                cv2.LINE_AA)
            cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) +
                                     "_cla_val_orig.png"), orig_image)
            cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) +
                                     "_cla_val_truth.png"), truth_image)

        # Add labels as text to the image
        for k in range(BATCH_SIZE):
            # class_num_y = np.argmax(val_ped_pred_class[k])
            if (val_ped_pred_class[k][0] > 0.5):
                label_pred = "crossing"
            else:
                label_pred = "not crossing"

            for j in range(int(VIDEO_LENGTH / 2)):
                cv2.putText(pred_image, label_pred,
                            (2 + j * (208), 114 + k * 128), font, 0.5, (255, 255, 255), 1,
                            cv2.LINE_AA)
        cv2.imwrite(os.path.join(CLA_GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_cla_val_pred.png"),
                    pred_image)

        # then after each epoch/iteration
        avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0)
        avg_val_c_loss = np.mean(np.asarray(val_c_loss, dtype=np.float32), axis=0)

        # Calculate Precision and Recall scores
        train_prec, train_rec, train_fbeta, train_support = get_sklearn_metrics(np.asarray(y_train_true),
                                                                                np.asarray(y_train_pred),
                                                                                avg='binary',
                                                                                pos_label=1)
        val_prec, val_rec, val_fbeta, val_support = get_sklearn_metrics(np.asarray(y_val_true),
                                                                        np.asarray(y_val_pred),
                                                                        avg='binary',
                                                                        pos_label=1)

        print("\nTrain Prec: %.2f, Recall: %.2f, Fbeta: %.2f" % (train_prec, train_rec, train_fbeta))
        print("Val Prec: %.2f, Recall: %.2f, Fbeta: %.2f" % (val_prec, val_rec, val_fbeta))
        loss_values = np.asarray(avg_c_loss.tolist() + [train_prec.tolist()] +
                                 [train_rec.tolist()] +
                                 avg_val_c_loss.tolist() + [val_prec.tolist()] +
                                 [val_rec.tolist()], dtype=np.float32)
        # loss_values = np.asarray(avg_c_loss.tolist() + train_prec.tolist() +
        #                          train_rec.tolist() +
        #                          avg_val_c_loss.tolist() + val_prec.tolist() +
        #                          val_rec.tolist(), dtype=np.float32)

        precs = ['prec_' + action for action in simple_ped_set]
        recs = ['rec_' + action for action in simple_ped_set]
        c_loss_keys = ['c_' + metric for metric in sclassifier.metrics_names + precs + recs]
        val_c_loss_keys = ['c_val_' + metric for metric in sclassifier.metrics_names + precs + recs]

        loss_keys = c_loss_keys + val_c_loss_keys
        logs = dict(zip(loss_keys, loss_values))

        TC_cla.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses_cla.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, %s;\n" % (epoch, logs))

        print("\nAvg c_loss: " + str(avg_c_loss) +
              " Avg val_c_loss: " + str(avg_val_c_loss))

        # Save model weights per epoch to file
        if FINETUNE_ENCODER:
            encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_cla_epoch_' + str(epoch) + '.h5'), True)
        if FINETUNE_DECODER:
            decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_cla_epoch_' + str(epoch) + '.h5'), True)
        classifier.save_weights(os.path.join(CHECKPOINT_DIR, 'classifier_cla_epoch_' + str(epoch) + '.h5'),
                                True)
    print(get_classification_report(np.asarray(y_train_true), np.asarray(y_train_pred)))
    print(get_classification_report(np.asarray(y_val_true), np.asarray(y_val_pred)))
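
# `get_sklearn_metrics` above is a thin wrapper that is not shown in this
# listing. A plausible sketch, assuming sigmoid outputs are thresholded at
# 0.5 before scoring (the threshold is an assumption):
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

def get_sklearn_metrics(y_true, y_pred, avg='binary', pos_label=1):
    # Binarize probabilities, then compute precision/recall/F-beta/support.
    y_true_bin = (np.asarray(y_true).ravel() > 0.5).astype(int)
    y_pred_bin = (np.asarray(y_pred).ravel() > 0.5).astype(int)
    return precision_recall_fscore_support(y_true_bin, y_pred_bin,
                                           average=avg, pos_label=pos_label)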
Example #10
def train(BATCH_SIZE, ENC_WEIGHTS, TEM_WEIGHTS, DEC_WEIGHTS):
    print("Loading data...")
    frames = hkl.load(os.path.join(DATA_DIR, 'X_train_128.hkl'))
    frames = (frames.astype(np.float32) - 127.5) / 127.5

    n_videos = int(frames.shape[0] / VIDEO_LENGTH)
    X_train = np.zeros((n_videos, VIDEO_LENGTH) + frames.shape[1:],
                       dtype=np.float32)

    # Arrange frames in a progression
    for i in range(n_videos):
        X_train[i] = frames[i * VIDEO_LENGTH:(i + 1) * VIDEO_LENGTH]

    if SHUFFLE:
        # Shuffle images to aid generalization
        X_train = np.random.permutation(X_train)

    # Build the Spatio-temporal Autoencoder
    print("Creating models...")
    encoder = encoder_model()
    temporal_net = temporal_model()
    decoder = decoder_model()
    autoencoder = autoencoder_model(encoder, temporal_net, decoder)

    run_utilities(encoder, temporal_net, decoder, autoencoder, ENC_WEIGHTS,
                  TEM_WEIGHTS, DEC_WEIGHTS)

    # generator.compile(loss='binary_crossentropy', optimizer='sgd')
    # GAN.compile(loss='binary_crossentropy', optimizer=g_optim)
    # set_trainability(discriminator, True)
    # discriminator.compile(loss='binary_crossentropy', optimizer=d_optim)
    autoencoder.compile(loss='binary_crossentropy', optimizer=OPTIM)

    NB_ITERATIONS = int(X_train.shape[0] / BATCH_SIZE)

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR,
                                 histogram_freq=0,
                                 write_graph=False,
                                 write_images=False)

    print("Beginning Training...")
    # Begin Training
    for epoch in range(NB_EPOCHS):
        print("\n\nEpoch ", epoch)
        loss = []
        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            X = X_train[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]
            loss.append(autoencoder.train_on_batch(X, X))

            arrow = int(index / (NB_ITERATIONS / 30))
            stdout.write("\rIteration: " + str(index) + "/" +
                         str(NB_ITERATIONS - 1) + "  " + "loss: " +
                         str(loss[len(loss) - 1]) + "\t    [" +
                         "{0}>".format("=" * (arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            generated_images = autoencoder.predict(X, verbose=0)
            orig_image, image = combine_images(generated_images, X)
            image = image * 127.5 + 127.5
            orig_image = orig_image * 127.5 + 127.5
            cv2.imwrite(
                os.path.join(GEN_IMAGES_DIR,
                             str(epoch) + "_" + str(index) + "_orig.png"),
                orig_image)
            cv2.imwrite(
                os.path.join(GEN_IMAGES_DIR,
                             str(epoch) + "_" + str(index) + ".png"), image)
            # image = X[0, 1]
            # image = image * 127.5 + 127.5
            # cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_X_1.png"), image)
            # image = X[0, 2]
            # image = image * 127.5 + 127.5
            # cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_X_2.png"), image)
            # image = X[0, 3]
            # image = image * 127.5 + 127.5
            # cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_X_3.png"), image)
            # image = X[0, 4]
            # image = image * 127.5 + 127.5
            # cv2.imwrite(os.path.join(GEN_IMAGES_DIR, str(epoch) + "_" + str(index) + "_X_4.png"), image)

        # then after each epoch/iteration
        avg_loss = sum(loss) / len(loss)
        logs = {'loss': avg_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"d_loss\":%f};\n" %
                           (epoch, avg_loss))

        # Save model weights per epoch to file
        encoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'encoder_epoch_' + str(epoch) + '.h5'), True)
        temporal_net.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'temporal_net_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'decoder_epoch_' + str(epoch) + '.h5'), True)

    # End TensorBoard Callback
    TC.on_train_end('_')
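
# `combine_images` is not shown in this listing. A minimal sketch that tiles
# each (batch, time, height, width, channels) tensor into one image per
# argument, batch along rows and time along columns; the layout and the
# variadic signature are assumptions inferred from the calls above:
import numpy as np

def combine_images(*batches):
    tiled = []
    for batch in batches:
        b, t, h, w, c = batch.shape
        grid = np.zeros((b * h, t * w, c), dtype=batch.dtype)
        for i in range(b):
            for j in range(t):
                grid[i * h:(i + 1) * h, j * w:(j + 1) * w] = batch[i, j]
        tiled.append(grid)
    return tiled if len(tiled) > 1 else tiled[0]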
Example #11
def train(BATCH_SIZE, ENC_WEIGHTS, TEM_WEIGHTS, DEC_WEIGHTS):
    print("Loading data...")
    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl'))

    # Build video progressions
    videos_list = []
    start_frame_index = 1
    end_frame_index = VIDEO_LENGTH + 1
    while (end_frame_index <= len(frames_source)):
        frame_list = frames_source[start_frame_index:end_frame_index]
        if (len(set(frame_list)) == 1):
            videos_list.append(range(start_frame_index, end_frame_index))
            start_frame_index = start_frame_index + 1
            end_frame_index = end_frame_index + 1
        else:
            start_frame_index = end_frame_index - 1
            end_frame_index = start_frame_index + VIDEO_LENGTH

    videos_list = np.asarray(videos_list, dtype=np.int32)
    n_videos = videos_list.shape[0]

    if SHUFFLE:
        # Shuffle images to aid generalization
        videos_list = np.random.permutation(videos_list)

    # Build the Spatio-temporal Autoencoder
    print("Creating models...")
    encoder = encoder_model()
    temporizer = temporal_model()
    decoder = decoder_model()

    # print (encoder.summary())
    # print (temporizer.summary())
    # print (decoder.summary())

    autoencoder = autoencoder_model(encoder, temporizer, decoder)
    run_utilities(encoder, temporizer, decoder, autoencoder, ENC_WEIGHTS,
                  TEM_WEIGHTS, DEC_WEIGHTS)

    autoencoder.compile(loss='mean_squared_error', optimizer=OPTIM)

    NB_ITERATIONS = int(n_videos / BATCH_SIZE)

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR,
                                 histogram_freq=0,
                                 write_graph=False,
                                 write_images=False)
    LRS = lrs_callback.LearningRateScheduler(schedule=schedule)
    LRS.set_model(autoencoder)

    print("Beginning Training...")
    # Begin Training
    for epoch in range(NB_EPOCHS):
        print("\n\nEpoch ", epoch)
        loss = []

        # Set learning rate every epoch
        LRS.on_epoch_begin(epoch=epoch)
        lr = K.get_value(autoencoder.optimizer.lr)
        print("Learning rate: " + str(lr))

        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            X = load_X(videos_list, index, DATA_DIR)
            loss.append(autoencoder.train_on_batch(X, X))

            arrow = int(index / (NB_ITERATIONS / 40))
            stdout.write("\rIteration: " + str(index) + "/" +
                         str(NB_ITERATIONS - 1) + "  " + "loss: " +
                         str(loss[len(loss) - 1]) + "\t    [" +
                         "{0}>".format("=" * (arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            generated_images = autoencoder.predict(X, verbose=0)
            orig_image, image = combine_images(generated_images, X)
            image = image * 127.5 + 127.5
            orig_image = orig_image * 127.5 + 127.5
            if epoch == 0:
                cv2.imwrite(
                    os.path.join(GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_orig.png"),
                    orig_image)
            cv2.imwrite(
                os.path.join(GEN_IMAGES_DIR,
                             str(epoch) + "_" + str(index) + ".png"), image)

        # then after each epoch/iteration
        avg_loss = sum(loss) / len(loss)
        logs = {'loss': avg_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"loss\":%f};\n" %
                           (epoch, avg_loss))

        print("\nAvg loss: " + str(avg_loss))

        # Save model weights per epoch to file
        encoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'encoder_epoch_' + str(epoch) + '.h5'), True)
        temporizer.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'temporizer_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'decoder_epoch_' + str(epoch) + '.h5'), True)

    # End TensorBoard Callback
    TC.on_train_end('_')
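
# `load_X` is not shown in this listing. A minimal sketch, assuming frames
# live as DATA_DIR/frame_<n>.png and are normalized to [-1, 1] like the other
# loaders here; it relies on the module-level BATCH_SIZE and VIDEO_LENGTH
# constants that the rest of the file uses (file layout is an assumption):
import os
import cv2
import numpy as np

def load_X(videos_list, index, data_dir, img_size=(128, 128, 3)):
    X = np.zeros((BATCH_SIZE, VIDEO_LENGTH) + img_size, dtype=np.float32)
    for b, video in enumerate(
            videos_list[index * BATCH_SIZE:(index + 1) * BATCH_SIZE]):
        for k, frame_num in enumerate(video):
            im_file = os.path.join(data_dir,
                                   'frame_' + str(frame_num) + '.png')
            frame = cv2.imread(im_file, cv2.IMREAD_COLOR)
            X[b, k] = (frame.astype(np.float32) - 127.5) / 127.5
    return X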
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, GEN_WEIGHTS, DIS_WEIGHTS):
    print("Loading data definitions...")
    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_128.hkl'))

    # Build video progressions
    videos_list = []
    start_frame_index = 1
    end_frame_index = VIDEO_LENGTH + 1
    while (end_frame_index <= len(frames_source)):
        frame_list = frames_source[start_frame_index:end_frame_index]
        if (len(set(frame_list)) == 1):
            videos_list.append(range(start_frame_index, end_frame_index))
            start_frame_index = start_frame_index + 1
            end_frame_index = end_frame_index + 1
        else:
            start_frame_index = end_frame_index - 1
            end_frame_index = start_frame_index + VIDEO_LENGTH

    videos_list = np.asarray(videos_list, dtype=np.int32)
    n_videos = videos_list.shape[0]

    # Setup validation
    val_frames_source = hkl.load(
        os.path.join(VAL_DATA_DIR, 'sources_val_128.hkl'))
    val_videos_list = []
    start_frame_index = 1
    end_frame_index = VIDEO_LENGTH + 1
    while (end_frame_index <= len(val_frames_source)):
        val_frame_list = val_frames_source[start_frame_index:end_frame_index]
        if (len(set(val_frame_list)) == 1):
            val_videos_list.append(range(start_frame_index, end_frame_index))
            start_frame_index = start_frame_index + VIDEO_LENGTH
            end_frame_index = end_frame_index + VIDEO_LENGTH
        else:
            start_frame_index = end_frame_index - 1
            end_frame_index = start_frame_index + VIDEO_LENGTH

    val_videos_list = np.asarray(val_videos_list, dtype=np.int32)
    n_val_videos = val_videos_list.shape[0]

    if SHUFFLE:
        # Shuffle images to aid generalization
        videos_list = np.random.permutation(videos_list)

    # Build the Spatio-temporal Autoencoder
    print("Creating models...")
    encoder = encoder_model()
    decoder = decoder_model()
    autoencoder = autoencoder_model(encoder, decoder)
    autoencoder.compile(loss="mean_squared_error", optimizer=OPTIM_A)

    intermediate_decoder = Model(inputs=decoder.layers[0].input,
                                 outputs=decoder.layers[1].output)
    mask_gen_1 = Sequential()
    mask_gen_1.add(encoder)
    mask_gen_1.add(intermediate_decoder)
    mask_gen_1.compile(loss='mean_squared_error', optimizer=OPTIM_G)

    if ADVERSARIAL:
        generator = refiner_g_model()
        discriminator = refiner_d_model()
        gan = gan_model(autoencoder, generator, discriminator)
        generator.compile(loss='binary_crossentropy', optimizer='sgd')
        gan.compile(loss=['mae', 'binary_crossentropy'],
                    loss_weights=LOSS_WEIGHTS,
                    optimizer=OPTIM_G,
                    metrics=['accuracy'])
        print('GAN')
        print(gan.summary())
        set_trainability(discriminator, True)
        discriminator.compile(loss='binary_crossentropy',
                              optimizer=OPTIM_D,
                              metrics=['accuracy'])
        run_utilities(encoder, decoder, autoencoder, generator, discriminator,
                      gan, ENC_WEIGHTS, DEC_WEIGHTS, GEN_WEIGHTS, DIS_WEIGHTS)
    else:
        run_utilities(encoder, decoder, autoencoder, 'None', 'None', 'None',
                      ENC_WEIGHTS, DEC_WEIGHTS, 'None', 'None')

    NB_ITERATIONS = int(n_videos / BATCH_SIZE)
    # NB_ITERATIONS = 5
    NB_VAL_ITERATIONS = int(n_val_videos / BATCH_SIZE)

    # for i in range(len(decoder.layers)):
    #     print (decoder.layers[i], str(i))
    #
    # exit(0)

    # Setup TensorBoard Callback
    TC = tb_callback.TensorBoard(log_dir=TF_LOG_DIR,
                                 histogram_freq=0,
                                 write_graph=False,
                                 write_images=False)
    TC_gan = tb_callback.TensorBoard(log_dir=TF_LOG_GAN_DIR,
                                     histogram_freq=0,
                                     write_graph=False,
                                     write_images=False)
    LRS = lrs_callback.LearningRateScheduler(schedule=schedule)
    LRS.set_model(autoencoder)

    print("Beginning Training...")
    # Begin Training
    for epoch in range(NB_EPOCHS_AUTOENCODER):
        print("\n\nEpoch ", epoch)
        loss = []
        val_loss = []

        # Set learning rate every epoch
        LRS.on_epoch_begin(epoch=epoch)
        lr = K.get_value(autoencoder.optimizer.lr)
        print("Learning rate: " + str(lr))

        for index in range(NB_ITERATIONS):
            # Train Autoencoder
            X = load_X(videos_list, index, DATA_DIR, (128, 128, 3))
            X_train = X[:, 0:10]
            y_train = X[:, 10:]
            loss.append(autoencoder.train_on_batch(X_train, y_train))

            arrow = int(index / (NB_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" +
                         str(NB_ITERATIONS - 1) + "  " + "loss: " +
                         str(loss[len(loss) - 1]) + "\t    [" +
                         "{0}>".format("=" * (arrow)))
            stdout.flush()

        if SAVE_GENERATED_IMAGES:
            # Save generated images to file
            predicted_images = autoencoder.predict(X_train, verbose=0)
            orig_image, truth_image, pred_image = combine_images(
                X_train, y_train, predicted_images)
            pred_image = pred_image * 127.5 + 127.5
            orig_image = orig_image * 127.5 + 127.5
            truth_image = truth_image * 127.5 + 127.5
            if epoch == 0:
                cv2.imwrite(
                    os.path.join(GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_orig.png"),
                    orig_image)
                cv2.imwrite(
                    os.path.join(GEN_IMAGES_DIR,
                                 str(epoch) + "_" + str(index) + "_truth.png"),
                    truth_image)
            cv2.imwrite(
                os.path.join(GEN_IMAGES_DIR,
                             str(epoch) + "_" + str(index) + "_pred.png"),
                pred_image)

        # Run over validation data
        for index in range(NB_VAL_ITERATIONS):
            X = load_X(val_videos_list, index, VAL_DATA_DIR, (128, 128, 3))
            X_train = X[:, 0:10]
            y_train = X[:, 10:]
            val_loss.append(autoencoder.test_on_batch(X_train, y_train))

            arrow = int(index / (NB_VAL_ITERATIONS / 40))
            stdout.write("\rIter: " + str(index) + "/" +
                         str(NB_VAL_ITERATIONS - 1) + "  " + "val_loss: " +
                         str(val_loss[len(val_loss) - 1]) + "\t    [" +
                         "{0}>".format("=" * (arrow)))
            stdout.flush()

        # then after each epoch/iteration
        avg_loss = sum(loss) / len(loss)
        avg_val_loss = sum(val_loss) / len(val_loss)
        logs = {'loss': avg_loss, 'val_loss': avg_val_loss}
        TC.on_epoch_end(epoch, logs)

        # Log the losses
        with open(os.path.join(LOG_DIR, 'losses.json'), 'a') as log_file:
            log_file.write("{\"epoch\":%d, \"loss\":%f, \"val_loss\":%f};\n" %
                           (epoch, avg_loss, avg_val_loss))

        print("\nAvg loss: " + str(avg_loss) + " Avg val loss: " +
              str(avg_val_loss))

        # Save model weights per epoch to file
        encoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'encoder_epoch_' + str(epoch) + '.h5'), True)
        decoder.save_weights(
            os.path.join(CHECKPOINT_DIR,
                         'decoder_epoch_' + str(epoch) + '.h5'), True)

        predicted_attn = mask_gen_1.predict(X_train, verbose=0)
        a_pred = np.reshape(predicted_attn,
                            newshape=(BATCH_SIZE, VIDEO_LENGTH - 10, 16, 16,
                                      1))
        np.save(
            os.path.join(ATTN_WEIGHTS_DIR,
                         'attention_weights_cla_gen1_' + str(epoch) + '.npy'),
            a_pred)

    # Train AAE
    if ADVERSARIAL:
        print("Training Stage II.")
        exp_memory = ExperienceMemory(memory_length=100)
        for epoch in range(NB_EPOCHS_GAN):
            print("\n\nEpoch ", epoch)
            g_loss = []
            val_g_loss = []
            d_loss = []
            val_d_loss = []
            # a_loss = []

            # # Set learning rate every epoch
            # LRS.on_epoch_begin(epoch=epoch)
            lr = K.get_value(gan.optimizer.lr)
            print("GAN learning rate: " + str(lr))
            lr = K.get_value(discriminator.optimizer.lr)
            print("Disc learning rate: " + str(lr))
            print("g_loss_metrics: " + str(gan.metrics_names))
            print("d_loss_metrics: " + str(discriminator.metrics_names))

            for index in range(NB_ITERATIONS):
                # Train refiner GAN on this batch
                X = load_X(videos_list, index, DATA_DIR, (128, 128, 3))
                X_hd = load_X(videos_list, index, HD_DATA_DIR, (256, 256, 3))
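                # Condition on the first half of the clip, predict the
                # second half at 128x128 with the autoencoder, then refine
                # the prediction to 256x256 with the generator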
                X128 = X[:, 0:int(VIDEO_LENGTH / 2)]
                Y128 = autoencoder.predict(X128, verbose=0)
                X256_real = X_hd[:, int(VIDEO_LENGTH / 2):]
                X256_fake = generator.predict(Y128, verbose=0)

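                # Mix freshly generated fakes with fakes remembered from
                # earlier batches so the discriminator does not overfit to
                # the refiner's latest outputs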
                trainable_fakes = exp_memory.get_trainable_fakes(
                    current_gens=X256_fake, exp_window_size=4)

                # Train Discriminator on future images (y_train, not X_train)
                X = np.concatenate((X256_real, trainable_fakes))
                y = np.concatenate(
                    (np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.float32),
                     np.zeros(shape=(BATCH_SIZE, 10, 1), dtype=np.float32)),
                    axis=0)
                d_loss.append(discriminator.train_on_batch(X, y))

                # Train AAE
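                # Freeze the discriminator so that only the autoencoder and
                # refiner weights are updated by the combined GAN loss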
                set_trainability(discriminator, False)
                y = np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.float32)
                g_loss.append(gan.train_on_batch(X128, [X256_real, y]))
                set_trainability(discriminator, True)

                # # Train Autoencoder
                # a_loss.append(autoencoder.train_on_batch(X_train, y_train))

                arrow = int(index / (NB_ITERATIONS / 30))
                stdout.write("\rIter: " + str(index) + "/" +
                             str(NB_ITERATIONS - 1) + "  " + "g_loss: " +
                             str([g_loss[len(g_loss) - 1][j]
                                  for j in [0, -1]]) + "  " + "d_loss: " +
                             str(d_loss[len(d_loss) - 1]) + "\t    [" +
                             "{0}>".format("=" * (arrow)))
                stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file
                predicted_images = generator.predict(Y128, verbose=0)
                orig_image, truth_image, pred_image = combine_images(
                    Y128, X256_real, predicted_images)
                pred_image = pred_image * 127.5 + 127.5
                orig_image = orig_image * 127.5 + 127.5
                truth_image = truth_image * 127.5 + 127.5
                if epoch == 0:
                    cv2.imwrite(
                        os.path.join(
                            CLA_GEN_IMAGES_DIR,
                            str(epoch) + "_" + str(index) + "_gan_orig.png"),
                        orig_image)
                    cv2.imwrite(
                        os.path.join(
                            CLA_GEN_IMAGES_DIR,
                            str(epoch) + "_" + str(index) + "_gan_truth.png"),
                        truth_image)
                cv2.imwrite(
                    os.path.join(
                        CLA_GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_gan_pred.png"),
                    pred_image)

            # Run over validation data
            print('')
            for index in range(NB_VAL_ITERATIONS):
                X = load_X(val_videos_list, index, VAL_DATA_DIR, (128, 128, 3))
                X_hd = load_X(val_videos_list, index, VAL_HD_DATA_DIR,
                              (256, 256, 3))
                X128_val = X[:, 0:int(VIDEO_LENGTH / 2)]
                Y128_val = autoencoder.predict(X128_val, verbose=0)
                X256_real_val = X_hd[:, int(VIDEO_LENGTH / 2):]
                X256_fake_val = generator.predict(Y128_val, verbose=0)

                X = np.concatenate((X256_real_val, X256_fake_val))
                y = np.concatenate(
                    (np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.float32),
                     np.zeros(shape=(BATCH_SIZE, 10, 1), dtype=np.float32)),
                    axis=0)
                val_d_loss.append(discriminator.test_on_batch(X, y))

                y = np.ones(shape=(BATCH_SIZE, 10, 1), dtype=np.float32)
                val_g_loss.append(
                    gan.test_on_batch(X128_val, [X256_real_val, y]))

                stdout.write(
                    "\rIter: " + str(index) + "/" +
                    str(NB_VAL_ITERATIONS - 1) + "  " + "val_g_loss: " +
                    str([val_g_loss[len(val_g_loss) - 1][j]
                         for j in [0, -1]]) + "  " + "val_d_loss: " +
                    str(val_d_loss[len(val_d_loss) - 1]))
                stdout.flush()

            # then after each epoch/iteration
            avg_d_loss = np.mean(np.asarray(d_loss, dtype=np.float32), axis=0)
            avg_val_d_loss = np.mean(np.asarray(val_d_loss, dtype=np.float32),
                                     axis=0)
            avg_g_loss = np.mean(np.asarray(g_loss, dtype=np.float32), axis=0)
            avg_val_g_loss = np.mean(np.asarray(val_g_loss, dtype=np.float32),
                                     axis=0)

            loss_values = np.asarray(
                avg_d_loss.tolist() + avg_val_d_loss.tolist() +
                avg_g_loss.tolist() + avg_val_g_loss.tolist(),
                dtype=np.float32)
            d_loss_keys = [
                'd_' + metric for metric in discriminator.metrics_names
            ]
            g_loss_keys = ['g_' + metric for metric in gan.metrics_names]
            val_d_loss_keys = [
                'd_val_' + metric for metric in discriminator.metrics_names
            ]
            val_g_loss_keys = [
                'g_val_' + metric for metric in gan.metrics_names
            ]

            loss_keys = d_loss_keys + val_d_loss_keys + \
                        g_loss_keys + val_g_loss_keys
            logs = dict(zip(loss_keys, loss_values))

            TC_gan.on_epoch_end(epoch, logs)

            # Log the losses
            with open(os.path.join(LOG_DIR, 'losses_gan.json'),
                      'a') as log_file:
                log_file.write("{\"epoch\":%d, %s;\n" % (epoch, logs))

            print("\nAvg d_loss: " + str(avg_d_loss) + " Avg val_d_loss: " +
                  str(avg_val_d_loss) + "\nAvg g_loss: " +
                  str([avg_g_loss[j] for j in [0, -1]]) + " Avg val_g_loss: " +
                  str([avg_val_g_loss[j] for j in [0, -1]]))

            # Save model weights per epoch to file
            encoder.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'encoder_gan_epoch_' + str(epoch) + '.h5'), True)
            decoder.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'decoder_gan_epoch_' + str(epoch) + '.h5'), True)
            generator.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'generator_gan_epoch_' + str(epoch) + '.h5'),
                True)
            discriminator.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'discriminator_gan_epoch_' + str(epoch) + '.h5'),
                True)

    # End TensorBoard Callback
    TC.on_train_end('_')
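

# Neither `set_trainability` nor `ExperienceMemory` is defined in this
# excerpt. The following are minimal sketches of what the GAN stage above
# requires of them; the project's actual implementations may differ.


def set_trainability(model, trainable):
    # Toggle weight updates for every layer; Keras only honors the new
    # flags after the next compile(), which the call sites above perform.
    model.trainable = trainable
    for layer in model.layers:
        layer.trainable = trainable


class ExperienceMemory(object):
    # Replay buffer of previously generated batches. Showing the
    # discriminator a mix of current and historical fakes is the history
    # trick used in SimGAN-style refiner training.
    def __init__(self, memory_length=100):
        self.memory_length = memory_length
        self.memory = []

    def get_trainable_fakes(self, current_gens, exp_window_size=4):
        # Swap up to `exp_window_size` samples of the current batch for
        # randomly chosen remembered ones, then remember the new batch.
        batch = np.copy(current_gens)
        if self.memory:
            n_swap = min(exp_window_size, len(self.memory), len(batch))
            picks = np.random.choice(len(self.memory), n_swap,
                                     replace=False)
            for slot, pick in enumerate(picks):
                batch[slot] = self.memory[pick]
        self.memory.extend(list(current_gens))
        self.memory = self.memory[-self.memory_length:]
        return batch
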
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
    print("Loading data definitions.")

    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_208.hkl'))
    videos_list_1 = get_video_lists(frames_source=frames_source,
                                    stride=1,
                                    frame_skip=0)
    videos_list_2 = get_video_lists(frames_source=frames_source,
                                    stride=1,
                                    frame_skip=1)
    # videos_list_3 = get_video_lists(frames_source=frames_source, stride=1, frame_skip=2)
    videos_list = np.concatenate((videos_list_1, videos_list_2), axis=0)

    # Load actions from annotations
    action_labels = hkl.load(
        os.path.join(DATA_DIR, 'annotations_train_208.hkl'))
    ped_action_classes, ped_class_count = get_action_classes(
        action_labels=action_labels)
    print("Training Stats: " + str(ped_class_count))

    classwise_videos_list, count = get_classwise_data(videos_list,
                                                      ped_action_classes)
    videos_list = prob_subsample(classwise_videos_list, count)

    if RAM_DECIMATE:
        frames = load_to_RAM(frames_source=frames_source)

    # if SHUFFLE:
    #     # Shuffle images to aid generalization
    #     videos_list = np.random.permutation(videos_list)

    # Setup test
    test_frames_source = hkl.load(
        os.path.join(TEST_DATA_DIR, 'sources_test_208.hkl'))
    test_videos_list = get_video_lists(frames_source=test_frames_source,
                                       stride=1)
    # Load test action annotations
    test_action_labels = hkl.load(
        os.path.join(TEST_DATA_DIR, 'annotations_test_208.hkl'))
    test_ped_action_classes, test_ped_class_count = get_action_classes(
        test_action_labels)
    print("Test Stats: " + str(test_ped_class_count))

    # Build the Spatio-temporal Autoencoder
    print("Creating models.")
    # Build stacked classifier
    classifier = pretrained_c3d()
    classifier.compile(
        loss="categorical_crossentropy",
        optimizer=OPTIM_C,
        # metrics=[metric_precision, metric_recall, metric_mpca, 'accuracy'])
        metrics=['accuracy'])

    run_utilities(classifier, CLA_WEIGHTS)

    n_videos = videos_list.shape[0]
    n_test_videos = test_videos_list.shape[0]
    NB_ITERATIONS = int(n_videos / BATCH_SIZE)
    # NB_ITERATIONS = 1
    NB_TEST_ITERATIONS = int(n_test_videos / BATCH_SIZE)
    # NB_TEST_ITERATIONS = 1

    # Setup TensorBoard Callback
    TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR,
                                     histogram_freq=0,
                                     write_graph=False,
                                     write_images=False)
    LRS_clas = lrs_callback.LearningRateScheduler(schedule=schedule)
    LRS_clas.set_model(classifier)

    print("Beginning Training.")
    # Begin Training
    # Train Classifier
    if CLASSIFIER:
        print("Training Classifier...")
        for epoch in range(NB_EPOCHS_CLASS):
            print("\n\nEpoch ", epoch)
            c_loss = []
            test_c_loss = []

            # # Set learning rate every epoch
            LRS_clas.on_epoch_begin(epoch=epoch)
            lr = K.get_value(classifier.optimizer.lr)
            print("Learning rate: " + str(lr))
            print("c_loss_metrics: " + str(classifier.metrics_names))

            y_train_pred = []
            y_train_true = []
            for index in range(NB_ITERATIONS):
                # Train Classifier
                if RAM_DECIMATE:
                    videos_list = prob_subsample(classwise_videos_list, count)
                    X, y = load_X_y_RAM(videos_list, index, frames,
                                        ped_action_classes)
                else:
                    videos_list = prob_subsample(classwise_videos_list, count)
                    X, y = load_X_y(videos_list, index, DATA_DIR,
                                    ped_action_classes)

                X_train = X
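                # The clip-level target is the label of the single
                # frame at CLASS_TARGET_INDEX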
                y_true_class = y[:, CLASS_TARGET_INDEX]

                c_loss.append(classifier.train_on_batch(X_train, y_true_class))
                y_train_true.extend(y_true_class)
                y_train_pred.extend(classifier.predict(X_train, verbose=0))

                arrow = int(index / (NB_ITERATIONS / 30))
                stdout.write("\rIter: " + str(index) + "/" +
                             str(NB_ITERATIONS - 1) + "  " + "c_loss: " +
                             str([c_loss[len(c_loss) - 1][j]
                                  for j in [0, 1]]) + "  " + "\t    [" +
                             "{0}>".format("=" * (arrow)))
                stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file
                ped_pred_class = classifier.predict(X_train, verbose=0)
                # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1))
                pred_seq = arrange_images(X_train)
                pred_seq = pred_seq * 127.5 + 127.5

                font = cv2.FONT_HERSHEY_SIMPLEX
                y_orig_classes = y
                # Add labels as text to the image

                for k in range(BATCH_SIZE):
                    for j in range(int(VIDEO_LENGTH)):
                        class_num_past = np.argmax(y_orig_classes[k, j])
                        class_num_y = np.argmax(ped_pred_class[k])
                        cv2.putText(pred_seq,
                                    'truth: ' + simple_ped_set[class_num_past],
                                    (2 + j * (208), 94 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)
                        cv2.putText(pred_seq, simple_ped_set[class_num_y],
                                    (2 + j * (208), 114 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)

                cv2.imwrite(
                    os.path.join(
                        CLA_GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_cla_pred.png"),
                    pred_seq)

            # Run over test data
            print('')
            y_test_pred = []
            y_test_true = []
            for index in range(NB_TEST_ITERATIONS):
                X, y = load_X_y(test_videos_list, index, TEST_DATA_DIR,
                                test_ped_action_classes)
                X_test = X
                y_true_class = y[:, CLASS_TARGET_INDEX]

                test_c_loss.append(
                    classifier.test_on_batch(X_test, y_true_class))
                y_test_true.extend(y_true_class)
                y_test_pred.extend(classifier.predict(X_test, verbose=0))

                stdout.write(
                    "\rIter: " + str(index) + "/" +
                    str(NB_TEST_ITERATIONS - 1) + "  " + "test_c_loss: " +
                    str([test_c_loss[len(test_c_loss) - 1][j]
                         for j in [0, 1]]))
                stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file
                test_ped_pred_class = classifier.predict(X_test, verbose=0)
                # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1))
                pred_seq = arrange_images(X_test)
                pred_seq = pred_seq * 127.5 + 127.5

                font = cv2.FONT_HERSHEY_SIMPLEX
                y_orig_classes = y
                # Add labels as text to the image

                for k in range(BATCH_SIZE):
                    for j in range(int(VIDEO_LENGTH)):
                        class_num_past = np.argmax(y_orig_classes[k, j])
                        class_num_y = np.argmax(test_ped_pred_class[k])
                        cv2.putText(pred_seq,
                                    'truth: ' + simple_ped_set[class_num_past],
                                    (2 + j * (208), 94 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)
                        cv2.putText(pred_seq, simple_ped_set[class_num_y],
                                    (2 + j * (208), 114 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)

                cv2.imwrite(
                    os.path.join(
                        CLA_GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_cla_test_pred.png"),
                    pred_seq)

            # then after each epoch/iteration
            avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0)
            avg_test_c_loss = np.mean(np.asarray(test_c_loss,
                                                 dtype=np.float32),
                                      axis=0)

            train_prec, train_rec, train_fbeta, train_support = get_sklearn_metrics(
                np.asarray(y_train_true), np.asarray(y_train_pred), avg=None)
            test_prec, test_rec, test_fbeta, test_support = get_sklearn_metrics(
                np.asarray(y_test_true), np.asarray(y_test_pred), avg=None)

            loss_values = np.asarray(avg_c_loss.tolist() +
                                     train_prec.tolist() + train_rec.tolist() +
                                     avg_test_c_loss.tolist() +
                                     test_prec.tolist() + test_rec.tolist(),
                                     dtype=np.float32)
            precs = ['prec_' + action for action in simple_ped_set]
            recs = ['rec_' + action for action in simple_ped_set]
            c_loss_keys = [
                'c_' + metric
                for metric in classifier.metrics_names + precs + recs
            ]
            test_c_loss_keys = [
                'c_test_' + metric
                for metric in classifier.metrics_names + precs + recs
            ]

            loss_keys = c_loss_keys + test_c_loss_keys
            logs = dict(zip(loss_keys, loss_values))

            TC_cla.on_epoch_end(epoch, logs)

            # Log the losses
            with open(os.path.join(LOG_DIR, 'losses_cla.json'),
                      'a') as log_file:
                log_file.write("{\"epoch\":%d, %s;\n" % (epoch, logs))

            print("\nAvg c_loss: " + str(avg_c_loss) + " Avg test_c_loss: " +
                  str(avg_test_c_loss))

            print("Training Precision per class:" + str(train_prec))
            print("Test Precision per class:" + str(test_prec))
            print("Training Recall per class:" + str(train_rec))
            print("Test Recall per class:" + str(test_rec))

            prec, recall, fbeta, support = get_sklearn_metrics(
                np.asarray(y_train_true),
                np.asarray(y_train_pred),
                avg='weighted')
            print("Train Prec: %.2f, Recall: %.2f, Fbeta: %.2f" %
                  (prec, recall, fbeta))
            prec, recall, fbeta, support = get_sklearn_metrics(
                np.asarray(y_test_true),
                np.asarray(y_test_pred),
                avg='weighted')
            print("Test Prec: %.2f, Recall: %.2f, Fbeta: %.2f" %
                  (prec, recall, fbeta))

            # Save model weights per epoch to file
            # encoder.save_weights(os.path.join(CHECKPOINT_DIR, 'encoder_cla_epoch_' + str(epoch) + '.h5'), True)
            # decoder.save_weights(os.path.join(CHECKPOINT_DIR, 'decoder_cla_epoch_' + str(epoch) + '.h5'), True)
            classifier.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'classifier_cla_epoch_' + str(epoch) + '.h5'),
                True)

            # get_confusion_matrix(y_train_true, y_train_pred)
            # get_confusion_matrix(y_test_true, y_test_pred)

        print(
            get_classification_report(np.asarray(y_train_true),
                                      np.asarray(y_train_pred)))
        print(
            get_classification_report(np.asarray(y_test_true),
                                      np.asarray(y_test_pred)))
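

# `get_video_lists` is used by every training/testing routine here but is
# not shown. A sketch, under the assumption that `frames_source` holds one
# source-video identifier per frame, so a window is a valid clip only when
# all of its frames come from the same source video:


def get_video_lists(frames_source, stride, frame_skip=0):
    # Slide a window of VIDEO_LENGTH sampled frames over the sources list;
    # frame_skip stretches the window so every (frame_skip + 1)-th frame
    # is taken, and stride spaces consecutive windows.
    span = VIDEO_LENGTH * (frame_skip + 1)
    videos_list = []
    start = 0
    while start + span <= len(frames_source):
        window = frames_source[start:start + span]
        if len(set(window)) == 1:
            videos_list.append((start, start + span))
        start += stride
    return np.asarray(videos_list, dtype=np.int32)
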
def test(CLA_WEIGHTS):

    if not os.path.exists(TEST_RESULTS_DIR + '/pred/'):
        os.mkdir(TEST_RESULTS_DIR + '/pred/')

    # Setup test
    test_frames_source = hkl.load(
        os.path.join(TEST_DATA_DIR, 'sources_test_208.hkl'))
    # test_videos_list = get_video_lists(frames_source=test_frames_source, stride=8, frame_skip=0)
    # test_videos_list = get_video_lists(frames_source=test_frames_source, stride=16, frame_skip=0)
    test_videos_list = get_video_lists(frames_source=test_frames_source,
                                       stride=16,
                                       frame_skip=2)
    # Load test action annotations
    test_action_labels = hkl.load(
        os.path.join(TEST_DATA_DIR, 'annotations_test_208.hkl'))
    test_ped_action_classes, test_ped_class_count = get_action_classes(
        test_action_labels, mode='sigmoid')
    print("Test Stats: " + str(test_ped_class_count))

    # Build the Spatio-temporal Autoencoder
    print("Creating models.")
    # Build stacked classifier
    # classifier = pretrained_c3d()
    classifier = ensemble_c3d()
    # classifier = c3d_scratch()
    classifier.compile(
        loss="binary_crossentropy",
        optimizer=OPTIM_C,
        # metrics=[metric_precision, metric_recall, metric_mpca, 'accuracy'])
        metrics=['acc'])

    # Build attention layer output
    intermediate_classifier = Model(inputs=classifier.layers[0].input,
                                    outputs=classifier.layers[1].output)
    mask_gen_1 = Sequential()
    # mask_gen_1.add(encoder)
    mask_gen_1.add(intermediate_classifier)
    mask_gen_1.compile(loss='binary_crossentropy', optimizer=OPTIM_C)

    run_utilities(classifier, CLA_WEIGHTS)

    n_test_videos = test_videos_list.shape[0]

    NB_TEST_ITERATIONS = int(n_test_videos / TEST_BATCH_SIZE)
    # NB_TEST_ITERATIONS = 5

    # Setup TensorBoard Callback
    TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR,
                                     histogram_freq=0,
                                     write_graph=False,
                                     write_images=False)
    LRS_clas = lrs_callback.LearningRateScheduler(schedule=schedule)
    LRS_clas.set_model(classifier)

    if CLASSIFIER:
        print("Testing Classifier...")
        # Run over test data
        print('')
        y_test_pred = []
        y_test_true = []
        test_c_loss = []
        for index in range(NB_TEST_ITERATIONS):
            X, y = load_X_y(test_videos_list,
                            index,
                            TEST_DATA_DIR,
                            test_ped_action_classes,
                            batch_size=TEST_BATCH_SIZE)
            X_test = X
            y_true_class = y[:, CLASS_TARGET_INDEX]

            test_c_loss.append(classifier.test_on_batch(X_test, y_true_class))
            y_test_true.extend(y_true_class)
            y_test_pred.extend(classifier.predict(X_test, verbose=0))

            stdout.write(
                "\rIter: " + str(index) + "/" + str(NB_TEST_ITERATIONS - 1) +
                "  " + "test_c_loss: " +
                str([test_c_loss[len(test_c_loss) - 1][j] for j in [0, 1]]))
            stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file
                test_ped_pred_class = classifier.predict(X_test, verbose=0)
                # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1))
                pred_seq = arrange_images(X_test)
                pred_seq = pred_seq * 127.5 + 127.5

                font = cv2.FONT_HERSHEY_SIMPLEX
                y_orig_classes = y
                # Add labels as text to the image

                for k in range(TEST_BATCH_SIZE):
                    for j in range(int(VIDEO_LENGTH)):

                        if (y_orig_classes[k, j] > 0.5):
                            label_true = "crossing"
                        else:
                            label_true = "not crossing"

                        if (test_ped_pred_class[k] > 0.5):
                            label_pred = "crossing"
                        else:
                            label_pred = "not crossing"

                        cv2.putText(pred_seq, 'truth: ' + label_true,
                                    (2 + j * (208), 94 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)
                        cv2.putText(pred_seq, label_pred,
                                    (2 + j * (208), 114 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)

                cv2.imwrite(
                    os.path.join(TEST_RESULTS_DIR + '/pred/',
                                 str(index) + "_cla_test_pred.png"), pred_seq)

        # then after each epoch
        avg_test_c_loss = np.mean(np.asarray(test_c_loss, dtype=np.float32),
                                  axis=0)

        test_prec, test_rec, test_fbeta, test_support = get_sklearn_metrics(
            np.asarray(y_test_true),
            np.asarray(y_test_pred),
            avg='binary',
            pos_label=1)
        print("\nAvg test_c_loss: " + str(avg_test_c_loss))
        print("Test Prec: %.4f, Recall: %.4f, Fbeta: %.4f" %
              (test_prec, test_rec, test_fbeta))

        print("Classification Report")
        print(
            get_classification_report(np.asarray(y_test_true),
                                      np.asarray(y_test_pred)))

        print("Confusion matrix")
        tn, fp, fn, tp = confusion_matrix(y_test_true,
                                          np.round(y_test_pred)).ravel()
        print("TN: %.2f, FP: %.2f, FN: %.2f, TP: %.2f" % (tn, fp, fn, tp))
def train(BATCH_SIZE, ENC_WEIGHTS, DEC_WEIGHTS, CLA_WEIGHTS):
    print("Loading data definitions.")

    frames_source = hkl.load(os.path.join(DATA_DIR, 'sources_train_208.hkl'))
    videos_list_1 = get_video_lists(frames_source=frames_source,
                                    stride=8,
                                    frame_skip=0)
    videos_list_2 = get_video_lists(frames_source=frames_source,
                                    stride=8,
                                    frame_skip=1)
    videos_list = np.concatenate((videos_list_1, videos_list_2), axis=0)

    # Load actions from annotations
    action_labels = hkl.load(
        os.path.join(DATA_DIR, 'annotations_train_208.hkl'))
    ped_action_classes, ped_class_count = get_action_classes(
        action_labels=action_labels, mode='sigmoid')
    print("Training Stats: " + str(ped_class_count))

    # videos_list = remove_zero_classes(videos_list, ped_action_classes)
    # classwise_videos_list, count = get_classwise_data(videos_list, ped_action_classes)
    # videos_list = prob_subsample(classwise_videos_list, count)

    if RAM_DECIMATE:
        frames = load_to_RAM(frames_source=frames_source)

    if SHUFFLE:
        # Shuffle images to aid generalization
        videos_list = np.random.permutation(videos_list)

    # Setup validation
    val_frames_source = hkl.load(
        os.path.join(VAL_DATA_DIR, 'sources_val_208.hkl'))
    val_videos_list = get_video_lists(frames_source=val_frames_source,
                                      stride=8,
                                      frame_skip=0)
    # Load val action annotations
    val_action_labels = hkl.load(
        os.path.join(VAL_DATA_DIR, 'annotations_val_208.hkl'))
    val_ped_action_classes, val_ped_class_count = get_action_classes(
        val_action_labels, mode='sigmoid')
    # val_videos_list = remove_zero_classes(val_videos_list, val_ped_action_classes)
    print("Val Stats: " + str(val_ped_class_count))

    # Build the Spatio-temporal Autoencoder
    print("Creating models.")
    # Build stacked classifier
    # classifier = pretrained_c3d()
    classifier = ensemble_c3d()
    # classifier = c3d_scratch()
    classifier.compile(
        loss="binary_crossentropy",
        optimizer=OPTIM_C,
        # metrics=[metric_precision, metric_recall, metric_mpca, 'accuracy'])
        metrics=['acc'])

    # Build attention layer output
    intermediate_classifier = Model(inputs=classifier.layers[0].input,
                                    outputs=classifier.layers[1].output)
    mask_gen_1 = Sequential()
    # mask_gen_1.add(encoder)
    mask_gen_1.add(intermediate_classifier)
    mask_gen_1.compile(loss='binary_crossentropy', optimizer=OPTIM_C)

    run_utilities(classifier, CLA_WEIGHTS)

    n_videos = videos_list.shape[0]
    n_val_videos = val_videos_list.shape[0]

    NB_ITERATIONS = int(n_videos / BATCH_SIZE)
    # NB_ITERATIONS = 5
    NB_VAL_ITERATIONS = int(n_val_videos / BATCH_SIZE)
    # NB_VAL_ITERATIONS = 5

    # Setup TensorBoard Callback
    TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR,
                                     histogram_freq=0,
                                     write_graph=False,
                                     write_images=False)
    LRS_clas = lrs_callback.LearningRateScheduler(schedule=schedule)
    LRS_clas.set_model(classifier)

    print("Beginning Training.")
    # Begin Training
    # Train Classifier
    if CLASSIFIER:
        print("Training Classifier...")
        for epoch in range(1, NB_EPOCHS_CLASS + 1):
            print("\n\nEpoch ", epoch)
            c_loss = []
            val_c_loss = []

            # # Set learning rate every epoch
            LRS_clas.on_epoch_begin(epoch=epoch)
            lr = K.get_value(classifier.optimizer.lr)
            print("Learning rate: " + str(lr))
            print("c_loss_metrics: " + str(classifier.metrics_names))

            y_train_pred = []
            y_train_true = []
            for index in range(NB_ITERATIONS):
                # Train Classifier
                if RAM_DECIMATE:
                    # videos_list = prob_subsample(classwise_videos_list, count)
                    X, y = load_X_y_RAM(videos_list, index, frames,
                                        ped_action_classes)
                else:
                    # videos_list = prob_subsample(classwise_videos_list, count)
                    X, y = load_X_y(videos_list, index, DATA_DIR,
                                    ped_action_classes)

                X_train = X
                y_true_class = y[:, CLASS_TARGET_INDEX]

                c_loss.append(classifier.train_on_batch(X_train, y_true_class))

                y_train_true.extend(y_true_class)
                y_train_pred.extend(classifier.predict(X_train, verbose=0))

                arrow = int(index / (NB_ITERATIONS / 30))
                stdout.write("\rIter: " + str(index) + "/" +
                             str(NB_ITERATIONS - 1) + "  " + "c_loss: " +
                             str([c_loss[len(c_loss) - 1][j]
                                  for j in [0, 1]]) + "  " + "\t    [" +
                             "{0}>".format("=" * (arrow)))
                stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file
                ped_pred_class = classifier.predict(X_train, verbose=0)
                # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1))
                pred_seq = arrange_images(X_train)
                pred_seq = pred_seq * 127.5 + 127.5

                font = cv2.FONT_HERSHEY_SIMPLEX
                y_orig_classes = y
                # Add labels as text to the image

                for k in range(BATCH_SIZE):
                    for j in range(int(VIDEO_LENGTH)):
                        label_true = str(y_orig_classes[k, j])
                        label_pred = str(
                            [round(float(i), 2) for i in ped_pred_class[k]])

                        cv2.putText(pred_seq, 'truth: ' + label_true,
                                    (2 + j * (208), 94 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)
                        cv2.putText(pred_seq, label_pred,
                                    (2 + j * (208), 114 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)

                cv2.imwrite(
                    os.path.join(
                        CLA_GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_cla_pred.png"),
                    pred_seq)

                slices = mask_gen_1.predict(X_train)
                slice_images = arrange_images(slices)
                slice_images = slice_images * 127.5 + 127.5
                cv2.imwrite(
                    os.path.join(
                        CLA_GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_slice_pred.png"),
                    slice_images)

            # Run over val data
            print('')
            y_val_pred = []
            y_val_true = []
            for index in range(NB_VAL_ITERATIONS):
                X, y = load_X_y(val_videos_list, index, VAL_DATA_DIR,
                                val_ped_action_classes)
                X_val = X
                y_true_class = y[:, CLASS_TARGET_INDEX]

                val_c_loss.append(classifier.test_on_batch(
                    X_val, y_true_class))
                y_val_true.extend(y_true_class)
                y_val_pred.extend(classifier.predict(X_val, verbose=0))

                stdout.write(
                    "\rIter: " + str(index) + "/" +
                    str(NB_VAL_ITERATIONS - 1) + "  " + "val_c_loss: " +
                    str([val_c_loss[len(val_c_loss) - 1][j] for j in [0, 1]]))
                stdout.flush()

            if SAVE_GENERATED_IMAGES:
                # Save generated images to file
                val_ped_pred_class = classifier.predict(X_val, verbose=0)
                # pred_seq = arrange_images(np.concatenate((X_train, predicted_images), axis=1))
                pred_seq = arrange_images(X_val)
                pred_seq = pred_seq * 127.5 + 127.5

                font = cv2.FONT_HERSHEY_SIMPLEX
                y_orig_classes = y
                # Add labels as text to the image

                for k in range(BATCH_SIZE):
                    for j in range(int(VIDEO_LENGTH)):
                        label_true = str(y_orig_classes[k, j])
                        label_pred = str([
                            round(float(i), 2)
                            for i in val_ped_pred_class[k]
                        ])

                        cv2.putText(pred_seq, 'truth: ' + label_true,
                                    (2 + j * (208), 94 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)
                        cv2.putText(pred_seq, label_pred,
                                    (2 + j * (208), 114 + k * 128), font, 0.5,
                                    (255, 255, 255), 1, cv2.LINE_AA)

                cv2.imwrite(
                    os.path.join(
                        CLA_GEN_IMAGES_DIR,
                        str(epoch) + "_" + str(index) + "_cla_val_pred.png"),
                    pred_seq)

            # then after each epoch
            avg_c_loss = np.mean(np.asarray(c_loss, dtype=np.float32), axis=0)
            avg_val_c_loss = np.mean(np.asarray(val_c_loss, dtype=np.float32),
                                     axis=0)

            train_prec, train_rec, train_fbeta, train_support = get_sklearn_metrics(
                np.asarray(y_train_true),
                np.asarray(y_train_pred),
                avg='binary',
                pos_label=1)
            val_prec, val_rec, val_fbeta, val_support = get_sklearn_metrics(
                np.asarray(y_val_true),
                np.asarray(y_val_pred),
                avg='binary',
                pos_label=1)

            loss_values = np.asarray(
                avg_c_loss.tolist() + [train_prec.tolist()] +
                [train_rec.tolist()] + avg_val_c_loss.tolist() +
                [val_prec.tolist()] + [val_rec.tolist()],
                dtype=np.float32)

            c_loss_keys = [
                'c_' + metric for metric in classifier.metrics_names
            ] + ['c_prec', 'c_rec']
            val_c_loss_keys = [
                'c_val_' + metric for metric in classifier.metrics_names
            ] + ['c_val_prec', 'c_val_rec']

            loss_keys = c_loss_keys + val_c_loss_keys
            logs = dict(zip(loss_keys, loss_values))

            TC_cla.on_epoch_end(epoch, logs)

            # Log the losses
            with open(os.path.join(LOG_DIR, 'losses_cla.json'),
                      'a') as log_file:
                log_file.write("{\"epoch\":%d, %s\n" %
                               (epoch, str(logs).strip('{')))

            print("\nAvg c_loss: " + str(avg_c_loss) + " Avg val_c_loss: " +
                  str(avg_val_c_loss))

            print("Train Prec: %.2f, Recall: %.2f, Fbeta: %.2f" %
                  (train_prec, train_rec, train_fbeta))
            print("Val Prec: %.2f, Recall: %.2f, Fbeta: %.2f" %
                  (val_prec, val_rec, val_fbeta))

            # Save model weights per epoch to file
            classifier.save_weights(
                os.path.join(CHECKPOINT_DIR,
                             'classifier_cla_epoch_' + str(epoch) + '.h5'),
                True)
            classifier.save(
                os.path.join(CHECKPOINT_DIR, 'full_classifier_cla_epoch_' +
                             str(epoch) + '.h5'))

        print(
            get_classification_report(np.asarray(y_train_true),
                                      np.asarray(y_train_pred)))
        print(
            get_classification_report(np.asarray(y_val_true),
                                      np.asarray(y_val_pred)))
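

# `load_X_y` (and its RAM variant) materializes one batch of clips plus
# per-frame labels. A sketch, assuming frame files follow the
# frame_<n>.png naming used elsewhere in this script, pixels are scaled
# to [-1, 1], and labels are indexed by the same frame numbers:


def load_X_y(videos_list, index, data_dir, action_classes,
             batch_size=None):
    if batch_size is None:
        batch_size = BATCH_SIZE
    X = np.zeros((batch_size, VIDEO_LENGTH) + IMG_SIZE, dtype=np.float32)
    y = []
    for b in range(batch_size):
        start, end = videos_list[index * batch_size + b]
        # A clip built with frame_skip > 0 spans more source frames than
        # VIDEO_LENGTH; subsample it back down to VIDEO_LENGTH frames
        step = (end - start) // VIDEO_LENGTH
        frame_ids = range(start, end, step)
        for t, fid in enumerate(frame_ids):
            im_file = os.path.join(data_dir,
                                   'frame_' + str(fid) + '.png')
            frame = cv2.imread(im_file, cv2.IMREAD_COLOR)
            X[b, t] = (frame.astype(np.float32) - 127.5) / 127.5
        y.append([action_classes[fid] for fid in frame_ids])
    return X, np.asarray(y)
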
def test_mtcp(CLA_WEIGHTS):

    if not os.path.exists(TEST_RESULTS_DIR + '/pred/'):
        os.mkdir(TEST_RESULTS_DIR + '/pred/')
    # This function also writes into these directories below
    if not os.path.exists(TEST_RESULTS_DIR + '/mtcp-pred/'):
        os.mkdir(TEST_RESULTS_DIR + '/mtcp-pred/')
    if not os.path.exists(TEST_RESULTS_DIR + '/mtcp-truth/'):
        os.mkdir(TEST_RESULTS_DIR + '/mtcp-truth/')

    # Setup test
    test_frames_source = hkl.load(
        os.path.join(TEST_DATA_DIR, 'sources_test_208.hkl'))
    # test_videos_list = get_video_lists(frames_source=test_frames_source, stride=8, frame_skip=0)
    test_videos_list = get_video_lists(frames_source=test_frames_source,
                                       stride=16,
                                       frame_skip=0)
    # test_videos_list = get_video_lists(frames_source=test_frames_source, stride=16, frame_skip=2)
    # Load test action annotations
    test_action_labels = hkl.load(
        os.path.join(TEST_DATA_DIR, 'annotations_test_208.hkl'))
    test_ped_action_classes, test_ped_class_count = get_action_classes(
        test_action_labels, mode='sigmoid')
    print("Test Stats: " + str(test_ped_class_count))

    # Build the Spatio-temporal Autoencoder
    print("Creating models.")
    # Build stacked classifier
    # classifier = pretrained_c3d()
    classifier = ensemble_c3d()
    # classifier = c3d_scratch()
    classifier.compile(
        loss="binary_crossentropy",
        optimizer=OPTIM_C,
        # metrics=[metric_precision, metric_recall, metric_mpca, 'accuracy'])
        metrics=['acc'])

    run_utilities(classifier, CLA_WEIGHTS)

    n_test_videos = test_videos_list.shape[0]

    NB_TEST_ITERATIONS = int(n_test_videos / TEST_BATCH_SIZE)
    # NB_TEST_ITERATIONS = 5

    # Setup TensorBoard Callback
    TC_cla = tb_callback.TensorBoard(log_dir=TF_LOG_CLA_DIR,
                                     histogram_freq=0,
                                     write_graph=False,
                                     write_images=False)
    LRS_clas = lrs_callback.LearningRateScheduler(schedule=schedule)
    LRS_clas.set_model(classifier)
    if CLASSIFIER:
        print("Testing Classifier...")
        # Run over test data
        print('')
        # Time to correct prediction
        tcp_list = []
        tcp_true_list = []
        tcp_pred_list = []
        y_test_pred = []
        y_test_true = []
        test_c_loss = []
        index = 0
        tcp = 1
        while index < NB_TEST_ITERATIONS:
            X, y = load_X_y(test_videos_list,
                            index,
                            TEST_DATA_DIR,
                            test_ped_action_classes,
                            batch_size=TEST_BATCH_SIZE)

            y_past_class = y[:, 0]
            y_end_class = y[:, -1]

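            # Only clips whose label changes between the first and last
            # frame are relevant for time-to-correct-prediction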
            if y_end_class[0] == y_past_class[0]:
                index = index + 1
                continue
            else:
                stdout.write("\rIter: " + str(index) + "/" +
                             str(NB_TEST_ITERATIONS - 1))
                stdout.flush()
                for fnum in range(int(VIDEO_LENGTH / 2) + 1):
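                    # Walk the target label back one frame at a time and
                    # count how many frames it takes until the classifier
                    # agrees with the changed label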

                    X, y = load_X_y(test_videos_list,
                                    index,
                                    TEST_DATA_DIR,
                                    test_ped_action_classes,
                                    batch_size=TEST_BATCH_SIZE)
                    X_test = X

                    y_true_imgs = X[:, int(VIDEO_LENGTH / 2):]
                    y_true_class = y[:, VIDEO_LENGTH - fnum - 1]
                    if y[0, 0] == y_true_class[0]:
                        break

                    if (fnum + 1 > 16):
                        tcp_pred_list.append(y_pred_class[0])
                        tcp_true_list.append(y_true_class[0])
                        break

                    y_pred_class = classifier.predict(X_test, verbose=0)
                    y_test_pred.extend(classifier.predict(X_test, verbose=0))
                    test_c_loss.append(
                        classifier.test_on_batch(X_test, y_true_class))
                    y_test_true.extend(y_true_class)

                    # Save generated images to file. The encoder/decoder
                    # pair is assumed to be built and loaded with trained
                    # weights elsewhere in the script; it is not defined
                    # in this function.
                    z = encoder.predict(X_test)
                    test_predicted_images = decoder.predict(z)
                    test_ped_pred_class = classifier.predict(X_test,
                                                             verbose=0)
                    pred_seq = arrange_images(
                        np.concatenate((X_test, test_predicted_images),
                                       axis=1))
                    pred_seq = pred_seq * 127.5 + 127.5

                    truth_image = arrange_images(y_true_imgs)
                    truth_image = truth_image * 127.5 + 127.5

                    font = cv2.FONT_HERSHEY_SIMPLEX
                    y_orig_classes = y[:, 0:int(VIDEO_LENGTH / 2)]
                    y_true_classes = y[:, int(VIDEO_LENGTH / 2):]

                    # Add labels as text to the image
                    for k in range(TEST_BATCH_SIZE):
                        for j in range(int(VIDEO_LENGTH / 2)):
                            if y_orig_classes[k, j] > 0.5:
                                label_orig = "crossing"
                            else:
                                label_orig = "not crossing"

                            if y_true_classes[k][j] > 0.5:
                                label_true = "crossing"
                            else:
                                label_true = "not crossing"

                            if test_ped_pred_class[k][0] > 0.5:
                                label_pred = "crossing"
                            else:
                                label_pred = "not crossing"

                            cv2.putText(pred_seq, label_orig,
                                        (2 + j * (208), 114 + k * 128), font,
                                        0.5, (255, 255, 255), 1, cv2.LINE_AA)
                            cv2.putText(pred_seq, label_pred,
                                        (2 + (j + 16) * (208), 114 + k * 128),
                                        font, 0.5, (255, 255, 255), 1,
                                        cv2.LINE_AA)
                            cv2.putText(pred_seq, 'truth: ' + label_true,
                                        (2 + (j + 16) * (208), 94 + k * 128),
                                        font, 0.5, (255, 255, 255), 1,
                                        cv2.LINE_AA)
                            cv2.putText(truth_image, label_true,
                                        (2 + j * (208), 114 + k * 128), font,
                                        0.5, (255, 255, 255), 1, cv2.LINE_AA)

                    cv2.imwrite(
                        os.path.join(TEST_RESULTS_DIR + '/mtcp-pred/',
                                     str(index) + "_cla_test_pred.png"),
                        pred_seq)
                    cv2.imwrite(
                        os.path.join(TEST_RESULTS_DIR + '/mtcp-truth/',
                                     str(index) + "_cla_test_truth.png"),
                        truth_image)

                    if y_true_class[0] != np.round(y_pred_class[0]):
                        index = index + 1
                        continue
                    else:
                        tcp_pred_list.append(y_pred_class[0])
                        tcp_true_list.append(y_true_class[0])
                        tcp_list.append(fnum + 1)
                        index = index + int(VIDEO_LENGTH / 2)
                        # Break from the for loop
                        break

        # then after each epoch
        avg_test_c_loss = np.mean(np.asarray(test_c_loss, dtype=np.float32),
                                  axis=0)

        test_prec, test_rec, test_fbeta, test_support = get_sklearn_metrics(
            np.asarray(y_test_true),
            np.asarray(y_test_pred),
            avg='binary',
            pos_label=1)
        print("\nAvg test_c_loss: " + str(avg_test_c_loss))
        print("Mean time to change prediction: " +
              str(np.mean(np.asarray(tcp_list))))
        print("Standard Deviation " + str(np.std(np.asarray(tcp_list))))
        print("Number of correct predictions " + str(len(tcp_list)))
        print("Test Prec: %.4f, Recall: %.4f, Fbeta: %.4f" %
              (test_prec, test_rec, test_fbeta))

        print("Classification Report")
        print(
            get_classification_report(np.asarray(y_test_true),
                                      np.asarray(y_test_pred)))

        print("Confusion matrix")
        tn, fp, fn, tp = confusion_matrix(y_test_true,
                                          np.round(y_test_pred)).ravel()
        print("TN: %.2f, FP: %.2f, FN: %.2f, TP: %.2f" % (tn, fp, fn, tp))

        print("-------------------------------------------")
        print("Test cases where there is a change in label")

        test_prec, test_rec, test_fbeta, test_support = get_sklearn_metrics(
            np.asarray(tcp_true_list),
            np.asarray(tcp_pred_list),
            avg='binary',
            pos_label=1)
        print("Test Prec: %.4f, Recall: %.4f, Fbeta: %.4f" %
              (test_prec, test_rec, test_fbeta))

        test_acc = accuracy_score(tcp_true_list, np.round(tcp_pred_list))
        print("Test Accuracy: %.4f" % (test_acc))

        avg_prec = average_precision_score(tcp_true_list, tcp_pred_list)
        print("Average precision: %.4f" % (avg_prec))

        precisions, recalls, thresholds = precision_recall_curve(
            tcp_true_list, tcp_pred_list)
        print("PR curve precisions: " + str(precisions))
        print("PR curve recalls: " + str(recalls))
        print("PR curve thresholds: " + str(thresholds))
        print("PR curve prec mean: %.4f" % (np.mean(precisions)))
        print("PR curve prec std: %.4f" % (np.std(precisions)))
        print("Number of thresholds: %.4f" % (len(thresholds)))

        print("Classification Report")
        print(
            get_classification_report(np.asarray(tcp_true_list),
                                      np.asarray(tcp_pred_list)))

        print("Confusion matrix")
        tn, fp, fn, tp = confusion_matrix(tcp_true_list,
                                          np.round(tcp_pred_list)).ravel()
        print("TN: %.2f, FP: %.2f, FN: %.2f, TP: %.2f" % (tn, fp, fn, tp))