def eval_ae(args):
    """Evaluate a pre-trained autoencoder on the trajectories of one camera.

    The model configuration is recovered from the name of the directory that
    contains the pre-trained model directory (e.g. ``adam_bb-tl_mse``):
    ``gm`` selects global features, ``bb-tl`` / ``bb-c`` select the
    bounding-box coordinate systems and ``_3stds_`` / ``_robust_`` select the
    normalisation strategy. Anything else falls back to the ``global``
    coordinate system and the ``zero_one`` strategy.

    Args:
        args: Namespace-like object providing ``trajectories``,
            ``pretrained_model``, ``video_resolution`` (a string of
            ``'x'``-separated numbers) and ``frame_level_anomaly_masks``.

    Returns:
        The ``(y_true, y_hat)`` pair produced by
        ``assemble_ground_truth_and_reconstructions``. AUROC and AUPR computed
        from that pair are printed to stdout.
    """
    # General
    trajectories_path = args.trajectories
    # os.path.basename() returns '' for a path ending in a separator, so the
    # path is normalised first; otherwise '.../03/' would give an empty id.
    camera_id = os.path.basename(os.path.normpath(trajectories_path))
    pretrained_model_path = args.pretrained_model  # e.g. .../adam_bb-tl_mse/06_2018_09_29_00_35_20
    video_resolution = np.array(
        [float(measurement)
         for measurement in args.video_resolution.split('x')],
        dtype=np.float32)
    frame_level_anomaly_masks_path = args.frame_level_anomaly_masks

    # Extract information about the models
    # Normalising before taking the parent guarantees that the *parent*
    # directory name is inspected even when the path has a trailing separator
    # (without it, the run directory itself would be inspected and every
    # setting below would silently fall back to its default).
    model_info = os.path.basename(
        os.path.dirname(os.path.normpath(pretrained_model_path)))
    global_model = 'gm' in model_info

    coordinate_system = 'global'
    if 'bb-tl' in model_info:
        coordinate_system = 'bounding_box_top_left'
    elif 'bb-c' in model_info:
        coordinate_system = 'bounding_box_centre'

    normalisation_strategy = 'zero_one'
    if '_3stds_' in model_info:
        normalisation_strategy = 'three_stds'
    elif '_robust_' in model_info:
        normalisation_strategy = 'robust'

    pretrained_ae, scaler = load_pretrained_ae(pretrained_model_path)

    # Load data
    trajectories = load_trajectories(trajectories_path)

    if global_model:
        trajectories = extract_global_features(
            trajectories, video_resolution=video_resolution)
        # Global features are always expressed in the global coordinate
        # system, whatever the directory name suggested.
        coordinate_system = 'global'

    trajectories = change_coordinate_system(
        trajectories,
        video_resolution=video_resolution,
        coordinate_system=coordinate_system,
        invert=False)

    trajectories_ids, frames, X = aggregate_autoencoder_evaluation_data(
        trajectories)

    X, (trajectories_ids,
        frames) = remove_missing_skeletons(X, trajectories_ids, frames)

    # Reuse the scaler that was stored alongside the pre-trained model.
    X, _ = scale_trajectories(X,
                              scaler=scaler,
                              strategy=normalisation_strategy)

    # Reconstruct
    reconstructed_X = pretrained_ae.predict(X)
    reconstruction_errors = compute_ae_reconstruction_errors(
        X, reconstructed_X, loss=pretrained_ae.loss)

    # Evaluate Performance
    anomaly_masks = load_anomaly_masks(frame_level_anomaly_masks_path)
    y_true, y_hat = assemble_ground_truth_and_reconstructions(
        anomaly_masks, trajectories_ids, frames, reconstruction_errors)

    auroc = roc_auc_score(y_true, y_hat)
    aupr = average_precision_score(y_true, y_hat)

    print('Camera %s:\tAUROC\tAUPR' % camera_id)
    print('          \t%.4f\t%.4f\n' % (auroc, aupr))

    return y_true, y_hat
# Example no. 2
# 0
def produce_data_from_model_type(model_type,
                                 trajectories,
                                 video_resolution,
                                 input_length,
                                 input_gap,
                                 pred_length,
                                 normalisation_strategy,
                                 coordinate_system,
                                 input_missing_steps=False,
                                 extract_delta=False,
                                 use_first_step_as_reference=False):
    """Build scaled training and validation arrays for a given model type.

    The trajectories are split 80/20 (seed 42). When ``input_missing_steps``
    is set, only the training split is passed through
    ``input_trajectories_missing_steps``.

    * ``model_type == 'concatenate'``: global features (computed on deep
      copies) and ``bounding_box_centre`` local features are windowed
      separately and concatenated along the last axis. The
      ``coordinate_system`` argument is not used in this mode.
    * ``model_type == 'global'``: global features are extracted and the
      coordinate system is forced to ``'global'``.
    * any other value: the trajectories are only converted to
      ``coordinate_system``.

    In every mode the scaler is obtained from ``scale_trajectories`` called
    on the training data alone and then reused for all other arrays; the
    training arrays are shuffled with ``random_state=42``.

    Returns:
        ``(X_train, y_train, X_val, y_val, scaler)``. The ``y`` entries are
        ``None`` when ``aggregate_rnn_autoencoder_data`` yields no targets.
    """
    # Keyword sets shared by the repeated helper calls below.
    feature_kwargs = {
        'video_resolution': video_resolution,
        'extract_delta': extract_delta,
        'use_first_step_as_reference': use_first_step_as_reference,
    }
    window_kwargs = {
        'input_length': input_length,
        'input_gap': input_gap,
        'pred_length': pred_length,
    }

    def to_system(trajs, system):
        # Express trajectories in the requested coordinate system.
        return change_coordinate_system(trajs,
                                        video_resolution=video_resolution,
                                        coordinate_system=system,
                                        invert=False)

    def rescale(data, fitted_scaler):
        # Apply an already obtained scaler, dropping the returned scaler.
        scaled, _ = scale_trajectories(data,
                                       scaler=fitted_scaler,
                                       strategy=normalisation_strategy)
        return scaled

    train_set, val_set = split_into_train_and_test(trajectories,
                                                   train_ratio=0.8,
                                                   seed=42)

    if input_missing_steps:
        train_set = input_trajectories_missing_steps(train_set)
        print('\nInputted missing steps of trajectories.')

    if model_type == 'concatenate':
        # Global branch works on copies so the originals stay available for
        # the local branch.
        global_train = to_system(
            extract_global_features(deepcopy(train_set), **feature_kwargs),
            'global')
        global_val = to_system(
            extract_global_features(deepcopy(val_set), **feature_kwargs),
            'global')

        local_train = to_system(train_set, 'bounding_box_centre')
        local_val = to_system(val_set, 'bounding_box_centre')

        reference = np.concatenate(
            (aggregate_autoencoder_data(global_train),
             aggregate_autoencoder_data(local_train)),
            axis=-1)
        _, scaler = scale_trajectories(reference,
                                       strategy=normalisation_strategy)

        X_global_train, y_global_train = aggregate_rnn_autoencoder_data(
            global_train, **window_kwargs)
        X_global_val, y_global_val = aggregate_rnn_autoencoder_data(
            global_val, **window_kwargs)
        X_local_train, y_local_train = aggregate_rnn_autoencoder_data(
            local_train, **window_kwargs)
        X_local_val, y_local_val = aggregate_rnn_autoencoder_data(
            local_val, **window_kwargs)

        X_train = np.concatenate((X_global_train, X_local_train), axis=-1)
        X_val = np.concatenate((X_global_val, X_local_val), axis=-1)
        X_train = rescale(X_train, scaler)
        X_val = rescale(X_val, scaler)

        if y_global_train is None:
            y_train = y_val = None
        else:
            y_train = np.concatenate((y_global_train, y_local_train), axis=-1)
            y_val = np.concatenate((y_global_val, y_local_val), axis=-1)
            y_train = rescale(y_train, scaler)
            y_val = rescale(y_val, scaler)

        if y_train is None:
            X_train = shuffle(X_train, random_state=42)
        else:
            X_train, y_train = shuffle(X_train, y_train, random_state=42)

        return X_train, y_train, X_val, y_val, scaler

    # Single-representation models ('global' or a plain coordinate change).
    if model_type == 'global':
        train_set = extract_global_features(train_set, **feature_kwargs)
        val_set = extract_global_features(val_set, **feature_kwargs)
        coordinate_system = 'global'
        print(
            '\nExtracted global features from input trajectories. In addition, the coordinate system has been '
            'set to global.')

    train_set = to_system(train_set, coordinate_system)
    val_set = to_system(val_set, coordinate_system)
    print('\nChanged coordinate system to %s.' % coordinate_system)

    _, scaler = scale_trajectories(aggregate_autoencoder_data(train_set),
                                   strategy=normalisation_strategy)

    X_train, y_train = aggregate_rnn_autoencoder_data(train_set,
                                                      **window_kwargs)
    if y_train is None:
        X_train = shuffle(X_train, random_state=42)
    else:
        X_train, y_train = shuffle(X_train, y_train, random_state=42)
    X_val, y_val = aggregate_rnn_autoencoder_data(val_set, **window_kwargs)

    X_train = rescale(X_train, scaler)
    X_val = rescale(X_val, scaler)
    if not (y_train is None or y_val is None):
        y_train = rescale(y_train, scaler)
        y_val = rescale(y_val, scaler)
    print('\nNormalised input features using the %s normalisation strategy.'
          % normalisation_strategy)

    return X_train, y_train, X_val, y_val, scaler
# Example no. 3
# 0
def train_ae(args):
    """Train an autoencoder anomaly model on the trajectories of one camera.

    The trajectories are optionally reduced to global features, converted to
    the requested coordinate system, split 80/20 (seed 42), scaled with a
    scaler obtained from the training split, and used to train an
    ``Autoencoder`` that reconstructs its own input.

    Args:
        args: Namespace-like object providing ``trajectories``,
            ``video_resolution`` (a string of ``'x'``-separated numbers),
            ``global_model``, ``hidden_dims``, ``output_activation``,
            ``coordinate_system``, ``normalisation_strategy``, ``optimiser``,
            ``learning_rate``, ``loss``, ``epochs``, ``batch_size``,
            ``root_log_dir`` and ``resume_training``.

    Returns:
        ``(ae, scaler)``: the trained model and the scaler used on its input.
        When ``set_up_logging`` returns a directory, the scaler is also
        written there as ``scaler.pkl``.
    """
    # General
    trajectories_path = args.trajectories  # e.g. .../03
    # os.path.basename() returns '' for a path ending in a separator, so the
    # path is normalised first; otherwise '.../03/' would give an empty id
    # and the log directory would be named without the camera.
    camera_id = os.path.basename(os.path.normpath(trajectories_path))
    video_resolution = [
        float(measurement) for measurement in args.video_resolution.split('x')
    ]
    video_resolution = np.array(video_resolution, dtype=np.float32)
    # Architecture
    global_model = args.global_model
    hidden_dims = args.hidden_dims
    output_activation = args.output_activation
    coordinate_system = args.coordinate_system
    normalisation_strategy = args.normalisation_strategy
    # Training
    optimiser = args.optimiser
    learning_rate = args.learning_rate
    loss = args.loss
    epochs = args.epochs
    batch_size = args.batch_size
    # Logging
    root_log_dir = args.root_log_dir
    resume_training = args.resume_training

    trajectories = load_trajectories(trajectories_path)
    print('\nLoaded %d trajectories.' % len(trajectories))

    if global_model:
        trajectories = extract_global_features(
            trajectories, video_resolution=video_resolution)
        # Global features override whatever coordinate system was requested.
        coordinate_system = 'global'
        print(
            '\nExtracted global features from input skeletons. In addition, the coordinate system has been set '
            'to global.')

    trajectories = change_coordinate_system(
        trajectories,
        video_resolution=video_resolution,
        coordinate_system=coordinate_system,
        invert=False)
    print('\nChanged coordinate system to %s.' % coordinate_system)

    trajectories_train, trajectories_val = split_into_train_and_test(
        trajectories, train_ratio=0.8, seed=42)

    X_train = shuffle(aggregate_autoencoder_data(trajectories_train),
                      random_state=42)
    X_val = aggregate_autoencoder_data(trajectories_val)

    # The scaler comes from the training split only and is reused, unchanged,
    # for the validation split.
    X_train, scaler = scale_trajectories(X_train,
                                         strategy=normalisation_strategy)
    X_val, _ = scale_trajectories(X_val,
                                  scaler=scaler,
                                  strategy=normalisation_strategy)
    print('\nNormalised input features using the %s normalisation strategy.' %
          normalisation_strategy)

    input_dim = X_train.shape[-1]
    ae = Autoencoder(input_dim=input_dim,
                     hidden_dims=hidden_dims,
                     output_activation=output_activation,
                     optimiser=optimiser,
                     learning_rate=learning_rate,
                     loss=loss)

    log_dir = set_up_logging(camera_id=camera_id,
                             root_log_dir=root_log_dir,
                             resume_training=resume_training)
    last_epoch = resume_training_from_last_epoch(
        model=ae, resume_training=resume_training)

    # Input and target are the same array: the model learns to reconstruct.
    ae.train(X_train,
             X_train,
             epochs=epochs,
             initial_epoch=last_epoch,
             batch_size=batch_size,
             val_data=(X_val, X_val),
             log_dir=log_dir)
    print('Autoencoder anomaly model successfully trained.')

    if log_dir is not None:
        # Persist the scaler next to the logs so evaluation can reuse it.
        file_name = os.path.join(log_dir, 'scaler.pkl')
        joblib.dump(scaler, filename=file_name)
        print('log files were written to: %s' % log_dir)

    return ae, scaler
def train_combined_model(args):
    """Train the combined (global + local) recurrent anomaly model.

    Trajectories are filtered for minimum length, split 80/20 (seed 42) and
    turned into up to three inputs, each with its own scaler obtained from
    the training split:

    * global: global features in the ``global`` coordinate system;
    * local: skeletons in the ``bounding_box_centre`` coordinate system;
    * out (only when ``args.reconstruct_original_data``): the original
      trajectories in the ``global`` coordinate system.

    Args:
        args: Namespace-like object providing the attributes read at the top
            of the function body (paths, architecture, training and logging
            options).

    Returns:
        ``(model, global_scaler, local_scaler)``, or
        ``(model, global_scaler, local_scaler, out_scaler)`` when
        ``args.reconstruct_original_data`` is truthy. When ``set_up_logging``
        returns a directory, the scalers are also written there as
        ``global_scaler.pkl``, ``local_scaler.pkl`` and ``out_scaler.pkl``.
    """
    # General
    trajectories_path = args.trajectories
    # os.path.basename() returns '' for a path ending in a separator, so the
    # path is normalised first; otherwise the camera id would be empty.
    camera_id = os.path.basename(os.path.normpath(trajectories_path))
    video_resolution = [float(measurement) for measurement in args.video_resolution.split('x')]
    video_resolution = np.array(video_resolution, dtype=np.float32)
    # Architecture
    message_passing = args.message_passing
    reconstruct_original_data = args.reconstruct_original_data
    multiple_outputs = args.multiple_outputs
    multiple_outputs_before_concatenation = args.multiple_outputs_before_concatenation
    input_length = args.input_length
    rec_length = args.rec_length
    pred_length = args.pred_length
    global_hidden_dims = args.global_hidden_dims
    local_hidden_dims = args.local_hidden_dims
    extra_hidden_dims = args.extra_hidden_dims
    output_activation = args.output_activation
    cell_type = args.cell_type
    reconstruct_reverse = args.reconstruct_reverse
    # Training
    optimiser = args.optimiser
    learning_rate = args.learning_rate
    loss = args.loss
    l1_reg = args.l1_reg
    l2_reg = args.l2_reg
    epochs = args.epochs
    batch_size = args.batch_size
    input_missing_steps = args.input_missing_steps
    global_normalisation_strategy = args.global_normalisation_strategy
    local_normalisation_strategy = args.local_normalisation_strategy
    out_normalisation_strategy = args.out_normalisation_strategy
    # Logging
    root_log_dir = args.root_log_dir
    resume_training = args.resume_training

    trajectories = load_trajectories(trajectories_path)
    print('\nLoaded %d trajectories.' % len(trajectories))

    trajectories = remove_short_trajectories(trajectories, input_length=input_length,
                                             input_gap=0, pred_length=pred_length)
    print('\nRemoved short trajectories. Number of trajectories left: %d.' % len(trajectories))

    trajectories_train, trajectories_val = split_into_train_and_test(trajectories, train_ratio=0.8, seed=42)

    # Missing steps are filled in for the training split only.
    if input_missing_steps:
        trajectories_train = input_trajectories_missing_steps(trajectories_train)
        print('\nInputted missing steps of trajectories.')

    # TODO: General function to extract features
    # X_..._train, X_..._val, y_..._train, y_..._val, ..._scaler = general_function()

    # Global
    # Deep copies keep the original trajectories available for the local
    # (and optional output) branches below.
    global_trajectories_train = extract_global_features(deepcopy(trajectories_train), video_resolution=video_resolution)
    global_trajectories_val = extract_global_features(deepcopy(trajectories_val), video_resolution=video_resolution)

    global_trajectories_train = change_coordinate_system(global_trajectories_train, video_resolution=video_resolution,
                                                         coordinate_system='global', invert=False)
    global_trajectories_val = change_coordinate_system(global_trajectories_val, video_resolution=video_resolution,
                                                       coordinate_system='global', invert=False)
    print('\nChanged global trajectories\'s coordinate system to global.')

    _, global_scaler = scale_trajectories(aggregate_autoencoder_data(global_trajectories_train),
                                          strategy=global_normalisation_strategy)

    X_global_train, y_global_train = aggregate_rnn_autoencoder_data(global_trajectories_train,
                                                                    input_length=input_length,
                                                                    input_gap=0, pred_length=pred_length)
    X_global_val, y_global_val = aggregate_rnn_autoencoder_data(global_trajectories_val, input_length=input_length,
                                                                input_gap=0, pred_length=pred_length)

    X_global_train, _ = scale_trajectories(X_global_train, scaler=global_scaler, strategy=global_normalisation_strategy)
    X_global_val, _ = scale_trajectories(X_global_val, scaler=global_scaler, strategy=global_normalisation_strategy)
    if y_global_train is not None and y_global_val is not None:
        y_global_train, _ = scale_trajectories(y_global_train, scaler=global_scaler,
                                               strategy=global_normalisation_strategy)
        y_global_val, _ = scale_trajectories(y_global_val, scaler=global_scaler, strategy=global_normalisation_strategy)
    print('\nNormalised global trajectories using the %s normalisation strategy.' % global_normalisation_strategy)

    # Local
    # The originals are copied only when they are still needed afterwards as
    # the reconstruction target (presumably because the coordinate change
    # may modify its input -- confirm against change_coordinate_system).
    local_trajectories_train = deepcopy(trajectories_train) if reconstruct_original_data else trajectories_train
    local_trajectories_val = deepcopy(trajectories_val) if reconstruct_original_data else trajectories_val

    local_trajectories_train = change_coordinate_system(local_trajectories_train, video_resolution=video_resolution,
                                                        coordinate_system='bounding_box_centre', invert=False)
    local_trajectories_val = change_coordinate_system(local_trajectories_val, video_resolution=video_resolution,
                                                      coordinate_system='bounding_box_centre', invert=False)
    print('\nChanged local trajectories\'s coordinate system to bounding_box_centre.')

    _, local_scaler = scale_trajectories(aggregate_autoencoder_data(local_trajectories_train),
                                         strategy=local_normalisation_strategy)

    X_local_train, y_local_train = aggregate_rnn_autoencoder_data(local_trajectories_train, input_length=input_length,
                                                                  input_gap=0, pred_length=pred_length)
    X_local_val, y_local_val = aggregate_rnn_autoencoder_data(local_trajectories_val, input_length=input_length,
                                                              input_gap=0, pred_length=pred_length)

    X_local_train, _ = scale_trajectories(X_local_train, scaler=local_scaler, strategy=local_normalisation_strategy)
    X_local_val, _ = scale_trajectories(X_local_val, scaler=local_scaler, strategy=local_normalisation_strategy)
    if y_local_train is not None and y_local_val is not None:
        y_local_train, _ = scale_trajectories(y_local_train, scaler=local_scaler, strategy=local_normalisation_strategy)
        y_local_val, _ = scale_trajectories(y_local_val, scaler=local_scaler, strategy=local_normalisation_strategy)
    print('\nNormalised local trajectories using the %s normalisation strategy.' % local_normalisation_strategy)

    # (Optional) Reconstruct the original data
    if reconstruct_original_data:
        print('\nReconstruction/Prediction target is the original data.')
        out_trajectories_train = trajectories_train
        out_trajectories_val = trajectories_val

        out_trajectories_train = change_coordinate_system(out_trajectories_train, video_resolution=video_resolution,
                                                          coordinate_system='global', invert=False)
        out_trajectories_val = change_coordinate_system(out_trajectories_val, video_resolution=video_resolution,
                                                        coordinate_system='global', invert=False)
        print('\nChanged target trajectories\'s coordinate system to global.')

        _, out_scaler = scale_trajectories(aggregate_autoencoder_data(out_trajectories_train),
                                           strategy=out_normalisation_strategy)

        X_out_train, y_out_train = aggregate_rnn_autoencoder_data(out_trajectories_train, input_length=input_length,
                                                                  input_gap=0, pred_length=pred_length)
        X_out_val, y_out_val = aggregate_rnn_autoencoder_data(out_trajectories_val, input_length=input_length,
                                                              input_gap=0, pred_length=pred_length)

        X_out_train, _ = scale_trajectories(X_out_train, scaler=out_scaler, strategy=out_normalisation_strategy)
        X_out_val, _ = scale_trajectories(X_out_val, scaler=out_scaler, strategy=out_normalisation_strategy)
        if y_out_train is not None and y_out_val is not None:
            y_out_train, _ = scale_trajectories(y_out_train, scaler=out_scaler, strategy=out_normalisation_strategy)
            y_out_val, _ = scale_trajectories(y_out_val, scaler=out_scaler, strategy=out_normalisation_strategy)
        print('\nNormalised target trajectories using the %s normalisation strategy.' % out_normalisation_strategy)

    # Shuffle training data and assemble training and validation sets
    # All arrays go through a single shuffle() call so that they stay
    # aligned with one another.
    if y_global_train is not None:
        if reconstruct_original_data:
            X_global_train, X_local_train, X_out_train, y_global_train, y_local_train, y_out_train = \
                shuffle(X_global_train, X_local_train, X_out_train,
                        y_global_train, y_local_train, y_out_train, random_state=42)
            X_train = [X_global_train, X_local_train, X_out_train]
            y_train = [y_global_train, y_local_train, y_out_train]
            val_data = ([X_global_val, X_local_val, X_out_val], [y_global_val, y_local_val, y_out_val])
        else:
            X_global_train, X_local_train, y_global_train, y_local_train = \
                shuffle(X_global_train, X_local_train, y_global_train, y_local_train, random_state=42)
            X_train = [X_global_train, X_local_train]
            y_train = [y_global_train, y_local_train]
            val_data = ([X_global_val, X_local_val], [y_global_val, y_local_val])
    else:
        # No prediction targets: validation data is a one-element tuple.
        if reconstruct_original_data:
            X_global_train, X_local_train, X_out_train = \
                shuffle(X_global_train, X_local_train, X_out_train, random_state=42)
            X_train = [X_global_train, X_local_train, X_out_train]
            y_train = None
            val_data = ([X_global_val, X_local_val, X_out_val],)
        else:
            X_global_train, X_local_train = shuffle(X_global_train, X_local_train, random_state=42)
            X_train = [X_global_train, X_local_train]
            y_train = None
            val_data = ([X_global_val, X_local_val],)

    # Model
    print('\nInstantiating combined anomaly model ...')
    global_input_dim = X_global_train.shape[-1]
    local_input_dim = X_local_train.shape[-1]
    model_args = {'input_length': input_length, 'global_input_dim': global_input_dim,
                  'local_input_dim': local_input_dim, 'reconstruction_length': rec_length,
                  'prediction_length': pred_length, 'global_hidden_dims': global_hidden_dims,
                  'local_hidden_dims': local_hidden_dims, 'extra_hidden_dims': extra_hidden_dims,
                  'output_activation': output_activation, 'cell_type': cell_type,
                  'reconstruct_reverse': reconstruct_reverse, 'reconstruct_original_data': reconstruct_original_data,
                  'multiple_outputs': multiple_outputs,
                  'multiple_outputs_before_concatenation': multiple_outputs_before_concatenation,
                  'optimiser': optimiser, 'learning_rate': learning_rate, 'loss': loss,
                  'l1_reg': l1_reg, 'l2_reg': l2_reg}
    if message_passing:
        combined_rnn_ae = MessagePassingEncoderDecoder(**model_args)
    else:
        combined_rnn_ae = CombinedEncoderDecoder(**model_args)

    log_dir = set_up_logging(camera_id=camera_id, root_log_dir=root_log_dir, resume_training=resume_training)
    last_epoch = resume_training_from_last_epoch(model=combined_rnn_ae, resume_training=resume_training)

    combined_rnn_ae.train(X_train, y_train, epochs=epochs, initial_epoch=last_epoch, batch_size=batch_size,
                          val_data=val_data, log_dir=log_dir)
    print('\nCombined anomaly model successfully trained.')

    if log_dir is not None:
        # Persist every scaler next to the logs so evaluation can reuse them.
        global_scaler_file_name = os.path.join(log_dir, 'global_scaler.pkl')
        local_scaler_file_name = os.path.join(log_dir, 'local_scaler.pkl')
        joblib.dump(global_scaler, filename=global_scaler_file_name)
        joblib.dump(local_scaler, filename=local_scaler_file_name)
        if reconstruct_original_data:
            out_scaler_file_name = os.path.join(log_dir, 'out_scaler.pkl')
            joblib.dump(out_scaler, filename=out_scaler_file_name)
        print('log files were written to: %s' % log_dir)

    if reconstruct_original_data:
        return combined_rnn_ae, global_scaler, local_scaler, out_scaler

    return combined_rnn_ae, global_scaler, local_scaler
def eval_combined_model(args):
    """Evaluate a pre-trained combined (global + local) model on one camera's trajectories.

    The model and its scalers are loaded from ``args.pretrained_model``. The name of that
    path's parent directory is searched for markers describing the model: 'mp' selects the
    message-passing loader, 'down' means the model reconstructs the original (out-scaled)
    data, and '_G3stds_'/'_Grobust_', '_L3stds_'/'_Lrobust_', '_O3stds_'/'_Orobust_' select
    the global, local and output normalisation strategies ('zero_one' otherwise).

    Reconstruction errors, and prediction errors when the model's ``prediction_length`` is
    greater than zero, are compared with the frame-level anomaly masks and AUROC / AUPR are
    printed. When 'Avenue' occurs in the trajectories path, the scores are restricted to
    ``AVENUE_MASK``.

    Args:
        args: object exposing ``trajectories``, ``pretrained_model``, ``video_resolution``
            (an 'x'-separated string of numbers), ``frame_level_anomaly_masks``,
            ``overlapping_trajectories`` and the output switches ``write_reconstructions``,
            ``write_bounding_boxes``, ``write_predictions``,
            ``write_predictions_bounding_boxes``, ``write_anomaly_masks`` and
            ``write_mistakes``.

    Returns:
        The tuple ``(y_true, y_hat, y_true_pred, y_hat_pred, y_true_comb, y_hat_comb)``.
        The last four entries are ``None`` when the model makes no future prediction.
    """
    trajectories_path = args.trajectories  # e.g. .../optflow/alphapose/07
    camera_id = os.path.basename(trajectories_path)
    pretrained_model_path = args.pretrained_model  # e.g. .../16_0_2_rrs_mse/07_2018_09_20_13_15_13
    video_resolution = [
        float(measurement) for measurement in args.video_resolution.split('x')
    ]
    video_resolution = np.array(video_resolution, dtype=np.float32)
    frame_level_anomaly_masks_path = args.frame_level_anomaly_masks
    overlapping_trajectories = args.overlapping_trajectories
    # Logging
    write_reconstructions = args.write_reconstructions
    write_bounding_boxes = args.write_bounding_boxes
    write_predictions = args.write_predictions
    write_predictions_bounding_boxes = args.write_predictions_bounding_boxes
    write_anomaly_masks = args.write_anomaly_masks
    write_mistakes = args.write_mistakes

    model_info = os.path.basename(os.path.split(pretrained_model_path)[0])
    message_passing = 'mp' in model_info
    is_avenue = 'Avenue' in trajectories_path

    def _normalisation_strategy(tag):
        # Map the '_<tag>3stds_' / '_<tag>robust_' markers of the model name to a strategy.
        if '_%s3stds_' % tag in model_info:
            return 'three_stds'
        if '_%srobust_' % tag in model_info:
            return 'robust'
        return 'zero_one'

    def _report(title, truth, scores):
        # Compute AUROC / AUPR (restricted to AVENUE_MASK for Avenue data) and print them.
        if is_avenue:
            truth, scores = truth[AVENUE_MASK], scores[AVENUE_MASK]
        auroc, aupr = roc_auc_score(truth, scores), average_precision_score(truth, scores)
        print(title)
        print('Camera %s:\tAUROC\tAUPR' % camera_id)
        print('          \t%.4f\t%.4f\n' % (auroc, aupr))

    pretrained_combined_model, global_scaler, local_scaler, out_scaler = \
        load_pretrained_combined_model(pretrained_model_path, message_passing=message_passing)

    # Extract information about the models
    reconstruct_original_data = 'down' in model_info
    global_normalisation_strategy = _normalisation_strategy('G')
    local_normalisation_strategy = _normalisation_strategy('L')
    out_normalisation_strategy = _normalisation_strategy('O')

    multiple_outputs = pretrained_combined_model.multiple_outputs
    input_length, rec_length = pretrained_combined_model.input_length, pretrained_combined_model.reconstruction_length
    input_gap, pred_length = 0, pretrained_combined_model.prediction_length
    reconstruct_reverse = pretrained_combined_model.reconstruct_reverse
    loss = pretrained_combined_model.loss
    # Everything that depends on a future prediction is guarded by this flag.
    has_predictions = pred_length > 0

    # Data
    trajectories = load_trajectories(trajectories_path)

    trajectories = remove_short_trajectories(trajectories,
                                             input_length=input_length,
                                             input_gap=input_gap,
                                             pred_length=pred_length)

    # The same windowing arguments are used for every aggregation below.
    window_args = dict(input_length=input_length,
                       input_gap=input_gap,
                       pred_length=pred_length,
                       overlapping_trajectories=overlapping_trajectories)

    # Each view works on its own deep copy of the loaded trajectories.
    global_trajectories = extract_global_features(
        deepcopy(trajectories), video_resolution=video_resolution)
    global_trajectories = change_coordinate_system(
        global_trajectories,
        video_resolution=video_resolution,
        coordinate_system='global',
        invert=False)
    trajectories_ids, frames, X_global = \
        aggregate_rnn_ae_evaluation_data(global_trajectories, **window_args)
    X_global, _ = scale_trajectories(X_global,
                                     scaler=global_scaler,
                                     strategy=global_normalisation_strategy)

    local_trajectories = deepcopy(trajectories)
    local_trajectories = change_coordinate_system(
        local_trajectories,
        video_resolution=video_resolution,
        coordinate_system='bounding_box_centre',
        invert=False)
    _, _, X_local = aggregate_rnn_ae_evaluation_data(local_trajectories,
                                                     **window_args)
    X_local, _ = scale_trajectories(X_local,
                                    scaler=local_scaler,
                                    strategy=local_normalisation_strategy)

    if reconstruct_original_data:
        # `trajectories` is not read again after this point, so no copy is taken here.
        out_trajectories = change_coordinate_system(
            trajectories,
            video_resolution=video_resolution,
            coordinate_system='global',
            invert=False)
        _, _, X_out = aggregate_rnn_ae_evaluation_data(out_trajectories,
                                                       **window_args)
        X_out, _ = scale_trajectories(X_out,
                                      scaler=out_scaler,
                                      strategy=out_normalisation_strategy)

    # Reconstruct
    X_input = [X_global, X_local]
    if pred_length == 0:
        if multiple_outputs:
            # Only the third output is used as the reconstruction.
            _, _, reconstructed_X = pretrained_combined_model.predict(
                X_input, batch_size=1024)
        else:
            reconstructed_X = pretrained_combined_model.predict(
                X_input, batch_size=1024)
    else:
        if multiple_outputs:
            # Third output: reconstruction; sixth output: future prediction.
            _, _, reconstructed_X, _, _, predicted_y = \
                pretrained_combined_model.predict(X_input, batch_size=1024)
        else:
            reconstructed_X, predicted_y = pretrained_combined_model.predict(
                X_input, batch_size=1024)

    if reconstruct_reverse:
        # Flip the time axis so the reconstruction lines up with the input order.
        reconstructed_X = reconstructed_X[:, ::-1, :]

    X = X_out if reconstruct_original_data else np.concatenate(
        (X_global, X_local), axis=-1)
    reconstruction_errors = compute_rnn_ae_reconstruction_errors(
        X[:, :rec_length, :], reconstructed_X, loss)
    reconstruction_ids, reconstruction_frames, reconstruction_errors = \
        summarise_reconstruction_errors(reconstruction_errors, frames[:, :rec_length], trajectories_ids[:, :rec_length])

    # Evaluate performance
    anomaly_masks = load_anomaly_masks(frame_level_anomaly_masks_path)
    y_true, y_hat, video_ids = assemble_ground_truth_and_reconstructions(
        anomaly_masks,
        reconstruction_ids,
        reconstruction_frames,
        reconstruction_errors,
        return_video_ids=True)

    _report('Reconstruction Based:', y_true, y_hat)

    if has_predictions:
        predicted_frames = frames[:, :pred_length] + input_length
        predicted_ids = trajectories_ids[:, :pred_length]

        y = retrieve_future_skeletons(trajectories_ids, X, pred_length)

        pred_errors = compute_rnn_ae_reconstruction_errors(
            y, predicted_y, loss)

        pred_ids, pred_frames, pred_errors = discard_information_from_padded_frames(
            predicted_ids, predicted_frames, pred_errors, pred_length)

        pred_ids, pred_frames, pred_errors = summarise_reconstruction_errors(
            pred_errors, pred_frames, pred_ids)

        y_true_pred, y_hat_pred = assemble_ground_truth_and_reconstructions(
            anomaly_masks, pred_ids, pred_frames, pred_errors)
        _report('Prediction Based:', y_true_pred, y_hat_pred)

        # Combined score: sum of the reconstruction and prediction scores.
        y_true_comb, y_hat_comb = y_true, y_hat + y_hat_pred
        _report('Reconstruction + Prediction Based:', y_true_comb, y_hat_comb)

        if reconstruct_original_data:
            predicted_y_traj = inverse_scale(predicted_y, scaler=out_scaler)
            predicted_y_traj = restore_global_coordinate_system(
                predicted_y_traj, video_resolution=video_resolution)
        else:
            # The first four features are the global part, the remainder the local part.
            predicted_y_global = inverse_scale(predicted_y[..., :4],
                                               scaler=global_scaler)
            predicted_y_local = inverse_scale(predicted_y[..., 4:],
                                              scaler=local_scaler)
            predicted_y_global = restore_global_coordinate_system(
                predicted_y_global, video_resolution=video_resolution)
            predicted_y_traj = restore_original_trajectory(
                predicted_y_global, predicted_y_local)

        prediction_ids, prediction_frames, predicted_y_traj = \
            summarise_reconstruction(predicted_y_traj, predicted_frames, predicted_ids)

        predicted_bounding_boxes = np.apply_along_axis(
            compute_bounding_box,
            axis=1,
            arr=predicted_y_traj,
            video_resolution=video_resolution)

    # Post-Processing
    if reconstruct_original_data:
        reconstructed_X_traj = inverse_scale(reconstructed_X,
                                             scaler=out_scaler)
        reconstructed_X_traj = restore_global_coordinate_system(
            reconstructed_X_traj, video_resolution=video_resolution)
    else:
        reconstructed_X_global = inverse_scale(reconstructed_X[..., :4],
                                               scaler=global_scaler)
        reconstructed_X_local = inverse_scale(reconstructed_X[..., 4:],
                                              scaler=local_scaler)

        reconstructed_X_global = restore_global_coordinate_system(
            reconstructed_X_global, video_resolution=video_resolution)
        reconstructed_X_traj = restore_original_trajectory(
            reconstructed_X_global, reconstructed_X_local)

    reconstruction_ids, reconstruction_frames, reconstructed_X_traj = \
        summarise_reconstruction(reconstructed_X_traj, frames[:, :rec_length], trajectories_ids[:, :rec_length])

    reconstructed_bounding_boxes = np.apply_along_axis(
        compute_bounding_box,
        axis=1,
        arr=reconstructed_X_traj,
        video_resolution=video_resolution)

    # The worst mistakes are ranked on the prediction-based scores when the model predicts
    # the future. Without a prediction those names do not exist, so fall back to the
    # reconstruction-based scores instead of failing with a NameError.
    if has_predictions:
        mistakes_y_true, mistakes_y_hat = y_true_pred, y_hat_pred
    else:
        mistakes_y_true, mistakes_y_hat = y_true, y_hat

    worst_false_positives = compute_worst_mistakes(
        y_true=mistakes_y_true,
        y_hat=mistakes_y_hat,
        video_ids=video_ids,
        error_type='false_positives',
        top=25)
    worst_false_negatives = compute_worst_mistakes(
        y_true=mistakes_y_true,
        y_hat=mistakes_y_hat,
        video_ids=video_ids,
        error_type='false_negatives',
        top=25)

    # Logging
    if write_reconstructions:
        write_reconstructed_trajectories(pretrained_model_path,
                                         reconstructed_X_traj,
                                         reconstruction_ids,
                                         reconstruction_frames,
                                         trajectory_type='skeleton')

    if write_bounding_boxes:
        write_reconstructed_trajectories(pretrained_model_path,
                                         reconstructed_bounding_boxes,
                                         reconstruction_ids,
                                         reconstruction_frames,
                                         trajectory_type='bounding_box')

    if (write_predictions or write_predictions_bounding_boxes) and not has_predictions:
        # Nothing was predicted, so there is nothing to write; say so rather than crash.
        print('The model makes no future prediction; prediction outputs were not written.')

    if write_predictions and has_predictions:
        write_reconstructed_trajectories(pretrained_model_path,
                                         predicted_y_traj,
                                         prediction_ids,
                                         prediction_frames,
                                         trajectory_type='predicted_skeleton')

    if write_predictions_bounding_boxes and has_predictions:
        write_reconstructed_trajectories(
            pretrained_model_path,
            predicted_bounding_boxes,
            prediction_ids,
            prediction_frames,
            trajectory_type='predicted_bounding_box')

    if write_anomaly_masks:
        anomalous_frames = detect_most_anomalous_or_most_normal_frames(
            reconstruction_errors, anomalous=True, fraction=0.20)
        normal_frames = detect_most_anomalous_or_most_normal_frames(
            reconstruction_errors, anomalous=False, fraction=0.20)
        num_frames_per_video = compute_num_frames_per_video(anomaly_masks)
        write_predicted_masks(pretrained_model_path, num_frames_per_video,
                              anomalous_frames, normal_frames,
                              reconstructed_bounding_boxes, reconstruction_ids,
                              reconstruction_frames, video_resolution)

    if write_mistakes:
        write_worst_mistakes(pretrained_model_path,
                             worst_false_positives=worst_false_positives,
                             worst_false_negatives=worst_false_negatives)

    if has_predictions:
        return y_true, y_hat, y_true_pred, y_hat_pred, y_true_comb, y_hat_comb
    return y_true, y_hat, None, None, None, None
# Example no. 6
# 0
def eval_rnn_ae(args):
    """Score a pre-trained RNN auto-encoder against one camera's frame-level anomaly masks.

    The model and its scaler come from ``args.pretrained_model``. Markers in the name of
    that path's parent directory ('_gm_', '_ed_', '_ufsar_', '_cm_', '_bb-tl_', '_bb-c_',
    '_3stds_', '_robust_') decide how the trajectories are pre-processed. AUROC and AUPR are
    printed for the reconstruction errors and, when the model's ``prediction_length`` is
    greater than zero, for the prediction errors and for the sum of both scores.

    Args:
        args: object exposing ``trajectories``, ``pretrained_model``, ``video_resolution``
            (an 'x'-separated string of numbers), ``frame_level_anomaly_masks`` and
            ``overlapping_trajectories``.

    Returns:
        The tuple ``(y_true, y_hat, y_true_pred, y_hat_pred, y_true_comb, y_hat_comb)``;
        the last four entries are ``None`` when ``prediction_length`` is not positive.
    """
    # General
    trajectories_path = args.trajectories  # e.g. .../optflow/alphapose/08
    camera_id = os.path.basename(trajectories_path)
    pretrained_model_path = args.pretrained_model  # e.g. .../16_0_2_rrs_bb-c_3stds_mse/08_2018_10_02_11_39_20
    video_resolution = np.array(
        [float(side) for side in args.video_resolution.split('x')],
        dtype=np.float32)
    frame_level_anomaly_masks_path = args.frame_level_anomaly_masks
    overlapping_trajectories = args.overlapping_trajectories

    # Load Pre-Trained Model
    pretrained_rnn_ae, scaler = load_pretrained_rnn_ae(pretrained_model_path)

    # The parent directory name encodes the training configuration.
    model_info = os.path.basename(os.path.split(pretrained_model_path)[0])
    global_model = '_gm_' in model_info
    extract_delta = '_ed_' in model_info
    use_first_step_as_reference = '_ufsar_' in model_info
    concatenate_model = '_cm_' in model_info

    if '_bb-tl_' in model_info:
        coordinate_system = 'bounding_box_top_left'
    elif '_bb-c_' in model_info:
        coordinate_system = 'bounding_box_centre'
    else:
        coordinate_system = 'global'

    if '_3stds_' in model_info:
        normalisation_strategy = 'three_stds'
    elif '_robust_' in model_info:
        normalisation_strategy = 'robust'
    else:
        normalisation_strategy = 'zero_one'

    input_length = pretrained_rnn_ae.input_length
    input_gap = pretrained_rnn_ae.input_gap
    rec_length = pretrained_rnn_ae.reconstruction_length
    pred_length = pretrained_rnn_ae.prediction_length
    reconstruct_reverse = pretrained_rnn_ae.reconstruct_reverse
    loss = pretrained_rnn_ae.loss

    # Keyword arguments shared by several of the calls below.
    length_args = dict(input_length=input_length,
                       input_gap=input_gap,
                       pred_length=pred_length)
    window_args = dict(length_args,
                       overlapping_trajectories=overlapping_trajectories)
    global_feature_args = dict(
        video_resolution=video_resolution,
        extract_delta=extract_delta,
        use_first_step_as_reference=use_first_step_as_reference)

    def report(title, truth, scores):
        # Compute both metrics first, then print the three-line summary.
        auroc = roc_auc_score(truth, scores)
        aupr = average_precision_score(truth, scores)
        print(title)
        print('Camera %s:\tAUROC\tAUPR' % camera_id)
        print('          \t%.4f\t%.4f\n' % (auroc, aupr))

    # Data pre-processing
    trajectories = remove_short_trajectories(
        load_trajectories(trajectories_path), **length_args)

    if concatenate_model:
        # Global features are extracted from a deep copy of the trajectories ...
        trajectories_global = change_coordinate_system(
            extract_global_features(deepcopy(trajectories),
                                    **global_feature_args),
            video_resolution=video_resolution,
            coordinate_system='global',
            invert=False)
        trajectories_ids, frames, X_global = aggregate_rnn_ae_evaluation_data(
            trajectories_global, **window_args)

        # ... while the local view is derived from the trajectories themselves.
        trajectories_local = change_coordinate_system(
            trajectories,
            video_resolution=video_resolution,
            coordinate_system='bounding_box_centre',
            invert=False)
        _, _, X_local = aggregate_rnn_ae_evaluation_data(
            trajectories_local, **window_args)

        X = np.concatenate((X_global, X_local), axis=-1)
    else:
        if global_model:
            trajectories = extract_global_features(trajectories,
                                                   **global_feature_args)
            coordinate_system = 'global'

        trajectories = change_coordinate_system(
            trajectories,
            video_resolution=video_resolution,
            coordinate_system=coordinate_system,
            invert=False)

        trajectories_ids, frames, X = aggregate_rnn_ae_evaluation_data(
            trajectories, **window_args)

    # Both branches scale the assembled input with the model's scaler.
    X, _ = scale_trajectories(X,
                              scaler=scaler,
                              strategy=normalisation_strategy)

    # Reconstruct
    if pred_length == 0:
        reconstructed_X = pretrained_rnn_ae.predict(X)
    else:
        reconstructed_X, predicted_y = pretrained_rnn_ae.predict(X)

    if reconstruct_reverse:
        # Flip the time axis of the reconstruction.
        reconstructed_X = reconstructed_X[:, ::-1, :]

    reconstruction_errors = compute_rnn_ae_reconstruction_errors(
        X[:, :rec_length, :], reconstructed_X, loss)
    reconstruction_ids, reconstruction_frames, reconstruction_errors = \
        summarise_reconstruction_errors(reconstruction_errors,
                                        frames[:, :rec_length],
                                        trajectories_ids[:, :rec_length])

    # Evaluate performance
    anomaly_masks = load_anomaly_masks(frame_level_anomaly_masks_path)
    y_true, y_hat = assemble_ground_truth_and_reconstructions(
        anomaly_masks, reconstruction_ids, reconstruction_frames,
        reconstruction_errors)

    report('Reconstruction Based:', y_true, y_hat)

    # Without a future prediction there is nothing more to evaluate.
    if not pred_length > 0:
        return y_true, y_hat, None, None, None, None

    # Future Prediction
    future_frames = frames[:, :pred_length] + input_length
    future_ids = trajectories_ids[:, :pred_length]

    y = retrieve_future_skeletons(trajectories_ids, X, pred_length)
    pred_errors = compute_rnn_ae_reconstruction_errors(y, predicted_y, loss)

    future_ids, future_frames, pred_errors = \
        discard_information_from_padded_frames(future_ids, future_frames,
                                               pred_errors, pred_length)

    pred_ids, pred_frames, pred_errors = summarise_reconstruction_errors(
        pred_errors, future_frames, future_ids)

    y_true_pred, y_hat_pred = assemble_ground_truth_and_reconstructions(
        anomaly_masks, pred_ids, pred_frames, pred_errors)
    report('Prediction Based:', y_true_pred, y_hat_pred)

    # Combined score: reconstruction and prediction scores added together.
    y_true_comb = y_true
    y_hat_comb = y_hat + y_hat_pred
    report('Reconstruction + Prediction Based:', y_true_comb, y_hat_comb)

    return y_true, y_hat, y_true_pred, y_hat_pred, y_true_comb, y_hat_comb