Example #1
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Model  # or tensorflow.keras.models, depending on the project setup

# Project-specific helpers (load_dataset_at, calculate_dataset_metrics, cutoff_choice,
# cutoff_sequence, build_function, get_outputs, plot_dataset) and the constant
# MAX_SEQUENCE_LENGTH_LIST are assumed to be imported from the surrounding project's
# utility modules.


def visualize_context_vector(model: Model,
                             dataset_id,
                             dataset_prefix,
                             cutoff=None,
                             limit=None,
                             normalize_timeseries=False,
                             visualize_sequence=True,
                             visualize_classwise=False):
    """Visualize the context (attention) vector of the AttentionLSTM layer over a dataset."""
    X_train, y_train, X_test, y_test, is_timeseries = load_dataset_at(
        dataset_id, normalize_timeseries=normalize_timeseries)
    _, sequence_length = calculate_dataset_metrics(X_train)

    if sequence_length != MAX_SEQUENCE_LENGTH_LIST[dataset_id]:
        if cutoff is None:
            choice = cutoff_choice(dataset_id, sequence_length)
        else:
            assert cutoff in [
                'pre', 'post'
            ], 'Cutoff parameter value must be either "pre" or "post"'
            choice = cutoff

        if choice not in ['pre', 'post']:
            return
        else:
            X_train, X_test = cutoff_sequence(X_train, X_test, choice,
                                              dataset_id, sequence_length)

    attn_lstm_layer = [(i, layer) for (i, layer) in enumerate(model.layers)
                       if layer.__class__.__name__ == 'AttentionLSTM']

    if len(attn_lstm_layer) == 0:
        raise AttributeError('Provided model does not have an Attention layer')
    else:
        # use the first AttentionLSTM layer only
        i, attn_lstm_layer = attn_lstm_layer[0]

    attn_lstm_layer.return_attention = True

    model.layers[i] = attn_lstm_layer
    model.load_weights("./weights/%s_weights.h5" % dataset_prefix)

    attention_output = model.layers[i].call(model.input)

    eval_functions = build_function(model,
                                    attn_lstm_layer.name,
                                    outputs=[attention_output])
    attention_vectors = []

    for i in range(X_train.shape[0]):
        activations = get_outputs(model,
                                  X_train[i, :, :][np.newaxis, ...],
                                  eval_functions,
                                  verbose=False)[0]

        attention_vector = np.sum(activations, axis=1).squeeze()
        attention_vectors.append(attention_vector)

    attention_vectors = np.array(attention_vectors)
    attention_vector_final = np.mean(attention_vectors, axis=0)

    if visualize_sequence:
        # plot, in detail, the part of the input sequence that is attended to
        attention_vector_final = attention_vector_final.reshape(
            (1, attention_vector_final.shape[0]))

        X_train_attention = np.zeros_like(X_train)
        X_test_attention = np.zeros_like(X_test)

        for i in range(X_train.shape[0]):
            X_train_attention[
                i, :, :] = attention_vector_final * X_train[i, :, :]

        for i in range(X_test.shape[0]):
            X_test_attention[
                i, :, :] = attention_vector_final * X_test[i, :, :]

        plot_dataset(dataset_id,
                     seed=1,
                     limit=limit,
                     cutoff=cutoff,
                     normalize_timeseries=normalize_timeseries,
                     plot_data=(X_train, y_train, X_test, y_test,
                                X_train_attention, X_test_attention),
                     type='Context',
                     plot_classwise=visualize_classwise)

    else:
        # plot only attention chart

        train_df = pd.DataFrame({'attention (%)': attention_vector_final},
                                index=range(attention_vector_final.shape[0]))

        train_df.plot(kind='bar',
                      title='Attention Mechanism (Train) as '
                      'a function of input'
                      ' dimensions.')

        plt.show()
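
For orientation, below is a minimal, hypothetical usage sketch for this first variant. It assumes the surrounding project provides a model constructor (here called generate_model(), a placeholder name) that builds the AttentionLSTM-FCN network, and that trained weights already exist under ./weights/<dataset_prefix>_weights.h5; the dataset id and prefix below are likewise placeholders, not values from the example.

# Hypothetical usage sketch; generate_model(), DATASET_ID and DATASET_PREFIX are
# placeholder names, not part of the example above.
if __name__ == '__main__':
    DATASET_ID = 0                    # index into the project's dataset constants
    DATASET_PREFIX = 'my_dataset'     # weights expected at ./weights/my_dataset_weights.h5

    model = generate_model()          # assumed helper that builds the AttentionLSTM-FCN model

    visualize_context_vector(model,
                             DATASET_ID,
                             DATASET_PREFIX,
                             limit=1,
                             normalize_timeseries=True,
                             visualize_sequence=True,
                             visualize_classwise=True)
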
Example #2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.transform import resize
from keras.models import Model  # or tensorflow.keras.models, depending on the project setup

# Project-specific helpers (build_function, get_outputs, plot_dataset) are assumed to be
# imported from the surrounding project's utility modules.


def visualize_context_vector(model: Model,
                             series_values,
                             labels,
                             run_prefix,
                             cutoff=None,
                             limit=None,
                             val_split=1 / 3,
                             random_state=0,
                             visualize_sequence=True,
                             visualize_classwise=False):
    """
    Visualize the Context Vector of the Attention LSTM.

    Args:
        model: an Attention LSTM-FCN Model.
        dataset_id: Integer id representing the dataset index containd in
            `utils/constants.py`.
        dataset_prefix: Name of the dataset. Used for weight saving.
        batch_size: Size of each batch for evaluation.
        test_data_subset: Optional integer id to subset the test set. To be used if
            the test set evaluation time is significantly.
        cutoff: Optional integer which slices of the first `cutoff` timesteps
            from the input signal.
        limit: Number of samples to be visualized in one plot.
        normalize_timeseries: Bool / Integer. Determines whether to normalize
            the timeseries.

            If False, does not normalize the time series.
            If True / int not equal to 2, performs standard sample-wise
                z-normalization.
            If 2: Performs full dataset z-normalization.
        visualize_sequence: Bool flag, whetehr to visualize the sequence attended to
            by the Context Vector or just the Context Vector itself.
        visualize_classwise: Bool flag. Wheter to visualize the samples
            seperated by class. When doing so, `limit` is multiplied by
            the number of classes so it is better to set `limit` to 1 in
            such cases.
    """

    inds = np.arange(series_values.shape[0])
    np.random.seed(random_state)
    np.random.shuffle(inds)
    series_values = series_values[inds]
    labels = labels[inds]
    val_split = int(val_split * series_values.shape[0])
    X_train, y_train = series_values[:-val_split], labels[:-val_split]
    X_test, y_test = series_values[-val_split:], labels[-val_split:]

    sequence_length = series_values.shape[1]

    attn_lstm_layer = [(i, layer) for (i, layer) in enumerate(model.layers)
                       if layer.__class__.__name__ == 'AttentionLSTM']

    if len(attn_lstm_layer) == 0:
        raise AttributeError('Provided model does not have an Attention layer')
    else:
        # use the first AttentionLSTM layer only
        i, attn_lstm_layer = attn_lstm_layer[0]

    attn_lstm_layer.return_attention = True

    model.layers[i] = attn_lstm_layer
    model.load_weights("./weights/%s_weights.h5" % run_prefix)

    attention_output = model.layers[i].call(model.input)

    eval_functions = build_function(model,
                                    attn_lstm_layer.name,
                                    outputs=[attention_output])
    train_attention_vectors = []
    test_attention_vectors = []

    output_shape = [X_train.shape[-1], 1, 1]

    for i in range(X_train.shape[0]):
        activations = get_outputs(model,
                                  X_train[i, :, :][np.newaxis, ...],
                                  eval_functions,
                                  verbose=False)[0]

        # print("activations", activations.shape)
        attention_vector = activations.reshape((-1, 1, 1))

        attention_vector = (attention_vector - attention_vector.min()) / (
            attention_vector.max() - attention_vector.min())
        attention_vector = (attention_vector * 2.) - 1.

        attention_vector = resize(attention_vector,
                                  output_shape,
                                  mode='reflect',
                                  anti_aliasing=True)
        attention_vector = attention_vector.reshape([1, -1])
        train_attention_vectors.append(attention_vector)

    for i in range(X_test.shape[0]):
        activations = get_outputs(model,
                                  X_test[i, :, :][np.newaxis, ...],
                                  eval_functions,
                                  verbose=False)[0]

        # print("activations", activations.shape)
        attention_vector = activations.reshape((-1, 1, 1))

        attention_vector = (attention_vector - attention_vector.min()) / (
            attention_vector.max() - attention_vector.min())
        attention_vector = (attention_vector * 2.) - 1.

        attention_vector = resize(attention_vector,
                                  output_shape,
                                  mode='reflect',
                                  anti_aliasing=True)
        attention_vector = attention_vector.reshape([1, -1])
        test_attention_vectors.append(attention_vector)

    train_attention_vectors = np.array(train_attention_vectors)
    test_attention_vectors = np.array(test_attention_vectors)

    print("Train Attention Vectors Shape :", train_attention_vectors.shape)
    print("Test Attentin Vectors Shape :", test_attention_vectors.shape)

    if visualize_sequence:
        # plot, in detail, the part of the input sequence that is attended to
        X_train_attention = train_attention_vectors * X_train
        X_test_attention = test_attention_vectors * X_test

        plot_dataset(series_values,
                     labels,
                     run_prefix,
                     val_split=val_split,
                     seed=1,
                     limit=limit,
                     cutoff=cutoff,
                     plot_data=(X_train, y_train, X_test, y_test,
                                X_train_attention, X_test_attention),
                     type='Context',
                     plot_classwise=visualize_classwise)

    else:
        # plot only attention chart
        choice = np.random.randint(0, train_attention_vectors.shape[0])

        train_df = pd.DataFrame(
            {'attention (%)': train_attention_vectors[choice, 0]},
            index=range(train_attention_vectors.shape[-1]))

        train_df.plot(kind='bar',
                      title='Attention Mechanism (Train) as '
                      'a function of input'
                      ' dimensions. Class = %d' % (y_train[choice]))

        plt.show()
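
The main difference from Example #1 is the post-processing of each attention vector before it is overlaid on the input: the raw activations are min-max rescaled into [-1, 1] and then resampled with skimage.transform.resize so their length matches the input's last axis. The standalone sketch below reproduces just that step with illustrative shapes (128 raw attention weights resampled to 64 values); the array sizes are assumptions, not values from the example.

# Standalone sketch of the attention post-processing used above: rescale to [-1, 1],
# then resample to the length of the input's last axis. Shapes are illustrative.
import numpy as np
from skimage.transform import resize

raw_attention = np.random.rand(128)        # e.g. one raw attention weight per timestep
target_length = 64                         # plays the role of X_train.shape[-1] above

att = raw_attention.reshape((-1, 1, 1))
att = (att - att.min()) / (att.max() - att.min())  # min-max normalize to [0, 1]
att = (att * 2.) - 1.                              # shift to [-1, 1]

att = resize(att, [target_length, 1, 1], mode='reflect', anti_aliasing=True)
att = att.reshape([1, -1])                         # row vector, broadcastable over one sample

print(att.shape)  # (1, 64)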