Exemple #1
0
def trainIO(train_index, test_index):
    """Build the train/test feature matrices and label arrays for one split.

    Reads the module-level ``data`` and ``config`` objects. Feature blocks
    from the enabled target modalities (text, video, audio) are appended
    column-wise, followed by the optional auxiliary author and context
    blocks. Labels come from ``DataHelper.oneHotOutput`` with
    ``size=config.num_classes``.

    Args:
        train_index: Selector forwarded to ``data.getSplit`` for the
            training portion.
        test_index: Selector forwarded to ``data.getSplit`` for the test
            portion.

    Returns:
        The tuple ``(train_input, train_output, test_input, test_output)``.

    Raises:
        SystemExit: With status 1 when the enabled target modalities
            contributed no feature columns to the training matrix.
    """
    # Function-scope import so this block does not rely on the file's
    # top-level import list.
    import sys

    def _append_columns(matrix, extra):
        # Column-wise append of one feature block onto the running matrix.
        return np.concatenate([matrix, extra], axis=1)

    def _pooled_text(mode):
        # One pooled vector per vectorized utterance of the given split.
        return np.array([
            datahelper.pool_text(utt)
            for utt in datahelper.vectorizeUtterance(mode=mode)
        ])

    # Prepare data
    train_input, train_output = data.getSplit(train_index)
    test_input, test_output = data.getSplit(test_index)

    datahelper = DataHelper(train_input, train_output, test_input, test_output,
                            config, data)

    # Start from zero-width matrices with one row per sample so every
    # enabled feature block can simply be appended along axis 1.
    train_input = np.empty((len(train_input), 0))
    test_input = np.empty((len(test_input), 0))

    # Target-utterance modalities
    if config.use_target_text:
        if config.use_bert:
            train_input = _append_columns(
                train_input, datahelper.getTargetBertFeatures(mode='train'))
            test_input = _append_columns(
                test_input, datahelper.getTargetBertFeatures(mode='test'))
        else:
            train_input = _append_columns(train_input, _pooled_text('train'))
            test_input = _append_columns(test_input, _pooled_text('test'))

    if config.use_target_video:
        train_input = _append_columns(
            train_input, datahelper.getTargetVideoPool(mode='train'))
        test_input = _append_columns(
            test_input, datahelper.getTargetVideoPool(mode='test'))

    if config.use_target_audio:
        train_input = _append_columns(
            train_input, datahelper.getTargetAudioPool(mode='train'))
        test_input = _append_columns(
            test_input, datahelper.getTargetAudioPool(mode='test'))

    if train_input.shape[1] == 0:
        # No target modality produced any column. Report on stderr and leave
        # through sys.exit: the bare ``exit`` helper is installed by the
        # ``site`` module for interactive use and is not guaranteed to exist.
        print("Invalid modalities", file=sys.stderr)
        sys.exit(1)

    # Aux input

    if config.use_author:
        train_input_author = datahelper.getAuthor(mode="train")
        test_input_author = datahelper.getAuthor(mode="test")

        train_input = _append_columns(train_input, train_input_author)
        test_input = _append_columns(test_input, test_input_author)

    if config.use_context:
        if config.use_bert:
            train_input_context = datahelper.getContextBertFeatures(
                mode="train")
            test_input_context = datahelper.getContextBertFeatures(mode="test")
        else:
            train_input_context = datahelper.getContextPool(mode="train")
            test_input_context = datahelper.getContextPool(mode="test")

        train_input = _append_columns(train_input, train_input_context)
        test_input = _append_columns(test_input, test_input_context)

    train_output = datahelper.oneHotOutput(mode="train",
                                           size=config.num_classes)
    test_output = datahelper.oneHotOutput(mode="test", size=config.num_classes)

    return train_input, train_output, test_input, test_output
Exemple #2
0
def train_io(
    config: Config, data: DataLoader, train_index: Iterable[int],
    test_index: Iterable[int]
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Assemble feature matrices and label arrays for one train/test split.

    Feature blocks from the enabled target modalities (text, video, audio)
    are appended column-wise, followed by the optional author and context
    blocks. Labels come from ``DataHelper.one_hot_output`` with
    ``size=config.num_classes``.

    Args:
        config: Supplies the ``use_*`` switches and ``num_classes``.
        data: Provides ``get_split`` and is handed on to ``DataHelper``.
        train_index: Selector forwarded to ``data.get_split`` for training.
        test_index: Selector forwarded to ``data.get_split`` for testing.

    Returns:
        ``(train_features, train_labels, test_features, test_labels)``.

    Raises:
        ValueError: If the enabled target modalities contributed no feature
            columns to the training matrix.
    """

    def append_columns(matrix, extra):
        # Column-wise append of one feature block onto the running matrix.
        return np.concatenate([matrix, extra], axis=1)

    def pooled_text(mode):
        # One pooled vector per vectorized utterance of the given split.
        return np.array([
            datahelper.pool_text(utt)
            for utt in datahelper.vectorize_utterance(mode=mode)
        ])

    raw_train_x, raw_train_y = data.get_split(train_index)
    raw_test_x, raw_test_y = data.get_split(test_index)

    datahelper = DataHelper(raw_train_x, raw_train_y, raw_test_x, raw_test_y,
                            config, data)

    # Zero-width starting matrices: one row per sample, no columns yet.
    train_features = np.empty((len(raw_train_x), 0))
    test_features = np.empty((len(raw_test_x), 0))

    # --- Target-utterance modalities ---
    if config.use_target_text:
        if config.use_bert:
            train_features = append_columns(
                train_features,
                datahelper.get_target_bert_feature(mode="train"))
            test_features = append_columns(
                test_features,
                datahelper.get_target_bert_feature(mode="test"))
        else:
            train_features = append_columns(train_features,
                                            pooled_text("train"))
            test_features = append_columns(test_features,
                                           pooled_text("test"))

    if config.use_target_video:
        train_features = append_columns(
            train_features, datahelper.get_target_video_pool(mode="train"))
        test_features = append_columns(
            test_features, datahelper.get_target_video_pool(mode="test"))

    if config.use_target_audio:
        train_features = append_columns(
            train_features, datahelper.get_target_audio_pool(mode="train"))
        test_features = append_columns(
            test_features, datahelper.get_target_audio_pool(mode="test"))

    # At least one target modality must have produced columns.
    if not train_features.shape[1]:
        raise ValueError("Invalid modalities")

    # --- Auxiliary inputs ---
    if config.use_author:
        author_train = datahelper.get_author(mode="train")
        author_test = datahelper.get_author(mode="test")

        train_features = append_columns(train_features, author_train)
        test_features = append_columns(test_features, author_test)

    if config.use_context:
        if config.use_bert:
            context_train = datahelper.get_context_bert_features(mode="train")
            context_test = datahelper.get_context_bert_features(mode="test")
        else:
            context_train = datahelper.get_context_pool(mode="train")
            context_test = datahelper.get_context_pool(mode="test")

        train_features = append_columns(train_features, context_train)
        test_features = append_columns(test_features, context_test)

    train_labels = datahelper.one_hot_output(mode="train",
                                             size=config.num_classes)
    test_labels = datahelper.one_hot_output(mode="test",
                                            size=config.num_classes)

    return train_features, train_labels, test_features, test_labels