def trainIO(train_index, test_index):
    """Assemble train/test feature matrices and one-hot labels for one split.

    Legacy camelCase twin of ``train_io``; unlike that variant it reads the
    module-level ``data`` and ``config`` objects instead of receiving them as
    parameters.

    Args:
        train_index: Indices selecting the training fold.
        test_index: Indices selecting the test fold.

    Returns:
        Tuple ``(train_input, train_output, test_input, test_output)`` of
        numpy arrays; inputs are one row per sample with one column block per
        enabled modality, outputs are one-hot encoded labels.

    Raises:
        ValueError: If no input modality is enabled in ``config``.
    """
    # Prepare data
    train_input, train_output = data.getSplit(train_index)
    test_input, test_output = data.getSplit(test_index)
    datahelper = DataHelper(train_input, train_output, test_input, test_output,
                            config, data)

    # Start from empty (n_samples, 0) matrices; each enabled modality appends
    # its feature block as extra columns (axis=1).
    train_input = np.empty((len(train_input), 0))
    test_input = np.empty((len(test_input), 0))

    if config.use_target_text:
        if config.use_bert:
            train_input = np.concatenate(
                [train_input, datahelper.getTargetBertFeatures(mode='train')],
                axis=1)
            test_input = np.concatenate(
                [test_input, datahelper.getTargetBertFeatures(mode='test')],
                axis=1)
        else:
            # Non-BERT path: pool each vectorized utterance into a fixed-size
            # vector before stacking.
            train_input = np.concatenate([
                train_input,
                np.array([
                    datahelper.pool_text(utt)
                    for utt in datahelper.vectorizeUtterance(mode='train')
                ])
            ], axis=1)
            test_input = np.concatenate([
                test_input,
                np.array([
                    datahelper.pool_text(utt)
                    for utt in datahelper.vectorizeUtterance(mode='test')
                ])
            ], axis=1)

    if config.use_target_video:
        train_input = np.concatenate(
            [train_input, datahelper.getTargetVideoPool(mode='train')],
            axis=1)
        test_input = np.concatenate(
            [test_input, datahelper.getTargetVideoPool(mode='test')],
            axis=1)

    if config.use_target_audio:
        train_input = np.concatenate(
            [train_input, datahelper.getTargetAudioPool(mode='train')],
            axis=1)
        test_input = np.concatenate(
            [test_input, datahelper.getTargetAudioPool(mode='test')],
            axis=1)

    if train_input.shape[1] == 0:
        # Was print(...) + exit(1); raise instead so callers and tests can
        # handle the failure, matching the snake_case train_io variant.
        raise ValueError("Invalid modalities")

    # Aux input
    if config.use_author:
        train_input_author = datahelper.getAuthor(mode="train")
        test_input_author = datahelper.getAuthor(mode="test")
        train_input = np.concatenate([train_input, train_input_author],
                                     axis=1)
        test_input = np.concatenate([test_input, test_input_author], axis=1)

    if config.use_context:
        if config.use_bert:
            train_input_context = datahelper.getContextBertFeatures(
                mode="train")
            test_input_context = datahelper.getContextBertFeatures(mode="test")
        else:
            train_input_context = datahelper.getContextPool(mode="train")
            test_input_context = datahelper.getContextPool(mode="test")
        train_input = np.concatenate([train_input, train_input_context],
                                     axis=1)
        test_input = np.concatenate([test_input, test_input_context], axis=1)

    train_output = datahelper.oneHotOutput(mode="train",
                                           size=config.num_classes)
    test_output = datahelper.oneHotOutput(mode="test",
                                          size=config.num_classes)

    return train_input, train_output, test_input, test_output
def train_io(
    config: Config, data: DataLoader, train_index: Iterable[int],
    test_index: Iterable[int]
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Build the train/test feature matrices and one-hot labels for a fold.

    Feature blocks for every modality enabled in ``config`` (target text,
    video, audio) plus optional auxiliary inputs (author, context) are stacked
    column-wise onto per-sample rows.

    Args:
        config: Experiment configuration with the ``use_*`` modality switches
            and ``num_classes``.
        data: Loader providing ``get_split`` over sample indices.
        train_index: Indices selecting the training fold.
        test_index: Indices selecting the test fold.

    Returns:
        Tuple ``(train_input, train_output, test_input, test_output)``.

    Raises:
        ValueError: If no input modality is enabled.
    """
    train_input, train_output = data.get_split(train_index)
    test_input, test_output = data.get_split(test_index)
    helper = DataHelper(train_input, train_output, test_input, test_output,
                        config, data)

    def widen(matrix, feature_block):
        # Append a feature block as additional columns.
        return np.concatenate([matrix, feature_block], axis=1)

    # Begin with zero-column matrices so any combination of modalities can be
    # appended uniformly.
    train_input = np.empty((len(train_input), 0))
    test_input = np.empty((len(test_input), 0))

    if config.use_target_text:
        if config.use_bert:
            train_input = widen(train_input,
                                helper.get_target_bert_feature(mode="train"))
            test_input = widen(test_input,
                               helper.get_target_bert_feature(mode="test"))
        else:
            pooled_train = np.array([
                helper.pool_text(utterance)
                for utterance in helper.vectorize_utterance(mode="train")
            ])
            pooled_test = np.array([
                helper.pool_text(utterance)
                for utterance in helper.vectorize_utterance(mode="test")
            ])
            train_input = widen(train_input, pooled_train)
            test_input = widen(test_input, pooled_test)

    if config.use_target_video:
        train_input = widen(train_input,
                            helper.get_target_video_pool(mode="train"))
        test_input = widen(test_input,
                           helper.get_target_video_pool(mode="test"))

    if config.use_target_audio:
        train_input = widen(train_input,
                            helper.get_target_audio_pool(mode="train"))
        test_input = widen(test_input,
                           helper.get_target_audio_pool(mode="test"))

    if train_input.shape[1] == 0:
        raise ValueError("Invalid modalities")

    # Auxiliary inputs.
    if config.use_author:
        train_input = widen(train_input, helper.get_author(mode="train"))
        test_input = widen(test_input, helper.get_author(mode="test"))

    if config.use_context:
        if config.use_bert:
            context_train = helper.get_context_bert_features(mode="train")
            context_test = helper.get_context_bert_features(mode="test")
        else:
            context_train = helper.get_context_pool(mode="train")
            context_test = helper.get_context_pool(mode="test")
        train_input = widen(train_input, context_train)
        test_input = widen(test_input, context_test)

    train_output = helper.one_hot_output(mode="train",
                                         size=config.num_classes)
    test_output = helper.one_hot_output(mode="test", size=config.num_classes)

    return train_input, train_output, test_input, test_output