def base_ensemble():
    """Ensemble all saved RoBERTa fold checkpoints by averaging logits.

    Loads every checkpoint found under ``Config.Train.checkpoint_dir /
    Config.model_type``, predicts start/end token logits on the validation
    set for each, prints the per-model mean jaccard, then averages the
    logits across models and prints the ensemble jaccard.

    Side effects: reads the validation CSV, loads model weights from disk,
    prints metrics to stdout. Returns ``None``.
    """
    max_l = Config.Train.max_len
    val_df = pd.read_csv(Config.validation_path)
    models_paths = list(
        Path(Config.Train.checkpoint_dir / Config.model_type).iterdir())
    start_idx = 0
    end_idx = 0
    jaccards = []
    for path in models_paths:
        # Free graph/session memory from the previous fold's model.
        tf.keras.backend.clear_session()
        _generator = RobertaDataGenerator(val_df, augment=False)
        dataset = tf.data.Dataset.from_generator(
            _generator.generate,
            output_types=({'ids': tf.int32, 'att': tf.int32, 'tti': tf.int32},
                          {'sts': tf.int32, 'ets': tf.int32}))
        # Pad everything to max_l; ids pad with 1 (RoBERTa's <pad> token id),
        # masks/targets pad with 0.
        dataset = dataset.padded_batch(
            Config.Train.batch_size,
            padded_shapes=({'ids': [max_l], 'att': [max_l], 'tti': [max_l]},
                           {'sts': [max_l], 'ets': [max_l]}),
            padding_values=({'ids': 1, 'att': 0, 'tti': 0},
                            {'sts': 0, 'ets': 0}))
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
        model = get_roberta()
        model.load_weights(str(path))
        s_idx, e_idx = model.predict(dataset, verbose=1)
        start_idx += s_idx
        end_idx += e_idx
        jaccard = get_jaccard_from_df(val_df, np.argmax(s_idx, axis=-1),
                                      np.argmax(e_idx, axis=-1), 'roberta',
                                      None)
        jaccards.append(jaccard)
    print(f'\nMean jaccard for all models: {np.mean(jaccards)}')
    # Fix: average over the actual number of checkpoints found, not a
    # hard-coded 5 — the directory may hold any number of folds.
    n_models = len(models_paths)
    start_idx /= n_models
    end_idx /= n_models
    e_jaccard = get_jaccard_from_df(val_df, np.argmax(start_idx, axis=-1),
                                    np.argmax(end_idx, axis=-1), 'roberta',
                                    None)
    print(f'Mean ensemble jaccard for models (base): {e_jaccard}\n')
def max_joint_proba_ensemble():
    """Ensemble fold checkpoints by picking, per example, the model whose
    prediction has the highest joint start*end probability.

    For each checkpoint, records every example's argmax start/end indices
    and the product of the max start and max end scores. The final answer
    for each example comes from the model with the largest joint score for
    that example. Prints the resulting jaccard; returns ``None``.
    """
    max_l = Config.Train.max_len
    val_df = pd.read_csv(Config.validation_path)
    models_paths = list(
        Path(Config.Train.checkpoint_dir / Config.model_type).iterdir())
    # Fix: size the per-model columns by the real checkpoint count instead
    # of a hard-coded 5.
    n_models = len(models_paths)
    n_rows = val_df.shape[0]
    joint_probs = np.zeros((n_rows, n_models))
    start_idx = np.zeros((n_rows, n_models))
    end_idx = np.zeros((n_rows, n_models))
    for i, path in enumerate(models_paths):
        # Free graph/session memory from the previous fold's model.
        tf.keras.backend.clear_session()
        _generator = RobertaDataGenerator(val_df, augment=False)
        dataset = tf.data.Dataset.from_generator(
            _generator.generate,
            output_types=({'ids': tf.int32, 'att': tf.int32, 'tti': tf.int32},
                          {'sts': tf.int32, 'ets': tf.int32}))
        dataset = dataset.padded_batch(
            Config.Train.batch_size,
            padded_shapes=({'ids': [max_l], 'att': [max_l], 'tti': [max_l]},
                           {'sts': [max_l], 'ets': [max_l]}),
            padding_values=({'ids': 1, 'att': 0, 'tti': 0},
                            {'sts': 0, 'ets': 0}))
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
        model = get_roberta()
        model.load_weights(str(path))
        s_idx, e_idx = model.predict(dataset, verbose=1)
        joint_probs[:, i] = np.max(s_idx, axis=-1) * np.max(e_idx, axis=-1)
        start_idx[:, i] = np.argmax(s_idx, axis=-1)
        end_idx[:, i] = np.argmax(e_idx, axis=-1)
    # For each example, the column (model) with the best joint probability.
    selection_idx = np.argmax(joint_probs, axis=-1)
    # Fix: per-row gather. The original `start_idx[:, selection_idx][:, 0]`
    # applied row 0's chosen model to every row; fancy indexing with a row
    # range selects each row's own winning model.
    rows = np.arange(n_rows)
    start_idx = start_idx[rows, selection_idx]
    end_idx = end_idx[rows, selection_idx]
    jaccard = get_jaccard_from_df(val_df, start_idx.astype('int'),
                                  end_idx.astype('int'), 'roberta', None)
    print(f'\nMax joint probability jaccard: {jaccard}\n')
def _albert_dataset(df, pad_len=None, repeat=False, augment=False):
    """Build a batched tf.data pipeline over an AlbertDataGenerator.

    Args:
        df: DataFrame consumed by ``AlbertDataGenerator``.
        pad_len: fixed pad length for every feature, or ``None`` to pad
            each batch to its longest sequence.
        repeat: whether to repeat indefinitely (training/validation loops).
        augment: forwarded to ``AlbertDataGenerator``.
    """
    generator = AlbertDataGenerator(df, augment=augment)
    ds = tf.data.Dataset.from_generator(
        generator.generate,
        output_types=({'ids': tf.int32, 'att': tf.int32, 'tti': tf.int32},
                      {'sts': tf.int32, 'ets': tf.int32}))
    shape = [pad_len]  # [None] -> dynamic per-batch padding
    ds = ds.padded_batch(
        Config.Train.batch_size,
        padded_shapes=({'ids': shape, 'att': shape, 'tti': shape},
                       {'sts': shape, 'ets': shape}))
    if repeat:
        ds = ds.repeat()
    return ds.prefetch(tf.data.experimental.AUTOTUNE)


def train_albert(train_df, val_df, fold_i, augment=False):
    """Train one ALBERT fold, restore its best checkpoint, and return the
    fold's validation jaccard score.

    Args:
        train_df: training-split DataFrame for this fold.
        val_df: validation-split DataFrame for this fold.
        fold_i: zero-based fold index (used in the checkpoint filename).
        augment: whether the training generator augments its data.

    Returns:
        The jaccard score on ``val_df`` using the best checkpoint.
    """
    max_l = Config.Albert.max_len
    # Dynamic-length batches for training/validation; both repeat because
    # model.fit is driven by explicit steps_per_epoch / validation_steps.
    train_dataset = _albert_dataset(train_df, repeat=True, augment=augment)
    val_dataset = _albert_dataset(val_df, repeat=True)

    model = get_albert()
    if fold_i == 0:
        model.summary()
    model_name = f'weights_v{Config.version}_f{fold_i + 1}.h5'
    train_spe = get_steps(train_df)
    val_spe = get_steps(val_df)
    cbs = [
        WarmUpCosineDecayScheduler(6e-5, 1200, warmup_steps=300,
                                   hold_base_rate_steps=200, verbose=0),
        # Keep only the best weights (by val loss) for this fold.
        keras.callbacks.ModelCheckpoint(str(Config.Train.checkpoint_dir /
                                            Config.model_type / model_name),
                                        verbose=1, save_best_only=True,
                                        save_weights_only=True)
    ]
    model.fit(train_dataset, epochs=2, verbose=1,
              validation_data=val_dataset, callbacks=cbs,
              steps_per_epoch=train_spe, validation_steps=val_spe)

    print(f'Loading checkpoint {model_name}...')
    model.load_weights(
        str(Config.Train.checkpoint_dir / Config.model_type / model_name))

    # Fixed-length, non-repeating pipeline for a single prediction pass.
    # NOTE(review): unlike the RoBERTa ensembles this relies on the default
    # pad value of 0 — presumably ALBERT's pad token id; confirm.
    val_dataset = _albert_dataset(val_df, pad_len=max_l)
    s_idx, e_idx = model.predict(val_dataset, verbose=1)
    s_idx = np.argmax(s_idx, axis=-1)
    e_idx = np.argmax(e_idx, axis=-1)
    jaccard_score = get_jaccard_from_df(val_df, s_idx, e_idx, 'albert',
                                        'albert.csv')
    print(
        f'\n>>> Fold {fold_i + 1}: jaccard_score for albert: {jaccard_score}\n'
    )
    return jaccard_score