def _read_tsv_tolerant(path):
    """Read a tab-separated file, warning about and dropping malformed rows.

    The keyword used to tolerate bad rows depends on the installed pandas:
    ``on_bad_lines`` was added in pandas 1.3 and ``error_bad_lines`` was
    removed in pandas 2.0, so the right one is chosen at call time.
    """
    import inspect  # local import so the block stays self-contained

    if 'on_bad_lines' in inspect.signature(pd.read_csv).parameters:
        # 'warn' mirrors what `error_bad_lines=False` did with the default
        # `warn_bad_lines`: report the malformed row, then skip it.
        return pd.read_csv(path, sep='\t', on_bad_lines='warn')
    return pd.read_csv(path, sep='\t', error_bad_lines=False)


def _select_and_rename(frame, columns):
    """Keep only `columns`, rename the core columns, and drop rows with NaNs."""
    renamed = frame[columns].rename(columns={
        'original': 'text_a',
        'translation': 'text_b',
        'z_mean': 'labels'
    })
    return renamed.dropna()


def read_data_files(train_file, test_file, inject_features=None):
    """Load the train and test TSV files and prepare them for training.

    Both files are read as tab-separated data; malformed rows are skipped
    rather than aborting the read. The columns ``original``, ``translation``
    and ``z_mean`` (plus any names in `inject_features`) are kept and renamed
    to ``text_a``, ``text_b`` and ``labels``; rows containing missing values
    are dropped. Each frame is then passed through ``fit(frame, 'labels')``
    (``fit`` is defined outside this block).

    Args:
        train_file: Path (or anything ``pd.read_csv`` accepts) of the
            training TSV file.
        test_file: Path (or anything ``pd.read_csv`` accepts) of the test
            TSV file.
        inject_features: Optional iterable of additional column names to keep
            alongside the three core columns. They are not renamed.

    Returns:
        A ``(train, test)`` tuple holding whatever ``fit`` returns for each
        prepared frame.

    Raises:
        KeyError: If a requested column is missing from either file.
    """
    # Read both files before any column selection so that I/O errors surface
    # in the same order as they always have.
    train = _read_tsv_tolerant(train_file)
    test = _read_tsv_tolerant(test_file)

    select_columns = ['original', 'translation', 'z_mean']
    if inject_features is not None:
        select_columns.extend(inject_features)

    train = _select_and_rename(train, select_columns)
    test = _select_and_rename(test, select_columns)

    train = fit(train, 'labels')
    test = fit(test, 'labels')
    return train, test
train = train[['original', 'translation', 'z_mean']] test = test[['original', 'translation', 'z_mean']] train = train.rename(columns={ 'original': 'text_a', 'translation': 'text_b', 'z_mean': 'labels' }).dropna() test = test.rename(columns={ 'original': 'text_a', 'translation': 'text_b', 'z_mean': 'labels' }).dropna() train = fit(train, 'labels') test = fit(test, 'labels') if transformer_config["evaluate_during_training"]: if transformer_config["n_fold"] > 1: test_preds = np.zeros((len(test), transformer_config["n_fold"])) for i in range(transformer_config["n_fold"]): if os.path.exists( transformer_config['output_dir']) and os.path.isdir( transformer_config['output_dir']): shutil.rmtree(transformer_config['output_dir']) model = QuestModel(MODEL_TYPE, MODEL_NAME, num_labels=1,