def leave_one_out_evaluation(X, Y, compounds, model='autoencoder', x_vivo_arg=False, y_vivo_arg=False):
    """Evaluate `model` with leave-one-compound-out cross validation.

    Each unique compound is held out in turn: the model is trained on all
    remaining compounds and evaluated on the held-out one. Per-fold arrays
    are concatenated and returned together with the average validation MAE.

    Parameters
    ----------
    X, Y : array-like
        Input and target data, one row per sample.
    compounds : array-like
        Compound label per row of X/Y; defines the cross-validation folds.
    model : str
        'mod_autoencoder', 'cnn' or 'naive_encoder' select a dedicated
        trainer; any other value (including the default 'autoencoder') is
        forwarded to train_base_model.
    x_vivo_arg, y_vivo_arg : bool
        Stored into the module globals x_vivo / y_vivo and passed to
        split_train_test.

    Returns
    -------
    (data, store_mae)
        data : dict with concatenated 'X_train', 'Y_train', 'recon_train',
        'X_valid', 'Y_valid', 'recon_valid', 'train_compounds',
        'valid_compounds' arrays across all folds.
        store_mae : average validation MAE over folds.
    """
    print("Performing leave-one-out evaluation with '{}' model".format(model))
    # NOTE(review): these globals are also read by random_split_evaluation,
    # so the assignment is a load-bearing side effect — kept as-is.
    global x_vivo, y_vivo
    x_vivo = x_vivo_arg
    y_vivo = y_vivo_arg

    # Per-fold accumulators. Collect into lists and concatenate once after
    # the loop: np.append per fold reallocates the whole array every time
    # (accidentally quadratic in the number of folds).
    folds = {key: [] for key in (
        'X_train', 'Y_train', 'X_valid', 'Y_valid',
        'recon_train', 'recon_valid', 'train_compounds', 'valid_compounds')}
    total_errors = []

    unique_compounds = np.unique(compounds)
    n_folds = len(unique_compounds)
    for i, exclude_compound in enumerate(unique_compounds):
        print("Excluding compound", exclude_compound, "[", i + 1, "/", n_folds, "]")
        X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y, train_compounds, \
            valid_compounds = split_train_test(
                X, Y, compounds, x_vivo, y_vivo, exclude_compound=exclude_compound)

        # Dispatch to the trainer for this model; all dedicated trainers
        # share the same (X_train, X_valid, Y_train, Y_valid, norms_X,
        # norms_Y) signature, train_base_model additionally takes `model`.
        if model == 'mod_autoencoder':
            trainer = train_mod_autoencoders
        elif model == 'cnn':
            trainer = cnn_model
        elif model == 'naive_encoder':
            trainer = train_naive_encoder
        else:
            trainer = None
        if trainer is not None:
            X_train, X_valid, Y_train, Y_valid, recon_train, recon_valid, errors = trainer(
                X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y)
        else:
            X_train, X_valid, Y_train, Y_valid, recon_train, recon_valid, errors = train_base_model(
                X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y, model)

        # folds preserves insertion order (dict ordering, Python 3.7+), so
        # this zip pairs each key with its matching per-fold array.
        for key, value in zip(folds, (X_train, Y_train, X_valid, Y_valid,
                                      recon_train, recon_valid,
                                      train_compounds, valid_compounds)):
            folds[key].append(value)
        total_errors.append(errors)

    total_errors = np.array(total_errors)
    avg_errors = np.mean(total_errors, axis=0)
    print("Average Errors:")
    print("Training mae:{}, mse:{}".format(avg_errors[0], avg_errors[1]))
    print("Validation mae:{}, mse:{}".format(avg_errors[2], avg_errors[3]))
    store_mae = avg_errors[2]  # average validation MAE, returned to caller

    # Rank held-out compounds from best (lowest) to worst validation MAE.
    print("Compounds sorted by validation MAE (compound, MAE):")
    mae = total_errors[:, 2]
    for idx in np.argsort(mae):
        print(unique_compounds[idx], mae[idx])

    data = {key: np.concatenate(values, axis=0) for key, values in folds.items()}
    return data, store_mae
def random_split_evaluation(X, Y, compounds, model='autoencoder', iterations=50):
    """Evaluate `model` over repeated random 80/20 train/validation splits.

    Runs `iterations` independent random splits, trains the model on each,
    and returns the concatenated per-split arrays after printing the errors
    averaged over all iterations.

    Parameters
    ----------
    X, Y : array-like
        Input and target data, one row per sample.
    compounds : array-like
        Compound label per row of X/Y, forwarded to split_train_test.
    model : str
        'autoencoder' selects train_autoencoders; any other value is
        forwarded to train_base_model.
    iterations : int
        Number of random splits to evaluate.

    Returns
    -------
    dict with concatenated 'X_train', 'Y_train', 'recon_train', 'X_valid',
    'Y_valid', 'recon_valid', 'train_compounds', 'valid_compounds' arrays
    across all iterations.
    """
    print("Performing random split evaluation with '{}' model".format(model))
    # NOTE(review): reads the module globals x_vivo / y_vivo, which are only
    # assigned by leave_one_out_evaluation — confirm callers set them first.

    # Per-iteration accumulators; concatenated once after the loop instead
    # of np.append per iteration (which reallocates the whole array each
    # time). Also drops the duplicated `total_errors = []` initialization
    # from the original.
    splits = {key: [] for key in (
        'X_train', 'Y_train', 'X_valid', 'Y_valid',
        'recon_train', 'recon_valid', 'train_compounds', 'valid_compounds')}
    total_errors = []

    for i in range(iterations):
        print(i)
        X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y, train_compounds, \
            valid_compounds = split_train_test(
                X, Y, compounds, x_vivo, y_vivo, train_split=0.8)

        if model == 'autoencoder':
            X_train, X_valid, Y_train, Y_valid, recon_train, recon_valid, errors = train_autoencoders(
                X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y)
        else:
            X_train, X_valid, Y_train, Y_valid, recon_train, recon_valid, errors = train_base_model(
                X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y, model)

        # splits preserves insertion order (dict ordering, Python 3.7+), so
        # this zip pairs each key with its matching per-iteration array.
        for key, value in zip(splits, (X_train, Y_train, X_valid, Y_valid,
                                       recon_train, recon_valid,
                                       train_compounds, valid_compounds)):
            splits[key].append(value)
        total_errors.append(errors)

    total_errors = np.array(total_errors)
    avg_errors = np.mean(total_errors, axis=0)
    print("Average Errors:")
    print("Training mae:{}, mse:{}".format(avg_errors[0], avg_errors[1]))
    print("Validation mae:{}, mse:{}".format(avg_errors[2], avg_errors[3]))

    return {key: np.concatenate(values, axis=0) for key, values in splits.items()}