def evaluate(Y, Yhat, S2=None, mY=None, sY=None,
             metrics=('Rho', 'RMSE', 'SMSE', 'EXPV', 'MSLL')):
    ''' Compute error metrics for a set of predictions.

    Compares predictions Yhat against true responses Y, computing (per
    requested metric): Pearson correlation (Rho, plus p-values as pRho),
    root mean squared error (RMSE), standardized MSE (SMSE) and explained
    variance (EXPV). MSLL is computed only when the predictive variance S2
    and the training mean/std (mY, sY) are all supplied.

    NOTE(review): this module contains a second, near-identical definition
    of ``evaluate`` below which shadows this one at import time — consider
    removing one of the duplicates.

    :param Y: N x P array of true response variables
    :param Yhat: N x P array of predicted response variables
    :param S2: predictive variance (N x P), required for MSLL
    :param mY: mean of the training set, required for MSLL
    :param sY: standard deviation of the training set, required for MSLL
    :param metrics: iterable of metric names to compute
    :returns: dict mapping metric name -> per-feature values
    '''
    feature_num = Y.shape[1]

    # Work on a copy: the original implementation called metrics.remove()
    # on the shared mutable default list, silently disabling MSLL/SMSE for
    # every subsequent call and mutating lists passed in by callers.
    metrics = list(metrics)

    # MSLL and SMSE are undefined with only a single data point.
    if Y.shape[0] == 1:
        if 'MSLL' in metrics:
            metrics.remove('MSLL')
        if 'SMSE' in metrics:
            metrics.remove('SMSE')

    # Indices of usable response variables: at least one finite value and
    # non-zero variance (degenerate columns get the zero/one defaults below).
    nz = np.where(np.logical_and(np.isfinite(Y).any(axis=0),
                                 np.var(Y, axis=0) != 0))[0]

    MSE = np.mean((Y - Yhat)**2, axis=0)

    results = dict()

    if 'RMSE' in metrics:
        results['RMSE'] = np.sqrt(MSE)

    if 'Rho' in metrics:
        Rho = np.zeros(feature_num)
        pRho = np.ones(feature_num)
        Rho[nz], pRho[nz] = compute_pearsonr(Y[:, nz], Yhat[:, nz])
        results['Rho'] = Rho
        results['pRho'] = pRho

    if 'SMSE' in metrics:
        SMSE = np.zeros_like(MSE)
        SMSE[nz] = MSE[nz] / np.var(Y[:, nz], axis=0)
        results['SMSE'] = SMSE

    if 'EXPV' in metrics:
        EXPV = np.zeros(feature_num)
        EXPV[nz] = explained_var(Y[:, nz], Yhat[:, nz])
        results['EXPV'] = EXPV

    if 'MSLL' in metrics:
        # MSLL additionally needs the predictive variance and the training
        # set statistics; silently skipped otherwise (matches original).
        if (S2 is not None) and (mY is not None) and (sY is not None):
            MSLL = np.zeros(feature_num)
            MSLL[nz] = compute_MSLL(Y[:, nz], Yhat[:, nz], S2[:, nz],
                                    mY.reshape(-1, 1).T,
                                    (sY**2).reshape(-1, 1).T)
            results['MSLL'] = MSLL

    return results
def evaluate(Y, Yhat, S2=None, mY=None, sY=None,
             metrics=('Rho', 'RMSE', 'SMSE', 'EXPV', 'MSLL')):
    ''' Compute error metrics

    This function will compute error metrics based on a set of predictions
    Yhat and a set of true response variables Y, namely:

    * Rho: Pearson correlation
    * RMSE: root mean squared error
    * SMSE: standardized mean squared error
    * EXPV: explained variance

    If the predictive variance is also specified the log loss will be
    computed (which also takes into account the predictive variance). If
    the mean and standard deviation are also specified these will be used
    to standardize this, yielding the mean standardized log loss

    NOTE(review): this is the second definition of ``evaluate`` in this
    module; it shadows the near-identical one above — consider removing
    the duplicate.

    :param Y: N x P array of true response variables
    :param Yhat: N x P array of predicted response variables
    :param S2: predictive variance
    :param mY: mean of the training set
    :param sY: standard deviation of the training set
    :param metrics: iterable of metric names to compute

    :returns metrics: evaluation metrics
    '''
    feature_num = Y.shape[1]

    # Copy first: the original mutated the shared default list in place via
    # metrics.remove(), which disabled MSLL/SMSE for every later call and
    # also clobbered lists passed in by callers.
    metrics = list(metrics)

    # Remove metrics that cannot be computed with only a single data point
    if Y.shape[0] == 1:
        if 'MSLL' in metrics:
            metrics.remove('MSLL')
        if 'SMSE' in metrics:
            metrics.remove('SMSE')

    # find and remove bad variables from the response variables
    nz = np.where(np.logical_and(np.isfinite(Y).any(axis=0),
                                 np.var(Y, axis=0) != 0))[0]

    MSE = np.mean((Y - Yhat)**2, axis=0)

    results = dict()

    if 'RMSE' in metrics:
        results['RMSE'] = np.sqrt(MSE)

    if 'Rho' in metrics:
        Rho = np.zeros(feature_num)
        pRho = np.ones(feature_num)
        Rho[nz], pRho[nz] = compute_pearsonr(Y[:, nz], Yhat[:, nz])
        results['Rho'] = Rho
        results['pRho'] = pRho

    if 'SMSE' in metrics:
        SMSE = np.zeros_like(MSE)
        SMSE[nz] = MSE[nz] / np.var(Y[:, nz], axis=0)
        results['SMSE'] = SMSE

    if 'EXPV' in metrics:
        EXPV = np.zeros(feature_num)
        EXPV[nz] = explained_var(Y[:, nz], Yhat[:, nz])
        results['EXPV'] = EXPV

    if 'MSLL' in metrics:
        # Requires predictive variance plus training mean/std; otherwise
        # the metric is silently skipped (matches original behavior).
        if (S2 is not None) and (mY is not None) and (sY is not None):
            MSLL = np.zeros(feature_num)
            MSLL[nz] = compute_MSLL(Y[:, nz], Yhat[:, nz], S2[:, nz],
                                    mY.reshape(-1, 1).T,
                                    (sY**2).reshape(-1, 1).T)
            results['MSLL'] = MSLL

    return results
def estimate(args):
    '''Fit a normative model (per-iteration fixed effects + a neural-process
    random-effect model) on volumetric NIfTI response data, then predict on
    the test covariates and write predictions, error maps, deviation (Z)
    maps and the trained model to ``args.outdir``.

    Fields read from ``args``: respfile, covfile, testcovfile, testrespfile,
    mask, m, estimator, device, epochs, batchnum, outdir. Fields written:
    ``args.type`` (forced to 'MT') and ``args.cnn_feature_num``. The exact
    semantics of each field should be confirmed against the CLI parser that
    builds ``args`` (not visible here).
    '''
    torch.set_default_dtype(torch.float32)
    args.type = 'MT'

    # ------------------------------ Loading ------------------------------
    print('Loading the input Data ...')
    # Move the 4th NIfTI axis (subjects/volumes, presumably) to the front,
    # giving (N, X, Y, Z) — TODO confirm axis order against fileio.load_nifti.
    responses = fileio.load_nifti(args.respfile, vol=True).transpose([3, 0, 1, 2])
    response_shape = responses.shape
    # NOTE(review): pickle.load on user-supplied paths executes arbitrary
    # code if the file is untrusted — only load pickles you created.
    with open(args.covfile, 'rb') as handle:
        covariates = pickle.load(handle)['covariates']
    with open(args.testcovfile, 'rb') as handle:
        test_covariates = pickle.load(handle)['test_covariates']
    if args.mask is not None:
        mask = fileio.load_nifti(args.mask, vol=True)
        mask = fileio.create_mask(mask, mask=None)
    else:
        # No explicit mask: derive one from the first response volume.
        mask = fileio.create_mask(responses[0, :, :, :], mask=None)
    if args.testrespfile is not None:
        test_responses = fileio.load_nifti(args.testrespfile,
                                           vol=True).transpose([3, 0, 1, 2])
        test_responses_shape = test_responses.shape

    # ---------------------------- Normalizing ----------------------------
    print('Normalizing the input Data ...')
    # Covariates: z-scored with training statistics; responses: min-max
    # scaled to [0, 1] (flattened to 2-D for the scaler, then restored).
    covariates_scaler = StandardScaler()
    covariates = covariates_scaler.fit_transform(covariates)
    test_covariates = covariates_scaler.transform(test_covariates)
    response_scaler = MinMaxScaler()
    responses = unravel_2D(response_scaler.fit_transform(ravel_2D(responses)),
                           response_shape)
    if args.testrespfile is not None:
        test_responses = unravel_2D(
            response_scaler.transform(ravel_2D(test_responses)),
            test_responses_shape)
        test_responses = np.expand_dims(test_responses, axis=1)

    # Number of fixed-effect bootstrap repetitions / context channels.
    factor = args.m

    x_context = np.zeros([covariates.shape[0], factor, covariates.shape[1]],
                         dtype=np.float32)
    y_context = np.zeros([
        responses.shape[0], factor, responses.shape[1], responses.shape[2],
        responses.shape[3]
    ], dtype=np.float32)
    # NOTE(review): this allocation is dead — x_all is rebound to x_context
    # after the loop below.
    x_all = np.zeros([covariates.shape[0], factor, covariates.shape[1]],
                     dtype=np.float32)
    x_context_test = np.zeros(
        [test_covariates.shape[0], factor, test_covariates.shape[1]],
        dtype=np.float32)
    y_context_test = np.zeros([
        test_covariates.shape[0], factor, responses.shape[1],
        responses.shape[2], responses.shape[3]
    ], dtype=np.float32)

    # ------------------------- Fixed-effect stage -------------------------
    print('Estimating the fixed-effects ...')
    for i in range(factor):
        x_context[:, i, :] = covariates[:, :]
        x_context_test[:, i, :] = test_covariates[:, :]
        # Bootstrap resample (with replacement) for this repetition.
        idx = np.random.randint(0, covariates.shape[0], covariates.shape[0])
        if args.estimator == 'ST':
            # Single-task: one linear regression per voxel.
            for j in range(responses.shape[1]):
                for k in range(responses.shape[2]):
                    for l in range(responses.shape[3]):
                        reg = LinearRegression()
                        reg.fit(x_context[idx, i, :], responses[idx, j, k, l])
                        y_context[:, i, j, k, l] = reg.predict(
                            x_context[:, i, :])
                        y_context_test[:, i, j, k, l] = reg.predict(
                            x_context_test[:, i, :])
        elif args.estimator == 'MT':
            # Multi-task: one MultiTaskLasso over all voxels jointly
            # (responses flattened to N x prod(spatial dims)).
            reg = MultiTaskLasso(alpha=0.1)
            reg.fit(
                x_context[idx, i, :],
                np.reshape(responses[idx, :, :, :],
                           [covariates.shape[0],
                            np.prod(responses.shape[1:])]))
            y_context[:, i, :, :, :] = np.reshape(
                reg.predict(x_context[:, i, :]), [
                    x_context.shape[0], responses.shape[1],
                    responses.shape[2], responses.shape[3]
                ])
            y_context_test[:, i, :, :, :] = np.reshape(
                reg.predict(x_context_test[:, i, :]), [
                    x_context_test.shape[0], responses.shape[1],
                    responses.shape[2], responses.shape[3]
                ])
        print('Fixed-effect %d of %d is computed!' % (i + 1, factor))

    x_all = x_context
    # Replicate targets along the repetition axis to match the context shape.
    responses = np.expand_dims(responses, axis=1).repeat(factor, axis=1)

    ################################## TRAINING #################################

    encoder = Encoder(x_context, y_context, args).to(args.device)
    args.cnn_feature_num = encoder.cnn_feature_num
    decoder = Decoder(x_context, y_context, args).to(args.device)
    model = NP(encoder, decoder, args).to(args.device)

    # ------------------------ Random-effect stage ------------------------
    print('Estimating the Random-effect ...')
    k = 1
    # Four training stages; learning rate decays by 10x per stage (1e-2 ..
    # 1e-5 via lr=10**(-e-2) below). Stage lengths partition args.epochs.
    epochs = [
        int(args.epochs / 4),
        int(args.epochs / 2),
        int(args.epochs / 5),
        int(args.epochs - args.epochs / 4 - args.epochs / 2 - args.epochs / 5)
    ]
    mini_batch_num = args.batchnum
    batch_size = int(x_context.shape[0] / mini_batch_num)
    model.train()
    for e in range(len(epochs)):
        optimizer = optim.Adam(model.parameters(), lr=10**(-e - 2))
        for j in range(epochs[e]):
            train_loss = 0
            rand_idx = np.random.permutation(x_context.shape[0])
            for i in range(mini_batch_num):
                optimizer.zero_grad()
                idx = rand_idx[i * batch_size:(i + 1) * batch_size]
                # `dummy` (the 4th model output) is unused during training.
                y_hat, z_all, z_context, dummy = model(
                    torch.tensor(x_context[idx, :, :], device=args.device),
                    torch.tensor(y_context[idx, :, :, :, :],
                                 device=args.device),
                    torch.tensor(x_all[idx, :, :], device=args.device),
                    torch.tensor(responses[idx, :, :, :, :],
                                 device=args.device))
                loss = np_loss(
                    y_hat,
                    torch.tensor(responses[idx, :, :, :, :],
                                 device=args.device), z_all, z_context)
                loss.backward()
                train_loss += loss.item()
                optimizer.step()
            print('Epoch: %d, Loss:%f, Average Loss:%f' %
                  (k, train_loss, train_loss / responses.shape[0]))
            k += 1

    ################################## Evaluation #################################

    print('Predicting on Test Data ...')
    model.eval()
    # Keep dropout active at test time (presumably for MC-dropout-style
    # predictive uncertainty — confirm against apply_dropout_test).
    model.apply(apply_dropout_test)
    with torch.no_grad():
        y_hat, z_all, z_context, y_sigma = model(
            torch.tensor(x_context_test, device=args.device),
            torch.tensor(y_context_test, device=args.device),
            n=15)
    if args.testrespfile is not None:
        test_loss = np_loss(
            y_hat[0:test_responses_shape[0], :],
            torch.tensor(test_responses, device=args.device), z_all,
            z_context).item()
        print('Average Test Loss:%f' % (test_loss / test_responses_shape[0]))

        # Voxel-wise error maps, masked to brain voxels.
        RMSE = np.sqrt(
            np.mean((test_responses -
                     y_hat[0:test_responses_shape[0], :].cpu().numpy())**2,
                    axis=0)).squeeze() * mask
        SMSE = RMSE**2 / np.var(test_responses, axis=0).squeeze()
        Rho, pRho = compute_pearsonr(
            test_responses.squeeze(),
            y_hat[0:test_responses_shape[0], :].cpu().numpy().squeeze())
        EXPV = explained_var(
            test_responses.squeeze(),
            y_hat[0:test_responses_shape[0], :].cpu().numpy().squeeze()) * mask
        # NOTE(review): MSLL is standardized with TEST-set mean/var here,
        # not training statistics — verify this is intended.
        MSLL = compute_MSLL(
            test_responses.squeeze(),
            y_hat[0:test_responses_shape[0], :].cpu().numpy().squeeze(),
            y_sigma[0:test_responses_shape[0], :].cpu().numpy().squeeze()**2,
            train_mean=test_responses.mean(0),
            train_var=test_responses.var(0)).squeeze() * mask

        # Normative probability (deviation / Z) maps.
        NPMs = (test_responses -
                y_hat[0:test_responses_shape[0], :].cpu().numpy()) / (
                    y_sigma[0:test_responses_shape[0], :].cpu().numpy())
        NPMs = NPMs.squeeze()
        NPMs = NPMs * mask
        NPMs = np.nan_to_num(NPMs)

        # Extreme-value statistics over the flattened per-subject Z maps.
        temp = NPMs.reshape(
            [NPMs.shape[0], NPMs.shape[1] * NPMs.shape[2] * NPMs.shape[3]])
        EVD_params = extreme_value_prob_fit(temp, 0.01)
        abnormal_probs = extreme_value_prob(EVD_params, temp, 0.01)

    ############################## SAVING RESULTS #################################

    print('Saving Results to: %s' % (args.outdir))
    exfile = args.respfile
    # Undo the min-max scaling and keep only in-mask voxels before saving.
    y_hat = y_hat.squeeze().cpu().numpy()
    y_hat = response_scaler.inverse_transform(ravel_2D(y_hat))
    y_hat = y_hat[:, mask.flatten()]
    fileio.save(y_hat.T,
                args.outdir + '/yhat.nii.gz',
                example=exfile,
                mask=mask)
    # Predictive std is rescaled by the scaler's range, then squared to a
    # variance before saving.
    ys2 = y_sigma.squeeze().cpu().numpy()
    ys2 = ravel_2D(ys2) * (response_scaler.data_max_ -
                           response_scaler.data_min_)
    ys2 = ys2**2
    ys2 = ys2[:, mask.flatten()]
    fileio.save(ys2.T,
                args.outdir + '/ys2.nii.gz',
                example=exfile,
                mask=mask)
    if args.testrespfile is not None:
        NPMs = ravel_2D(NPMs)[:, mask.flatten()]
        fileio.save(NPMs.T,
                    args.outdir + '/Z.nii.gz',
                    example=exfile,
                    mask=mask)
        fileio.save(Rho.flatten()[mask.flatten()],
                    args.outdir + '/Rho.nii.gz',
                    example=exfile,
                    mask=mask)
        fileio.save(pRho.flatten()[mask.flatten()],
                    args.outdir + '/pRho.nii.gz',
                    example=exfile,
                    mask=mask)
        fileio.save(RMSE.flatten()[mask.flatten()],
                    args.outdir + '/rmse.nii.gz',
                    example=exfile,
                    mask=mask)
        fileio.save(SMSE.flatten()[mask.flatten()],
                    args.outdir + '/smse.nii.gz',
                    example=exfile,
                    mask=mask)
        fileio.save(EXPV.flatten()[mask.flatten()],
                    args.outdir + '/expv.nii.gz',
                    example=exfile,
                    mask=mask)
        fileio.save(MSLL.flatten()[mask.flatten()],
                    args.outdir + '/msll.nii.gz',
                    example=exfile,
                    mask=mask)
    # NOTE(review): unlike the .nii.gz paths above, no '/' is inserted
    # before 'model.pkl' — if args.outdir lacks a trailing separator this
    # writes '<outdir>model.pkl'. Also, EVD_params / abnormal_probs are
    # undefined here when args.testrespfile is None (NameError) — verify.
    with open(args.outdir + 'model.pkl', 'wb') as handle:
        pickle.dump(
            {
                'model': model,
                'covariates_scaler': covariates_scaler,
                'response_scaler': response_scaler,
                'EVD_params': EVD_params,
                'abnormal_probs': abnormal_probs
            },
            handle,
            protocol=configs.PICKLE_PROTOCOL)

    ###############################################################################

    print('DONE!')