def weighted_average_method(prediction_matrix, average, eps=1e-14, expert_weights=None, *args, **kwargs):
    if len(prediction_matrix) == 0:
        return np.zeros(600)
    prediction_matrix = prediction_matrix[None, :, :]
    weights = generate_information_weight_matrix(prediction_matrix, average, expert_weights=expert_weights)
    assert np.isfinite(weights).all()
    pdf = utils.cdf_to_pdf(prediction_matrix)

    x_log = np.log(pdf)
    x_log[pdf <= 0] = np.log(eps)
    # Compute the weighted geometric mean in log space
    geom_av_log = np.sum(x_log * weights, axis=(0, 1)) / (np.sum(weights, axis=(0, 1)) + eps)
    geom_av_log = geom_av_log - np.max(geom_av_log)  # subtract the max before exponentiating, for numerical stability
    geom_av = np.exp(geom_av_log)
    res = np.cumsum(geom_av / np.sum(geom_av))
    # Alternative (unused): weighted arithmetic average of the pdfs
    # res = np.cumsum(np.sum(pdf * weights, axis=(0, 1)) / np.sum(weights, axis=(0, 1)))
    return res
def generate_information_weight_matrix(expert_predictions, average_distribution, eps=1e-14,
                                        KL_weight=1.0, cross_entropy_weight=1.0,
                                        expert_weights=None, use_entropy=True):
    pdf = utils.cdf_to_pdf(expert_predictions)
    average_pdf = utils.cdf_to_pdf(average_distribution)
    average_pdf[average_pdf <= 0] = np.min(average_pdf[average_pdf > 0]) / 2.  # KL is not defined when Q=0 and P is not
    inside = pdf * (np.log(pdf) - np.log(average_pdf[None, None, :]))
    inside[pdf <= 0] = 0  # x*log(x) goes to zero as x goes to zero
    KL_distance_from_average = np.sum(inside, axis=2)  # (NUM_EXPERTS, NUM_VALIDATIONS)
    assert np.isfinite(KL_distance_from_average).all()

    clipped_predictions = np.clip(expert_predictions, 0.0, 1.0)
    # binary cross-entropy between each expert's CDF and the average CDF, per threshold
    cross_entropy_per_sample = -(
        average_distribution[None, None, :] * np.log(clipped_predictions + eps)
        + (1. - average_distribution[None, None, :]) * np.log(1. - clipped_predictions + eps))
    cross_entropy_per_sample[cross_entropy_per_sample < 0] = 0  # (NUM_EXPERTS, NUM_VALIDATIONS, 600)
    assert np.isfinite(cross_entropy_per_sample).all()

    if not use_entropy:
        cross_entropy_weight = 0.0  # keep only the KL term (see weighted_geom_no_entr)

    weights = cross_entropy_weight * cross_entropy_per_sample + KL_weight * KL_distance_from_average[:, :, None]
    if expert_weights is not None:
        weights = weights * expert_weights[:, None, None]

    # make sure the experts without predictions get (almost) no weight, unless absolutely necessary
    weights[np.where((expert_predictions == average_distribution[None, None, :]).all(axis=2))] = 1e-14
    return weights
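The utils.cdf_to_pdf and utils.pdf_to_cdf helpers used throughout are not included in this excerpt. Below is a minimal, self-contained sketch of what they are assumed to do (first differences along the last axis, and the cumulative sum back); the names are hypothetical stand-ins, not the repo's actual code.

import numpy as np

def cdf_to_pdf_sketch(cdf):
    # keep the first bin so the pdf has the same length (600) as the cdf
    return np.concatenate([cdf[..., :1], np.diff(cdf, axis=-1)], axis=-1)

def pdf_to_cdf_sketch(pdf):
    return np.cumsum(pdf, axis=-1)

cdf = np.clip(np.linspace(-0.2, 1.2, 600), 0.0, 1.0)  # toy monotone CDF
assert np.allclose(pdf_to_cdf_sketch(cdf_to_pdf_sketch(cdf)), cdf)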
def weighted_geom_no_entr(prediction_matrix, average, eps=1e-14, expert_weights=None, *args, **kwargs):
    if len(prediction_matrix.flatten()) == 0:
        return np.zeros(600)
    weights = generate_information_weight_matrix(prediction_matrix, average, *args,
                                                 expert_weights=expert_weights,
                                                 use_entropy=False, **kwargs)
    assert np.isfinite(weights).all()
    pdf = utils.cdf_to_pdf(prediction_matrix)

    x_log = np.log(pdf)
    x_log[pdf <= 0] = np.log(eps)
    # Compute the weighted geometric mean in log space
    geom_av_log = np.sum(x_log * weights, axis=(0, 1)) / (np.sum(weights, axis=(0, 1)) + eps)
    geom_av_log = geom_av_log - np.max(geom_av_log)  # subtract the max before exponentiating, for numerical stability
    geom_av = np.exp(geom_av_log)
    res = np.cumsum(geom_av / np.sum(geom_av))
    return res
def weighted_geom_method(prediction_matrix, average, eps=1e-14, expert_weights=None, *args, **kwargs):
    if len(prediction_matrix.flatten()) == 0:
        return np.zeros(600)
    weights = generate_information_weight_matrix(prediction_matrix, average, *args,
                                                 expert_weights=expert_weights, **kwargs)
    assert np.isfinite(weights).all()
    pdf = utils.cdf_to_pdf(prediction_matrix)

    x_log = np.log(pdf)
    x_log[pdf <= 0] = np.log(eps)
    # Compute the weighted geometric mean in log space
    geom_av_log = np.sum(x_log * weights, axis=(0, 1)) / (np.sum(weights, axis=(0, 1)) + eps)
    geom_av_log = geom_av_log - np.max(geom_av_log)  # subtract the max before exponentiating, for numerical stability
    geom_av = np.exp(geom_av_log)
    res = np.cumsum(geom_av / np.sum(geom_av))
    return res
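For intuition, this is the core of weighted_geom_method reduced to plain NumPy on synthetic data (two experts, 5 bins instead of 600); it is an illustration only, not part of the original code.

import numpy as np

eps = 1e-14
# two expert pdfs for one validation sample, 5 toy bins
pdf = np.array([[[0.1, 0.2, 0.4, 0.2, 0.1]],
                [[0.4, 0.3, 0.2, 0.05, 0.05]]])
weights = np.array([[[1.0] * 5],
                    [[3.0] * 5]])  # the second expert gets three times the weight

x_log = np.log(pdf)
x_log[pdf <= 0] = np.log(eps)
geom_av_log = np.sum(x_log * weights, axis=(0, 1)) / (np.sum(weights, axis=(0, 1)) + eps)
geom_av_log = geom_av_log - np.max(geom_av_log)  # same stabilization as above
geom_av = np.exp(geom_av_log)
print(np.cumsum(geom_av / np.sum(geom_av)))  # blended CDF, pulled towards the heavier expert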
def geomav(x):
    if len(x) == 0:
        return np.zeros(600)
    res = np.cumsum(utils.norm_geometric_average(utils.cdf_to_pdf(x)))
    return res
def build_objective(interface_layers):
    # l2 regularization on certain layers
    l2_penalty = nn.regularization.regularize_layer_params_weighted(
        interface_layers["regularizable"], nn.regularization.l2)
    # build objective
    return objectives.KaggleObjective(interface_layers["outputs"], penalty=l2_penalty)

# Testing
postprocess = postprocess.postprocess
test_time_augmentations = 100  # more augmentations since we only use single slices
tta_average_method = lambda x: np.cumsum(utils.norm_geometric_average(utils.cdf_to_pdf(x)))

# nonlinearity putting a lower bound on its output
def lb_softplus(lb):
    return lambda x: nn.nonlinearities.softplus(x) + lb

init = nn.init.Orthogonal()

rnn_layer = functools.partial(nn.layers.RecurrentLayer,
                              W_in_to_hid=init,
                              W_hid_to_hid=init,
                              b=nn.init.Constant(0.1),
                              nonlinearity=nn.nonlinearities.rectify,
                              hid_init=nn.init.Constant(0.),
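The lb_softplus nonlinearity above simply adds a constant to a softplus so the output never drops below the bound (useful when a layer must predict a strictly positive quantity). A tiny NumPy illustration with toy values, independent of the Lasagne graph:

import numpy as np

def softplus(x):
    return np.log1p(np.exp(x))

lb = 0.1  # the lower bound passed to lb_softplus
x = np.array([-10.0, -1.0, 0.0, 2.0])
y = softplus(x) + lb  # what lb_softplus(lb) computes elementwise
assert (y > lb).all()  # softplus is strictly positive, so the output stays above lb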
# Objective
l2_weight = 0.000
l2_weight_out = 0.000

def build_objective(interface_layers):
    # l2 regularization on certain layers
    l2_penalty = nn.regularization.regularize_layer_params_weighted(
        interface_layers["regularizable"], nn.regularization.l2)
    # build objective
    return objectives.KaggleObjective(interface_layers["outputs"], penalty=l2_penalty)

# Testing
postprocess = postprocess.postprocess
test_time_augmentations = 1000  # more augmentations since we only use single slices
tta_average_method = lambda x: np.cumsum(utils.norm_geometric_average(utils.cdf_to_pdf(x)))

# Architecture
def build_model():
    #################
    # Regular model #
    #################
    input_size = data_sizes["sliced:data:singleslice:4ch"]

    l0 = nn.layers.InputLayer(input_size)

    l1a = nn.layers.dnn.Conv2DDNNLayer(l0, W=nn.init.Orthogonal("relu"),
                                       filter_size=(3, 3), num_filters=64, stride=(1, 1),
                                       pad="same", nonlinearity=nn.nonlinearities.rectify)
    l1b = nn.layers.dnn.Conv2DDNNLayer(l1a, W=nn.init.Orthogonal("relu"),
                                       filter_size=(3, 3), num_filters=64, stride=(1, 1),
                                       pad="same", nonlinearity=nn.nonlinearities.rectify)
    l1 = nn.layers.dnn.MaxPool2DDNNLayer(l1b, pool_size=(2, 2), stride=(2, 2))
def prodav(x, **kwargs):
    if len(x) == 0:
        return np.zeros(600)
    return np.cumsum(utils.norm_prod(utils.cdf_to_pdf(x)))
def geomav(x, **kwargs):
    if len(x) == 0:
        return np.zeros(600)
    res = np.cumsum(utils.norm_geometric_average(utils.cdf_to_pdf(x)))
    return res
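prodav and geomav differ only in the helper applied before renormalising: a per-bin product of the expert pdfs versus its geometric mean. The utils.norm_prod and utils.norm_geometric_average helpers are not shown in this excerpt; the sketch below uses plausible stand-ins for them on toy data.

import numpy as np

pdfs = np.array([[0.1, 0.2, 0.4, 0.2, 0.1],
                 [0.3, 0.3, 0.2, 0.1, 0.1]])  # two expert pdfs, 5 toy bins

prod = np.prod(pdfs, axis=0)
norm_prod = prod / prod.sum()                 # assumed behaviour of utils.norm_prod
geom = prod ** (1.0 / pdfs.shape[0])
norm_geom = geom / geom.sum()                 # assumed behaviour of utils.norm_geometric_average
print(np.cumsum(norm_prod))                   # what prodav would return on these pdfs
print(np.cumsum(norm_geom))                   # what geomav would return on these pdfs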
}

# Objective
l2_weight = 0.000
l2_weight_out = 0.000

def build_objective(interface_layers):
    # l2 regularization on certain layers
    l2_penalty = nn.regularization.regularize_layer_params_weighted(
        interface_layers["regularizable"], nn.regularization.l2)
    # build objective
    return objectives.KaggleObjective(interface_layers["outputs"], penalty=l2_penalty)

# Testing
postprocess = postprocess.postprocess
test_time_augmentations = 100  # more augmentations since we only use single slices
tta_average_method = lambda x: np.cumsum(utils.norm_geometric_average(utils.cdf_to_pdf(x)))

# nonlinearity putting a lower bound on its output
def lb_softplus(lb):
    return lambda x: nn.nonlinearities.softplus(x) + lb

init = nn.init.Orthogonal()

rnn_layer = functools.partial(nn.layers.RecurrentLayer,
                              W_in_to_hid=init,
                              W_hid_to_hid=init,
                              b=nn.init.Constant(0.1),
                              nonlinearity=nn.nonlinearities.rectify,
                              hid_init=nn.init.Constant(0.),
def make_monotone_distribution_fast(distributions):
    return utils.pdf_to_cdf(np.clip(utils.cdf_to_pdf(distributions), 0.0, 1.0))
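make_monotone_distribution_fast repairs non-monotone CDFs by clipping negative probability mass in the pdf and re-accumulating. A self-contained toy version of that idea, using the same first-difference/cumsum assumption about the utils helpers as above:

import numpy as np

cdf = np.array([0.0, 0.2, 0.15, 0.5, 1.0])     # toy CDF that dips at index 2
pdf = np.concatenate([cdf[:1], np.diff(cdf)])  # assumed cdf_to_pdf
fixed = np.cumsum(np.clip(pdf, 0.0, 1.0))      # clip negative mass, assumed pdf_to_cdf
assert (np.diff(fixed) >= 0).all()             # the repaired CDF is monotone
# note: clipping can push the final value above 1.0 (1.05 here); the original function
# would share this property under the same cdf/pdf assumption, since it only clips the pdf.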
def optimize_expert_weights(expert_predictions,
                            average_distribution,
                            mask_matrix=None,
                            targets=None,
                            num_cross_validation_masks=2,
                            num_folds=1,
                            eps=1e-14,
                            cutoff=0.01,
                            do_optimization=True,
                            expert_weights=None,
                            optimal_params=None,
                            special_average=False,
                            *args, **kwargs):
    """
    :param expert_predictions: experts x validation_samples x 600
    :param mask_matrix: experts x validation_samples
    :param targets: validation_samples x 600
    :param average_distribution: 600
    :param eps: numerical stabilizer
    :return: (expert weights, loss, optimal parameters)
    """
    if expert_weights is not None:
        # remove experts whose weight fell below the cutoff
        mask_matrix = mask_matrix[expert_weights > cutoff, :]
        expert_predictions = expert_predictions[expert_weights > cutoff, :, :]

    NUM_EXPERTS = expert_predictions.shape[0]
    NUM_FILTER_PARAMETERS = 2
    WINDOW_SIZE = 599

    # optimizing weights
    X = theano.shared(expert_predictions.astype('float32'))  # source predictions = (NUM_EXPERTS, NUM_VALIDATIONS, 600)
    x_coor = theano.shared(np.linspace(-(WINDOW_SIZE - 1) / 2, (WINDOW_SIZE - 1) / 2,
                                       num=WINDOW_SIZE, dtype='float32'))  # (unused in this version)

    NUM_VALIDATIONS = expert_predictions.shape[1]
    ind = theano.shared(np.zeros((NUM_VALIDATIONS,), dtype='int32'))  # indices of the samples in the current split

    if optimal_params is None:
        params_init = np.concatenate([
            np.ones((NUM_EXPERTS,), dtype='float32'),
            np.ones((NUM_FILTER_PARAMETERS,), dtype='float32'),
        ])
    else:
        params_init = optimal_params.astype('float32')

    params = theano.shared(params_init.astype('float32'))  # expert weights (NUM_EXPERTS,) followed by the filter parameters
    # params = T.vector('params', dtype='float32')

    C = 0.0001

    if not special_average:
        # Create theano expression
        # inputs:
        W = params[:NUM_EXPERTS]
        weights = T.nnet.softmax(W.dimshuffle('x', 0)).dimshuffle(1, 0)
        preds = X.take(ind, axis=1)
        mask = theano.shared(mask_matrix.astype('float32')).take(ind, axis=1)

        # expression
        masked_weights = mask * weights
        tot_masked_weights = T.clip(masked_weights.sum(axis=0), 1e-7, utils.maxfloat)
        preds_weighted_masked = preds * masked_weights.dimshuffle(0, 1, 'x')
        cumulative_distribution = preds_weighted_masked.sum(axis=0) / tot_masked_weights.dimshuffle(0, 'x')

        # loss
        l1_loss = weights.sum()
    else:
        # calculate the information-weighted geometric average of these experts
        weights = generate_information_weight_matrix(expert_predictions, average_distribution)  # (NUM_EXPERTS, NUM_VALIDATIONS, 600)
        weight_matrix = theano.shared((mask_matrix[:, :, None] * weights).astype('float32'))

        pdf = utils.cdf_to_pdf(expert_predictions)
        x_log = np.log(pdf)
        x_log[pdf <= 0] = np.log(eps)
        X_log = theano.shared(x_log.astype('float32'))  # source predictions = (NUM_EXPERTS, NUM_VALIDATIONS, 600)

        X_log_i = X_log.take(ind, axis=1)
        w_i = weight_matrix.take(ind, axis=1)

        W = params[:NUM_EXPERTS]
        w_i = w_i * T.nnet.softmax(W.dimshuffle('x', 0)).dimshuffle(1, 0, 'x')  # the different predictions are the experts

        geom_av_log = T.sum(X_log_i * w_i, axis=0) / (T.sum(w_i, axis=0) + eps)
        geom_av_log = geom_av_log - T.max(geom_av_log, axis=-1).dimshuffle(0, 'x')  # subtract the max before exponentiating, for numerical stability
        geom_av = T.exp(geom_av_log)
        geom_pdf = geom_av / T.sum(geom_av, axis=-1).dimshuffle(0, 'x')

        l1_loss = 0
        cumulative_distribution = T.cumsum(geom_pdf, axis=-1)

    if not do_optimization:
        ind.set_value(list(range(NUM_VALIDATIONS)))
        f_eval = theano.function([], cumulative_distribution)
        cumulative_distribution = f_eval()
        return cumulative_distribution[0]
    else:
        # convert to theano values (for regularization)
        t_valid = theano.shared(targets.astype('float32'))  # targets = (NUM_VALIDATIONS, 600)
        t_train = theano.shared(targets.astype('float32'))  # targets = (NUM_VALIDATIONS, 600)

        CRPS_train = T.mean((cumulative_distribution - t_train.take(ind, axis=0)) ** 2) + C * l1_loss
        CRPS_valid = T.mean((cumulative_distribution - t_valid.take(ind, axis=0)) ** 2)

        iter_optimize = theano.function([], CRPS_train, on_unused_input="ignore",
                                        updates=lasagne.updates.adam(CRPS_train, [params], 1.0))
        f_val = theano.function([], CRPS_valid)

        def optimize_my_params():
            for _ in range(40 if special_average else 100):  # fixed number of Adam steps (no early stopping)
                score = iter_optimize()
            result = params.get_value()
            return result, score

        if num_cross_validation_masks == 0:
            ind.set_value(list(range(NUM_VALIDATIONS)))
            params.set_value(params_init)
            optimal_params, train_score = optimize_my_params()
            final_weights = -1e10 * np.ones(expert_weights.shape)
            final_weights[np.where(expert_weights > cutoff)] = optimal_params[:NUM_EXPERTS]
            final_params = np.concatenate((final_weights, optimal_params[NUM_EXPERTS:]))
            return softmax(final_weights), train_score, final_params
        else:
            final_params = []
            final_losses = []
            print()
            print()
            print()
            for fold in range(num_folds):
                for i_cross_validation in range(num_cross_validation_masks):
                    print("\r\033[F\033[F\033[Fcross_validation %d/%d" % (
                        fold * num_cross_validation_masks + i_cross_validation + 1,
                        num_folds * num_cross_validation_masks))
                    val_indices = get_cross_validation_indices(list(range(NUM_VALIDATIONS)),
                                                               validation_index=i_cross_validation,
                                                               number_of_splits=num_cross_validation_masks,
                                                               rng_seed=fold)
                    indices = [i for i in range(NUM_VALIDATIONS) if i not in val_indices]

                    # out, crps, d = scipy.optimize.fmin_l_bfgs_b(f, w_init, fprime=g, pgtol=1e-09, epsilon=1e-08, maxfun=10000)
                    ind.set_value(indices)
                    params.set_value(params_init)
                    result, train_score = optimize_my_params()

                    final_params.append(result)

                    ind.set_value(val_indices)
                    validation_score = f_val()
                    print(" Current train value: %.6f" % train_score)
                    print(" Current validation value: %.6f" % validation_score)
                    final_losses.append(validation_score)

            optimal_params = np.mean(final_params, axis=0)
            average_loss = np.mean(final_losses)

            expert_weights_result = softmax(optimal_params[:NUM_EXPERTS])
            filter_param_result = optimal_params[NUM_EXPERTS:NUM_EXPERTS + NUM_FILTER_PARAMETERS]
            # print("filter param result:", filter_param_result)

            return expert_weights_result, average_loss, optimal_params  # (NUM_EXPERTS,)
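The CRPS_train / CRPS_valid expressions above are the mean squared difference between the predicted CDF and the target CDF, the CRPS used as the loss and validation score here. A minimal NumPy version of that loss, outside the Theano graph and on toy data, assuming the targets are step functions at the true volume:

import numpy as np

def crps(predicted_cdf, true_volume):
    # the target CDF is a step function: 0 below the true volume, 1 from it onward
    target = (np.arange(predicted_cdf.shape[-1]) >= true_volume).astype('float32')
    return np.mean((predicted_cdf - target) ** 2)

pred = np.cumsum(np.full(600, 1.0 / 600))  # toy prediction: uniform over the 600 volume bins
print(crps(pred, true_volume=150))         # lower is better; a perfect step at 150 scores 0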
def geomav(x, *args, **kwargs):
    x = x[0]
    if len(x) == 0:
        return np.zeros(600)
    res = np.cumsum(utils.norm_geometric_average(utils.cdf_to_pdf(x)))
    return res
def optimize_expert_weights(expert_predictions,
                            average_distribution,
                            mask_matrix=None,
                            targets=None,
                            num_cross_validation_masks=2,
                            num_folds=1,
                            eps=1e-14,
                            cutoff=0.01,
                            do_optimization=True,
                            expert_weights=None,
                            optimal_params=None,
                            special_average=False,
                            *args, **kwargs):
    """
    :param expert_predictions: experts x validation_samples x 600
    :param mask_matrix: experts x validation_samples
    :param targets: validation_samples x 600
    :param average_distribution: 600
    :param eps: numerical stabilizer
    :return: (expert weights, loss, optimal parameters)
    """
    if expert_weights is not None:
        # remove experts whose weight fell below the cutoff
        mask_matrix = mask_matrix[expert_weights > cutoff, :]
        expert_predictions = expert_predictions[expert_weights > cutoff, :, :]

    NUM_EXPERTS = expert_predictions.shape[0]
    NUM_FILTER_PARAMETERS = 2
    WINDOW_SIZE = 599

    # optimizing weights
    X = theano.shared(expert_predictions.astype('float32'))  # source predictions = (NUM_EXPERTS, NUM_VALIDATIONS, 600)
    x_coor = theano.shared(np.linspace(-(WINDOW_SIZE - 1) / 2, (WINDOW_SIZE - 1) / 2,
                                       num=WINDOW_SIZE, dtype='float32'))  # (unused in this version)

    NUM_VALIDATIONS = expert_predictions.shape[1]
    ind = theano.shared(np.zeros((NUM_VALIDATIONS,), dtype='int32'))  # indices of the samples in the current split

    if optimal_params is None:
        params_init = np.concatenate([
            np.ones((NUM_EXPERTS,), dtype='float32'),
            np.ones((NUM_FILTER_PARAMETERS,), dtype='float32'),
        ])
    else:
        params_init = optimal_params.astype('float32')

    params = theano.shared(params_init.astype('float32'))  # expert weights (NUM_EXPERTS,) followed by the filter parameters
    # params = T.vector('params', dtype='float32')

    C = 0.0001

    if not special_average:
        # Create theano expression
        # inputs:
        W = params[:NUM_EXPERTS]
        weights = T.nnet.softmax(W.dimshuffle('x', 0)).dimshuffle(1, 0)
        preds = X.take(ind, axis=1)
        mask = theano.shared(mask_matrix.astype('float32')).take(ind, axis=1)

        # expression
        masked_weights = mask * weights
        tot_masked_weights = T.clip(masked_weights.sum(axis=0), 1e-7, utils.maxfloat)
        preds_weighted_masked = preds * masked_weights.dimshuffle(0, 1, 'x')
        cumulative_distribution = preds_weighted_masked.sum(axis=0) / tot_masked_weights.dimshuffle(0, 'x')

        # loss
        l1_loss = weights.sum()
    else:
        # calculate the information-weighted geometric average of these experts
        weights = generate_information_weight_matrix(expert_predictions, average_distribution)  # (NUM_EXPERTS, NUM_VALIDATIONS, 600)
        weight_matrix = theano.shared((mask_matrix[:, :, None] * weights).astype('float32'))

        pdf = utils.cdf_to_pdf(expert_predictions)
        x_log = np.log(pdf)
        x_log[pdf <= 0] = np.log(eps)
        X_log = theano.shared(x_log.astype('float32'))  # source predictions = (NUM_EXPERTS, NUM_VALIDATIONS, 600)

        X_log_i = X_log.take(ind, axis=1)
        w_i = weight_matrix.take(ind, axis=1)

        W = params[:NUM_EXPERTS]
        w_i = w_i * T.nnet.softmax(W.dimshuffle('x', 0)).dimshuffle(1, 0, 'x')  # the different predictions are the experts

        geom_av_log = T.sum(X_log_i * w_i, axis=0) / (T.sum(w_i, axis=0) + eps)
        geom_av_log = geom_av_log - T.max(geom_av_log, axis=-1).dimshuffle(0, 'x')  # subtract the max before exponentiating, for numerical stability
        geom_av = T.exp(geom_av_log)
        geom_pdf = geom_av / T.sum(geom_av, axis=-1).dimshuffle(0, 'x')

        l1_loss = 0
        cumulative_distribution = T.cumsum(geom_pdf, axis=-1)

    if not do_optimization:
        ind.set_value(list(range(NUM_VALIDATIONS)))
        f_eval = theano.function([], cumulative_distribution)
        cumulative_distribution = f_eval()
        return cumulative_distribution[0]
    else:
        # convert to theano values (for regularization)
        t_valid = theano.shared(targets.astype('float32'))  # targets = (NUM_VALIDATIONS, 600)
        t_train = theano.shared(targets.astype('float32'))  # targets = (NUM_VALIDATIONS, 600)

        CRPS_train = T.mean((cumulative_distribution - t_train.take(ind, axis=0)) ** 2) + C * l1_loss
        CRPS_valid = T.mean((cumulative_distribution - t_valid.take(ind, axis=0)) ** 2)

        iter_optimize = theano.function([], CRPS_train, on_unused_input="ignore",
                                        updates=lasagne.updates.adam(CRPS_train, [params], 1.0))
        f_val = theano.function([], CRPS_valid)

        def optimize_my_params():
            for _ in range(40 if special_average else 100):  # fixed number of Adam steps (no early stopping)
                score = iter_optimize()
            result = params.get_value()
            return result, score

        if num_cross_validation_masks == 0:
            ind.set_value(list(range(NUM_VALIDATIONS)))
            params.set_value(params_init)
            optimal_params, train_score = optimize_my_params()
            final_weights = -1e10 * np.ones(expert_weights.shape)
            final_weights[np.where(expert_weights > cutoff)] = optimal_params[:NUM_EXPERTS]
            final_params = np.concatenate((final_weights, optimal_params[NUM_EXPERTS:]))
            return softmax(final_weights), train_score, final_params
        else:
            final_params = []
            final_losses = []
            print()
            print()
            print()
            for fold in range(num_folds):
                for i_cross_validation in range(num_cross_validation_masks):
                    print("\r\033[F\033[F\033[Fcross_validation %d/%d" % (
                        fold * num_cross_validation_masks + i_cross_validation + 1,
                        num_folds * num_cross_validation_masks))
                    val_indices = get_cross_validation_indices(list(range(NUM_VALIDATIONS)),
                                                               validation_index=i_cross_validation,
                                                               number_of_splits=num_cross_validation_masks,
                                                               rng_seed=fold)
                    indices = [i for i in range(NUM_VALIDATIONS) if i not in val_indices]

                    # out, crps, d = scipy.optimize.fmin_l_bfgs_b(f, w_init, fprime=g, pgtol=1e-09, epsilon=1e-08, maxfun=10000)
                    ind.set_value(indices)
                    params.set_value(params_init)
                    result, train_score = optimize_my_params()

                    final_params.append(result)

                    ind.set_value(val_indices)
                    validation_score = f_val()
                    print(" Current train value: %.6f" % train_score)
                    print(" Current validation value: %.6f" % validation_score)
                    final_losses.append(validation_score)

            optimal_params = np.mean(final_params, axis=0)
            average_loss = np.mean(final_losses)

            expert_weights_result = softmax(optimal_params[:NUM_EXPERTS])
            filter_param_result = optimal_params[NUM_EXPERTS:NUM_EXPERTS + NUM_FILTER_PARAMETERS]
            # print("filter param result:", filter_param_result)

            return expert_weights_result, average_loss, optimal_params  # (NUM_EXPERTS,)