Python cdf_to_pdf Examples, utils.cdf_to_pdf Python Examples

Example #1

0

Show file

File: merge_predictions_jeroen.py Project: Keesiu/meta-kaggle

def weighted_average_method(prediction_matrix,
                            average,
                            eps=1e-14,
                            expert_weights=None,
                            *args,
                            **kwargs):
    """Merge predictions into one CDF via a weighted geometric average.

    The predictions are converted to PDFs with ``utils.cdf_to_pdf``, averaged
    in log space using the weights returned by
    ``generate_information_weight_matrix``, renormalised and accumulated back
    into a cumulative distribution.

    :param prediction_matrix: 2-D array of predicted CDFs; a leading axis of
        length one is added before it is handed to the helpers.
    :param average: reference distribution forwarded to
        ``generate_information_weight_matrix``.
    :param eps: floor used for the log of non-positive PDF entries and to
        keep the weight normalisation away from a division by zero.
    :param expert_weights: optional weights forwarded to
        ``generate_information_weight_matrix``.
    :param args: accepted for signature compatibility; unused.
    :param kwargs: accepted for signature compatibility; unused.
    :return: the merged cumulative distribution, or ``np.zeros(600)`` when
        ``prediction_matrix`` is empty.
    """
    if len(prediction_matrix) == 0:
        return np.zeros(600)

    # The helpers index three axes, so add a singleton leading axis.
    prediction_matrix = prediction_matrix[None, :, :]
    weights = generate_information_weight_matrix(prediction_matrix,
                                                 average,
                                                 expert_weights=expert_weights)
    assert np.isfinite(weights).all()

    pdf = utils.cdf_to_pdf(prediction_matrix)
    # log(0) / log(negative) entries are replaced right below, so silence the
    # RuntimeWarnings NumPy would otherwise emit for them.
    with np.errstate(divide="ignore", invalid="ignore"):
        x_log = np.log(pdf)
    x_log[pdf <= 0] = np.log(eps)

    # Weighted mean of the log-densities over the first two axes.
    geom_av_log = np.sum(x_log * weights, axis=(0, 1)) / (
        np.sum(weights, axis=(0, 1)) + eps)
    # Shift so the largest exponent is 0 before exponentiating; the constant
    # factor cancels in the normalisation below.
    geom_av_log = geom_av_log - np.max(geom_av_log)

    geom_av = np.exp(geom_av_log)
    return np.cumsum(geom_av / np.sum(geom_av))

Example #2

0

Show file

File: merge_predictions_jeroen.py Project: RickBoss/Data-Science-Bowl-1

def generate_information_weight_matrix(expert_predictions, average_distribution, eps=1e-14, KL_weight = 1.0, cross_entropy_weight=1.0, expert_weights=None):
    """Weight every expert prediction by how much it differs from the average.

    The weight is ``cross_entropy_weight`` times a per-bin cross-entropy term
    plus ``KL_weight`` times a per-prediction KL term, optionally scaled by
    ``expert_weights`` along the first axis.

    :param expert_predictions: predicted CDFs, indexed as
        (NUM_EXPERTS, NUM_VALIDATIONS, 600).
    :param average_distribution: reference CDF, broadcast over the first two
        axes of ``expert_predictions``.
    :param eps: offset added inside the cross-entropy logarithms.
    :param KL_weight: multiplier of the KL term.
    :param cross_entropy_weight: multiplier of the cross-entropy term.
    :param expert_weights: optional per-expert factors (first axis).
    :return: weight array, (NUM_EXPERTS, NUM_VALIDATIONS, 600).
    """
    expert_pdf = utils.cdf_to_pdf(expert_predictions)
    reference_pdf = utils.cdf_to_pdf(average_distribution)

    # KL is not defined when Q=0 and P is not: replace non-positive reference
    # mass by half of the smallest positive entry.
    smallest_positive = np.min(reference_pdf[reference_pdf > 0])
    reference_pdf[reference_pdf <= 0] = smallest_positive / 2

    log_ratio = np.log(expert_pdf) - np.log(reference_pdf[None, None, :])
    kl_terms = expert_pdf * log_ratio
    kl_terms[expert_pdf <= 0] = 0  # x*log(x) at zero is taken to be zero
    kl_per_prediction = np.sum(kl_terms, axis=2)  # (NUM_EXPERTS, NUM_VALIDATIONS)
    assert np.isfinite(kl_per_prediction).all()

    clipped = np.clip(expert_predictions, 0.0, 1.0)
    reference_cdf = average_distribution[None, None, :]
    hit_term = reference_cdf * np.log(clipped + eps)
    miss_term = (1. - reference_cdf) * np.log(1. - clipped + eps)
    cross_entropy = -(hit_term + miss_term)

    # Negative values are floored at zero.
    cross_entropy[cross_entropy < 0] = 0  # (NUM_EXPERTS, NUM_VALIDATIONS, 600)
    assert np.isfinite(cross_entropy).all()

    weights = cross_entropy_weight * cross_entropy + KL_weight * kl_per_prediction[:, :, None]
    if expert_weights is not None:
        weights = weights * expert_weights[:, None, None]

    # Predictions identical to the average carry no information: give them a
    # tiny weight so they only matter when nothing else is available.
    same_as_average = (expert_predictions == average_distribution[None, None, :]).all(axis=2)
    weights[same_as_average] = 1e-14
    return weights

Example #3

0

Show file

File: merge_predictions.py Project: Keesiu/meta-kaggle

def weighted_geom_no_entr(prediction_matrix, average, eps=1e-14, expert_weights=None, *args, **kwargs):
    """Weighted geometric average of predictions, requesting ``use_entropy=False``.

    :param prediction_matrix: array of predicted CDFs.
    :param average: reference distribution passed to the weight helper.
    :param eps: floor for the log of non-positive PDF entries; also added to
        the weight sum to avoid a division by zero.
    :param expert_weights: optional weights passed to the weight helper.
    :param args: extra positional arguments forwarded to
        ``generate_information_weight_matrix``.
    :param kwargs: extra keyword arguments forwarded to
        ``generate_information_weight_matrix``.
    :return: merged cumulative distribution, or ``np.zeros(600)`` when the
        input holds no elements.
    """
    if prediction_matrix.size == 0:
        return np.zeros(600)

    weights = generate_information_weight_matrix(prediction_matrix, average, expert_weights=expert_weights, use_entropy=False, *args, **kwargs)
    assert np.isfinite(weights).all()

    densities = utils.cdf_to_pdf(prediction_matrix)
    log_densities = np.log(densities)
    log_densities[densities <= 0] = np.log(eps)

    # Weighted mean of the log-densities over the first two axes.
    weighted_log_sum = np.sum(log_densities * weights, axis=(0, 1))
    weight_total = np.sum(weights, axis=(0, 1)) + eps
    log_mean = weighted_log_sum / weight_total
    # Shift so the maximum is zero before exponentiating.
    log_mean = log_mean - np.max(log_mean)

    unnormalised = np.exp(log_mean)
    return np.cumsum(unnormalised / np.sum(unnormalised))

Example #4

0

Show file

File: merge_predictions.py Project: 317070/kaggle-heart

def weighted_geom_method(prediction_matrix, average, eps=1e-14, expert_weights=None, *args, **kwargs):
    """Merge predictions with an information-weighted geometric average.

    :param prediction_matrix: array of predicted CDFs.
    :param average: reference distribution passed to the weight helper.
    :param eps: floor for the log of non-positive PDF entries; also added to
        the weight sum to avoid a division by zero.
    :param expert_weights: optional weights passed to the weight helper.
    :param args: extra positional arguments forwarded to
        ``generate_information_weight_matrix``.
    :param kwargs: extra keyword arguments forwarded to
        ``generate_information_weight_matrix``.
    :return: merged cumulative distribution, or ``np.zeros(600)`` when the
        input holds no elements.
    """
    if prediction_matrix.size == 0:
        return np.zeros(600)

    weights = generate_information_weight_matrix(prediction_matrix, average, expert_weights=expert_weights, *args, **kwargs)
    assert np.isfinite(weights).all()

    densities = utils.cdf_to_pdf(prediction_matrix)
    log_densities = np.log(densities)
    log_densities[densities <= 0] = np.log(eps)

    # Weighted mean of the log-densities over the first two axes.
    weighted_log_sum = np.sum(log_densities * weights, axis=(0, 1))
    weight_total = np.sum(weights, axis=(0, 1)) + eps
    log_mean = weighted_log_sum / weight_total
    # Shift so the maximum is zero before exponentiating.
    log_mean = log_mean - np.max(log_mean)

    unnormalised = np.exp(log_mean)
    return np.cumsum(unnormalised / np.sum(unnormalised))

Example #5

0

Show file

File: merge_predictions_ira.py Project: Keesiu/meta-kaggle

def geomav(x):
    """Merge the CDFs in ``x`` through ``utils.norm_geometric_average``.

    Returns ``np.zeros(600)`` when ``x`` is empty; otherwise the cumulative
    sum of the averaged PDFs.
    """
    if not len(x):
        return np.zeros(600)
    densities = utils.cdf_to_pdf(x)
    merged_density = utils.norm_geometric_average(densities)
    return np.cumsum(merged_density)

Example #6

0

Show file

File: je_meta_fixedaggr_jsc80leakyconv_augzoombright_short.py Project: RickBoss/Data-Science-Bowl-1


def build_objective(interface_layers):
    """Create the Kaggle objective with a weighted L2 penalty attached.

    :param interface_layers: mapping that provides the ``"regularizable"``
        and ``"outputs"`` entries.
    :return: an ``objectives.KaggleObjective`` instance.
    """
    # L2 regularisation on the layers listed as regularizable
    regularizable = interface_layers["regularizable"]
    penalty_term = nn.regularization.regularize_layer_params_weighted(
        regularizable, nn.regularization.l2)
    # assemble the objective
    outputs = interface_layers["outputs"]
    return objectives.KaggleObjective(outputs, penalty=penalty_term)


# Testing
# Rebinds the name `postprocess` to the `postprocess` attribute of the object
# it referred to before (presumably the imported module of that name).
postprocess = postprocess.postprocess
test_time_augmentations = 100  # More augmentations since we only use single slices
# Merges the test-time-augmentation outputs: CDF -> PDF, then
# utils.norm_geometric_average, then a cumulative sum back to a CDF.
tta_average_method = lambda x: np.cumsum(
    utils.norm_geometric_average(utils.cdf_to_pdf(x)))


# nonlinearity putting a lower bound on its output
def lb_softplus(lb):
    """Return a callable that computes ``softplus(x) + lb``."""
    shifted_softplus = lambda x: nn.nonlinearities.softplus(x) + lb
    return shifted_softplus


# Single initializer instance; the rnn_layer partial passes it for both
# W_in_to_hid and W_hid_to_hid.
init = nn.init.Orthogonal()

rnn_layer = functools.partial(nn.layers.RecurrentLayer,
                              W_in_to_hid=init,
                              W_hid_to_hid=init,
                              b=nn.init.Constant(0.1),
                              nonlinearity=nn.nonlinearities.rectify,
                              hid_init=nn.init.Constant(0.),

Example #7

0

Show file


# Objective
# Both values are zero, and neither name is referenced by the code visible in
# this excerpt (presumably they are L2 coefficients used elsewhere in the file).
l2_weight = 0.000
l2_weight_out = 0.000
def build_objective(interface_layers):
    """Create the Kaggle objective with a weighted L2 penalty attached.

    :param interface_layers: mapping that provides the ``"regularizable"``
        and ``"outputs"`` entries.
    :return: an ``objectives.KaggleObjective`` instance.
    """
    # L2 regularisation on the layers listed as regularizable
    regularizable = interface_layers["regularizable"]
    penalty_term = nn.regularization.regularize_layer_params_weighted(
        regularizable, nn.regularization.l2)
    # assemble the objective
    outputs = interface_layers["outputs"]
    return objectives.KaggleObjective(outputs, penalty=penalty_term)

# Testing
# Rebinds the name `postprocess` to the `postprocess` attribute of the object
# it referred to before (presumably the imported module of that name).
postprocess = postprocess.postprocess
test_time_augmentations = 1000  # More augmentations since we only use single slices
# Merges the test-time-augmentation outputs: CDF -> PDF, then
# utils.norm_geometric_average, then a cumulative sum back to a CDF.
tta_average_method = lambda x: np.cumsum(utils.norm_geometric_average(utils.cdf_to_pdf(x)))

# Architecture
def build_model():

    #################
    # Regular model #
    #################
    input_size = data_sizes["sliced:data:singleslice:4ch"]

    l0 = nn.layers.InputLayer(input_size)

    l1a = nn.layers.dnn.Conv2DDNNLayer(l0,  W=nn.init.Orthogonal("relu"), filter_size=(3,3), num_filters=64, stride=(1,1), pad="same", nonlinearity=nn.nonlinearities.rectify)
    l1b = nn.layers.dnn.Conv2DDNNLayer(l1a, W=nn.init.Orthogonal("relu"), filter_size=(3,3), num_filters=64, stride=(1,1), pad="same", nonlinearity=nn.nonlinearities.rectify)
    l1 = nn.layers.dnn.MaxPool2DDNNLayer(l1b, pool_size=(2,2), stride=(2,2))

Example #8

0

Show file

File: merge_predictions_jeroen.py Project: RickBoss/Data-Science-Bowl-1

def prodav(x, **kwargs):
    """Merge the CDFs in ``x`` through ``utils.norm_prod``.

    Returns ``np.zeros(600)`` when ``x`` is empty; otherwise the cumulative
    sum of ``utils.norm_prod`` applied to the PDFs. ``kwargs`` are accepted
    and ignored.
    """
    if not len(x):
        return np.zeros(600)
    densities = utils.cdf_to_pdf(x)
    merged_density = utils.norm_prod(densities)
    return np.cumsum(merged_density)

Example #9

0

Show file

File: merge_predictions_jeroen.py Project: 317070/kaggle-heart

def geomav(x, **kwargs):
    """Merge the CDFs in ``x`` through ``utils.norm_geometric_average``.

    Returns ``np.zeros(600)`` when ``x`` is empty; otherwise the cumulative
    sum of the averaged PDFs. ``kwargs`` are accepted and ignored.
    """
    if not len(x):
        return np.zeros(600)
    densities = utils.cdf_to_pdf(x)
    merged_density = utils.norm_geometric_average(densities)
    return np.cumsum(merged_density)

Example #10

0

Show file

File: je_os_segmentandintegrate_noreg_bn.py Project: 317070/kaggle-heart

}

# Objective
# Both values are zero, and neither name is referenced by the code visible in
# this excerpt (presumably they are L2 coefficients used elsewhere in the file).
l2_weight = 0.000
l2_weight_out = 0.000
def build_objective(interface_layers):
    """Create the Kaggle objective with a weighted L2 penalty attached.

    :param interface_layers: mapping that provides the ``"regularizable"``
        and ``"outputs"`` entries.
    :return: an ``objectives.KaggleObjective`` instance.
    """
    # L2 regularisation on the layers listed as regularizable
    regularizable = interface_layers["regularizable"]
    penalty_term = nn.regularization.regularize_layer_params_weighted(
        regularizable, nn.regularization.l2)
    # assemble the objective
    outputs = interface_layers["outputs"]
    return objectives.KaggleObjective(outputs, penalty=penalty_term)

# Testing
# Rebinds the name `postprocess` to the `postprocess` attribute of the object
# it referred to before (presumably the imported module of that name).
postprocess = postprocess.postprocess
test_time_augmentations = 100  # More augmentations since we only use single slices
# Merges the test-time-augmentation outputs: CDF -> PDF, then
# utils.norm_geometric_average, then a cumulative sum back to a CDF.
tta_average_method = lambda x: np.cumsum(utils.norm_geometric_average(utils.cdf_to_pdf(x)))


# nonlinearity putting a lower bound on its output
def lb_softplus(lb):
    """Return a callable that computes ``softplus(x) + lb``."""
    shifted_softplus = lambda x: nn.nonlinearities.softplus(x) + lb
    return shifted_softplus


# Single initializer instance; the rnn_layer partial passes it for both
# W_in_to_hid and W_hid_to_hid.
init = nn.init.Orthogonal()

rnn_layer = functools.partial(nn.layers.RecurrentLayer,
    W_in_to_hid=init,
    W_hid_to_hid=init,
    b=nn.init.Constant(0.1),
    nonlinearity=nn.nonlinearities.rectify,
    hid_init=nn.init.Constant(0.),

Example #11

0

Show file

File: postprocess.py Project: 317070/kaggle-heart

def make_monotone_distribution_fast(distributions):
    """Round-trip CDFs through their PDFs, clipping the PDF values to [0, 1].

    ``distributions`` is converted with ``utils.cdf_to_pdf``, clipped, and
    converted back with ``utils.pdf_to_cdf``.
    """
    densities = utils.cdf_to_pdf(distributions)
    clipped_densities = np.clip(densities, 0.0, 1.0)
    return utils.pdf_to_cdf(clipped_densities)

Example #12

0

Show file

File: postprocess.py Project: Keesiu/meta-kaggle

def make_monotone_distribution_fast(distributions):
    """Round-trip CDFs through their PDFs, clipping the PDF values to [0, 1].

    ``distributions`` is converted with ``utils.cdf_to_pdf``, clipped, and
    converted back with ``utils.pdf_to_cdf``.
    """
    densities = utils.cdf_to_pdf(distributions)
    clipped_densities = np.clip(densities, 0.0, 1.0)
    return utils.pdf_to_cdf(clipped_densities)

Example #13

0

Show file

File: merge_predictions.py Project: Keesiu/meta-kaggle

def optimize_expert_weights(expert_predictions,
                            average_distribution,
                            mask_matrix=None,
                            targets=None,
                            num_cross_validation_masks=2,
                            num_folds=1,
                            eps=1e-14,
                            cutoff=0.01,
                            do_optimization=True,
                            expert_weights=None,
                            optimal_params=None,
                            special_average=False,
                            *args, **kwargs):
    """
    Fit softmax expert weights by minimising the mean squared difference
    between the merged cumulative distribution and the targets (the quantity
    the code calls CRPS), using Adam updates on a Theano graph.

    :param expert_predictions: experts x validation_samples x 600 x
    :param average_distribution: 600 x
    :param mask_matrix: experts x validation_samples x ; despite the ``None``
        default it is used unconditionally, so it must be supplied
    :param targets: validation_samples x 600 x ; required unless
        ``do_optimization`` is False
    :param num_cross_validation_masks: number of cross-validation splits per
        fold; ``0`` trains once on all validation samples
    :param num_folds: number of repetitions of the cross-validation, each
        passing its index as ``rng_seed`` to ``get_cross_validation_indices``
    :param eps: floor for the log of non-positive PDF entries and guard
        against division by zero (only used when ``special_average`` is set)
    :param cutoff: experts whose ``expert_weights`` entry is not above this
        value are dropped before optimising
    :param do_optimization: when False, only evaluate the merged distribution
        for the initial parameters
    :param expert_weights: optional prior weights used for the cutoff filter
    :param optimal_params: optional initial parameter vector
    :param special_average: use the information-weighted geometric average
        instead of the masked arithmetic average
    :return: when ``do_optimization`` is False, the first row of the evaluated
        cumulative distributions. When ``num_cross_validation_masks`` is 0,
        ``(softmax weights, train score, parameter vector)`` re-expanded to
        the unfiltered experts. Otherwise ``(softmax weights, mean validation
        loss, mean parameter vector)`` over all cross-validation runs.
    """
    if expert_weights is not None:
        # Drop the experts that do not pass the cutoff.
        keep = expert_weights > cutoff
        mask_matrix = mask_matrix[keep, :]
        expert_predictions = expert_predictions[keep, :, :]

    NUM_EXPERTS = expert_predictions.shape[0]
    NUM_VALIDATIONS = expert_predictions.shape[1]
    NUM_FILTER_PARAMETERS = 2

    # Indices of the validation samples currently fed through the graph; it is
    # overwritten with set_value before every evaluation.
    ind = theano.shared(np.zeros((NUM_VALIDATIONS,), dtype='int32'))

    if optimal_params is None:
        params_init = np.concatenate([ np.ones((NUM_EXPERTS,), dtype='float32'),
                                       np.ones((NUM_FILTER_PARAMETERS,), dtype='float32') ])
    else:
        params_init = optimal_params.astype('float32')

    params = theano.shared(params_init.astype('float32'))

    C = 0.0001  # coefficient of the l1_loss term in the training loss
    if not special_average:
        # source predictions = (NUM_EXPERTS, NUM_VALIDATIONS, 600)
        X = theano.shared(expert_predictions.astype('float32'))
        W = params[:NUM_EXPERTS]
        weights = T.nnet.softmax(W.dimshuffle('x',0)).dimshuffle(1, 0)
        preds = X.take(ind, axis=1)
        mask = theano.shared(mask_matrix.astype('float32')).take(ind, axis=1)
        # Masked, weighted arithmetic mean over the experts.
        masked_weights = mask * weights
        tot_masked_weights = T.clip(masked_weights.sum(axis=0), 1e-7, utils.maxfloat)
        preds_weighted_masked = preds * masked_weights.dimshuffle(0, 1, 'x')
        cumulative_distribution = preds_weighted_masked.sum(axis=0) / tot_masked_weights.dimshuffle(0, 'x')
        # loss
        l1_loss = weights.sum()
    else:
        # Information weights per expert prediction = (NUM_EXPERTS, NUM_VALIDATIONS, 600)
        weights = generate_information_weight_matrix(expert_predictions, average_distribution)
        weight_matrix = theano.shared((mask_matrix[:,:,None]*weights).astype('float32'))
        pdf = utils.cdf_to_pdf(expert_predictions)
        x_log = np.log(pdf)
        x_log[pdf<=0] = np.log(eps)
        # log-densities = (NUM_EXPERTS, NUM_VALIDATIONS, 600)
        X_log = theano.shared(x_log.astype('float32'))
        X_log_i = X_log.take(ind, axis=1)
        w_i = weight_matrix.take(ind, axis=1)

        W = params[:NUM_EXPERTS]
        w_i = w_i * T.nnet.softmax(W.dimshuffle('x',0)).dimshuffle(1, 0, 'x')

        # Weighted geometric mean over the experts, computed in log space.
        geom_av_log = T.sum(X_log_i * w_i, axis=0) / (T.sum(w_i, axis=0) + eps)
        # Shift so each row's maximum is zero before exponentiating.
        geom_av_log = geom_av_log - T.max(geom_av_log,axis=-1).dimshuffle(0,'x')

        geom_av = T.exp(geom_av_log)

        geom_pdf = geom_av/T.sum(geom_av,axis=-1).dimshuffle(0,'x')
        l1_loss = 0
        cumulative_distribution = T.cumsum(geom_pdf, axis=-1)

    if not do_optimization:
        ind.set_value(list(range(NUM_VALIDATIONS)))
        f_eval = theano.function([], cumulative_distribution)
        return f_eval()[0]

    # targets = (NUM_VALIDATIONS, 600)
    t_valid = theano.shared(targets.astype('float32'))
    t_train = theano.shared(targets.astype('float32'))

    CRPS_train = T.mean((cumulative_distribution - t_train.take(ind, axis=0))**2) + C * l1_loss
    CRPS_valid = T.mean((cumulative_distribution - t_valid.take(ind, axis=0))**2)

    iter_optimize = theano.function([], CRPS_train, on_unused_input="ignore", updates=lasagne.updates.adam(CRPS_train, [params], 1.0))
    f_val = theano.function([], CRPS_valid)

    def optimize_my_params():
        # Fixed number of Adam steps (early stopping).
        for _ in range(40 if special_average else 100):
            score = iter_optimize()
        result = params.get_value()
        return result, score

    if num_cross_validation_masks==0:
        ind.set_value(list(range(NUM_VALIDATIONS)))
        params.set_value(params_init)
        optimal_params, train_score = optimize_my_params()
        if expert_weights is None:
            # No expert was filtered out, so the fitted weights already line
            # up with the caller's experts.
            final_weights = np.array(optimal_params[:NUM_EXPERTS], dtype='float64')
        else:
            # Filtered experts get a very negative logit, i.e. ~0 after softmax.
            final_weights = -1e10 * np.ones(expert_weights.shape,)
            final_weights[np.where(expert_weights>cutoff)] = optimal_params[:NUM_EXPERTS]
        final_params = np.concatenate(( final_weights, optimal_params[NUM_EXPERTS:]))
        return softmax(final_weights), train_score, final_params

    final_params = []
    final_losses = []
    # Three blank lines that the progress line below moves back over.
    print()
    print()
    print()
    for fold in range(num_folds):
        for i_cross_validation in range(num_cross_validation_masks):
            print("\r\033[F\033[F\033[Fcross_validation %d/%d"%(fold*num_cross_validation_masks+i_cross_validation+1, num_folds*num_cross_validation_masks))
            val_indices = get_cross_validation_indices(list(range(NUM_VALIDATIONS)),
                                                   validation_index=i_cross_validation,
                                                   number_of_splits=num_cross_validation_masks,
                                                   rng_seed=fold,
                                                   )

            indices = [i for i in range(NUM_VALIDATIONS) if i not in val_indices]

            # Train on the complement of the validation split, from the
            # initial parameters.
            ind.set_value(indices)
            params.set_value(params_init)
            result, train_score = optimize_my_params()

            final_params.append(result)

            ind.set_value(val_indices)
            validation_score = f_val()
            print("              Current train value: %.6f" % train_score)
            print("         Current validation value: %.6f" % validation_score)
            final_losses.append(validation_score)

    optimal_params = np.mean(final_params, axis=0)
    average_loss   = np.mean(final_losses)

    expert_weights_result = softmax(optimal_params[:NUM_EXPERTS])

    return expert_weights_result, average_loss, optimal_params  # (NUM_EXPERTS,)

Example #14

0

Show file

File: merge_predictions.py Project: Keesiu/meta-kaggle

def geomav(x, *args, **kwargs):
    """Merge the CDFs in ``x[0]`` through ``utils.norm_geometric_average``.

    Only the first element of ``x`` is used. Returns ``np.zeros(600)`` when
    that element is empty; otherwise the cumulative sum of the averaged PDFs.
    Extra positional and keyword arguments are accepted and ignored.
    """
    first = x[0]
    if not len(first):
        return np.zeros(600)
    densities = utils.cdf_to_pdf(first)
    merged_density = utils.norm_geometric_average(densities)
    return np.cumsum(merged_density)

Example #15

0

Show file

File: merge_predictions.py Project: 317070/kaggle-heart

def optimize_expert_weights(expert_predictions,
                            average_distribution,
                            mask_matrix=None,
                            targets=None,
                            num_cross_validation_masks=2,
                            num_folds=1,
                            eps=1e-14,
                            cutoff=0.01,
                            do_optimization=True,
                            expert_weights=None,
                            optimal_params=None,
                            special_average=False,
                            *args, **kwargs):
    """
    Fit softmax expert weights by minimising the mean squared difference
    between the merged cumulative distribution and the targets (the quantity
    the code calls CRPS), using Adam updates on a Theano graph.

    The body only uses syntax that is valid on both Python 2 and Python 3.

    :param expert_predictions: experts x validation_samples x 600 x
    :param average_distribution: 600 x
    :param mask_matrix: experts x validation_samples x ; despite the ``None``
        default it is used unconditionally, so it must be supplied
    :param targets: validation_samples x 600 x ; required unless
        ``do_optimization`` is False
    :param num_cross_validation_masks: number of cross-validation splits per
        fold; ``0`` trains once on all validation samples
    :param num_folds: number of repetitions of the cross-validation, each
        passing its index as ``rng_seed`` to ``get_cross_validation_indices``
    :param eps: floor for the log of non-positive PDF entries and guard
        against division by zero (only used when ``special_average`` is set)
    :param cutoff: experts whose ``expert_weights`` entry is not above this
        value are dropped before optimising
    :param do_optimization: when False, only evaluate the merged distribution
        for the initial parameters
    :param expert_weights: optional prior weights used for the cutoff filter
    :param optimal_params: optional initial parameter vector
    :param special_average: use the information-weighted geometric average
        instead of the masked arithmetic average
    :return: when ``do_optimization`` is False, the first row of the evaluated
        cumulative distributions. When ``num_cross_validation_masks`` is 0,
        ``(softmax weights, train score, parameter vector)`` re-expanded to
        the unfiltered experts. Otherwise ``(softmax weights, mean validation
        loss, mean parameter vector)`` over all cross-validation runs.
    """
    if expert_weights is not None:
        # Drop the experts that do not pass the cutoff.
        keep = expert_weights > cutoff
        mask_matrix = mask_matrix[keep, :]
        expert_predictions = expert_predictions[keep, :, :]

    NUM_EXPERTS = expert_predictions.shape[0]
    NUM_VALIDATIONS = expert_predictions.shape[1]
    NUM_FILTER_PARAMETERS = 2

    # Indices of the validation samples currently fed through the graph; it is
    # overwritten with set_value before every evaluation.
    ind = theano.shared(np.zeros((NUM_VALIDATIONS,), dtype='int32'))

    if optimal_params is None:
        params_init = np.concatenate([ np.ones((NUM_EXPERTS,), dtype='float32'),
                                       np.ones((NUM_FILTER_PARAMETERS,), dtype='float32') ])
    else:
        params_init = optimal_params.astype('float32')

    params = theano.shared(params_init.astype('float32'))

    C = 0.0001  # coefficient of the l1_loss term in the training loss
    if not special_average:
        # source predictions = (NUM_EXPERTS, NUM_VALIDATIONS, 600)
        X = theano.shared(expert_predictions.astype('float32'))
        W = params[:NUM_EXPERTS]
        weights = T.nnet.softmax(W.dimshuffle('x',0)).dimshuffle(1, 0)
        preds = X.take(ind, axis=1)
        mask = theano.shared(mask_matrix.astype('float32')).take(ind, axis=1)
        # Masked, weighted arithmetic mean over the experts.
        masked_weights = mask * weights
        tot_masked_weights = T.clip(masked_weights.sum(axis=0), 1e-7, utils.maxfloat)
        preds_weighted_masked = preds * masked_weights.dimshuffle(0, 1, 'x')
        cumulative_distribution = preds_weighted_masked.sum(axis=0) / tot_masked_weights.dimshuffle(0, 'x')
        # loss
        l1_loss = weights.sum()
    else:
        # Information weights per expert prediction = (NUM_EXPERTS, NUM_VALIDATIONS, 600)
        weights = generate_information_weight_matrix(expert_predictions, average_distribution)
        weight_matrix = theano.shared((mask_matrix[:,:,None]*weights).astype('float32'))
        pdf = utils.cdf_to_pdf(expert_predictions)
        x_log = np.log(pdf)
        x_log[pdf<=0] = np.log(eps)
        # log-densities = (NUM_EXPERTS, NUM_VALIDATIONS, 600)
        X_log = theano.shared(x_log.astype('float32'))
        X_log_i = X_log.take(ind, axis=1)
        w_i = weight_matrix.take(ind, axis=1)

        W = params[:NUM_EXPERTS]
        w_i = w_i * T.nnet.softmax(W.dimshuffle('x',0)).dimshuffle(1, 0, 'x')

        # Weighted geometric mean over the experts, computed in log space.
        geom_av_log = T.sum(X_log_i * w_i, axis=0) / (T.sum(w_i, axis=0) + eps)
        # Shift so each row's maximum is zero before exponentiating.
        geom_av_log = geom_av_log - T.max(geom_av_log,axis=-1).dimshuffle(0,'x')

        geom_av = T.exp(geom_av_log)

        geom_pdf = geom_av/T.sum(geom_av,axis=-1).dimshuffle(0,'x')
        l1_loss = 0
        cumulative_distribution = T.cumsum(geom_pdf, axis=-1)

    if not do_optimization:
        # list(...) keeps this a concrete list on Python 3 as well.
        ind.set_value(list(range(NUM_VALIDATIONS)))
        f_eval = theano.function([], cumulative_distribution)
        return f_eval()[0]

    # targets = (NUM_VALIDATIONS, 600)
    t_valid = theano.shared(targets.astype('float32'))
    t_train = theano.shared(targets.astype('float32'))

    CRPS_train = T.mean((cumulative_distribution - t_train.take(ind, axis=0))**2) + C * l1_loss
    CRPS_valid = T.mean((cumulative_distribution - t_valid.take(ind, axis=0))**2)

    iter_optimize = theano.function([], CRPS_train, on_unused_input="ignore", updates=lasagne.updates.adam(CRPS_train, [params], 1.0))
    f_val = theano.function([], CRPS_valid)

    def optimize_my_params():
        # Fixed number of Adam steps (early stopping).
        for _ in range(40 if special_average else 100):
            score = iter_optimize()
        result = params.get_value()
        return result, score

    if num_cross_validation_masks==0:
        ind.set_value(list(range(NUM_VALIDATIONS)))
        params.set_value(params_init)
        optimal_params, train_score = optimize_my_params()
        if expert_weights is None:
            # No expert was filtered out, so the fitted weights already line
            # up with the caller's experts.
            final_weights = np.array(optimal_params[:NUM_EXPERTS], dtype='float64')
        else:
            # Filtered experts get a very negative logit, i.e. ~0 after softmax.
            final_weights = -1e10 * np.ones(expert_weights.shape,)
            final_weights[np.where(expert_weights>cutoff)] = optimal_params[:NUM_EXPERTS]
        final_params = np.concatenate(( final_weights, optimal_params[NUM_EXPERTS:]))
        return softmax(final_weights), train_score, final_params

    final_params = []
    final_losses = []
    # Three blank lines that the progress line below moves back over.
    # print("") emits an empty line on both Python 2 and Python 3.
    print("")
    print("")
    print("")
    for fold in range(num_folds):
        for i_cross_validation in range(num_cross_validation_masks):
            print("\r\033[F\033[F\033[Fcross_validation %d/%d"%(fold*num_cross_validation_masks+i_cross_validation+1, num_folds*num_cross_validation_masks))
            val_indices = get_cross_validation_indices(list(range(NUM_VALIDATIONS)),
                                                   validation_index=i_cross_validation,
                                                   number_of_splits=num_cross_validation_masks,
                                                   rng_seed=fold,
                                                   )

            indices = [i for i in range(NUM_VALIDATIONS) if i not in val_indices]

            # Train on the complement of the validation split, from the
            # initial parameters.
            ind.set_value(indices)
            params.set_value(params_init)
            result, train_score = optimize_my_params()

            final_params.append(result)

            ind.set_value(val_indices)
            validation_score = f_val()
            print("              Current train value: %.6f" % train_score)
            print("         Current validation value: %.6f" % validation_score)
            final_losses.append(validation_score)

    optimal_params = np.mean(final_params, axis=0)
    average_loss   = np.mean(final_losses)

    expert_weights_result = softmax(optimal_params[:NUM_EXPERTS])

    return expert_weights_result, average_loss, optimal_params  # (NUM_EXPERTS,)