Example #1
    def raw_loss(score, nidx, tidx, specweight):
        # nidx = V x K
        # tidx = V x 1
        # specweight: V x 1
        # score: V x K-1 x 1
        n_tidxs = SelectWithDefault(nidx, tidx, -1)  # V x K x 1
        tf.assert_equal(tidx, n_tidxs[:, 0])  #check that nidx carries the self-reference at position 0

        #int32 range is about +/-2.1e9, so -1e9 is a safe sentinel
        n_tidxs = tf.where(n_tidxs < 0, -1000000000, n_tidxs)  #set noise entries to -1e9

        n_active = tf.where(nidx >= 0, tf.ones_like(nidx, dtype='float32'), 0.)[:, 1:]  # V x K-1
        specweight = tf.clip_by_value(specweight, 0., 1.)
        n_specw = SelectWithDefault(nidx, specweight, -1.)[:, 1:, 0]  # V x K-1

        #now this will be false for all noise
        n_sameasprobe = tf.cast(
            tf.expand_dims(tidx, axis=2) == n_tidxs[:, 1:, :],
            dtype='float32')  # V x K-1 x 1

        lossval = tf.keras.losses.binary_crossentropy(n_sameasprobe,
                                                      score)  # V x K-1
        lossval *= n_active
        lossval *= (1. - 0.9 * n_specw)  #reduce spectators, but don't remove them

        lossval = tf.math.divide_no_nan(tf.reduce_sum(lossval, axis=1),
                                        tf.reduce_sum(n_active, axis=1))  # V
        lossval *= (1. - 0.9 * specweight[:, 0])  #V
        return tf.reduce_mean(lossval)
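SelectWithDefault is used throughout these examples but not shown. A minimal sketch of its assumed behaviour (gather the neighbour rows, substituting a default value wherever the index is the -1 padding) could look like this; the name and semantics are inferred from usage, not taken from the repo:

import tensorflow as tf

def select_with_default_sketch(indices, values, default):
    # indices: V x K, -1 padded; values: V x F  ->  output: V x K x F
    safe_idx = tf.where(indices < 0, tf.zeros_like(indices), indices)
    gathered = tf.gather(values, safe_idx)  # V x K x F
    mask = tf.cast(indices >= 0, gathered.dtype)[..., tf.newaxis]  # V x K x 1
    return mask * gathered + (1. - mask) * default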
Example #2
    def _rs_loop(coords, tidx):
        Msel, M_not, N_per_obj = CreateMidx(tidx,
                                            calc_m_not=True)  #N_per_obj: K x 1
        if N_per_obj is None:
            return 0., 0., 0.  #no objects, discard
        N_per_obj = tf.cast(N_per_obj, dtype='float32')
        N_tot = tf.cast(tidx.shape[0], dtype='float32')
        K = tf.cast(Msel.shape[0], dtype='float32')

        padmask_m = SelectWithDefault(Msel, tf.ones_like(coords[:, 0:1]),
                                      0.)  # K x V' x 1
        coords_m = SelectWithDefault(Msel, coords, 0.)  # K x V' x C
        #create average
        av_coords_m = tf.reduce_sum(coords_m * padmask_m, axis=1)  # K x C
        av_coords_m = tf.math.divide_no_nan(av_coords_m, N_per_obj)  #K x C
        av_coords_m = tf.expand_dims(av_coords_m, axis=1)  #K x 1 x C

        distloss = tf.reduce_sum((av_coords_m - coords_m)**2, axis=2)  # K x V'
        distloss = tf.math.log(tf.math.exp(1.) * distloss + 1.) * padmask_m[:, :, 0]
        distloss = tf.math.divide_no_nan(tf.reduce_sum(distloss, axis=1),
                                         N_per_obj[:, 0])  #K
        distloss = tf.math.divide_no_nan(tf.reduce_sum(distloss), K)

        repdist = tf.expand_dims(coords, axis=0) - av_coords_m  #K x V x C
        repdist = tf.reduce_sum(repdist**2, axis=-1, keepdims=True)  #K x V x 1
        reploss = M_not * tf.exp(-repdist)  #K x V x 1
        #downweight noise
        reploss *= tf.expand_dims(
            (1. - 0.9 * tf.cast(tidx < 0, dtype='float32')), axis=0)
        reploss = tf.reduce_sum(reploss, axis=1) / (N_tot - N_per_obj)  #K x 1
        reploss = tf.reduce_sum(reploss) / (K + 1e-3)

        return distloss + reploss, distloss, reploss
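CreateMidx itself is not part of the snippet. Judging from how Msel, M_not and N_per_obj are consumed, it groups vertex indices by truth object: one padded index row per object, a complementary not-this-object mask, and per-object counts. An eager-mode sketch under those assumptions, not the repo's implementation:

import tensorflow as tf

def create_midx_sketch(tidx):
    # tidx: V x 1 truth indices, noise < 0
    flat = tidx[:, 0]
    uniques, _ = tf.unique(tf.boolean_mask(flat, flat >= 0))
    if uniques.shape[0] == 0:
        return None, None, None  #only noise
    same = flat[tf.newaxis, :] == uniques[:, tf.newaxis]  # K x V
    M_not = tf.cast(~same, 'float32')[..., tf.newaxis]  # K x V x 1
    N_per_obj = tf.reduce_sum(tf.cast(same, 'int32'), axis=1, keepdims=True)  # K x 1
    #Msel: per-object vertex indices, -1 padded to the largest object
    all_idx = tf.tile(tf.range(tf.shape(flat)[0])[tf.newaxis, :], [tf.shape(uniques)[0], 1])
    Msel = tf.ragged.boolean_mask(all_idx, same).to_tensor(default_value=-1)  # K x V'
    return Msel, M_not, N_per_obj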
Example #3
def AccumulateKnn(distances, features, indices, mean_and_max=True):
    '''
    Accumulates neighbour features, weighted by exp(-distance).

    Wraps the custom op with
    .Output("out_features: float32")
    .Output("out_max_idxs: int32");

    Assumes that neighbour indices can be padded with -1, but not mixed, e.g. [1,4,-1,2] needs to be [1,4,2,-1].
    Other than the padding, the indices must be unique.
    '''
    #compatibility: the accumulation weights are exp(-distance)
    distances = tf.exp(-distances)

    if not gl.acc_ops_use_tf_gradients:
        return _accknn_op.AccumulateKnn(distances=distances,
                                        features=features,
                                        indices=indices,
                                        n_moments=0,
                                        mean_and_max=mean_and_max)

    distances = tf.expand_dims(distances, axis=2)  #V x K x 1
    nfeat = SelectWithDefault(indices, features, 0.)  # V x K x F
    wfeat = distances * nfeat
    fmean = tf.reduce_mean(wfeat, axis=1)  # V x F (mean over all K slots; padded entries contribute zero)
    fmax = tf.reduce_max(wfeat, axis=1)
    fout = fmean
    if mean_and_max:
        fout = tf.concat([fmean, fmax], axis=1)
    return fout, None
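A shape-level toy call, assuming gl.acc_ops_use_tf_gradients is enabled so the pure-TF branch above runs:

import tensorflow as tf

V, K, F = 6, 3, 4
distances = tf.random.uniform((V, K))  # V x K
features = tf.random.uniform((V, F))  # V x F
indices = tf.constant([[i, (i + 1) % V, -1] for i in range(V)])  # V x K, -1 padded
out, _ = AccumulateKnn(distances, features, indices, mean_and_max=True)
print(out.shape)  # (V, 2*F): concatenated mean and max over neighbours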
Example #4
def AccumulateLinKnn(weights, features, indices, mean_and_max=True):
    '''
    Accumulates neighbour features with linear weights (rather than the exp(-distance) weighting used in AccumulateKnn)
    '''
    if not gl.acc_ops_use_tf_gradients:
        return _accknn_op.AccumulateKnn(distances=weights,
                                        features=features,
                                        indices=indices,
                                        n_moments=0,
                                        mean_and_max=mean_and_max)

    weights = tf.expand_dims(weights, axis=2)  #V x K x 1
    nfeat = SelectWithDefault(indices, features, 0.)  # V x K x F
    wfeat = weights * nfeat
    fmean = tf.reduce_mean(wfeat, axis=1)  # V x F (mean over all K slots; padded entries contribute zero)
    fmax = tf.reduce_max(wfeat, axis=1)
    fout = fmean
    if mean_and_max:
        fout = tf.concat([fmean, fmax], axis=1)
    return fout, None
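Note that both accumulators take tf.reduce_mean over all K slots, so -1-padded neighbours (which contribute zero) still count in the denominator. A variant that averages only over valid neighbours, shown purely as an alternative and not as the repo's behaviour:

import tensorflow as tf

def masked_mean(wfeat, indices):
    # wfeat: V x K x F weighted neighbour features; indices: V x K, -1 padded
    n_valid = tf.reduce_sum(tf.cast(indices >= 0, 'float32'), axis=1, keepdims=True)  # V x 1
    return tf.math.divide_no_nan(tf.reduce_sum(wfeat, axis=1), n_valid)  # V x F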
Example #5
    def raw_loss(score, nidx, tidxs, specweights):
        # score: V x 1
        # nidx: V x K
        # tidxs: V x 1
        # specweights: V x 1

        n_tidxs = SelectWithDefault(nidx, tidxs, -1)[:, :, 0]  # V x K
        tf.assert_equal(tidxs, n_tidxs[:, 0:1])  #sanity check that the self-reference comes first in nidx
        n_tidxs = tf.where(n_tidxs < 0, -10, n_tidxs)  #set noise to -10

        #the actual check
        n_good = tf.cast(n_tidxs == tidxs, dtype='float32')  # V x K; noise is always bad

        #downweight spectators but don't set them to zero
        n_active = tf.where(nidx >= 0, tf.ones_like(nidx, dtype='float32'), 0.)  # V x K
        truthscore = tf.math.divide_no_nan(
            tf.reduce_sum(n_good, axis=1, keepdims=True),
            tf.reduce_sum(n_active, axis=1, keepdims=True))  #V x 1
        #cut at 90% same
        truthscore = tf.where(truthscore > 0.9, 1., truthscore * 0.)  #V x 1

        lossval = tf.keras.losses.binary_crossentropy(truthscore, score)  #V

        specweights = specweights[:, 0]  #V
        isnotnoise = tf.cast(tidxs >= 0, dtype='float32')[:, 0]  #V
        obj_lossval = tf.math.divide_no_nan(
            tf.reduce_sum(specweights * isnotnoise * lossval),
            tf.reduce_sum(specweights * isnotnoise))
        noise_lossval = tf.math.divide_no_nan(
            tf.reduce_sum((1. - isnotnoise) * lossval),
            tf.reduce_sum(1. - isnotnoise))

        lossval = obj_lossval + 0.1 * noise_lossval  #noise doesn't really matter so much

        return lossval
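A quick shape-level sanity call with random inputs (self-reference placed first in nidx so the internal assert passes; SelectWithDefault assumed available):

import tensorflow as tf

V, K = 8, 4
score = tf.random.uniform((V, 1))
nidx = tf.concat([tf.range(V)[:, tf.newaxis],
                  tf.random.uniform((V, K - 1), 0, V, dtype='int32')], axis=1)  # V x K
tidxs = tf.random.uniform((V, 1), -1, 3, dtype='int32')  # -1 entries are noise
specweights = tf.ones((V, 1))
loss = raw_loss(score, nidx, tidxs, specweights)  # scalar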
Example #6
    def raw_loss(dist, nidxs, tidxs, specweight, print_loss, name):

        sel_tidxs = SelectWithDefault(nidxs, tidxs, -1)[:, :, 0]
        sel_spec = SelectWithDefault(nidxs, specweight, 1.)[:, :, 0]
        active = tf.where(nidxs >= 0, tf.ones_like(dist), 0.)
        notspecmask = 1.  #alternatives: (1. - 0.5*sel_spec) to only reduce spec, or tf.where(sel_spec>0, 0., tf.ones_like(dist))

        probe_is_notnoise = tf.cast(tidxs >= 0, dtype='float32')[:, 0]  #V
        notnoisemask = tf.where(sel_tidxs < 0, 0., tf.ones_like(dist))
        notnoiseweight = notnoisemask + (1. - notnoisemask) * 0.01
        #notspecmask *= notnoisemask#noise can never be spec
        #mask spectators
        sameasprobe = tf.cast(sel_tidxs[:, 0:1] == sel_tidxs, dtype='float32')
        #sameasprobe *= notnoisemask #always push away noise, also from each other

        #only not noise can be attractive
        attmask = sameasprobe * notspecmask * active
        repmask = (1. - sameasprobe) * notspecmask * active

        attr = tf.math.log(tf.math.exp(1.) * dist + 1.) * attmask
        rep = tf.exp(-dist) * repmask * notnoiseweight
        #alternatives tried: 1./(dist+1.), 2.*tf.exp(-3.16*tf.sqrt(dist+1e-6)), 1./(dist+0.1)
        nattneigh = tf.reduce_sum(attmask, axis=1)
        nrepneigh = tf.reduce_sum(repmask, axis=1)

        attloss = probe_is_notnoise * tf.reduce_sum(attr, axis=1)  #self-distance is always 0
        attloss = tf.math.divide_no_nan(attloss, nattneigh)
        reploss = probe_is_notnoise * tf.reduce_sum(rep, axis=1)
        reploss = tf.math.divide_no_nan(reploss, nrepneigh)
        #noise does not actively contribute
        lossval = attloss + reploss
        lossval = tf.math.divide_no_nan(
            tf.reduce_sum(probe_is_notnoise * lossval),
            tf.reduce_sum(probe_is_notnoise))

        if print_loss:
            avattdist = probe_is_notnoise * tf.math.divide_no_nan(
                tf.reduce_sum(attmask * tf.sqrt(dist), axis=1), nattneigh)
            avattdist = tf.reduce_sum(avattdist) / tf.reduce_sum(
                probe_is_notnoise)

            avrepdist = probe_is_notnoise * tf.math.divide_no_nan(
                tf.reduce_sum(repmask * tf.sqrt(dist), axis=1), nrepneigh)
            avrepdist = tf.reduce_sum(avrepdist) / tf.reduce_sum(
                probe_is_notnoise)

            if hasattr(lossval, "numpy"):
                print(
                    name,
                    'loss',
                    lossval.numpy(),
                    'mean att neigh',
                    tf.reduce_mean(nattneigh).numpy(),
                    'mean rep neigh',
                    tf.reduce_mean(nrepneigh).numpy(),
                    'att',
                    tf.reduce_mean(probe_is_notnoise * attloss).numpy(),
                    'rep',
                    tf.reduce_mean(probe_is_notnoise * reploss).numpy(),
                    'dist (same)',
                    avattdist.numpy(),
                    'dist (other)',
                    avrepdist.numpy(),
                )
            else:
                tf.print(name, 'loss', lossval, 'mean att neigh',
                         tf.reduce_mean(nattneigh), 'mean rep neigh',
                         tf.reduce_mean(nrepneigh))

        return lossval
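All of these losses lean on tf.math.divide_no_nan so that empty neighbourhoods (e.g. nattneigh == 0) contribute exactly zero instead of NaN. A quick check of that behaviour:

import tensorflow as tf

print(tf.math.divide_no_nan(tf.constant([1., 0.]), tf.constant([0., 0.])).numpy())  # [0. 0.]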
Example #7
def oc_per_batch_element(
        beta,
        x,
        q_min,
        object_weights, # V x 1 !!
        truth_idx,
        is_spectator,
        payload_loss,
        S_B=1.,
        payload_weight_function = None,  #receives betas as K x V x 1 as input, and a threshold val
        payload_weight_threshold = 0.8,
        use_mean_x = 0.,
        cont_beta_loss=False,
        prob_repulsion=False,
        phase_transition=False,
        phase_transition_double_weight=False,
        alt_potential_norm=False,
        cut_payload_beta_gradient=False,
        kalpha_damping_strength=0.
        ):
    '''
    all inputs
    V x X , where X can be 1
    '''
    
    if not alt_potential_norm:
        raise ValueError("alt_potential_norm=False not implemented")
    if not prob_repulsion:
        raise ValueError("prob_repulsion=False not implemented")
    if not phase_transition:
        raise ValueError("phase_transition=False not implemented")
    if phase_transition_double_weight:
        raise ValueError("phase_transition_double_weight not implemented")
    if cont_beta_loss:
        raise ValueError("cont_beta_loss not implemented")
    if payload_weight_function is not None:
        raise ValueError("payload_weight_function not implemented")
        
        
    
    #set all spectators invalid here, everything scales with beta, so:
    beta_in = beta
    beta = tf.clip_by_value(beta, 0.,1.-1e-4)
    beta *= (1. - is_spectator)
    qraw = tf.math.atanh(beta)**2 
    q = qraw + q_min * (1. - is_spectator) # V x 1
    #q = tf.where(beta_in<1.-1e-4, q, tf.math.atanh(1.-1e-4)**2 + q_min + beta_in) #just give the rest above clip a gradient
    
    N = tf.cast(beta.shape[0], dtype='float32')
    is_noise = tf.where(truth_idx<0, tf.zeros_like(truth_idx,dtype='float32')+1., 0.) #V x 1, 1 for noise (as in the later examples; the original had the mask inverted)
    
    Msel, M_not, N_per_obj = CreateMidx(truth_idx, calc_m_not=True)
    
    N_per_obj = tf.cast(N_per_obj, dtype='float32') # K x 1
    
    K = tf.cast(Msel.shape[0], dtype='float32') 
    
    padmask_m = SelectWithDefault(Msel, tf.zeros_like(beta_in)+1., 0) #K x V-obj x 1
    x_m = SelectWithDefault(Msel, x, 0.) #K x V-obj x C
    beta_m = SelectWithDefault(Msel, beta_in, 0.) #K x V-obj x 1
    q_m = SelectWithDefault(Msel, q, 0.)#K x V-obj x 1
    object_weights_m = SelectWithDefault(Msel, object_weights, 0.)
    
    kalpha_m = tf.argmax(beta_m, axis=1) # K x 1
    
    x_kalpha_m = tf.gather_nd(x_m,kalpha_m, batch_dims=1) # K x C
    if use_mean_x>0:
        x_kalpha_m_m = tf.reduce_sum(q_m * x_m * padmask_m,axis=1) # K x C
        x_kalpha_m_m = tf.math.divide_no_nan(x_kalpha_m_m, tf.reduce_sum(q_m * padmask_m, axis=1)+1e-9)
        x_kalpha_m = use_mean_x * x_kalpha_m_m + (1. - use_mean_x)*x_kalpha_m
    
    if kalpha_damping_strength > 0:
        x_kalpha_m = kalpha_damping_strength * tf.stop_gradient(x_kalpha_m) + (1. - kalpha_damping_strength)*x_kalpha_m
    
    q_kalpha_m = tf.gather_nd(q_m,kalpha_m, batch_dims=1) # K x 1
    beta_kalpha_m = tf.gather_nd(beta_m,kalpha_m, batch_dims=1) # K x 1
    
    object_weights_kalpha_m = tf.gather_nd(object_weights_m,kalpha_m, batch_dims=1) # K x 1
    
    distancesq_m = tf.reduce_sum( (tf.expand_dims(x_kalpha_m, axis=1) - x_m)**2, axis=-1, keepdims=True) #K x V-obj x 1
    V_att = q_m * tf.expand_dims(q_kalpha_m,axis=1) * distancesq_m #K x V-obj x 1
    V_att = V_att * tf.expand_dims(object_weights_kalpha_m,axis=1) #K x V-obj x 1
    
    V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att,axis=1), N_per_obj+1e-9) # K x 1
    V_att = tf.math.divide_no_nan(tf.reduce_sum(V_att,axis=0), K+1e-9) # 1
    
    
    #now the bit that needs Mnot
    V_rep = tf.expand_dims(x_kalpha_m, axis=1) #K x 1 x C
    V_rep = V_rep - tf.expand_dims(x, axis=0) #K x V x C
    V_rep = tf.reduce_sum(V_rep**2, axis=-1, keepdims=True)  #K x V x 1
    
    V_rep = -2.*tf.math.log(1.-tf.math.exp(-V_rep/2.)+1e-5)
    V_rep *= M_not * tf.expand_dims(q, axis=0) #K x V x 1
    V_rep = tf.reduce_sum(V_rep, axis=1) #K x 1
    
    V_rep *= object_weights_kalpha_m * q_kalpha_m #K x 1
    
    V_rep = tf.math.divide_no_nan(V_rep, 
                                  tf.expand_dims(tf.expand_dims(N,axis=0),axis=0) - N_per_obj+1e-9) # K x 1
    V_rep = tf.math.divide_no_nan(tf.reduce_sum(V_rep,axis=0), K+1e-9) # 1
    
    
    ## beta terms
    B_pen = - tf.reduce_sum(padmask_m * 1./(20.*distancesq_m + 1.),axis=1) # K x 1
    B_pen += 1. #remove self-interaction term (just for offset)
    B_pen *= object_weights_kalpha_m * beta_kalpha_m
    B_pen = tf.math.divide_no_nan(B_pen, N_per_obj+1e-9) # K x 1
    #now 'standard' 1-beta
    B_pen -= 0.2*object_weights_kalpha_m * tf.math.sqrt(beta_kalpha_m+1e-6) 
    #another "-> 1, but slower" per object
    B_pen = tf.math.divide_no_nan(tf.reduce_sum(B_pen,axis=0), K+1e-9) # 1
    
    
    too_much_B_pen = tf.constant([0.],dtype='float32')
    
    Noise_pen = S_B*tf.math.divide_no_nan(tf.reduce_sum(is_noise * beta_in), tf.reduce_sum(is_noise))
    
    #explicit payload weight function here, the old one was odd
    
    p_w = tf.math.atanh(padmask_m * tf.clip_by_value(beta_m, 1e-4, 1.-1e-4))**2 #already zero-padded  , K x V_perobj x 1
    p_w = tf.math.divide_no_nan(p_w, tf.reduce_max(p_w, axis=1, keepdims=True)+1e-9) 
    #normalise to maximum; this + 1e-9 might be an issue POSSIBLE FIXME
    
    if cut_payload_beta_gradient:
        p_w = tf.stop_gradient(p_w)
        
    payload_loss_m = p_w * SelectWithDefault(Msel, payload_loss, 0.) #K x V_perobj x P
    payload_loss_m = tf.reduce_sum(payload_loss_m, axis=1)
    
    pll = tf.math.divide_no_nan(payload_loss_m, N_per_obj+1e-9) # K x P
    pll = tf.math.divide_no_nan(tf.reduce_sum(pll,axis=0), K+1e-9) # P
    
    return V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen
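The repulsive kernel -2*log(1 - exp(-d/2) + 1e-5) used above diverges (up to the 1e-5 regulator) at zero squared distance and decays towards zero for well-separated points; a quick numeric look:

import tensorflow as tf

d2 = tf.constant([0., 0.5, 2., 10.])
print((-2. * tf.math.log(1. - tf.math.exp(-d2 / 2.) + 1e-5)).numpy())
# roughly [23.0, 3.0, 0.9, 0.013]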
Example #8
def SlicingKnn(K : int, coords, row_splits, features_to_bin_on=None, 
               n_bins=None, bin_width=None, return_n_bins: bool=False,
               min_bins=[3,3]):
    '''
    Perform kNN search with slicing method

    @type K: int
    @param K: number of neighbours to search for

    @type coords: tf.Tensor
    @param coords: coordinate tensor

    @type row_splits: tf.Tensor
    @param row_splits: row splits tensor

    @type features_to_bin_on: Tuple[int, int]
    @param features_to_bin_on: indices of features to bin on

    @type n_bins: Tuple[int, int]
    @param n_bins: number of bins to split phase space for kNN search

    @type bin_width: Tuple[float, float] or Tuple[tf.Variable, tf.Variable]
    @param bin_width: width of phase-space bins
    
    @type return_n_bins: bool
    @param return_n_bins: also returns the total number of bins used
    
    @type min_bins: list
    @param min_bins: minimum binning (in 2D)
    
    '''

    #  start_time_int = time.time()

    # type and values check for input parameters
    check_tuple(features_to_bin_on,"features_to_bin_on",int)
    n_features = coords.shape[1]
    if (features_to_bin_on[0]>=n_features) or (features_to_bin_on[1]>=n_features) or (features_to_bin_on[0]==features_to_bin_on[1]):
        raise ValueError("<features_to_bin_on>: indices must be distinct and smaller than the number of coordinate features!")
    if ((n_bins is None) and (bin_width is None)) or ((n_bins is not None) and (bin_width is not None)):
        raise ValueError("Specify either <n_bins> OR <bin_width> argument but not both!")
    if n_bins is None:
        check_tuple(bin_width,"bin_width",(float,tf.Variable),checkValue=not isinstance(bin_width,tf.Variable))
    else:
        check_tuple(n_bins,"n_bins",int)

    # select only 2 dimensions that will be used for binning
    r_coords = tf.gather(coords,features_to_bin_on,axis=1)

    # find min/max of selected coordinates
    r_coords = tf.transpose(r_coords) # tf.map_fn applies fn to each element unstacked along axis 0

    r_max = tf.map_fn(tf.math.reduce_max, r_coords, fn_output_signature=tf.float32)
    r_min = tf.map_fn(tf.math.reduce_min, r_coords, fn_output_signature=tf.float32)

    # add safety margin to the phase-space for binning
    r_diff = tf.add(r_max,-1*r_min)
    r_max = tf.add(r_max,0.00001*r_diff)
    r_min = tf.add(r_min,-0.00001*r_diff)
    r_diff = tf.add(r_max,-1*r_min)
    

    # calculate n_bins if bin_width is given
    if bin_width is not None:
        if not isinstance(bin_width[0], tf.Variable): #already checked both are the same
            bin_width = tf.constant(bin_width)
        else:
            bin_width = [tf.expand_dims(a,axis=0) for a in bin_width]
            bin_width = tf.concat(bin_width,axis=0)
        _n_bins = tf.math.maximum(tf.constant(min_bins, dtype=tf.int32),
                tf.math.minimum(
                    tf.cast(tf.math.ceil(tf.multiply(r_diff,1.0/bin_width)),tf.int32),
                    tf.constant([50,50], dtype=tf.int32))) # clamp the number of bins between min_bins and 50x50
    else:
        _n_bins = tf.constant(n_bins, dtype=tf.int32) # cast tuple to Tensor to match required argument type
    
    idx, dist = _nknn_op.SlicingKnn(n_neighbours=K, coords=coords, row_splits=row_splits, n_bins=_n_bins, features_to_bin_on=features_to_bin_on, coord_min=r_min, coord_max=r_max)
    
    with tf.control_dependencies([
        tf.assert_equal(tf.range(tf.shape(idx)[0]), idx[:,0]),
        tf.assert_less(idx, row_splits[-1]),
        tf.assert_less(-2, idx)
        ]):
        
        if gl.knn_ops_use_tf_gradients:
            ncoords = SelectWithDefault(idx, coords, 0.)
            dist = (ncoords[:,0:1,:]-ncoords)**2
            dist = tf.reduce_sum(dist,axis=2)
            dist = tf.where(idx<0, 0., dist)
        
        if return_n_bins:
            return idx, dist, tf.reduce_prod(_n_bins)
        return idx, dist
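A minimal call with explicit bin widths; the row_splits tensor here marks a single event, and all values and shapes are illustrative:

import tensorflow as tf

coords = tf.random.uniform((1000, 4))
row_splits = tf.constant([0, 1000], dtype='int32')
idx, dist = SlicingKnn(K=16, coords=coords, row_splits=row_splits,
                       features_to_bin_on=(0, 1), bin_width=(0.1, 0.1))
# idx: 1000 x 16 neighbour indices (self first, -1 padded); dist: squared distances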
Example #9
def oc_per_batch_element(
        beta,
        x,
        q_min,
        object_weights,  # V x 1 !!
        truth_idx,
        is_spectator,
        payload_loss,
        S_B=1.,
        distance_scale=None,
        payload_weight_function=None,  #receives betas as K x V x 1 as input, and a threshold val
        payload_weight_threshold=0.8,
        use_mean_x=0.,
        cont_beta_loss=False,
        prob_repulsion=False,
        phase_transition=False,
        phase_transition_double_weight=False,
        alt_potential_norm=False,
        payload_beta_gradient_damping_strength=0.,
        kalpha_damping_strength=0.,
        beta_gradient_damping=0.,
        soft_q_scaling=True,
        weight_by_q=False,
        repulsion_q_min=-1.,
        super_repulsion=False):
    '''
    all inputs
    V x X , where X can be 1
    '''

    if not alt_potential_norm:
        raise ValueError("alt_potential_norm=False not implemented")
    if not prob_repulsion:
        raise ValueError("prob_repulsion=False not implemented")
    if not phase_transition:
        raise ValueError("phase_transition=False not implemented")
    if phase_transition_double_weight:
        raise ValueError("phase_transition_double_weight not implemented")
    if cont_beta_loss:
        raise ValueError("cont_beta_loss not implemented")
    if payload_weight_function is not None:
        raise ValueError("payload_weight_function not implemented")

    #set all spectators invalid here, everything scales with beta, so:
    if beta_gradient_damping > 0.:
        beta = beta_gradient_damping * tf.stop_gradient(beta) + (
            1. - beta_gradient_damping) * beta
    beta_in = beta
    beta = tf.clip_by_value(beta, 0., 1. - 1e-4)
    beta *= (1. - is_spectator)
    qraw = tf.math.atanh(beta)**2

    if soft_q_scaling:
        qraw = tf.math.atanh(beta / 1.002)**2  #beta_in**4 *20.
        beta = beta_in * (1. - is_spectator)  # no need for clipping

    q = qraw + q_min * (1. - is_spectator)  # V x 1
    #q = tf.where(beta_in<1.-1e-4, q, tf.math.atanh(1.-1e-4)**2 + q_min + beta_in) #just give the rest above clip a gradient

    N = tf.cast(beta.shape[0], dtype='float32')
    is_noise = tf.where(truth_idx < 0,
                        tf.zeros_like(truth_idx, dtype='float32') + 1.,
                        0.)  #V x 1

    Msel, M_not, N_per_obj = CreateMidx(truth_idx, calc_m_not=True)

    N_per_obj = tf.cast(N_per_obj, dtype='float32')  # K x 1

    K = tf.cast(Msel.shape[0], dtype='float32')

    padmask_m = SelectWithDefault(Msel,
                                  tf.zeros_like(beta_in) + 1.,
                                  0)  #K x V-obj x 1
    x_m = SelectWithDefault(Msel, x, 0.)  #K x V-obj x C
    beta_m = SelectWithDefault(Msel, beta_in, 0.)  #K x V-obj x 1
    q_m = SelectWithDefault(Msel, q, 0.)  #K x V-obj x 1
    object_weights_m = SelectWithDefault(Msel, object_weights, 0.)
    distance_scale_m = SelectWithDefault(Msel, distance_scale, 1.)

    kalpha_m = tf.argmax(beta_m, axis=1)  # K x 1

    x_kalpha_m = tf.gather_nd(x_m, kalpha_m, batch_dims=1)  # K x C
    if use_mean_x > 0:
        x_kalpha_m_m = tf.reduce_sum(q_m * x_m * padmask_m, axis=1)  # K x C
        x_kalpha_m_m = tf.math.divide_no_nan(
            x_kalpha_m_m,
            tf.reduce_sum(q_m * padmask_m, axis=1) + 1e-9)
        x_kalpha_m = use_mean_x * x_kalpha_m_m + (1. - use_mean_x) * x_kalpha_m

    if kalpha_damping_strength > 0:
        x_kalpha_m = kalpha_damping_strength * tf.stop_gradient(x_kalpha_m) + (
            1. - kalpha_damping_strength) * x_kalpha_m

    q_kalpha_m = tf.gather_nd(q_m, kalpha_m, batch_dims=1)  # K x 1
    beta_kalpha_m = tf.gather_nd(beta_m, kalpha_m, batch_dims=1)  # K x 1

    object_weights_kalpha_m = tf.gather_nd(object_weights_m,
                                           kalpha_m,
                                           batch_dims=1)  # K x 1
    distance_scale_kalpha_m = tf.gather_nd(distance_scale_m,
                                           kalpha_m,
                                           batch_dims=1)  # K x 1
    distance_scale_kalpha_m_exp = tf.expand_dims(distance_scale_kalpha_m,
                                                 axis=2)  # K x 1 x 1

    distancesq_m = tf.reduce_sum((tf.expand_dims(x_kalpha_m, axis=1) - x_m)**2,
                                 axis=-1,
                                 keepdims=True)  #K x V-obj x 1
    distancesq_m *= distance_scale_kalpha_m_exp**2

    huberdistsq = huber(tf.sqrt(distancesq_m + 1e-5), d=4)  #acts at 4
    V_att = q_m * tf.expand_dims(q_kalpha_m,
                                 axis=1) * huberdistsq  #K x V-obj x 1
    V_att = V_att * tf.expand_dims(object_weights_kalpha_m,
                                   axis=1)  #K x V-obj x 1

    if weight_by_q:
        V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1),
                                      tf.reduce_sum(q_m, axis=1))  # K x 1
    else:
        V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1),
                                      N_per_obj + 1e-9)  # K x 1
    V_att = tf.math.divide_no_nan(tf.reduce_sum(V_att, axis=0), K + 1e-9)  # 1

    #what if Vatt and Vrep are weighted by q, not scaled by it?
    q_rep = q
    if repulsion_q_min >= 0:
        q_rep = qraw + repulsion_q_min
        q_kalpha_m += repulsion_q_min - q_min

    #now the bit that needs Mnot
    Mnot_distances = tf.expand_dims(x_kalpha_m, axis=1)  #K x 1 x C
    Mnot_distances = Mnot_distances - tf.expand_dims(x, axis=0)  #K x V x C

    if super_repulsion:
        sq_distance = tf.reduce_sum(Mnot_distances**2, axis=-1,
                                    keepdims=True)  #K x V x 1
        l_distance = tf.reduce_sum(tf.abs(Mnot_distances),
                                   axis=-1,
                                   keepdims=True)  #K x V x 1
        V_rep = 0.5 * (sq_distance + l_distance)

    else:
        V_rep = tf.reduce_sum(Mnot_distances**2, axis=-1,
                              keepdims=True)  #K x V x 1

    V_rep *= distance_scale_kalpha_m_exp**2  #K x V x 1 , same scaling as attractive potential

    V_rep = 1. / (V_rep + 0.1)  #alternative: -2.*tf.math.log(1.-tf.math.exp(-V_rep/2.)+1e-5)

    V_rep *= M_not * tf.expand_dims(q_rep, axis=0)  #K x V x 1
    V_rep = tf.reduce_sum(V_rep, axis=1)  #K x 1

    V_rep *= object_weights_kalpha_m * q_kalpha_m  #K x 1

    if weight_by_q:
        sumq = tf.reduce_sum(M_not * tf.expand_dims(q_rep, axis=0), axis=1)
        V_rep = tf.math.divide_no_nan(V_rep, sumq)  # K x 1
    else:
        V_rep = tf.math.divide_no_nan(
            V_rep,
            tf.expand_dims(tf.expand_dims(N, axis=0), axis=0) - N_per_obj +
            1e-9)  # K x 1
    V_rep = tf.math.divide_no_nan(tf.reduce_sum(V_rep, axis=0), K + 1e-9)  # 1

    ## beta terms
    B_pen = -tf.reduce_sum(padmask_m * 1. /
                           (20. * distancesq_m + 1.), axis=1)  # K x 1
    B_pen += 1.  #remove self-interaction term (just for offset)
    B_pen *= object_weights_kalpha_m * beta_kalpha_m
    B_pen = tf.math.divide_no_nan(B_pen, N_per_obj + 1e-9)  # K x 1
    #now 'standard' 1-beta
    B_pen -= 0.2 * object_weights_kalpha_m * (
        tf.math.log(beta_kalpha_m + 1e-9))  #tf.math.sqrt(beta_kalpha_m+1e-6)
    #another "-> 1, but slower" per object
    B_pen = tf.math.divide_no_nan(tf.reduce_sum(B_pen, axis=0), K + 1e-9)  # 1

    too_much_B_pen = tf.constant([0.], dtype='float32')

    Noise_pen = S_B * tf.math.divide_no_nan(tf.reduce_sum(is_noise * beta_in),
                                            tf.reduce_sum(is_noise))

    #explicit payload weight function here, the old one was odd

    #too aggressive scaling is bad for high learning rates; use a simple power of beta
    p_w = padmask_m * tf.clip_by_value(
        beta_m**2, 1e-3, 10.)  #already zero-padded, K x V_perobj x 1

    if payload_beta_gradient_damping_strength > 0:
        p_w = payload_beta_gradient_damping_strength * tf.stop_gradient(p_w) + \
        (1.- payload_beta_gradient_damping_strength)* p_w

    payload_loss_m = p_w * SelectWithDefault(
        Msel, (1. - is_noise) * payload_loss, 0.)  #K x V_perobj x P
    payload_loss_m = object_weights_kalpha_m * tf.reduce_sum(payload_loss_m,
                                                             axis=1)
    payload_loss_m = tf.math.divide_no_nan(payload_loss_m,
                                           tf.reduce_sum(p_w, axis=1))

    #pll = tf.math.divide_no_nan(payload_loss_m, N_per_obj+1e-9) # K x P #really?
    pll = tf.math.divide_no_nan(tf.reduce_sum(payload_loss_m, axis=0),
                                K + 1e-3)  # P

    #explicit K**2 repulsion
    #if k_sq_repulsion_strength > 0.: #x_kalpha_m: K  x C
    #    k_sq_rep = tf.expand_dims(x_kalpha_m, axis=0) - tf.expand_dims(x_kalpha_m, axis=1) #x_kalpha_m: K  x K x C
    #    k_sq_rep = tf.reduce_sum(k_sq_rep**2, axis=-1) #distances**2 K x K
    #    k_sq_rep = -2.*tf.math.log(1.-tf.math.exp(-k_sq_rep/2.)+1e-5) #K x K
    #    #add qTq scaling also here?
    #    k_sq_rep *= q_kalpha_m # adding the latter term would just add a factor of 2. to the corresponding kalpha Mnot term * tf.expand_dims(q_kalpha_m[:,0], axis=0) #K x K
    #    k_sq_rep *= object_weights_kalpha_m * tf.expand_dims(object_weights_kalpha_m[:,0], axis=0) #K x K
    #    k_sq_rep = tf.math.divide_no_nan(tf.reduce_sum(k_sq_rep,axis=0), K+1e-9)
    #    k_sq_rep = tf.math.divide_no_nan(tf.reduce_sum(k_sq_rep,axis=0), K+1e-9)
    #
    #    V_rep += k_sq_repulsion_strength * k_sq_rep
    #    #object_weights_kalpha_m

    return V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen
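huber is called above but not defined in the snippet. A sketch of a standard Huber-style transition (quadratic below the threshold d, linear above, continuous at the boundary); this is an assumption about the repo's helper, guided only by the name and the "acts at 4" comment:

import tensorflow as tf

def huber_sketch(x, d):
    # quadratic in |x| below d, linear above (assumed form, not the repo's code)
    absx = tf.abs(x)
    quadratic = tf.minimum(absx, d)
    linear = absx - quadratic
    return quadratic**2 + 2. * d * linear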
Example #10
def SelectKnn(K: int,
              coords,
              row_splits,
              masking_values=None,
              threshold=0.5,
              tf_compatible=True,
              max_radius=-1.,
              mask_mode='none',
              mask_logic='xor'):
    '''
    returns indices and distances**2 ; gradient for distances is implemented!

    new: masking (switch), encoded as mask_mode plus mask_logic:
      0) none = no masking
      1) acc  = masked points get to have neighbours
      2) scat = masked points get to be neighbours

      10) xor: exclusive (one xor the other) -> exchange between collections, direction given by 1 and 2
      20) and: selected  (one and the other) -> pooling

    no gradient for the mask!

    '''
    assert mask_mode == 'none' or mask_mode == 'acc' or mask_mode == 'scat'
    assert mask_mode == 'none' or mask_logic == 'xor' or mask_logic == 'and'

    if masking_values is None:
        assert mask_mode == 'none'
        masking_values = tf.zeros_like(coords[:, 0:1])

    mask = tf.zeros_like(masking_values, dtype='int32')
    mask = tf.where(masking_values > threshold, mask + 1, mask)

    #print('mask',mask)

    op_mask_mode = 0

    if mask_logic == 'xor':
        op_mask_mode = 10
    elif mask_logic == 'and':
        op_mask_mode = 20

    if mask_mode == 'acc':
        op_mask_mode += 1
    elif mask_mode == 'scat':
        op_mask_mode += 2
    '''
      0) none = no masking
      1) acc  = get to have neighbours
      2) scat = get to be neighbours
      
      
      10) xor: exclusive (one xor the other) -> exchange between collections, direction given by 1 and 2
      20) and: selected  (one and the other) -> pooling (scat and acc don't matter)
    '''

    idx, distsq = _sknn_op.SelectKnn(n_neighbours=K,
                                     tf_compatible=tf_compatible,
                                     max_radius=max_radius,
                                     coords=coords,
                                     row_splits=row_splits,
                                     mask=mask,
                                     mask_mode=op_mask_mode)

    #safe guards
    with tf.control_dependencies([
            tf.assert_equal(tf.range(tf.shape(idx)[0]), idx[:, 0]),
            tf.assert_less(idx, row_splits[-1]),
            tf.assert_less(-2, idx)
    ]):

        if not gl.knn_ops_use_tf_gradients:
            return idx, distsq

        ncoords = SelectWithDefault(idx, coords, 0.)
        distsq = (ncoords[:, 0:1, :] - ncoords)**2
        distsq = tf.reduce_sum(distsq, axis=2)
        distsq = tf.where(idx < 0, 0., distsq)
        return idx, distsq
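A typical unmasked call (shapes illustrative; the custom op comes from the surrounding repo):

import tensorflow as tf

coords = tf.random.uniform((500, 3))
row_splits = tf.constant([0, 500], dtype='int32')
idx, distsq = SelectKnn(K=8, coords=coords, row_splits=row_splits)
# idx[:, 0] is the self index, as enforced by the assert; padded entries are -1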
Example #11
#assumes tf and the repo helpers (CreateMidx, SelectWithDefault) are imported and nvert is defined
truth_idxs = tf.random.uniform((nvert,1), 0, 6, dtype='int32', seed=0) - 1 #shifted so that -1 entries are noise
features =  tf.random.uniform((nvert,1),seed=0)


selidx,mnot,cperunique = CreateMidx(truth_idxs, calc_m_not=True)

#just a small consistency check
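#one possible check (an assumption, not from the original snippet): the
#per-object counts should sum to the number of non-noise vertices
assert int(tf.reduce_sum(cperunique)) == int(tf.reduce_sum(tf.cast(truth_idxs >= 0, 'int32')))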



#print(truth_idxs)
#print(selidx)
#print(mnot)
#print(cperunique)

beta_m  = SelectWithDefault(selidx, features, -1.)

kalpha_m = tf.argmax(beta_m,axis=1) 
#print(beta_m, kalpha_m)

#print(tf.gather_nd(beta_m,kalpha_m, batch_dims=1))

#now test the whole loss

from object_condensation import oc_per_batch_element, oc_per_batch_element_old

'''
oc_per_batch_element(
        beta,
        x,
        q_min,
        ...
'''
Example #12
def oc_per_batch_element(
        beta,
        x,
        q_min,
        object_weights,  # V x 1 !!
        truth_idx,
        is_spectator,
        payload_loss,
        S_B=1.,
        noise_q_min=None,
        distance_scale=None,
        payload_weight_function=None,  #receives betas as K x V x 1 as input, and a threshold val
        payload_weight_threshold=0.8,
        use_mean_x=0.,
        cont_beta_loss=False,
        prob_repulsion=False,
        phase_transition=False,
        phase_transition_double_weight=False,
        payload_beta_gradient_damping_strength=0.,
        kalpha_damping_strength=0.,
        beta_gradient_damping=0.,
        soft_q_scaling=True,
        weight_by_q=False,
        repulsion_q_min=-1.,
        super_repulsion=False,
        super_attraction=False,
        div_repulsion=False,
        soft_att=True,
        dynamic_payload_scaling_onset=-0.03):
    '''
    all inputs
    V x X , where X can be 1
    '''
    tf.assert_equal(True, is_spectator >= 0.)
    tf.assert_equal(True, beta >= 0.)

    if prob_repulsion:
        raise ValueError("prob_repulsion not implemented")
    if phase_transition_double_weight:
        raise ValueError("phase_transition_double_weight not implemented")
    if payload_weight_function is not None:
        raise ValueError("payload_weight_function not implemented")

    #set all spectators invalid here, everything scales with beta, so:
    if beta_gradient_damping > 0.:
        beta = beta_gradient_damping * tf.stop_gradient(beta) + (
            1. - beta_gradient_damping) * beta
    beta_in = beta
    beta = tf.clip_by_value(beta, 0., 1. - 1e-4)

    q_min *= (1. - is_spectator)

    qraw = tf.math.atanh(beta)**2
    if soft_q_scaling:
        qraw = tf.math.atanh(beta_in / 1.002)**2  #beta_in**4 *20.

    is_noise = tf.where(truth_idx < 0,
                        tf.zeros_like(truth_idx, dtype='float32') + 1.,
                        0.)  #V x 1
    if noise_q_min is not None:
        q_min = (1. - is_noise) * q_min + is_noise * noise_q_min

    q_min = tf.where(q_min < 0, 0., q_min)  #just safety in case there are some numerical effects

    q = qraw + q_min  # V x 1
    #q = tf.where(beta_in<1.-1e-4, q, tf.math.atanh(1.-1e-4)**2 + q_min + beta_in) #just give the rest above clip a gradient

    N = tf.cast(beta.shape[0], dtype='float32')

    Msel, M_not, N_per_obj = CreateMidx(truth_idx, calc_m_not=True)
    #use eager here
    if Msel is None:
        #V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen
        print(
            '>>> WARNING: Event has no objects, only noise! Will return zero loss. <<<'
        )
        zero_tensor = tf.reduce_mean(q, axis=0) * 0.
        zero_payload = tf.reduce_mean(payload_loss, axis=0) * 0.
        return zero_tensor, zero_tensor, zero_tensor, zero_tensor, zero_payload, zero_tensor

    N_per_obj = tf.cast(N_per_obj, dtype='float32')  # K x 1

    K = tf.cast(Msel.shape[0], dtype='float32')

    ########################################################
    #sanity check, use none of the following for the loss calculation
    truth_m = SelectWithDefault(Msel, truth_idx, -2)  #K x V-obj x 1
    truth_same = truth_m[:, 0:1] == truth_m
    truth_same = tf.where(truth_m == -2, True, truth_same)
    tf.assert_equal(
        tf.reduce_all(truth_same),
        True,
        message="truth indices do not match object selection, serious bug")
    #end sanity check
    ########################################################

    padmask_m = SelectWithDefault(Msel,
                                  tf.zeros_like(beta_in) + 1.,
                                  0.)  #K x V-obj x 1
    x_m = SelectWithDefault(Msel, x, 0.)  #K x V-obj x C
    beta_m = SelectWithDefault(Msel, beta, 0.)  #K x V-obj x 1
    is_spectator_m = SelectWithDefault(Msel, is_spectator, 0.)  #K x V-obj x 1
    q_m = SelectWithDefault(Msel, q, 0.)  #K x V-obj x 1
    object_weights_m = SelectWithDefault(Msel, object_weights, 0.)

    distance_scale += 1e-3
    distance_scale_m = SelectWithDefault(Msel, distance_scale, 1.)

    tf.assert_greater(distance_scale_m,
                      0.,
                      message="predicted distances must be greater zero")

    kalpha_m = tf.argmax((1. - is_spectator_m) * beta_m, axis=1)  # K x 1

    x_kalpha_m = tf.gather_nd(x_m, kalpha_m, batch_dims=1)  # K x C
    if use_mean_x > 0:
        x_kalpha_m_m = tf.reduce_sum(beta_m * q_m * x_m * padmask_m,
                                     axis=1)  # K x C
        x_kalpha_m_m = tf.math.divide_no_nan(
            x_kalpha_m_m,
            tf.reduce_sum(beta_m * q_m * padmask_m, axis=1) + 1e-9)
        x_kalpha_m = use_mean_x * x_kalpha_m_m + (1. - use_mean_x) * x_kalpha_m

    if kalpha_damping_strength > 0:
        x_kalpha_m = kalpha_damping_strength * tf.stop_gradient(x_kalpha_m) + (
            1. - kalpha_damping_strength) * x_kalpha_m

    q_kalpha_m = tf.gather_nd(q_m, kalpha_m, batch_dims=1)  # K x 1
    beta_kalpha_m = tf.gather_nd(beta_m, kalpha_m, batch_dims=1)  # K x 1

    object_weights_kalpha_m = tf.gather_nd(object_weights_m,
                                           kalpha_m,
                                           batch_dims=1)  # K x 1

    #make the distance scale a beta weighted mean so that there is more than 1 impact per object
    distance_scale_kalpha_m = tf.math.divide_no_nan(
        tf.reduce_sum(distance_scale_m * beta_m * padmask_m, axis=1),
        tf.reduce_sum(beta_m * padmask_m, axis=1) + 1e-3) + 1e-3  #K x 1
    #distance_scale_kalpha_m = tf.gather_nd(distance_scale_m,kalpha_m, batch_dims=1) # K x 1

    distance_scale_kalpha_m_exp = tf.expand_dims(distance_scale_kalpha_m,
                                                 axis=2)  # K x 1 x 1

    distancesq_m = tf.reduce_sum((tf.expand_dims(x_kalpha_m, axis=1) - x_m)**2,
                                 axis=-1,
                                 keepdims=True)  #K x V-obj x 1
    distancesq_m = tf.math.divide_no_nan(
        distancesq_m, 2. * distance_scale_kalpha_m_exp**2 + 1e-6)

    absdist = tf.sqrt(distancesq_m + 1e-6)
    huberdistsq = huber(absdist, d=4)  #acts at 4
    if super_attraction:
        huberdistsq += 1. - tf.math.exp(-100. * absdist)

    V_att = q_m * tf.expand_dims(q_kalpha_m,
                                 axis=1) * huberdistsq  #K x V-obj x 1

    if soft_att:
        V_att = q_m * tf.math.log(tf.math.exp(1.) * distancesq_m + 1.)

    V_att = V_att * tf.expand_dims(object_weights_kalpha_m,
                                   axis=1)  #K x V-obj x 1

    if weight_by_q:
        V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1),
                                      tf.reduce_sum(q_m, axis=1))  # K x 1
    else:
        V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1),
                                      N_per_obj + 1e-9)  # K x 1

    # opt. used later in payload loss
    V_att_K = V_att
    V_att = tf.math.divide_no_nan(tf.reduce_sum(V_att, axis=0), K + 1e-9)  # 1

    #what if Vatt and Vrep are weighted by q, not scaled by it?
    q_rep = q
    if repulsion_q_min >= 0:
        raise ValueError("repulsion_q_min >= 0: spectators TBI")
        q_rep = (qraw + repulsion_q_min) * (1. - is_spectator)
        q_kalpha_m += repulsion_q_min - q_min

    #now the bit that needs Mnot
    Mnot_distances = tf.expand_dims(x_kalpha_m, axis=1)  #K x 1 x C
    Mnot_distances = Mnot_distances - tf.expand_dims(x, axis=0)  #K x V x C

    rep_distances = tf.reduce_sum(Mnot_distances**2, axis=-1,
                                  keepdims=True)  #K x V x 1

    rep_distances = tf.math.divide_no_nan(
        rep_distances, 2. * distance_scale_kalpha_m_exp**2 + 1e-6)

    V_rep = tf.math.exp(
        -rep_distances
    )  #1. / (V_rep + 0.1) #-2.*tf.math.log(1.-tf.math.exp(-V_rep/2.)+1e-5)

    if super_repulsion:
        V_rep += 10. * tf.math.exp(-100. * tf.sqrt(rep_distances + 1e-6))

    if div_repulsion:
        V_rep = 1. / (rep_distances + 0.1)

    #spec weights are in q
    V_rep *= M_not * tf.expand_dims(q_rep, axis=0)  #K x V x 1
    V_rep = tf.reduce_sum(V_rep, axis=1)  #K x 1

    V_rep *= object_weights_kalpha_m * q_kalpha_m  #K x 1

    if weight_by_q:
        sumq = tf.reduce_sum(M_not * tf.expand_dims(q_rep, axis=0), axis=1)
        V_rep = tf.math.divide_no_nan(V_rep, sumq)  # K x 1
    else:
        V_rep = tf.math.divide_no_nan(
            V_rep,
            tf.expand_dims(tf.expand_dims(N, axis=0), axis=0) - N_per_obj +
            1e-9)  # K x 1
    # opt used later in payload loss
    V_rep_K = V_rep
    V_rep = tf.math.divide_no_nan(tf.reduce_sum(V_rep, axis=0), K + 1e-9)  # 1

    B_pen = None

    def bpenhelp(b_m, exponent: int):
        b_mes = tf.reduce_sum(b_m**exponent, axis=1)
        if not exponent == 1:
            b_mes = (b_mes + 1e-16)**(1. / float(exponent))
        return tf.math.log((1. - b_mes)**2 + 1. + 1e-8)

    if phase_transition:
        ## beta terms
        B_pen = -tf.reduce_sum(padmask_m * 1. / (20. * distancesq_m + 1.),
                               axis=1)  # K x 1
        B_pen += 1.  #remove self-interaction term (just for offset)
        B_pen *= object_weights_kalpha_m * beta_kalpha_m
        B_pen = tf.math.divide_no_nan(B_pen, N_per_obj + 1e-9)  # K x 1
        #now 'standard' 1-beta
        B_pen -= 0.2 * object_weights_kalpha_m * (
            tf.math.log(beta_kalpha_m + 1e-9)
        )  #tf.math.sqrt(beta_kalpha_m+1e-6)
        #another "-> 1, but slower" per object
        B_pen = tf.math.divide_no_nan(tf.reduce_sum(B_pen, axis=0),
                                      K + 1e-9)  # 1

    else:
        B_pen_po = object_weights_kalpha_m * (1. - beta_kalpha_m)
        B_pen = tf.math.divide_no_nan(tf.reduce_sum(B_pen_po, axis=0),
                                      K + 1e-9)  #1
        #get out of random gradients in the beginning
        #introduces gradients on all betas of hits rather than just the max one
        B_up = tf.math.divide_no_nan(
            tf.reduce_sum((1. - is_noise) * (1. - beta_in)),
            N - tf.reduce_sum(is_noise))
        B_pen += 0.01 * B_pen * B_up  #if it's high try to elevate all betas

    if cont_beta_loss:
        B_pen = bpenhelp(beta_m, 2) + bpenhelp(beta_m, 4)
        B_pen = tf.math.divide_no_nan(
            tf.reduce_sum(object_weights_kalpha_m * B_pen, axis=0), K + 1e-9)

    too_much_B_pen = object_weights_kalpha_m * bpenhelp(
        beta_m, 1)  #K x 1, don't make it steep
    too_much_B_pen = tf.math.divide_no_nan(tf.reduce_sum(too_much_B_pen),
                                           K + 1e-9)

    Noise_pen = S_B * tf.math.divide_no_nan(tf.reduce_sum(is_noise * beta_in),
                                            tf.reduce_sum(is_noise) + 1e-3)

    #explicit payload weight function here, the old one was odd

    #too aggressive scaling is bad for high learning rates.
    p_w = padmask_m * tf.math.atanh(beta_m / 1.002)**2  #this is well behaved

    if payload_beta_gradient_damping_strength > 0:
        p_w = payload_beta_gradient_damping_strength * tf.stop_gradient(p_w) + \
        (1.- payload_beta_gradient_damping_strength)* p_w

    payload_loss_m = p_w * SelectWithDefault(
        Msel, (1. - is_noise) * payload_loss, 0.)  #K x V_perobj x P
    payload_loss_m = object_weights_kalpha_m * tf.reduce_sum(payload_loss_m,
                                                             axis=1)  # K x P

    #here normalisation per object
    payload_loss_m = tf.math.divide_no_nan(payload_loss_m,
                                           tf.reduce_sum(p_w, axis=1))

    #print('dynamic_payload_scaling_onset',dynamic_payload_scaling_onset)
    if dynamic_payload_scaling_onset > 0:
        #stop gradient
        V_scaler = tf.stop_gradient(V_rep_K + V_att_K)  # K x 1
        #print('N_per_obj[V_scaler=0]',N_per_obj[V_scaler==0])
        #max of V_scaler is around 1 given the potentials
        scaling = tf.exp(-tf.math.log(2.) * V_scaler /
                         (dynamic_payload_scaling_onset / 5.))
        #print('affected fraction',tf.math.count_nonzero(scaling>0.5,dtype='float32')/K,'max',tf.reduce_max(V_scaler,axis=0,keepdims=True))
        payload_loss_m *= scaling  #basically the onset of the rise
    #pll = tf.math.divide_no_nan(payload_loss_m, N_per_obj+1e-9) # K x P #really?
    pll = tf.math.divide_no_nan(tf.reduce_sum(payload_loss_m, axis=0),
                                K + 1e-3)  # P

    return V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen
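For orientation, a toy invocation sketch with random inputs; this assumes eager mode, that the repo helpers (CreateMidx, SelectWithDefault, huber) are importable, and that tensor shapes follow the signature comments:

import tensorflow as tf

V, C, P = 50, 2, 3
V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen = oc_per_batch_element(
    beta=tf.random.uniform((V, 1), 0., 0.99),
    x=tf.random.uniform((V, C)),
    q_min=0.1,
    object_weights=tf.ones((V, 1)),
    truth_idx=tf.random.uniform((V, 1), -1, 4, dtype='int32'),
    is_spectator=tf.zeros((V, 1)),
    payload_loss=tf.random.uniform((V, P)),
    distance_scale=tf.ones((V, 1)))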