# Stuff for critic (will be done several times per training iteration)
Loss_w_grad_ = -(loss_w_ - gamma * loss_grad_)  # We take the negative of this since the critic wants to maximize it.

log.warning("WARNING: Last bias in critic is not updated.")  # Its current value is critic_nn.get_parameter_values()[-1].
params_to_update_critic = critic_nn.get_upd_parameters()[:-1]  # We exclude the last bias (scalar). It is initialized to 0, so this way it is effectively not used.

log.info("Parameters to update in critic inner training loop:")
print(params_to_update_critic)

grads_w_, vars_w_ = zip(*optim.compute_gradients(loss=Loss_w_grad_, var_list=params_to_update_critic,
                                                 gate_gradients=optim.GATE_GRAPH))

def get_w_input(X1, C1):
    inputs_  = {X1_p: X1, C1_p: np.array([]), is_test_p: False}
    outputs_ = embd_A_
    return sess.run(outputs_, inputs_)

min_op_w = optim.apply_gradients(zip(grads_w_, vars_w_))

### M1 needed ??
tr_outputs_w_ = [loss_w_, loss_grad_, Loss_w_grad_, min_op_w]

def train_function_w(M1, h1, lr):
    tr_inputs_w_ = {embd_A_: M1, h1_p: h1, lr_p: lr, is_test_p: False}
    return sess.run(tr_outputs_w_, tr_inputs_w_)
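# --- Hedged sketch (not part of the original script) -----------------------
# Minimal illustration of how the critic pieces defined above could be driven in
# the inner loop that, per the comment at the top, runs several times per training
# iteration. The names `batch_iter`, `n_critic_steps` and `lr_critic` are
# hypothetical; the real loop lives elsewhere in the training code.
def critic_inner_loop_sketch(batch_iter, n_critic_steps, lr_critic):
    for _ in range(n_critic_steps):
        X1, C1, h1 = next(batch_iter)       # One minibatch; exact contents depend on the iterator.
        M1 = get_w_input(X1, C1)            # Embeddings produced by the current "generator".
        loss_w, loss_grad, loss_tot, _ = train_function_w(M1, h1, lr_critic)
        log.info("Critic step: loss_w=%f loss_grad=%f total=%f", loss_w, loss_grad, loss_tot)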
# Stuff for training "generator" and speaker classifier network
loss_c_ = loss_c_ / np.log(n_spk)  # Normalize with the loss of random predictions
loss_reco_ = tf.losses.mean_squared_error(labels=reco_tgt_, predictions=reco_pred_)

# Stuff for training "generator", "speaker classifiers" and feature/bottleneck predictors
params_to_update_  = model.get_upd_parameters() + tdnn_fe_pred.get_upd_parameters() + bnorm_fe_pred.get_upd_parameters()
params_to_update_ += [P_GB_] + [mu_GB_] + bnorm_GB.get_upd_parameters()

Loss_1_ = class_weight * loss_c_ + mse_weight * loss_reco_
Loss_   = Loss_1_ + loss_GB_c_

grads_, vars_ = zip(*optim.compute_gradients(loss=Loss_, var_list=params_to_update_,
                                             gate_gradients=optim.GATE_GRAPH))
min_op_ = optim.apply_gradients(zip(grads_, vars_))

tr_outputs_  = [loss_c_, loss_reco_, Loss_1_, min_op_]
tr_outputs_ += [loss_GB_c_]

log.info("Parameters to update:")
print(params_to_update_)

def train_function(X1, C1, WGT_c, L_c, X2_r, P2, lr):
    if not annoying_train:
        tr_inputs_ = {
            X1_p: X1, C1_p: C1, WGT_c_p: WGT_c, L_c_p: L_c,
log.info("Resetting it_tr_que") # Actually this will not be sufficient to avoid assertion error beacuae batch number is it_tr_que.batch_number=0 # already in the prepared batches. However, below I reinitialize it_tr_que. This mess # should be solved. ############################################################################# ### --- Define loss, train functions etc. ------------------------------- ### ############################################################################# loss_ = tf.tensordot(WGT_m_p, tf.nn.sparse_softmax_cross_entropy_with_logits( labels = L_m_p, logits = C_m_ ), axes=[[0], [0]] ) loss_ = loss_ / np.log(n_spk) # Normalize with the loss of random predictions loss_ += model.get_l2_reg(l2_reg_b_pool, l2_reg_a_pool, l2_reg_multi) params_to_update_ = model.get_upd_parameters() print(params_to_update_) grads_, vars_ = list(zip(*optim.compute_gradients(loss = loss_, var_list = params_to_update_, gate_gradients=optim.GATE_GRAPH))) info_ = [ ] # For debugging something can be added here (e.g. reg_loss, grads) will be printe each epoch # Apply Kaldi style gradient repairing # TODO: This is hardwired in some other scripts but not added here yet. # So far (2018-07-19) this has not improved performace though. min_op = optim.apply_gradients(list(zip(grads_,vars_))) tr_outputs_ = [info_, loss_, min_op] def train_function(X1, C1, WGT_m, L_m, lr): if not annoying_train: tr_inputs_ = {X1_p: X1, C1_p: C1, WGT_m_p: WGT_m, L_m_p: L_m, lr_p: lr, is_test_p: False} else: