# Add the model's L2 regularisation terms to the total loss.
# NOTE(review): the meaning of the b_pool/a_pool/multi weights is defined
# inside model.get_l2_reg -- not visible in this file; confirm there.
loss_ += model.get_l2_reg(l2_reg_b_pool, l2_reg_a_pool, l2_reg_multi)

# Variables to train and their gradients w.r.t. the (regularised) loss.
params_to_update_ = model.get_upd_parameters()
print(params_to_update_)
grads_, vars_ = list(
    zip(*optim.compute_gradients(loss=loss_,
                                 var_list=params_to_update_,
                                 gate_gradients=optim.GATE_GRAPH)))

info_ = []  # For debugging, extra tensors (e.g. reg_loss, grads) can be added here; they will be printed each epoch.

# Apply Kaldi style gradient repairing.
# TODO: This is hardwired in some other scripts but not added here yet.
# So far (2018-07-19) this has not improved performance though.
min_op = optim.apply_gradients(list(zip(grads_, vars_)))

# Fetches run at each training step: debug info, scalar loss, update op.
tr_outputs_ = [info_, loss_, min_op]


def train_function(X1, C1, WGT_m, L_m, lr):
    # One training step: build the feed dict for a batch and (below) run
    # the fetches in tr_outputs_.
    if not annoying_train:
        tr_inputs_ = {
            X1_p: X1,
            C1_p: C1,
            WGT_m_p: WGT_m,
            L_m_p: L_m,
            lr_p: lr,
            is_test_p: False
        }
    else:
        tr_inputs_ = {
loss_reco_ = tf.losses.mean_squared_error(labels=reco_tgt_, predictions=reco_pred_) # Stuff for training "generator", "speaker classifiers" and feature/bottleneck predictors params_to_update_ = model.get_upd_parameters( ) + tdnn_fe_pred.get_upd_parameters() + bnorm_fe_pred.get_upd_parameters() params_to_update_ += [P_GB_] + [mu_GB_] + bnorm_GB.get_upd_parameters() Loss_1_ = class_weight * loss_c_ + mse_weight * loss_reco_ Loss_ = Loss_1_ + loss_GB_c_ grads_, vars_ = zip( *optim.compute_gradients(loss=Loss_, var_list=params_to_update_, gate_gradients=optim.GATE_GRAPH)) min_op_ = optim.apply_gradients(zip(grads_, vars_)) tr_outputs_ = [loss_c_, loss_reco_, Loss_1_, min_op_] tr_outputs_ += [loss_GB_c_] log.info("Parameters to update:") print params_to_update_ def train_function(X1, C1, WGT_c, L_c, X2_r, P2, lr): if not annoying_train: tr_inputs_ = { X1_p: X1, C1_p: C1, WGT_c_p: WGT_c, L_c_p: L_c, X2_r_p: X2_r, P2_p: P2,
# --- Critic inner-training-loop setup -----------------------------------------
# We exclude the last critic parameter (a scalar bias). It is initialized to 0,
# so leaving it out of the update list means it is effectively never used.
params_to_update_critic = critic_nn.get_upd_parameters()[:-1]
log.info("Parameters to update in critic inner training loop:")
# Fix: parenthesized print -- valid in Python 2 and 3, consistent with the
# print(...) call used in the other training-setup branch of this script.
print(params_to_update_critic)

grads_w_, vars_w_ = zip(
    *optim.compute_gradients(loss=Loss_w_grad_,
                             var_list=params_to_update_critic,
                             gate_gradients=optim.GATE_GRAPH))


def get_w_input(X1, C1):
    # Run the graph up to embd_A_ for one batch and return the result.
    # NOTE(review): C1 is accepted but not fed -- an empty array goes to C1_p;
    # presumably intentional, verify against callers.
    inputs_ = {X1_p: X1, C1_p: np.array([]), is_test_p: False}
    outputs_ = embd_A_
    return sess.run(outputs_, inputs_)


# Fix: wrap zip(...) in list(...) so the grads-and-vars pairing is a reusable
# sequence rather than a one-shot iterator under Python 3 (matches the
# list(zip(...)) idiom used elsewhere in this script; no-op under Python 2).
min_op_w = optim.apply_gradients(list(zip(grads_w_, vars_w_)))

# Fetches for one critic update step.  TODO(review): is the M1 fetch needed here?
tr_outputs_w_ = [loss_w_, loss_grad_, Loss_w_grad_, min_op_w]


def train_function_w(M1, h1, lr):
    # One critic update step on pre-extracted embeddings M1 (fed directly
    # into embd_A_) with targets h1 and learning rate lr.
    tr_inputs_w_ = {embd_A_: M1, h1_p: h1, lr_p: lr, is_test_p: False}
    return sess.run(tr_outputs_w_, tr_inputs_w_)


# Stuff for training "generator" and speaker classifier network.
params_to_update_g_c_ = model.get_upd_parameters()
Loss_w_c_ = loss_c_ + delta * loss_w_
grads_g_c_, vars_g_c_ = zip(
    *optim.compute_gradients(loss=Loss_w_c_,
                             var_list=params_to_update_g_c_,
                             gate_gradients=optim.GATE_GRAPH))
min_op_g_c_ = optim.apply_gradients(list(zip(grads_g_c_, vars_g_c_)))