def tf_so8_sugra_potential(t_v70):
  """Returns dict with key tensors from the SUGRA potential's TF graph."""
  tc_28_8_8 = tf.constant(su8.m_28_8_8)
  t_e7_generator_v70 = tf.einsum(
      'v,vIJ->JI',
      tf.complex(t_v70, tf.constant([0.0] * 70, dtype=tf.float64)),
      tf.constant(e7.t_a_ij_kl[:70, :, :], dtype=tf.complex128))
  t_complex_vielbein = tf.linalg.expm(t_e7_generator_v70)
  def expand_ijkl(t_ab):
    return 0.5 * tf.einsum(
        'ijB,BIJ->ijIJ',
        tf.einsum('AB,Aij->ijB', t_ab, tc_28_8_8),
        tc_28_8_8)
  #
  t_u_ijIJ = expand_ijkl(t_complex_vielbein[:28, :28])
  t_u_klKL = expand_ijkl(t_complex_vielbein[28:, 28:])
  t_v_ijKL = expand_ijkl(t_complex_vielbein[:28, 28:])
  t_v_klIJ = expand_ijkl(t_complex_vielbein[28:, :28])
  #
  t_uv = t_u_klKL + t_v_klIJ
  t_uuvv = (tf.einsum('lmJK,kmKI->lkIJ', t_u_ijIJ, t_u_klKL) -
            tf.einsum('lmJK,kmKI->lkIJ', t_v_ijKL, t_v_klIJ))
  t_T = tf.einsum('ijIJ,lkIJ->lkij', t_uv, t_uuvv)
  t_A1 = (-4.0 / 21.0) * tf.trace(tf.einsum('mijn->ijmn', t_T))
  t_A2 = (-4.0 / (3 * 3)) * (
      # Antisymmetrize in last 3 indices, taking into account antisymmetry
      # in last two indices.
      t_T
      + tf.einsum('lijk->ljki', t_T)
      + tf.einsum('lijk->lkij', t_T))
  t_A1_real = tf.real(t_A1)
  t_A1_imag = tf.imag(t_A1)
  t_A2_real = tf.real(t_A2)
  t_A2_imag = tf.imag(t_A2)
  t_A1_potential = (-3.0 / 4) * (
      tf.einsum('ij,ij->', t_A1_real, t_A1_real) +
      tf.einsum('ij,ij->', t_A1_imag, t_A1_imag))
  t_A2_potential = (1.0 / 24) * (
      tf.einsum('ijkl,ijkl->', t_A2_real, t_A2_real) +
      tf.einsum('ijkl,ijkl->', t_A2_imag, t_A2_imag))
  t_potential = t_A1_potential + t_A2_potential
  #
  return dict(v70=t_v70,
              vielbein=t_complex_vielbein,
              tee_tensor=t_T,
              a1=t_A1,
              a2=t_A2,
              potential=t_potential)
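# A minimal usage sketch (assumptions: TF 1.x, and the `su8`/`e7` algebra
# modules this function references are importable from the surrounding
# repository). At the origin of the 70-dimensional scalar manifold the
# potential should come out as -6.0, the value of the maximally
# supersymmetric SO(8) point.
import numpy as np
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
  t_v70 = tf.placeholder(tf.float64, shape=[70])
  sugra_tensors = tf_so8_sugra_potential(t_v70)
with tf.Session(graph=graph) as sess:
  print(sess.run(sugra_tensors['potential'],
                 feed_dict={t_v70: np.zeros(70)}))  # Expected: -6.0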
def c2q(w, gain):
    """
    Scale by gain and convert from complex w(:,:,1:2) to real quad-numbers
    in z.

    Arrange pixels from the real and imag parts of the 2 highpasses
    into 4 separate subimages.
     A----B     Re   Im of w(:,:,1)
     |    |
     |    |
     C----D     Re   Im of w(:,:,2)
    """
    # Input has shape [batch, r, c, 2]
    r, c = w.get_shape().as_list()[1:3]

    sc = np.sqrt(0.5) * gain
    P = w[:, :, :, 0] * sc[0] + w[:, :, :, 1] * sc[1]
    Q = w[:, :, :, 0] * sc[0] - w[:, :, :, 1] * sc[1]

    # Recover each of the 4 corners of the quads.
    x1 = tf.real(P)
    x2 = tf.imag(P)
    x3 = tf.imag(Q)
    x4 = -tf.real(Q)

    # Stack 2 inputs of shape [batch, r, c] to [batch, r, 2, c]
    x_rows1 = tf.stack([x1, x3], axis=-2)
    # Reshaping interleaves the results
    x_rows1 = tf.reshape(x_rows1, [-1, 2 * r, c])
    # Do the same for the even columns
    x_rows2 = tf.stack([x2, x4], axis=-2)
    x_rows2 = tf.reshape(x_rows2, [-1, 2 * r, c])

    # Stack the two [batch, 2*r, c] tensors to [batch, 2*r, c, 2]
    x_cols = tf.stack([x_rows1, x_rows2], axis=-1)
    y = tf.reshape(x_cols, [-1, 2 * r, 2 * c])

    return y
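# A hypothetical smoke test for c2q (not from the original file): two
# complex highpasses of shape [batch, r, c] interleave into a single real
# image of shape [batch, 2*r, 2*c].
import numpy as np
import tensorflow as tf

w = tf.complex(tf.random_normal([1, 4, 4, 2]),
               tf.random_normal([1, 4, 4, 2]))
y = c2q(w, gain=np.array([1.0, 1.0], dtype=np.float32))
with tf.Session() as sess:
    print(sess.run(y).shape)  # (1, 8, 8)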
def tf_so8_sugra_stationarity(t_a1, t_a2):
  """Computes the stationarity-condition tensor."""
  # See: https://arxiv.org/pdf/1302.6219.pdf, text after (3.2).
  t_x0 = (+4.0 * tf.einsum('mi,mjkl->ijkl', t_a1, t_a2)
          - 3.0 * tf.einsum('mnij,nklm->ijkl', t_a2, t_a2))
  t_x0_real = tf.real(t_x0)
  t_x0_imag = tf.imag(t_x0)
  tc_sd = tf.constant(get_proj_35_8888(True))
  tc_asd = tf.constant(get_proj_35_8888(False))
  t_x_real_sd = tf.einsum('aijkl,ijkl->a', tc_sd, t_x0_real)
  t_x_imag_asd = tf.einsum('aijkl,ijkl->a', tc_asd, t_x0_imag)
  return (tf.einsum('a,a->', t_x_real_sd, t_x_real_sd) +
          tf.einsum('a,a->', t_x_imag_asd, t_x_imag_asd))
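# A short sketch of how the two graph pieces compose (same assumptions and
# imports as the sketch above): the A1/A2 tensors produced by
# tf_so8_sugra_potential feed the stationarity scalar, which is a sum of
# squares and so is non-negative, vanishing exactly at critical points of
# the potential.
import tensorflow as tf

t_v70 = tf.placeholder(tf.float64, shape=[70])
sugra_tensors = tf_so8_sugra_potential(t_v70)
t_stationarity = tf_so8_sugra_stationarity(sugra_tensors['a1'],
                                           sugra_tensors['a2'])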
def stabilized_power_compress_abs(values, power=0.5, offset=1e-8):
  """Outputs stabilized power-law compression of the abs of the input."""
  if values.dtype is tf.complex64:
    # Note that tf.abs(a+bj) = tf.sqrt(a*a+b*b).
    # Need to avoid 0.0 for complex numbers.
    # The offset is by default a magnitude-level offset; to act as a
    # power-level offset it would need to be squared. However,
    # (1e-8)**2 = 1e-16 may be too small, so offset**1.5 is used as the
    # power-level offset instead.
    stabilized_values = stabilized_real_imag_abs(
        tf.real(values), tf.imag(values), offset=offset**1.5)
  else:
    stabilized_values = tf.abs(values) + offset
  return stabilized_values if power == 1.0 else tf.pow(
      stabilized_values, power)
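# A minimal usage sketch (assuming stabilized_real_imag_abs, defined
# elsewhere in this file, is in scope): both branches keep the magnitude
# strictly positive, so the default 0.5-power stays differentiable even
# where the input is exactly zero.
import tensorflow as tf

complex_spec = tf.zeros([2, 3], dtype=tf.complex64)
real_spec = tf.zeros([2, 3], dtype=tf.float32)
c_out = stabilized_power_compress_abs(complex_spec)  # uses offset**1.5
r_out = stabilized_power_compress_abs(real_spec)     # uses offset directly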
def build(self):
    self.audios = tf.placeholder(tf.float32,
                                 [self.batch_size, self.n_speaker, None],
                                 name='input_signals')
    self.mix_input = tf.reduce_sum(self.audios, axis=1)

    with tf.variable_scope("encoder"):
        # [batch, encode_len, channels]
        encoded_input = tf.layers.Conv1D(
            filters=self.config["model"]["filters"]["ae"],
            kernel_size=self.fft_len,
            strides=self.fft_hop,
            activation=tf.nn.relu,
            name="conv1d_relu")(tf.expand_dims(self.mix_input, -1))

    stfts_mix = tf.signal.stft(self.mix_input,
                               frame_length=self.fft_len,
                               frame_step=self.fft_hop,
                               fft_length=self.fft_len,
                               window_fn=self.fft_wnd)
    magni_mix = tf.abs(stfts_mix)
    phase_mix = tf.atan2(tf.imag(stfts_mix), tf.real(stfts_mix))

    with tf.variable_scope("bottle_start"):
        norm_input = self.cLN(
            tf.concat([encoded_input, tf.log1p(magni_mix)], axis=-1),
            "layer_norm")
        block_input = tf.layers.Conv1D(
            filters=self.config["model"]["filters"]["1*1-conv"],
            kernel_size=1)(norm_input)

    for stack_i in range(self.num_stacks):
        for dilation in self.dilations:
            with tf.variable_scope("conv_block_{}_{}".format(
                    stack_i, dilation)):
                block_output = tf.layers.Conv1D(
                    filters=self.config["model"]["filters"]["d-conv"],
                    kernel_size=1)(block_input)
                block_output = self.prelu(block_output,
                                          name='1st-prelu',
                                          shared_axes=[1])
                block_output = self.gLN(block_output, "first")
                block_output = self._depthwise_conv1d(block_output, dilation)
                block_output = self.prelu(block_output,
                                          name='2nd-prelu',
                                          shared_axes=[1])
                block_output = self.gLN(block_output, "second")
                block_output = tf.layers.Conv1D(
                    filters=self.config["model"]["filters"]["1*1-conv"],
                    kernel_size=1)(block_output)
                block_input += block_output

    if self.output_ratio == 1:
        embed_channel = self.config["model"]["filters"]["ae"]
        feature_map = encoded_input
    elif self.output_ratio == 0:
        embed_channel = self.stft_ch
        feature_map = magni_mix
    else:
        embed_channel = self.concat_channels
        feature_map = tf.concat([encoded_input, magni_mix], axis=-1)

    with tf.variable_scope('separator'):
        s_embed = tf.layers.Dense(
            embed_channel * self.config["model"]["embed_size"])(block_input)
        s_embed = tf.reshape(s_embed, [
            self.batch_size, -1, embed_channel,
            self.config["model"]["embed_size"]
        ])

        # Estimate attractor from best combination from anchors.
        v_anchors = tf.get_variable(
            'anchors', [self.n_anchor, self.config["model"]["embed_size"]],
            dtype=tf.float32)
        c_combs = tf.constant(list(
            itertools.combinations(range(self.n_anchor), self.n_speaker)),
            name='combs')
        s_anchor_sets = tf.gather(v_anchors, c_combs)
        s_anchor_assignment = tf.einsum('btfe,pce->bptfc',
                                        s_embed, s_anchor_sets)
        s_anchor_assignment = tf.nn.softmax(s_anchor_assignment)
        s_attractor_sets = tf.einsum('bptfc,btfe->bpce',
                                     s_anchor_assignment, s_embed)
        s_attractor_sets /= tf.expand_dims(
            tf.reduce_sum(s_anchor_assignment, axis=(2, 3)), -1)
        sp = tf.matmul(s_attractor_sets,
                       tf.transpose(s_attractor_sets, [0, 1, 3, 2]))
        diag = tf.fill(sp.shape[:-1], float("-inf"))
        sp = tf.linalg.set_diag(sp, diag)
        s_in_set_similarities = tf.reduce_max(sp, axis=(-1, -2))
        s_subset_choice = tf.argmin(s_in_set_similarities, axis=1)
        s_subset_choice_nd = tf.transpose(
            tf.stack([tf.range(self.batch_size, dtype=tf.int64),
                      s_subset_choice]))
        s_attractors = tf.gather_nd(s_attractor_sets, s_subset_choice_nd)
        s_logits = tf.einsum('btfe,bce->bctf', s_embed, s_attractors)
        output_code = s_logits * tf.expand_dims(feature_map, 1)

    with tf.variable_scope("decoder"):
        conv_out = pred_istfts = 0
        if self.output_ratio != 0:
            output_frame = tf.layers.Dense(
                self.config["model"]["kernel_size"]["ae"])(
                    output_code[..., :self.config["model"]["filters"]["ae"]])
            conv_out = tf.signal.overlap_and_add(signal=output_frame,
                                                 frame_step=self.fft_hop)
        if self.output_ratio != 1:
            phase_mix_expand = tf.expand_dims(phase_mix, 1)
            pred_stfts = tf.complex(
                tf.cos(phase_mix_expand) * output_code[..., -self.stft_ch:],
                tf.sin(phase_mix_expand) * output_code[..., -self.stft_ch:])
            pred_istfts = tf.signal.inverse_stft(
                pred_stfts,
                frame_length=self.fft_len,
                frame_step=self.fft_hop,
                fft_length=self.fft_len,
                window_fn=tf.signal.inverse_stft_window_fn(
                    self.fft_hop, forward_window_fn=self.fft_wnd))

    self.data_out = conv_out * self.output_ratio + pred_istfts * (
        1 - self.output_ratio)

    self.loss, self.pred_output, self.sdr, self.perm_idxs = loss.pit_loss(
        self.audios, self.data_out, self.config, self.batch_size,
        self.n_speaker, self.n_output)
    # TODO: fixed (non-permuted) loss not implemented yet; for now this
    # duplicates pit_loss.
    self.loss_fix, self.pred_output_fix, self.sdr_fix, self.perm_idxs_fix = (
        loss.pit_loss(self.audios, self.data_out, self.config,
                      self.batch_size, self.n_speaker, self.n_output))
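# A small standalone numpy illustration (hypothetical, not part of the
# graph) of the subset-selection rule in the separator above: score each
# candidate anchor combination by the largest pairwise similarity among
# its attractors, then keep the combination whose attractors are least
# similar to one another.
import numpy as np

attractor_sets = np.random.randn(4, 2, 8)   # [combination, speaker, embed]
sims = np.einsum('pce,pde->pcd', attractor_sets, attractor_sets)
idx = np.arange(attractor_sets.shape[1])
sims[:, idx, idx] = -np.inf                  # Exclude self-similarity.
best = np.argmin(sims.max(axis=(1, 2)))      # Index of chosen combination.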
        mean=0.0, stddev=np.sqrt(2 / (num_filter_1))), name='wf4'),
}
biases_decoder1 = {
    'bf1': tf.Variable(tf.zeros([num_filter_3]), name='bf1'),
    'bf2': tf.Variable(tf.zeros([num_filter_2]), name='bf2'),
    'bf3': tf.Variable(tf.zeros([num_filter_1]), name='bf3'),
    'bf4': tf.Variable(tf.zeros([dim_feature * n_user]), name='bf4'),
}

# Construct model
for u_ii in range(n_user):
    x_reshape = tf.reshape(x[:, :, :, u_ii], [-1, L_ * n_rx])
    x_user = tf.concat([tf.real(x_reshape), tf.imag(x_reshape)], axis=1)
    Out_encoder_low, _ = Dnn_Encoder(x_user, weights_encoder1,
                                     biases_encoder1, train_flag)
    if u_ii == 0:
        Out_encoder_low_cat = Out_encoder_low
    else:
        Out_encoder_low_cat = tf.concat(
            [Out_encoder_low_cat, Out_encoder_low], 1)

# Transmitter DNN with quantization
Out_decoder_low = Dnn_Decoder_low(Out_encoder_low_cat, weights_decoder1,
                                  biases_decoder1, train_flag)
w_esti_low = tf.reshape(Out_decoder_low, [-1, n_tx, n_rx, n_user])
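# A minimal standalone sketch (all names here hypothetical) of the
# complex-to-real feature flattening used in the per-user loop above:
# each user's complex block is flattened and its real and imaginary
# parts are concatenated into one real-valued DNN input vector.
import tensorflow as tf

x_u = tf.complex(tf.random_normal([8, 6, 2]), tf.random_normal([8, 6, 2]))
x_flat = tf.reshape(x_u, [-1, 6 * 2])                      # [8, 12] complex
x_feat = tf.concat([tf.real(x_flat), tf.imag(x_flat)], 1)  # [8, 24] real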