def get_tecogan_inputs(r_inputs, r_targets):
    """
    Generate and return the ping-pong sequence (forward and backward)
    from given inputs and targets.

    Each 5-D sequence is extended along axis 1 with its own entries in
    reverse order, starting from the second-to-last entry so that the last
    one is not repeated: [f0, ..., fN] -> [f0, ..., fN, fN-1, ..., f0].

    Args:
        r_inputs: Input sequence; axis 1 is the axis being mirrored.
        r_targets: Target sequence, extended in exactly the same way.

    Returns:
        Tuple ``(r_inputs, r_targets)`` holding the extended sequences.
    """
    def _ping_pong(frames):
        # Walk axis 1 backwards from index -2 down to index 0.
        backward = frames[:, -2::-1, :, :, :]
        return F.concatenate(frames, backward, axis=1)

    return _ping_pong(r_inputs), _ping_pong(r_targets)
def make_symmetric_matrix(_x):
    """
    For every entry of the batch, gather all the *other* entries.

    Args:
        _x (nn.Variable): shape (batch_size, *, *, *).

    Returns:
        nn.Variable: ``j_vector`` of shape
        (batch_size, batch_size - 1, *, *, *).  Row ``i`` holds every
        entry ``j != i`` in increasing order of ``j``.
    """
    def _add_leading_axis(var):
        # (d0, d1, ...) -> (1, d0, d1, ...)
        return F.reshape(var, [1, ] + list(var.shape))

    n_entries = _x.shape[0]
    # F.split with its default axis; presumably one piece per batch entry.
    pieces = F.split(_x)

    rows = []
    # --- split & gather components ---
    for i in range(n_entries):
        others = [
            _add_leading_axis(pieces[j]) for j in range(n_entries) if j != i
        ]
        # Concatenation is only needed when more than one piece is present.
        if len(others) > 1:
            row = F.concatenate(*others, axis=0)
        else:
            row = others[0]
        rows.append(_add_leading_axis(row))

    # --- concatenate ---
    return F.concatenate(*rows, axis=0)
def compute_sample_points_for_variable_depth(ray_origins, ray_directions,
                                             near_plane, far_plane,
                                             num_samples, randomize=False):
    """
    Sample points along rays whose near/far bounds differ per ray.

    Depths are linear blends ``near * (1 - s) + far * s`` for evenly spaced
    ``s`` in [0, 1].  With ``randomize=True`` each depth is instead drawn
    uniformly (via ``F.rand``) from the interval bounded by the midpoints
    to its neighbouring depths.

    Args:
        ray_origins: Ray origins; the last axis holds the coordinates.
        ray_directions: Ray directions, same layout as ``ray_origins``.
        near_plane: 1-D variable of per-ray near depths.
        far_plane: 1-D variable of per-ray far depths.
        num_samples (int): Number of depth samples per ray.  The step is
            ``1 / (num_samples - 1)``, so it must be greater than 1.
        randomize (bool): Jitter the depths inside their intervals.

    Returns:
        Tuple ``(sample_points, depth_values)``.
    """
    step = 1 / (num_samples - 1)
    # The stop value lies slightly beyond 1 so that 1.0 itself is included.
    fractions = F.arange(0, 1 + 1 / num_samples, step)
    fractions = F.broadcast(fractions[None, :],
                            (far_plane.shape[0], fractions.shape[0]))
    depth_values = near_plane[:, None] * (1 - fractions) + \
        far_plane[:, None] * fractions

    if randomize:
        midpoints = 0.5 * (depth_values[:, :-1] + depth_values[:, 1:])
        # get intervals between samples
        upper = F.concatenate(midpoints, depth_values[:, -1:], axis=-1)
        lower = F.concatenate(depth_values[:, :1], midpoints, axis=-1)
        jitter = F.rand(shape=depth_values.shape)
        depth_values = lower + (upper - lower) * jitter

    origins = ray_origins[..., None, :]
    offsets = ray_directions[..., None, :] * depth_values[..., :, None]
    sample_points = origins + offsets
    return sample_points, depth_values
def build_model():
    """
    Build the encoder/decoder graph with global (dot) attention.

    Relies on names defined outside this function: ``batch_size``,
    ``sentence_length_source``, ``sentence_length_target``,
    ``vocab_size_source``, ``vocab_size_target``, ``embedding_size``,
    ``hidden``, ``w2i_target`` and the helpers ``get_mask``,
    ``time_distributed``, ``lstm``, ``global_attention`` and
    ``time_distributed_softmax_cross_entropy``.

    Returns:
        Tuple ``(x, y, loss)``: the source-id input variable, the target-id
        input variable and the scalar loss.
    """
    x = nn.Variable((batch_size, sentence_length_source))
    mask = get_mask(x)
    y = nn.Variable((batch_size, sentence_length_target))

    embed = time_distributed(PF.embed)

    enc_input = embed(x, vocab_size_source, embedding_size,
                      name='enc_embeddings') * mask
    # -> (batch_size, sentence_length_source, embedding_size)

    # Decoder input is the target shifted right by one, starting with <bos>.
    bos_column = F.constant(w2i_target['<bos>'], shape=(batch_size, 1))
    shifted_target = y[:, :sentence_length_target - 1]
    dec_input = F.concatenate(bos_column, shifted_target, axis=1)
    dec_input = time_distributed(PF.embed)(dec_input, vocab_size_target,
                                           embedding_size,
                                           name='dec_embeddings')
    # -> (batch_size, sentence_length_target, embedding_size)

    # encoder
    with nn.parameter_scope('encoder'):
        enc_output, c, h = lstm(enc_input, hidden, mask=mask,
                                return_sequences=True, return_state=True)
        # -> (batch_size, sentence_length_source, hidden),
        #    (batch_size, hidden), (batch_size, hidden)

    # decoder
    with nn.parameter_scope('decoder'):
        dec_output = lstm(dec_input, hidden, initial_state=(c, h),
                          return_sequences=True)
        # -> (batch_size, sentence_length_target, hidden)
        attention_output = global_attention(dec_output, enc_output,
                                            mask=mask, score='dot')
        # -> (batch_size, sentence_length_target, hidden)

    output = F.concatenate(dec_output, attention_output, axis=2)
    output = time_distributed(PF.affine)(output, vocab_size_target,
                                         name='output')
    # -> (batch_size, sentence_length_target, vocab_size_target)

    t = F.reshape(y, (batch_size, sentence_length_target, 1))
    entropy = time_distributed_softmax_cross_entropy(output, t)

    # do not predict 'pad': sign(t) is 0 wherever the target id is 0.
    target_mask = F.sum(F.sign(t), axis=2)
    count = F.sum(target_mask, axis=1)

    entropy *= target_mask
    loss = F.mean(F.sum(entropy, axis=1) / count)
    return x, y, loss
def main():
    """
    Inference function to generate SR images.

    Loads the parameters from ``args.model``, builds the frame-recurrent
    graph once for a single frame, then feeds every low-resolution frame
    returned by ``inference_data_loader(args.input_dir_lr)`` through it.
    Frames with index >= 5 are written to ``args.output_dir``; the first
    five only warm up the recurrent state.

    Reads the module-level ``args`` (``model``, ``input_dir_lr``,
    ``num_resblock``, ``output_dir``, ``output_name``, ``output_ext``).
    """
    nn.load_parameters(args.model)

    # Inference data loader
    inference_data = inference_data_loader(args.input_dir_lr)
    input_shape = [1, ] + list(inference_data.inputs[0].shape)
    output_shape = [1, input_shape[1] * 4, input_shape[2] * 4, 3]
    # Remainder of each spatial size modulo 8; used to pad the flow below.
    oh = input_shape[1] - input_shape[1] // 8 * 8
    ow = input_shape[2] - input_shape[2] // 8 * 8

    # Build the computation graph
    inputs_raw = nn.Variable(input_shape)
    pre_inputs = nn.Variable(input_shape)
    pre_gen = nn.Variable(output_shape)
    pre_warp = nn.Variable(output_shape)

    transposed_pre_warp = space_to_depth(pre_warp)
    inputs_all = F.concatenate(inputs_raw, transposed_pre_warp)
    with nn.parameter_scope("generator"):
        gen_output = generator(inputs_all, 3, args.num_resblock)
    outputs = (gen_output + 1) / 2

    inputs_frames = F.concatenate(pre_inputs, inputs_raw)
    with nn.parameter_scope("fnet"):
        flow_lr = flow_estimator(inputs_frames)
    flow_lr = F.pad(flow_lr, (0, 0, 0, oh, 0, ow, 0, 0), "reflect")
    flow_hr = upscale_four(flow_lr * 4.0)
    pre_gen_warp = warp_by_flow(pre_gen, flow_hr)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(args.output_dir, exist_ok=True)

    max_iter = len(inference_data.inputs)
    print('Frame evaluation starts!!')
    # The recurrent state starts from all zeros.
    pre_inputs.d, pre_gen.d, pre_warp.d = 0, 0, 0
    for i in range(max_iter):
        inputs_raw.d = np.array([inference_data.inputs[i]]).astype(np.float32)
        if i != 0:
            # Warp the previously generated frame with the estimated flow.
            pre_gen_warp.forward()
            pre_warp.data.copy_from(pre_gen_warp.data)
        outputs.forward()
        output_frame = outputs.d

        if i >= 5:
            name, _ = os.path.splitext(
                os.path.basename(str(inference_data.paths_lr[i])))
            filename = args.output_name + '_' + name
            print('saving image %s' % filename)
            out_path = os.path.join(args.output_dir,
                                    "%s.%s" % (filename, args.output_ext))
            save_img(out_path, output_frame[0])
        else:
            # First 5 is a hard-coded symmetric frame padding, ignored but
            # time added!
            print("Warming up %d" % (5 - i))

        # Current frame and output become the "previous" ones of the next step.
        pre_inputs.data.copy_from(inputs_raw.data)
        pre_gen.data.copy_from(outputs.data)
def call(self, x1, x2):
    """
    Run both owned conv-bn branches and a locally created one.

    Args:
        x1: Input for ``self.conv_bn_1`` and for the local ``ConvBn``.
        x2: Input for ``self.conv_bn_2``.

    Returns:
        Concatenation along axis 1 of the two branch outputs followed by
        the output of the local ``ConvBn(1)`` applied to ``x1``.
    """
    branch_1 = self.conv_bn_1(x1)
    branch_2 = self.conv_bn_2(x2)
    merged = F.concatenate(branch_1, branch_2, axis=1)

    # ConvBn() will be destroyed when leaving this scope.
    # Thus, the parameters owned by this local object will be released too.
    local_cb = ConvBn(1)
    return F.concatenate(merged, local_cb(x1), axis=1)
def network_LSTM(x, D, C, InputShape, HiddenSize, test=False):
    """
    Single LSTM-style step.

    The sizes quoted in the comments (687 / 100 / 787 / 200) are the ones
    noted by the original author for one particular configuration.

    Args:
        x: Step input (Input_2, 687).
        D: Delayed output of the previous step (Delay_in, 100).
        C: Cell state of the previous step (Cell_in, 100).
        InputShape: Not used by this function.
        HiddenSize (int): Output size of each affine layer.
        test (bool): When False, dropout is applied to the step output.

    Returns:
        Concatenation along axis 1 of the step output and the new cell
        state (200).
    """
    # Concatenate -> 787
    joined = F.concatenate(D, x, axis=1)

    # Affine / InputGate / OutputGate / ForgetGate -> 100 each
    candidate = PF.affine(joined, HiddenSize, name='Affine')
    input_gate = PF.affine(joined, HiddenSize, name='InputGate')
    output_gate = PF.affine(joined, HiddenSize, name='OutputGate')
    forget_gate = PF.affine(joined, HiddenSize, name='ForgetGate')

    # Sigmoid, Sigmoid_2, Sigmoid_3, Sigmoid_4
    candidate = F.sigmoid(candidate)
    input_gate = F.sigmoid(input_gate)
    output_gate = F.sigmoid(output_gate)
    forget_gate = F.sigmoid(forget_gate)

    # Mul2 -> 100
    gated_candidate = F.mul2(candidate, input_gate)
    # Mul2_3 -> 100
    kept_cell = F.mul2(forget_gate, C)
    # Add2 -> 100 (third positional argument True, as in the original call)
    cell = F.add2(gated_candidate, kept_cell, True)

    # Tanh
    step_out = F.tanh(cell)
    # Cell_out
    cell_out = F.identity(cell)
    # Mul2_2 -> 100
    step_out = F.mul2(step_out, output_gate)

    # Dropout
    if not test:
        step_out = F.dropout(step_out)

    # Output
    step_out = F.identity(step_out)
    # Concatenate_2 -> 200
    return F.concatenate(step_out, cell_out, axis=1)
def sdr_loss(mix, pred, gt_time):
    """
    SDR-Combination Loss.

    Besides the four single sources, the loss is also evaluated on every
    sum of two and of three sources (4C1 + 4C2 + 4C3 = 14 targets).

    Args:
        mix: (B, C, T) -- mixture, 1 source x 2 channels.
        pred: (S, B, C, T) -- predictions for the 4 sources.
        gt_time: (B, S*C, T) -- ground truth; the channel axis is laid out
            as [bass1, bass2, drums1, drums2, ...].

    Returns:
        ``1.0 + sdr_loss_core(pred, targets, mix, weighted=True)``.
    """
    _, batch_size, n_channels, length = pred.shape
    # Number of rows one source occupies after flattening to 2-D.
    rows = batch_size * n_channels

    # Fix Length
    mix = mix[Ellipsis, :length]
    gt_time = gt_time[Ellipsis, :length]

    # Fix Shape
    mix = unsqueeze(mix)  # [1, B, C, T]
    gt_time = unsqueeze(gt_time)  # [1, B, S*C, T]

    # Stack the mixture and the four 2-channel sources along axis 0.
    data_t = mix  # [1, B, C, T]
    for src in range(4):
        source = gt_time[Ellipsis, 2 * src:2 * src + 2, :]
        data_t = F.concatenate(data_t, source, axis=0)
    data_t = F.reshape(data_t, (-1, length))  # [5*B*C, T]
    pred = F.reshape(
        pred, (rows * pred.shape[0], pred.shape[-1]))  # [B*C*S, T]

    # Combination List (4C2 + 4C3).
    # Index k addresses chunk k of data_t (chunk 0 is the mixture) and
    # chunk k - 1 of pred.
    combi_list = [(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4),
                  (1, 2, 3), (1, 2, 4), (1, 3, 4), (2, 3, 4)]
    for combi in combi_list:
        head = combi[0]
        tmp_data = data_t[rows * head:rows * (head + 1), Ellipsis]
        tmp_pred = pred[rows * (head - 1):rows * head, Ellipsis]
        for idx in combi[1:]:
            tmp_data = tmp_data + \
                data_t[rows * idx:rows * (idx + 1), Ellipsis]
            tmp_pred = tmp_pred + pred[rows * (idx - 1):rows * idx, Ellipsis]
        data_t = F.concatenate(data_t, tmp_data, axis=0)
        pred = F.concatenate(pred, tmp_pred, axis=0)

    # All 14 Combinations (4C1 + 4C2 + 4C3)
    mix_t = F.tile(data_t[:rows, Ellipsis], (14, 1))
    # Drop the mixture chunk so only the 14 targets remain.
    data_t = data_t[rows:, Ellipsis]

    # SDR Loss Calculation
    loss_sdr = sdr_loss_core(pred, data_t, mix_t, weighted=True)
    return 1.0 + loss_sdr
def bicubic_four(inputs, scope='bicubic_four'):
    """
    Equivalent to tf.image.resize_bicubic( inputs, (h*4, w*4) ) for a fix
    ratio of 4 FOR API <=1.13.
    For API 2.0, tf.image.resize_bicubic will be different, old version is
    tf.compat.v1.image.resize_bicubic.

    **Parallel Catmull-Rom Spline Interpolation Algorithm for Image Zooming
    Based on CUDA*[Wu et. al.]**

    Args:
        inputs: 4-D variable unpacked as (b, h, w, c).
        scope (str): Parameter scope the graph is built under.

    Returns:
        Variable of shape (b, h * 4, w * 4, c).
    """
    def _blend(bins, weights):
        # One weighted sum of the four shifted views per sub-pixel phase.
        blended = []
        for wei in weights:
            blended.append(wei[0] * bins[0] + wei[1] * bins[1] +
                           wei[2] * bins[2] + wei[3] * bins[3])
        return blended

    with nn.parameter_scope(scope):
        b, h, w, c = inputs.shape

        # Edge-replicating padding: 1 row/column before, 2 after.
        padded = F.concatenate(inputs[:, :1, :, :], inputs,
                               axis=1)  # pad top
        padded = F.concatenate(padded[:, :, :1, :], padded,
                               axis=2)  # pad left
        last_row = padded[:, -1:, :, :]
        padded = F.concatenate(padded, last_row, padded[:, -1:, :, :],
                               axis=1)  # pad bottom
        last_col = padded[:, :, -1:, :]
        padded = F.concatenate(padded, last_col, padded[:, :, -1:, :],
                               axis=2)  # pad right

        # Spline basis with r = 0.75, evaluated at the four sub-pixel phases.
        r = 0.75
        mat = np.float32([[0, 1, 0, 0],
                          [-r, 0, r, 0],
                          [2 * r, r - 3, 3 - 2 * r, -r],
                          [-r, 2 - r, r - 2, r]])
        weights = [
            np.float32([1.0, t, t * t, t * t * t]).dot(mat)
            for t in [0.0, 0.25, 0.5, 0.75]
        ]

        # Vertical pass
        row_bins = [padded[:, bi:bi + h, :, :] for bi in range(4)]
        hi_res_y = F.stack(*_blend(row_bins, weights),
                           axis=2)  # shape (b, h, 4, w + 3, c)
        hi_res_y = F.reshape(hi_res_y, (b, h * 4, w + 3, c))

        # Horizontal pass
        col_bins = [hi_res_y[:, :, bj:bj + w, :] for bj in range(4)]
        hi_res = F.stack(*_blend(col_bins, weights),
                         axis=3)  # shape (b, h*4, w, 4, c)
        hi_res = F.reshape(hi_res, (b, h * 4, w * 4, c))
    return hi_res
def conv_bn_relu(h, i, name, skip=True):
    """
    3x3 convolution -> batch normalization -> ReLU under scope ``name``.

    Args:
        h: Input variable; the convolution keeps ``h.shape[1]`` maps.
        i (int): When ``skip`` is False, selects how the block input is
            merged with the result: concatenation along axis 1 for even
            ``i``, element-wise addition for odd ``i``.
        name (str): Parameter scope name.
        skip (bool): When True, the block input is not merged; the result
            is split along axis 1, every piece gets a singleton axis
            inserted at position 1, and the pieces are concatenated again
            along axis 1.

    Returns:
        The resulting variable.
    """
    block_input = h
    n_maps = h.shape[1]
    with nn.parameter_scope(name):
        h = PF.convolution(h, n_maps, (3, 3), pad=(1, 1))
        h = PF.batch_normalization(h)
        h = F.relu(h)

    if not skip:
        if i % 2 == 0:
            return F.concatenate(*[h, block_input], axis=1)
        return h + block_input

    pieces = F.split(h, axis=1)
    pieces = [
        piece.reshape(piece.shape[:1] + (1, ) + piece.shape[1:])
        for piece in pieces
    ]
    return F.concatenate(*pieces, axis=1)
def dyn_sep_up_operation(x, dr_k_v, dr_k_h, k_sz, sf):
    """
    Dynamic separable upsampling operation with 1D separable local kernels.

    x: [B, H, W, C], dr_k_v: [B, H, W, 41*sf*sf], dr_k_h: [B, H, W, 41*sf*sf]
    out: [B, H*sf, W*sf, C]

    The 41 above presumably corresponds to ``k_sz``.  Exactly three
    channels of ``x`` are processed (indices 0, 1, 2 -- Y, U, V).

    Args:
        x: Input image.
        dr_k_v: Per-pixel vertical 1D kernels.
        dr_k_h: Per-pixel horizontal 1D kernels.
        k_sz (int): Length of each 1D kernel.
        sf (int): Upscaling factor.
    """
    sz = x.shape
    pad = k_sz // 2  # local filter pad size
    n_sub = sf**2  # sub-pixel positions per input pixel

    def _zero_accumulator():
        # [B, H, W, C*sf*sf]
        acc = nn.Variable((sz[0], sz[1], sz[2], sz[3] * n_sub))
        acc.data.zero()
        return acc

    out_v = _zero_accumulator()
    out_h = _zero_accumulator()

    # Pad axis 1, then repeat every channel once per sub-pixel position.
    img_pad = F.pad(x, (0, 0, pad, pad, 0, 0, 0, 0))
    planes = []
    for ch in range(3):
        plane = F.reshape(
            img_pad[:, :, :, ch],
            (img_pad.shape[0], img_pad.shape[1], img_pad.shape[2], 1))
        planes.append(F.tile(plane, [1, 1, 1, n_sub]))
    img_pad = F.concatenate(*planes, axis=3)

    # vertical 1D filter
    for i in range(k_sz):
        taps = F.tile(dr_k_v[:, :, :, i:k_sz * n_sub:k_sz], [1, 1, 1, 3])
        out_v = out_v + img_pad[:, i:i + sz[1], :, :] * taps

    # Pad axis 2 of the vertical result for the horizontal pass.
    img_pad = F.pad(out_v, (0, 0, 0, 0, pad, pad, 0, 0))

    # horizontal 1D filter
    for i in range(k_sz):
        taps = F.tile(dr_k_h[:, :, :, i:k_sz * n_sub:k_sz], [1, 1, 1, 3])
        out_h = out_h + img_pad[:, :, i:i + sz[2], :] * taps

    # depth to space upsampling (YUV)
    out = depth_to_space(out_h[:, :, :, 0:n_sub], sf)
    for ch in (1, 2):
        upsampled = depth_to_space(
            out_h[:, :, :, ch * n_sub:(ch + 1) * n_sub], sf)
        out = F.concatenate(out, upsampled, axis=3)
    return out
def __call__(self, x, z=None):
    """
    Eight-layer fully connected network with one skip connection.

    Layers ``fc0``-``fc2`` have ``self.dims`` outputs and ``fc3`` has
    ``self.dims - (self.ldims + c)`` outputs, with ``c = x.shape[1]``.
    The input ``x`` (and ``z`` when given) is then concatenated with the
    ``fc3`` output along axis 1, followed by ``fc4``-``fc6`` and the
    single-output layer ``fc7``.

    Args:
        x: Input variable with at least two axes.
        z: Optional second input, inserted between ``x`` and the hidden
            features at the skip connection.

    Returns:
        Output of ``self.last_affine(h, 1, name="fc7")``.
    """
    _, c = x.shape[0:2]

    h = x
    for layer_name in ("fc0", "fc1", "fc2"):
        h = self.affine_act(h, self.dims, name=layer_name)
    h = self.affine_act(h, self.dims - (self.ldims + c), name="fc3")

    # Skip connection: re-inject the raw input(s).
    skip_parts = [x, h] if z is None else [x, z, h]
    h = F.concatenate(*skip_parts, axis=1)

    for layer_name in ("fc4", "fc5", "fc6"):
        h = self.affine_act(h, self.dims, name=layer_name)
    return self.last_affine(h, 1, name="fc7")
def get_t_d(conf, r_inputs, d_data):
    """
    Create real and fake temporal discriminators.

    Args:
        conf: Configuration; reads ``conf.train.crop_size`` and
            ``conf.gan.crop_dt``.
        r_inputs: 5-D input frames; the first ``d_data.t_size`` entries of
            axis 1 are used.
        d_data: Object providing ``t_targets``, ``t_gen_output``, ``t_vel``,
            ``t_batch`` and ``t_size``.

    Returns:
        ``temporal_disc`` namedtuple with fields ``tdiscrim_real_output``,
        ``real_layers``, ``tdiscrim_fake_output`` and ``fake_layers``.
    """
    # to crop out unstable part for temporal discriminator, details in
    # TecoGAN supplemental paper
    full_size = conf.train.crop_size * 4
    crop_size_dt = int(full_size * conf.gan.crop_dt)
    offset_dt = (full_size - crop_size_dt) // 2
    # Recomputed from the offset so the border is the same on both sides.
    crop_size_dt = full_size - offset_dt * 2
    paddings = (0, 0, offset_dt, offset_dt, offset_dt, offset_dt, 0, 0)
    crop_end = offset_dt + crop_size_dt

    def _warp_crop_pad(frames):
        warped = warp_by_flow(frames, d_data.t_vel)
        warped = space_to_depth_disc(warped, d_data.t_batch)
        # equivalent to tf.image.crop_to_bounding_box
        warped = warped[:, offset_dt:crop_end, offset_dt:crop_end, :]
        # Pad back to the size before cropping.
        return F.pad(warped, paddings)

    with nn.parameter_scope("discriminator"):
        # Real branch
        real_warp = _warp_crop_pad(d_data.t_targets)
        before_warp = space_to_depth_disc(d_data.t_targets, d_data.t_batch)
        t_input = space_to_depth_disc(r_inputs[:, :d_data.t_size, :, :, :],
                                      d_data.t_batch)
        # resizing using bilinear interpolation
        input_hi = F.interpolate(t_input, scale=(4, 4), mode='linear',
                                 channel_last=True)
        real_warp = F.concatenate(before_warp, real_warp, input_hi)
        tdiscrim_real_output, real_layers = discriminator(real_warp)

        # Fake branch (same upscaled inputs, generated frames instead)
        fake_warp = _warp_crop_pad(d_data.t_gen_output)
        before_warp = space_to_depth_disc(d_data.t_gen_output,
                                          d_data.t_batch, inplace=False)
        fake_warp = F.concatenate(before_warp, fake_warp, input_hi)
        tdiscrim_fake_output, fake_layers = discriminator(fake_warp)

    temporal_disc = collections.namedtuple(
        'temporal_disc',
        'tdiscrim_real_output,'
        'real_layers, tdiscrim_fake_output, fake_layers')
    return temporal_disc(tdiscrim_real_output=tdiscrim_real_output,
                         real_layers=real_layers,
                         tdiscrim_fake_output=tdiscrim_fake_output,
                         fake_layers=fake_layers)
def decoder(x: list, block_expansion: int, num_blocks=3, max_features=256,
            test=False, comm=None):
    """
    Decoder: repeatedly upsample and concatenate with a skip feature.

    Args:
        x (list): Encoder features.  NOTE: the list is consumed from the
            end with ``pop()``, so the caller's list is modified.
        block_expansion (int): Base number of features; block ``i`` uses
            ``min(max_features, block_expansion * 2**i)``.
        num_blocks (int): Number of up-blocks.
        max_features (int): Upper bound on the features of a block.
        test (bool): Forwarded to ``upblock``.
        comm: Forwarded to ``upblock``.

    Returns:
        Output of the last up-block concatenated along axis 1 with the
        last popped skip feature.
    """
    # Blocks are built from the largest exponent down to 0.
    up_blocks = [
        functools.partial(upblock,
                          out_features=min(max_features,
                                           block_expansion * (2**i)),
                          kernel_size=3, padding=1, test=test, comm=comm)
        for i in reversed(range(num_blocks))
    ]

    # Per the original note: Variable((B, 256, 32, 32)), the last feature
    # from encoder.
    out = x.pop()
    for i, up_block in enumerate(up_blocks):
        with nn.parameter_scope(f"upblock_{i}"):
            out = up_block(out)
        skip = x.pop()
        out = F.concatenate(out, skip, axis=1)
    return out
def __call__(self, features):
    """
    Fuse multi-scale features and predict per-pixel class scores.

    Every feature map is resized to the spatial size of ``features[0]``,
    all of them are concatenated along axis 1 and passed through
    1x1 conv -> batch norm -> ReLU -> 1x1 conv, and the result is resized
    to ``self.output_size``.

    Args:
        features: Sequence of feature maps; ``shape[2:]`` of the first one
            defines the common size.

    Returns:
        The resized scores, passed through a softmax over axis 1 when
        ``self.test`` is truthy.
    """
    target_size = features[0].shape[2:]
    resized = []
    for feat in features:
        resized.append(
            F.interpolate(feat, output_size=target_size, mode='linear',
                          align_corners=False, half_pixel=True))
    fused = F.concatenate(*resized, axis=1)

    out = self.conv2d(fused, self.hparams['channels'], kernel_size=1,
                      stride=1, bias=False, name='convs/0/conv')
    out = self.batch_norm(out, name='convs/0/bn')
    out = F.relu(out)
    out = self.conv2d(out, self.hparams['num_classes'], kernel_size=1,
                      stride=1, bias=True, name='conv_seg')
    out = F.interpolate(out, output_size=self.output_size, mode='linear',
                        align_corners=False, half_pixel=True)

    return F.softmax(out, axis=1) if self.test else out
def shortcut(x, ochannels, stride, shortcut_type, test, channel_last=False):
    """
    Shortcut branch of a residual block.

    Args:
        x: Input variable; 4-D, with channels on axis 3 when
            ``channel_last`` is True and on axis 1 otherwise.
        ochannels (int): Number of output channels.  When it differs from
            the input channels it must be 2x or 4x as many.
        stride (tuple): Spatial stride of the block.
        shortcut_type (str): Case-insensitive.  'c' always uses a
            projection convolution; 'b' uses one only when the channel
            count changes; any other value uses the parameter-free path
            (strided pooling plus zero-padding of the channels).
        test (bool): When True, batch normalization does not use batch
            statistics.
        channel_last (bool): Memory layout of ``x``.

    Returns:
        Variable with ``ochannels`` channels.
    """
    channel_axis = 3 if channel_last else 1
    axes = [channel_axis]
    ichannels = x.shape[channel_axis]

    use_conv = shortcut_type.lower() == 'c'
    if ichannels != ochannels:
        assert (ichannels * 2 == ochannels) or (ichannels * 4 == ochannels)
        if shortcut_type.lower() == 'b':
            use_conv = True

    if use_conv:
        # Convolution does everything.
        # Matching channels, striding.
        with nn.parameter_scope("shortcut_conv"):
            x = pf_convolution(x, ochannels, (1, 1), stride=stride,
                               channel_last=channel_last)
            x = PF.batch_normalization(x, axes=axes, batch_stat=not test)
        return x

    if stride != (1, 1):
        # Stride
        x = F.average_pooling(x, (1, 1), stride, channel_last=channel_last)

    if ichannels != ochannels:
        # Zero-padding to channel axis
        ishape = x.shape
        if channel_last:
            zero_shape = (ishape[0],) + ishape[1:3] + \
                (ochannels - ichannels,)
        else:
            zero_shape = (ishape[0], ochannels - ichannels) + ishape[-2:]
        # F.constant takes the fill value first and the shape second.
        zeros = F.constant(0, zero_shape)
        # Pad on the real channel axis (3 for channel-last, 1 otherwise).
        x = F.concatenate(x, zeros, axis=channel_axis)
    return x
def backward_impl(self, inputs, outputs, prop_down, accum):
    """
    Propagate the gradient to the second input (``dy``) only.

    ``inputs`` is laid out as
    [inputs_fwd_graph] + [inputs_bwd_graph] or
    [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph].

    The incoming gradient ``outputs[0].grad`` is multiplied by the mask
    ``x0 >= 0`` and by that mask minus one; the two products are
    concatenated along the forward function's ``axis`` and written to
    (or, when ``accum[1]`` is set, accumulated into) ``inputs[1].grad``.

    Args:
        inputs: ``inputs[0]`` is the forward input ``x0``, ``inputs[1]``
            is ``dy``.
        outputs: ``outputs[0]`` is ``dx0``.
        prop_down: Only ``prop_down[1]`` is consulted.
        accum: Only ``accum[1]`` is consulted.
    """
    axis = self.forward_func.info.args["axis"]

    x0 = inputs[0].data       # forward input
    g_dy = inputs[1].grad     # gradient buffer being filled
    g_dx0 = outputs[0].grad   # incoming gradient

    if not prop_down[1]:
        return

    positive_mask = F.greater_equal_scalar(x0, 0.0)
    negative_mask = positive_mask - 1.0
    grad_parts = [positive_mask * g_dx0, negative_mask * g_dx0]
    new_grad = F.concatenate(*grad_parts, axis=axis)

    if accum[1]:
        g_dy.copy_from(g_dy + new_grad)
    else:
        g_dy.copy_from(new_grad)
def simple_rnn(inputs, units, return_sequences=False, fix_parameters=False):
    '''
    A vanilla recurrent neural network layer.

    At every step the step input and the previous state are concatenated
    along axis 1 and passed through an affine layer followed by tanh.  The
    initial state is all zeros.

    Args:
        inputs (nnabla.Variable): A shape of [B, SentenceLength, EmbeddingSize].
        units (int): Dimensionality of the output space.
        return_sequences (bool): Whether to return the full sequence of
            states (True) or only the last one (False).
        fix_parameters (bool): Fix parameters (Set need_grad=False).

    Returns:
        nn.Variable: A shape [B, SentenceLength, units] when
        ``return_sequences`` is True, otherwise a shape [B, units].
    '''
    batch_size = inputs.shape[0]
    state = nn.Variable.from_numpy_array(np.zeros((batch_size, units)))

    states = []
    # split in the direction of sequence
    for step_input in F.split(inputs, axis=1):
        joined = F.concatenate(step_input, state, axis=1)
        state = F.tanh(
            PF.affine(joined, units, fix_parameters=fix_parameters))
        states.append(state)

    if not return_sequences:
        return states[-1]
    return F.stack(*states, axis=1)
def sin_cos_positional_embedding(x, num_encoding_functions,
                                 include_input=True, log_sampling=True):
    """Embed each sampling point with sines and cosines of scaled coordinates.

    Given the coordinate positions of sampling points as an (N, 3) array,
    every point is multiplied by each frequency and passed through sine
    and cosine; all results are concatenated along the last axis.

    Args:
        x (nn.Variable or nn.NdArray): Shape is (N, 3).
        num_encoding_functions (int): number of frequencies to encode for
            each grid position
        include_input (bool, optional): Whether include the original grid
            position along with the encoding of the position. Defaults to
            True.
        log_sampling (bool, optional): Sample logarithmically and not
            linearly. Defaults to True.

    Returns:
        [nn.Variable or nn.NdArray]: (N, num_encoding_functions*3*2+3) if
        include_input is True else (N, num_encoding_functions*3*2)
    """
    if log_sampling:
        # Frequencies 2**0, 2**1, ..., 2**(num_encoding_functions - 1).
        exponents = F.arange(0, num_encoding_functions)
        frequency_bands = F.pow2(F.constant(2, shape=exponents.shape),
                                 exponents)
    else:
        # Evenly spaced up to the same top frequency; the small epsilon
        # keeps the end point inside the range.
        top = 2**(num_encoding_functions - 1)
        frequency_bands = F.arange(2**0, top + 1e-5,
                                   (top - 1) / (num_encoding_functions - 1.0))

    encoding = [x] if include_input else []
    for freq in frequency_bands:
        encoding.extend(
            func(x * F.reshape(freq, (1, 1))) for func in (F.sin, F.cos))

    last_axis = x.ndim - 1
    return F.concatenate(*encoding, axis=last_axis)
def shuffle_unit(x, scope_name, dn=False):
    """
    Figure. 2 (b) and (c) in https://arxiv.org/pdf/1707.01083.pdf

    Uses ``groups``, ``test`` and ``shuffle`` from the enclosing scope.

    Args:
        x: Input variable; ``x.shape[1]`` is the number of channels.
        scope_name (str): Parameter scope of the unit.
        dn (bool): Down-sampling variant.  The depthwise convolution uses
            stride 2 and the shortcut is a 2x2 average pooling whose
            result is concatenated along axis 1 instead of being added.

    Returns:
        Output of the unit after the final ReLU.
    """
    n_channels = x.shape[1]
    dw_stride = (2, 2) if dn else (1, 1)

    h = x
    with nn.parameter_scope(scope_name):
        with nn.parameter_scope("gconv1"):
            h = PF.convolution(h, n_channels, kernel=(1, 1), pad=(0, 0),
                               group=groups, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h, True)

        with nn.parameter_scope("shuffle"):  # no meaning but semantics
            h = shuffle(h)

        with nn.parameter_scope("dconv"):
            h = PF.depthwise_convolution(h, kernel=(3, 3), pad=(1, 1),
                                         stride=dw_stride, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)

        with nn.parameter_scope("gconv2"):
            h = PF.convolution(h, n_channels, kernel=(1, 1), pad=(0, 0),
                               group=groups, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)

        if dn:
            pooled = F.average_pooling(x, (2, 2))
            h = F.concatenate(*[h, pooled], axis=1)
        else:
            h = h + x
        h = F.relu(h)
    return h
def conv_lstm_cell(input_tensor, cur_state, n_filt, kernel_size):
    """
    conv lstm cell definition

    The input and the current hidden state are concatenated along the
    channel axis and convolved into ``4 * n_filt`` maps, which are split
    into four equal groups along axis 1 for the i, f, o and g
    pre-activations.

    Args:
        input_tensor: Input variable, channels on axis 1.
        cur_state: Pair ``(h_cur, c_cur)`` of current hidden and cell state.
        n_filt (int): Number of filters of each of the four groups.
        kernel_size (int): Convolution kernel size; ``kernel_size // 2`` is
            passed to ``conv2d`` as the padding.

    Returns:
        Tuple ``(h_next, c_next)``.
    """
    def split(inp):
        _, channels, _, _ = inp.shape
        # Floor division: slice bounds must be integers (``/`` yields a
        # float under Python 3).
        channels = channels // 4
        return inp[:, :channels, :, :], \
            inp[:, channels:2 * channels, :, :], \
            inp[:, 2 * channels:3 * channels, :, :], \
            inp[:, 3 * channels:4 * channels, :, :]

    h_cur, c_cur = cur_state

    # concatenate along channel axis
    combined = F.concatenate(*[input_tensor, h_cur], axis=1)
    combined_conv = conv2d(combined, 4 * n_filt, kernel_size, 1,
                           kernel_size // 2, name='conv_lstm_cell')

    cc_i, cc_f, cc_o, cc_g = split(combined_conv)
    act_i = F.sigmoid(cc_i)
    act_f = F.sigmoid(cc_f)
    act_o = F.sigmoid(cc_o)
    act_g = F.tanh(cc_g)

    c_next = F.add2(act_f * c_cur, act_i * act_g)
    h_next = act_o * F.tanh(c_next)
    return h_next, c_next
def _gru(x, h, w, b, with_bias):
    """GRU cell.

    ``w`` is split along axis 0 into three matrices (reset, update, new)
    and ``b``, when used, into four biases.  The reset and update gates
    act on the concatenation of ``x`` and ``h``; for the candidate state
    the third matrix is divided column-wise into an input part and a
    hidden part, the latter being the last ``h.shape[1]`` columns.

    Args:
        x (:obj:`~nnabla.Variable`): Input data.
        h (:obj:`~nnabla.Variable`): Hidden state.
        w (:obj:`~nnabla.Variable`): Weight.
        b (:obj:`~nnabla.Variable`): Bias.
        with_bias (bool): Include the bias or not.

    Returns:
        :obj:`~nnabla.Variable`: The next hidden state.
    """
    hidden_size = h.shape[1]
    xh = F.concatenate(x, h, axis=1)

    w_reset, w_update, w_new = F.split(w, axis=0)
    if with_bias:
        b_reset, b_update, b_new_x, b_new_h = F.split(b, axis=0)
    else:
        b_reset = b_update = b_new_x = b_new_h = None

    def _t(mat):
        # Swap the two axes of a 2-D matrix.
        return F.transpose(mat, (1, 0))

    r_t = F.sigmoid(F.affine(xh, _t(w_reset), b_reset))
    z_t = F.sigmoid(F.affine(xh, _t(w_update), b_update))

    n_input_cols = w_new.shape[1] - hidden_size
    w_new_x = w_new[:, :n_input_cols]
    w_new_h = w_new[:, n_input_cols:]
    n_t = F.tanh(
        F.affine(x, _t(w_new_x), b_new_x) +
        r_t * F.affine(h, _t(w_new_h), b_new_h))

    h_t = (1 - z_t) * n_t + z_t * h
    return h_t
def yolov2_activate(x, anchors, biases):
    """
    Apply the YOLO v2 output activations.

    The channel axis of ``x`` is regrouped as (anchors, values-per-anchor)
    and the per-anchor values are sliced as: [0:2] xy (sigmoid),
    [2:4] wh (exp), [4:5] objectness (sigmoid), [5:] class scores
    (softmax over that axis).

    Args:
        x: 4-D network output; ``x.shape[1]`` must be a multiple of
            ``anchors``.
        anchors (int): Number of anchors.
        biases: Forwarded to ``yolov2_image_coordinate``.

    Returns:
        Variable of shape (batch, -1, x.shape[1] // anchors).
    """
    shape = x.shape
    y = F.reshape(x, (shape[0], anchors, -1, ) + shape[2:])

    def _values(var, lo, hi):
        # Entries [lo, hi) of axis 2, everything on the other axes.
        stop = list(var.shape)
        stop[2] = hi
        return F.slice(var, (0, 0, lo, 0, 0), stop)

    t_xy = _values(y, 0, 2)
    t_wh = _values(y, 2, 4)
    t_o = _values(y, 4, 5)
    t_p = _values(y, 5, y.shape[2])

    t_xy = F.sigmoid(t_xy)
    t_wh = F.exp(t_wh)
    t_o = F.sigmoid(t_o)
    t_p = F.softmax(t_p, axis=2)

    t_x, t_y, t_wh = yolov2_image_coordinate(t_xy, t_wh, biases)
    y = F.concatenate(t_x, t_y, t_wh, t_o, t_p, axis=2)
    # Floor division keeps the target shape integral (``/`` yields a float
    # under Python 3).
    y = F.transpose(y, (0, 1, 3, 4, 2)).reshape(
        (shape[0], -1, shape[1] // anchors))
    return y
def lighting_network(x_hat, normal, feature, view, D=512, L=4, N=4,
                     including_input=True):
    """
    MLP mapping surface point, normal, feature and view direction to a
    3-vector in (-1, 1).

    Args
      x_hat: Differentiable intersection point (B, R, 3)
      normal: Normal on x_hat (B, R, 3) (should be normalized before).
      feature: Intermediate output of the implicit network
        (B, R, feature_size).
      view: View direction (B, R, 3)
      D: Dimension of a network.
      L: Number of layers.
      N: Number of frequency of the positional encoding.
      including_input: Include input to the positional encoding (PE).

    Returns
      Output of the last affine layer (3 outputs) passed through tanh.
    """
    pe_view = positional_encoding(view, N, including_input)
    h = F.concatenate(x_hat, normal, feature, pe_view, axis=-1)

    # L - 1 hidden layers with ReLU ...
    for layer_idx in range(L - 1):
        h = F.relu(affine(h, D, name=f"affine-{layer_idx:02d}"))

    # ... and the output layer with tanh.
    h = affine(h, 3, name=f"affine-{L - 1:02d}")
    return F.tanh(h)
def shortcut(x, ochannels, stride, shortcut_type, test):
    """
    Shortcut branch of a residual block (channels on axis 1).

    Args:
        x: Input variable; ``x.shape[1]`` is the number of input channels.
        ochannels (int): Number of output channels.  When it differs from
            the input channels it must be 2x or 4x as many.
        stride (tuple): Spatial stride of the block.
        shortcut_type (str): Case-insensitive.  'c' always uses a
            projection convolution; 'b' uses one only when the channel
            count changes; any other value uses the parameter-free path
            (strided pooling plus zero-padding of the channels).
        test (bool): When True, batch normalization does not use batch
            statistics.

    Returns:
        Variable with ``ochannels`` channels.
    """
    ichannels = x.shape[1]
    kind = shortcut_type.lower()
    channels_differ = ichannels != ochannels

    if channels_differ:
        assert (ichannels * 2 == ochannels) or (ichannels * 4 == ochannels)
    use_conv = kind == 'c' or (channels_differ and kind == 'b')

    if use_conv:
        # Convolution does everything.
        # Matching channels, striding.
        with nn.parameter_scope("shortcut_conv"):
            x = PF.convolution(x, ochannels, (1, 1),
                               stride=stride, with_bias=False)
            x = PF.batch_normalization(x, batch_stat=not test)
        return x

    if stride != (1, 1):
        # Stride
        x = F.average_pooling(x, (1, 1), stride)

    if channels_differ:
        # Zero-padding to channel axis
        ishape = x.shape
        pad_shape = (ishape[0], ochannels - ichannels) + ishape[-2:]
        zeros = F.constant(0, pad_shape)
        x = F.concatenate(x, zeros, axis=1)
    return x
def factorized_reduction(x, output_filter, scope, test):
    """
    Applying spatial reduction to input variable.

    Input variable is passed to:
    Skip path 1, applied average pooling with stride 2.
    Skip path 2, first padded with 0 on the right and bottom,
                 then shifted by 1 (so that those 0-padded sides will be
                 added, whereas its shape is the same as the original),
                 and pooled the same way.
    Each path goes through a 1x1 convolution with ``output_filter // 2``
    maps; the two results are concatenated along the depth dimension and
    batch-normalized.

    Args:
        x: Input variable.
        output_filter (int): Total number of output maps.
        scope (str): Parameter scope of this reduction.
        test (bool): When True, batch normalization does not use batch
            statistics.

    Returns:
        The reduced variable.
    """
    half_filters = output_filter // 2

    with nn.parameter_scope(scope):
        # Path 1: plain stride-2 sub-sampling.
        path1 = F.average_pooling(x, (1, 1), (2, 2))
        with nn.parameter_scope("path1_conv"):
            path1 = PF.convolution(path1, half_filters, (1, 1),
                                   with_bias=False)

        # Path 2: same sub-sampling on the input shifted by one pixel.
        shifted = F.pad(x, (0, 1, 0, 1), mode='constant')
        shifted = F.slice(shifted, (0, 0, 1, 1))
        path2 = F.average_pooling(shifted, (1, 1), (2, 2))
        with nn.parameter_scope("path2_conv"):
            path2 = PF.convolution(path2, half_filters, (1, 1),
                                   with_bias=False)

        final_path = F.concatenate(path1, path2, axis=1)
        with nn.parameter_scope("reduction_bn"):
            final_path = PF.batch_normalization(final_path,
                                                batch_stat=not test)
    return final_path
def yolov2_feature(c13, c18, test=False, feature_dict=None):
    '''
    Build the multi-scale feature that combines ``c13`` and ``c18``.

    Args:
        c13: Feature map that is reduced to 64 maps by a 1x1 block and
            then reorganized with ``reorg_darknet_bug(..., 2)``.
        c18: Feature map that goes through two 3x3 blocks with 1024 maps.
        test (bool): Forwarded to ``conv_bn_pool``.
        feature_dict (dict, optional): When given, every intermediate
            feature is stored in it under the keys 'c18_19', 'c18_20',
            'c13_14', 'reorg', 'route' and 'c21'.

    Returns:
        Output of the final 3x3 block ('c21').
    '''
    if feature_dict is None:
        feature_dict = {}

    # Extra feature extraction for c18
    h = c18
    for block_name in ('c18_19', 'c18_20'):
        h = conv_bn_pool(h, 1024, 3, pool=False, test=test, name=block_name)
        feature_dict[block_name] = h

    # Extra feature extraction for c13
    c13_h = conv_bn_pool(c13, 64, 1, pool=False, test=test, name='c13_14')
    feature_dict['c13_14'] = c13_h
    c13_h = reorg_darknet_bug(c13_h, 2)
    feature_dict['reorg'] = c13_h

    # Concatenate c13 and c18 features together
    h = F.concatenate(c13_h, h, axis=1)
    feature_dict['route'] = h

    # Extra feature extraction of the multi-scale features
    h = conv_bn_pool(h, 1024, 3, pool=False, test=test, name='c21')
    feature_dict['c21'] = h
    return h
def frame_colorization(IA_lab, IB_lab, IA_last_lab, features_B,
                       joint_training=True, feature_noise=0,
                       luminance_noise=0, temperature=0.01):
    """
    Predict the ab channels of frame A from a reference frame B.

    Args:
        IA_lab: Frame A; only channel 0 along axis 1 is used.
        IB_lab: Reference frame B.
        IA_last_lab: Previous frame, fed to the colorization network.
        features_B: Features of the reference frame for ``warp_color``.
        joint_training: Not used by this function.
        feature_noise: Forwarded to ``warp_color``.
        luminance_noise: Not used by this function.
        temperature: Forwarded to ``warp_color``.

    Returns:
        Tuple ``(IA_ab_predict, nonlocal_BA_lab, features_A_gray)``.
    """
    # Keep the channel axis by slicing 0:1 instead of indexing 0.
    IA_l = IA_lab[:, 0:1, :, :]

    nonlocal_BA_lab, similarity_map, features_A_gray = warp_color(
        IA_l, IB_lab, features_B, feature_noise, temperature=temperature)
    nonlocal_BA_ab = nonlocal_BA_lab[:, 1:3, :, :]

    net_inputs = [IA_l, nonlocal_BA_ab, similarity_map, IA_last_lab]
    color_input = F.concatenate(*net_inputs, axis=1)
    with nn.parameter_scope('colornet'):
        IA_ab_predict = colorvidnet(color_input)

    return IA_ab_predict, nonlocal_BA_lab, features_A_gray
def d3_block(self, inp, growth_rate, num_layers, n_blocks):
    '''
    Define D3Block.

    An initial dilated dense block produces ``growth_rate * n_blocks``
    channels, cut into ``n_blocks`` groups of ``growth_rate`` channels.
    Group ``i`` then feeds a further dense block whose output is added,
    group by group, onto all the groups after ``i``.

    Args:
        inp: Input variable.
        growth_rate (int): Channels per group.
        num_layers: Forwarded to ``self.dilated_dense_block_2``.
        n_blocks (int): Number of groups.

    Returns:
        The last ``growth_rate`` channels (axis 1) of the result.
    '''
    out = self.dilated_dense_block_2(inp, growth_rate * n_blocks,
                                     num_layers, scope_name='initial_block')

    if n_blocks > 1:
        groups_out = [
            out[:, k * growth_rate:(k + 1) * growth_rate]
            for k in range(n_blocks)
        ]

        for i in range(n_blocks - 1):
            n_later = n_blocks - i - 1
            tmp = self.dilated_dense_block_2(
                groups_out[i], growth_rate * n_later, num_layers,
                scope_name='layers/layer%s' % (i + 1))
            # Add the j-th output group onto the j-th group after group i.
            for j in range(n_later):
                groups_out[i + 1 + j] += \
                    tmp[:, j * growth_rate:(j + 1) * growth_rate]

        out = F.concatenate(*groups_out, axis=1)

    return out[:, -growth_rate:]
def cnn(batch_size, vocab_size, text_len, classes, features=128, train=True):
    """
    Text classifier with 2- to 5-gram convolutions over word embeddings.

    For every n in 2..5 the embedded text is convolved with 128 kernels
    of size (n, features), passed through ReLU and max-pooled over the
    whole remaining length.  The pooled results are concatenated (default
    axis of ``F.concatenate``), optionally passed through dropout, and
    fed to an affine output layer.

    Args:
        batch_size (int): Batch size.
        vocab_size (int): Number of rows of the embedding table.
        text_len (int): Number of tokens per text.
        classes (int): Number of output classes.
        features (int): Embedding size.
        train (bool): Apply dropout (p=0.5) before the output layer.

    Returns:
        Tuple ``(text, y, loss, t)``: the token-id input variable, the
        class scores, the mean softmax cross entropy and the label
        variable of shape (batch_size, 1).
    """
    text = nn.Variable([batch_size, text_len])

    with nn.parameter_scope("text_embed"):
        embed = PF.embed(text, n_inputs=vocab_size, n_features=features)
    print("embed", embed.shape)
    # Add a singleton channel axis for the 2-D convolutions.
    embed = F.reshape(embed, (batch_size, 1, text_len, features))
    print("embed", embed.shape)

    combined = None
    for n in range(2, 6):  # 2 - 5 gram
        with nn.parameter_scope(str(n) + "_gram"):
            with nn.parameter_scope("conv"):
                conv = PF.convolution(embed, 128, kernel=(n, features))
                conv = F.relu(conv)
            with nn.parameter_scope("pool"):
                pool = F.max_pooling(conv, kernel=(conv.shape[2], 1))
            # Explicit None test: do not rely on the truthiness of a
            # Variable to detect the first iteration.
            if combined is None:
                combined = F.identity(pool)
            else:
                combined = F.concatenate(combined, pool)

    if train:
        combined = F.dropout(combined, 0.5)

    with nn.parameter_scope("output"):
        y = PF.affine(combined, classes)

    t = nn.Variable([batch_size, 1])
    _loss = F.softmax_cross_entropy(y, t)
    loss = F.reduce_mean(_loss)
    return text, y, loss, t