# Common imports assumed by the snippets below (NNabla).
import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF


def ssd_separate_conf_pos_neg(_ssd_conf):
    # input
    #   _ssd_conf    : type=nn.Variable, shape=(batch_size, default boxes, pos num + neg num)
    # output
    #   ssd_pos_conf : type=nn.Variable, shape=(batch_size, default boxes, pos num)
    #   ssd_neg_conf : type=nn.Variable, shape=(batch_size, default boxes, neg num)
    ssd_pos_conf = F.slice(
        _ssd_conf,
        start=(0, 0, 0),
        stop=(_ssd_conf.shape[0], _ssd_conf.shape[1], _ssd_conf.shape[2] - 1),
        step=(1, 1, 1))
    ssd_neg_conf = F.slice(
        _ssd_conf,
        start=(0, 0, _ssd_conf.shape[2] - 1),
        stop=(_ssd_conf.shape[0], _ssd_conf.shape[1], _ssd_conf.shape[2]),
        step=(1, 1, 1))
    return ssd_pos_conf, ssd_neg_conf

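# Usage sketch (not from the original source). Shapes are illustrative
# assumptions: 8 images, 8732 default boxes, 20 positive classes plus one
# "negative" channel packed into the last axis.
conf = nn.Variable((8, 8732, 21))
pos_conf, neg_conf = ssd_separate_conf_pos_neg(conf)
# pos_conf.shape == (8, 8732, 20), neg_conf.shape == (8, 8732, 1)
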
def celu_backward(inputs, alpha=1.0, axis=1):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    fstart, fstop, fstep = create_slice(dy.shape, axis, True)
    bstart, bstop, bstep = create_slice(dy.shape, axis, False)
    dy0 = F.slice(dy, fstart, fstop, fstep)
    dy1 = F.slice(dy, bstart, bstop, bstep)
    aep = alpha * F.exp(x0)
    aen = alpha * F.exp(-x0)
    m0 = F.greater_scalar(x0, 0)
    m1 = 1 - m0
    m0 = no_grad(m0)
    m1 = no_grad(m1)
    dx00 = dy0 * (m0 + aep * m1)
    dx01 = dy1 * (m1 + aen * m0)
    dx = dx00 - dx01
    return dx

def yolov2_activate(x, anchors, biases):
    shape = x.shape
    y = F.reshape(x, (shape[0], anchors, -1,) + shape[2:])
    stop = list(y.shape)
    stop[2] = 2
    t_xy = F.slice(y, (0, 0, 0, 0, 0), stop)
    stop[2] = 4
    t_wh = F.slice(y, (0, 0, 2, 0, 0), stop)
    stop[2] = 5
    t_o = F.slice(y, (0, 0, 4, 0, 0), stop)
    stop[2] = y.shape[2]
    t_p = F.slice(y, (0, 0, 5, 0, 0), stop)
    t_xy = F.sigmoid(t_xy)
    t_wh = F.exp(t_wh)
    t_o = F.sigmoid(t_o)
    t_p = F.softmax(t_p, axis=2)
    t_x, t_y, t_wh = yolov2_image_coordinate(t_xy, t_wh, biases)
    y = F.concatenate(t_x, t_y, t_wh, t_o, t_p, axis=2)
    # Integer division; reshape requires integer dimensions.
    y = F.transpose(y, (0, 1, 3, 4, 2)).reshape(
        (shape[0], -1, shape[1] // anchors))
    return y

def lstm_cell(x, c, h):
    batch_size, units = c.shape
    _hidden = PF.affine(F.concatenate(x, h, axis=1), 4 * units)

    a = F.tanh(
        F.slice(_hidden, start=(0, units * 0), stop=(batch_size, units * 1)))
    input_gate = F.sigmoid(
        F.slice(_hidden, start=(0, units * 1), stop=(batch_size, units * 2)))
    forget_gate = F.sigmoid(
        F.slice(_hidden, start=(0, units * 2), stop=(batch_size, units * 3)))
    output_gate = F.sigmoid(
        F.slice(_hidden, start=(0, units * 3), stop=(batch_size, units * 4)))

    cell = input_gate * a + forget_gate * c
    hidden = output_gate * F.tanh(cell)
    return cell, hidden

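# Usage sketch (not from the original source). Hypothetical sizes: batch 4,
# 16 input features, 32 LSTM units. PF.affine creates its parameters under
# the current parameter scope.
x_t = nn.Variable((4, 16))
c_t = nn.Variable((4, 32))
h_t = nn.Variable((4, 32))
with nn.parameter_scope("lstm_demo"):
    c_next, h_next = lstm_cell(x_t, c_t, h_t)  # both (4, 32)
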
def jacobian(self, coordinates):
    new_coordinates = self.warp_coordinates(coordinates)
    new_coordinates_x = F.slice(new_coordinates, start=(0, 0, 0),
                                stop=new_coordinates.shape[:2] + (1,))
    grad_x = nn.grad([F.sum(new_coordinates_x)], [coordinates])
    new_coordinates_y = F.slice(new_coordinates, start=(0, 0, 1),
                                stop=new_coordinates.shape[:2] + (2,))
    grad_y = nn.grad([F.sum(new_coordinates_y)], [coordinates])
    gx = F.reshape(grad_x[0],
                   grad_x[0].shape[:-1] + (1,) + grad_x[0].shape[-1:])
    gy = F.reshape(grad_y[0],
                   grad_y[0].shape[:-1] + (1,) + grad_y[0].shape[-1:])
    jacobian = F.concatenate(gx, gy, axis=gy.ndim - 2)
    return jacobian

def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    #         [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    start = self.forward_func.info.args["start"]
    stop = self.forward_func.info.args["stop"]
    step = self.forward_func.info.args["step"]
    # Inputs
    x0 = inputs[0].data
    dy = inputs[1].data
    # Outputs
    dx0 = outputs[0].data
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_dy = inputs[1].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad

    # Computation
    if prop_down[1]:
        g_dx0_ = F.slice(g_dx0, start, stop, step)
        if accum[1]:
            g_dy += g_dx0_
        else:
            g_dy.copy_from(g_dx0_)

def factorized_reduction(x, output_filter, scope, test):
    """
    Apply spatial reduction to the input variable.
    The input is passed through two skip paths:
        Path 1: average pooling with stride 2.
        Path 2: padded with 0 on the right and bottom, then shifted by 1
                (so the zero-padded sides are used while the shape stays
                the same as the original), then average pooling with stride 2.
    The two paths are then concatenated along the depth dimension.
    """
    with nn.parameter_scope(scope):
        path1 = F.average_pooling(x, (1, 1), (2, 2))
        with nn.parameter_scope("path1_conv"):
            path1 = PF.convolution(
                path1, output_filter // 2, (1, 1), with_bias=False)

        path2 = F.pad(x, (0, 1, 0, 1), mode='constant')
        path2 = F.slice(path2, (0, 0, 1, 1))
        path2 = F.average_pooling(path2, (1, 1), (2, 2))
        with nn.parameter_scope("path2_conv"):
            path2 = PF.convolution(
                path2, output_filter // 2, (1, 1), with_bias=False)

        final_path = F.concatenate(path1, path2, axis=1)
        with nn.parameter_scope("reduction_bn"):
            final_path = PF.batch_normalization(
                final_path, batch_stat=not test)

    return final_path

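# Usage sketch (not from the original source). Halve the spatial size of a
# hypothetical (8, 32, 16, 16) feature map while keeping 32 output channels;
# the scope name "reduce_demo" is an assumption for illustration.
feat = nn.Variable((8, 32, 16, 16))
reduced = factorized_reduction(feat, 32, "reduce_demo", test=False)
# reduced.shape == (8, 32, 8, 8)
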
def _in_projection_packed(q, k, v, w, b):
    if k is v:
        if q is k:
            # self-attention
            w = F.transpose(w, (1, 0))
            to_ret = F.affine(q, w, b, base_axis=2)
            ind = -(-to_ret.size_from_axis(2) // 3)
            a, b, c = to_ret.shape
            return (F.slice(to_ret, (0, 0, 0), (a, b, ind)),
                    F.slice(to_ret, (0, 0, ind), (a, b, ind * 2)),
                    F.slice(to_ret, (0, 0, ind * 2), (a, b, c)))
        else:
            # encoder-decoder attention
            raise NotImplementedError()
    else:
        raise NotImplementedError()

def factorized_reduction(x, output_filter, scope, test, is_search):
    """
    Apply spatial reduction to the input variable.
    """
    assert output_filter % 2 == 0
    x = F.relu(x)
    with nn.parameter_scope(scope):
        with nn.parameter_scope("conv_1"):
            conv_1 = PF.convolution(x, output_filter // 2, (1, 1),
                                    pad=None, stride=(2, 2), with_bias=False)

        conv_2 = F.pad(x, (0, 1, 0, 1), mode='constant')
        conv_2 = F.slice(conv_2, (0, 0, 1, 1))
        with nn.parameter_scope("conv_2"):
            conv_2 = PF.convolution(conv_2, output_filter // 2, (1, 1),
                                    pad=None, stride=(2, 2), with_bias=False)

        final_conv = F.concatenate(conv_1, conv_2, axis=1)
        with nn.parameter_scope("reduction_bn"):
            final_conv = PF.batch_normalization(
                final_conv, batch_stat=not test, fix_parameters=is_search)

    return final_conv

def build_model():
    x = nn.Variable((batch_size, sentence_length_source))
    input_mask = F.sign(
        F.reshape(F.slice(x), (batch_size, sentence_length_source, 1)))
    y = nn.Variable((batch_size, sentence_length_target))

    enc_input = time_distributed(PF.embed)(
        x, vocab_size_source, embedding_size, name='enc_embeddings')  # *input_mask
    # -> (batch_size, sentence_length_source, embedding_size)

    dec_input = time_distributed(PF.embed)(
        y, vocab_size_target, embedding_size, name='dec_embeddings')
    # -> (batch_size, sentence_length_target, embedding_size)

    # encoder
    with nn.parameter_scope('encoder'):
        output, c, h = LSTMEncoder(enc_input, hidden,
                                   return_sequences=True, return_state=True)
        # -> (batch_size, sentence_length_source, hidden), (batch_size, hidden), (batch_size, hidden)

    # decoder
    output = LSTMAttentionDecoder(dec_input, output, initial_state=(c, h),
                                  return_sequences=True, name='decoder')
    # -> (batch_size, sentence_length_target, hidden)

    output = time_distributed(PF.affine)(output, vocab_size_target,
                                         name='output')
    # -> (batch_size, sentence_length_target, vocab_size_target)

    t = F.reshape(F.slice(y), (batch_size, sentence_length_target, 1))

    entropy = time_distributed_softmax_cross_entropy(output, t)

    mask = F.sum(F.sign(t), axis=2)  # do not predict 'pad'.
    count = F.sum(mask, axis=1)

    entropy *= mask

    loss = F.mean(F.sum(entropy, axis=1) / count)

    return x, y, loss

def vision_transformer(x, input_res, patch_size, v_width, v_layers, v_heads,
                       embed_dim):
    scale = v_width ** -0.5

    with nn.parameter_scope("visual"):
        con1_w = nn.parameter.get_parameter_or_create(
            name="conv1/W", shape=(v_width, 3, patch_size, patch_size))
        x = F.convolution(
            x, con1_w, bias=None,
            stride=(patch_size, patch_size))  # shape = [*, width, grid, grid]

        # shape = [*, width, grid ** 2]
        x = F.reshape(x, (x.shape[0], x.shape[1], -1))
        x = F.transpose(x, (0, 2, 1))  # shape = [*, grid ** 2, width]
        z = np.zeros((x.shape[0], 1, x.shape[-1]))
        zeros = nn.Variable.from_numpy_array(z)
        class_embed = nn.parameter.get_parameter_or_create(
            name="class_embedding", shape=(v_width,)).reshape(
                (x.shape[0], 1, v_width))
        # shape = [*, grid ** 2 + 1, width]
        x = F.concatenate(class_embed + zeros, x, axis=1)

        positional_embedding = nn.parameter.get_parameter_or_create(
            name='positional_embedding',
            shape=((input_res // patch_size) ** 2 + 1, v_width)).reshape(
                (x.shape[0], x.shape[1], v_width))
        x = x + positional_embedding

        ln_pre_w = nn.parameter.get_parameter_or_create(
            name="ln_pre/W", shape=(v_width,)).reshape((1, 1, v_width))
        ln_pre_b = nn.parameter.get_parameter_or_create(
            name="ln_pre/b", shape=(v_width,)).reshape((1, 1, v_width))
        x = F.layer_normalization(x, ln_pre_b, ln_pre_w, batch_axis=(0, 1))

        x = F.transpose(x, (1, 0, 2))  # NLD -> LND

        x = transformer(x, v_width, v_layers, v_heads)

        x = F.transpose(x, (1, 0, 2))  # LND -> NLD

        ln_post_w = nn.parameter.get_parameter_or_create(
            name="ln_post/W", shape=(v_width,)).reshape((1, 1, v_width))
        ln_post_b = nn.parameter.get_parameter_or_create(
            name="ln_post/b", shape=(v_width,)).reshape((1, 1, v_width))
        x = F.slice(x, stop=(x.shape[0], 1, x.shape[2]))
        x = F.layer_normalization(x, ln_post_b, ln_post_w)

        if 'proj' in nn.get_parameters():
            visual_proj = nn.parameter.get_parameter_or_create(
                name="proj", shape=(v_width, embed_dim)).reshape(
                    (1, v_width, -1))
            x = F.batch_matmul(x, visual_proj)

        x = x.reshape((-1, embed_dim))

    return x

def crop(tensor, target_times):
    shape = tensor.shape[2]
    diff = shape - target_times
    if diff == 0:
        return tensor
    crop_start = diff // 2
    crop_end = diff - crop_start
    return F.slice(tensor,
                   start=(0, 0, crop_start),
                   stop=(tensor.shape[0], tensor.shape[1], shape - crop_end),
                   step=(1, 1, 1))

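# Usage sketch (not from the original source). Center-crop a hypothetical
# (batch, channels, time) variable from 101 to 97 time steps.
t_in = nn.Variable((2, 4, 101))
t_out = crop(t_in, 97)  # t_out.shape == (2, 4, 97)
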
def test_slice_arguments(indices):
    import nnabla.functions as F
    init, start, stop, step = indices
    x = nn.Variable(init)
    y = F.slice(x, start, stop, step)
    if step[0] > 0:
        z = x[:, :5]
    else:
        z = x[::-1, :-6:-1]
    assert y.parent.arguments == z.parent.arguments

def downsampling_block(x, i):
    with nn.parameter_scope(('ds_block-%2d' % i)):
        ds = af(conv(x, (num_initial_filters + num_initial_filters * i),
                     (filter_size,), (7,), name='conv'))
        ds_slice = F.slice(ds, start=(0, 0, 0), stop=ds.shape,
                           step=(1, 1, 2))  # Decimate by factor of 2
        # ds_slice = F.average_pooling(ds, kernel=(1, 1,), stride=(1, 2,), pad=(0, 0,))
    return ds, ds_slice

def network(self, x_in, name='LSTM', n_hidden=32):
    hlist = []
    for x_i in F.split(x_in, axis=1):
        self._h, self._c = self._lstm_cell(name, n_hidden, x_i,
                                           self._h, self._c)
        with nn.parameter_scope(name + '_Affine_2'):
            self._h = PF.affine(self._h, (self._cols_size,))
        hlist.append(self._h)
    h = F.stack(*hlist, axis=1)
    h = F.slice(h,
                start=[0, h.shape[1] - self._x_output_length, 0],
                stop=[self._batch_size, h.shape[1], self._cols_size],
                step=[1, 1, 1])
    return h

def slice_data_grad_backward(inputs, start=None, stop=None, step=None):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdx = inputs[0]
    gdy = F.slice(gdx, start, stop, step)
    return gdy

def test_slice_forward_special(seed, inshape, start, stop, step, ctx, fname):
    x_data = np.random.rand(*inshape)
    # Numpy
    s = [slice(start[axis], stop[axis], step[axis])
         for axis in range(len(start))]
    x_data_key = ref_slice(x_data, start, stop, step)
    # NNabla
    with nn.context_scope(ctx):
        x = nn.Variable.from_numpy_array(x_data)
        x_key = F.slice(x, start, stop, step)
        x_key.forward()
    assert_allclose(x_data_key, x_key.d)

def network(x, d1, c1, d2, c2, test=False):
    # Input:x -> 1
    # OneHot -> 687
    h = F.one_hot(x, (687,))
    # LSTM1 -> 200
    with nn.parameter_scope('LSTM1'):
        h = network_LSTM(h, d1, c1, 687, 100, test)
    # Slice -> 100
    h1 = F.slice(h, (0,), (100,), (1,))
    # h2:CellOut -> 100
    h2 = F.slice(h, (100,), (200,), (1,))
    # LSTM2 -> 128
    with nn.parameter_scope('LSTM2'):
        h3 = network_LSTM(h1, d2, c2, 100, 64, test)
    # h4:DelayOut
    h4 = F.identity(h1)
    # Slice_2 -> 64
    h5 = F.slice(h3, (0,), (64,), (1,))
    # h6:CellOut_2 -> 64
    h6 = F.slice(h3, (64,), (128,), (1,))
    # Affine_2 -> 687
    h7 = PF.affine(h5, (687,), name='Affine_2')
    # h8:DelayOut_2
    h8 = F.identity(h5)
    # h7:Softmax
    h7 = F.softmax(h7)
    return h2, h4, h6, h8, h7

def chunk(x, num_chunk, axis):
    """
    Split `x` into `num_chunk` arrays along the specified axis.
    """
    shape = x.shape
    C = shape[axis]
    num_elems = (C + num_chunk - 1) // num_chunk

    ret = []
    for i in range(num_chunk):
        start = [0 for _ in shape]
        stop = [s for s in shape]
        start[axis] = i * num_elems
        stop[axis] = start[axis] + num_elems
        segment = F.slice(x, start=start, stop=stop)
        assert len(segment.shape) == len(x.shape)
        ret.append(segment)

    return ret

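# Usage sketch (not from the original source). Split a hypothetical
# (4, 10, 8) variable into two (4, 5, 8) segments along axis 1.
x_demo = nn.Variable((4, 10, 8))
first_half, second_half = chunk(x_demo, num_chunk=2, axis=1)
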
def test_slice_forward_special_case(seed, inshape, start, stop, step,
                                    empty_case, ctx, func_name):
    if empty_case:
        pytest.skip("Empty-NdArray raises error as NNabla specification")

    x_data = np.random.rand(*inshape)
    # Numpy
    s = [slice(start[axis], stop[axis], step[axis])
         for axis in range(len(start))]
    x_data_key = ref_slice(x_data, start, stop, step)
    # NNabla
    with nn.context_scope(ctx):
        x = nn.Variable.from_numpy_array(x_data)
        x_key = F.slice(x, start, stop, step)
        x_key.forward()
    assert np.allclose(x_data_key, x_key.d)

def image_preprocess(image, img_size=224, data_size=320, test=False):
    h, w = image.shape[2:]
    image = image / 255.0
    if test:
        _img_size = data_size * 0.875  # Ratio of size is 87.5%
        hs = (h - _img_size) / 2
        ws = (w - _img_size) / 2
        he = (h + _img_size) / 2
        we = (w + _img_size) / 2
        image = F.slice(image, (0, ws, hs), (3, we, he), (1, 1, 1))
        image = F.image_augmentation(image, (3, img_size, img_size),
                                     min_scale=0.8, max_scale=0.8)
    else:
        size = min(h, w)
        min_size = img_size * 1.1
        max_size = min_size * 2
        min_scale = min_size / size
        max_scale = max_size / size
        image = F.image_augmentation(image, (3, img_size, img_size),
                                     pad=(0, 0),
                                     min_scale=min_scale,
                                     max_scale=max_scale,
                                     angle=0.5,
                                     aspect_ratio=1.3,
                                     distortion=0.2,
                                     flip_lr=True,
                                     flip_ud=False,
                                     brightness=0.0,
                                     brightness_each=True,
                                     contrast=1.1,
                                     contrast_center=0.5,
                                     contrast_each=True,
                                     noise=0.0)
    image = image - 0.5
    return image

def augment(batch, aug_list, p_aug=1.0):
    if isinstance(p_aug, float):
        p_aug = nn.Variable.from_numpy_array(p_aug * np.ones((1,)))

    if "flip" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        batch_aug = F.random_flip(batch, axes=(2, 3))
        batch = F.where(
            F.greater(F.tile(p_aug, batch.shape[0]), rnd), batch_aug, batch)

    if "lrflip" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        batch_aug = F.random_flip(batch, axes=(3,))
        batch = F.where(
            F.greater(F.tile(p_aug, batch.shape[0]), rnd), batch_aug, batch)

    if "translation" in aug_list and batch.shape[2] >= 8:
        rnd = F.rand(shape=[batch.shape[0], ])
        # Currently nnabla does not support random_shift with border_mode="noise"
        mask = np.ones((1, 3, batch.shape[2], batch.shape[3]))
        mask[:, :, :, 0] = 0
        mask[:, :, :, -1] = 0
        mask[:, :, 0, :] = 0
        mask[:, :, -1, :] = 0
        batch_int = F.concatenate(
            batch, nn.Variable().from_numpy_array(mask), axis=0)
        batch_int_aug = F.random_shift(
            batch_int,
            shifts=(batch.shape[2] // 8, batch.shape[3] // 8),
            border_mode="nearest")
        batch_aug = F.slice(batch_int_aug, start=(0, 0, 0, 0),
                            stop=batch.shape)
        mask_var = F.slice(batch_int_aug, start=(batch.shape[0], 0, 0, 0),
                           stop=batch_int_aug.shape)
        batch_aug = batch_aug * F.broadcast(mask_var, batch_aug.shape)
        batch = F.where(
            F.greater(F.tile(p_aug, batch.shape[0]), rnd), batch_aug, batch)

    if "color" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        rnd_contrast = 1.0 + 0.5 * (
            2.0 * F.rand(shape=[batch.shape[0], 1, 1, 1]) - 1.0)  # from 0.5 to 1.5
        rnd_brightness = 0.5 * (
            2.0 * F.rand(shape=[batch.shape[0], 1, 1, 1]) - 1.0)  # from -0.5 to 0.5
        rnd_saturation = 2.0 * F.rand(
            shape=[batch.shape[0], 1, 1, 1])  # from 0.0 to 2.0
        # Brightness
        batch_aug = batch + rnd_brightness
        # Saturation
        mean_s = F.mean(batch_aug, axis=1, keepdims=True)
        batch_aug = rnd_saturation * (batch_aug - mean_s) + mean_s
        # Contrast
        mean_c = F.mean(batch_aug, axis=(1, 2, 3), keepdims=True)
        batch_aug = rnd_contrast * (batch_aug - mean_c) + mean_c
        batch = F.where(
            F.greater(F.tile(p_aug, batch.shape[0]), rnd), batch_aug, batch)

    if "cutout" in aug_list and batch.shape[2] >= 16:
        batch = F.random_erase(batch, prob=p_aug.d[0],
                               replacements=(0.0, 0.0))

    return batch

def decoder(target_action, target_action_type, target_node_type,
            target_parent_rule, target_parent_index, query_embed,
            query_embed_mask, rule_num, token_num, node_type_num,
            embedding_size, node_type_embedding_size, state_size,
            hidden_size, previous_action_embed=None, initial_state=None,
            initial_cell=None, hist=None, dropout=0.0, train=True):
    """
    target_action:       (batch_size, max_action_length, 3)
    target_action_type:  (batch_size, max_action_length, 3)
    target_node_type:    (batch_size, max_action_length)
    target_parent_rule:  (batch_size, max_action_length)
    target_parent_index: (batch_size, max_action_length)
    """
    batch_size, max_action_length, _ = target_action.shape

    # Node type embedding
    with nn.parameter_scope("node_type_embedding"):
        target_node_type_embed = embedding(
            target_node_type, node_type_num, node_type_embedding_size,
            mask_zero=False, init=I.NormalInitializer(0.01))

    # Previous action embedding
    # (batch_size, max_action_length)
    target_apply_rule, target_gen_token, target_copy_token = split(
        target_action, axis=2)
    with nn.parameter_scope("rule_embedding"):
        # (batch_size, max_action_length, embedding_size)
        target_apply_rule_embed = embedding(
            target_apply_rule, rule_num, embedding_size,
            mask_zero=False, init=I.NormalInitializer(0.01))
        target_apply_rule_embed = F.reshape(
            target_apply_rule_embed,
            (batch_size, max_action_length, 1, embedding_size))
    with nn.parameter_scope("token_embedding"):
        # (batch_size, max_action_length, embedding_size)
        target_gen_token_embed = embedding(
            target_gen_token, token_num, embedding_size,
            mask_zero=False, init=I.NormalInitializer(0.01))
        target_gen_token_embed = F.reshape(
            target_gen_token_embed,
            (batch_size, max_action_length, 1, embedding_size))
    target_copy_token = F.reshape(
        target_copy_token, (batch_size, max_action_length, 1, 1))
    target_copy_token = F.broadcast(
        target_copy_token, (batch_size, max_action_length, 1, embedding_size))
    target_copy_token *= 0
    # (batch_size, max_action_length, 3, embedding_size)
    target_action_embed = concatenate(
        target_apply_rule_embed, target_gen_token_embed, target_copy_token,
        axis=2)
    target_action_type2 = F.reshape(
        target_action_type, (batch_size, max_action_length, 3, 1))
    target_action_type2 = F.broadcast(
        target_action_type2,
        (batch_size, max_action_length, 3, embedding_size))
    # (batch_size, max_action_length, 3, embedding_size)
    target_action_embed = target_action_embed * target_action_type2
    # (batch_size, max_action_length, embedding_size)
    target_action_embed = F.sum(target_action_embed, axis=2)

    # Shift action
    if previous_action_embed is None:
        previous_action_embed = nn.Variable(
            (batch_size, 1, embedding_size), need_grad=False)
        previous_action_embed.data.zero()
    # (batch_size, max_action_length + 1, embedding_size)
    target_action_embed = concatenate(
        previous_action_embed, target_action_embed, axis=1)
    # (batch_size, max_action_length, embedding_size)
    target_action_embed = F.slice(
        target_action_embed,
        start=[0, 0, 0],
        stop=[batch_size, max_action_length, embedding_size])

    # Parent action embedding
    # (batch_size, max_action_length)
    parent_rule_mask = 1 - F.equal_scalar(target_parent_rule, 0)
    parent_rule_mask = F.reshape(
        parent_rule_mask, (batch_size, max_action_length, 1))
    parent_rule_mask = F.broadcast(
        parent_rule_mask, (batch_size, max_action_length, embedding_size))
    with nn.parameter_scope("rule_embedding"):
        target_parent_rule_embed = embedding(
            target_parent_rule, rule_num, embedding_size, mask_zero=False)
    target_parent_rule_embed = parent_rule_mask * target_parent_rule_embed

    # (batch_size, max_action_length, embedding_size * 2 + node_type_embedding_size)
    decoder_input = concatenate(target_action_embed, target_node_type_embed,
                                target_parent_rule_embed, axis=2)
    # (batch_size, max_action_length)
    target_action_mask = 1 - F.equal_scalar(
        F.sum(target_action_type, axis=2), 0)
    with nn.parameter_scope("decoder"):
        decoder_hidden_states, decoder_cells, ctx_vectors, new_hist = cond_att_lstm(
            decoder_input, target_parent_index, target_action_mask,
            query_embed, query_embed_mask, state_size, hidden_size,
            initial_state=initial_state, initial_cell=initial_cell,
            hist=hist, dropout=dropout, train=train)
    return (target_action_embed, decoder_hidden_states, decoder_cells,
            ctx_vectors, target_action_mask, new_hist)

def LSTMAttentionDecoder(inputs=None,
                         encoder_output=None,
                         initial_state=None,
                         return_sequences=False,
                         return_state=False,
                         inference_params=None,
                         name='lstm'):
    if inputs is None:
        assert inference_params is not None, \
            'if inputs is None, inference_params must not be None.'
    else:
        sentence_length = inputs.shape[1]

    assert type(initial_state) is tuple or type(initial_state) is list, \
        'initial_state must be a tuple or a list.'
    assert len(initial_state) == 2, \
        'initial_state must have only two states.'

    c0, h0 = initial_state
    assert c0.shape == h0.shape, 'shapes of initial_state must be same.'
    batch_size, units = c0.shape

    cell = c0
    hidden = h0

    hs = []

    if inference_params is None:
        xs = F.split(F.slice(inputs,
                             stop=(batch_size, sentence_length - 1, units)),
                     axis=1)
        pad = nn.Variable.from_numpy_array(
            np.array([w2i_source['pad']] * batch_size))
        xs = [PF.embed(pad, vocab_size_source, embedding_size,
                       name='enc_embeddings')] + list(xs)
        compute_context = GlobalAttention(encoder_output, 1024)
        for x in xs:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
            context = compute_context(hidden)
            h_t = F.tanh(PF.affine(F.concatenate(context, hidden, axis=1),
                                   1024, with_bias=False, name='Wc'))
            hs.append(h_t)
    else:
        assert batch_size == 1, 'batch size of inference mode must be 1.'
        embed_weight, output_weight, output_bias = inference_params
        pad = nn.Variable.from_numpy_array(
            np.array([w2i_source['pad']] * batch_size))
        x = PF.embed(pad, vocab_size_source, embedding_size,
                     name='enc_embeddings')
        compute_context = GlobalAttention(encoder_output, 1024)
        word_index = 0
        ret = []
        i = 0
        while i2w_target[word_index] != '。' and i < 20:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
            context = compute_context(hidden)
            h_t = F.tanh(PF.affine(F.concatenate(context, hidden, axis=1),
                                   1024, with_bias=False, name='Wc'))
            output = F.affine(h_t, output_weight, bias=output_bias)
            word_index = np.argmax(output.d[0])
            ret.append(word_index)
            x = nn.Variable.from_numpy_array(
                np.array([word_index], dtype=np.int32))
            x = F.embed(x, embed_weight)
            i += 1
        return ret

    if return_sequences:
        ret = F.stack(*hs, axis=1)
    else:
        ret = hs[-1]

    if return_state:
        return ret, cell, hidden
    else:
        return ret

def synthesis(self, w_mixed, constant_bc, seed=-1, noises_in=None):
    batch_size = w_mixed.shape[0]

    if noises_in is None:
        noise = F.randn(shape=(batch_size, 1, 4, 4), seed=seed)
    else:
        noise = noises_in[0]
    w = F.reshape(F.slice(w_mixed,
                          start=(0, 0, 0),
                          stop=(w_mixed.shape[0], 1, w_mixed.shape[2]),
                          step=(1, 1, 1)),
                  (w_mixed.shape[0], w_mixed.shape[2]), inplace=False)
    h = styled_conv_block(constant_bc, w, noise, res=self.resolutions[0],
                          outmaps=self.feature_map_dim, namescope="Conv")
    torgb = styled_conv_block(h, w, noise=None, res=self.resolutions[0],
                              outmaps=3, inmaps=self.feature_map_dim,
                              kernel_size=1, pad_size=0, demodulate=False,
                              namescope="ToRGB", act=F.identity)

    # initial feature maps
    outmaps = self.feature_map_dim
    inmaps = self.feature_map_dim

    downsize_index = 4 if self.resolutions[-1] in [512, 1024] else 3

    # resolution 8 x 8 - 1024 x 1024
    for i in range(1, len(self.resolutions)):
        i1 = (2 + i) * 2 - 5
        i2 = (2 + i) * 2 - 4
        i3 = (2 + i) * 2 - 3

        w_ = F.reshape(F.slice(w_mixed,
                               start=(0, i1, 0),
                               stop=(w_mixed.shape[0], i1 + 1,
                                     w_mixed.shape[2]),
                               step=(1, 1, 1)),
                       w.shape, inplace=False)
        if i > downsize_index:
            outmaps = outmaps // 2
        curr_shape = (batch_size, 1, self.resolutions[i], self.resolutions[i])
        if noises_in is None:
            noise = F.randn(shape=curr_shape, seed=seed)
        else:
            noise = noises_in[2 * i - 1]
        h = styled_conv_block(h, w_, noise, res=self.resolutions[i],
                              outmaps=outmaps, inmaps=inmaps, kernel_size=3,
                              up=True, namescope="Conv0_up")

        w_ = F.reshape(F.slice(w_mixed,
                               start=(0, i2, 0),
                               stop=(w_mixed.shape[0], i2 + 1,
                                     w_mixed.shape[2]),
                               step=(1, 1, 1)),
                       w.shape, inplace=False)
        if i > downsize_index:
            inmaps = inmaps // 2
        if noises_in is None:
            noise = F.randn(shape=curr_shape, seed=seed)
        else:
            noise = noises_in[2 * i]
        h = styled_conv_block(h, w_, noise, res=self.resolutions[i],
                              outmaps=outmaps, inmaps=inmaps, kernel_size=3,
                              pad_size=1, namescope="Conv1")

        w_ = F.reshape(F.slice(w_mixed,
                               start=(0, i3, 0),
                               stop=(w_mixed.shape[0], i3 + 1,
                                     w_mixed.shape[2]),
                               step=(1, 1, 1)),
                       w.shape, inplace=False)
        curr_torgb = styled_conv_block(h, w_, noise=None,
                                       res=self.resolutions[i], outmaps=3,
                                       inmaps=inmaps, kernel_size=1,
                                       pad_size=0, demodulate=False,
                                       namescope="ToRGB", act=F.identity)
        torgb = F.add2(curr_torgb, upsample_2d(torgb, k=[1, 3, 3, 1]))

    return torgb

def LSTMDecoder(inputs=None,
                initial_state=None,
                return_sequences=False,
                return_state=False,
                inference_params=None,
                name='lstm'):
    if inputs is None:
        assert inference_params is not None, \
            'if inputs is None, inference_params must not be None.'
    else:
        sentence_length = inputs.shape[1]

    assert type(initial_state) is tuple or type(initial_state) is list, \
        'initial_state must be a tuple or a list.'
    assert len(initial_state) == 2, \
        'initial_state must have only two states.'

    c0, h0 = initial_state
    assert c0.shape == h0.shape, 'shapes of initial_state must be same.'
    batch_size, units = c0.shape

    cell = c0
    hidden = h0

    hs = []

    if inference_params is None:
        xs = F.split(F.slice(inputs,
                             stop=(batch_size, sentence_length - 1, units)),
                     axis=1)
        xs = [nn.Variable.from_numpy_array(np.ones(xs[0].shape))] + list(xs)
        for x in xs:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
            hs.append(hidden)
    else:
        assert batch_size == 1, 'batch size of inference mode must be 1.'
        embed_weight, output_weight, output_bias = inference_params
        x = nn.Variable.from_numpy_array(np.ones((1, embed_weight.shape[1])))
        word_index = 0
        ret = []
        i = 0
        while i2w_target[word_index] != period and i < 20:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
            output = F.affine(hidden, output_weight, bias=output_bias)
            word_index = np.argmax(output.d[0])
            ret.append(word_index)
            x = nn.Variable.from_numpy_array(
                np.array([word_index], dtype=np.int32))
            x = F.embed(x, embed_weight)
            i += 1
        return ret

    if return_sequences:
        ret = F.stack(*hs, axis=1)
    else:
        ret = hs[-1]

    if return_state:
        return ret, cell, hidden
    else:
        return ret

def ssd_loss(_ssd_confs, _ssd_locs, _label, _alpha=1):
    # input
    #   _ssd_confs : type=nn.Variable, prediction of class.    shape=(batch_size, default boxes, class num + 1)
    #   _ssd_locs  : type=nn.Variable, prediction of location. shape=(batch_size, default boxes, 4)
    #   _label     : type=nn.Variable, shape=(batch_size, default boxes, class num + 1 + 4)
    #   _alpha     : type=float, hyperparameter. this is weight of loc_loss.
    # output
    #   loss       : type=nn.Variable

    def smooth_L1(__pred_locs, __label_locs):
        # input
        #   __pred_locs  : type=nn.Variable,
        #   __label_locs : type=nn.Variable,
        # output
        #   _loss        : type=nn.Variable, loss of location.
        return F.mul_scalar(F.huber_loss(__pred_locs, __label_locs), 0.5)

    # label_conf : type=nn.Variable, label of class.    shape=(batch_size, default boxes, class num + 1) (after one_hot)
    # label_loc  : type=nn.Variable, label of location. shape=(batch_size, default boxes, 4)
    label_conf = F.slice(
        _label,
        start=(0, 0, 4),
        stop=_label.shape,
        step=(1, 1, 1))
    label_loc = F.slice(
        _label,
        start=(0, 0, 0),
        stop=(_label.shape[0], _label.shape[1], 4),
        step=(1, 1, 1))

    # conf
    ssd_pos_conf, ssd_neg_conf = ssd_separate_conf_pos_neg(_ssd_confs)
    label_conf_pos, _ = ssd_separate_conf_pos_neg(label_conf)
    # pos
    pos_loss = F.sum(
        F.mul2(F.softmax(ssd_pos_conf, axis=2), label_conf_pos),
        axis=2)
    # neg
    neg_loss = F.sum(F.log(ssd_neg_conf), axis=2)
    conf_loss = F.sum(F.sub2(pos_loss, neg_loss), axis=1)

    # loc
    # pos_label: =1 (if there is something), =0 (if there is nothing)
    pos_label = F.sum(label_conf_pos, axis=2)
    loc_loss = F.sum(
        F.mul2(F.sum(smooth_L1(_ssd_locs, label_loc), axis=2), pos_label),
        axis=1)

    # [2019/07/18]
    label_match_default_box_num = F.slice(
        _label,
        start=(0, 0, _label.shape[2] - 1),
        stop=_label.shape,
        step=(1, 1, 1))
    label_match_default_box_num = F.sum(label_match_default_box_num, axis=1)
    label_match_default_box_num = F.r_sub_scalar(
        label_match_default_box_num, _label.shape[1])
    label_match_default_box_num = F.reshape(
        label_match_default_box_num,
        (label_match_default_box_num.shape[0],),
        inplace=False)
    # label_match_default_box_num : type=nn.Variable, inverse number of default boxes that matches with pos.

    # loss
    loss = F.mul2(
        F.add2(conf_loss, F.mul_scalar(loc_loss, _alpha)),
        label_match_default_box_num)
    loss = F.mean(loss)

    return loss

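# Usage sketch (not from the original source). Shapes are hypothetical:
# 2 images, 100 default boxes, 20 classes plus one negative channel; _label
# packs 4 box coordinates followed by the one-hot class channels, matching
# the slicing done inside ssd_loss.
pred_conf = nn.Variable((2, 100, 21))
pred_loc = nn.Variable((2, 100, 4))
label = nn.Variable((2, 100, 25))
loss = ssd_loss(pred_conf, pred_loc, label, _alpha=1.0)
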