def normalize(x, resize_size):
    # Standardize the input with per-channel mean and std statistics.
    mean_std = MeanStd(resize_size, x.shape[0])
    ch_mean = mean_std.ch_mean
    ch_std = mean_std.ch_std
    x = F.sub2(x, ch_mean)
    x = F.div2(x, ch_std)
    return x
def f_layer_normalization(inp, beta, gamma):
    # Normalize over all axes except the batch axis, then apply the
    # learnable scale (gamma) and shift (beta).
    use_axis = [x for x in range(1, inp.ndim)]
    inp = F.sub2(inp, F.mean(inp, axis=use_axis, keepdims=True))
    inp = F.div2(
        inp,
        F.pow_scalar(
            F.mean(F.pow_scalar(inp, 2), axis=use_axis, keepdims=True), 0.5))
    return inp * F.broadcast(gamma, inp.shape) + F.broadcast(beta, inp.shape)
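# A minimal usage sketch for f_layer_normalization, assuming nnabla is
# importable; the per-channel (1, C, 1, 1) shapes chosen for beta and gamma
# here are an assumption, not taken from the source.
import numpy as np
import nnabla as nn

x = nn.Variable.from_numpy_array(
    np.random.randn(4, 8, 16, 16).astype(np.float32))
beta = nn.Variable.from_numpy_array(np.zeros((1, 8, 1, 1), dtype=np.float32))
gamma = nn.Variable.from_numpy_array(np.ones((1, 8, 1, 1), dtype=np.float32))
y = f_layer_normalization(x, beta, gamma)
y.forward()
print(y.shape)  # (4, 8, 16, 16); each sample now has zero mean, unit std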
def minibatch_stddev(x, eps=1e-8):
    # Append the average standard deviation over the batch as an extra
    # feature map, as used in progressive-growing GAN discriminators.
    b, _, h, w = x.shape
    mean = F.mean(x, axis=0, keepdims=True)
    std = F.pow_scalar(
        F.mean(F.pow_scalar(F.sub2(x, F.broadcast(mean, x.shape)), 2.),
               axis=0, keepdims=True) + eps, 0.5)
    std_channel = F.broadcast(F.mean(std, keepdims=True), (b, 1, h, w))
    x = F.concatenate(x, std_channel, axis=1)
    return x
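# Quick shape check for minibatch_stddev: the batch-wide statistic is
# appended as one extra feature map, so the channel count grows by one.
import numpy as np
import nnabla as nn

x = nn.Variable.from_numpy_array(
    np.random.randn(8, 512, 4, 4).astype(np.float32))
y = minibatch_stddev(x)
y.forward()
print(y.shape)  # (8, 513, 4, 4)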
def __call__(self, gen_rgb_out):
    out = conv_layer(gen_rgb_out, inmaps=3, outmaps=self.channels[0],
                     kernel_size=1, name_scope='Discriminator/Convinitial')
    inmaps = self.channels[0]

    for i in range(1, len(self.resolutions)):
        res = out.shape[2]
        outmaps = self.channels[i]
        out = res_block(out, res=res, outmaps=outmaps, inmaps=inmaps)
        inmaps = outmaps

    # Grouped minibatch standard-deviation feature.
    N, C, H, W = out.shape
    group = min(N, self.stddev_group)
    stddev_mean = F.reshape(
        out, (group, -1, self.stddev_feat, C // self.stddev_feat, H, W),
        inplace=False)
    # Equivalent to F.mean(stddev_mean, axis=0, keepdims=True).
    mean = F.mul_scalar(F.sum(stddev_mean, axis=0, keepdims=True),
                        1.0 / stddev_mean.shape[0], inplace=False)
    stddev_mean = F.mean(F.pow_scalar(F.sub2(stddev_mean, F.broadcast(
        mean, stddev_mean.shape)), 2.), axis=0, keepdims=False)
    stddev_mean = F.pow_scalar(F.add_scalar(
        stddev_mean, 1e-8, inplace=False), 0.5, inplace=False)
    stddev_mean = F.mean(stddev_mean, axis=[2, 3, 4], keepdims=True)
    stddev_mean = F.reshape(
        stddev_mean, stddev_mean.shape[:2] + stddev_mean.shape[3:],
        inplace=False)
    out = F.concatenate(out, F.tile(stddev_mean, (group, 1, H, W)), axis=1)

    out = conv_layer(out, inmaps=out.shape[1], outmaps=self.channels[-1],
                     kernel_size=3, name_scope='Discriminator/Convfinal')
    out = F.reshape(out, (N, -1), inplace=False)

    # Linear layers with equalized learning rate.
    lrmul = 1
    scale = 1 / (out.shape[1]**0.5) * lrmul
    W, bias = weight_init_fn(
        (out.shape[-1], self.channels[-1]),
        weight_var='Discriminator/final_linear_1/affine')
    out = F.affine(out, W * scale, bias * lrmul)
    out = F.mul_scalar(F.leaky_relu(
        out, alpha=0.2, inplace=False), np.sqrt(2), inplace=False)
    scale = 1 / (out.shape[1]**0.5) * lrmul
    W, bias = weight_init_fn(
        (out.shape[-1], 1),
        weight_var='Discriminator/final_linear_2/affine')
    out = F.affine(out, W * scale, bias * lrmul)
    return out
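# Shape walk-through of the grouped stddev feature above, as a standalone
# numpy sketch. stddev_group=4 and stddev_feat=1 are assumed values (the
# common StyleGAN2 defaults): each group of samples contributes one scalar
# statistic that is tiled back over the spatial grid.
import numpy as np

N, C, H, W = 8, 512, 4, 4
group, feat = 4, 1
x = np.random.randn(N, C, H, W)
y = x.reshape(group, -1, feat, C // feat, H, W)
std = np.sqrt(((y - y.mean(axis=0, keepdims=True)) ** 2).mean(axis=0) + 1e-8)
stat = std.mean(axis=(2, 3, 4), keepdims=True)        # (N//group, feat, 1, 1, 1)
stat = stat.reshape(stat.shape[:2] + stat.shape[3:])  # (N//group, feat, 1, 1)
out = np.concatenate([x, np.tile(stat, (group, 1, H, W))], axis=1)
print(out.shape)  # (8, 513, 4, 4)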
def propagate(h, edges, state_size=None,
              w_initializer=None, u_initializer1=None, u_initializer2=None,
              bias_initializer=None, edge_initializers=None):
    """
    Propagate vertex representations with a GRU-style update.

    Arguments:
    h -- the input vertex representations (nnabla.Variable with shape (|V|, D))
    edges -- the dictionary that represents the graph edges ({label: [in, out]})
    state_size -- (optional) the size of the hidden state (h.shape[1] is used
        if this argument is None)
    w_initializer -- (optional)
    u_initializer1 -- (optional)
    u_initializer2 -- (optional)
    bias_initializer -- (optional)
    edge_initializers -- (optional)

    Return value
    - Returns a variable with shape (|V|, D)
    """
    if state_size is None:
        state_size = h.shape[1]
    h_size = h.shape[1]
    with nn.parameter_scope("activate"):
        a = activate(h, edges, state_size,
                     bias_initializer=bias_initializer,
                     edge_initializers=edge_initializers)
    with nn.parameter_scope("W_zr"):
        ws = PF.affine(a, (3, h_size), with_bias=False, w_init=w_initializer)
        (z1, r1, h_hat1) = split(ws, axis=1)
    with nn.parameter_scope("U_zr"):
        us = PF.affine(h, (2, state_size), with_bias=False,
                       w_init=u_initializer1)
        (z2, r2) = split(us, axis=1)
    z = F.sigmoid(F.add2(z1, z2))  # update gate
    r = F.sigmoid(F.add2(r1, r2))  # reset gate
    with nn.parameter_scope("U"):
        h_hat2 = PF.affine(F.mul2(r, h), state_size, with_bias=False,
                           w_init=u_initializer2)
    h_hat = F.tanh(F.add2(h_hat1, h_hat2))  # candidate state
    # h' = (1 - z) * h + z * h_hat
    return F.add2(F.sub2(h, F.mul2(z, h)), F.mul2(z, h_hat))
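# A hedged usage sketch for propagate. It assumes the module-level helpers
# `activate` and `split` used above are defined, and that `edges` maps an
# edge label to its [in, out] vertex-index pairs; the names and the edge
# layout come from the docstring, while the concrete values are made up.
import numpy as np
import nnabla as nn

num_vertices, dim = 5, 16
h = nn.Variable.from_numpy_array(
    np.random.randn(num_vertices, dim).astype(np.float32))
edges = {"connects": [[0, 1], [1, 2], [3, 4]]}
with nn.parameter_scope("step1"):
    h_next = propagate(h, edges)  # shape (5, 16): one update per vertex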
def __call__(self, x): with nn.parameter_scope("vgg19"): results = list() input = deprocess(x) x_in = F.sub2(input * 255.0, self.mean) features = get_vgg_feat(x_in) for i in range(len(features)): orig_deep_feature = features[i] orig_len = (F.sum( (orig_deep_feature**2), axis=[3], keepdims=True) + 1e-12)**0.5 results.append(orig_deep_feature / orig_len) return results
def __rsub__(self, other):
    """
    Element-wise subtraction.

    Part of the implementation of the subtraction operator.

    Args:
        other (float or ~nnabla.Variable): Internally calls
            :func:`~nnabla.functions.sub2` or
            :func:`~nnabla.functions.r_sub_scalar` according to the type.

    Returns: :class:`nnabla.Variable`
    """
    import nnabla.functions as F
    if isinstance(other, Variable):
        return F.sub2(other, self)
    return F.r_sub_scalar(self, other)
def __sub__(self, other):
    """
    Element-wise subtraction.

    Implements the subtraction operator expression ``A - B``, together with
    :func:`~nnabla.variable.__rsub__`. When a scalar is specified for
    ``other``, this function performs an element-wise operation for all
    elements in ``self``.

    Args:
        other (float or ~nnabla.Variable): Internally calls
            :func:`~nnabla.functions.sub2` or
            :func:`~nnabla.functions.add_scalar` (with the negated operand)
            according to the type.

    Returns: :class:`nnabla.Variable`
    """
    import nnabla.functions as F
    if isinstance(other, Variable):
        return F.sub2(self, other)
    return F.add_scalar(self, -other)
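# A small demonstration of the two overloads above: Variable - Variable
# dispatches to F.sub2, Variable - scalar to F.add_scalar with the negated
# operand, and scalar - Variable to F.r_sub_scalar.
import numpy as np
import nnabla as nn

a = nn.Variable.from_numpy_array(np.array([3.0, 5.0], dtype=np.float32))
b = nn.Variable.from_numpy_array(np.array([1.0, 2.0], dtype=np.float32))
diff, shift, rsub = a - b, a - 1.0, 1.0 - a
for y in (diff, shift, rsub):
    y.forward()
print(diff.d, shift.d, rsub.d)  # [2. 3.] [2. 4.] [-2. -4.]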
def __call__(self, x): with nn.parameter_scope("VGG19"): self.x = F.div2(F.sub2(x, self.mean), self.std) return vgg_prediction(self.x, finetune=True)
def ssd_loss(_ssd_confs, _ssd_locs, _label, _alpha=1):
    # input
    #     _ssd_confs : type=nn.Variable, class predictions. shape=(batch_size, default boxes, class num + 1)
    #     _ssd_locs : type=nn.Variable, location predictions. shape=(batch_size, default boxes, 4)
    #     _label : type=nn.Variable, shape=(batch_size, default boxes, class num + 1 + 4)
    #     _alpha : type=float, hyperparameter weighting loc_loss.
    # output
    #     loss : type=nn.Variable

    def smooth_L1(__pred_locs, __label_locs):
        # input
        #     __pred_locs : type=nn.Variable,
        #     __label_locs : type=nn.Variable,
        # output
        #     _loss : type=nn.Variable, loss of location.
        return F.mul_scalar(F.huber_loss(__pred_locs, __label_locs), 0.5)

    # label_conf : type=nn.Variable, class labels. shape=(batch_size, default boxes, class num + 1) (after one_hot)
    # label_loc : type=nn.Variable, location labels. shape=(batch_size, default boxes, 4)
    label_conf = F.slice(_label, start=(0, 0, 4),
                         stop=_label.shape, step=(1, 1, 1))
    label_loc = F.slice(_label, start=(0, 0, 0),
                        stop=(_label.shape[0], _label.shape[1], 4),
                        step=(1, 1, 1))

    # conf
    ssd_pos_conf, ssd_neg_conf = ssd_separate_conf_pos_neg(_ssd_confs)
    label_conf_pos, _ = ssd_separate_conf_pos_neg(label_conf)
    # pos
    pos_loss = F.sum(
        F.mul2(F.softmax(ssd_pos_conf, axis=2), label_conf_pos), axis=2)
    # neg
    neg_loss = F.sum(F.log(ssd_neg_conf), axis=2)
    conf_loss = F.sum(F.sub2(pos_loss, neg_loss), axis=1)

    # loc
    # pos_label is 1 where a default box matched an object, 0 otherwise.
    pos_label = F.sum(label_conf_pos, axis=2)
    loc_loss = F.sum(
        F.mul2(F.sum(smooth_L1(_ssd_locs, label_loc), axis=2), pos_label),
        axis=1)

    # [2019/07/18]
    label_match_default_box_num = F.slice(
        _label, start=(0, 0, _label.shape[2] - 1),
        stop=_label.shape, step=(1, 1, 1))
    label_match_default_box_num = F.sum(label_match_default_box_num, axis=1)
    label_match_default_box_num = F.r_sub_scalar(
        label_match_default_box_num, _label.shape[1])
    label_match_default_box_num = F.reshape(
        label_match_default_box_num,
        (label_match_default_box_num.shape[0],), inplace=False)
    # label_match_default_box_num : type=nn.Variable, count of default boxes
    # matched with pos (total boxes minus the summed last label channel).

    # loss
    loss = F.mul2(F.add2(conf_loss, F.mul_scalar(loc_loss, _alpha)),
                  label_match_default_box_num)
    loss = F.mean(loss)
    return loss
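# Worked check of the smooth_L1 helper above. nnabla's F.huber_loss computes
# d**2 for |d| <= delta (default 1.0) and delta * (2|d| - delta) otherwise,
# so halving it yields the usual smooth-L1 values 0.5 * d**2 and |d| - 0.5.
import numpy as np
import nnabla as nn
import nnabla.functions as F

pred = nn.Variable.from_numpy_array(np.array([[0.5, 3.0]], dtype=np.float32))
target = nn.Variable.from_numpy_array(np.zeros((1, 2), dtype=np.float32))
loss = F.mul_scalar(F.huber_loss(pred, target), 0.5)
loss.forward()
print(loss.d)  # [[0.125 2.5]] -> 0.5 * 0.5**2 and 3.0 - 0.5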
def __call__(self, gen_rgb_out, patch_switch=False, index=0):
    out = conv_layer(gen_rgb_out, inmaps=3, outmaps=self.channels[0],
                     kernel_size=1, name_scope='Discriminator/Convinitial')
    inmaps = self.channels[0]
    out_list = [out]

    for i in range(1, len(self.resolutions)):
        res = out.shape[2]
        outmaps = self.channels[i]
        out = res_block(out, res=res, outmaps=outmaps, inmaps=inmaps)
        inmaps = outmaps
        out_list.append(out)

    if patch_switch:
        # Collect intermediate LeakyReLU activations of the res_block
        # convolutions at 32x32 and 16x16 resolutions.
        GV_class = GetVariablesOnGraph(out)
        GF_class = GetFunctionFromInput(out, func_type_list=['LeakyReLU'])
        feature_dict = OrderedDict()
        for key in GV_class.coef_dict_on_graph:
            if ('res_block' in key and '/W' in key) and ('Conv1' in key or 'Conv2' in key):
                feature_var = GF_class.functions[key][0].outputs[
                    0].function_references[0].outputs[0]
                if feature_var.shape[2:] in ((32, 32), (16, 16)):
                    feature_dict[key] = feature_var

    # Grouped minibatch standard-deviation feature
    # (see the numpy walk-through above).
    N, C, H, W = out.shape
    group = min(N, self.stddev_group)
    stddev_mean = F.reshape(
        out, (group, -1, self.stddev_feat, C // self.stddev_feat, H, W),
        inplace=False)
    mean = F.mul_scalar(F.sum(stddev_mean, axis=0, keepdims=True),
                        1.0 / stddev_mean.shape[0], inplace=False)
    stddev_mean = F.mean(F.pow_scalar(
        F.sub2(stddev_mean, F.broadcast(mean, stddev_mean.shape)), 2.),
        axis=0, keepdims=False)
    stddev_mean = F.pow_scalar(
        F.add_scalar(stddev_mean, 1e-8, inplace=False), 0.5, inplace=False)
    stddev_mean = F.mean(stddev_mean, axis=[2, 3, 4], keepdims=True)
    stddev_mean = F.reshape(
        stddev_mean, stddev_mean.shape[:2] + stddev_mean.shape[3:],
        inplace=False)
    out = F.concatenate(out, F.tile(stddev_mean, (group, 1, H, W)), axis=1)

    out = conv_layer(out, inmaps=out.shape[1], outmaps=self.channels[-1],
                     kernel_size=3, name_scope='Discriminator/Convfinal')
    out = F.reshape(out, (N, -1), inplace=False)

    # Linear Layers
    lrmul = 1
    scale = 1 / (out.shape[1]**0.5) * lrmul
    W, bias = weight_init_fn(
        (out.shape[-1], self.channels[-1]),
        weight_var='Discriminator/final_linear_1/affine')
    out = F.affine(out, W * scale, bias * lrmul)
    out = F.mul_scalar(F.leaky_relu(out, alpha=0.2, inplace=False),
                       np.sqrt(2), inplace=False)
    scale = 1 / (out.shape[1]**0.5) * lrmul
    W, bias = weight_init_fn(
        (out.shape[-1], 1),
        weight_var='Discriminator/final_linear_2/affine')
    out = F.affine(out, W * scale, bias * lrmul)

    if patch_switch:
        return out, list(feature_dict.values())[index]
    else:
        return out
def __call__(self, input):
    out = F.mul_scalar(input, self._scale)
    out = F.sub2(out, self._mean)
    out = F.div2(out, self._std)
    return out
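# A free-form sketch of the same scale -> mean -> std pipeline, with
# hypothetical ImageNet-style statistics; the concrete values standing in
# for _scale, _mean and _std are assumptions, not taken from the source.
import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(
    np.random.uniform(0, 255, (1, 3, 8, 8)).astype(np.float32))
mean = nn.Variable.from_numpy_array(
    np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 3, 1, 1))
std = nn.Variable.from_numpy_array(
    np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 3, 1, 1))
y = F.div2(F.sub2(F.mul_scalar(x, 1.0 / 255.0), mean), std)
y.forward()
print(y.shape)  # (1, 3, 8, 8), standardized per channel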
def train():
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-train-examples", type=int, default=1600)
    parser.add_argument("--num-valid-examples", type=int, default=100)
    parser.add_argument("--accum-grad", type=int, default=32)
    parser.add_argument("--max-iter", type=int, default=6400)
    parser.add_argument("--valid-interval", type=int, default=100)
    parser.add_argument("--context", type=str, default="cpu")
    parser.add_argument("--device-id", type=int, default=0)
    args = parser.parse_args()

    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    ctx = get_extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # prepare dataset
    tdataset = []
    for i in range(args.num_train_examples):
        V, E = random_graph(rng)
        deg = degrees(V, E)
        tdataset.append(([V], [utils.from_adjacency_list(E)], [deg]))
    vdataset = []
    for i in range(args.num_valid_examples):
        V, E = random_graph(rng)
        deg = degrees(V, E)
        vdataset.append(([V], [utils.from_adjacency_list(E)], [deg]))

    # prepare data iterators
    tdata = data_iterator(SimpleDataSource2(tdataset, shuffle=True),
                          1, False, False, False)
    vdata = data_iterator(SimpleDataSource2(vdataset, shuffle=False),
                          1, False, False, False)

    # prepare monitors
    monitor = M.Monitor("./degree")
    tloss = M.MonitorSeries("Training Loss", monitor, interval=10)
    verror = M.MonitorSeries("Validation Error", monitor, interval=10)

    # prepare solver
    solver = S.Adam()

    # training loop
    for i in range(args.max_iter):
        l = 0
        # clear gradients accumulated in the previous iteration
        # (a no-op before the solver has parameters)
        solver.zero_grad()
        for b in range(args.accum_grad):
            # read data
            V, E, degree = tdata.next()
            V = V[0][0]
            E = E[0][0]
            degree = degree[0][0]

            # predict
            output = predict(V, E)

            # initialize solver
            if i == 0 and b == 0:
                solver.set_parameters(nn.get_parameters())

            # calculate loss
            label = nn.Variable(degree.shape)
            label.data.data = degree
            label = F.reshape(label, (len(V), 1))
            loss = F.mean(F.squared_error(output, label))

            # accumulate gradients
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            l += loss.data.data
        solver.update()
        tloss.add(i, l / args.accum_grad)

        if i % args.valid_interval == 0:
            # validation
            e = 0
            n = 0
            for b in range(vdata.size):
                # read data
                V, E, degree = vdata.next()
                V = V[0][0]
                E = E[0][0]
                degree = degree[0][0]

                output = predict(V, E)
                label = nn.Variable(degree.shape)
                label.data.data = degree
                label = F.reshape(label, (len(V), 1))
                # count predictions within 0.5 of the true degree
                error = F.sum(
                    F.less_scalar(F.abs(F.sub2(output, label)), 0.5))
                error.forward()
                e += error.data.data
                n += len(V)
            verror.add(i, e / n)