def test_dense_fc_layer():
    batch_size = 3
    fan_in = 4
    fan_out = 2
    epsilon = 1e-6

    # ---------- forward
    layer = DenseLayer(name='test', shape=[fan_in, fan_out], l2reg=0.01)
    X = np.random.randn(batch_size, fan_in)
    y = layer.forward(X)
    assert y.shape == (batch_size, fan_out)

    # ---------- backward
    # The final loss is np.sum, so prev_grads is an all-ones matrix and the
    # resulting derived_grads are exactly this layer's own gradients.
    dX = layer.backward(prev_grads=np.ones((batch_size, fan_out)))

    # ---------- check gradients on W, b, and the input
    var_grads = [('W', layer._W, layer._dW),
                 ('b', layer._b, layer._db),
                 ('input', X, dX)]
    for name, variable, grad in var_grads:
        print("\n************* checking numerical gradients on '{}', ......".format(name))
        numeric_grad = calc_numeric_grads(
            variable=variable,
            epsilon=epsilon,
            loss_fn=lambda: np.sum(layer.forward(X)) + layer.l2reg_loss)

        print("========== derived gradients = \n{}".format(grad))
        print("========== numeric gradients = \n{}".format(numeric_grad))

        is_equal = np.allclose(grad, numeric_grad)
        assert is_equal
        print("Equal = {}".format(is_equal))
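# The test above relies on a calc_numeric_grads helper that is not shown here.
# A minimal central-difference sketch, assuming `variable` is a numpy array
# perturbed in place so that loss_fn() observes the change:
def calc_numeric_grads(variable, epsilon, loss_fn):
    grad = np.zeros_like(variable, dtype=np.float64)
    it = np.nditer(variable, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = variable[idx]
        variable[idx] = old + epsilon   # loss at x + eps
        loss_plus = loss_fn()
        variable[idx] = old - epsilon   # loss at x - eps
        loss_minus = loss_fn()
        variable[idx] = old             # restore the original value
        grad[idx] = (loss_plus - loss_minus) / (2 * epsilon)
        it.iternext()
    return grad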
def __init__(self, opt):
    super(Discriminator, self).__init__()
    self.opt = opt
    self.dir_name = 'discriminator'
    self.resolution = opt.resolution
    self.resolution_log2 = int(np.log2(self.resolution))
    self.mbstd_group_size = 4
    self.mbstd_num_features = 1

    self.d_block_dict = nn.ModuleDict()
    for res in range(self.resolution_log2, 2, -1):
        if res == self.resolution_log2:
            self.fromrgb = FromRGB(res)
        self.d_block_dict[str(res)] = Block(res)

    self.minibatch_stddev_layer = MiniBatchStddevLayer(self.mbstd_group_size,
                                                       self.mbstd_num_features)

    # x: [32, 513, 4, 4]
    in_feature_map = self.cliped_features(1) + 1
    self.conv2d_layer = Conv2dLayer(in_feature_map=in_feature_map,
                                    out_feature_map=self.cliped_features(1),
                                    kernel=3, padding=1)
    self.fused_bias_act = FusedBiasActivation(channel=self.cliped_features(1),
                                              act='LeakyRelu')

    in_channel = self.cliped_features(0) * 4**2
    self.dense_layer1 = DenseLayer(in_channel=in_channel,
                                   feature_map=self.cliped_features(0))
    self.fused_bias_act1 = FusedBiasActivation(channel=self.cliped_features(0),
                                               act='LeakyRelu')
    self.dense_layer2 = DenseLayer(in_channel=self.cliped_features(0), feature_map=1)
    self.fused_bias_act2 = FusedBiasActivation(channel=1)
def __init__(self, dense_fields, vocab_infos, embed_fields, hidden_units, L2, optimizer):
    """
    :param dense_fields: a list of tuples (field_name, field's input-dim)
    :param vocab_infos: a list of tuples, each tuple is (vocab_name, vocab_size, embed_size)
    :param embed_fields: a list of tuples (field_name, vocab_name)
    :param hidden_units: a list of ints, n_units for each hidden layer
    :param L2: L2 regularization for the hidden dense layers
    :param optimizer: optimizer instance used to update the weights
    """
    self._optimizer = optimizer

    # ***************** dense input layer
    self._dense_combine_layer = DenseInputCombineLayer(dense_fields)

    # ***************** embedding layers
    self._embed_combine_layer = EmbeddingCombineLayer(vocab_infos)
    for field_name, vocab_name in embed_fields:
        self._embed_combine_layer.add_embedding(vocab_name=vocab_name, field_name=field_name)
    self._optimize_layers = [self._embed_combine_layer]

    # ***************** MLP
    prev_out_dim = self._dense_combine_layer.output_dim + self._embed_combine_layer.output_dim
    self._hidden_layers = []
    for layer_idx, n_units in enumerate(hidden_units, start=1):
        # ----------- add hidden layer
        hidden_layer = DenseLayer(name="hidden{}".format(layer_idx),
                                  shape=[prev_out_dim, n_units],
                                  l2reg=L2)
        self._hidden_layers.append(hidden_layer)
        self._optimize_layers.append(hidden_layer)
        logging.info("{}-th hidden layer, weight shape={}".format(layer_idx,
                                                                  hidden_layer.shape))

        # ----------- add activation layer
        self._hidden_layers.append(ReLU())

        # ----------- update previous dimension
        prev_out_dim = n_units

    # final logit layer
    final_logit_layer = DenseLayer(name="final_logit",
                                   shape=[prev_out_dim, 1],
                                   l2reg=L2)
    logging.info("final logit layer, weight shape={}".format(final_logit_layer.shape))
    self._hidden_layers.append(final_logit_layer)
    self._optimize_layers.append(final_logit_layer)
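# A minimal forward-pass sketch for the model assembled above. Hypothetical:
# the forward() method names on the two combine layers and the `features`
# input are assumptions; only DenseLayer.forward is shown elsewhere here.
def predict_logits(self, features):
    # concatenate the dense inputs and the pooled embeddings feature-wise
    dense_out = self._dense_combine_layer.forward(features)
    embed_out = self._embed_combine_layer.forward(features)
    X = np.hstack([dense_out, embed_out])
    # DenseLayer and ReLU alternate, ending with the final_logit layer
    for layer in self._hidden_layers:
        X = layer.forward(X)
    return X  # shape [batch_size, 1]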
def __init__(self, x_channel, feature_map, style_in_dim, style_out_dim,
             kernel=3, padding=0, is_demodulate=True, is_up=False):
    super(ModulateConv, self).__init__()
    self.feature_map = feature_map
    self.padding = padding
    self.stride = 1
    self.is_demodulate = is_demodulate
    self.is_up = is_up

    # out_c, in_c, kh, kw
    w, runtime_coef = self.get_weight_and_runtime_coef(
        shape=[self.feature_map, x_channel, kernel, kernel],
        gain=1, use_wscale=True, lrmul=1)
    self.weight = nn.Parameter(w)
    self.runtime_coef = runtime_coef

    # self.dense_layer = DenseLayer(in_channel=style_in_dim, feature_map=style_out_dim)
    self.dense_layer = DenseLayer(in_channel=style_in_dim, feature_map=x_channel)
    # self.fused_bias_act = FusedBiasActivation(channel=style_out_dim, act='Linear')
    self.fused_bias_act = FusedBiasActivation(channel=x_channel, act='Linear')

    if self.is_up:
        self.upsample_conv_2d = UpSampleConv2d()
def __init__(self, dlaten_size, opt):
    super(MappingNetwork, self).__init__()
    self.mapping_layers = 8
    self.out_feature = 512
    resolution_log2 = int(np.log2(opt.resolution))
    self.num_layers = resolution_log2 * 2 - 2

    self.dense_layers = nn.ModuleDict()
    self.fused_bias_acts = nn.ModuleDict()
    for layer_idx in range(self.mapping_layers):
        self.dense_layers[str(layer_idx)] = DenseLayer(dlaten_size, self.out_feature,
                                                       lrmul=0.01)
        self.fused_bias_acts[str(layer_idx)] = FusedBiasActivation(dlaten_size,
                                                                   lrmul=0.01,
                                                                   act='LeakyRelu')
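# A minimal forward sketch for the mapping network above. Hypothetical: the
# original forward() is not shown, and the final broadcast to one copy of w
# per synthesis layer (num_layers = 2 * log2(resolution) - 2, e.g. 18 at
# resolution 1024) follows the StyleGAN convention and is an assumption.
def forward(self, z):
    x = z  # latent z: [batch, dlaten_size]
    for layer_idx in range(self.mapping_layers):
        x = self.dense_layers[str(layer_idx)](x)
        x = self.fused_bias_acts[str(layer_idx)](x)
    # broadcast the disentangled latent w to every synthesis layer:
    # [batch, num_layers, out_feature]
    return x.unsqueeze(1).repeat(1, self.num_layers, 1)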
def model_fn(data_in):
    seq_in = data_in['sequence']
    targets = data_in['target']
    seq_in_one_hot = tf.one_hot(seq_in, depth=vocab_size)

    # build the DNC cell
    cell = DNCCell(StatelessCell("linear", features=8),
                   memory_size=8, word_size=8, num_reads=1, num_writes=1)
    output, _, final_loop_state = dynamic_rnn(seq_in_one_hot, cell,
                                              visualization.loop_state_fn,
                                              visualization.initial_loop_state())
    logits = DenseLayer("logits", vocab_size, activation=None)(output)
    preds = tf.argmax(tf.nn.softmax(logits), -1)

    # return a dictionary of tensors to keep track of
    args_dict = {}
    with tf.variable_scope('Loss'):
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)
        # no loss at the end of sequences in the batch (padding)
        # loss_weights_t = tf.clip_by_value(tf.cast(targets, tf.float32), 0.0, 1.0)
        # cross_entropy = cross_entropy * loss_weights_t
        loss = tf.reduce_mean(cross_entropy)
        args_dict['loss'] = loss

    with tf.variable_scope('train'):
        o = tf.train.RMSPropOptimizer(learn_rate, momentum)
        gvs = o.compute_gradients(loss, var_list=tf.trainable_variables())
        # clip gradients
        gradients, variables = zip(*gvs)
        gradients, _ = tf.clip_by_global_norm(gradients, norm_clip)
        capped_gvs = zip(gradients, variables)
        args_dict['optimizer'] = o.apply_gradients(capped_gvs)

    # track loss average every 100 steps
    args_dict['avg_loss'] = average_tracking.average_tracker(loss)
    # track loop state in TensorBoard
    args_dict['mem_view'] = visualization.assemble_mem_view(
        final_loop_state, [seq_in, preds, targets], vocab_size)
    return args_dict
def __init__(self, layer_types, layer_shapes, layers=None, cost_func=QuadCost):
    self.layer_types = layer_types
    self.layer_shapes = layer_shapes
    self.num_layers = len(layer_types)
    self.cost_func = cost_func

    if layers is not None:
        self.layers = layers
    else:
        self.layers = []
        for lt, ls in zip(layer_types, layer_shapes):
            if lt == "conv":
                self.layers.append(ConvLayer(image_shape=ls[0], kernel_shape=ls[1]))
            elif lt == "dense":
                self.layers.append(DenseLayer(layer_shape=ls[0]))
def __init__(self, layer_types, layer_shapes, conv_layer_types=None, layers=None):
    self.layer_types = layer_types
    self.layer_shapes = layer_shapes
    self.num_genes = 0
    self.conv_layer_types = conv_layer_types

    if layers is not None:
        self.layers = layers
        for typ, shpe in zip(layer_types, layer_shapes):
            if typ == "conv":
                self.num_genes += shpe[1][0]
            elif typ == "dense":
                self.num_genes += shpe[0][0]
            elif typ == "soft":
                self.num_genes += shpe[0][0]
    else:
        self.layers = []
        cntr = 0
        n_conv_layer_types = -1
        if conv_layer_types is not None:
            n_conv_layer_types = len(conv_layer_types)
        for typ, shpe in zip(layer_types, layer_shapes):
            if typ == "conv":
                # use < (not <=) so cntr never indexes past conv_layer_types
                if cntr < n_conv_layer_types:
                    self.layers.append(ConvLayer(image_shape=shpe[0],
                                                 filter_shape=shpe[1],
                                                 filter_method=conv_layer_types[cntr][0],
                                                 zero_padding=conv_layer_types[cntr][1]))
                    cntr += 1
                else:
                    self.layers.append(ConvLayer(image_shape=shpe[0], filter_shape=shpe[1]))
                self.num_genes += shpe[1][0]
            elif typ == "dense":
                self.layers.append(DenseLayer(layer_shape=shpe[0]))
                self.num_genes += shpe[0][0]
            elif typ == "soft":
                self.layers.append(SoftmaxLayer(layer_shape=shpe[0]))
                self.num_genes += shpe[0][0]
def __init__(self, memory_size=128, word_size=20, num_reads=1, num_writes=1,
             name='memory_access'):
    """
    Constructs a memory matrix with read heads and write heads as described
    in the DNC paper:
    http://www.nature.com/nature/journal/vaop/ncurrent/full/nature20101.html

    Args:
        memory_size: The number of memory slots (N in the DNC paper).
        word_size: The width of each memory slot (W in the DNC paper).
        num_reads: The number of read heads (R in the DNC paper).
        num_writes: The number of write heads (fixed at 1 in the paper).
        name: The name of the module.
    """
    self._memory_size = memory_size
    self._word_size = word_size
    self._num_reads = num_reads
    self._num_writes = num_writes

    self._linkage = TemporalLinkage(memory_size, num_writes)
    self._freeness = Freeness(memory_size)

    num_read_modes = 1 + 2 * num_writes
    self.interface_size = ((num_writes * word_size) * 3) + \
                          (num_reads * num_read_modes) + \
                          (num_reads * word_size) + \
                          (num_writes * 3) + \
                          (num_reads * 2)
    # flatten channel should be 1....
    self.interface_linear = DenseLayer('write_vectors', self.interface_size,
                                       activation=None, use_bias=True)
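# Worked example of interface_size for the defaults above (word_size=20,
# num_reads=1, num_writes=1), grouping the terms as the DNC paper does:
#   num_read_modes = 1 + 2 * 1 = 3
#   (1 * 20) * 3 = 60   # write key, erase vector, write vector
#   1 * 3        = 3    # read modes per read head
#   1 * 20       = 20   # read keys
#   1 * 3        = 3    # write strength, allocation gate, write gate
#   1 * 2        = 2    # read strength and free gate per read head
#   interface_size = 60 + 3 + 20 + 3 + 2 = 88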
def crossover(father, mother, alpha=0.5):
    layers = []
    for lt, ls, fl, ml in zip(father.get_layer_types(), father.get_layer_shapes(),
                              father.get_layers(), mother.get_layers()):
        if lt == "conv":
            lyr = ConvLayer(image_shape=ls[0], filter_shape=ls[1])
            # Loop over each filter
            for i in range(ls[1][0]):
                parent = ml
                # Randomly pick either the mother's or the father's gene based on alpha
                if random.random() < alpha:
                    parent = fl
                lyr.set_filter(index=i, filtr=deepcopy(parent.get_filter(i)))
            layers.append(lyr)
        elif lt == "dense" or lt == "soft":
            weights = []
            biases = []
            lyr = DenseLayer(layer_shape=ls[0])
            if lt == "soft":
                lyr = SoftmaxLayer(layer_shape=ls[0])
            # Loop over each neuron
            for i in range(ls[0][0]):
                parent = ml
                # Randomly pick either the mother's or the father's gene based on alpha
                if random.random() < alpha:
                    parent = fl
                weights.append(deepcopy(parent.get_weights(i)))
                biases.append(deepcopy(parent.get_biases(i)))
            lyr.set_weights_biases(weights, biases)
            layers.append(lyr)

    child = Individual(deepcopy(father.get_layer_types()),
                       deepcopy(father.get_layer_shapes()),
                       deepcopy(father.get_conv_layer_types()),
                       layers)
    child = mutate_individual(child)
    return child
def __init__(self, n_channel=3, n_classes=2, image_size=64):
    self.images = tf.placeholder(dtype=tf.float32,
                                 shape=[None, image_size, image_size, n_channel],
                                 name='images')
    self.labels = tf.placeholder(dtype=tf.int64, shape=[None], name='labels')
    self.keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
    self.global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

    # CNN network
    conv_layer1 = ConvLayer(input_shape=(None, image_size, image_size, n_channel),
                            n_size=3,  # 3x3 convolution kernel
                            n_filter=64,
                            stride=1,
                            activation='relu',
                            batch_normal=True,
                            weight_decay=1e-4,
                            name='conv1')
    pool_layer1 = PoolLayer(n_size=2, stride=2, mode='max',
                            resp_normal=True, name='pool1')
    conv_layer2 = ConvLayer(input_shape=(None, int(image_size / 2), int(image_size / 2), 64),
                            n_size=3, n_filter=128, stride=1, activation='relu',
                            batch_normal=True, weight_decay=1e-4, name='conv2')
    pool_layer2 = PoolLayer(n_size=2, stride=2, mode='max',
                            resp_normal=True, name='pool2')
    conv_layer3 = ConvLayer(input_shape=(None, int(image_size / 4), int(image_size / 4), 128),
                            n_size=3, n_filter=256, stride=1, activation='relu',
                            batch_normal=True, weight_decay=1e-4, name='conv3')
    pool_layer3 = PoolLayer(n_size=2, stride=2, mode='max',
                            resp_normal=True, name='pool3')
    dense_layer1 = DenseLayer(input_shape=(None, int(image_size / 8) * int(image_size / 8) * 256),
                              hidden_dim=1024,
                              activation='relu',
                              dropout=True,
                              keep_prob=self.keep_prob,
                              batch_normal=True,
                              weight_decay=1e-4,
                              name='dense1')
    dense_layer2 = DenseLayer(input_shape=(None, 1024),
                              hidden_dim=n_classes,
                              activation='none',
                              dropout=False,
                              keep_prob=None,
                              batch_normal=False,
                              weight_decay=1e-4,
                              name='dense2')

    # data flow
    hidden_conv1 = conv_layer1.get_output(input=self.images)
    hidden_pool1 = pool_layer1.get_output(input=hidden_conv1)
    hidden_conv2 = conv_layer2.get_output(input=hidden_pool1)
    hidden_pool2 = pool_layer2.get_output(input=hidden_conv2)
    hidden_conv3 = conv_layer3.get_output(input=hidden_pool2)
    hidden_pool3 = pool_layer3.get_output(input=hidden_conv3)
    input_dense1 = tf.reshape(hidden_pool3,
                              [-1, int(image_size / 8) * int(image_size / 8) * 256])
    output_dense1 = dense_layer1.get_output(input=input_dense1)
    logits = dense_layer2.get_output(input=output_dense1)
    logits = tf.multiply(logits, 1, name='logits')

    # objective / loss function
    self.objective = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=self.labels))
    tf.add_to_collection('losses', self.objective)
    self.avg_loss = tf.add_n(tf.get_collection('losses'))

    # optimizer
    lr = tf.cond(tf.less(self.global_step, 500),
                 lambda: tf.constant(0.01),
                 lambda: tf.cond(tf.less(self.global_step, 1000),
                                 lambda: tf.constant(0.001),
                                 lambda: tf.constant(0.0001)))
    self.optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        self.avg_loss, global_step=self.global_step)

    # metrics
    correct_prediction = tf.equal(self.labels, tf.argmax(logits, 1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
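# Shape trace for the default image_size=64, assuming the 3x3 stride-1 convs
# preserve spatial size ('same' padding, as the declared input_shape chain
# implies) and each 2x2 stride-2 max pool halves it:
#   images      : [None, 64, 64, 3]
#   conv1/pool1 : [None, 32, 32, 64]
#   conv2/pool2 : [None, 16, 16, 128]
#   conv3/pool3 : [None, 8, 8, 256]
#   flatten     : [None, 8 * 8 * 256] = [None, 16384]
#   dense1      : [None, 1024]
#   dense2      : [None, n_classes]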
def load_population(file_name):
    counter = 0
    file = open(file_name, 'r')
    # read line by line
    arr = file.read().splitlines()
    pop_size = int(arr[counter])
    counter += 1
    initial_pop = []
    clt = []
    for i in range(pop_size):
        layer_types = []
        layer_shapes = []
        layers = []
        conv_layer_types = []
        num_layers = int(arr[counter])
        counter += 1
        for j in range(num_layers):
            type = arr[counter]
            layer_types.append(type)
            counter += 1
            if type == "conv":
                filter_method = arr[counter]
                counter += 1
                zero_padding = int(arr[counter])
                counter += 1
                conv_layer_types.append((filter_method, zero_padding))
                image_shape = (int(arr[counter]), int(arr[counter + 1]),
                               int(arr[counter + 2]))
                counter += 3
                filter_shape = (int(arr[counter]), int(arr[counter + 1]),
                                int(arr[counter + 2]), int(arr[counter + 3]))
                counter += 4
                layer_shapes.append([image_shape, filter_shape])
                filters = []
                # one weight tensor and one bias per filter
                for f in range(filter_shape[0]):
                    weights = np.zeros(filter_shape[1:])
                    bias = 0
                    for a in range(filter_shape[1]):
                        for b in range(filter_shape[2]):
                            for c in range(filter_shape[3]):
                                weights[a][b][c] = float(arr[counter])
                                counter += 1
                    bias = float(arr[counter])
                    counter += 1
                    filters.append(Filter(filter_shape[1:], weights, bias))
                clt = conv_layer_types
                layers.append(ConvLayer(image_shape, filter_shape,
                                        filter_method, zero_padding, filters))
            elif type == "dense" or type == "soft":
                shpe = (int(arr[counter]), int(arr[counter + 1]))
                layer_shapes.append([shpe])
                counter += 2
                weights = np.zeros(shpe)
                biases = np.zeros(shpe[0])
                for cl in range(shpe[0]):
                    for pl in range(shpe[1]):
                        weights[cl][pl] = float(arr[counter])
                        counter += 1
                for cl in range(shpe[0]):
                    biases[cl] = float(arr[counter])
                    counter += 1
                if type == "dense":
                    layers.append(DenseLayer(shpe, weights, biases))
                elif type == "soft":
                    layers.append(SoftmaxLayer(shpe, weights, biases))
        initial_pop.append(Individual(layer_types, layer_shapes, clt, layers))

    population = Population(pop_size, initial_pop[0].get_layer_types(),
                            initial_pop[0].get_layer_shapes(), clt, initial_pop)
    return population
x_data_t = seq_in_ph[:, :-1]
targets_t = seq_in_ph[:, 1:]  # inputs shifted over in the time dimension

# embed inputs
embeddings = Embeddings('embeddings', char_dictionary.vocab_size, embedding_size=256)
seq_in = embeddings(x_data_t)

cell = cells.MultiRNNCell([cells.LSTMSavedState(256, batch_size) for i in range(2)])
cell_reset_op = cell.reset_state()

output = dynamic_rnn(seq_in, cell, batch_size)

# logits
logits = DenseLayer("logits", char_dictionary.vocab_size)(output)

# output
softmax = tf.nn.softmax(logits)

# add some noise to the output so it's not the same ALL the time
# noise scaled by temperature: (-1, +1) * temperature
noise = tf.random_uniform(tf.shape(softmax), minval=-temperature,
                          maxval=temperature, dtype=tf.float32)
# if the model is sure it's not a certain class, the noise affects it less
# (might not work once the model is well trained...)
noise = noise * softmax

# offset the softmax values by the noise
generated = tf.argmax(softmax + noise, -1)
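# Worked example of the noise scaling above: with temperature = 0.5 and
# softmax = [0.7, 0.2, 0.1], the raw uniform noise lies in (-0.5, 0.5);
# after multiplying by the softmax it is confined to (-0.35, 0.35),
# (-0.1, 0.1), and (-0.05, 0.05) per class, so unlikely classes are
# perturbed less and the argmax mostly stays near the model's top picks.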