def dc_gan(self):
    gen_lr, dis_lr = 2e-3, 5e-4

    # Generator: project the latent vector onto a (128, 7, 7) feature map,
    # upsample 7x7 -> 14x14 -> 28x28 with transposed convolutions, and emit
    # a single-channel image through a Tanh output.
    tconv1 = TrasposedConv((128, 7, 7), k_size=4, k_num=128, stride=2, padding=1, lr=gen_lr)
    tconv2 = TrasposedConv(tconv1.out_shape, k_size=4, k_num=128, stride=2, padding=1, lr=gen_lr)
    tconv3 = TrasposedConv(tconv2.out_shape, k_size=7, k_num=1, stride=1, padding=3, lr=gen_lr)
    self.generator = NN([
        FullyConnect([self.gen_input], tconv1.in_shape, lr=gen_lr),
        BatchNormalization(tconv1.in_shape, lr=gen_lr),
        Activation(act_type='ReLU'),
        tconv1,
        BatchNormalization(tconv1.out_shape, lr=gen_lr),
        Activation(act_type='ReLU'),
        tconv2,
        BatchNormalization(tconv2.out_shape, lr=gen_lr),
        Activation(act_type='ReLU'),
        tconv3,
        BatchNormalization(tconv3.out_shape, lr=gen_lr),
        Activation(act_type='Tanh')
    ])

    # Discriminator: mirror the generator, downsampling the 28x28 input to
    # 7x7 and ending in a single sigmoid real/fake score.
    conv1 = Conv((1, 28, 28), k_size=7, k_num=128, stride=1, padding=3, lr=dis_lr)
    conv2 = Conv(conv1.out_shape, k_size=4, k_num=128, stride=2, padding=1, lr=dis_lr)
    conv3 = Conv(conv2.out_shape, k_size=4, k_num=128, stride=2, padding=1, lr=dis_lr)
    self.discriminator = NN([
        conv1,
        Activation(act_type='LeakyReLU'),
        conv2,
        BatchNormalization(conv2.out_shape, lr=dis_lr),
        Activation(act_type='LeakyReLU'),
        conv3,
        BatchNormalization(conv3.out_shape, lr=dis_lr),
        Activation(act_type='LeakyReLU'),
        FullyConnect(conv3.out_shape, [1], lr=dis_lr),
        Activation(act_type='Sigmoid')
    ])
def __init__(self, eps=1):
    self.n_episodes = 1000
    self.batch_size = 32
    self.n_epochs = 300
    self.training_size = self.n_epochs * self.batch_size
    self.gamma = 0.95
    self.eps = eps
    self.eps_decay = 0.99
    lr = 0.01

    # Policy and target networks share the same architecture: a small
    # convolutional feature extractor followed by two fully connected
    # layers that output one Q-value per board position.
    self.policy_net, self.target_net = [
        NN([
            Conv((2, n_size, n_size), k_size=n_connect, k_num=16, optimizer='RMSProp'),
            Activation(act_type='ReLU'),
            FullyConnect(
                [16, n_size - n_connect + 1, n_size - n_connect + 1],
                [16], lr=lr, optimizer='RMSProp'),
            Activation(act_type='ReLU'),
            FullyConnect([16], [n_size * n_size], lr=lr, optimizer='RMSProp'),
        ]) for _ in range(2)
    ]

    # Replay buffers, initially empty.
    self.states = np.zeros((0, 2, n_size, n_size))
    self.next_states = np.zeros((0, 2, n_size, n_size))
    self.actions = np.zeros(0).astype(int)
    self.rewards = np.zeros(0)
    self.unfinish_mask = np.zeros(0)
def train(self, x, y):
    lr = self.lr
    conv1 = Conv(in_shape=x.shape[1:4], k_num=6, k_size=5, lr=lr)
    bn1 = BatchNormalization(in_shape=conv1.out_shape, lr=lr)
    relu1 = Activation(act_type="ReLU")
    pool1 = MaxPooling(in_shape=conv1.out_shape, k_size=2)

    conv2 = Conv(in_shape=pool1.out_shape, k_num=16, k_size=3, lr=lr)
    bn2 = BatchNormalization(in_shape=conv2.out_shape, lr=lr)
    relu2 = Activation(act_type="ReLU")
    pool2 = MaxPooling(in_shape=conv2.out_shape, k_size=2)

    fc = FullyConnect(pool2.out_shape, [self.n_labels], lr=lr)
    softmax = Softmax()

    nn = NN([
        conv1, bn1, relu1, pool1,
        conv2, bn2, relu2, pool2,
        fc, softmax
    ])
    nn.fit(x, y)
    return nn
def __init__(self, x_shape, label_num):
    self.batch_size, lr = 32, 1e-3

    # Conv > Normalization > Activation > Dropout > Pooling
    conv1 = Conv(in_shape=x_shape, k_num=6, k_size=5, lr=lr)
    bn1 = BatchNormalization(in_shape=conv1.out_shape, lr=lr)
    relu1 = Activation(act_type="ReLU")
    pool1 = MaxPooling(in_shape=conv1.out_shape, k_size=2)

    conv2 = Conv(in_shape=pool1.out_shape, k_num=16, k_size=3, lr=lr)
    bn2 = BatchNormalization(in_shape=conv2.out_shape, lr=lr)
    relu2 = Activation(act_type="ReLU")
    pool2 = MaxPooling(in_shape=conv2.out_shape, k_size=2)

    fc1 = FullyConnect(pool2.out_shape, [120], lr=lr)
    bn3 = BatchNormalization(in_shape=[120], lr=lr)
    relu3 = Activation(act_type="ReLU")
    fc2 = FullyConnect([120], [label_num], lr=lr)
    softmax = Softmax()

    self.layers = [
        conv1, bn1, relu1, pool1,
        conv2, bn2, relu2, pool2,
        fc1, bn3, relu3, fc2, softmax
    ]
def gradient_check(conv=True):
    # Numerically verify the analytic input gradient of a Conv or
    # FullyConnect layer (followed by a Tanh activation) against a
    # central finite difference.
    if conv:
        layera = Conv(in_shape=[16, 32, 28], k_num=12, k_size=3)
        layerb = Conv(in_shape=[16, 32, 28], k_num=12, k_size=3)
    else:
        layera = FullyConnect(in_shape=[16, 32, 28], out_dim=12)
        layerb = FullyConnect(in_shape=[16, 32, 28], out_dim=12)
    act_layer = Activation(act_type='Tanh')
    # The two layers must hold identical parameters for the comparison.
    layerb.w = layera.w.copy()
    layerb.b = layera.b.copy()

    eps = 1e-4
    x = np.random.randn(10, 16, 32, 28) * 10
    for i in range(100):
        # Pick a random element of x and perturb it by +/- eps.
        idxes = tuple((np.random.uniform(0, 1, 4) * x.shape).astype(int))
        x_a = x.copy()
        x_b = x.copy()
        x_a[idxes] += eps
        x_b[idxes] -= eps
        out = act_layer.forward(layera.forward(x))
        gradient = layera.gradient(act_layer.gradient(np.ones(out.shape)))
        delta_out = (act_layer.forward(layera.forward(x_a)) -
                     act_layer.forward(layerb.forward(x_b))).sum()
        # The gap between the central difference delta_out / (2 * eps) and the
        # analytic gradient should be of order eps * eps, so the printed
        # ratio (already divided by eps * eps) should stay small.
        print(idxes, (delta_out / eps / 2 - gradient[idxes]) / eps / eps)
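# A minimal usage sketch (not from the original source): invoking the check for
# both layer types. It assumes Conv, FullyConnect, Activation, and numpy are
# importable in this module. If the analytic gradients are correct, each printed
# value should remain small, on the order of one or below.
if __name__ == '__main__':
    gradient_check(conv=True)    # check the Conv layer's input gradient
    gradient_check(conv=False)   # check the FullyConnect layer's input gradient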