def __init__(self, input_dim, features_nonezero, adj, hidden1=32, embedding_dim=16):
    super(GAE, self).__init__()
    self.input_dim = input_dim
    self.hidden1 = hidden1
    self.embedding_dim = embedding_dim
    self.features_nonezero = features_nonezero
    # `adj` arrives as an (indices, values, dense_shape) triple; rebuild it
    # as a float32 tf.SparseTensor.
    indices = np.array(adj[0])
    values = np.array(adj[1])
    dense_shape = np.array(adj[2])
    sparse_adj = tf.SparseTensor(indices=indices, values=values,
                                 dense_shape=dense_shape)
    self.adj = tf.cast(sparse_adj, dtype=tf.float32)
    # GAE encoder: one sparse convolution layer followed by one convolution layer.
    self.conv1 = ConvolutionSparseLayer(self.input_dim, self.hidden1, self.adj,
                                        self.features_nonezero, dropout=0,
                                        activation='relu',
                                        input_shape=(None, self.input_dim))
    self.conv2 = ConvolutionLayer(self.hidden1, self.embedding_dim, self.adj,
                                  dropout=0, activation='relu')
    # GAE decoder.
    self.reconstruct = InnerProductDecoder(self.embedding_dim, dropout=0,
                                           act=tf.nn.sigmoid)
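# A minimal construction sketch for the GAE __init__ above (all concrete
# sizes below are assumptions, not from the original source): `adj` is the
# (indices, values, dense_shape) triple that __init__ unpacks into a
# tf.SparseTensor.
#
#   import numpy as np
#
#   adj = (np.array([[0, 1], [1, 0]]),              # COO indices of nonzero entries
#          np.array([1.0, 1.0], dtype=np.float32),  # edge weights
#          np.array([2, 2]))                        # dense shape (n_nodes, n_nodes)
#   model = GAE(input_dim=1433, features_nonezero=100, adj=adj)  # hypothetical sizes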
def get_network(self):
    self._read_config()
    input_layer = None
    layers = []
    prev_layer = None
    for data in self._layers:
        if data["type"] == "input":
            input_size = self._input_size * self._input_size
            output_size = int(data["output_size"])
            layer = InputLayer(input_size, output_size)
        elif data["type"] == "dense":
            if "output_size" in data:
                output_size = int(data["output_size"])
            else:
                output_size = self._output_size
            activation_function_str = data["af"]
            activation_function = self._lookup_activation_function(
                activation_function_str)
            activation_function_d = self._lookup_activation_function_d(
                activation_function_str)
            learning_rate = float(data["la"])
            layer = DenseLayer(prev_layer.get_output_shape(), output_size,
                               activation_function, activation_function_d,
                               learning_rate)
        elif data["type"] == "convolution":
            if prev_layer is None:
                input_shape = (self._input_size, self._input_size, 1)
            else:
                input_shape = prev_layer.get_output_shape()
            kernel_n = int(data["kernel_n"])
            kernel_m = int(data["kernel_m"])
            channels_out = int(data["channels"])
            output_shape = (kernel_n, kernel_m, channels_out)
            v_stride = int(data["stride_n"])
            h_stride = int(data["stride_m"])
            padding = int(data["padding"])
            la = float(data["la"])
            layer = ConvolutionLayer(input_shape, output_shape, h_stride,
                                     v_stride, padding, la)
        if input_layer is None:
            input_layer = layer
        else:
            layers.append(layer)
        prev_layer = layer
    network = Network(input_layer, layers)
    return network
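# A hypothetical example of what _read_config might leave in self._layers
# (the key names follow the lookups in get_network above; the concrete
# values are assumptions). Values are strings because the code applies
# int()/float() to them:
#
#   self._layers = [
#       {"type": "input", "output_size": "128"},
#       {"type": "dense", "output_size": "64", "af": "sigmoid", "la": "0.01"},
#       {"type": "dense", "af": "softmax", "la": "0.01"},
#   ]
#
# The final dense entry omits "output_size", so it falls back to
# self._output_size as handled in the branch above.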
def __init__(self, layers, decay=0.001, learning_rate=0.01):
    mapping = {
        "input": lambda x: InputLayer(x),
        "fc": lambda x: FullyConnectedLayer(x),
        "convolution": lambda x: ConvolutionLayer(x),
        "pool": lambda x: PoolingLayer(x),
        "squaredloss": lambda x: SquaredLossLayer(x),
        "softmax": lambda x: SoftmaxLossLayer(x),
        "relu": lambda x: ReLuLayer(x),
    }
    self.layers = []
    self.decay = decay
    self.learning_rate = learning_rate
    prev_layer = None
    for layer in layers:
        layer["input_shape"] = layer.get("input_shape", None) or prev_layer.output_shape
        layer["decay"] = self.decay
        layer = mapping[layer["type"]](layer)
        self.layers.append(layer)
        prev_layer = layer
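# A hypothetical layers spec for this constructor (only the "type" key and
# the input-shape/decay handling are visible in this snippet; any further
# keys each layer class reads from its dict are assumptions):
#
#   layers = [
#       {"type": "input", "input_shape": (28, 28, 1)},
#       {"type": "convolution"},
#       {"type": "relu"},
#       {"type": "pool"},
#       {"type": "fc"},
#       {"type": "softmax"},
#   ]
#
# Only the first dict needs "input_shape"; each later dict inherits
# prev_layer.output_shape, and "decay" is injected before construction.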
def __init__(self, input_size=(1, 28, 28), activation_type=ActivationType.ReLU,
             hidden_size=50, output_size=10):
    # basic parameters
    self.__activation_type__ = activation_type
    self.params = {}
    self.layers = []
    # set layers
    channel_num = input_size[0]
    for i, param in enumerate([
            {'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
            {'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
            {'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1},
            {'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1},
            {'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
            {'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
            {'pre_node_num': 64 * 4 * 4, 'next_node_num': hidden_size},
            {'pre_node_num': hidden_size, 'next_node_num': output_size},
    ]):
        # layers 1-6: Convolution Layer & ReLU Layer
        if i + 1 in range(1, 7):
            # create convolution layer
            convolution_layer = ConvolutionLayer(
                index=i + 1,
                activation_type=self.__activation_type__,
                filter_num=param['filter_num'],
                channel_num=channel_num,
                filter_size=param['filter_size'],
                stride=param['stride'],
                padding=param['pad'])
            self.layers.append(convolution_layer)
            self.layers.append(
                self.activationLayerFromType(activation_type, index=i + 1))
            # layers 2, 4, 6: Pooling Layer
            if i + 1 in (2, 4, 6):
                self.layers.append(
                    PoolingLayer(index=i + 1, pool_h=2, pool_w=2, stride=2))
            # update next channel num
            channel_num = convolution_layer.filter_num
            layer = convolution_layer
        # layers 7, 8: Hidden Layer & ReLU Layer & Dropout Layer
        if i + 1 in (7, 8):
            hidden_layer = HiddenLayer(
                index=i + 1,
                activation_type=self.__activation_type__,
                pre_node_num=param['pre_node_num'],
                next_node_num=param['next_node_num'])
            self.layers.append(hidden_layer)
            if i + 1 == 7:
                self.layers.append(
                    self.activationLayerFromType(activation_type, index=i + 1))
                self.layers.append(DropoutLayer(index=i + 1, dropout_ratio=0.5))
            layer = hidden_layer
        # set W, b
        self.params['W{}'.format(i + 1)] = layer.W
        self.params['b{}'.format(i + 1)] = layer.b
        print('layer {} created'.format(i + 1))
        if Config.IS_DEBUG:
            print('W{} shape : {}'.format(
                i + 1, self.params['W{}'.format(i + 1)].shape))
            print('b{} shape : {}'.format(
                i + 1, self.params['b{}'.format(i + 1)].shape))
    # output created layer structures
    for layer in self.layers:
        print(layer.name)
    # keep indexes of the layers that hold weights
    self.weight_layer_indexes = []
    for j, layer in enumerate(self.layers):
        if isinstance(layer, (ConvolutionLayer, HiddenLayer)):
            self.weight_layer_indexes.append(j)
    self.debug('weight_layer_indexes {}'.format(self.weight_layer_indexes))
    print('{} layers created'.format(len(self.layers)))
    # last layer: SoftmaxWithLoss Layer
    self.lastLayer = SoftmaxWithLossLayer()
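# Shape trace justifying pre_node_num = 64 * 4 * 4 above, assuming the
# default (1, 28, 28) input: the pad-1 3x3 convolutions of layers 1-3
# preserve width (28, then 14 after the pool following layer 2); layer 4's
# pad-2 3x3 convolution gives (14 - 3 + 2*2) + 1 = 16 and its pool gives 8;
# layers 5-6 keep 8; and the pool after layer 6 gives 4 — hence
# 64 channels * 4 * 4 = 1024 inputs to the first hidden layer.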
def __init__(self, input_size=(1, 28, 28), activation_type=ActivationType.ReLU,
             filter_num=30, filter_size=5, filter_padding=0, filter_stride=1,
             hidden_size=100, output_size=10):
    self.__activation_type__ = activation_type
    convolution_output_size = \
        (input_size[1] - filter_size + 2 * filter_padding) / filter_stride + 1
    pooling_output_size = int(
        filter_num * (convolution_output_size / 2) * (convolution_output_size / 2))
    convolution_layer_1 = ConvolutionLayer(
        index=1,
        activation_type=self.__activation_type__,
        filter_num=filter_num,
        channel_num=input_size[0],
        filter_size=filter_size)
    # set params
    self.params = {}
    hidden_layer_1 = HiddenLayer(index=1,
                                 activation_type=self.__activation_type__,
                                 pre_node_num=pooling_output_size,
                                 next_node_num=hidden_size)
    hidden_layer_2 = HiddenLayer(index=2,
                                 activation_type=self.__activation_type__,
                                 pre_node_num=hidden_size,
                                 next_node_num=output_size)
    self.params['W1'] = convolution_layer_1.W
    self.params['b1'] = convolution_layer_1.b
    self.params['W2'] = hidden_layer_1.W
    self.params['b2'] = hidden_layer_1.b
    self.params['W3'] = hidden_layer_2.W
    self.params['b3'] = hidden_layer_2.b
    # save init weights
    self.init_weights = []
    self.init_weights.append(self.params['W1'])
    self.init_weights.append(self.params['W2'])
    self.init_weights.append(self.params['W3'])
    # set layers
    self.layers = OrderedDict()
    self.layers['Convolution1'] = convolution_layer_1
    self.layers['ReLU1'] = self.activationLayer(index=1)
    self.layers['Pooling1'] = PoolingLayer(index=1, pool_h=2, pool_w=2, stride=2)
    self.layers['Hidden1'] = hidden_layer_1
    self.layers['ReLU2'] = self.activationLayer(index=2)
    self.layers['Hidden2'] = hidden_layer_2
    self.lastLayer = SoftmaxWithLossLayer()
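# Worked example with the defaults above (28x28 input, 5x5 filter, no
# padding, stride 1): convolution_output_size = (28 - 5 + 0) / 1 + 1 = 24,
# the 2x2 pooling halves that to 12, and so
# pooling_output_size = 30 * 12 * 12 = 4320.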
def train(self, train_data, dev_data, test_data, maxlen):
    # tr = tracker.SummaryTracker()
    rng = np.random.RandomState(3435)
    train_x, train_y = self.shared_dataset(train_data)
    dev_x, dev_y = self.shared_dataset(dev_data)
    test_x, test_y = self.shared_dataset(test_data)
    test_len = len(test_data[0])
    n_train_batches = len(train_data[0]) // self.batch_size
    n_val_batches = len(dev_data[0]) // self.batch_size
    n_test_batches = test_len // self.batch_size
    input_width = self.hidden_sizes[0]
    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar()
    Words = theano.shared(value=self.word_vectors, name="Words", borrow=True)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (self.batch_size, maxlen, input_width))
    lstm = LSTM(dim=input_width, batch_size=self.batch_size,
                number_step=maxlen, params=self.lstm_params)
    leyer0_output = lstm.feed_foward(layer0_input)
    conv_outputs = list()
    conv_nnets = list()
    params = list()
    output = T.cast(layer0_input.flatten(), dtype=floatX)
    conv_input = output.reshape((self.batch_size, 1, maxlen, input_width))
    for fter in self.filter_sizes:
        pheight = maxlen - fter + 1
        conv = ConvolutionLayer(rng=rng,
                                filter_shape=(self.kernel, 1, fter, input_width),
                                input_shape=(self.batch_size, 1, maxlen, input_width),
                                poolsize=(pheight, 1),
                                name="conv" + str(fter))
        # => batch_size * 1 * 100 * width
        output = conv.predict(conv_input)
        layer1_input = output.flatten(2)
        params += conv.params
        conv_outputs.append(layer1_input)
        conv_nnets.append(conv)
    conv_nnets_output = T.concatenate(conv_outputs, axis=1)
    # lstm.mean_pooling_input(leyer0_output)
    hidden_layer = HiddenLayer(
        rng,
        hidden_sizes=[self.kernel * 3, self.hidden_sizes[0]],
        input_vectors=conv_nnets_output,
        activation=utils.Tanh,
        name="Hidden_Tanh")
    hidden_layer.predict()
    hidden_layer_relu = HiddenLayer(
        rng,
        hidden_sizes=[self.hidden_sizes[0], self.hidden_sizes[0]],
        input_vectors=hidden_layer.output)
    hidden_layer_relu.predict()
    # hidden_layer_dropout = HiddenLayerDropout(rng, hidden_sizes=self.hidden_sizes[:2], input_vectors=lstm.output, W=hidden_layer.W, b=hidden_layer.b)
    full_connect = FullConnectLayer(
        rng,
        layers_size=[self.hidden_sizes[0], self.hidden_sizes[-1]],
        input_vector=hidden_layer_relu.output)
    full_connect.predict()
    cost = full_connect.negative_log_likelihood(y)
    params += hidden_layer.params + hidden_layer_relu.params + full_connect.params
    # params = hidden_layer.params + hidden_layer_relu.params + full_connect.params
    params_length = len(params)
    # init values for e_grad, e_delta_prev and delta at time 0
    e_grad, e_delta_prev, delta = self.init_hyper_values(params_length)
    # e_grad_d, e_delta_prev_d, delta_d = self.init_hyper_values(params_length, name="D")
    # apply gradient
    grads = T.grad(cost, params)
    # dropout hidden layer
    # hidden_layer_dropout.dropout()
    # hidden_layer_dropout.predict()
    # full_connect.setInput(hidden_layer_dropout.output)
    # full_connect.predict()
    # cost_d = full_connect.negative_log_likelihood(y)
    # apply gradient to cost_d
    e_grad, e_delta_prev, delta = self.adadelta(grads, e_grad, e_delta_prev)
    # e_grad_d, e_delta_prev_d, delta_d = self.adadelta(grads_d, e_grad_d, e_delta_prev_d, delta_d)
    # grads_d = T.grad(cost_d, params)
    grads = delta
    # grad_d = delta_d
    updates = [(p, p - d) for p, d in zip(params, grads)]
    # updates = [(p, p - d - d_) for p, d, d_ in zip(params, grads, grads_d)]
    # updates = [(p, p - self.learning_rate * d) for p, d in zip(params, grads)]
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_x[(index * self.batch_size):((index + 1) * self.batch_size)],
            y: train_y[(index * self.batch_size):((index + 1) * self.batch_size)]
        })
    val_model = theano.function(
        [index],
        full_connect.errors(y),
        givens={
            x: dev_x[index * self.batch_size:(index + 1) * self.batch_size],
            y: dev_y[index * self.batch_size:(index + 1) * self.batch_size],
        })
    test_model = theano.function(
        inputs=[index],
        outputs=full_connect.errors(y),
        givens={
            x: test_x[index * self.batch_size:(index + 1) * self.batch_size],
            y: test_y[index * self.batch_size:(index + 1) * self.batch_size]
        })
    validation_frequency = min(n_train_batches, self.patience // 2)
    val_batch_lost = 1.
    best_batch_lost = 1.
    best_test_lost = 1.
    stop_count = 0
    epoch = 0
    done_loop = False
    current_time_step = 0
    improve_threshold = 0.995
    iter_list = range(n_train_batches)
    while epoch < self.epochs and not done_loop:
        epoch_cost_train = 0.
        epoch += 1
        batch_train = 0
        print("Start epoch: %i" % epoch)
        start = time.time()
        random.shuffle(iter_list)
        for mini_batch, m_b_i in zip(iter_list, xrange(n_train_batches)):
            current_time_step = (epoch - 1) * n_train_batches + m_b_i
            epoch_cost_train += train_model(mini_batch)
            batch_train += 1
            if (current_time_step + 1) % validation_frequency == 0:
                val_losses = [val_model(i) for i in xrange(n_val_batches)]
                val_losses = np.array(val_losses)
                val_batch_lost = np.mean(val_losses)
                if val_batch_lost < best_batch_lost:
                    if best_batch_lost * improve_threshold > val_batch_lost:
                        self.patience = max(
                            self.patience,
                            current_time_step * self.patience_frq)
                    best_batch_lost = val_batch_lost
                    # test it on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    current_test_lost = np.mean(test_losses)
                    print(('epoch %i minibatch %i test accuracy of %i example is: %.5f')
                          % (epoch, m_b_i, test_len, (1 - current_test_lost) * 100.))
                    if best_test_lost > current_test_lost:
                        best_test_lost = current_test_lost
            if self.patience <= current_time_step:
                print(self.patience)
                done_loop = True
                break
        print('epoch: %i, training time: %.2f secs; with avg cost: %.5f'
              % (epoch, time.time() - start, epoch_cost_train / batch_train))
    print('Best test accuracy is: %.5f' % (1 - best_test_lost))
    utils.save_layer_params(lstm, 'lstm_cb')
    utils.save_layer_params(hidden_layer, 'hidden_cb')
    utils.save_layer_params(hidden_layer_relu, 'hidden_relu_cb')
    utils.save_layer_params(full_connect, 'full_connect_cb')
    for index, conv in enumerate(conv_nnets):
        utils.save_layer_params(conv, 'convolution_%s' % index)
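# For reference, the accumulators that the self.adadelta() helper used in
# train() is presumed to maintain follow Zeiler's AdaDelta (the decay rate
# rho and epsilon are internal to that helper and assumed here):
#
#   E[g^2]_t  = rho * E[g^2]_{t-1} + (1 - rho) * g_t^2
#   delta_t   = sqrt(E[dx^2]_{t-1} + eps) / sqrt(E[g^2]_t + eps) * g_t
#   E[dx^2]_t = rho * E[dx^2]_{t-1} + (1 - rho) * delta_t^2
#
# which matches the parameter update p <- p - delta built in `updates` above.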
def build_test_model(self, data):
    rng = np.random.RandomState(3435)
    lstm_params, hidden_params, hidden_relu_params, full_connect_params, convs = \
        self.load_trained_params()
    data_x, data_y, maxlen = data
    test_len = len(data_x)
    n_test_batches = test_len // self.batch_size
    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar()
    Words = theano.shared(value=self.word_vectors, name="Words", borrow=True)
    input_width = self.hidden_sizes[0]
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (self.batch_size, maxlen, input_width))
    lstm = LSTM(dim=input_width, batch_size=self.batch_size,
                number_step=maxlen, params=lstm_params)
    leyer0_output = lstm.feed_foward(layer0_input)
    conv_outputs = list()
    conv_nnets = list()
    params = list()
    output = T.cast(layer0_input.flatten(), dtype=floatX)
    conv_input = output.reshape((self.batch_size, 1, maxlen, input_width))
    for it, p_conv in enumerate(convs):
        pheight = maxlen - self.filter_sizes[it] + 1
        conv = ConvolutionLayer(rng=rng,
                                filter_shape=(self.kernel, 1,
                                              self.filter_sizes[it], input_width),
                                input_shape=(self.batch_size, 1, maxlen, input_width),
                                poolsize=(pheight, 1),
                                name="conv" + str(self.filter_sizes[it]),
                                W=p_conv[0],
                                b=p_conv[1])
        # => batch_size * 1 * 100 * width
        output = conv.predict(conv_input)
        layer1_input = output.flatten(2)
        params += conv.params
        conv_outputs.append(layer1_input)
        conv_nnets.append(conv)
    conv_nnets_output = T.concatenate(conv_outputs, axis=1)
    hidden_layer = HiddenLayer(
        rng,
        hidden_sizes=[self.kernel * 3, self.hidden_sizes[0]],
        input_vectors=conv_nnets_output,
        activation=utils.Tanh,
        name="Hidden_Tanh",
        W=hidden_params[0],
        b=hidden_params[1])
    hidden_layer.predict()
    hidden_layer_relu = HiddenLayer(
        rng,
        hidden_sizes=[self.hidden_sizes[0], self.hidden_sizes[0]],
        input_vectors=hidden_layer.output,
        W=hidden_relu_params[0],
        b=hidden_relu_params[1])
    hidden_layer_relu.predict()
    # hidden_layer_dropout = HiddenLayerDropout(rng, hidden_sizes=self.hidden_sizes[:2], input_vectors=lstm.output, W=hidden_layer.W, b=hidden_layer.b)
    full_connect = FullConnectLayer(
        rng,
        layers_size=[self.hidden_sizes[0], self.hidden_sizes[-1]],
        input_vector=hidden_layer_relu.output,
        W=full_connect_params[0],
        b=full_connect_params[1])
    full_connect.predict()
    test_data_x = theano.shared(np.asarray(data_x, dtype=floatX), borrow=True)
    test_data_y = theano.shared(np.asarray(data_y, dtype='int32'), borrow=True)
    errors = 0.
    if test_len == 1:
        test_model = theano.function(
            [index],
            outputs=full_connect.get_predict(),
            on_unused_input='ignore',
            givens={
                x: test_data_x[index * self.batch_size:(index + 1) * self.batch_size],
                y: test_data_y[index * self.batch_size:(index + 1) * self.batch_size]
            })
        index = 0
        avg_errors = test_model(index)
    else:
        test_model = theano.function(
            [index],
            outputs=full_connect.errors(y),
            givens={
                x: test_data_x[index * self.batch_size:(index + 1) * self.batch_size],
                y: test_data_y[index * self.batch_size:(index + 1) * self.batch_size]
            })
        for i in xrange(n_test_batches):
            errors += test_model(i)
        avg_errors = errors / n_test_batches
    return avg_errors
class CNN:
    def __init__(self, class_num):
        self.train = True
        self.loss = 0
        self.conv1 = ConvolutionLayer(kernel=(3, 3), channels=32,
                                      stride=(1, 1), padding=(0, 0))
        self.conv2 = ConvolutionLayer(kernel=(3, 3), channels=64,
                                      stride=(2, 2), padding=(1, 1))
        self.conv3 = ConvolutionLayer(kernel=(3, 3), channels=64,
                                      stride=(1, 1), padding=(0, 0))
        self.conv4 = ConvolutionLayer(kernel=(3, 3), channels=128,
                                      stride=(2, 2), padding=(1, 1))
        self.conv5 = ConvolutionLayer(kernel=(3, 3), channels=128,
                                      stride=(1, 1), padding=(0, 0))
        self.fc1 = FullyConnectLayer(channels=(3200, 100))
        self.fc2 = FullyConnectLayer(channels=(100, class_num))
        self.bn1 = BatchNormalization2d(32)
        self.bn2 = BatchNormalization2d(64)
        self.bn3 = BatchNormalization2d(128)
        self.dropout1 = DropOut()

    def forward(self, x):
        batch = x.shape[0]
        # Propagate the train/eval flag to the layers that behave
        # differently between the two modes.
        for bn in [self.bn1, self.bn2, self.bn3, self.dropout1]:
            bn.train = self.train
        self.block1 = [self.conv1.forward, self.bn1.forward, self.relu]
        self.block2 = [self.conv2.forward, self.conv3.forward,
                       self.bn2.forward, self.relu, self.dropout1.forward]
        self.block3 = [self.conv4.forward, self.conv5.forward,
                       self.bn3.forward, self.relu]
        self.block4 = [self.fc1.forward, self.relu, self.fc2.forward]
        for block in [self.block1, self.block2, self.block3]:
            for item in block:
                x = item(x)
            # print(x[0].reshape(-1))
        x = x.reshape(batch, -1)
        for item in self.block4:
            x = item(x)
        # print(x[0].reshape(-1))
        return x

    def backward(self, err):
        err = self.fc2.backward(err)
        err *= np.where(self.fc2.inputs > 0, 1.0, 0.0)  # ReLU derivative
        err = self.fc1.backward(err)
        err *= np.where(self.fc1.inputs > 0, 1.0, 0.0)
        # Un-flatten back to the conv feature map; note the batch size of
        # 128 is hard-coded here (128 channels x 5 x 5 = 3200).
        err = err.reshape(128, 128, 5, 5)
        err = self.bn3.backward(err)
        err = self.conv5.backward(err)
        err = self.conv4.backward(err)
        err = self.dropout1.backward(err)
        err *= np.where(self.conv4.inputs > 0, 1.0, 0.0)
        err = self.bn2.backward(err)
        err = self.conv3.backward(err)
        err = self.conv2.backward(err)
        err *= np.where(self.conv2.inputs > 0, 1.0, 0.0)
        err = self.bn1.backward(err)
        _ = self.conv1.backward(err)

    def save(self, saved_dir):
        np.save(saved_dir + 'conv1.npy', self.conv1.weight)
        np.save(saved_dir + 'conv2.npy', self.conv2.weight)
        np.save(saved_dir + 'conv3.npy', self.conv3.weight)
        np.save(saved_dir + 'conv4.npy', self.conv4.weight)
        np.save(saved_dir + 'conv5.npy', self.conv5.weight)
        np.save(saved_dir + 'fc1.npy', self.fc1.weight)
        np.save(saved_dir + 'fc2.npy', self.fc2.weight)
        np.save(saved_dir + 'bn.npy',
                np.array([self.bn1.gamma, self.bn1.beta,
                          self.bn2.gamma, self.bn2.beta,
                          self.bn3.gamma, self.bn3.beta]))

    def load(self, load_dir):
        self.conv1.weight = np.load(load_dir + 'conv1.npy', allow_pickle=True)
        self.conv2.weight = np.load(load_dir + 'conv2.npy', allow_pickle=True)
        self.conv3.weight = np.load(load_dir + 'conv3.npy', allow_pickle=True)
        self.conv4.weight = np.load(load_dir + 'conv4.npy', allow_pickle=True)
        self.conv5.weight = np.load(load_dir + 'conv5.npy', allow_pickle=True)
        self.fc1.weight = np.load(load_dir + 'fc1.npy', allow_pickle=True)
        self.fc2.weight = np.load(load_dir + 'fc2.npy', allow_pickle=True)
        bn_paras = np.load(load_dir + 'bn.npy', allow_pickle=True)
        self.bn1.gamma = bn_paras[0]
        self.bn1.beta = bn_paras[1]
        self.bn2.gamma = bn_paras[2]
        self.bn2.beta = bn_paras[3]
        self.bn3.gamma = bn_paras[4]
        self.bn3.beta = bn_paras[5]

    @staticmethod
    def relu(x):
        return np.where(x > 0, x, 0)

    @staticmethod
    def maxpooling(x):
        # 2x2 max pooling with stride 2.
        res = np.zeros((x.shape[0], x.shape[1],
                        x.shape[2] // 2, x.shape[3] // 2))
        for a in range(res.shape[0]):
            for b in range(res.shape[1]):
                for c in range(res.shape[2]):
                    for d in range(res.shape[3]):
                        res[a, b, c, d] = np.max(
                            x[a, b, c * 2:c * 2 + 2, d * 2:d * 2 + 2])
        return res

    @staticmethod
    def loss_(vector, label):
        # Row-wise softmax; `_loss` accumulates a gradient-like term and
        # `loss_c` the cross-entropy at the true label.
        _loss = np.zeros_like(vector)
        vector_ = np.zeros_like(vector)
        loss_c = np.zeros_like(vector)
        for i in range(vector.shape[0]):
            for j in range(vector.shape[1]):
                vector_[i, j] = np.exp(vector[i, j]) / sum(np.exp(vector[i]))
                _loss[i, j] += -(vector_[i, j])
            try:
                _loss[i, int(label[i])] += (1.0 + 2 * (vector_[i, int(label[i])]))
                loss_c[i, int(label[i])] += -log(vector_[i, int(label[i])])
            except ValueError:
                # math.log raises ValueError when the predicted probability
                # underflows to zero.
                print(vector[i])
                exit()
        return _loss, loss_c
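# A minimal usage sketch for the CNN class above (the input size, channel
# count, and checkpoint directory are assumptions, not from the original
# source). With 32x32 inputs the conv stack shrinks the feature map to
# 30 -> 15 -> 13 -> 7 -> 5, i.e. 128 channels of 5x5, matching fc1's
# 3200-dim input (128 * 5 * 5); backward() additionally expects the batch
# size of 128 hard-coded in its reshape.
if __name__ == '__main__':
    net = CNN(class_num=10)
    x = np.random.randn(128, 3, 32, 32)        # hypothetical batch of 32x32 RGB images
    labels = np.random.randint(0, 10, size=128)
    logits = net.forward(x)                     # -> (128, 10)
    grad, ce_loss = CNN.loss_(logits, labels)   # gradient-like term and CE loss
    net.backward(grad)
    net.save('./checkpoints/')                  # hypothetical output directory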