class Inception(Function):
    """GoogLeNet Inception module.

    The input array is sent through four parallel paths. Their outputs are
    concatenated along the channel dimension (``axis=1``) and passed through
    a ReLU:

    * a 1x1 convolution;
    * a 1x1 projection, ReLU, then a 3x3 convolution (``pad=1``);
    * a 1x1 projection, ReLU, then a 5x5 convolution (``pad=2``);
    * a 3x3 max pooling (``stride=1``, ``pad=1``) followed by a 1x1
      projection.

    The paddings are chosen so that the output array keeps the spatial size
    of the input.

    See: `Going Deeper with Convolutions <http://arxiv.org/abs/1409.4842>`_.

    Args:
        in_channels (int): Number of channels of input arrays.
        out1 (int): Output size of the 1x1 convolution path.
        proj3 (int): Projection size of the 3x3 convolution path.
        out3 (int): Output size of the 3x3 convolution path.
        proj5 (int): Projection size of the 5x5 convolution path.
        out5 (int): Output size of the 5x5 convolution path.
        proj_pool (int): Projection size of the max pooling path.

    Returns:
        Variable: Output variable. Its array has the same spatial size and
        the same minibatch size as the input array. The channel dimension
        has size ``out1 + out3 + out5 + proj_pool``.

    .. note::

       This function inserts the full computation graph of the Inception
       module behind the input array. This function itself is not inserted
       into the computation graph.

    """

    # NOTE(review): this file defines ``Inception`` twice with the same body;
    # the later definition is the one the name ends up bound to.

    def __init__(self, in_channels, out1, proj3, out3, proj5, out5, proj_pool):
        # The keyword names are the attribute names used in forward().
        self.f = FunctionSet(
            conv1=Convolution2D(in_channels, out1, 1),
            proj3=Convolution2D(in_channels, proj3, 1),
            conv3=Convolution2D(proj3, out3, 3, pad=1),
            proj5=Convolution2D(in_channels, proj5, 1),
            conv5=Convolution2D(proj5, out5, 5, pad=2),
            projp=Convolution2D(in_channels, proj_pool, 1),
        )

    def forward(self, x):
        """Run the four paths on ``x[0]`` and return the result as a 1-tuple."""
        # Wrap the raw array in a fresh Variable; backward() reads its grad.
        self.x = Variable(x[0])
        layers = self.f

        branch_1x1 = layers.conv1(self.x)

        reduced_3x3 = relu(layers.proj3(self.x))
        branch_3x3 = layers.conv3(reduced_3x3)

        reduced_5x5 = relu(layers.proj5(self.x))
        branch_5x5 = layers.conv5(reduced_5x5)

        pooled = max_pooling_2d(self.x, 3, stride=1, pad=1)
        branch_pool = layers.projp(pooled)

        stacked = concat(
            (branch_1x1, branch_3x3, branch_5x5, branch_pool), axis=1)
        self.y = relu(stacked)
        return (self.y.data,)

    def backward(self, x, gy):
        """Seed the stored output with ``gy[0]`` and return the input gradient."""
        output = self.y
        output.grad = gy[0]
        output.backward()
        return (self.x.grad,)

    def to_gpu(self, device=None):
        """Delegate to the wrapped function set's ``to_gpu``."""
        return self.f.to_gpu(device)

    def to_cpu(self):
        """Delegate to the wrapped function set's ``to_cpu``."""
        return self.f.to_cpu()

    @property
    def parameters(self):
        """Parameters of the wrapped function set."""
        return self.f.parameters

    @parameters.setter
    def parameters(self, params):
        self.f.parameters = params

    @property
    def gradients(self):
        """Gradients of the wrapped function set."""
        return self.f.gradients

    @gradients.setter
    def gradients(self, grads):
        self.f.gradients = grads
class Inception(Function):
    """Inception building block of GoogLeNet.

    Four functions are applied to the same input array and their outputs
    are joined along the channel dimension, then passed through a ReLU.
    Three of them are 2D convolutions of sizes 1x1, 3x3 and 5x5; the 3x3
    and 5x5 convolutions are each preceded by a 1x1 convolution (a
    *projection*) and a ReLU. The fourth applies a 3x3 max pooling and then
    a 1x1 projection.

    Padding is set on the 3x3 and 5x5 convolutions and on the pooling so
    that the output array has the same spatial size as the input.

    See: `Going Deeper with Convolutions <http://arxiv.org/abs/1409.4842>`_.

    Args:
        in_channels (int): Number of channels of input arrays.
        out1 (int): Output size of the 1x1 convolution path.
        proj3 (int): Projection size of the 3x3 convolution path.
        out3 (int): Output size of the 3x3 convolution path.
        proj5 (int): Projection size of the 5x5 convolution path.
        out5 (int): Output size of the 5x5 convolution path.
        proj_pool (int): Projection size of the max pooling path.

    Returns:
        Variable: Output variable. Its array has the same spatial size and
        the same minibatch size as the input array. The channel dimension
        has size ``out1 + out3 + out5 + proj_pool``.

    .. note::

       This function inserts the full computation graph of the Inception
       module behind the input array. This function itself is not inserted
       into the computation graph.

    """

    # NOTE(review): an earlier definition of ``Inception`` with the same body
    # appears above in this file; this one rebinds the name.

    def __init__(self, in_channels, out1, proj3, out3, proj5, out5, proj_pool):
        # Layers are created in the listed order and handed over by keyword.
        layers = dict(
            conv1=Convolution2D(in_channels, out1, 1),
            proj3=Convolution2D(in_channels, proj3, 1),
            conv3=Convolution2D(proj3, out3, 3, pad=1),
            proj5=Convolution2D(in_channels, proj5, 1),
            conv5=Convolution2D(proj5, out5, 5, pad=2),
            projp=Convolution2D(in_channels, proj_pool, 1),
        )
        self.f = FunctionSet(**layers)

    def forward(self, x):
        """Compute the module output for ``x[0]``; returns a 1-tuple of arrays."""
        self.x = Variable(x[0])
        net, inp = self.f, self.x
        # One entry per path, in channel order of the concatenated output.
        paths = (
            net.conv1(inp),
            net.conv3(relu(net.proj3(inp))),
            net.conv5(relu(net.proj5(inp))),
            net.projp(max_pooling_2d(inp, 3, stride=1, pad=1)),
        )
        self.y = relu(concat(paths, axis=1))
        return (self.y.data,)

    def backward(self, x, gy):
        """Back-propagate ``gy[0]`` from the stored output to the stored input."""
        self.y.grad = gy[0]
        self.y.backward()
        input_grad = self.x.grad
        return (input_grad,)

    def to_gpu(self, device=None):
        """Forward the call to the inner function set."""
        inner = self.f
        return inner.to_gpu(device)

    def to_cpu(self):
        """Forward the call to the inner function set."""
        inner = self.f
        return inner.to_cpu()

    @property
    def parameters(self):
        """Parameters exposed by the inner function set."""
        return self.f.parameters

    @parameters.setter
    def parameters(self, params):
        self.f.parameters = params

    @property
    def gradients(self):
        """Gradients exposed by the inner function set."""
        return self.f.gradients

    @gradients.setter
    def gradients(self, grads):
        self.f.gradients = grads
class CNN:
    """Small image classifier built from a Chainer ``FunctionSet``.

    On construction the model is unpickled from the file named by
    ``model_name`` if that file exists in the current directory; otherwise a
    fresh network is created. An Adam optimizer is then set up on the
    model's parameters.
    """

    # Indexed by the class index returned from forward() when predict()
    # writes its answer. Must be assigned by the caller before predict().
    file_names = None

    def __init__(self):
        self.optimizer = optimizers.Adam()
        self.model_name = "cnn_nantyara"
        if os.path.exists(self.model_name):
            self.load_model()
        else:
            self.crete_model()
        # Runs for both branches: the optimizer needs the parameters of
        # whichever model is now in place.
        self.optimizer.setup(self.model.collect_parameters())

    def crete_model(self):
        """Create a fresh, untrained network and store it in ``self.model``.

        The misspelled method name is kept so existing callers keep working.
        """
        self.model = FunctionSet(
            conv1=F.Convolution2D(3, 32, 3),
            bn1=F.BatchNormalization(32),
            conv2=F.Convolution2D(32, 64, 3, pad=1),
            bn2=F.BatchNormalization(64),
            conv3=F.Convolution2D(64, 64, 3, pad=1),
            fl4=F.Linear(1024, 256),
            fl5=F.Linear(256, 2),
        )

    def get_data(self, ifpath, image_categories, reshape_size=(3, 32, 32)):
        """Load the training images of every category.

        Files are read from ``<ifpath>/<category><number>.jpeg``.

        Args:
            ifpath (str): Directory holding the image files.
            image_categories: Sequence of category name prefixes. The
                position of a category in the sequence is its label.
            reshape_size (tuple): Shape each image array is reshaped to.

        Returns:
            tuple: ``(x, y)`` where ``x`` is a list of float32 arrays and
            ``y`` is an int32 array of labels. ``self.N`` is set to
            ``len(x)`` as a side effect.
        """
        x = []
        y = []
        for i_category, category in enumerate(image_categories):
            # NOTE(review): numbering starts at 1 and the upper bound is
            # exclusive, so if the files are <category>1..<category>N and the
            # count is N, the N-th file is never loaded. Confirm the naming
            # scheme before changing this.
            for i_num in range(1, self.get_num_of_images(ifpath, category)):
                file_path = ifpath + "/" + category + str(i_num) + ".jpeg"
                # NOTE(review): reshape reinterprets the buffer; it does not
                # move a trailing channel axis to the front. Saved models were
                # trained with this layout, so it is left as is.
                image = np.array(Image.open(file_path), dtype=np.float32).reshape(reshape_size)
                x.append(image)
                y.append(i_category)
        self.N = len(x)
        return x, np.array(y, dtype=np.int32)

    def get_data_for_predict(self, ifpath, image_name, reshape_size=(3, 32, 32)):
        """Load one image, resize it and return it as a one-element list.

        Args:
            ifpath (str): Directory holding the image file.
            image_name (str): File name of the image inside ``ifpath``.
            reshape_size (tuple): Target shape. Elements 1 and 2 are passed
                to ``cv2.resize`` as the size; the resized array is then
                reshaped to the full tuple.

        Returns:
            list: ``[image]`` with ``image`` a float32 array.
        """
        image = np.array(Image.open(ifpath + "/" + image_name), dtype=np.float32)
        image = cv2.resize(image, (reshape_size[1], reshape_size[2]))
        image = image.reshape(reshape_size)
        return [image]

    def forward(self, x_data, y_data, train=True):
        """Run the network on a batch.

        Args:
            x_data: Sequence of input arrays; converted with ``np.array``.
            y_data: int32 array of labels (only used when ``train`` is true).
            train (bool): Training mode (dropout active, batch statistics in
                batch normalization) versus inference mode.

        Returns:
            When ``train`` is true, the pair
            ``(softmax_cross_entropy, accuracy)`` as Chainer variables.
            Otherwise the index of the largest softmax output if any output
            exceeds 0.5, else the string ``"unknown"``.
        """
        x, t = Variable(np.array(x_data)), Variable(y_data)
        # Batch normalization must use its accumulated statistics at
        # inference time instead of those of the (single-image) batch.
        is_test = not train
        h1 = F.max_pooling_2d(F.relu(self.model.bn1(self.model.conv1(x), test=is_test)), 2)
        h2 = F.max_pooling_2d(F.relu(self.model.bn2(self.model.conv2(h1), test=is_test)), 2)
        h3 = F.max_pooling_2d(F.relu(self.model.conv3(h2)), 2)
        h4 = F.dropout(F.relu(self.model.fl4(h3)), train=train)
        y = self.model.fl5(h4)
        if train:
            return F.softmax_cross_entropy(y, t), F.accuracy(y, t)
        probabilities = F.softmax(y).data.ravel()
        if (probabilities > 0.5).any():
            return probabilities.argmax()
        return "unknown"

    def get_num_of_images(self, path, image_name):
        """Count the entries of ``path`` whose name contains ``image_name``.

        The directory is listed directly instead of going through a shell
        pipeline, so ``path`` is honoured and ``image_name`` is never
        interpreted by a shell. Hidden entries (leading dot) are skipped,
        as ``ls`` would skip them.

        Args:
            path (str): Directory to scan.
            image_name (str): Substring an entry name must contain.

        Returns:
            int: Number of matching entries.

        Raises:
            OSError: If ``path`` cannot be listed.
        """
        return sum(
            1
            for entry in os.listdir(path)
            if not entry.startswith(".") and image_name in entry
        )

    def dump_model(self):
        """Move the model to the CPU and pickle it to ``self.model_name``."""
        self.model.to_cpu()
        with open(self.model_name, "wb") as f:
            pickle.dump(self.model, f, -1)

    def load_model(self):
        """Unpickle the model from ``self.model_name`` into ``self.model``."""
        # SECURITY: unpickling executes code embedded in the file. Only load
        # model files that this program wrote itself.
        with open(self.model_name, "rb") as f:
            self.model = pickle.load(f)

    def fit(self, x_train, y_train, epoch=20, batchsize=100):
        """Train the model and save it afterwards.

        After every epoch the loss and accuracy averaged over all samples
        of that epoch are printed.

        Args:
            x_train: Sequence of training inputs.
            y_train: int32 array of labels, aligned with ``x_train``.
            epoch (int): Number of passes over the training data.
            batchsize (int): Number of samples per optimizer update.

        Raises:
            ValueError: If ``x_train`` is empty.
        """
        n_samples = len(x_train)
        if n_samples == 0:
            raise ValueError("x_train is empty; nothing to train on")

        for current_epoch in range(1, epoch + 1):
            print("epoch %d" % current_epoch)

            sum_accuracy = 0.0
            sum_loss = 0.0
            for start in range(0, n_samples, batchsize):
                x_batch = x_train[start : start + batchsize]
                y_batch = y_train[start : start + batchsize]

                self.optimizer.zero_grads()
                loss, acc = self.forward(x_batch, y_batch)
                loss.backward()
                self.optimizer.update()

                # Weight by batch length: the last batch may be shorter.
                sum_loss += float(loss.data) * len(x_batch)
                sum_accuracy += float(acc.data) * len(x_batch)

            mean_loss = sum_loss / n_samples
            mean_accuracy = sum_accuracy / n_samples
            print("train mean loss=%s, accuracy =%s" % (str(mean_loss), str(mean_accuracy)))

        # Persist the trained weights once all epochs are done.
        self.dump_model()

    def predict(self, x):
        """Classify ``x`` and write the answer to standard output.

        Writes ``str(self.file_names[index])`` for a confident prediction,
        or ``"unknown"`` when ``forward`` reports no confident class.

        Args:
            x: One-element sequence holding the image array, as returned by
                ``get_data_for_predict``.
        """
        y = self.forward(x, np.zeros(1, dtype=np.int32), train=False)
        # forward() returns either a class index or the string "unknown";
        # only an index may be used to look up file_names.
        if isinstance(y, str):
            label = y
        else:
            label = str(self.file_names[y])
        sys.stdout.write(label)