Example #1
0
class Inception(Function):
    """GoogLeNet-style Inception block.

    The input array is fed to four parallel branches. Their outputs are joined
    along axis 1 (the channel axis) and the joined result goes through a final
    ReLU:

    * a 1x1 convolution;
    * a 1x1 projection, ReLU, then a 3x3 convolution with padding 1;
    * a 1x1 projection, ReLU, then a 5x5 convolution with padding 2;
    * 3x3 max pooling (stride 1, padding 1), then a 1x1 projection.

    The padding of each convolution and of the pooling is chosen so that every
    branch is meant to keep the spatial size of the input, which is what
    allows the channel-wise concatenation.

    See: `Going Deeper with Convolutions <http://arxiv.org/abs/1409.4842>`_.

    Args:
        in_channels (int): Number of channels of input arrays.
        out1 (int): Output channels of the 1x1 convolution branch.
        proj3 (int): Channels produced by the projection in the 3x3 branch.
        out3 (int): Output channels of the 3x3 convolution branch.
        proj5 (int): Channels produced by the projection in the 5x5 branch.
        out5 (int): Output channels of the 5x5 convolution branch.
        proj_pool (int): Output channels of the projection that follows the
            max pooling.

    Returns:
        Variable: Output variable. Its channel dimension has size
            ``out1 + out3 + out5 + proj_pool``.

    .. note::

       ``forward`` builds the branch computations on a fresh ``Variable``
       wrapping the input array and keeps both that variable and the output
       variable on the instance; ``backward`` pushes the incoming gradient
       through them.

    """
    def __init__(self, in_channels, out1, proj3, out3, proj5, out5, proj_pool):
        # The layers are instantiated one by one, in the same order as the
        # keyword arguments below, before being grouped into the FunctionSet.
        branch_1x1 = Convolution2D(in_channels, out1, 1)
        reduce_3x3 = Convolution2D(in_channels, proj3, 1)
        branch_3x3 = Convolution2D(proj3, out3, 3, pad=1)
        reduce_5x5 = Convolution2D(in_channels, proj5, 1)
        branch_5x5 = Convolution2D(proj5, out5, 5, pad=2)
        pool_proj = Convolution2D(in_channels, proj_pool, 1)
        self.f = FunctionSet(
            conv1=branch_1x1,
            proj3=reduce_3x3,
            conv3=branch_3x3,
            proj5=reduce_5x5,
            conv5=branch_5x5,
            projp=pool_proj,
        )

    def forward(self, x):
        """Run the four branches on ``x[0]`` and return the activated concat.

        The wrapped input and the output variable are stored as ``self.x`` and
        ``self.y`` so that ``backward`` can reuse them.
        """
        layers = self.f
        self.x = Variable(x[0])
        source = self.x

        branch1 = layers.conv1(source)

        reduced3 = relu(layers.proj3(source))
        branch3 = layers.conv3(reduced3)

        reduced5 = relu(layers.proj5(source))
        branch5 = layers.conv5(reduced5)

        pooled = max_pooling_2d(source, 3, stride=1, pad=1)
        branch_pool = layers.projp(pooled)

        stacked = concat((branch1, branch3, branch5, branch_pool), axis=1)
        self.y = relu(stacked)

        return (self.y.data,)

    def backward(self, x, gy):
        """Seed the stored output with ``gy[0]`` and return the input gradient."""
        output = self.y
        output.grad = gy[0]
        output.backward()
        return (self.x.grad,)

    def to_gpu(self, device=None):
        """Delegate to the inner function set's ``to_gpu``."""
        return self.f.to_gpu(device)

    def to_cpu(self):
        """Delegate to the inner function set's ``to_cpu``."""
        return self.f.to_cpu()

    @property
    def parameters(self):
        """Parameters of the inner function set."""
        return self.f.parameters

    @parameters.setter
    def parameters(self, params):
        self.f.parameters = params

    @property
    def gradients(self):
        """Gradients of the inner function set."""
        return self.f.gradients

    @gradients.setter
    def gradients(self, grads):
        self.f.gradients = grads
Example #2
0
class Inception(Function):
    """Inception module as used in GoogLeNet.

    Four computations are applied to the same input and their results are
    concatenated on the channel axis (axis 1), followed by a ReLU:

    1. 1x1 convolution.
    2. 1x1 projection -> ReLU -> 3x3 convolution (``pad=1``).
    3. 1x1 projection -> ReLU -> 5x5 convolution (``pad=2``).
    4. 3x3 max pooling (``stride=1``, ``pad=1``) -> 1x1 projection.

    Each path is padded with the intent that its output keeps the spatial
    size of the input, so that the four results can be concatenated.

    See: `Going Deeper with Convolutions <http://arxiv.org/abs/1409.4842>`_.

    Args:
        in_channels (int): Number of channels of input arrays.
        out1 (int): Output size of the 1x1 convolution path.
        proj3 (int): Projection size of the 3x3 convolution path.
        out3 (int): Output size of the 3x3 convolution path.
        proj5 (int): Projection size of the 5x5 convolution path.
        out5 (int): Output size of the 5x5 convolution path.
        proj_pool (int): Projection size of the max pooling path.

    Returns:
        Variable: Output variable whose channel dimension has size
            ``out1 + out3 + out5 + proj_pool``.

    .. note::

       The per-path computations are built inside ``forward`` on a new
       ``Variable`` created from the input array; that variable and the
       output variable are cached on the instance for use by ``backward``.

    """
    def __init__(self, in_channels, out1, proj3, out3, proj5, out5, proj_pool):
        # Positional arguments of each layer: (input channels, output
        # channels, kernel size).
        self.f = FunctionSet(
            conv1=Convolution2D(in_channels, out1, 1),
            proj3=Convolution2D(in_channels, proj3, 1),
            conv3=Convolution2D(proj3, out3, 3, pad=1),
            proj5=Convolution2D(in_channels, proj5, 1),
            conv5=Convolution2D(proj5, out5, 5, pad=2),
            projp=Convolution2D(in_channels, proj_pool, 1),
        )

    def forward(self, x):
        """Compute the module output for the single input array ``x[0]``."""
        fs = self.f
        inp = Variable(x[0])
        self.x = inp

        paths = []
        # 1x1 path.
        paths.append(fs.conv1(inp))
        # 3x3 path: projection, activation, convolution.
        paths.append(fs.conv3(relu(fs.proj3(inp))))
        # 5x5 path: projection, activation, convolution.
        paths.append(fs.conv5(relu(fs.proj5(inp))))
        # Pooling path: pooling first, projection afterwards.
        paths.append(fs.projp(max_pooling_2d(inp, 3, stride=1, pad=1)))

        self.y = relu(concat(tuple(paths), axis=1))

        return (self.y.data,)

    def backward(self, x, gy):
        """Propagate ``gy[0]`` from the cached output to the cached input."""
        self.y.grad = gy[0]
        self.y.backward()
        grad_input = self.x.grad
        return (grad_input,)

    def to_gpu(self, device=None):
        """Forward the call to the wrapped ``FunctionSet``."""
        return self.f.to_gpu(device)

    def to_cpu(self):
        """Forward the call to the wrapped ``FunctionSet``."""
        return self.f.to_cpu()

    @property
    def parameters(self):
        """``parameters`` of the wrapped ``FunctionSet``."""
        return self.f.parameters

    @parameters.setter
    def parameters(self, params):
        self.f.parameters = params

    @property
    def gradients(self):
        """``gradients`` of the wrapped ``FunctionSet``."""
        return self.f.gradients

    @gradients.setter
    def gradients(self, grads):
        self.f.gradients = grads
Example #3
0
class CNN:
    """Small convolutional image classifier built on a Chainer ``FunctionSet``.

    On construction the model is loaded from the pickle file named by
    ``model_name`` when that file exists, otherwise a fresh model is created.
    ``fit`` trains with Adam and writes the model back to that file;
    ``predict`` writes the label of a single image to standard output.
    """

    # Labels indexed by predicted class id. Must be assigned by the caller
    # before ``predict`` is used.
    file_names = None

    # Value returned by ``forward(train=False)`` when no class is confident.
    UNKNOWN_LABEL = "unknown"

    def __init__(self):
        self.optimizer = optimizers.Adam()
        self.model_name = "cnn_nantyara"
        if os.path.exists(self.model_name):
            self.load_model()
        else:
            self.create_model()
        self.optimizer.setup(self.model.collect_parameters())

    def create_model(self):
        """Create a freshly initialised network and store it in ``self.model``."""
        self.model = FunctionSet(
            conv1=F.Convolution2D(3, 32, 3),
            bn1=F.BatchNormalization(32),
            conv2=F.Convolution2D(32, 64, 3, pad=1),
            bn2=F.BatchNormalization(64),
            conv3=F.Convolution2D(64, 64, 3, pad=1),
            fl4=F.Linear(1024, 256),
            fl5=F.Linear(256, 2),
        )

    def crete_model(self):
        """Backward-compatible alias of :meth:`create_model` (original spelling)."""
        self.create_model()

    def get_data(self, ifpath, image_categories, reshape_size=(3, 32, 32)):
        """Load the training images of every category found under ``ifpath``.

        Images are read from ``<ifpath>/<category><index>.jpeg``. The label of
        an image is the position of its category in ``image_categories``.

        Args:
            ifpath (str): Directory that holds the image files.
            image_categories (list of str): File-name prefixes, one per class.
            reshape_size (tuple): Shape every image array is reshaped to.

        Returns:
            tuple: ``(x, y)`` where ``x`` is a list of float32 arrays and ``y``
            is an int32 array of class indices. ``self.N`` is set to
            ``len(x)`` as a side effect.
        """
        x = []
        y = []

        for i_category, category in enumerate(image_categories):
            # NOTE(review): the upper bound is exclusive, so with files
            # numbered 1..count the last one is never loaded. Left unchanged
            # because the numbering scheme is not visible here -- confirm.
            for i_num in xrange(1, self.get_num_of_images(ifpath, category)):
                image_path = ifpath + "/" + category + str(i_num) + ".jpeg"
                image = np.array(Image.open(image_path), dtype=np.float32)
                # NOTE(review): reshape reinterprets the buffer rather than
                # moving the channel axis; get_data_for_predict does the same,
                # so training and prediction stay consistent.
                x.append(image.reshape(reshape_size))
                y.append(i_category)

        self.N = len(x)
        return x, np.array(y, dtype=np.int32)

    def get_data_for_predict(self, ifpath, image_name, reshape_size=(3, 32, 32)):
        """Load one image, resize it and return it as a one-element batch.

        Args:
            ifpath (str): Directory that holds the image.
            image_name (str): File name of the image inside ``ifpath``.
            reshape_size (tuple): Target shape; elements 1 and 2 are passed to
                ``cv2.resize`` as the output size.

        Returns:
            list: ``[image]`` with ``image`` reshaped to ``reshape_size``.
        """
        image = np.array(Image.open(ifpath + "/" + image_name), dtype=np.float32)
        image = cv2.resize(image, (reshape_size[1], reshape_size[2]))
        image = image.reshape(reshape_size)
        return [image]

    def forward(self, x_data, y_data, train=True):
        """Run the network on a batch.

        Args:
            x_data: Sequence of image arrays forming the batch.
            y_data: Array of target class indices (only used when training).
            train (bool): Selects the return value and is forwarded to dropout.

        Returns:
            When ``train`` is true, the tuple ``(loss, accuracy)`` of Chainer
            variables. Otherwise the index of the largest softmax output if
            any output exceeds 0.5, else the string ``"unknown"``.
        """
        x, t = Variable(np.array(x_data)), Variable(y_data)
        # NOTE(review): the batch-normalization layers are invoked the same
        # way for training and prediction; confirm whether prediction should
        # switch them to their test mode.
        h1 = F.max_pooling_2d(F.relu(self.model.bn1(self.model.conv1(x))), 2)
        h2 = F.max_pooling_2d(F.relu(self.model.bn2(self.model.conv2(h1))), 2)
        h3 = F.max_pooling_2d(F.relu(self.model.conv3(h2)), 2)
        h4 = F.dropout(F.relu(self.model.fl4(h3)), train=train)
        y = self.model.fl5(h4)

        if train:
            return F.softmax_cross_entropy(y, t), F.accuracy(y, t)

        # Flatten the softmax output; the index is taken over the flat array.
        probs = np.asarray(F.softmax(y).data).ravel()
        if (probs > 0.5).any():
            return probs.argmax()
        return self.UNKNOWN_LABEL

    def get_num_of_images(self, path, image_name):
        """Count the visible entries of ``path`` whose name contains ``image_name``.

        The directory is listed directly instead of going through a shell
        pipeline, so ``path`` is honoured and ``image_name`` is never
        interpreted by a shell. Entries starting with a dot are skipped, and a
        missing directory yields 0.

        Args:
            path (str): Directory to scan.
            image_name (str): Substring to look for in each entry name.

        Returns:
            int: Number of matching entries.
        """
        if not os.path.isdir(path):
            return 0
        return sum(
            1
            for entry in os.listdir(path)
            if not entry.startswith(".") and image_name in entry
        )

    def dump_model(self):
        """Move the model to the CPU and pickle it to ``self.model_name``."""
        self.model.to_cpu()
        with open(self.model_name, "wb") as f:
            pickle.dump(self.model, f, -1)

    def load_model(self):
        """Unpickle the model stored at ``self.model_name`` into ``self.model``."""
        # SECURITY: unpickling executes arbitrary code; only load model files
        # that come from a trusted source.
        with open(self.model_name, "rb") as f:
            self.model = pickle.load(f)

    def fit(self, x_train, y_train, epoch=20, batchsize=100):
        """Train the model with mini-batches, then save it.

        After every epoch the loss and accuracy averaged over all samples of
        that epoch are printed.

        Args:
            x_train: Sequence of training images.
            y_train: Array of class indices aligned with ``x_train``.
            epoch (int): Number of passes over the training data.
            batchsize (int): Number of samples per optimizer step.

        Raises:
            ValueError: If ``x_train`` is empty.
        """
        # Use the data actually passed in rather than the count cached by the
        # most recent get_data call.
        n_samples = len(x_train)
        if n_samples == 0:
            raise ValueError("x_train is empty; nothing to fit")

        for i_epoch in xrange(1, epoch + 1):
            print("epoch %d" % i_epoch)
            sum_accuracy = 0.0
            sum_loss = 0.0
            for i in xrange(0, n_samples, batchsize):
                x_batch = x_train[i : i + batchsize]
                y_batch = y_train[i : i + batchsize]
                self.optimizer.zero_grads()
                loss, acc = self.forward(x_batch, y_batch)
                loss.backward()
                self.optimizer.update()
                # Weight by batch length so a short final batch does not skew
                # the epoch mean.
                sum_loss += float(loss.data) * len(x_batch)
                sum_accuracy += float(acc.data) * len(x_batch)
            print(
                "train mean loss=%s, accuracy =%s"
                % (str(sum_loss / n_samples), str(sum_accuracy / n_samples))
            )
        self.dump_model()

    def predict(self, x):
        """Classify the one-image batch ``x`` and write its label to stdout.

        Writes ``self.file_names[class_index]`` for a confident prediction and
        ``"unknown"`` otherwise.
        """
        y = self.forward(x, np.zeros(1, dtype=np.int32), train=False)
        # forward returns a string only for the "unknown" case; checking the
        # type avoids comparing a numpy integer with a string.
        if isinstance(y, str):
            label = y
        else:
            label = str(self.file_names[y])
        sys.stdout.write(label)