Esempio n. 1
0
 def forward(self):
     """Write the layer output into ``self.top`` using two ``sgemm`` calls.

     The first call combines ``self.bottom`` with ``self.weights`` (second
     operand flagged as transposed) and is issued with beta = 0.0; the second
     call combines ``self.bias_multiplier`` with ``self.bias`` and is issued
     with beta = 1.0, targeting the same ``self.top`` buffer.

     NOTE(review): ``sgemm`` is defined elsewhere; the grouping below assumes
     a BLAS-like argument order
     (transA, transB, alpha, A, offA, lda, B, offB, ldb, beta, C, offC, ldc,
     m, n, k) -- confirm against its definition.
     """
     out_dim = self.num_output
     # Everything after the first axis of bottom is flattened into one size.
     in_dim = np.prod(self.bottom.shape[1:])
     batch = self.bottom.shape[0]

     # Weight product, written with beta = 0.0.
     sgemm(False, True, 1.0,
           self.bottom, 0, in_dim,
           self.weights, 0, in_dim,
           0.0,
           self.top, 0, out_dim,
           batch, out_dim, in_dim)

     # Bias term, issued with beta = 1.0 on the same output buffer.
     sgemm(False, False, 1.0,
           self.bias_multiplier, 0, 1,
           self.bias, 0, out_dim,
           1.0,
           self.top, 0, out_dim,
           batch, out_dim, 1)
Esempio n. 2
0
 def backward(self):
     """Fill ``weights_diff``, ``bias_diff`` and ``bottom_diff`` from ``top_diff``.

     Three calls are issued in order, each with beta = 0.0:

     1. ``sgemm`` on ``top_diff`` (flagged transposed) and ``bottom`` into
        ``self.weights_diff``.
     2. ``sgemv`` on ``top_diff`` (flagged transposed) and
        ``bias_multiplier`` into ``self.bias_diff``.
     3. ``sgemm`` on ``top_diff`` and ``weights`` into ``self.bottom_diff``.

     NOTE(review): ``sgemm``/``sgemv`` are defined elsewhere; a BLAS-like
     argument order is assumed here -- confirm against their definitions.
     """
     n_out = self.num_output
     # Everything after the first axis of bottom is flattened into one size.
     n_in = np.prod(self.bottom.shape[1:])
     batch = self.bottom.shape[0]

     # Weight gradient.
     sgemm(True, False, 1.0,
           self.top_diff, 0, n_out,
           self.bottom, 0, n_in,
           0.0,
           self.weights_diff, 0, n_in,
           n_out, n_in, batch)

     # Bias gradient.
     sgemv(True, batch, n_out, 1.0,
           self.top_diff, 0, n_out,
           self.bias_multiplier, 0, 1,
           0.0,
           self.bias_diff, 0, 1)

     # Gradient with respect to the layer input.
     sgemm(False, False, 1.0,
           self.top_diff, 0, n_out,
           self.weights, 0, n_in,
           0.0,
           self.bottom_diff, 0, n_in,
           batch, n_in, n_out)
 def launch(self, symbol_table, wait_for):
     """Enqueue the two ``sgemm`` calls of the forward pass and return the last event.

     Operands are looked up in ``symbol_table`` by the ``.name`` of the first
     three entries of ``sources`` and the first entry of ``sinks``. ``K``,
     ``N``, ``M`` and ``bias_multiplier`` come from the enclosing scope.

     The first call receives the caller's ``wait_for``; the second receives
     the value returned by the first, so the two are chained.

     Returns a one-element list holding the value returned by the second call.

     NOTE(review): ``sgemm`` is assumed to take BLAS-like positional
     arguments and to return an event object -- confirm.
     """
     data_in = symbol_table[sources[0].name]
     filters = symbol_table[sources[1].name]
     offsets = symbol_table[sources[2].name]
     data_out = symbol_table[sinks[0].name]

     # Weight product, issued with beta = 0.0.
     product_done = sgemm(False, True, 1.0,
                          data_in, 0, K,
                          filters, 0, K,
                          0.0,
                          data_out, 0, N,
                          M, N, K,
                          wait_for=wait_for)

     # Bias term, issued with beta = 1.0 and chained on the previous call.
     bias_done = sgemm(False, False, 1.0,
                       bias_multiplier, 0, 1,
                       offsets, 0, N,
                       1.0,
                       data_out, 0, N,
                       M, N, 1,
                       wait_for=product_done)
     return [bias_done]
Esempio n. 4
0
            def launch(self, symbol_table, wait_for):
                """Zero the three sink buffers, then process the batch item by item.

                Inputs are ``symbol_table[sources[0..2]]`` and outputs are
                ``symbol_table[sinks[0..2]]``. ``queues``, ``bias_multiplier``,
                ``col_data``, ``im2col``, ``col2im`` and the two global sizes
                come from the enclosing scope.

                For every index along the first axis of the bottom buffer:

                1. ``sgemv`` into ``bias_diff`` (beta = 1.0).
                2. ``im2col`` from the bottom buffer into ``col_data``.
                3. ``sgemm`` into ``weights_diff`` (beta = 1.0).
                4. ``sgemm`` into ``col_data`` (beta = 0.0).
                5. ``col2im`` from ``col_data`` into ``bottom_diff``.

                ``wait_for`` is not used in this body and nothing is returned.

                NOTE(review): BLAS-like argument order is assumed for
                ``sgemm``/``sgemv`` -- confirm against their definitions.
                """
                def _zeroed(key):
                    # Look up, clear and sync one sink buffer, in that order.
                    buf = symbol_table[key]
                    buf.fill(0)
                    buf.sync_ocl()
                    return buf

                cmd_queue = queues[0]
                bottom = symbol_table[sources[0]]
                bot_offset = np.prod(bottom.shape[1:])
                top_diff = symbol_table[sources[1]]
                top_offset = np.prod(top_diff.shape[1:])
                weights = symbol_table[sources[2]]
                bottom_diff = _zeroed(sinks[0])
                weights_diff = _zeroed(sinks[1])
                bias_diff = _zeroed(sinks[2])

                for item in range(bottom.shape[0]):
                    # Offsets of this item inside the flat top/bottom buffers.
                    top_at = item * top_offset
                    bot_at = item * bot_offset

                    # Step 1: accumulate into bias_diff.
                    tail = np.prod(top_diff.shape[2:])
                    sgemv(False, top_diff.shape[1], tail, 1.0,
                          top_diff, top_at, tail,
                          bias_multiplier, 0, 1,
                          1.0,
                          bias_diff, 0, 1)

                    # Step 2: unfold this item into col_data.
                    im2col(bottom.ocl_buf, col_data.ocl_buf,
                           bot_at).on(cmd_queue, im2col_global_size)

                    # Step 3: accumulate into weights_diff.
                    top_channels = top_diff.shape[1]
                    col_rows = col_data.shape[0]
                    col_cols = col_data.shape[1]
                    sgemm(False, True, 1.0,
                          top_diff, top_at, col_cols,
                          col_data, 0, col_cols,
                          1.0,
                          weights_diff, 0, col_rows,
                          top_channels, col_rows, col_cols)

                    # Step 4: overwrite col_data with the weights/top_diff product.
                    w_cols = weights.shape[1]
                    w_rows = weights.shape[0]
                    sgemm(True, False, 1.0,
                          weights, 0, w_cols,
                          top_diff, top_at, col_cols,
                          0.0,
                          col_data, 0, col_cols,
                          w_cols, col_cols, w_rows)

                    # Step 5: fold col_data back into bottom_diff.
                    col2im(col_data.ocl_buf, bottom_diff.ocl_buf,
                           bot_at).on(cmd_queue, col2im_global_size)
 def launch(self, symbol_table, wait_for):
     """Run the forward pass as two chained ``sgemm`` calls.

     ``symbol_table`` is indexed with the ``.name`` of ``sources[0]``,
     ``sources[1]``, ``sources[2]`` and ``sinks[0]``. ``K``, ``N``, ``M`` and
     ``bias_multiplier`` are taken from the enclosing scope.

     The first call is given ``wait_for`` as passed in; the second is given
     the result of the first. The result of the second call is returned
     wrapped in a list.

     NOTE(review): the positional layout is assumed to be BLAS-like
     (flags, alpha, A/offset/ld, B/offset/ld, beta, C/offset/ld, m, n, k) --
     confirm against ``sgemm``'s definition.
     """
     layer_input = symbol_table[sources[0].name]
     layer_weights = symbol_table[sources[1].name]
     layer_bias = symbol_table[sources[2].name]
     layer_output = symbol_table[sinks[0].name]

     # Weight product into the output buffer (beta = 0.0).
     first = sgemm(False, True, 1.0, layer_input, 0, K, layer_weights, 0, K,
                   0.0, layer_output, 0, N, M, N, K, wait_for=wait_for)

     # Bias term into the same buffer (beta = 1.0), after the first call.
     second = sgemm(False, False, 1.0, bias_multiplier, 0, 1, layer_bias, 0,
                    N, 1.0, layer_output, 0, N, M, N, 1, wait_for=first)
     return [second]
Esempio n. 6
0
 def launch(self, symbol_table, wait_for):
     """Enqueue the per-item forward work and return one event per item.

     Operands come from ``symbol_table`` via the ``.name`` of
     ``sources[0..2]`` and ``sinks[0]``. ``is_1x1``, ``queues``,
     ``col_datas``, ``padded``, ``bias_multiplier`` and ``im2col`` come from
     the enclosing scope.

     Item ``i`` is assigned to ``queues[i % len(queues)]``. When ``is_1x1``
     is true, ``sgemm`` reads the bottom buffer directly; otherwise
     ``im2col`` first fills ``col_datas[i % len(queues)]`` and ``sgemm``
     reads that buffer. Both paths end with the same bias ``sgemm``
     (beta = 1.0), whose result is the event collected for the item.

     NOTE(review): BLAS-like argument order with a trailing queue argument
     is assumed for ``sgemm`` -- confirm against its definition.
     """
     bottom = symbol_table[sources[0].name]
     bot_offset = np.prod(bottom.shape[1:])
     weights = symbol_table[sources[1].name]
     bias = symbol_table[sources[2].name]
     top = symbol_table[sinks[0].name]
     top_offset = np.prod(top.shape[1:])

     rows = weights.shape[0]
     cols = np.prod(top.shape[2:])
     inner = np.prod(weights.shape[1:])
     # NOTE: an explicit cl.clFinish(queues[0]) was disabled at this point.

     events = []
     for item in range(bottom.shape[0]):
         slot = item % len(queues)
         queue = queues[slot]
         src_at = item * bot_offset
         dst_at = item * top_offset

         if is_1x1:
             # The bottom buffer is used directly as the second operand.
             done = sgemm(False, False, 1.0,
                          weights, 0, inner,
                          bottom, src_at, cols,
                          0.0,
                          top, dst_at, cols,
                          rows, cols, inner,
                          queue, wait_for=wait_for)
         else:
             # Unfold into this slot's scratch buffer, then multiply from it.
             scratch = col_datas[slot]
             done = im2col(bottom.ocl_buf, scratch.ocl_buf,
                           src_at).on(queue, (padded, ), wait_for=wait_for)
             done = sgemm(False, False, 1.0,
                          weights, 0, inner,
                          scratch, 0, cols,
                          0.0,
                          top, dst_at, cols,
                          rows, cols, inner,
                          queue, wait_for=done)

         # Bias term, shared by both paths and chained on the previous step.
         done = sgemm(False, False, 1.0,
                      bias, 0, 1,
                      bias_multiplier, 0, cols,
                      1.0,
                      top, dst_at, cols,
                      rows, cols, 1,
                      queue, wait_for=done)
         events.append(done)
     return events
Esempio n. 7
0
            def launch(self, symbol_table, wait_for):
                """Compute bias, weight and input gradients one batch item at a time.

                Reads ``symbol_table[sources[0]]`` (bottom),
                ``symbol_table[sources[1]]`` (top gradient) and
                ``symbol_table[sources[2]]`` (weights); writes the buffers at
                ``sinks[0]``, ``sinks[1]`` and ``sinks[2]``. Each sink is
                cleared with ``fill(0)`` and then ``sync_ocl()`` before the loop.

                All kernels are launched on ``queues[0]``. ``wait_for`` is not
                used in this body and nothing is returned.

                NOTE(review): ``sgemm``/``sgemv`` are assumed to use BLAS-like
                positional arguments -- confirm against their definitions.
                """
                queue = queues[0]

                src = symbol_table[sources[0]]
                src_stride = np.prod(src.shape[1:])
                grad_out = symbol_table[sources[1]]
                grad_stride = np.prod(grad_out.shape[1:])
                filters = symbol_table[sources[2]]

                grad_in = symbol_table[sinks[0]]
                grad_in.fill(0)
                grad_in.sync_ocl()

                grad_filters = symbol_table[sinks[1]]
                grad_filters.fill(0)
                grad_filters.sync_ocl()

                grad_bias = symbol_table[sinks[2]]
                grad_bias.fill(0)
                grad_bias.sync_ocl()

                for idx in range(src.shape[0]):
                    grad_pos = idx * grad_stride
                    src_pos = idx * src_stride

                    # Bias gradient: accumulated across items (beta = 1.0).
                    span = np.prod(grad_out.shape[2:])
                    sgemv(False, grad_out.shape[1], span, 1.0, grad_out,
                          grad_pos, span, bias_multiplier, 0, 1, 1.0,
                          grad_bias, 0, 1)

                    # Unfold the current item into the shared column buffer.
                    unfold = im2col(src.ocl_buf, col_data.ocl_buf, src_pos)
                    unfold.on(queue, im2col_global_size)

                    # Weight gradient: accumulated across items (beta = 1.0).
                    g_m = grad_out.shape[1]
                    g_n = col_data.shape[0]
                    g_k = col_data.shape[1]
                    sgemm(False, True, 1.0, grad_out, grad_pos, g_k,
                          col_data, 0, g_k, 1.0, grad_filters, 0, g_n,
                          g_m, g_n, g_k)

                    # Input gradient in column form: overwrites col_data
                    # (beta = 0.0).
                    b_m = filters.shape[1]
                    b_n = col_data.shape[1]
                    b_k = filters.shape[0]
                    sgemm(True, False, 1.0, filters, 0, b_m, grad_out,
                          grad_pos, b_n, 0.0, col_data, 0, b_n,
                          b_m, b_n, b_k)

                    # Fold the column buffer back into the input gradient.
                    fold = col2im(col_data.ocl_buf, grad_in.ocl_buf, src_pos)
                    fold.on(queue, col2im_global_size)
def forward(data):
    """Run ``data`` through the network and return the softmax output.

    The pipeline is five ``ConvForward``/``ReluForward`` stages (the first
    two followed by ``LrnForward`` and ``PoolForward``, the fifth by
    ``PoolForward``), then three fully connected stages written into the
    module-level buffers ``fc6``, ``fc7`` and ``fc8``, then
    ``SoftmaxForward``.

    Side effect: the globals ``fc6`` and ``fc7`` are rebound to the result
    of ``ReluForward``; ``fc8`` is written through ``sgemm`` but not rebound.

    NOTE(review): the layer layout looks AlexNet-like -- confirm. ``sgemm``
    is assumed to use BLAS-like positional arguments -- confirm.
    """
    global fc6, fc7, fc8

    def _fully_connected(src, filters, bias_multiplier, bias, dst):
        # Weight product into dst (beta = 0.0), then bias term (beta = 1.0).
        n_out = dst.shape[1]
        n_in = np.prod(src.shape[1:])
        batch = src.shape[0]
        sgemm(False, True, 1.0, src, 0, n_in, filters, 0, n_in, 0.0,
              dst, 0, n_out, batch, n_out, n_in)
        sgemm(False, False, 1.0, bias_multiplier, 0, 1, bias, 0, n_out, 1.0,
              dst, 0, n_out, batch, n_out, 1)

    # Stage 1: 11x11 convolution with stride 4, then LRN and 3x3/2 pooling.
    act = ConvForward(data, conv1_filters, conv1_bias,
                      kernel_size=(11, 11), padding=(0, 0), stride=(4, 4))
    act = ReluForward(act)
    act, _scale1 = LrnForward(act, alpha=alpha, beta=beta,
                              local_size=local_size, k=1)
    act, _mask1 = PoolForward(act, kernel_size=(3, 3), padding=(0, 0),
                              stride=(2, 2))

    # Stage 2: 5x5 convolution with padding 2, then LRN and 3x3/2 pooling.
    act = ConvForward(act, conv2_filters, conv2_bias,
                      kernel_size=(5, 5), padding=(2, 2), stride=(1, 1))
    act = ReluForward(act)
    act, _scale2 = LrnForward(act, alpha=alpha, beta=beta,
                              local_size=local_size, k=1)
    act, _mask2 = PoolForward(act, kernel_size=(3, 3), padding=(0, 0),
                              stride=(2, 2))

    # Stages 3-5: 3x3 convolutions with padding 1.
    act = ConvForward(act, conv3_filters, conv3_bias,
                      kernel_size=(3, 3), padding=(1, 1), stride=(1, 1))
    act = ReluForward(act)

    act = ConvForward(act, conv4_filters, conv4_bias,
                      kernel_size=(3, 3), padding=(1, 1), stride=(1, 1))
    act = ReluForward(act)

    act = ConvForward(act, conv5_filters, conv5_bias,
                      kernel_size=(3, 3), padding=(1, 1), stride=(1, 1))
    act = ReluForward(act)
    pool5, _mask5 = PoolForward(act, kernel_size=(3, 3), padding=(0, 0),
                                stride=(2, 2))

    # Fully connected stages; fc6 and fc7 are rebound to their ReLU outputs.
    _fully_connected(pool5, fc6_filters, fc6_bias_multiplier, fc6_bias, fc6)
    fc6 = ReluForward(fc6)

    _fully_connected(fc6, fc7_filters, fc7_bias_multiplier, fc7_bias, fc7)
    fc7 = ReluForward(fc7)

    _fully_connected(fc7, fc8_filters, fc8_bias_multiplier, fc8_bias, fc8)
    return SoftmaxForward(fc8)
Esempio n. 9
0
def forward(data):
    """Evaluate the network on ``data`` and return ``SoftmaxForward(fc8)``.

    Convolution, ReLU, LRN and pooling stages are delegated to
    ``ConvForward``, ``ReluForward``, ``LrnForward`` and ``PoolForward``.
    The three fully connected stages are written directly as pairs of
    ``sgemm`` calls (weight product with beta = 0.0, then bias term with
    beta = 1.0) into the module-level buffers ``fc6``, ``fc7`` and ``fc8``.

    Side effect: the globals ``fc6`` and ``fc7`` are rebound to their
    ``ReluForward`` results.

    NOTE(review): ``sgemm`` is assumed to use BLAS-like positional
    arguments -- confirm against its definition.
    """
    global fc6, fc7, fc8

    # Keyword sets that repeat across several layers.
    lrn_cfg = dict(alpha=alpha, beta=beta, local_size=local_size, k=1)
    pool_cfg = dict(kernel_size=(3, 3), padding=(0, 0), stride=(2, 2))
    conv3x3_cfg = dict(kernel_size=(3, 3), padding=(1, 1), stride=(1, 1))

    conv1 = ReluForward(
        ConvForward(data, conv1_filters, conv1_bias,
                    kernel_size=(11, 11), padding=(0, 0), stride=(4, 4)))
    norm1, norm1_scale = LrnForward(conv1, **lrn_cfg)
    pool1, pool1_mask = PoolForward(norm1, **pool_cfg)

    conv2 = ReluForward(
        ConvForward(pool1, conv2_filters, conv2_bias,
                    kernel_size=(5, 5), padding=(2, 2), stride=(1, 1)))
    norm2, norm2_scale = LrnForward(conv2, **lrn_cfg)
    pool2, pool2_mask = PoolForward(norm2, **pool_cfg)

    conv3 = ReluForward(
        ConvForward(pool2, conv3_filters, conv3_bias, **conv3x3_cfg))
    conv4 = ReluForward(
        ConvForward(conv3, conv4_filters, conv4_bias, **conv3x3_cfg))
    conv5 = ReluForward(
        ConvForward(conv4, conv5_filters, conv5_bias, **conv3x3_cfg))
    pool5, pool5_mask = PoolForward(conv5, **pool_cfg)

    # fc6: reads pool5, writes the fc6 buffer.
    out_dim = fc6.shape[1]
    in_dim = np.prod(pool5.shape[1:])
    batch = pool5.shape[0]
    sgemm(False, True, 1.0,
          pool5, 0, in_dim,
          fc6_filters, 0, in_dim,
          0.0,
          fc6, 0, out_dim,
          batch, out_dim, in_dim)
    sgemm(False, False, 1.0,
          fc6_bias_multiplier, 0, 1,
          fc6_bias, 0, out_dim,
          1.0,
          fc6, 0, out_dim,
          batch, out_dim, 1)
    fc6 = ReluForward(fc6)

    # fc7: reads the rebound fc6, writes the fc7 buffer.
    out_dim = fc7.shape[1]
    in_dim = np.prod(fc6.shape[1:])
    batch = fc6.shape[0]
    sgemm(False, True, 1.0,
          fc6, 0, in_dim,
          fc7_filters, 0, in_dim,
          0.0,
          fc7, 0, out_dim,
          batch, out_dim, in_dim)
    sgemm(False, False, 1.0,
          fc7_bias_multiplier, 0, 1,
          fc7_bias, 0, out_dim,
          1.0,
          fc7, 0, out_dim,
          batch, out_dim, 1)
    fc7 = ReluForward(fc7)

    # fc8: reads the rebound fc7, writes the fc8 buffer (no ReLU afterwards).
    out_dim = fc8.shape[1]
    in_dim = np.prod(fc7.shape[1:])
    batch = fc7.shape[0]
    sgemm(False, True, 1.0,
          fc7, 0, in_dim,
          fc8_filters, 0, in_dim,
          0.0,
          fc8, 0, out_dim,
          batch, out_dim, in_dim)
    sgemm(False, False, 1.0,
          fc8_bias_multiplier, 0, 1,
          fc8_bias, 0, out_dim,
          1.0,
          fc8, 0, out_dim,
          batch, out_dim, 1)

    return SoftmaxForward(fc8)
Esempio n. 10
0
 def launch(self, symbol_table, wait_for):
     """Enqueue forward work for every batch item, spread over ``queues``.

     Buffers are fetched from ``symbol_table`` by the ``.name`` of
     ``sources[0..2]`` and ``sinks[0]``. ``is_1x1``, ``queues``,
     ``col_datas``, ``padded``, ``bias_multiplier`` and ``im2col`` are taken
     from the enclosing scope.

     Each item uses queue ``i % len(queues)``. The first step of every chain
     waits on the caller's ``wait_for``; each later step waits on the event
     returned by the step before it. The event of the final (bias) step is
     appended to the returned list, one entry per item.

     NOTE(review): ``sgemm`` is assumed to accept BLAS-like positional
     arguments followed by a queue -- confirm against its definition.
     """
     bottom = symbol_table[sources[0].name]
     bot_offset = np.prod(bottom.shape[1:])
     weights = symbol_table[sources[1].name]
     bias = symbol_table[sources[2].name]
     top = symbol_table[sinks[0].name]
     top_offset = np.prod(top.shape[1:])

     out_rows = weights.shape[0]
     out_cols = np.prod(top.shape[2:])
     depth = np.prod(weights.shape[1:])
     # cl.clFinish(queues[0])

     collected = []
     batch_size = bottom.shape[0]

     if is_1x1:
         # 1x1 path: the bottom buffer feeds sgemm directly.
         for i in range(batch_size):
             q = queues[i % len(queues)]
             in_at = i * bot_offset
             out_at = i * top_offset
             step = sgemm(False, False, 1.0, weights, 0, depth,
                          bottom, in_at, out_cols, 0.0,
                          top, out_at, out_cols,
                          out_rows, out_cols, depth,
                          q, wait_for=wait_for)
             step = sgemm(False, False, 1.0, bias, 0, 1,
                          bias_multiplier, 0, out_cols, 1.0,
                          top, out_at, out_cols,
                          out_rows, out_cols, 1,
                          q, wait_for=step)
             collected.append(step)
     else:
         # General path: unfold into the per-queue column buffer first.
         for i in range(batch_size):
             slot = i % len(queues)
             q = queues[slot]
             in_at = i * bot_offset
             out_at = i * top_offset
             unfold = im2col(bottom.ocl_buf, col_datas[slot].ocl_buf, in_at)
             step = unfold.on(q, (padded, ), wait_for=wait_for)
             step = sgemm(False, False, 1.0, weights, 0, depth,
                          col_datas[slot], 0, out_cols, 0.0,
                          top, out_at, out_cols,
                          out_rows, out_cols, depth,
                          q, wait_for=step)
             step = sgemm(False, False, 1.0, bias, 0, 1,
                          bias_multiplier, 0, out_cols, 1.0,
                          top, out_at, out_cols,
                          out_rows, out_cols, 1,
                          q, wait_for=step)
             collected.append(step)
     return collected