Example #1

import numpy as np
import pycuda.driver as drv
from nervanagpu import NervanaGPU
from pycuda.autoinit import context
from scikits.cuda import cublas

print(context.get_device().name())

handle = cublas.cublasCreate()

start, end = (drv.Event(), drv.Event())


def cublas_dot(A, B, C, alpha=1.0, beta=0.0, repeat=1):

    # leading dimensions in elements: numpy strides are in bytes,
    # and these are 4-byte (float32) elements
    lda = max(A.strides) // 4
    ldb = max(B.strides) // 4
    ldc = max(C.strides) // 4

    opA = 't' if A.is_trans else 'n'
    opB = 't' if B.is_trans else 'n'
    op = opB + opA
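    # ----------------------------------------------------------------
    # The snippet is cut off here. Below is a minimal sketch of how the
    # call might continue, using the cuBLAS handle and CUDA events set
    # up above. The cublasSgemm call and the A.gpudata/B.gpudata/
    # C.gpudata attributes are assumptions, not taken from the
    # original file.
    # ----------------------------------------------------------------
    m, n = C.shape
    k = A.shape[1]

    start.record()
    for _ in range(repeat):
        # row-major trick: compute C^T = B^T * A^T in column-major cuBLAS,
        # hence the swapped operands and swapped m/n
        cublas.cublasSgemm(handle, op[0], op[1], n, m, k, alpha,
                           B.gpudata, ldb, A.gpudata, lda, beta,
                           C.gpudata, ldc)
    end.record()
    end.synchronize()
    return end.time_since(start) / repeat   # milliseconds per call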
Example #2

import numpy as np
import pycuda.driver as drv
from nervanagpu import NervanaGPU
from pycuda.autoinit import context
from scikits.cuda import cublas

print(context.get_device().name())

start, end = (drv.Event(), drv.Event())

handle = cublas.cublasCreate()

def cublas_dot(A, B, C, alpha=1.0, beta=0.0, repeat=1):

    lda = max(A.strides) // 4
    ldb = max(B.strides) // 4
    ldc = max(C.strides) // 4

    opA = 't' if A.is_trans else 'n'
    opB = 't' if B.is_trans else 'n'
    op  = opB + opA
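    # (cut off here as in Example #1; see the sketch there for one way
    # the function body might continue)

# Hypothetical driver, not part of the snippet. ng.array and ng.empty are
# assumptions about the NervanaGPU API; the 4096x4096 sizes are arbitrary.
ng = NervanaGPU()
A = ng.array(np.random.rand(4096, 4096).astype(np.float32))
B = ng.array(np.random.rand(4096, 4096).astype(np.float32))
C = ng.empty((4096, 4096), dtype=np.float32)
print("%.3f ms per gemm" % cublas_dot(A, B, C, repeat=10))
cublas.cublasDestroy(handle)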
Example #3

import numpy         as np
import pycuda.driver as drv
from nervanagpu      import NervanaGPU
from pycuda.autoinit import context
from operator        import mul

print(context.get_device().name())

np.set_printoptions(
    threshold=8193,
    linewidth=600,
    formatter={'int': lambda x: "%10d" % x, 'float': lambda x: "% .3f" % x})

dtype  = np.float16
cpu    = 1
repeat = 1

ng = NervanaGPU(stochastic_round=False, bench=True)

pool = ng.pool_layer(
    "max",
    64,         # N
    64,1,64,64, # C,D,H,W
    4,1,2,2,    # J,T,R,S
    0,0,0,0,    # padding
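    4,1,2,2)    # strides -- assumed continuation; the snippet is cut off here

# Hypothetical continuation, not part of the original file: allocate the
# input/output buffers in the layer's layout and run the pooling kernel.
# pool.dimI, pool.dimO and ng.fprop_pool are assumptions about the
# nervanagpu API, inferred from the layer object built above.
I = ng.array(np.random.uniform(-1.0, 1.0, pool.dimI).astype(dtype))
O = ng.empty(pool.dimO, dtype=dtype)
ng.fprop_pool(pool, I, O)
print(O.get().shape)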
Example #4

import numpy as np
import pycuda.driver as drv
from pycuda.autoinit import context
from nervanagpu import NervanaGPU
from nervanagpu.layers import DataLayer, ConvLayer, PoolLayer, FullLayer
print(context.get_device().name())

# Compare results here:
# https://github.com/soumith/convnet-benchmarks

# number of full iterations
loops = 10
# show benchmark details for each layer
layer_bench = 0
# show layer stats after each operation
print_stats = 0

ng = NervanaGPU(bench=layer_bench)

# don't learn, just benchmark
momentum = 0.0
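
The snippet stops before the network is defined. A hypothetical sketch of the timing loop such a script typically ends with, assuming each layer object exposes fprop() and bprop() methods (an assumption about the nervanagpu.layers API, not shown above):

# Hypothetical continuation -- the layer list and the fprop/bprop methods
# are assumptions, not taken from the snippet.
network = []   # would hold DataLayer / ConvLayer / PoolLayer / FullLayer objects

start, end = (drv.Event(), drv.Event())
start.record()
for _ in range(loops):
    for layer in network:            # forward pass
        layer.fprop()
    for layer in reversed(network):  # backward pass
        layer.bprop()
end.record()
end.synchronize()
print("%.1f ms per full iteration" % (end.time_since(start) / loops))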