# Benchmark one GEMM problem per config entry: (m, n, k) are the matrix sizes
# and (at, bt) say whether operand A / B is used transposed.
for m, n, k, at, bt in config:

    # A transposed operand is allocated with its dimensions swapped; the
    # transposed view taken below restores the logical shape.
    if at:
        dimA, opA = (k, m), 'T'
    else:
        dimA, opA = (m, k), 'N'

    if bt:
        dimB, opB = (n, k), 'T'
    else:
        dimB, opB = (k, n), 'N'

    dimC = (m, n)

    # Two-letter operation tag, e.g. 'NN', 'NT', 'TN', 'TT'.
    op = opA + opB

    dtype_data = []

    for dtype in (np.float32, np.float16):

        A = ng.empty(dimA, dtype=dtype)
        B = ng.empty(dimB, dtype=dtype)
        C = ng.empty(dimC, dtype=dtype)

        if at:
            A = A.T
        if bt:
            B = B.T

        data = matmul(A, B, C, bench=True)

        # Debug aid (disabled): dump every timing for the fp16 run.
        # if dtype is np.float16:
        #     print ""
        #     for d in sorted(data):
        #         print "%7.3f %5.0f %22s %5d" % d

        # The last benchmark entry is taken as the cuBLAS result and the
        # smallest remaining entry as the best custom-kernel result.
        # NOTE(review): relies on the ordering of what matmul(bench=True)
        # returns, which is defined outside this block -- confirm.
        cublas = data.pop()
        openai = sorted(data)[0]
# NOTE(review): this fragment begins mid-block and its original line breaks
# were lost; the enclosing loop/conditional headers are not visible here.
#
# What the visible statements do:
#   * Track the layer with the largest `sizeF` (max_weights / max_weight_layer)
#     and the layer with the largest `sizeI` whose predecessor is not a
#     DataLayer (max_deltas / max_delta_layer), then record the layer in
#     `layers` and remember it as `prev_layer`.
#   * Append two delta buffers shaped like `max_delta_layer.dimI` to
#     `shared_deltas`, plus two more when `inception` is set.
#   * Allocate one float32 `shared_updates` buffer shaped like
#     `max_weight_layer.dimF`.
#   * For every layer: optionally display it, then call init_activations()
#     and init_weights(shared=shared_updates, zeros=zeros).
inception = True # find the size of the largest buffers so they can be shared if layer.sizeF > max_weights: max_weights = layer.sizeF max_weight_layer = layer if layer.sizeI > max_deltas and type(prev_layer) is not DataLayer: max_deltas = layer.sizeI max_delta_layer = layer prev_layer = layer layers.append(layer) # Init shared buffers (assumes consistent dtype for now) shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype)) shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype)) if inception: shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype)) shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype)) shared_updates = ng.empty(max_weight_layer.dimF, dtype=np.float32) for i, layer in enumerate(layers): if verbose: neon_logger.display(layer) # Initialize buffers. Alternate shared delta buffer. # One layer can't have the same buffer for both error in and error out. layer.init_activations() layer.init_weights(shared=shared_updates, zeros=zeros)
# NOTE(review): this fragment begins inside an if/elif chain whose earlier
# branches are not visible, and its original line breaks were lost.
#
# What the visible statements do:
#   * Final visible branch of the chain: use `layer.dimO` as the second input
#     shape, `layer.dimF` as the output shape, `update_opts` as the option
#     list and `layer.update_conv` as the function under test; any other
#     case raises TypeError("Unknown Kernel Class").
#   * When `compound` is false, replace the option list with a single empty
#     dict.
#   * Pick the fill value: 1.0 when `ones` is set, otherwise
#     (0.5 - ng.rand()) * 2.
#   * Allocate devI1 / devI2 / devO with `ng.empty`, fill them with `vals`,
#     and build float64 copies (cpuI1 / cpuI2 / cpuO) from `.get()` via
#     `nc.array`.  NOTE(review): `ng` / `nc` are presumably the GPU and CPU
#     reference backends -- confirm against the file header.
#   * For compound runs whose options are not `update_opts`, also allocate
#     float32 devB / devS buffers of shape (dimO[0], 1), fill them with
#     `vals`, and copy devB to a float64 cpuB.
dimI2 = layer.dimO dimO = layer.dimF opts = update_opts func = layer.update_conv else: raise TypeError("Unknown Kernel Class") if not compound: opts = [ dict() ] if ones: vals = 1.0 else: vals = (0.5 - ng.rand()) * 2 devI1 = ng.empty(dimI1, dtype=dtype) devI2 = ng.empty(dimI2, dtype=dtype) devO = ng.empty(dimO, dtype=dtype) devI1[:] = vals devI2[:] = vals devO[:] = vals cpuI1 = nc.array(devI1.get(), dtype=np.float64) cpuI2 = nc.array(devI2.get(), dtype=np.float64) cpuO = nc.array(devO.get(), dtype=np.float64) if compound and opts is not update_opts: devB = ng.empty((dimO[0], 1), dtype=np.float32) devS = ng.empty((dimO[0], 1), dtype=np.float32) devB[:] = vals devS[:] = vals cpuB = nc.array(devB.get(), dtype=np.float64)