def _VerifyValues(self, input_sizes, filter_sizes, stride, padding, expected):
    """Checks that conv2d on deterministic inputs matches `expected`.

    Input and filter are filled with the consecutive float32 values
    1, 2, 3, ... and convolved in NHWC layout inside the test scope.

    Args:
      input_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      stride: Stride, used for both spatial dimensions.
      padding: Padding type.
      expected: Expected output, compared against the flattened result
        with a tolerance of 1e-3.
    """
    def _ramp(sizes):
        # Values 1..prod(sizes), laid out in the requested shape.
        return np.arange(
            1, np.prod(sizes) + 1, dtype=np.float32).reshape(sizes)

    input_values = _ramp(input_sizes)
    filter_values = _ramp(filter_sizes)
    conv_strides = [1, stride, stride, 1]

    with self.test_session() as sess:
        with self.test_scope():
            input_ph = array_ops.placeholder(dtypes.float32, shape=input_sizes)
            filter_ph = array_ops.placeholder(
                dtypes.float32, shape=filter_sizes)
            conv = nn_ops.conv2d(
                input_ph,
                filter_ph,
                strides=conv_strides,
                padding=padding,
                data_format="NHWC")
        result = sess.run(
            conv, {input_ph: input_values, filter_ph: filter_values})
        self.assertArrayNear(expected, np.ravel(result), 1e-3)
def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
             activation_fn=sigmoid):
    """Store the layer geometry and create randomly initialised parameters.

    `filter_shape` is a length-4 tuple: (number of filters, number of
    input feature maps, filter height, filter width).

    `image_shape` is a length-4 tuple: (mini-batch size, number of input
    feature maps, image height, image width).

    `poolsize` is a length-2 tuple holding the y and x pooling sizes.

    Weights are drawn from a normal distribution with standard deviation
    sqrt(1 / n_out); biases from a standard normal distribution.
    """
    self.filter_shape, self.image_shape = filter_shape, image_shape
    self.poolsize = poolsize
    self.activation_fn = activation_fn

    float_type = theano.config.floatX
    n_filters = filter_shape[0]
    n_out = n_filters * np.prod(filter_shape[2:]) / np.prod(poolsize)

    # Draw the weights before the biases so the random stream is consumed
    # in a fixed order.
    weight_init = np.random.normal(
        loc=0, scale=np.sqrt(1.0 / n_out), size=filter_shape)
    bias_init = np.random.normal(loc=0, scale=1.0, size=(n_filters,))

    self.w = theano.shared(
        np.asarray(weight_init, dtype=float_type), borrow=True)
    self.b = theano.shared(
        np.asarray(bias_init, dtype=float_type), borrow=True)
    self.params = [self.w, self.b]
def na_op(x, y):
    """Apply the enclosing `op` to `x` and `y`, skipping null entries.

    The fast path goes through `expressions.evaluate`. If that raises
    TypeError, the operation is applied only to the positions that are
    non-null, the remaining positions are filled with NaN (upcasting if
    necessary), and the result is reshaped like `x`.

    Raises:
        TypeError: if `y` is neither an ndarray nor a Series and `x` has
            no `size` attribute.
    """
    try:
        result = expressions.evaluate(
            op, str_rep, x, y, raise_on_error=True, **eval_kwargs)
    except TypeError:
        x_flat = x.ravel()
        if isinstance(y, (np.ndarray, pd.Series)):
            common = np.find_common_type([x.dtype, y.dtype], [])
            result = np.empty(x.size, dtype=common)
            y_flat = y.ravel()
            mask = notnull(x_flat) & notnull(y_flat)
            x_valid = x_flat[mask]
            y_valid = y_flat[mask]
            # Only evaluate when both sides still have elements.
            if np.prod(x_valid.shape) and np.prod(y_valid.shape):
                result[mask] = op(x_valid, y_valid)
        elif hasattr(x, 'size'):
            result = np.empty(x.size, dtype=x.dtype)
            mask = notnull(x_flat)
            x_valid = x_flat[mask]
            if np.prod(x_valid.shape):
                result[mask] = op(x_valid, y)
        else:
            raise TypeError("cannot perform operation {op} between objects "
                            "of type {x} and {y}".format(op=name,x=type(x),y=type(y)))

        # Positions that were skipped become NaN.
        result, _ = com._maybe_upcast_putmask(result, ~mask, np.nan)
        result = result.reshape(x.shape)

    return com._fill_zeros(result, x, y, name, fill_zeros)
def total_tensor_depth(tensor=None, tensor_shape=None):
    """Return the size of a tensor without the first (batch) dimension.

    Args:
        tensor: Tensor whose shape is looked up with `get_shape` when no
            usable `tensor_shape` is given.
        tensor_shape: Explicit shape (list, tuple or NumPy array). When it
            is given and non-empty it takes precedence over `tensor`.

    Returns:
        int: product of every dimension except the first one.

    Raises:
        ValueError: if neither `tensor` nor `tensor_shape` is given.
    """
    if tensor is None and tensor_shape is None:
        raise ValueError('a tensor or a tensor shape is required.')
    # Test explicitly instead of relying on truthiness: a NumPy array with
    # more than one element has no truth value and used to raise here.
    if tensor_shape is not None and len(tensor_shape) > 0:
        return int(np.prod(tensor_shape[1:]))
    return int(np.prod(get_shape(tensor)[1:]))
def init_conv_filters(self, numpy_rng, D, poolsize):
    """Create the convolutional filters and the accompanying bias vector.

    The filter weights are drawn by `init_conv_weights` within
    +/- sqrt(6 / (fan_in + fan_out)). `D` is accepted but not used here.
    """
    n_filters = self.filter_shape[0]

    # Each hidden unit receives
    # "num input feature maps * filter height * filter width" inputs.
    fan_in = np.prod(self.filter_shape[1:])
    # Each unit in the lower layer receives a gradient from
    # "num output feature maps * filter height * filter width" / pooling size.
    fan_out = n_filters * np.prod(self.filter_shape[2:]) / np.prod(poolsize)

    W_bound = np.sqrt(6. / (fan_in + fan_out))
    initial_W = init_conv_weights(
        -W_bound, W_bound, self.filter_shape, numpy_rng)
    self.W = theano.shared(initial_W, borrow=True, name='W_conv')

    # One zero-initialised entry per input feature map; note that the
    # shared variable keeps the name 'b_conv'.
    c_values = np.zeros((self.filter_shape[1],), dtype=theano.config.floatX)
    self.c = theano.shared(value=c_values, borrow=True, name='b_conv')

    self.params = [self.W, self.c]
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
    """Allocate a LeNetConvPoolLayer with shared variable parameters.

    :type rng: numpy.random.RandomState
    :param rng: random number generator used to initialize the weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input

    n_filters = filter_shape[0]
    float_type = theano.config.floatX

    # Inputs per hidden unit:
    # "num input feature maps * filter height * filter width".
    fan_in = numpy.prod(filter_shape[1:])
    # Gradient sources per lower-layer unit:
    # "num output feature maps * filter height * filter width" / pooling size.
    fan_out = n_filters * numpy.prod(filter_shape[2:]) / numpy.prod(poolsize)

    # Uniform initialisation within +/- W_bound.
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    initial_W = rng.uniform(low=-W_bound, high=W_bound, size=filter_shape)
    self.W = theano.shared(
        numpy.asarray(initial_W, dtype=float_type), borrow=True)

    # One bias per output feature map, starting at zero.
    self.b = theano.shared(
        value=numpy.zeros((n_filters,), dtype=float_type), borrow=True)

    # Convolve the input feature maps with the filters ...
    conv_out = conv.conv2d(input=input, filters=self.W,
                           filter_shape=filter_shape,
                           image_shape=image_shape)
    # ... and max-pool each feature map individually.
    pooled_out = downsample.max_pool_2d(input=conv_out, ds=poolsize,
                                        ignore_border=True)

    # The bias vector is reshaped to (1, n_filters, 1, 1) so it broadcasts
    # across mini-batches and feature map width & height.
    broadcast_bias = self.b.dimshuffle('x', 0, 'x', 'x')
    self.output = T.tanh(pooled_out + broadcast_bias)

    self.params = [self.W, self.b]
def reshape(self, *shape):
    """Return a view of the same base with a new shape.

    The shape may be passed as separate arguments or as one list/tuple.
    Only views with element strides `(1,)` and single-element views can be
    reshaped.

    Raises:
        ShapeMismatch: if the requested shape has a different total size.
        NotImplementedError: for any other strided view.
    """
    if len(shape) == 1 and isinstance(shape[0], (list, tuple)):
        shape = shape[0]

    contiguous = self.elemstrides == (1,)
    if not contiguous and self.size != 1:
        # Some strided views could still be reshaped, but only view-based
        # reshapes are supported, so the strides would have to work out.
        raise NotImplementedError('reshape of strided view')

    if int(np.prod(shape)) != self.size:
        raise ShapeMismatch(shape, self.shape)

    if contiguous:
        # Build row-major strides from the innermost dimension outwards.
        new_strides = [1]
        for extent in reversed(shape[1:]):
            new_strides.insert(0, extent * new_strides[0])
    else:
        # Scalars can be reshaped to any number of (1, 1, 1...).
        new_strides = [1] * len(shape)

    return SignalView(
        base=self.base,
        shape=shape,
        elemstrides=new_strides,
        offset=self.offset)
def test_tied_biases():
    """Check a tied-bias convolution on images of two different sizes."""
    x = tensor.tensor4('x')
    num_channels, num_filters, batch_size = 4, 3, 5
    filter_size = (3, 3)

    conv = Convolutional(filter_size, num_filters, num_channels,
                         weights_init=Constant(1.), biases_init=Constant(2.),
                         tied_biases=True)
    conv.initialize()
    func = function([x], conv.apply(x))

    # Tied biases allow images of different sizes to be passed.
    image_sizes = [(10, 12), (23, 19)]
    inputs = [
        numpy.ones((batch_size, num_channels) + size,
                   dtype=theano.config.floatX)
        for size in image_sizes
    ]
    output_sizes = [(8, 10), (21, 17)]

    for images, out_size in zip(inputs, output_sizes):
        # All-ones weights and inputs: every output equals the number of
        # summed terms, plus the bias of 2.
        expected = (numpy.prod(filter_size) * num_channels *
                    numpy.ones((batch_size, num_filters) + out_size) + 2)
        assert_allclose(func(images), expected)
def sample(self, n, d=None, rng=np.random):
    """Draw `n` entries from `self.options` with probabilities `self.p`.

    Args:
        n: Number of samples to draw.
        d: Optional expected dimensionality of one option (product of all
            option dimensions except the first).
        rng: Random source providing `rand(n)`.

    Returns:
        `self.options` indexed by the `n` drawn indices.

    Raises:
        ValueError: if `d` is given and does not match the options.
    """
    if d is not None:
        option_dims = np.prod(self.options.shape[1:])
        if option_dims != d:
            raise ValueError("Options must be of dimensionality %d "
                             "(got %d)" % (d, option_dims))

    # Inverse-CDF sampling: locate each uniform draw in the cumulative
    # probability table.
    cumulative = np.cumsum(self.p)
    draws = rng.rand(n)
    return self.options[np.searchsorted(cumulative, draws)]
def _get_or_reshape(name, shared_data_arrays, arg_shape, arg_type, context,
                    logger):
    """Internal helper to get a memory block or re-use one by re-shaping.

    If `shared_data_arrays` holds an array for `name` with at least as many
    elements as `arg_shape` needs, it is reshaped and returned (the stored
    entry is left untouched). Otherwise a zero-filled array is allocated on
    `context` and stored under `name`; replacing a too-small entry also
    logs a warning.
    """
    if name not in shared_data_arrays:
        fresh = nd.zeros(arg_shape, context, dtype=arg_type)
        shared_data_arrays[name] = fresh
        return fresh

    existing = shared_data_arrays[name]
    if np.prod(existing.shape) >= np.prod(arg_shape):
        # Nice, this data blob can be re-used directly.
        assert existing.dtype == arg_type
        return existing.reshape(arg_shape)

    message = (
        ('bucketing: data "%s" has a shape %s' % (name, arg_shape)) +
        ', which is larger than already allocated ' +
        ('shape %s' % (existing.shape,)) +
        '. Need to re-allocate. Consider putting '
        'default_bucket_key to'
        ' be the bucket taking the largest input for better '
        'memory sharing.')
    logger.warning(message)

    # Replace the existing shared array because the new one is bigger.
    bigger = nd.zeros(arg_shape, context, dtype=arg_type)
    shared_data_arrays[name] = bigger
    return bigger
def process(self, image, out=None):
    """Encode `image` against the dictionary and apply a soft threshold.

    The behaviour is selected by `self.specs['twoside']`:

    * ``'abs'``: the absolute value of the projection is used.
    * any other truthy value (default ``True``): the projection and its
      negation are concatenated along the last axis, doubling its size.
    * falsy: the plain projection is used.

    `self.specs['alpha']` (default 0.25) is then subtracted and the result
    is clipped to be non-negative.

    Args:
        image: Input passed to `mathutil.dot_image`.
        out: Optional output buffer to reuse.

    Returns:
        The thresholded activations.
    """
    # 0.25 is the default value used in Ng's paper.
    alpha = self.specs.get('alpha', 0.25)
    # Two-sided thresholding is the default.
    twoside = self.specs.get('twoside', True)

    if twoside == 'abs':
        # 'abs' is a truthy string, so it must be tested before the generic
        # two-sided case; otherwise this branch can never be reached.
        out = mathutil.dot_image(image, self.dictionary.T, out=out)
        np.abs(out, out=out)
    elif twoside:
        # Concatenate +product and -product, making sure the output is
        # C-contiguous by filling a flat 2-D buffer first.
        product = mathutil.dot_image(image, self.dictionary.T)
        imshape = product.shape[:-1]
        N = product.shape[-1]
        n_rows = np.prod(imshape)
        product.resize((n_rows, N))
        if out is None:
            out = np.empty((n_rows, N*2))
        else:
            out.resize((n_rows, N*2))
        out[:, :N] = product
        out[:, N:] = -product
        out.resize(imshape + (N*2,))
    else:
        out = mathutil.dot_image(image, self.dictionary.T, out=out)

    # Apply the threshold in place.
    out -= alpha
    np.clip(out, 0., np.inf, out=out)
    return out
def test_valid_1_3_11_12():
    """Run the 'valid' convolution check for kernel versions 1, 3, 11, 12.

    Only shapes without subsampling are used, and shapes exceeding the
    thread or size limits checked below are skipped.
    """
    seed_rng()
    candidates = get_valid_shapes()
    version = [1, 3, 11, 12]
    verbose = 0
    print_ = False
    ones = False
    random = not ones

    size_limit = 16 * 1024 - 150
    selected = []
    for ishape, kshape, subshape, istride, kstride in candidates:
        spatial_out = (numpy.asarray(ishape[2:]) -
                       numpy.asarray(kshape[2:]) +
                       numpy.asarray([1, 1]))
        oshape = [ishape[0]] + [kshape[0]] + list(spatial_out)
        if oshape[3] > device_prop['maxThreadsDim0']:
            continue
        if (numpy.prod(ishape[2:]) + numpy.prod(kshape[2:])) * 4 > size_limit:
            continue
        if subshape == (1, 1):
            selected.append((ishape, kshape, subshape, istride, kstride))

    exec_conv(version, selected, verbose, random, 'valid',
              print_=print_, ones=ones, rtol=1.1e-5)
def checker(input_var, desire_size):
    """Print diagnostics about `input_var` versus the desired leading shape.

    Nothing is returned and nothing is raised for a mismatch; problems are
    reported with `print`.

    Args:
        input_var: Array-like to inspect.
        desire_size: Desired size of the leading dimension(s), given as a
            scalar, tuple, list or NumPy array.
    """
    if input_var is None:
        print('input_variable does not exist!')
    if desire_size is None:
        print('desire_size does not exist!')
    if input_var is None or desire_size is None:
        # Nothing else can be checked; continuing would only crash.
        return

    # Normalise to a tuple so tuples, lists, arrays and scalars all compare
    # consistently against the shape tuple reported by NumPy.
    wanted = tuple(numpy.ravel(desire_size).tolist())
    dd = len(wanted)
    dims = numpy.shape(input_var)

    if numpy.isnan(numpy.sum(input_var[:])):
        print('input has NaN')

    if numpy.ndim(input_var) < dd:
        print('input signal has too few dimensions')

    if dd > 1:
        if dims[0:dd] != wanted:
            print(dims[0:dd])
            print(desire_size)
            print('input signal has wrong size1')
    elif dd == 1:
        if dims[0] != wanted[0]:
            print(dims[0])
            print(desire_size)
            print('input signal has wrong size2')

    if numpy.mod(numpy.prod(dims), numpy.prod(wanted)) != 0:
        print('input signal shape is not multiples of desired size!')
def check_log_prob(self, is_gpu):
    """Compare the distribution's `log_prob` with the SciPy reference.

    Args:
        is_gpu: When true the GPU distribution is evaluated on a sample
            moved to the GPU; otherwise the CPU distribution is used.
    """
    smp = self.sample_for_test()
    if is_gpu:
        log_prob1 = self.gpu_dist.log_prob(cuda.to_gpu(smp)).data
    else:
        log_prob1 = self.cpu_dist.log_prob(smp).data

    if self.continuous:
        scipy_prob = self.scipy_dist.logpdf
    else:
        scipy_prob = self.scipy_dist.logpmf

    if self.scipy_onebyone:
        # numpy.prod returns a float (1.0) for an empty shape, which
        # reshape rejects, so every element count is cast to int.
        n_sample = int(numpy.prod(self.sample_shape))
        n_batch = int(numpy.prod(self.shape))
        n_event = int(numpy.prod(self.event_shape))

        # Bring the batch axis to the front so each parameter set can be
        # evaluated separately.
        onebyone_smp = smp.reshape((n_sample, n_batch, n_event))
        onebyone_smp = numpy.swapaxes(onebyone_smp, 0, 1)
        onebyone_smp = onebyone_smp.reshape(
            (-1,) + self.sample_shape + self.event_shape)

        log_prob2 = [
            scipy_prob(one_smp, **one_params)
            for one_params, one_smp in zip(
                self.scipy_onebyone_params_iter(), onebyone_smp)
        ]
        log_prob2 = numpy.vstack(log_prob2)
        log_prob2 = log_prob2.reshape(n_batch, -1).T
        log_prob2 = log_prob2.reshape(self.sample_shape + self.shape)
    else:
        log_prob2 = scipy_prob(smp, **self.scipy_params)
    array.assert_allclose(log_prob1, log_prob2)
def annotate_bn(self, var, id, var_type, mb_size, size, norm_ax):
    """Attach a running-average update to a batch-norm statistic.

    Args:
        var: Symbolic statistic (mean or variance) to annotate.
        id: Prefix used for the variable name.
        var_type: Either 'mean' or 'var'.
        mb_size: Mini-batch size, used for the variance correction factor.
        size: Tuple of dimensions, without the leading batch axis.
        norm_ax: Axes that were normalised over.

    Returns:
        `var`, flattened for the update and reshaped back to its original
        shape.

    Raises:
        NotImplementedError: if `var_type` is neither 'mean' nor 'var'.
    """
    var_shape = np.array((1,) + size)
    # Floor division keeps the dimension an integer on Python 3 as well;
    # np.zeros does not accept a float size.
    out_dim = int(np.prod(var_shape) // np.prod(var_shape[list(norm_ax)]))

    # Flatten the var - shared variable updating is not trivial otherwise,
    # as theano seems to believe a row vector is a matrix and will complain
    # about the updates.
    orig_shape = var.shape
    var = var.flatten()

    # The name and role are set here; the original comments say the
    # variables are later identified by these values.
    var.name = id + '_%s_clean' % var_type
    add_role(var, BNPARAM)
    shared_var = self.shared(np.zeros(out_dim),
                             name='shared_%s' % var.name, role=None)

    # Running average with weight 1 / counter: when the counter is reset
    # to 1 the previous estimate is discarded.
    cntr, c_up = self.counter()
    one = np.float32(1)
    run_avg = lambda new, old: one / cntr * new + (one - one / cntr) * old

    if var_type == 'mean':
        new_value = run_avg(var, shared_var)
    elif var_type == 'var':
        mb_size = T.cast(mb_size, 'float32')
        new_value = run_avg(mb_size / (mb_size - one) * var, shared_var)
    else:
        # NotImplemented is a constant, not an exception; calling it raised
        # an unrelated TypeError instead of reporting the problem.
        raise NotImplementedError('Unknown batch norm var %s' % var_type)

    # c_up holds the counter's own updates, which are registered together
    # with the running-average update.
    self.annotate_update([(shared_var, new_value)] + c_up, var)

    return var.reshape(orig_shape)
def _VerifyValues(self,
                  input_sizes=None,
                  filter_sizes=None,
                  strides=None,
                  dilations=None,
                  padding=None,
                  data_format_src="NHWC",
                  data_format_dst="NHWC",
                  expected=None):
    """Tests that tf.nn.conv2d produces the expected value.

    Input and filter are filled with the consecutive float32 values
    1, 2, 3, ...; inputs, sizes, strides and dilations are converted from
    the source to the destination data format before the op is built.

    Args:
      input_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      strides: Spatial strides.
      dilations: RHS dilations; defaults to [1, 1].
      padding: Padding type.
      data_format_src: Data format the input is given in.
      data_format_dst: Data format the verification runs in.
      expected: Expected output.
    """
    def _convert(values):
        return test_utils.ConvertBetweenDataFormats(
            values, data_format_src, data_format_dst)

    def _permute(dims):
        return test_utils.PermuteDimsBetweenDataFormats(
            dims, data_format_src, data_format_dst)

    input_count = np.prod(input_sizes)
    filter_count = np.prod(filter_sizes)
    x1 = np.arange(
        1, input_count + 1, dtype=np.float32).reshape(input_sizes)
    x2 = np.arange(
        1, filter_count + 1, dtype=np.float32).reshape(filter_sizes)

    # Pad the spatial values with unit batch/depth entries.
    strides = [1] + strides + [1]
    spatial_dilations = [1, 1] if dilations is None else dilations
    dilations = [1] + spatial_dilations + [1]

    # Convert between data formats.
    expected = _convert(expected)
    x1 = _convert(x1)
    input_sizes = _permute(input_sizes)
    strides = _permute(strides)
    dilations = _permute(dilations)

    with self.test_session() as sess:
        input_ph = array_ops.placeholder(dtypes.float32, shape=input_sizes)
        filter_ph = array_ops.placeholder(dtypes.float32, shape=filter_sizes)
        with self.test_scope():
            conv = nn_ops.conv2d(
                input_ph,
                filter_ph,
                strides=strides,
                padding=padding,
                data_format=data_format_dst,
                dilations=dilations)
        result = sess.run(conv, {input_ph: x1, filter_ph: x2})
        self.assertAllClose(expected, result, 1e-3)
def boardProd(chessboard, Q, K, R, B, N):
    """Return the row and column products of the board's last digits.

    The squares indexed by the first entry of `Q`, `K`, `R`, `B` and `N`
    and every negative square are temporarily set to 1 so they do not
    affect the products. Afterwards those piece squares are set to 0 and
    the negative squares to -1.

    Returns:
        (row_products, column_products) of `chessboard % 10`.
    """
    pieces = (Q, K, R, B, N)

    # Neutralise the piece squares (they hold 0) ...
    for piece in pieces:
        chessboard[piece[0]] = 1
    # ... and the negative squares.
    negative_squares = np.where(chessboard < 0)
    chessboard[negative_squares] = 1

    last_digits = chessboard % 10
    row_products = abs(np.prod(last_digits, axis=1))
    column_products = abs(np.prod(last_digits, axis=0))

    # Put back 0 and -1.
    for piece in pieces:
        chessboard[piece[0]] = 0
    chessboard[negative_squares] = -1

    return row_products, column_products
def __reshape__(self, newshape, order = 'C'):
    """Reshape the array in place.

    Args:
        newshape: Target shape as a number or a sequence; at most one entry
            may be -1, which is then inferred from the total size.
        order: Memory order; only 'C' is supported.

    Raises:
        NotImplementedError: if `order` is not 'C'.
        ValueError: if more than one -1 is given, or the total size would
            change.
    """
    # Compare by value: identity comparison with a string literal depends
    # on interning and triggers a SyntaxWarning on recent Python versions.
    if order != 'C':
        raise NotImplementedError
    if isinstance(newshape, numbers.Number):
        newshape = (newshape,)

    # Replace a possible -1 with the inferred dimension.
    if -1 in newshape:
        newshape = list(newshape)
        i = newshape.index(-1)
        newshape[i] = 1
        if -1 in newshape:
            raise ValueError('Only one -1 allowed in shape')
        # Floor division keeps the dimension an integer, so a size that is
        # not divisible is caught by the size check below instead of
        # slipping through as a fractional dimension.
        newshape[i] = int(self.size // numpy.prod(newshape))

    if self.size != numpy.prod(newshape):
        raise ValueError('total size of new array must be unchanged')

    if len(newshape) != 0:
        # The device array is only modified for non-empty shapes.
        if tuple(newshape) == self.shape:
            # Nothing to do.
            return
        af_shape = numpy.array(pu.c2f(newshape), dtype=pu.dim_t)
        s = arrayfire.Array()
        arrayfire.backend.get().af_moddims(
            ctypes.pointer(s.arr), self.d_array.arr,
            af_shape.size, ctypes.c_void_p(af_shape.ctypes.data))
        self.d_array = s

    self.h_array.shape = newshape
    self._shape = tuple(newshape)
def test_update_data_with_unaligned_original(self):
    """Validate FFT objects built on unaligned arrays against other arrays.

    The FFT pair is first created on deliberately unaligned copies of the
    test arrays and then re-validated with every aligned/unaligned
    combination of input and output.
    """
    in_shape = self.input_shapes['2d']
    out_shape = self.output_shapes['2d']
    input_dtype_alignment = self.get_input_dtype_alignment()
    axes = (-1,)
    a, b = self.create_test_arrays(in_shape, out_shape)

    def _unaligned_copy(source, shape, dtype):
        # Allocate a 16-byte aligned byte buffer with some slack and start
        # the view at an offset, which guarantees it is not 16-byte aligned.
        n_bytes = numpy.prod(shape) * source.itemsize + input_dtype_alignment
        raw = empty_aligned(n_bytes, dtype='int8', n=16)
        shifted = raw[input_dtype_alignment:].view(dtype=dtype).reshape(*shape)
        shifted[:] = source
        return shifted

    a_ = _unaligned_copy(a, in_shape, self.input_dtype)
    b_ = _unaligned_copy(b, out_shape, self.output_dtype)

    fft, ifft = self.run_validate_fft(a_, b_, axes,
                                      force_unaligned_data=True)

    for in_array, out_array in ((a, b_), (a_, b), (a_, b_)):
        self.run_validate_fft(in_array, out_array, axes, fft=fft, ifft=ifft)
def analyze_param(net, layers):
    """Print and return the fraction of non-zero parameters per layer.

    For every name in `layers`, the weights `net.params[layer][0].data`
    and biases `net.params[layer][1].data` are inspected.

    Args:
        net: Object whose `params` maps layer names to (weights, biases)
            entries exposing a `data` array.
        layers: Iterable of layer names to analyse.

    Returns:
        tuple: (overall non-zero fraction, list of per-layer fractions).

    Raises:
        ZeroDivisionError: if the selected layers hold no parameters.
    """
    # Single-argument print calls behave identically on Python 2 and 3,
    # whereas print statements are a syntax error on Python 3.
    print('\n=============analyze_param start===============')
    total_nonzero = 0
    total_allparam = 0
    percentage_list = []
    for i, layer in enumerate(layers, 1):
        W = net.params[layer][0].data
        b = net.params[layer][1].data
        weights = W.flatten()

        # Array reductions avoid iterating element by element in Python.
        print('W(%d) range = [%f, %f]' % (i, weights.min(), weights.max()))
        print('W(%d) mean = %f, std = %f' % (i, np.mean(weights),
                                             np.std(weights)))

        non_zero = np.count_nonzero(weights) + np.count_nonzero(b.flatten())
        all_param = np.prod(W.shape) + np.prod(b.shape)
        this_layer_percentage = non_zero / float(all_param)
        total_nonzero += non_zero
        total_allparam += all_param

        print('non-zero W and b cnt = %d' % non_zero)
        print('total W and b cnt = %d' % all_param)
        print('percentage = %f\n' % (this_layer_percentage))
        percentage_list.append(this_layer_percentage)

    overall = total_nonzero / float(total_allparam)
    print('=====> summary:')
    print('non-zero W and b cnt = %d' % total_nonzero)
    print('total W and b cnt = %d' % total_allparam)
    print('percentage = %f' % overall)
    print('=============analyze_param ends ===============')
    return (overall, percentage_list)
def na_op(x, y):
    """Apply the enclosing `op` to `x` and `y`, skipping null entries.

    The fast path goes through `expressions.evaluate`. If that raises
    TypeError, the operation is applied only to the non-null positions
    (pairwise when `y` is an ndarray or Series, otherwise against `y` as
    a whole), the skipped positions become NaN, and the result is
    reshaped like `x`.
    """
    try:
        result = expressions.evaluate(
            op, str_rep, x, y, raise_on_error=True, **eval_kwargs)
    except TypeError:
        x_flat = x.ravel()
        array_like_y = isinstance(y, (np.ndarray, pd.Series))
        if array_like_y:
            common = np.find_common_type([x.dtype, y.dtype], [])
            result = np.empty(x.size, dtype=common)
            y_flat = y.ravel()
            mask = notnull(x_flat) & notnull(y_flat)
            x_valid = x_flat[mask]
            y_valid = y_flat[mask]
            # Only evaluate when both sides still have elements.
            if np.prod(x_valid.shape) and np.prod(y_valid.shape):
                result[mask] = op(x_valid, y_valid)
        else:
            result = np.empty(x.size, dtype=x.dtype)
            mask = notnull(x_flat)
            x_valid = x_flat[mask]
            if np.prod(x_valid.shape):
                result[mask] = op(x_valid, y)

        # Positions that were skipped become NaN.
        result, _ = com._maybe_upcast_putmask(result, ~mask, np.nan)
        result = result.reshape(x.shape)

    return com._fill_zeros(result, x, y, name, fill_zeros)
def prod(x, axis=None, keepdims=False):
    """Return the product of `x` over the given axis or axes.

    Args:
        x: Array-like input.
        axis: None (all elements), an int, a tuple, or a list of ints.
        keepdims: Whether reduced axes are kept with length one.

    Returns:
        The result of `np.prod` over the requested axes.
    """
    if isinstance(axis, list):
        # Reduce all axes in one call. Reducing them one at a time with
        # keepdims=False renumbers the remaining axes after each step, so
        # later entries would refer to the wrong dimensions.
        axis = tuple(axis)
    return np.prod(x, axis=axis, keepdims=keepdims)
def _compute_shape(self, include_bc):
    """Precompute the mesh shape, both as a grid and as a vector.

    The grid shape lists the number of nodes in each dimension. The vector
    shape is `(n, 1)` where n is the total number of degrees of freedom.
    Both are stored in `self._shapes` under the key
    `(include_bc, as_grid)`, and n is stored in `self._dofs[include_bc]`.

    Parameters
    ----------
    include_bc : bool
        Indicates if the boundary padding is included.
    """
    def _nodes(param):
        # Add the left and right boundary nodes only when requested.
        if include_bc:
            return param.n + param.lbc.n + param.rbc.n
        return param.n

    grid_shape = [_nodes(self.parameters[axis]) for axis in range(self.dim)]
    total_dof = int(np.prod(np.array(grid_shape)))

    self._shapes[(include_bc, True)] = grid_shape
    self._shapes[(include_bc, False)] = (total_dof, 1)

    # Precompute the degrees of freedom dict too.
    self._dofs[include_bc] = total_dof
def dwts2(x, p, e, Id, Jd, Ip, Jp):
    """Compute the `p` weights `q` from the node positions `x`.

    The nodes are re-indexed by a cyclic shift of `e`, and all distances
    are taken relative to the first re-indexed node. For `p == 3` the
    weights have a closed form; otherwise the index tables `Id`, `Jd`,
    `Ip` and `Jp` select the difference and product terms.

    Returns:
        Array of length `p` with the weights in the original node order.
    """
    q = zeros(p)

    # ix maps the shifted ordering back to positions in x.
    ix = zeros(p, dtype=int)
    ix[(arange(p) + p - e) % p] = arange(p)
    first = ix[0]

    # Distances of the remaining nodes from the first one.
    d = x[ix[arange(1, p)]] - x[first]

    if p != 3:
        # Difference terms.
        dd = d[Id[:, 0]] - d[Id[:, 1]]
        # Permutation terms.
        dp = prod(d[Ip], axis=1)

        q[first] = 2.0 * sum(dp) / prod(d)
        for i in range(p - 1):
            fac = 2.0 * (-1) ** (p + i + 1)
            num = sum(dp[Jp[i, :]])  # all dp without i
            den = d[i] * prod(dd[Jd[i, :]])  # all dd with i
            q[ix[i + 1]] = fac * num / den
        return q

    gap = d[0] - d[1]
    q[first] = 2.0 / (d[0] * d[1])
    q[ix[1]] = 2.0 / (d[0] * gap)
    q[ix[2]] = -2.0 / (d[1] * gap)
    return q
def _initialize_theta(self):
    """Create the shared parameters and record the layer's output shape.

    Returns:
        list: [conv_w, conv_b, pool_w, pool_b].
    """
    filter_shape = self.filter_shape
    image_shape = self.image_shape
    poolsize = self.poolsize
    n_maps = filter_shape[0]

    conv_in = np.prod(filter_shape[1:])
    conv_out = n_maps * np.prod(filter_shape[2:])
    pool_out = conv_out / poolsize**2

    # Both the convolved and the pooled map must be non-empty.
    conv_map_size = image_shape[-1] - filter_shape[-1] + 1
    assert conv_map_size > 0
    pool_map_size = int(conv_map_size / poolsize)
    assert pool_map_size > 0

    # One value per feature map for the bias and pooling parameters.
    per_map = (n_maps, 1, 1)
    self.conv_w = shared(
        nn_random_paramters(conv_in, conv_out, shape=filter_shape))
    self.conv_b = shared(
        nn_random_paramters(conv_in, conv_out, shape=per_map))
    self.pool_w = shared(
        nn_random_paramters(conv_out, pool_out, shape=per_map))
    self.pool_b = shared(
        nn_random_paramters(conv_out, pool_out, shape=per_map))

    self.output_shape = (image_shape[0], n_maps,
                         pool_map_size, pool_map_size)
    return [self.conv_w, self.conv_b, self.pool_w, self.pool_b]
def varying_weight_orderplots(p, wkey, xvals):
    """
    Receives a wTOPopulation (weighted two objectives) and makes one plot
    per weight setting given in xvals. Works with weighted ranking or a
    weighted sum of objectives, depending on wkey being 'r' or 's'. The
    previous weight setting is restored afterwards.
    """
    def apply_weight(weight):
        # Set the weighting and bring the population back into order.
        if wkey == 's':
            p.set_sumcoeffs([weight, 1 - weight])
            p.update_scores()
            p.sort()
        if wkey == 'r':
            p.set_rankweights([weight, 1 - weight])
            p.update_overall_ranks()
            p.sort_for('overall_rank')

    # Remember the old setting.
    if wkey == 's':
        xold = p.sumcoeffs[0]
    elif wkey == 'r':
        xold = p.rankweights[0]

    # Make the plots.
    for i, x in enumerate(xvals):
        apply_weight(x)
        sqdft, sqdlt = p.ranking_triangles_twoobj(x, 1, wkey)
        title_parts = [
            'weighting factors: ', str(p.sumcoeffs), '\n',
            'sqdft = ', str(sqdft), ', sqdlf = ', str(sqdlt),
            ', crit 1 ', str(prod(sqdft)/prod(sqdlt)), '\n',
        ]
        r1, r2 = p.correlations_criterion(x, 1, wkey)
        title_parts += [
            '$r_{P,1}$ = ', str(r1), ', $r_{P,2}$ = ', str(r2),
            ', $crit(r_{P,1},r_{P,2})$ = ',
            str(abs(r1-r2)*max(abs(r1), abs(r2))),
        ]
        picname = ('var_' + wkey + 'w_orderplot_nc' + str(p.ncase) +
                   '_g' + str(p.gg).zfill(3) +
                   '_op' + str(i).zfill(2) + '.png')
        MOorderplot(p, join(p.path, 'plots2'), title=''.join(title_parts),
                    picname=picname)

    # Restore the old order.
    if wkey == 's' or wkey == 'r':
        apply_weight(xold)
def test_neibs_bad_shape_wrap_centered(self):
    """Check which shapes `images2neibs` accepts in "wrap_centered" mode.

    The neighbourhood shapes (3, 2) and (2, 3), and the image shapes
    (2, 3, 2, 3) and (2, 3, 3, 2) with a (3, 3) neighbourhood, must raise
    TypeError when the compiled function runs; a (2, 3, 3, 3) image with
    a (3, 3) neighbourhood must run.
    """
    def make_images(image_shape, dtype=None):
        values = numpy.arange(numpy.prod(image_shape), dtype=dtype)
        return shared(values.reshape(image_shape))

    def compile_neibs(images, neib_shape):
        neib_var = T.as_tensor_variable(neib_shape)
        return function([],
                        images2neibs(images, neib_var, mode="wrap_centered"),
                        mode=self.mode)

    shape = (2, 3, 10, 10)
    for dtype in self.dtypes:
        # NOTE(review): `shape` is rebound further down, so only the first
        # dtype starts from (2, 3, 10, 10) -- kept as in the original.
        images = make_images(shape, dtype)
        for bad_neib in [(3, 2), (2, 3)]:
            self.assertRaises(TypeError, compile_neibs(images, bad_neib))

        for shape in [(2, 3, 2, 3), (2, 3, 3, 2)]:
            self.assertRaises(TypeError,
                              compile_neibs(make_images(shape), (3, 3)))

        # Test a valid shape.
        shape = (2, 3, 3, 3)
        compile_neibs(make_images(shape), (3, 3))()
def make_workspace_ijv(self):
    """Build a workspace whose objects come from random (i, j, label) rows.

    Returns:
        tuple: (workspace, module, ijv), where `ijv` is the array of
        (row, column, label) triples given to the objects.
    """
    module = C.ConvertToImage()
    shape = (14, 16)
    n_points = np.prod(shape)
    r = np.random.RandomState()
    r.seed(0)
    i = r.randint(0, shape[0], size = n_points)
    j = r.randint(0, shape[1], size = n_points)
    v = r.randint(1, 8, size = n_points)

    order = np.lexsort((i, j, v))
    ijv = np.column_stack((i, j, v))
    ijv = ijv[order, :]

    # Drop rows that equal their successor. `same` has one entry fewer
    # than `ijv`, so it is applied to all rows but the last: current NumPy
    # rejects a boolean mask of the wrong length, and older releases
    # selected exactly these rows for a short mask.
    same = np.all(ijv[:-1, :] == ijv[1:, :], 1)
    ijv = ijv[:-1, :][~same, :]

    pipeline = cpp.Pipeline()
    object_set = cpo.ObjectSet()
    image_set_list = cpi.ImageSetList()
    image_set = image_set_list.get_image_set(0)
    workspace = cpw.Workspace(pipeline,
                              module,
                              image_set,
                              object_set,
                              cpmeas.Measurements(),
                              image_set_list)
    objects = cpo.Objects()
    objects.set_ijv(ijv, shape)
    object_set.add_objects(objects, OBJECTS_NAME)
    self.assertGreater(len(objects.get_labels()), 1)
    module.image_name.value = IMAGE_NAME
    module.object_name.value = OBJECTS_NAME
    return (workspace, module, ijv)
def case(S, n_trials=50):
    """Check `S` against its dual channel on random density matrices.

    For `n_trials` random pairs (X, Y), the matrix element of `S` between
    X and Y must match the one of the adjoint of the dual channel.
    """
    S = to_super(S)
    left_dims, right_dims = S.dims

    # Assume for the purposes of the test that S maps square operators to
    # square operators.
    in_dim = np.prod(right_dims[0])
    out_dim = np.prod(left_dims[0])

    S_dual = to_super(S.dual_chan())

    def _random_vector(dim, dims):
        # Random density matrix with the requested dims, vectorised.
        rho = rand_dm_ginibre(dim)
        rho.dims = dims
        return operator_to_vector(rho)

    primals, duals = [], []
    for _ in range(n_trials):
        X = _random_vector(out_dim, left_dims)
        Y = _random_vector(in_dim, right_dims)
        primals.append((X.dag() * S * Y)[0, 0])
        duals.append((X.dag() * S_dual.dag() * Y)[0, 0])

    np.testing.assert_array_almost_equal(primals, duals)
def testContribSignalSTFT(self):
    """Compare `signal.stft` with SciPy's STFT on shuffled ramp data.

    The gradient of the output with respect to the input is also built
    and executed, but its value is not checked.
    """
    ws = 512
    hs = 128
    dims = (ws * 20,)
    shape = BATCH_DIMS + dims

    data = np.arange(np.prod(shape)) / np.prod(dims)
    np.random.seed(123)
    np.random.shuffle(data)
    data = np.reshape(data.astype(np.float32), shape)

    window = sps.get_window("hann", ws)
    reference = sps.stft(
        data, nperseg=ws, noverlap=ws - hs, boundary=None, window=window)
    expected = np.swapaxes(reference[2], -1, -2)
    expected *= window.sum()  # scipy divides by window sum

    with self.test_session() as sess:
        with self.test_scope():
            ph = array_ops.placeholder(
                dtypes.as_dtype(data.dtype), shape=data.shape)
            out = signal.stft(ph, ws, hs)
            grad = gradients_impl.gradients(
                out, ph, grad_ys=array_ops.ones_like(out))

        # For gradients, simply verify that they compile & execute.
        value, _ = sess.run([out, grad], {ph: data})
        self.assertAllClose(expected, value, rtol=RTOL, atol=ATOL)
def train(sess, model, eval_model, train_set, valid_set, test_set):
    """Train a sketch-rnn model.

    Runs `model.hps.num_steps` training steps. Training statistics are
    logged and written as summaries every 20 steps. Every
    `hps.save_every` steps the model is evaluated on `valid_set`; when the
    validation cost improves, the model is saved and also evaluated on
    `test_set`.

    Args:
      sess: Session used to run the model.
      model: Training model; provides `hps`, the input placeholders, the
        cost tensors, `global_step` and `train_op`.
      eval_model: Model passed to `evaluate_model`.
      train_set: Data source providing `random_batch()`.
      valid_set: Data passed to `evaluate_model` for validation.
      test_set: Data passed to `evaluate_model` after a new best
        validation cost.
    """
    # Setup summary writer.
    summary_writer = tf.summary.FileWriter(FLAGS.log_root)

    # Calculate trainable params.
    t_vars = tf.trainable_variables()
    count_t_vars = 0
    for var in t_vars:
        num_param = np.prod(var.get_shape().as_list())
        count_t_vars += num_param
        tf.logging.info('%s %s %i', var.name, str(var.get_shape()), num_param)
    tf.logging.info('Total trainable variables %i.', count_t_vars)
    model_summ = tf.summary.Summary()
    model_summ.value.add(
        tag='Num_Trainable_Params', simple_value=float(count_t_vars))
    summary_writer.add_summary(model_summ, 0)
    summary_writer.flush()

    # setup eval stats
    best_valid_cost = 100000000.0  # set a large init value
    valid_cost = 0.0

    # main train loop
    hps = model.hps
    # `start` is reset after every timed phase, so each reported duration
    # covers only the time since the previous report.
    start = time.time()

    for _ in range(hps.num_steps):
        step = sess.run(model.global_step)

        # Learning rate decays exponentially towards hps.min_learning_rate.
        curr_learning_rate = ((hps.learning_rate - hps.min_learning_rate) *
                              (hps.decay_rate)**step + hps.min_learning_rate)
        # KL weight moves from hps.kl_weight_start towards hps.kl_weight.
        curr_kl_weight = (hps.kl_weight - (hps.kl_weight - hps.kl_weight_start) *
                          (hps.kl_decay_rate)**step)

        _, x, s = train_set.random_batch()
        feed = {
            model.input_data: x,
            model.sequence_lengths: s,
            model.lr: curr_learning_rate,
            model.kl_weight: curr_kl_weight
        }

        # Run one optimisation step; `train_step` is the global step value
        # returned by this run and is used to index the summaries below.
        (train_cost, r_cost, kl_cost, _, train_step, _) = sess.run([
            model.cost, model.r_cost, model.kl_cost, model.final_state,
            model.global_step, model.train_op
        ], feed)

        # Periodic training report.
        if step % 20 == 0 and step > 0:
            end = time.time()
            time_taken = end - start

            cost_summ = tf.summary.Summary()
            cost_summ.value.add(tag='Train_Cost', simple_value=float(train_cost))
            reconstr_summ = tf.summary.Summary()
            reconstr_summ.value.add(
                tag='Train_Reconstr_Cost', simple_value=float(r_cost))
            kl_summ = tf.summary.Summary()
            kl_summ.value.add(tag='Train_KL_Cost', simple_value=float(kl_cost))
            lr_summ = tf.summary.Summary()
            lr_summ.value.add(
                tag='Learning_Rate', simple_value=float(curr_learning_rate))
            kl_weight_summ = tf.summary.Summary()
            kl_weight_summ.value.add(
                tag='KL_Weight', simple_value=float(curr_kl_weight))
            time_summ = tf.summary.Summary()
            time_summ.value.add(
                tag='Time_Taken_Train', simple_value=float(time_taken))

            output_format = ('step: %d, lr: %.6f, klw: %0.4f, cost: %.4f, '
                             'recon: %.4f, kl: %.4f, train_time_taken: %.4f')
            output_values = (step, curr_learning_rate, curr_kl_weight, train_cost,
                             r_cost, kl_cost, time_taken)
            output_log = output_format % output_values

            tf.logging.info(output_log)

            summary_writer.add_summary(cost_summ, train_step)
            summary_writer.add_summary(reconstr_summ, train_step)
            summary_writer.add_summary(kl_summ, train_step)
            summary_writer.add_summary(lr_summ, train_step)
            summary_writer.add_summary(kl_weight_summ, train_step)
            summary_writer.add_summary(time_summ, train_step)
            summary_writer.flush()
            start = time.time()

        # Periodic validation, with checkpointing on improvement.
        if step % hps.save_every == 0 and step > 0:
            (valid_cost, valid_r_cost, valid_kl_cost) = evaluate_model(
                sess, eval_model, valid_set)

            end = time.time()
            time_taken_valid = end - start
            start = time.time()

            valid_cost_summ = tf.summary.Summary()
            valid_cost_summ.value.add(
                tag='Valid_Cost', simple_value=float(valid_cost))
            valid_reconstr_summ = tf.summary.Summary()
            valid_reconstr_summ.value.add(
                tag='Valid_Reconstr_Cost', simple_value=float(valid_r_cost))
            valid_kl_summ = tf.summary.Summary()
            valid_kl_summ.value.add(
                tag='Valid_KL_Cost', simple_value=float(valid_kl_cost))
            valid_time_summ = tf.summary.Summary()
            valid_time_summ.value.add(
                tag='Time_Taken_Valid', simple_value=float(time_taken_valid))

            output_format = ('best_valid_cost: %0.4f, valid_cost: %.4f, valid_recon: '
                             '%.4f, valid_kl: %.4f, valid_time_taken: %.4f')
            # The logged best cost already accounts for the current value.
            output_values = (min(best_valid_cost, valid_cost), valid_cost,
                             valid_r_cost, valid_kl_cost, time_taken_valid)
            output_log = output_format % output_values

            tf.logging.info(output_log)

            summary_writer.add_summary(valid_cost_summ, train_step)
            summary_writer.add_summary(valid_reconstr_summ, train_step)
            summary_writer.add_summary(valid_kl_summ, train_step)
            summary_writer.add_summary(valid_time_summ, train_step)
            summary_writer.flush()

            # Save and run the test-set evaluation only for a new best
            # validation cost.
            if valid_cost < best_valid_cost:
                best_valid_cost = valid_cost

                save_model(sess, FLAGS.log_root, step)

                end = time.time()
                time_taken_save = end - start
                start = time.time()

                tf.logging.info('time_taken_save %4.4f.', time_taken_save)

                best_valid_cost_summ = tf.summary.Summary()
                best_valid_cost_summ.value.add(
                    tag='Best_Valid_Cost', simple_value=float(best_valid_cost))

                summary_writer.add_summary(best_valid_cost_summ, train_step)
                summary_writer.flush()

                (eval_cost, eval_r_cost, eval_kl_cost) = evaluate_model(
                    sess, eval_model, test_set)

                end = time.time()
                time_taken_eval = end - start
                start = time.time()

                eval_cost_summ = tf.summary.Summary()
                eval_cost_summ.value.add(tag='Eval_Cost', simple_value=float(eval_cost))
                eval_reconstr_summ = tf.summary.Summary()
                eval_reconstr_summ.value.add(
                    tag='Eval_Reconstr_Cost', simple_value=float(eval_r_cost))
                eval_kl_summ = tf.summary.Summary()
                eval_kl_summ.value.add(
                    tag='Eval_KL_Cost', simple_value=float(eval_kl_cost))
                eval_time_summ = tf.summary.Summary()
                eval_time_summ.value.add(
                    tag='Time_Taken_Eval', simple_value=float(time_taken_eval))

                output_format = ('eval_cost: %.4f, eval_recon: %.4f, '
                                 'eval_kl: %.4f, eval_time_taken: %.4f')
                output_values = (eval_cost, eval_r_cost, eval_kl_cost,
                                 time_taken_eval)
                output_log = output_format % output_values

                tf.logging.info(output_log)

                summary_writer.add_summary(eval_cost_summ, train_step)
                summary_writer.add_summary(eval_reconstr_summ, train_step)
                summary_writer.add_summary(eval_kl_summ, train_step)
                summary_writer.add_summary(eval_time_summ, train_step)
                summary_writer.flush()
def readarray(self, size=None, offset=0, dtype=np.uint8, shape=None):
    """
    Similar to file.read(), but returns the contents of the underlying
    file as a numpy array (or mmap'd array if memmap=True) rather than a
    string.

    Usually it's best not to use the `size` argument with this method, but
    it's provided for compatibility.

    Parameters
    ----------
    size : int, optional
        Number of bytes to read.  Must be a multiple of the dtype's item
        size and, when `shape` is also given, must match the byte size of
        an array of that shape exactly.
    offset : int, optional
        Byte offset into the file at which the array starts.
    dtype : numpy dtype or anything accepted by ``np.dtype``
        Element type of the returned array.
    shape : int or tuple of int, optional
        Shape of the returned array; a bare int is treated as ``(shape,)``.
        When neither `size` nor `shape` is given a warning is issued and
        ``(1,)`` is assumed.

    Returns
    -------
    numpy.ndarray
        Array of the requested shape and dtype.

    Raises
    ------
    EOFError
        If the underlying file object has no ``read`` attribute.
    ValueError
        If `size` is not a multiple of the item size, or if `size` and
        `shape` disagree.
    """
    if not hasattr(self._file, 'read'):
        raise EOFError

    if not isinstance(dtype, np.dtype):
        dtype = np.dtype(dtype)

    if size and size % dtype.itemsize != 0:
        raise ValueError('size %d not a multiple of %s' % (size, dtype))

    if isinstance(shape, int):
        shape = (shape, )

    if not (size or shape):
        warnings.warn(
            'No size or shape given to readarray(); assuming a '
            'shape of (1,)', AstropyUserWarning)
        shape = (1, )

    if size and not shape:
        shape = (size // dtype.itemsize, )

    if size and shape:
        actualsize = np.prod(shape) * dtype.itemsize

        # The array needs more bytes than `size` provides.
        if actualsize > size:
            raise ValueError('size %d is too few bytes for a %s array of '
                             '%s' % (size, shape, dtype))
        # `size` provides more bytes than the array can hold.
        if actualsize < size:
            raise ValueError('size %d is too many bytes for a %s array of '
                             '%s' % (size, shape, dtype))

    if self.memmap:
        if self._mmap is None:
            # Instantiate Memmap array of the file offset at 0
            # (so we can return slices of it to offset anywhere else into
            # the file)
            memmap = Memmap(self._file,
                            mode=MEMMAP_MODES[self.mode],
                            dtype=np.uint8)
            # Now we immediately discard the memmap array; we are really
            # just using it as a factory function to instantiate the mmap
            # object in a convenient way (may later do away with this
            # usage)
            self._mmap = memmap.base
            # Prevent dorking with self._memmap._mmap by memmap.__del__ in
            # Numpy 1.6 (see
            # https://github.com/numpy/numpy/commit/dcc355a0b179387eeba10c95baf2e1eb21d417c7)
            memmap._mmap = None
            del memmap

        return np.ndarray(shape=shape, dtype=dtype, offset=offset,
                          buffer=self._mmap)
    else:
        count = reduce(lambda x, y: x * y, shape)
        # Remember the current position so the read at `offset` does not
        # disturb the caller's view of the file pointer.
        pos = self._file.tell()
        self._file.seek(offset)
        data = _array_from_file(self._file, dtype, count, '')
        data.shape = shape
        self._file.seek(pos)
        return data
def plan_experiment(self):
    """
    Build the experiment plan and persist it.

    Derives the axis transposition, the per-stage network configuration
    (one full-resolution stage and, if needed, one low-resolution stage)
    and a set of stage-independent settings from ``self.dataset_properties``.
    The result is stored in ``self.plans_per_stage`` / ``self.plans`` and
    written out through ``self.save_my_plans()``.  Nothing is returned.
    """
    use_nonzero_mask_for_normalization = self.determine_whether_to_use_mask_for_norm()
    print("Are we using the nonzero mask for normalizaion?", use_nonzero_mask_for_normalization)
    spacings = self.dataset_properties['all_spacings']
    sizes = self.dataset_properties['all_sizes']

    all_classes = self.dataset_properties['all_classes']
    modalities = self.dataset_properties['modalities']
    num_modalities = len(list(modalities.keys()))

    target_spacing = self.get_target_spacing()
    # shape each case would have after resampling from its own spacing to the target spacing
    new_shapes = [np.array(i) / target_spacing * np.array(j) for i, j in zip(spacings, sizes)]

    # put the axis with the largest target spacing first; transpose_backward is the inverse permutation
    max_spacing_axis = np.argmax(target_spacing)
    remaining_axes = [i for i in list(range(3)) if i != max_spacing_axis]
    self.transpose_forward = [max_spacing_axis] + remaining_axes
    self.transpose_backward = [np.argwhere(np.array(self.transpose_forward) == i)[0][0] for i in range(3)]

    # we base our calculations on the median shape of the datasets
    median_shape = np.median(np.vstack(new_shapes), 0)
    print("the median shape of the dataset is ", median_shape)

    max_shape = np.max(np.vstack(new_shapes), 0)
    print("the max shape in the dataset is ", max_shape)
    min_shape = np.min(np.vstack(new_shapes), 0)
    print("the min shape in the dataset is ", min_shape)

    print("we don't want feature maps smaller than ", self.unet_featuremap_min_edge_length, " in the bottleneck")

    # how many stages will the image pyramid have?
    self.plans_per_stage = list()

    target_spacing_transposed = np.array(target_spacing)[self.transpose_forward]
    median_shape_transposed = np.array(median_shape)[self.transpose_forward]
    print("the transposed median shape of the dataset is ", median_shape_transposed)

    print("generating configuration for 3d_fullres")
    self.plans_per_stage.append(self.get_properties_for_stage(target_spacing_transposed, target_spacing_transposed,
                                                              median_shape_transposed,
                                                              len(self.list_of_cropped_npz_files),
                                                              num_modalities, len(all_classes) + 1))

    # thanks Zakiyi (https://github.com/MIC-DKFZ/nnUNet/issues/61) for spotting this bug :-)
    # if np.prod(self.plans_per_stage[-1]['median_patient_size_in_voxels'], dtype=np.int64) / \
    #        architecture_input_voxels < HOW_MUCH_OF_A_PATIENT_MUST_THE_NETWORK_SEE_AT_STAGE0:
    architecture_input_voxels_here = np.prod(self.plans_per_stage[-1]['patch_size'], dtype=np.int64)
    # a low-resolution stage is only planned when the median case holds at least
    # `how_much_of_a_patient_must_the_network_see_at_stage0` times as many voxels as one patch
    if np.prod(median_shape) / architecture_input_voxels_here < \
            self.how_much_of_a_patient_must_the_network_see_at_stage0:
        more = False
    else:
        more = True

    if more:
        print("generating configuration for 3d_lowres")
        # if we are doing more than one stage then we want the lowest stage to have exactly
        # HOW_MUCH_OF_A_PATIENT_MUST_THE_NETWORK_SEE_AT_STAGE0 (this is 4 by default so the number of voxels in the
        # median shape of the lowest stage must be 4 times as much as the network can process at once (128x128x128 by
        # default). Problem is that we are downsampling higher resolution axes before we start downsampling the
        # out-of-plane axis. We could probably/maybe do this analytically but I am lazy, so here
        # we do it the dumb way

        # assumes target_spacing is a numpy array (boolean-mask indexing and in-place `*=` below) - TODO confirm
        lowres_stage_spacing = deepcopy(target_spacing)
        num_voxels = np.prod(median_shape, dtype=np.float64)
        while num_voxels > self.how_much_of_a_patient_must_the_network_see_at_stage0 * architecture_input_voxels_here:
            max_spacing = max(lowres_stage_spacing)
            # coarsen by 1% per iteration: only the axes that are more than 2x finer than the
            # coarsest one, or all axes once none is
            if np.any((max_spacing / lowres_stage_spacing) > 2):
                lowres_stage_spacing[(max_spacing / lowres_stage_spacing) > 2] \
                    *= 1.01
            else:
                lowres_stage_spacing *= 1.01
            num_voxels = np.prod(target_spacing / lowres_stage_spacing * median_shape, dtype=np.float64)

            lowres_stage_spacing_transposed = np.array(lowres_stage_spacing)[self.transpose_forward]
            new = self.get_properties_for_stage(lowres_stage_spacing_transposed, target_spacing_transposed,
                                                median_shape_transposed,
                                                len(self.list_of_cropped_npz_files),
                                                num_modalities, len(all_classes) + 1)
            # the patch size may have changed with the new spacing, so refresh the loop bound
            architecture_input_voxels_here = np.prod(new['patch_size'], dtype=np.int64)
        # keep the low-resolution stage only if its median case has less than half the voxels
        # of the full-resolution stage
        if 2 * np.prod(new['median_patient_size_in_voxels'], dtype=np.int64) < np.prod(
                self.plans_per_stage[0]['median_patient_size_in_voxels'], dtype=np.int64):
            self.plans_per_stage.append(new)

    # reverse so that the most recently appended (low-resolution) stage, if any, comes first
    self.plans_per_stage = self.plans_per_stage[::-1]
    self.plans_per_stage = {i: self.plans_per_stage[i] for i in range(len(self.plans_per_stage))}  # convert to dict

    print(self.plans_per_stage)
    print("transpose forward", self.transpose_forward)
    print("transpose backward", self.transpose_backward)

    normalization_schemes = self.determine_normalization_scheme()
    only_keep_largest_connected_component, min_size_per_class, min_region_size_per_class = None, None, None
    # removed training data based postprocessing. This is deprecated

    # these are independent of the stage
    plans = {'num_stages': len(list(self.plans_per_stage.keys())), 'num_modalities': num_modalities,
             'modalities': modalities, 'normalization_schemes': normalization_schemes,
             'dataset_properties': self.dataset_properties, 'list_of_npz_files': self.list_of_cropped_npz_files,
             'original_spacings': spacings, 'original_sizes': sizes,
             'preprocessed_data_folder': self.preprocessed_output_folder, 'num_classes': len(all_classes),
             'all_classes': all_classes, 'base_num_features': self.unet_base_num_features,
             'use_mask_for_norm': use_nonzero_mask_for_normalization,
             'keep_only_largest_region': only_keep_largest_connected_component,
             'min_region_size_per_class': min_region_size_per_class, 'min_size_per_class': min_size_per_class,
             'transpose_forward': self.transpose_forward, 'transpose_backward': self.transpose_backward,
             'data_identifier': self.data_identifier, 'plans_per_stage': self.plans_per_stage,
             'preprocessor_name': self.preprocessor_name,
             'conv_per_stage': self.conv_per_stage,
             }

    self.plans = plans
    self.save_my_plans()
def get_properties_for_stage(self, current_spacing, original_spacing, original_shape, num_cases,
                             num_modalities, num_classes):
    """
    Computation of input patch size starts out with the new median shape (in voxels) of a dataset. This is
    opposed to prior experiments where I based it on the median size in mm. The rationale behind this is that
    for some organ of interest the acquisition method will most likely be chosen such that the field of view and
    voxel resolution go hand in hand to show the doctor what they need to see. This assumption may be violated
    for some modalities with anisotropy (cine MRI) but we will have to live with that. In future experiments I
    will try to 1) base input patch size match aspect ratio of input size in mm (instead of voxels) and 2) to
    try to enforce that we see the same 'distance' in all directions (try to maintain equal size in mm of patch)

    The patches created here attempt keep the aspect ratio of the new_median_shape

    :param current_spacing: spacing of the stage being planned
    :param original_spacing: spacing that `original_shape` refers to
    :param original_shape: median shape at `original_spacing`; rescaled to `current_spacing` here
    :param num_cases: number of cases, used to estimate the total voxel count of the dataset
    :param num_modalities: forwarded to Generic_UNet.compute_approx_vram_consumption
    :param num_classes: forwarded to Generic_UNet.compute_approx_vram_consumption
    :return: dict with the keys 'batch_size', 'num_pool_per_axis', 'patch_size',
        'median_patient_size_in_voxels', 'current_spacing', 'original_spacing', 'do_dummy_2D_data_aug',
        'pool_op_kernel_sizes' and 'conv_kernel_sizes'
    """
    new_median_shape = np.round(original_spacing / current_spacing * original_shape).astype(int)
    dataset_num_voxels = np.prod(new_median_shape) * num_cases

    # the next (commented-out) line is what we had before as a default. The patch size had the same aspect
    # ratio as the median shape of a patient. It was replaced by the spacing-based initialization below.
    # input_patch_size = new_median_shape

    # compute how many voxels are one mm
    input_patch_size = 1 / np.array(current_spacing)

    # normalize voxels per mm
    input_patch_size /= input_patch_size.mean()

    # create an isotropic patch of size 512x512x512mm
    input_patch_size *= 1 / min(input_patch_size) * 512  # to get a starting value
    input_patch_size = np.round(input_patch_size).astype(int)

    # clip it to the median shape of the dataset because patches larger then that make not much sense
    input_patch_size = [min(i, j) for i, j in zip(input_patch_size, new_median_shape)]

    network_num_pool_per_axis, pool_op_kernel_sizes, conv_kernel_sizes, new_shp, \
    shape_must_be_divisible_by = get_pool_and_conv_props_poolLateV2(input_patch_size,
                                                                    self.unet_featuremap_min_edge_length,
                                                                    self.unet_max_numpool,
                                                                    current_spacing)

    # `ref` is the reference budget; `here` is the estimate for the current candidate patch
    ref = Generic_UNet.use_this_for_batch_size_computation_3D
    here = Generic_UNet.compute_approx_vram_consumption(new_shp, network_num_pool_per_axis,
                                                        self.unet_base_num_features,
                                                        self.unet_max_num_filters, num_modalities,
                                                        num_classes,
                                                        pool_op_kernel_sizes, conv_per_stage=self.conv_per_stage)
    # shrink the patch until the estimate fits the reference budget
    while here > ref:
        # reduce the axis whose patch extent is largest relative to the median shape
        axis_to_be_reduced = np.argsort(new_shp / new_median_shape)[-1]

        # probe with a copy first: the divisibility requirement is re-derived for the reduced shape
        # and that new value is what is actually subtracted from new_shp
        tmp = deepcopy(new_shp)
        tmp[axis_to_be_reduced] -= shape_must_be_divisible_by[axis_to_be_reduced]
        _, _, _, _, shape_must_be_divisible_by_new = \
            get_pool_and_conv_props_poolLateV2(tmp,
                                               self.unet_featuremap_min_edge_length,
                                               self.unet_max_numpool,
                                               current_spacing)
        new_shp[axis_to_be_reduced] -= shape_must_be_divisible_by_new[axis_to_be_reduced]

        # we have to recompute numpool now:
        network_num_pool_per_axis, pool_op_kernel_sizes, conv_kernel_sizes, new_shp, \
        shape_must_be_divisible_by = get_pool_and_conv_props_poolLateV2(new_shp,
                                                                        self.unet_featuremap_min_edge_length,
                                                                        self.unet_max_numpool,
                                                                        current_spacing)

        here = Generic_UNet.compute_approx_vram_consumption(new_shp, network_num_pool_per_axis,
                                                            self.unet_base_num_features,
                                                            self.unet_max_num_filters, num_modalities,
                                                            num_classes, pool_op_kernel_sizes,
                                                            conv_per_stage=self.conv_per_stage)
        # print(new_shp)

    input_patch_size = new_shp

    batch_size = Generic_UNet.DEFAULT_BATCH_SIZE_3D  # This is what works with 128**3
    # scale the default batch size by the remaining headroom, never below the default
    batch_size = int(np.floor(max(ref / here, 1) * batch_size))

    # check if batch size is too large
    max_batch_size = np.round(self.batch_size_covers_max_percent_of_dataset * dataset_num_voxels /
                              np.prod(input_patch_size, dtype=np.int64)).astype(int)
    max_batch_size = max(max_batch_size, self.unet_min_batch_size)
    batch_size = min(batch_size, max_batch_size)

    # flag set when the largest patch edge exceeds the first-axis edge by more than anisotropy_threshold
    do_dummy_2D_data_aug = (max(input_patch_size) / input_patch_size[
        0]) > self.anisotropy_threshold

    plan = {
        'batch_size': batch_size,
        'num_pool_per_axis': network_num_pool_per_axis,
        'patch_size': input_patch_size,
        'median_patient_size_in_voxels': new_median_shape,
        'current_spacing': current_spacing,
        'original_spacing': original_spacing,
        'do_dummy_2D_data_aug': do_dummy_2D_data_aug,
        'pool_op_kernel_sizes': pool_op_kernel_sizes,
        'conv_kernel_sizes': conv_kernel_sizes,
    }
    return plan
def DoAcquisition(self):
    """
    Scan a rectangular grid of e-beam spots and acquire the matching CCD
    image(s).

    The acquisition is driven by these attributes of the instance:
    repetitions (tuple of ints): The number of CL spots are used
    dwell_time (float): Time to scan each spot #s
    escan (model.Emitter): The e-beam scanner
    ccd (model.DigitalCamera): The CCD
    detector (model.Detector): The electron detector

    returns (DataArray or list of DataArrays): 2D array containing the
                 spotted optical image, or a list of 2D images
                 containing the optical image for each spot.
            (List of tuples): Coordinates of spots in electron image
            (Tuple of floats): Scaling of electron image
    """
    self._save_hw_settings()
    self._acq_state = RUNNING
    self._ccd_done.clear()

    escan = self.escan
    rep = self.repetitions

    # Fields of view of both instruments; the SEM scanned extent is shrunk
    # because the SEM pixels sit at the center of each pixel.
    ccd_fov = self.get_ccd_fov()
    sem_fov = self.get_sem_fov()
    ccd_size = (ccd_fov[2] - ccd_fov[0], ccd_fov[3] - ccd_fov[1])
    sem_size = (sem_fov[2] - sem_fov[0], sem_fov[3] - sem_fov[1])
    sem_scan_size = tuple(extent * (count - 1) / count
                          for extent, count in zip(sem_size, rep))

    # Never scan more than 80% of the optical FoV, so that the whole grid
    # is guaranteed to be visible on the CCD.
    fit_ratios = [optical * 0.8 / scanned
                  for optical, scanned in zip(ccd_size, sem_scan_size)]
    ratio = min(1, min(fit_ratios))

    # Use the smallest dimension of the resolution so the grid is square
    # even when the resolution ratio is not 1:1, then apply the ratio.
    min_res = min(escan.resolution.range[1])
    scale = tuple(min_res / count * ratio for count in (rep[0], rep[1]))
    if scale[0] < 1 or scale[1] < 1:
        scale = (1, 1)
        logging.warning("SEM field of view is too big. Scale set to %s.", scale)

    # Spot coordinates, centered on the origin, first axis varying slowest.
    half_x = ((rep[0] - 1) * scale[0]) / 2
    half_y = ((rep[1] - 1) * scale[1]) / 2
    electron_coordinates = [
        (-half_x + col * scale[0], -half_y + row * scale[1])
        for col in range(rep[0])
        for row in range(rep[1])
    ]

    spot_dist = (scale[0] * escan.pixelSize.value[0],
                 scale[1] * escan.pixelSize.value[1])

    # Exposure time needed to image the whole grid in one go, to be checked
    # against what the camera supports.
    # TODO handle similar case in the SpotAcquisition
    dwell_time = self.dwell_time
    et = numpy.prod(self.repetitions) * dwell_time
    max_et = self.ccd.exposureTime.range[1]

    try:
        # Fall back to "one image per spot" when the spots would be closer
        # than the size of a spot, or the exposure would be too long.
        spots_too_close = (spot_dist[0] < SPOT_SIZE) or (spot_dist[1] < SPOT_SIZE)
        if spots_too_close or (et > max_et):
            return self._doSpotAcquisition(electron_coordinates, scale)
        return self._doWholeAcquisition(electron_coordinates, scale)
    finally:
        self._restore_hw_settings()
def _doSpotAcquisition(self, electron_coordinates, scale):
    """
    Perform acquisition spot per spot.
    Slow, but works even if SEM FoV is small

    electron_coordinates (list of tuples): e-beam translation for each spot;
      the first and last entries also define the SEM ROI given to the CCD setup
    scale: passed through unchanged to the caller
    returns (list): the images stored in self._spot_images
            (list of tuples): electron_coordinates, as received
            scale, as received
    raises:
        CancelledError if self._acq_state is CANCELLED before a spot is
          scanned or once the scan is over
        TimeoutError if the CCD image of a spot is not signalled in time
    """
    escan = self.escan
    ccd = self.ccd
    detector = self.detector
    dwell_time = self.dwell_time
    # Each spot is scanned as a single pixel; the position is set via translation
    escan.scale.value = (1, 1)
    escan.resolution.value = (1, 1)

    # Set dt large enough so we unsubscribe before we even get an SEM
    # image (just to discard it) and start a second scan which would
    # cost in time.
    sem_dt = 2 * dwell_time
    escan.dwellTime.value = escan.dwellTime.clip(sem_dt)

    # CCD setup
    sem_shape = escan.shape[0:2]
    # sem ROI is ltrb
    # Built from the first and last spot, shifted by 0.5 after dividing by the SEM shape
    sem_roi = (electron_coordinates[0][0] / sem_shape[0] + 0.5,
               electron_coordinates[0][1] / sem_shape[1] + 0.5,
               electron_coordinates[-1][0] / sem_shape[0] + 0.5,
               electron_coordinates[-1][1] / sem_shape[1] + 0.5)
    ccd_roi = self.sem_roi_to_ccd(sem_roi)
    self.configure_ccd(ccd_roi)

    if self.bgsub:
        # Image taken before the scan starts, kept as self.bg_image
        self.bg_image = ccd.data.get(asap=False)
    et = dwell_time
    ccd.exposureTime.value = et  # s
    readout = numpy.prod(ccd.resolution.value) / ccd.readoutRate.value
    # Expected duration of one CCD frame, with a 0.05 margin
    tot_time = et + readout + 0.05
    logging.debug("Scanning spot grid with image per spot procedure...")

    self._spot_images = []
    for spot in electron_coordinates:
        self._ccd_done.clear()
        escan.translation.value = spot
        logging.debug("Scanning spot %s", escan.translation.value)
        try:
            if self._acq_state == CANCELLED:
                raise CancelledError()
            detector.data.subscribe(self._discard_data)
            ccd.data.subscribe(self._onSpotImage)

            # Wait for CCD to capture the image
            if not self._ccd_done.wait(2 * tot_time + 4):
                raise TimeoutError("Acquisition of CCD timed out")

        finally:
            # Always unsubscribe, including on cancellation or timeout
            detector.data.unsubscribe(self._discard_data)
            ccd.data.unsubscribe(self._onSpotImage)

    with self._acq_lock:
        if self._acq_state == CANCELLED:
            raise CancelledError()
        logging.debug("Scan done.")
        self._acq_state = FINISHED
    return self._spot_images, electron_coordinates, scale
def estimateOverlayTime(dwell_time, repetitions):
    """
    Estimates the duration of the overlay procedure.

    dwell_time (float): time spent on each spot
    repetitions (sequence of ints): number of spots along each dimension
    returns: 6 plus the dwell time multiplied by the total number of spots  # s
    """
    spot_count = numpy.prod(repetitions)
    scan_duration = dwell_time * spot_count
    return 6 + scan_duration  # s
def main(args):
    """Build the training graph for the selected model and run the training loop.

    Parameters are assembled from defaults, the model's own parameters, any
    previously saved parameters and finally the command-line arguments, then
    exported to ``params.output``.  A TF1 graph with loss, optimizer and
    hooks is built and training runs inside a ``MonitoredTrainingSession``
    until ``sess.should_stop()`` is true.

    Args:
        args: parsed command-line arguments; this function reads
            ``args.model``, ``args.output`` and ``args.checkpoint`` and
            passes the whole object to ``override_parameters``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    model_cls = models.get_model(args.model)
    print(model_cls)
    params = default_parameters()

    # Import and override parameters
    # Priorities (low -> high):
    # default -> saved -> command
    params = merge_parameters(params, model_cls.get_parameters())
    params = import_params(args.output, args.model, params)
    override_parameters(params, args)

    # Export all parameters and model specific parameters
    export_params(params.output, "params.json", params)
    export_params(
        params.output,
        "%s.json" % args.model,
        collect_params(params, model_cls.get_parameters())
    )

    # Build Graph
    with tf.Graph().as_default():
        # NOTE(review): there is no else branch, so `features` is unbound
        # when params.record is truthy - confirm the intended extent of
        # this `if`.
        if not params.record:
            # Build input queue
            features = dataset.get_training_input(params.input, params)

        update_cycle = params.update_cycle
        features, init_op = cache.cache_features(features, update_cycle)

        # Build model
        initializer = get_initializer(params)
        regularizer = tf.contrib.layers.l1_l2_regularizer(
            scale_l1=params.scale_l1, scale_l2=params.scale_l2)
        model = model_cls(params)
        # Create global step
        global_step = tf.train.get_or_create_global_step()

        # Multi-GPU setting
        sharded_losses = parallel.parallel_model(
            model.get_training_func(initializer, regularizer),
            features,
            params.device_list
        )
        # Average the per-shard losses, then add the regularization term
        loss = tf.add_n(sharded_losses) / len(sharded_losses)
        loss = loss + tf.losses.get_regularization_loss()

        # Print parameters
        all_weights = {v.name: v for v in tf.trainable_variables()}
        total_size = 0

        for v_name in sorted(list(all_weights)):
            v = all_weights[v_name]
            # v.name[:-2] drops the last two characters of the variable name
            tf.logging.info("%s\tshape %s", v.name[:-2].ljust(80),
                            str(v.shape).ljust(20))
            v_size = np.prod(np.array(v.shape.as_list())).tolist()
            total_size += v_size
        tf.logging.info("Total trainable variables size: %d", total_size)

        learning_rate = get_learning_rate_decay(params.learning_rate,
                                                global_step, params)
        learning_rate = tf.convert_to_tensor(learning_rate, dtype=tf.float32)
        tf.summary.scalar("learning_rate", learning_rate)

        # Create optimizer
        if params.optimizer == "Adam":
            opt = tf.train.AdamOptimizer(learning_rate,
                                         beta1=params.adam_beta1,
                                         beta2=params.adam_beta2,
                                         epsilon=params.adam_epsilon)
        elif params.optimizer == "LazyAdam":
            opt = tf.contrib.opt.LazyAdamOptimizer(learning_rate,
                                                   beta1=params.adam_beta1,
                                                   beta2=params.adam_beta2,
                                                   epsilon=params.adam_epsilon)
        else:
            raise RuntimeError("Optimizer %s not supported" % params.optimizer)

        # `ops` is indexed below with "zero_op", "collect_op" and "train_op"
        loss, ops = optimize.create_train_op(loss, opt, global_step, params)
        restore_op = restore_variables(args.checkpoint)

        # Validation
        if params.validation and params.references[0]:
            files = [params.validation] + list(params.references)
            eval_inputs = dataset.sort_and_zip_files(files)
            eval_input_fn = dataset.get_evaluation_input
        else:
            eval_input_fn = None

        # Add hooks
        save_vars = tf.trainable_variables() + [global_step]
        saver = tf.train.Saver(
            var_list=save_vars if params.only_save_trainable else None,
            max_to_keep=params.keep_checkpoint_max,
            sharded=False
        )
        tf.add_to_collection(tf.GraphKeys.SAVERS, saver)

        # Scales the first dimension of the logged feature shapes by update_cycle
        multiplier = tf.convert_to_tensor([update_cycle, 1])

        train_hooks = [
            tf.train.StopAtStepHook(last_step=params.train_steps),
            tf.train.NanTensorHook(loss),
            tf.train.LoggingTensorHook(
                {
                    "step": global_step,
                    "loss": loss,
                    "source": tf.shape(features["source"]) * multiplier,
                    "target": tf.shape(features["target"]) * multiplier
                },
                every_n_iter=1
            ),
            tf.train.CheckpointSaverHook(
                checkpoint_dir=params.output,
                save_secs=params.save_checkpoint_secs or None,
                save_steps=params.save_checkpoint_steps or None,
                saver=saver
            )
        ]

        config = session_config(params)

        if eval_input_fn is not None:
            train_hooks.append(
                hooks.EvaluationHook(
                    lambda f: inference.create_inference_graph(
                        [model], f, params
                    ),
                    lambda: eval_input_fn(eval_inputs, params),
                    lambda x: decode_target_ids(x, params),
                    params.output,
                    config,
                    params.keep_top_checkpoint_max,
                    eval_secs=params.eval_secs,
                    eval_steps=params.eval_steps
                )
            )

        def restore_fn(step_context):
            # Runs the restore op directly on the underlying session
            step_context.session.run(restore_op)

        def step_fn(step_context):
            # Bypass hook calls
            step_context.session.run([init_op, ops["zero_op"]])
            # Run collect_op update_cycle - 1 times; the final train_op run
            # below is the only one that goes through the hooks
            for i in range(update_cycle - 1):
                step_context.session.run(ops["collect_op"])

            return step_context.run_with_hooks(ops["train_op"])

        # Create session, do not use default CheckpointSaverHook
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=params.output, hooks=train_hooks,
                save_checkpoint_secs=None, config=config) as sess:
            # Restore pre-trained variables
            sess.run_step_fn(restore_fn)

            while not sess.should_stop():
                sess.run_step_fn(step_fn)
def select_block_grid_sizes(dev, data_shape, threads_per_block=None):
    """
    Determine CUDA block and grid dimensions given device constraints.

    Determine the CUDA block and grid dimensions allowed by a GPU device
    that are sufficient for processing every element of an array in a
    separate thread.

    Parameters
    ----------
    dev : pycuda.driver.Device
        Device object to be used.
    data_shape : tuple or scalar
        Shape of input data array. A scalar is treated as a
        one-dimensional shape.
    threads_per_block : int, optional
        Number of threads to execute in each block. If this is None,
        the maximum number of threads per block allowed by device `dev`
        is used.

    Returns
    -------
    block_dim : tuple
        X, Y, and Z dimensions of minimal required thread block.
    grid_dim : tuple
        X and Y dimensions of minimal required block grid, as Python
        ints.

    Raises
    ------
    ValueError
        If the array has too many elements for a two-dimensional grid on
        this device.

    Notes
    -----
    Using the scheme in this function, all of the threads in the grid can be
    enumerated as `i = blockIdx.y*max_threads_per_block*max_blocks_per_grid+
    blockIdx.x*max_threads_per_block+threadIdx.x`.

    For 2D shapes, the subscripts of the element `data[a, b]` where
    `data.shape == (A, B)` can be computed as
    `a = i/B`
    `b = mod(i,B)`.

    For 3D shapes, the subscripts of the element `data[a, b, c]` where
    `data.shape == (A, B, C)` can be computed as
    `a = i/(B*C)`
    `b = mod(i, B*C)/C`
    `c = mod(mod(i, B*C), C)`.

    For 4D shapes, the subscripts of the element `data[a, b, c, d]`
    where `data.shape == (A, B, C, D)` can be computed as
    `a = i/(B*C*D)`
    `b = mod(i, B*C*D)/(C*D)`
    `c = mod(mod(i, B*C*D), C*D)/D`
    `d = mod(mod(mod(i, B*C*D), C*D), D)`

    It is advisable that the number of threads per block be a multiple
    of the warp size to fully utilize a device's computing resources.
    """

    # Sanity checks:
    if np.isscalar(data_shape):
        data_shape = (data_shape, )

    # Number of elements to process; we need to cast the result of
    # np.prod to a Python int to prevent PyCUDA's kernel execution
    # framework from getting confused when it is handed a numpy scalar:
    N = int(np.prod(data_shape))

    # Get device constraints:
    max_threads_per_block, max_block_dim, max_grid_dim = get_dev_attrs(dev)

    if threads_per_block is not None:
        max_threads_per_block = threads_per_block

    # Assume that the maximum number of threads per block is no larger
    # than the maximum X and Y dimension of a thread block:
    assert max_threads_per_block <= max_block_dim[0]
    assert max_threads_per_block <= max_block_dim[1]

    # Assume that the maximum X and Y dimensions of a grid are the
    # same:
    max_blocks_per_grid_dim = max(max_grid_dim)
    assert max_blocks_per_grid_dim == max_grid_dim[0]
    assert max_blocks_per_grid_dim == max_grid_dim[1]

    # Actual number of thread blocks needed. Floor division keeps the
    # block counts integral on Python 3 as well; grid dimensions must be
    # whole numbers.
    blocks_needed = N // max_threads_per_block + 1

    if blocks_needed * max_threads_per_block < \
            max_threads_per_block * max_blocks_per_grid_dim:
        grid_x = blocks_needed
        grid_y = 1
    elif blocks_needed * max_threads_per_block < \
            max_threads_per_block * max_blocks_per_grid_dim**2:
        grid_x = max_blocks_per_grid_dim
        grid_y = blocks_needed // max_blocks_per_grid_dim + 1
    else:
        raise ValueError('array size too large')

    return (max_threads_per_block, 1, 1), (int(grid_x), int(grid_y))
def _fmri_postprocess_image(config,
                            file,
                            task=None,
                            tr=None,
                            beta_series=False,
                            drop_tps=None):
    """Post-process one fMRI image and write the result to disk.

    With ``beta_series`` False the image is demeaned, optionally spectrally
    interpolated, filtered, regressed against the confounds and scrubbed
    (each step controlled by ``config.config['PostProcessingOptions']``),
    then saved.  With ``beta_series`` True a beta-series image is computed
    from the task's events file and saved together with the events used.

    Args:
        config: project configuration object; ``config.config`` is read as
            a nested mapping of options.
        file: path of the image to process.
        task: not used in this function.
        tr: repetition time; when None it is read from the
            ``RepetitionTime`` field of the image's JSON file.
        beta_series: selects the beta-series branch when true.
        drop_tps: number of trailing time points to drop from the confounds,
            the framewise-displacement series and the data; None keeps all.

    Returns:
        0 when the output file already exists, otherwise None (including
        the early exits when a confound file, task specification or events
        file is missing).
    """
    confound_regressors = _find_confounds(config, file)
    output_file_path = _build_output_directory_structure(
        config, file, beta_series)

    # Never overwrite an existing result
    if os.path.exists(output_file_path):
        logging.info("Output File Exists! Skipping.")
        return 0

    logging.info('Looking for: ' + confound_regressors)

    if not os.path.exists(confound_regressors):
        logging.warning('Could not find a confound file for ' + file +
                        ". Moving onto next scan")
        return
    else:
        logging.info('Found confound regressors')
        confounds, fdts = _regression_prep(config, confound_regressors)
        if drop_tps is not None:
            # Trim the same number of trailing time points from both tables
            confounds = confounds.iloc[:(confounds.shape[0] - (drop_tps))]
            logging.info('Removing last ' + str(drop_tps) + ' time points')
            fdts = fdts.iloc[:(fdts.shape[0] - (drop_tps))]
    if tr is None:
        image_json_path = _find_json(config, file)
        with open(os.path.abspath(image_json_path), "r") as json_path:
            image_json = json.load(json_path)
        tr = float(image_json['RepetitionTime'])
    logging.info('TR found: ' + str(tr))
    image = nib.load(file)
    data = image.get_fdata()
    data = data.astype(numpy.float32)
    orgImageShape = data.shape
    coordMap = image.affine
    # Flatten all leading axes into one, then transpose so the last
    # (presumably time - TODO confirm) axis becomes the first
    data = data.reshape(
        (numpy.prod(numpy.shape(data)[:-1]), data.shape[-1]))
    data = numpy.transpose(data)
    if drop_tps is not None:
        data = data[0:(data.shape[0] - (drop_tps)), :]
        # Keep the stored shape in sync so the final reshape still works
        orgImageShape = list(orgImageShape)
        orgImageShape[3] = data.shape[0]
        orgImageShape = tuple(orgImageShape)
    # Means along the first axis, added back after processing in the
    # non-beta-series branch
    row_means = data.mean(axis=0)
    data = (data - data.mean(axis=0))

    if not beta_series:
        regress_toggle = config.config['PostProcessingOptions']['Regress']

        scrub_toggle = False
        if config.config['PostProcessingOptions']['Scrubbing']:
            logging.debug('Scrubbing Toggle Activated')
            scrub_toggle = True
            scrub_ahead = int(
                config.config['PostProcessingOptions']['ScrubAhead'])
            scrub_behind = int(
                config.config['PostProcessingOptions']['ScrubBehind'])
            scrub_contig = int(
                config.config['PostProcessingOptions']['ScrubContig'])
            fd_thres = float(
                config.config['PostProcessingOptions']['ScrubFDThreshold'])
            # Keep the unfiltered series so both can be written out later
            orig_fdts = fdts
            if config.config['PostProcessingOptions']['RespNotchFilter']:
                fdts = _notch_filter_fd(config, confound_regressors, tr,
                                        drop_tps)
            scrubTargets = clpipe.postprocutils.utils.scrub_setup(
                fdts, fd_thres, scrub_behind, scrub_ahead, scrub_contig)

        hp = float(config.config['PostProcessingOptions']['FilteringHighPass'])
        lp = float(config.config['PostProcessingOptions']['FilteringLowPass'])
        filter_toggle = False
        if hp > 0 or lp > 0:
            logging.info('Filtering Toggle Activated')
            filter_toggle = True
            order = int(
                config.config['PostProcessingOptions']['FilteringOrder'])
            filt = clpipe.postprocutils.utils.calc_filter(hp, lp, tr, order)
            # The confounds are passed through the same filter as the data
            confounds = clpipe.postprocutils.utils.apply_filter(
                filt, confounds)

        # Spectral interpolation runs only when both scrubbing and filtering
        # are enabled
        if scrub_toggle and filter_toggle:
            logging.info('Using Spectral Interpolation')
            ofreq = int(
                config.config['PostProcessingOptions']['OversamplingFreq'])
            hfreq = float(
                config.config['PostProcessingOptions']['PercentFreqSample'])
            # NOTE(review): this logs virtual_memory().total (in GiB), which
            # looks like total system memory rather than usage - confirm
            logging.debug('Memory Usage Before Spectral Interpolation:' +
                          str(psutil.virtual_memory().total >> 30) + ' GB')
            data = clpipe.postprocutils.spec_interpolate.spec_inter(
                data,
                tr,
                ofreq,
                scrubTargets,
                hfreq,
                binSize=config.config['PostProcessingOptions']
                ["SpectralInterpolationBinSize"])
            gc.collect()
            logging.debug('Memory Usage After Spectral Interpolation GC:' +
                          str(psutil.virtual_memory().total >> 30) + ' GB')

        if filter_toggle:
            logging.info('Filtering Data Now')
            data = clpipe.postprocutils.utils.apply_filter(filt, data)
        if regress_toggle:
            logging.info('Regressing Data Now')
            logging.debug(str(confounds.shape))
            logging.debug(str(data.shape))
            data = clpipe.postprocutils.utils.regress(confounds, data)
        if scrub_toggle:
            logging.info('Scrubbing data Now')
            data = clpipe.postprocutils.utils.scrub_image(data, scrubTargets)

        # Restore the means and the original image layout before saving
        data = (data + row_means)
        data = numpy.transpose(data)
        data = data.reshape(orgImageShape)
        data32 = numpy.float32(data)
        out_image = nib.Nifti1Image(data32, coordMap)
        output_file_path = _build_output_directory_structure(config, file)
        logging.info('Saving post processed data to ' + output_file_path)
        nib.save(out_image, output_file_path)

        if scrub_toggle:
            file_name = os.path.basename(file)
            # Strip two extensions from the file name
            sans_ext = os.path.splitext(os.path.splitext(file_name)[0])[0]
            # Columns: 1-based index, scrub target, FD series used, original FD series
            toOut = numpy.column_stack([
                numpy.arange(1,
                             len(scrubTargets) + 1, 1),
                numpy.asarray(scrubTargets), fdts, orig_fdts
            ])
            logging.info('Saving Scrub Targets to ' +
                         os.path.join(os.path.dirname(output_file_path),
                                      sans_ext + "_scrubTargets.csv"))
            numpy.savetxt(os.path.join(os.path.dirname(output_file_path),
                                       sans_ext + "_scrubTargets.csv"),
                          toOut,
                          delimiter=",")
    else:
        beta_series_options = config.config['BetaSeriesOptions'][
            'TaskSpecificOptions']

        avail_tasks = [x['Task'] for x in beta_series_options]
        logging.debug(avail_tasks)
        img_task = _find_image_task(file)
        logging.debug(img_task)
        if img_task not in avail_tasks:
            logging.info(
                'Did not find beta series specification for the task ' +
                img_task + ' for image ' + file)
            return
        else:
            # Narrow the options down to the entry for this image's task
            beta_series_options = beta_series_options[avail_tasks.index(
                img_task)]

        hp = float(config.config['BetaSeriesOptions']['FilteringHighPass'])
        lp = float(config.config['BetaSeriesOptions']['FilteringLowPass'])

        events_file = _find_events(config, file)
        logging.debug(events_file)
        if os.path.exists(events_file):
            # The confounds are prepared again, this time with beta_series passed on
            confounds, fdts = _regression_prep(config, confound_regressors,
                                               beta_series)
            ntp = len(confounds)
            # tr has already been resolved above, so this branch is not
            # expected to run
            if tr is None:
                image_json_path = _find_json(config, file)
                with open(os.path.abspath(image_json_path), "r") as json_path:
                    image_json = json.load(json_path)
                tr = float(image_json['RepetitionTime'])
            filter_toggle = False
            filt = None
            if hp > 0 or lp > 0:
                logging.info('Filtering Toggle Activated')
                filter_toggle = True
                order = int(
                    config.config['BetaSeriesOptions']['FilteringOrder'])
                filt = clpipe.postprocutils.utils.calc_filter(
                    hp, lp, tr, order)
                confounds = clpipe.postprocutils.utils.apply_filter(
                    filt, confounds)

            filt_ev_array, valid_events = _ev_mat_prep(events_file, filt, tr,
                                                       ntp,
                                                       beta_series_options)

            # The image is reloaded here, so drop_tps is not applied to the
            # data in this branch
            image = nib.load(file)
            data = image.get_fdata()
            data = data.astype(numpy.float32)
            orgImageShape = data.shape
            coordMap = image.affine
            data = data.reshape(
                (numpy.prod(numpy.shape(data)[:-1]), data.shape[-1]))
            data = numpy.transpose(data)
            data = (data - data.mean(axis=0))
            logging.debug(filt_ev_array)
            beta_image_2d = _beta_series_calc(data, filt_ev_array, confounds)
            # Same leading dimensions as the input, one volume per valid event
            beta_series_dims = orgImageShape[:-1]
            beta_series_dims = beta_series_dims + (len(valid_events), )
            beta_3d = beta_image_2d.transpose().reshape(beta_series_dims)
            beta_image = nib.Nifti1Image(beta_3d, coordMap)
            output_file_path = _build_output_directory_structure(
                config, file, beta_series)
            events_output = os.path.splitext(
                os.path.splitext(output_file_path)[0])[0] + "_usedevents.tsv"
            nib.save(beta_image, output_file_path)
            valid_events.to_csv(events_output, sep=' ')
        else:
            logging.info("Did not find an events file for " + file)
            return
def apply_updates(self):
    """Build the op that applies the registered gradients to their variables.

    May be called only once per instance: the call asserts that
    ``self._updates_applied`` is still false and then sets it.

    The graph is assembled in four stages, all inside
    ``absolute_name_scope(self.scope)``:

    1. Per device, cast every registered gradient to float32 and sum the
       gradients that belong to the same variable.
    2. With more than one device, all-sum each variable's gradient across
       devices via ``nccl_ops.all_sum``.
    3. Per device, optionally rescale the gradients, check them for
       non-finite values and conditionally apply them with that device's
       optimizer (adjusting the loss-scaling variable when loss scaling is on).
    4. Reset the optimizer state, initialise the loss-scaling variables and
       group every op into one.

    Returns:
        The ``tf.group`` of all update/statistics ops, named ``'TrainingOp'``.
    """
    assert not self._updates_applied
    self._updates_applied = True
    devices = list(self._dev_grads.keys())
    total_grads = sum(len(grads) for grads in self._dev_grads.values())
    # NOTE(review): unconditional stdout print; looks like leftover debugging.
    print('Total grads: ' + str(total_grads))
    assert len(devices) >= 1 and total_grads >= 1
    ops = []
    # NOTE(review): block nesting below was reconstructed from a
    # whitespace-collapsed source; confirm against the original file.
    with absolute_name_scope(self.scope):
        # Cast gradients to FP32 and calculate partial sum within each device.
        dev_grads = OrderedDict()  # device => [(grad, var), ...]
        for dev_idx, dev in enumerate(devices):
            with tf.name_scope('ProcessGrads%d' % dev_idx), tf.device(dev):
                sums = []
                # zip(*...) regroups the per-registration lists so that each
                # `gv` holds the (grad, var) pairs at one position.
                for gv in zip(*self._dev_grads[dev]):
                    # Every pair in the group must refer to the same variable.
                    assert all(v is gv[0][1] for g, v in gv)
                    g = [tf.cast(g, tf.float32) for g, v in gv]
                    g = g[0] if len(g) == 1 else tf.add_n(g)
                    sums.append((g, gv[0][1]))
                dev_grads[dev] = sums

        # Sum gradients across devices.
        if len(devices) > 1:
            with tf.name_scope('SumAcrossGPUs'), tf.device(None):
                for var_idx, grad_shape in enumerate(self._grad_shapes):
                    g = [dev_grads[dev][var_idx][0] for dev in devices]
                    if np.prod(
                            grad_shape
                    ):  # nccl does not support zero-sized tensors
                        g = nccl_ops.all_sum(g)
                    # Write the (possibly all-summed) gradient back, keeping
                    # each device's own variable handle.
                    for dev, gg in zip(devices, g):
                        dev_grads[dev][var_idx] = (
                            gg, dev_grads[dev][var_idx][1])

        # Apply updates separately on each device.
        for dev_idx, (dev, grads) in enumerate(dev_grads.items()):
            with tf.name_scope('ApplyGrads%d' % dev_idx), tf.device(dev):
                # Scale gradients as needed.
                if self.use_loss_scaling or total_grads > 1:
                    with tf.name_scope('Scale'):
                        # 1 / total_grads turns the summed gradient into a mean;
                        # undo_loss_scaling() then adjusts the coefficient.
                        coef = tf.constant(np.float32(1.0 / total_grads),
                                           name='coef')
                        coef = self.undo_loss_scaling(coef)
                        grads = [(g * coef, v) for g, v in grads]

                # Check for overflows.
                with tf.name_scope('CheckOverflow'):
                    # True only if every element of every gradient is finite.
                    grad_ok = tf.reduce_all(
                        tf.stack([
                            tf.reduce_all(tf.is_finite(g)) for g, v in grads
                        ]))

                # Update weights and adjust loss scaling.
                with tf.name_scope('UpdateWeights'):
                    opt = self._dev_opt[dev]
                    ls_var = self.get_loss_scaling_var(dev)
                    # The lambdas close over the loop variables opt/grads/ls_var;
                    # they are handed straight to tf.cond in this iteration.
                    if not self.use_loss_scaling:
                        # Skip the update entirely when a gradient overflowed.
                        ops.append(
                            tf.cond(grad_ok,
                                    lambda: opt.apply_gradients(grads),
                                    tf.no_op))
                    else:
                        # Finite gradients: raise the loss scale and apply.
                        # Otherwise: only lower the loss scale.
                        ops.append(
                            tf.cond(
                                grad_ok, lambda: tf.group(
                                    tf.assign_add(ls_var,
                                                  self.loss_scaling_inc),
                                    opt.apply_gradients(grads)),
                                lambda: tf.group(
                                    tf.assign_sub(ls_var,
                                                  self.loss_scaling_dec))))

                # Report statistics on the last device.
                if dev == devices[-1]:
                    with tf.name_scope('Statistics'):
                        ops.append(
                            autosummary(self.id + '/learning_rate',
                                        self.learning_rate))
                        # 0 when the gradients were finite, 1 on overflow.
                        ops.append(
                            autosummary(self.id + '/overflow_frequency',
                                        tf.where(grad_ok, 0, 1)))
                        if self.use_loss_scaling:
                            ops.append(
                                autosummary(self.id + '/loss_scaling_log2',
                                            ls_var))

        # Initialize variables and group everything into a single op.
        self.reset_optimizer_state()
        init_uninited_vars(list(self._dev_ls_var.values()))
        return tf.group(*ops, name='TrainingOp')
def calculate_size_bits(tensor):
    """Return the total size of ``tensor``.

    The result is the element count (product of ``tensor.ShapeAsNumpy()``)
    multiplied by the per-element size that ``TFLITE_TYPE_SIZES`` lists for
    ``tensor.Type()`` -- presumably in bits, going by the function name.

    Types whose table entry is not positive are rejected by an assertion
    (its message indicates these are string tensors).
    """
    per_element = TFLITE_TYPE_SIZES[tensor.Type()]
    assert per_element > 0, "Strings not handled"
    element_count = np.prod(tensor.ShapeAsNumpy())
    return element_count * per_element
def multi_scale_semantic_validate(config,
                                  val_loader,
                                  val_dataset,
                                  model,
                                  criterion,
                                  output_dir,
                                  tb_log_dir,
                                  writer_dict=None):
    """Evaluate ``model`` on ``val_loader`` by averaging heatmaps over scales.

    Each image is resized to every factor in ``SCALE_LIST``, run through the
    model with ``get_multi_scale_outputs`` and the resulting heatmaps are
    averaged before decoding the final predictions. The batch size must be 1
    (asserted).

    Args:
        config: Configuration object; ``MODEL``, ``TEST`` and ``PRINT_FREQ``
            entries are read here.
        val_loader: Iterable yielding ``(input, target, target_weight, meta)``.
        val_dataset: Dataset providing ``__len__`` and ``evaluate``.
        model: Network to evaluate; switched to eval mode.
        criterion: Only referenced by commented-out code in this function.
        output_dir: Directory handed to ``val_dataset.evaluate``.
        tb_log_dir: Not referenced in this function.
        writer_dict: Optional dict with ``'writer'`` and
            ``'valid_global_steps'``; when given, scalars are logged and the
            step counter is incremented.

    Returns:
        The ``perf_indicator`` returned by ``val_dataset.evaluate``.

    NOTE(review): the loss/accuracy updates are commented out, so ``losses``
    and ``acc`` are never updated although they are still logged.
    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm against the original file.
    """
    # SCALE_LIST = [0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
    SCALE_LIST = [0.8]
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    # Per sample and joint: columns 0-1 receive the prediction, column 2 maxvals.
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    # Per sample: center (2), scale (2), prod(scale * 200), score.
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    # filenames / imgnums stay empty and are passed to evaluate() as such.
    filenames = []
    imgnums = []
    # Write offset into all_preds / all_boxes (shadows the builtin `id`).
    id = 0
    transforms = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
    ])
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # print("meta center:"+str(meta['center'])+"meta scale:"+str(meta['scale']))
            # print("data_loader input shape:" + str(input.shape))
            num_images = input.size(0)
            assert 1 == input.size(0), 'Test batch size should be 1'
            # Take the single image of the batch as a numpy array and move
            # axis 0 to the end (assumes CHW -> HWC; TODO confirm).
            input = input[0].cpu().numpy()
            input = np.transpose(input, (1, 2, 0))
            # print("transposed input:" + str(input.shape))
            base_size, center, scale = get_multi_scale_size(
                input, config.MODEL.IMAGE_SIZE[0], 1.0, min(SCALE_LIST))
            # print("transformed base_size:"+str(base_size)+", center:"+str(center)+",scale:"+str(scale))
            final_heatmaps = None
            # Accumulate heatmaps from the largest scale down to the smallest.
            for idx, s in enumerate(sorted(SCALE_LIST, reverse=True)):
                input_size = config.MODEL.IMAGE_SIZE[0]
                image_resized, center, scale = resize_align_multi_scale(
                    input, input_size, s, min(SCALE_LIST))
                image_resized = transforms(image_resized)
                image_resized = image_resized.unsqueeze(0).cuda()
                print("model input shape:" + str(image_resized.shape))
                PROJECT2IMAGE = False
                heatmap = get_multi_scale_outputs(config, model, image_resized,
                                                  config.TEST.FLIP_TEST,
                                                  PROJECT2IMAGE, base_size,
                                                  val_dataset)
                print("heatmap shape" + str(idx) + ":" + str(heatmap.shape))
                if final_heatmaps is None:
                    final_heatmaps = heatmap
                else:
                    final_heatmaps += heatmap
            # Average over the number of scales.
            final_heatmaps = final_heatmaps / float(len(SCALE_LIST))

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)
            # loss = criterion(final_heatmaps, target, target_weight)
            # measure accuracy and record loss
            # losses.update(loss.item(), num_images)
            # _, avg_acc, cnt, pred = accuracy(final_heatmaps.cpu().numpy(),
            #                                  target.cpu().numpy())
            # acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # `s` is reused here for the per-sample scale from the metadata.
            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            preds, maxvals = get_final_preds(
                config,
                final_heatmaps.clone().cpu().numpy(), c, s)

            all_preds[id:id + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[id:id + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[id:id + num_images, 0:2] = c[:, 0:2]
            all_boxes[id:id + num_images, 2:4] = s[:, 0:2]
            # Product over axis 1 of (scale * 200); presumably the box area
            # in pixels -- TODO confirm the meaning of the factor 200.
            all_boxes[id:id + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[id:id + num_images, 5] = score
            image_path.extend(meta['image'])

            id += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                # Only consumed by the commented-out save_debug_images call.
                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                # save_debug_images(config, input, meta, target, pred*4, final_heatmaps,
                #                   prefix)

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums)

        model_name = config.MODEL.NAME
        # evaluate() may return one name/value collection or a list of them.
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_acc', acc.avg, global_steps)
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars('valid', dict(name_value),
                                       global_steps)
            else:
                writer.add_scalars('valid', dict(name_values), global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator
def required_model_output_shape(action_space, model_config):
    """Return the number of model outputs needed for ``action_space``.

    The value is the product of the entries of ``action_space.shape``.
    ``model_config`` is accepted but not used.
    """
    space_dims = action_space.shape
    flat_size = np.prod(space_dims)
    return flat_size
def setup_class(cls):
    """Record a deterministic two-chain trace and the values expected from it.

    Reads the class attributes ``shape``, ``backend`` and ``name`` and the
    optional ``sampler_vars`` (a sequence of ``{stat_name: dtype}`` mappings;
    set to ``None`` when absent). Sets on the class:

    * ``test_point`` / ``model`` from ``models.beta_bernoulli(cls.shape)``
    * ``draws`` (5)
    * ``expected[chain][varname]``: the values recorded per variable; chain 1
      holds chain 0's values multiplied by 100
    * ``expected_stats[chain]`` (only when ``sampler_vars`` is given)
    * ``mtrace``: a ``base.MultiTrace`` over both recorded chains
    * ``stat_dtypes`` / ``stats_counts``: merged dtypes and per-name counts of
      the sampler statistics
    """
    cls.test_point, cls.model, _ = models.beta_bernoulli(cls.shape)
    with cls.model:
        strace0 = cls.backend(cls.name)
        strace1 = cls.backend(cls.name)

    if not hasattr(cls, 'sampler_vars'):
        cls.sampler_vars = None

    cls.draws = 5
    if cls.sampler_vars is not None:
        strace0.setup(cls.draws, chain=0, sampler_vars=cls.sampler_vars)
        strace1.setup(cls.draws, chain=1, sampler_vars=cls.sampler_vars)
    else:
        strace0.setup(cls.draws, chain=0)
        strace1.setup(cls.draws, chain=1)

    varnames = list(cls.test_point.keys())
    shapes = {
        varname: value.shape
        for varname, value in cls.test_point.items()
    }
    dtypes = {
        varname: value.dtype
        for varname, value in cls.test_point.items()
    }

    cls.expected = {0: {}, 1: {}}
    for varname in varnames:
        # One leading axis for the draws, then the variable's own shape.
        mcmc_shape = (cls.draws, ) + shapes[varname]
        values = np.arange(cls.draws * np.prod(shapes[varname]),
                           dtype=dtypes[varname])
        cls.expected[0][varname] = values.reshape(mcmc_shape)
        cls.expected[1][varname] = values.reshape(mcmc_shape) * 100

    if cls.sampler_vars is not None:
        cls.expected_stats = {0: [], 1: []}
        bool_dtype = np.dtype(bool)
        for stat_spec in cls.sampler_vars:
            # The same dict is deliberately shared by both chains.
            stats = {}
            cls.expected_stats[0].append(stats)
            cls.expected_stats[1].append(stats)
            for key, dtype in stat_spec.items():
                # `np.bool` was removed in NumPy 1.24, so compare normalised
                # dtypes instead. Boolean stats are all-False because
                # np.arange cannot build a boolean range of this length.
                if np.dtype(dtype) == bool_dtype:
                    stats[key] = np.zeros(cls.draws, dtype=dtype)
                else:
                    stats[key] = np.arange(cls.draws, dtype=dtype)

    for idx in range(cls.draws):
        point0 = {
            varname: cls.expected[0][varname][idx, ...]
            for varname in varnames
        }
        point1 = {
            varname: cls.expected[1][varname][idx, ...]
            for varname in varnames
        }
        if cls.sampler_vars is not None:
            # Slice out draw `idx` of every statistic, one dict per sampler.
            stats1 = [{key: val[idx] for key, val in stats.items()}
                      for stats in cls.expected_stats[0]]
            stats2 = [{key: val[idx] for key, val in stats.items()}
                      for stats in cls.expected_stats[1]]
            strace0.record(point=point0, sampler_stats=stats1)
            strace1.record(point=point1, sampler_stats=stats2)
        else:
            strace0.record(point=point0)
            strace1.record(point=point1)
    strace0.close()
    strace1.close()
    cls.mtrace = base.MultiTrace([strace0, strace1])

    cls.stat_dtypes = {}
    cls.stats_counts = collections.Counter()
    for stats in cls.sampler_vars or []:
        cls.stat_dtypes.update(stats)
        cls.stats_counts.update(stats.keys())
def validate(config,
             val_loader,
             val_dataset,
             model,
             criterion,
             output_dir,
             tb_log_dir,
             writer_dict=None):
    """Run one evaluation pass of ``model`` over ``val_loader``.

    For every batch the model output (the last element when the model returns
    a list) is optionally averaged with the output for the horizontally
    flipped input, the loss and accuracy meters are updated, and the decoded
    predictions and box information are collected. Afterwards the dataset's
    ``evaluate`` method scores the collected predictions.

    Args:
        config: Configuration object; ``MODEL``, ``TEST`` and ``PRINT_FREQ``
            entries are read here.
        val_loader: Iterable yielding ``(input, target, target_weight, meta)``.
        val_dataset: Dataset providing ``__len__``, ``flip_pairs`` and
            ``evaluate``.
        model: Network to evaluate; switched to eval mode.
        criterion: Callable ``criterion(output, target, target_weight)``
            whose result supports ``.item()``.
        output_dir: Directory for debug images and for ``evaluate``.
        tb_log_dir: Not referenced in this function.
        writer_dict: Optional dict with ``'writer'`` and
            ``'valid_global_steps'``; when given, scalars are logged and the
            step counter is incremented.

    Returns:
        The ``perf_indicator`` returned by ``val_dataset.evaluate``.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm against the original file.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # switch to evaluate mode
    model.eval()

    num_samples = len(val_dataset)
    # Per sample and joint: columns 0-1 receive the prediction, column 2 maxvals.
    all_preds = np.zeros((num_samples, config.MODEL.NUM_JOINTS, 3),
                         dtype=np.float32)
    # Per sample: center (2), scale (2), prod(scale * 200), score.
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    # filenames / imgnums stay empty and are passed to evaluate() as such.
    filenames = []
    imgnums = []
    # Write offset into all_preds / all_boxes.
    idx = 0
    with torch.no_grad():
        end = time.time()
        for i, (input, target, target_weight, meta) in enumerate(val_loader):
            # compute output
            # NOTE(review): `input` is passed to the model without an explicit
            # .cuda() here, unlike the flipped copy below -- confirm the model
            # wrapper handles device placement.
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs

            if config.TEST.FLIP_TEST:
                # this part is ugly, because pytorch has not supported negative index
                # input_flipped = model(input[:, :, :, ::-1])
                # Flip along axis 3 via numpy, then move the copy to the GPU.
                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
                input_flipped = torch.from_numpy(input_flipped).cuda()
                outputs_flipped = model(input_flipped)

                if isinstance(outputs_flipped, list):
                    output_flipped = outputs_flipped[-1]
                else:
                    output_flipped = outputs_flipped

                output_flipped = flip_back(output_flipped.cpu().numpy(),
                                           val_dataset.flip_pairs)
                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()

                # feature is not aligned, shift flipped heatmap for higher accuracy
                if config.TEST.SHIFT_HEATMAP:
                    # Shift by one position along the last axis; clone() avoids
                    # reading from the tensor that is being overwritten.
                    output_flipped[:, :, :, 1:] = \
                        output_flipped.clone()[:, :, :, 0:-1]

                output = (output + output_flipped) * 0.5

            target = target.cuda(non_blocking=True)
            target_weight = target_weight.cuda(non_blocking=True)

            loss = criterion(output, target, target_weight)

            num_images = input.size(0)
            # measure accuracy and record loss
            losses.update(loss.item(), num_images)
            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
                                             target.cpu().numpy())

            acc.update(avg_acc, cnt)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            preds, maxvals = get_final_preds(config,
                                             output.clone().cpu().numpy(), c,
                                             s)

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            # Product over axis 1 of (scale * 200); presumably the box area
            # in pixels -- TODO confirm the meaning of the factor 200.
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])

            idx += num_images

            if i % config.PRINT_FREQ == 0:
                msg = 'Test: [{0}/{1}]\t' \
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, acc=acc)
                logger.info(msg)

                prefix = '{}_{}'.format(os.path.join(output_dir, 'val'), i)
                save_debug_images(config, input, meta, target, pred * 4,
                                  output, prefix)

        name_values, perf_indicator = val_dataset.evaluate(
            config, all_preds, output_dir, all_boxes, image_path, filenames,
            imgnums)

        model_name = config.MODEL.NAME
        # evaluate() may return one name/value collection or a list of them.
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

        if writer_dict:
            writer = writer_dict['writer']
            global_steps = writer_dict['valid_global_steps']
            writer.add_scalar('valid_loss', losses.avg, global_steps)
            writer.add_scalar('valid_acc', acc.avg, global_steps)
            if isinstance(name_values, list):
                for name_value in name_values:
                    writer.add_scalars('valid', dict(name_value),
                                       global_steps)
            else:
                writer.add_scalars('valid', dict(name_values), global_steps)
            writer_dict['valid_global_steps'] = global_steps + 1

    return perf_indicator
def preprocess(x_or_y):
    """Scale an array by 1/255 as float32 and flatten each sample.

    The first axis is kept as the sample axis; all remaining axes are
    collapsed into one, so the result has shape ``(len(x_or_y), prod(rest))``.
    """
    scaled = x_or_y.astype('float32') / 255.
    sample_count = len(scaled)
    features_per_sample = np.prod(scaled.shape[1:])
    return scaled.reshape((sample_count, features_per_sample))
# Task
#
# You are given a 2-D array with dimensions N x M.
# Sum the array over axis 0, then print the product of that result.
#
# Input format
#
# The first line contains the space-separated values N and M.
# Each of the next N lines contains M space-separated integers.
import numpy as np

if __name__ == '__main__':
    # M is read to consume the header line but is not needed afterwards.
    row_count, _col_count = map(int, input().split())
    rows = [input().split() for _ in range(row_count)]
    matrix = np.array(rows, int)
    column_sums = np.sum(matrix, axis=0)
    print(np.prod(column_sums))
def gen_complex(shape):
    """Return an array of ``shape`` filled with random complex numbers.

    Real and imaginary parts are drawn independently with
    ``np.random.uniform`` (its default range); all real parts are drawn
    before the imaginary parts.

    Args:
        shape: Target shape, an int or a sequence of ints. The empty shape
            ``()`` is supported and yields a 0-d array.

    Returns:
        A complex ndarray with the requested shape.
    """
    # np.prod(()) is the float 1.0, which np.random.uniform does not accept
    # as a size; coerce the element count to a plain int.
    n = int(np.prod(shape))
    re = np.random.uniform(size=n)
    im = np.random.uniform(size=n)
    return (re + im * 1j).reshape(shape)
def size(self: Any) -> int:
    """Return the total number of elements implied by ``self.shape``."""
    element_count = np.prod(self.shape)
    # For the empty shape () the product is not a plain int, so convert
    # explicitly; the result is then the int 1.
    return int(element_count)
def gen(shape):
    """Return an array of ``shape`` filled with random complex numbers.

    Real and imaginary parts are drawn independently with
    ``np.random.uniform`` (its default range); all real parts are drawn
    before the imaginary parts.

    Args:
        shape: Target shape, an int or a sequence of ints. The empty shape
            ``()`` is supported and yields a 0-d array.

    Returns:
        A complex ndarray with the requested shape.
    """
    # np.prod(()) is the float 1.0, which np.random.uniform does not accept
    # as a size; coerce the element count to a plain int.
    n = int(np.prod(shape))
    re = np.random.uniform(size=n)
    im = np.random.uniform(size=n)
    return (re + im * 1j).reshape(shape)
}) return X_latent[:, 0, :] def pred_proba(self, X0, X1, X2): X0 = preprocess(X0) X1 = preprocess(X1) X2 = preprocess(X2) logits = self.sess.run(self.d_test, feed_dict={ self.x0: X0, self.x1: X1, self.x2: X2 }).flatten() return 1. / (1. + np.exp(-logits)) if __name__ == '__main__': latent_dim = [2, 2, 2] noise_dim = [10, 0, 0] n_points = [64, 64, 64] bezier_degree = [31, None, None] model = Model(latent_dim, noise_dim, n_points, bezier_degree) model.restore(save_dir='../results/SEoEi/infogan/10000/') n_vars = 0 for v in tf.global_variables(): n_vars += np.prod(v.get_shape().as_list()) print(n_vars)
def compile_layer(self,
                  conv_tiling,
                  conv_out_tensor,
                  pu_ops,
                  simd_lanes=4):
    """
    Compiler for PU layers

    Builds the instruction block that post-processes one convolution output
    with the given PU operations (``LeakyReLU``, ``BatchNorm``,
    ``TypeCastOp`` and at most one effective ``MaxPooling``).

    Args:
        conv_tiling: Mapping from loop name (``'B/b'``, ``'OC/oc'``,
            ``'OH/oh'``, ``'OW/ow'``, ...) to a pair; element 0 is used as
            the loop count and element 1 as the tile size.
        conv_out_tensor: Output tensor of the convolution; used as the
            destination when ``pu_ops`` is empty.
        pu_ops: Operations to apply, in order.
        simd_lanes: Divisor applied to the channel dimension when computing
            address strides.

    Returns:
        A list with ``get_binary()`` of every generated instruction, or
        ``None`` if only the placeholder entry exists (base-address
        instructions are always appended, so the list normally has more
        than one entry).

    Raises:
        ValueError: if a pre- or post-pool op is of an unsupported type.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; the spots marked below are ambiguous and must be confirmed
    against the original file.
    """
    # Padding of the last MaxPooling op, if any.
    # NOTE(review): the pool_pad_* values are not used further below.
    pool_pad = ((0, 0), (0, 0), (0, 0), (0, 0))
    for op in pu_ops:
        if isinstance(op, MaxPooling):
            pool_pad = op.pad
    pool_pad_h_t = pool_pad[1][0]
    pool_pad_h_b = pool_pad[1][1]
    pool_pad_w_l = pool_pad[2][0]
    pool_pad_w_r = pool_pad[2][1]
    pool_pad_h = pool_pad_h_t + pool_pad_h_b
    pool_pad_w = pool_pad_w_l + pool_pad_w_r

    # Allocate the final output tensor and, for every BatchNorm except a
    # trailing one, its mean and scale tensors.
    if len(pu_ops) > 0:
        self.fpga_manager.alloc(pu_ops[-1].output_tensors)
    for op in pu_ops[:-1]:
        if isinstance(op, BatchNorm):
            self.fpga_manager.alloc(op.mean)
            self.fpga_manager.alloc(op.scale)

    # get tile size
    b = conv_tiling['B/b'][1]
    oc = conv_tiling['OC/oc'][1]
    oh = conv_tiling['OH/oh'][1]
    ow = conv_tiling['OW/ow'][1]

    # get input tensor size
    B, OH, OW, OC = conv_out_tensor.shape

    # initialize pooled size
    pool_ow, pool_oh = ow, oh
    pool_kw = 1
    pool_kh = 1
    pool_sh = 1
    pool_sw = 1
    # NOTE(review): OH/OW look swapped here; both names are reassigned
    # before their next use further below.
    P_OW, P_OH = OH, OW
    for op in pu_ops:
        if isinstance(op, MaxPooling):
            P_OW = op.output_tensors.shape[-2]
            P_OH = op.output_tensors.shape[-3]
            pool_kh = op.pooling_kernel[1]
            pool_kw = op.pooling_kernel[2]
            pool_sh = op.stride[1]
            pool_sw = op.stride[2]
            # Pooled tile size for an unpadded window of size k and stride s.
            pool_ow = (ow - pool_kw) // pool_sw + 1
            pool_oh = (oh - pool_kh) // pool_sh + 1

    # loop name -> (axis of the output tensor, stride multiplier)
    pooled_output_strides = {
        'IC/ic': (0, 1),
        'OC/oc': (3, 1),
        'B/b': (0, 1),
        'OH/oh': (1, 1),
        'OW/ow': (2, 1),
        'KH/kh': (0, 0),
        'KW/kw': (0, 0)
    }

    # Slot 0 is a placeholder, replaced by PUBlockStart at the very end.
    pu_inst_list = [None]

    conv_tile_shape = (b, oh, ow, oc)
    pool_tile_shape = (b, pool_oh, pool_ow, oc)

    # Split the ops into those before and after the pooling op and record
    # whether BatchNorm operands have to be loaded.
    pre_pool_ops = []
    pool_op = None
    post_pool_ops = []
    pre_pool = True
    ld0_required = False
    ld1_required = False
    bn_pre_pool = False
    bn_mean_addr = None
    bn_scale_addr = None
    for op in pu_ops:
        if isinstance(op, BatchNorm):
            ld0_required = True
            ld1_required = True
            if pre_pool:
                bn_pre_pool = True
            # NOTE(review): assumed to sit outside `if pre_pool` -- confirm.
            bn_mean_addr = op.mean.fpga_addr
            bn_scale_addr = op.scale.fpga_addr
        if isinstance(op, MaxPooling):
            pool_op = op
            pre_pool = False
        else:
            if pre_pool:
                pre_pool_ops.append(op)
            else:
                post_pool_ops.append(op)

    # Output address: base address plus the byte offset that skips the
    # leading padding along every axis.
    if len(pu_ops) > 0:
        t_out = pu_ops[-1].output_tensors
    else:
        t_out = conv_out_tensor
    t_out_addr = t_out.fpga_addr
    pad_offset = 0
    for i in range(len(t_out.shape)):
        pad_offset += t_out.fpga_pad[i][0] * np.prod(
            t_out.fpga_shape[i + 1:])
    pad_offset = int(pad_offset * t_out.dtype.bits / 8)
    t_out_addr += pad_offset

    # Base addresses: id 1 gets the output tensor, id 2 the BN mean and
    # id 3 the BN scale (each written for both index values 0 and 1).
    pu_inst_list.append(BaseAddressInstruction(0, 0, 0))
    pu_inst_list.append(BaseAddressInstruction(1, 0, t_out_addr))
    pu_inst_list.append(BaseAddressInstruction(1, 1, t_out_addr))
    if ld0_required:
        pu_inst_list.append(BaseAddressInstruction(2, 0, bn_mean_addr))
        pu_inst_list.append(BaseAddressInstruction(2, 1, bn_mean_addr))
    if ld1_required:
        pu_inst_list.append(BaseAddressInstruction(3, 0, bn_scale_addr))
        pu_inst_list.append(BaseAddressInstruction(3, 1, bn_scale_addr))

    # Loop nest with id 0: walks the conv tile. Each LoopInstruction is
    # followed by the address stride of that loop level.
    pu_inst_list.append(LoopInstruction(0, 0, pool_kw - 1))
    pu_inst_list.append(GenAddrLowInstruction(0, 0, 0, oc))
    pu_inst_list.append(LoopInstruction(0, 0, pool_kh - 1))
    pu_inst_list.append(GenAddrLowInstruction(0, 0, 0, oc * ow))
    pu_inst_list.append(LoopInstruction(0, 0, pool_ow - 1))
    pu_inst_list.append(GenAddrLowInstruction(0, 0, 0, oc * pool_sw))
    pu_inst_list.append(LoopInstruction(0, 0, pool_oh - 1))
    pu_inst_list.append(GenAddrLowInstruction(0, 0, 0, oc * pool_sh * ow))
    pu_inst_list.append(LoopInstruction(0, 0, oc - 1))
    pu_inst_list.append(GenAddrLowInstruction(0, 0, 0, 1))
    pu_inst_list.append(LoopInstruction(0, 0, b - 1))
    pu_inst_list.append(GenAddrLowInstruction(0, 0, 0, oc * oh * ow))

    if ld0_required:
        pu_inst_list.append(LDMemInstruction(2, 32, 0, 0))
    if ld1_required:
        pu_inst_list.append(LDMemInstruction(3, 32, 0, 0))

    _pool_tile = {
        'B/b': b,
        'OC/oc': oc,
        'OH/oh': pool_oh,
        'OW/ow': pool_ow
    }

    # Outer (tile-level) loops: one loop with id 5 per tiled dimension that
    # iterates more than once, plus matching strides for ids 6 and 7.
    base_addr_loops = 0
    for loop, it in conv_tiling.items():
        if it[0] > 1 and loop in _pool_tile:
            if len(pu_ops) > 0:
                # `op` is the variable left over from the last
                # `for op in pu_ops` loop above, i.e. the final op.
                P_B, P_OH, P_OW, P_OC = op.output_tensors.fpga_shape
            else:
                P_B, P_OH, P_OW, P_OC = conv_out_tensor.fpga_shape
            dim, dim_stride = pooled_output_strides[loop]
            shape = (P_B, P_OH, P_OW,
                     int(math.ceil(float(P_OC) / simd_lanes)))
            pu_inst_list.append(LoopInstruction(5, 5, it[0] - 1))
            stride = int(
                np.prod(shape[dim + 1:]) * dim_stride * 2 *
                simd_lanes) * _pool_tile[loop]
            # Large strides additionally get a high-part instruction.
            if stride > (1 << 15):
                pu_inst_list.append(GenAddrHighInstruction(
                    5, 5, 0, stride))
            pu_inst_list.append(GenAddrLowInstruction(5, 5, 0, stride))
            base_addr_loops += 1

            # The BN operand addresses only advance along output channels.
            if loop == 'OC/oc' and ld0_required:
                stride = 2 * simd_lanes * oc
            else:
                stride = 0
            assert stride < (1 << 15)
            pu_inst_list.append(GenAddrLowInstruction(6, 6, 0, stride))

            if loop == 'OC/oc' and ld1_required:
                stride = 2 * simd_lanes * oc
            else:
                stride = 0
            assert stride < (1 << 15)
            pu_inst_list.append(GenAddrLowInstruction(7, 7, 0, stride))

    # No tiled outer loop: emit a single-iteration loop with zero strides.
    if base_addr_loops == 0:
        pu_inst_list.append(LoopInstruction(5, 5, 0))
        pu_inst_list.append(GenAddrLowInstruction(5, 5, 0, 0))
        pu_inst_list.append(GenAddrLowInstruction(6, 6, 0, 0))
        pu_inst_list.append(GenAddrLowInstruction(7, 7, 0, 0))

    if len(pu_ops) > 0:
        P_B, P_OH, P_OW, P_OC = pu_ops[-1].output_tensors.fpga_shape
    else:
        P_B, P_OH, P_OW, P_OC = conv_out_tensor.fpga_shape
    P_OC = int(math.ceil(P_OC / float(simd_lanes)))

    # Loop nest with id 1: addresses within the output tensor.
    pu_inst_list.append(LoopInstruction(1, 1, pool_ow - 1))
    if P_OC > (1 << 15):
        pu_inst_list.append(GenAddrHighInstruction(1, 1, 0, P_OC))
    pu_inst_list.append(GenAddrLowInstruction(1, 1, 0, P_OC))

    pu_inst_list.append(LoopInstruction(1, 1, pool_oh - 1))
    if P_OC * P_OW > (1 << 15):
        pu_inst_list.append(GenAddrHighInstruction(1, 1, 0, P_OC * P_OW))
    pu_inst_list.append(GenAddrLowInstruction(1, 1, 0, P_OC * P_OW))

    pu_inst_list.append(LoopInstruction(1, 1, oc - 1))
    pu_inst_list.append(GenAddrLowInstruction(1, 1, 0, 1))

    pu_inst_list.append(LoopInstruction(1, 1, b - 1))
    if P_OC * P_OW * P_OH > (1 << 15):
        pu_inst_list.append(
            GenAddrHighInstruction(1, 1, 0, P_OC * P_OW * P_OH))
    pu_inst_list.append(GenAddrLowInstruction(1, 1, 0,
                                              P_OC * P_OH * P_OW))

    # Loop nests with ids 2 and 3 (BN operands): only the channel loop has
    # a non-zero stride.
    if ld0_required:
        # if bn_pre_pool:
        #     pu_inst_list.append(LoopInstruction(2, 2, pool_kw-1))
        #     pu_inst_list.append(GenAddrLowInstruction(2, 2, 0, 0))
        #     pu_inst_list.append(LoopInstruction(2, 2, pool_kh-1))
        #     pu_inst_list.append(GenAddrLowInstruction(2, 2, 0, 0))
        pu_inst_list.append(LoopInstruction(2, 2, pool_ow - 1))
        pu_inst_list.append(GenAddrLowInstruction(2, 2, 0, 0))
        pu_inst_list.append(LoopInstruction(2, 2, pool_oh - 1))
        pu_inst_list.append(GenAddrLowInstruction(2, 2, 0, 0))
        pu_inst_list.append(LoopInstruction(2, 2, oc - 1))
        pu_inst_list.append(GenAddrLowInstruction(2, 2, 0, 1))
        pu_inst_list.append(LoopInstruction(2, 2, b - 1))
        pu_inst_list.append(GenAddrLowInstruction(2, 2, 0, 0))

    if ld1_required:
        # if bn_pre_pool:
        #     pu_inst_list.append(LoopInstruction(3, 3, pool_kw-1))
        #     pu_inst_list.append(GenAddrLowInstruction(3, 3, 0, 0))
        #     pu_inst_list.append(LoopInstruction(3, 3, pool_kh-1))
        #     pu_inst_list.append(GenAddrLowInstruction(3, 3, 0, 0))
        pu_inst_list.append(LoopInstruction(3, 3, pool_ow - 1))
        pu_inst_list.append(GenAddrLowInstruction(3, 3, 0, 0))
        pu_inst_list.append(LoopInstruction(3, 3, pool_oh - 1))
        pu_inst_list.append(GenAddrLowInstruction(3, 3, 0, 0))
        pu_inst_list.append(LoopInstruction(3, 3, oc - 1))
        pu_inst_list.append(GenAddrLowInstruction(3, 3, 0, 1))
        pu_inst_list.append(LoopInstruction(3, 3, b - 1))
        pu_inst_list.append(GenAddrLowInstruction(3, 3, 0, 0))

    # Compute instructions: one pass per pooling-window element. Each pass
    # reads address 8 into a register, applies the pre-pool ops and folds
    # the result into the running maximum.
    # NOTE(review): addresses 8/9/10 are presumably the data and BN operand
    # ports -- confirm against the ISA description.
    compute_instructions = []
    dest_reg = None
    pool_reg = None
    bn_scale_reg = None
    bn_mean_reg = None
    for idx in range(pool_kw * pool_kh):
        if dest_reg is None:
            dest_reg = self.acquire_reg()
            # output_frac_bits = pu_ops[-1].output_tensors.dtype.frac_bits
            # input_frac_bits = conv_out_tensor.dtype.frac_bits
            # bits = input_frac_bits - output_frac_bits
            # compute_instructions.append(ComputeRshiftImm(src0_addr=8, imm=bits, dest_addr=dest_reg))
            compute_instructions.append(
                ComputeNop(src0_addr=8, dest_addr=dest_reg))
        for op in pre_pool_ops:
            if isinstance(op, LeakyReLU):
                # max(x, (x * slope) >> bits) with a fixed-point slope.
                val = op.scalar.data
                # assuming 16-bits
                bits = 16
                val = int(float(val) * (1 << bits))
                assert val < (1 << bits) - 1 and val >= -(1 << bits)
                tmp_reg = self.acquire_reg()
                compute_instructions.append(
                    ComputeMulImm(src0_addr=dest_reg,
                                  imm=val,
                                  dest_addr=tmp_reg))
                compute_instructions.append(
                    ComputeRshiftImm(src0_addr=tmp_reg,
                                     imm=bits,
                                     dest_addr=tmp_reg))
                compute_instructions.append(
                    ComputeMax(src0_addr=dest_reg,
                               src1_addr=tmp_reg,
                               dest_addr=dest_reg))
                self.release_reg(tmp_reg)
            elif isinstance(op, BatchNorm):
                # NOTE(review): the register named bn_scale_reg is the
                # subtrahend and bn_mean_reg the multiplier below; the
                # names look swapped relative to their use -- confirm.
                # NOTE(review): extent of this `if` body is assumed.
                if bn_scale_reg is None:
                    bn_scale_reg = self.acquire_reg()
                    compute_instructions.append(
                        ComputeNop(src0_addr=9, dest_addr=bn_scale_reg))
                    assert bn_mean_reg is None
                    bn_mean_reg = self.acquire_reg()
                    compute_instructions.append(
                        ComputeNop(src0_addr=10, dest_addr=bn_mean_reg))
                # If the previous instruction merely copied into dest_reg,
                # fuse the copy into the subtraction.
                if isinstance(
                        compute_instructions[-1], ComputeNop
                ) and compute_instructions[-1].dest_addr == dest_reg:
                    src_addr = compute_instructions[-1].src0_addr
                    compute_instructions[-1] = ComputeSub(
                        src0_addr=src_addr,
                        src1_addr=bn_scale_reg,
                        dest_addr=dest_reg)
                else:
                    compute_instructions.append(
                        ComputeSub(src0_addr=dest_reg,
                                   src1_addr=bn_scale_reg,
                                   dest_addr=dest_reg))
                compute_instructions.append(
                    ComputeRshiftImm(src0_addr=dest_reg,
                                     imm=0,
                                     dest_addr=dest_reg))
                compute_instructions.append(
                    ComputeMul(src0_addr=dest_reg,
                               src1_addr=bn_mean_reg,
                               dest_addr=dest_reg))
            elif isinstance(op, TypeCastOp):
                # Right-shift by the difference in fractional bits.
                shift = op.data.dtype.frac_bits - op.output_tensors.dtype.frac_bits
                compute_instructions.append(
                    ComputeRshiftImm(src0_addr=dest_reg,
                                     imm=shift,
                                     dest_addr=dest_reg))
            else:
                raise ValueError('Not implemented')
        if pool_reg is None:
            # First window element: it becomes the running maximum.
            pool_reg = dest_reg
            dest_reg = None
        else:
            assert dest_reg is not None
            assert pool_reg is not None
            if idx != (pool_kw * pool_kh - 1) or len(post_pool_ops) > 0:
                compute_instructions.append(
                    ComputeMax(src0_addr=dest_reg,
                               src1_addr=pool_reg,
                               dest_addr=pool_reg))
            else:
                # Last element and nothing to do afterwards: write the
                # maximum straight to address 8.
                compute_instructions.append(
                    ComputeMax(src0_addr=dest_reg,
                               src1_addr=pool_reg,
                               dest_addr=8))
                # NOTE(review): assumed to belong to this `else` -- confirm.
                pool_reg = self.release_reg(pool_reg)
            dest_reg = self.release_reg(dest_reg)

    # NOTE(review): assumed to run once after the loop above; it asserts
    # that the BN registers were acquired by a pre-pool BatchNorm -- confirm.
    if ld0_required or ld1_required:
        assert bn_scale_reg is not None
        bn_scale_reg = self.release_reg(bn_scale_reg)
        assert bn_mean_reg is not None
        bn_mean_reg = self.release_reg(bn_mean_reg)

    # Post pool ops
    assert dest_reg is None
    dest_reg = pool_reg
    for op in post_pool_ops:
        if isinstance(op, LeakyReLU):
            val = op.scalar.data
            # assuming 16-bits
            bits = 16
            val = int(float(val) * (1 << bits))
            assert val < (1 << bits) - 1 and val >= -(1 << bits)
            tmp_reg = self.acquire_reg()
            compute_instructions.append(
                ComputeMulImm(src0_addr=dest_reg,
                              imm=val,
                              dest_addr=tmp_reg))
            compute_instructions.append(
                ComputeRshiftImm(src0_addr=tmp_reg,
                                 imm=bits,
                                 dest_addr=tmp_reg))
            compute_instructions.append(
                ComputeMax(src0_addr=dest_reg,
                           src1_addr=tmp_reg,
                           dest_addr=dest_reg))
            self.release_reg(tmp_reg)
        elif isinstance(op, BatchNorm):
            # Post-pool BatchNorm uses addresses 9 and 10 directly as
            # operands instead of copies held in registers.
            compute_instructions.append(
                ComputeSub(src0_addr=dest_reg,
                           src1_addr=9,
                           dest_addr=dest_reg))
            compute_instructions.append(
                ComputeRshiftImm(src0_addr=dest_reg,
                                 imm=0,
                                 dest_addr=dest_reg))
            compute_instructions.append(
                ComputeMul(src0_addr=dest_reg,
                           src1_addr=10,
                           dest_addr=dest_reg))
        elif isinstance(op, TypeCastOp):
            shift = op.data.dtype.frac_bits - op.output_tensors.dtype.frac_bits
            compute_instructions.append(
                ComputeRshiftImm(src0_addr=dest_reg,
                                 imm=shift,
                                 dest_addr=dest_reg))
        else:
            raise ValueError('Not implemented')
    # Anything still held in a register is written out to address 8.
    if dest_reg is not None:
        compute_instructions.append(
            ComputeNop(src0_addr=dest_reg, dest_addr=8))
        dest_reg = self.release_reg(dest_reg)

    for inst in compute_instructions:
        pu_inst_list.append(inst)

    # The compute body repeats once per output element of the pooled tile.
    num_repeats = b * pool_ow * pool_oh * oc
    pu_inst_list.append(PUBlockRepeat(num_repeats))

    # Every entry of self.rf must be 0 at this point (presumably: all
    # registers released -- TODO confirm).
    for i in self.rf:
        assert i == 0

    inst_array = []
    if len(pu_inst_list) > 1:
        # Fill the placeholder slot now that the block length is known.
        pu_inst_list[0] = PUBlockStart(len(pu_inst_list) - 2)
        for i in pu_inst_list:
            inst_array.append(i.get_binary())
        return inst_array
    else:
        return None
def gen_real(shape):
    """Return an array of ``shape`` filled with random real numbers.

    Values are drawn with ``np.random.uniform`` (its default range).

    Args:
        shape: Target shape, an int or a sequence of ints. The empty shape
            ``()`` is supported and yields a 0-d array.

    Returns:
        A float ndarray with the requested shape.
    """
    # np.prod(()) is the float 1.0, which np.random.uniform does not accept
    # as a size; coerce the element count to a plain int.
    n = int(np.prod(shape))
    return np.random.uniform(size=n).reshape(shape)
# Build the "true" model vector `mtrue`: a uniform background with an
# overburden layer and two circular anomalies, then plot the mesh.
# `mesh`, `x0`, `z0`, `r0`, `norm`, `Utils` and `plt` are defined outside
# this excerpt.

# Centre (x1, z1) and radius r1 of the second circular anomaly.
x1,z1, r1 = 6., -4., 3.

# Model values for the background, the first anomaly (x0, z0, r0), the
# second anomaly (x1, z1, r1) and the overburden (presumably log
# conductivities, going by the `ln_sig*` names -- TODO confirm).
ln_sigback = -5.
ln_sigc = -3.
ln_sigr = -7.

# Parameters of the additive noise.
# NOTE(review): `noisevar` is passed as the second argument of `norm`; if
# that is scipy.stats.norm, the argument is the standard deviation (scale),
# not the variance -- confirm.
noisemean = 0.
noisevar = 0.0

overburden_extent = 0.
ln_over = -4.
#m = (lnsig_background)*np.ones(mesh.nC);
#mu =np.ones(mesh.nC);

# Background everywhere, plus noise.
mtrue = ln_sigback*np.ones(mesh.nC) + norm(noisemean,noisevar).rvs(mesh.nC)

# Overburden: cells whose second coordinate lies in (-overburden_extent, 0].
overb = (mesh.gridCC[:,1] >-overburden_extent) & (mesh.gridCC[:,1]<=0)
mtrue[overb] = ln_over*np.ones_like(mtrue[overb])+ norm(noisemean,noisevar).rvs(np.prod((mtrue[overb]).shape))

# First anomaly: cell centres closer than r0 to (x0, z0).
csph = (np.sqrt((mesh.gridCC[:,1]-z0)**2.+(mesh.gridCC[:,0]-x0)**2.))< r0
mtrue[csph] = ln_sigc*np.ones_like(mtrue[csph]) + norm(noisemean,noisevar).rvs(np.prod((mtrue[csph]).shape))

#Define the sphere limit
# Second anomaly: cell centres closer than r1 to (x1, z1).
rsph = (np.sqrt((mesh.gridCC[:,1]-z1)**2.+(mesh.gridCC[:,0]-x1)**2.))< r1
mtrue[rsph] = ln_sigr*np.ones_like(mtrue[rsph]) + norm(noisemean,noisevar).rvs(np.prod((mtrue[rsph]).shape))

mtrue = Utils.mkvc(mtrue);

# Plot the mesh and restrict the view to x in [-10, 10], y in [-10, 0].
mesh.plotGrid()
plt.gca().set_xlim([-10,10])
plt.gca().set_ylim([-10,0])

# 2 x 2 array of axis limits, one row per axis.
xyzlim = np.r_[[[-10.,10.],[-10.,1.]]]
def _validate_x_shape(x_shape): x_shape = tuple([int(v) for v in x_shape]) if np.prod(x_shape) != 784: raise ValueError('`x_shape` does not product to 784: {!r}'. format(x_shape)) return x_shape
def MakeNdarray(tensor):
    """Create a numpy ndarray from a tensor.

    Create a numpy ndarray with the same shape and data as the tensor.

    The data is taken from ``tensor.tensor_content`` when that field is
    non-empty; otherwise from the typed value field matching the dtype. A
    value field holding a single element (two numbers for complex types) is
    repeated to fill the whole shape.

    Args:
      tensor: A TensorProto.

    Returns:
      A numpy array with the tensor contents.

    Raises:
      TypeError: if tensor has unsupported type.
    """
    shape = [d.size for d in tensor.tensor_shape.dim]
    num_elements = np.prod(shape)
    tensor_dtype = dtypes.as_dtype(tensor.dtype)
    dtype = tensor_dtype.as_numpy_dtype

    if tensor.tensor_content:
        # Binary-mode np.fromstring is deprecated. np.frombuffer returns a
        # read-only view of the bytes, so copy to keep the result writable
        # as before.
        return (np.frombuffer(tensor.tensor_content,
                              dtype=dtype).copy().reshape(shape))
    elif tensor_dtype == dtypes.float16:
        # the half_val field of the TensorProto stores the binary representation
        # of the fp16: we need to reinterpret this as a proper float16
        if len(tensor.half_val) == 1:
            tmp = np.array(tensor.half_val[0], dtype=np.uint16)
            tmp.dtype = np.float16
            return np.repeat(tmp, num_elements).reshape(shape)
        else:
            tmp = np.fromiter(tensor.half_val, dtype=np.uint16)
            tmp.dtype = np.float16
            return tmp.reshape(shape)
    elif tensor_dtype == dtypes.float32:
        if len(tensor.float_val) == 1:
            return np.repeat(np.array(tensor.float_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.fromiter(tensor.float_val, dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.float64:
        if len(tensor.double_val) == 1:
            return np.repeat(np.array(tensor.double_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.fromiter(tensor.double_val, dtype=dtype).reshape(shape)
    elif tensor_dtype in [dtypes.int32, dtypes.uint8, dtypes.uint16,
                          dtypes.int16, dtypes.int8, dtypes.qint32,
                          dtypes.quint8, dtypes.qint8, dtypes.qint16,
                          dtypes.quint16, dtypes.bfloat16]:
        # All of these types are stored in the int_val field.
        if len(tensor.int_val) == 1:
            return np.repeat(np.array(tensor.int_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.fromiter(tensor.int_val, dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.int64:
        if len(tensor.int64_val) == 1:
            return np.repeat(np.array(tensor.int64_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.fromiter(tensor.int64_val, dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.string:
        if len(tensor.string_val) == 1:
            return np.repeat(np.array(tensor.string_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.array(list(tensor.string_val),
                            dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.complex64:
        # Values are stored flat as (real, imag) pairs; zipping one iterator
        # with itself consumes them two at a time.
        it = iter(tensor.scomplex_val)
        if len(tensor.scomplex_val) == 2:
            return np.repeat(np.array(complex(tensor.scomplex_val[0],
                                              tensor.scomplex_val[1]),
                                      dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.array([complex(x[0], x[1]) for x in zip(it, it)],
                            dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.complex128:
        # Same (real, imag) pair layout as complex64.
        it = iter(tensor.dcomplex_val)
        if len(tensor.dcomplex_val) == 2:
            return np.repeat(np.array(complex(tensor.dcomplex_val[0],
                                              tensor.dcomplex_val[1]),
                                      dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.array([complex(x[0], x[1]) for x in zip(it, it)],
                            dtype=dtype).reshape(shape)
    elif tensor_dtype == dtypes.bool:
        if len(tensor.bool_val) == 1:
            return np.repeat(np.array(tensor.bool_val[0], dtype=dtype),
                             num_elements).reshape(shape)
        else:
            return np.fromiter(tensor.bool_val, dtype=dtype).reshape(shape)
    else:
        raise TypeError("Unsupported tensor type: %s" % tensor.dtype)
def averageError(self, image0, image1):
    """Return the mean absolute element-wise difference of two images.

    Asserts (via ``self.assertEqual``) that both images have the same shape.
    """
    self.assertEqual(image0.shape, image1.shape)
    # Widen to int before subtracting to avoid overflow.
    widened = image0.astype(int)
    total_abs_diff = np.abs(widened - image1).sum()
    return total_abs_diff / np.prod(widened.shape)
def make_tensor_proto(values, dtype=None, shape=None):
    """Create a TensorProto.

    make_tensor_proto accepts "values" of a python scalar, a python list, a
    numpy ndarray, or a numpy scalar.

    If "values" is a python scalar or a python list, make_tensor_proto
    first convert it to numpy ndarray. If dtype is None, the
    conversion tries its best to infer the right numpy data
    type. Otherwise, the resulting numpy array has a compatible data
    type with the given dtype.

    In either case above, the numpy ndarray (either the caller provided
    or the auto converted) must have the compatible type with dtype.

    make_tensor_proto then converts the numpy array to a tensor proto.

    If "shape" is None, the resulting tensor proto represents the numpy
    array precisely.

    Otherwise, "shape" specifies the tensor's shape and the numpy array
    can not have more elements than what "shape" specifies.

    Args:
      values: Values to put in the TensorProto.
      dtype: Optional tensor_pb2 DataType value.
      shape: List of integers representing the dimensions of tensor.

    Returns:
      A TensorProto. Depending on the type, it may contain data in the
      "tensor_content" attribute, which is not directly useful to Python
      programs. To access the values you should convert the proto back to a
      numpy ndarray with tensor_util.MakeNdarray(proto).

    Raises:
      TypeError: if unsupported types are provided.
      ValueError: if arguments have inappropriate values.
    """
    if dtype:
        dtype = dtypes.as_dtype(dtype)

    is_quantized = (dtype in [dtypes.qint8, dtypes.quint8, dtypes.qint16,
                              dtypes.quint16, dtypes.qint32])

    # We first convert value to a numpy array or scalar.
    if isinstance(values, (np.ndarray, np.generic)):
        if dtype:
            nparray = values.astype(dtype.as_numpy_dtype)
        else:
            nparray = values
    else:
        if values is None:
            raise ValueError("None values not supported.")
        # if dtype is provided, forces numpy array to be the type
        # provided if possible.
        np_dt = dtype.as_numpy_dtype if dtype else None
        # Only consult the shape when one was given; np.prod(None) is not a
        # meaningful element count.
        if shape is not None and np.prod(shape, dtype=np.int64) == 0:
            nparray = np.empty(shape, dtype=np_dt)
        else:
            _AssertCompatible(values, dtype)
            nparray = np.array(values, dtype=np_dt)
            # We need to pass in quantized values as tuples, so don't apply
            # the shape check to them.
            if (list(nparray.shape) != _GetDenseDimensions(values) and
                    not is_quantized):
                raise ValueError("""Argument must be a dense tensor: %s"""
                                 """ - got shape %s, but wanted %s.""" % (
                                     values, list(nparray.shape),
                                     _GetDenseDimensions(values)))

        # python/numpy default float type is float64. We prefer float32
        # instead.
        if (nparray.dtype == np.float64) and dtype is None:
            nparray = nparray.astype(np.float32)
        # python/numpy default int type is int64. We prefer int32 instead.
        elif (nparray.dtype == np.int64) and dtype is None:
            downcasted_array = nparray.astype(np.int32)
            # Do not down cast if it leads to precision loss.
            if np.array_equal(downcasted_array, nparray):
                nparray = downcasted_array

    # if dtype is provided, it must be compatible with what numpy
    # conversion says.
    numpy_dtype = dtypes.as_dtype(nparray.dtype)
    if numpy_dtype is None:
        raise TypeError("Unrecognized data type: %s" % nparray.dtype)

    # If dtype was specified and is a quantized type, we convert
    # numpy_dtype back into the quantized version.
    if is_quantized:
        numpy_dtype = dtype

    if dtype is not None and (not hasattr(dtype, "base_dtype") or
                              dtype.base_dtype != numpy_dtype.base_dtype):
        raise TypeError("Incompatible types: %s vs. %s"
                        % (dtype, nparray.dtype))

    # If shape is not given, get the shape from the numpy array.
    if shape is None:
        shape = nparray.shape
        is_same_size = True
        shape_size = nparray.size
    else:
        shape = [int(dim) for dim in shape]
        # Integer product, so that an empty shape list yields 1 rather than
        # the float 1.0.
        shape_size = np.prod(shape, dtype=np.int64)
        is_same_size = shape_size == nparray.size

        if nparray.size > shape_size:
            raise ValueError(
                "Too many elements provided. Needed at most %d, "
                "but received %d" % (shape_size, nparray.size))

    tensor_proto = tensor_pb2.TensorProto(
        dtype=numpy_dtype.as_datatype_enum,
        tensor_shape=tensor_shape.as_shape(shape).as_proto())

    if (is_same_size and numpy_dtype in _TENSOR_CONTENT_TYPES
            and shape_size > 1):
        if nparray.size * nparray.itemsize >= (1 << 31):
            raise ValueError(
                "Cannot create a tensor proto whose content is larger than "
                "2GB.")
        # tobytes() replaces the deprecated tostring() alias.
        tensor_proto.tensor_content = nparray.tobytes()
        return tensor_proto

    # If we were not given values as a numpy array, compute the proto_values
    # from the given values directly, to avoid numpy trimming nulls from the
    # strings. Since values could be a list of strings, or a multi-dimensional
    # list of lists that might or might not correspond to the given shape,
    # we flatten it conservatively.
    if numpy_dtype == dtypes.string and not isinstance(values, np.ndarray):
        proto_values = _FlattenToStrings(values)
        tensor_proto.string_val.extend(
            [compat.as_bytes(x) for x in proto_values])
        return tensor_proto

    # TensorFlow expects C order (a.k.a., eigen row major).
    proto_values = nparray.ravel()

    append_fn = GetNumpyAppendFn(proto_values.dtype)
    if append_fn is None:
        raise TypeError("Element type not supported in TensorProto: %s" %
                        numpy_dtype.name)
    append_fn(tensor_proto, proto_values)

    return tensor_proto
def _ConstantValue(tensor):
    """Try to compute the value of `tensor` from its producing op alone.

    The op types handled are "Const", "Shape", "Size", "Rank", "Range",
    "Cast", "Concat" and "Pack". For ops with inputs, the inputs are
    resolved through `constant_value`; if any of them cannot be resolved
    the result is None.

    Args:
      tensor: An `ops.Tensor`.

    Returns:
      The computed value (a numpy value for every op type except "Rank",
      which yields the number of dimensions as reported by the input's
      static shape), or None when the value cannot be determined or the op
      type is not handled.

    Raises:
      TypeError: if `tensor` is not an `ops.Tensor`.
    """
    # TODO(touts): Support Variables?
    if not isinstance(tensor, ops.Tensor):
        raise TypeError("tensor is not a Tensor")

    op = tensor.op
    op_type = op.type

    if op_type == "Const":
        return MakeNdarray(op.get_attr("value"))

    if op_type == "Shape":
        input_shape = op.inputs[0].get_shape()
        if not input_shape.is_fully_defined():
            return None
        return np.array([dim.value for dim in input_shape.dims],
                        dtype=tensor.dtype.as_numpy_dtype)

    if op_type == "Size":
        input_shape = op.inputs[0].get_shape()
        if not input_shape.is_fully_defined():
            return None
        return np.prod([dim.value for dim in input_shape.dims],
                       dtype=np.int32)

    if op_type == "Rank":
        input_shape = op.inputs[0].get_shape()
        if input_shape.ndims is None:
            return None
        return input_shape.ndims

    if op_type == "Range":
        start = constant_value(op.inputs[0])
        if start is None:
            return None
        limit = constant_value(op.inputs[1])
        if limit is None:
            return None
        delta = constant_value(op.inputs[2])
        if delta is None:
            return None
        return np.arange(start, limit, delta,
                         dtype=tensor.dtype.as_numpy_dtype)

    if op_type == "Cast":
        pre_cast = constant_value(op.inputs[0])
        if pre_cast is None:
            return None
        cast_dtype = dtypes.as_dtype(op.get_attr("DstT"))
        # The "Rank" branch yields a plain Python int, which has no astype();
        # np.asarray leaves existing ndarrays untouched and wraps anything
        # else.
        return np.asarray(pre_cast).astype(cast_dtype.as_numpy_dtype)

    if op_type == "Concat":
        # The first input is the concatenation axis; the rest are the pieces.
        dim = constant_value(op.inputs[0])
        if dim is None:
            return None
        values = []
        for x in op.inputs[1:]:
            value = constant_value(x)
            if value is None:
                return None
            values.append(value)
        return np.concatenate(values, axis=dim)

    if op_type == "Pack":
        values = []
        for x in op.inputs:
            value = constant_value(x)
            if value is None:
                return None
            values.append(value)
        return np.array(values)

    return None