def build_model(deterministic=False):

    def control_loop(state, memory):
        positions, velocities, rot_matrices = state
        #sensor_values = engine.get_sensor_values(state=(positions, velocities, rot_matrices))
        # ALPHA controls the gradient rescaling applied by mulgrad (helper
        # defined elsewhere); 1.0 leaves the gradient unchanged.
        ALPHA = 1.0
        image = engine.get_camera_image(EngineState(*state), CAMERA)
        controller["input"].input_var = image - 0.5  # for normalization
        if "recurrent" in controller:
            controller["memory"].input_var = memory
            memory = lasagne.layers.helper.get_output(
                controller["recurrent"], deterministic=deterministic)
            memory = mulgrad(memory, ALPHA)
        motor_signals = lasagne.layers.helper.get_output(
            controller["output"], deterministic=deterministic)
        positions, velocities, rot_matrices = (mulgrad(positions, ALPHA),
                                               mulgrad(velocities, ALPHA),
                                               mulgrad(rot_matrices, ALPHA))
        newstate = engine.do_time_step(
            state=EngineState(positions, velocities, rot_matrices),
            motor_signals=motor_signals)
        newstate += (image, )
        if "recurrent" in controller:
            newstate += (memory, )
        return newstate

    # T.TensorConstant: actively avoid Theano introducing broadcastable
    # dimensions, which might mask bugs.
    empty_image = (T.TensorConstant(
        T.ftensor4,
        data=np.zeros(shape=engine.get_camera_image_size(CAMERA),
                      dtype='float32')), )

    if "recurrent" in controller:
        empty_memory = (T.TensorConstant(
            T.fmatrix,
            data=np.zeros(shape=(BATCH_SIZE, MEMORY_SIZE),
                          dtype='float32')), )
    else:
        empty_memory = ()

    # The scan which iterates over all time steps.
    # a, b, c = positions, velocities, rotations; imgs and m are the
    # previous camera image and memory from outputs_info.
    outputs, updates = theano.scan(
        fn=lambda a, b, c, imgs, m, *ns: control_loop(state=(a, b, c), memory=m),
        outputs_info=get_randomized_initial_state() + empty_image + empty_memory,
        n_steps=int(math.ceil(total_time / engine.DT)),
        strict=True,
        non_sequences=get_shared_variables())
    return outputs, updates
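# A minimal usage sketch (an assumption about the surrounding training
# script, not verbatim from the repo): compile the unrolled simulation into
# a callable and evaluate the objective builder defined further below.
def _build_model_demo():
    outputs, updates = build_model()
    loss = T.mean(build_objectives(outputs))
    evaluate = theano.function(inputs=[], outputs=loss, updates=updates)
    print "loss:", evaluate()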
def get_initial_state(self):
    # T.TensorConstant: actively avoid Theano introducing broadcastable
    # dimensions, which might mask bugs.
    return EngineState(
        positions=T.TensorConstant(type=T.ftensor3,
                                   data=self.initial_positions,
                                   name='initial positions'),
        velocities=T.TensorConstant(type=T.ftensor3,
                                    data=self.initial_velocities,
                                    name='initial velocities'),
        rotations=T.TensorConstant(type=T.ftensor4,
                                   data=self.initial_rotations,
                                   name='initial rotations'))
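# Why the explicit TensorConstant above matters (a standalone illustration
# with hypothetical shapes, not part of the engine): T.constant marks every
# length-1 axis as broadcastable, which can let a batch of size 1 silently
# broadcast instead of raising a shape error.
def _broadcastable_demo():
    a = T.constant(np.zeros((1, 3, 3), dtype='float32'))
    print a.broadcastable  # (True, False, False): axis 0 may broadcast
    b = T.TensorConstant(T.ftensor3,
                         data=np.zeros((1, 3, 3), dtype='float32'))
    print b.broadcastable  # (False, False, False): mismatches raise errors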
def _kernel_matching(q1_x, q1_mu, xt_x, xt_mu, radius):
    """
    Given two measures q1 and xt represented by locations/weights arrays,
    outputs a kernel-fidelity term and the 2D graph of the blurred distance
    function as an 'info' array.
    """
    K_qq, K_qx, K_xx = _cross_kernels(q1_x, xt_x, radius)
    q1_mu = q1_mu.dimshuffle(0, 'x')  # column
    xt_mu = xt_mu.dimshuffle(0, 'x')  # column
    cost = .5 * (T.sum(K_qq * q1_mu.dot(q1_mu.T))
                 + T.sum(K_xx * xt_mu.dot(xt_mu.T))
                 - 2 * T.sum(K_qx * q1_mu.dot(xt_mu.T)))

    # Info = the 2D graph of the blurred distance function
    res = 10
    # float division on the grid offset (plain 1/(2*res) truncates to 0
    # under Python 2)
    ticks = np.linspace(0, 1, res + 1)[:-1] + 1. / (2 * res)
    X, Y = np.meshgrid(ticks, ticks)
    points = T.TensorConstant(
        T.TensorType(config.floatX, [False, False]),
        np.vstack((X.ravel(), Y.ravel())).T.astype(config.floatX))
    info = (_k(points, q1_x, radius).dot(q1_mu)
            - _k(points, xt_x, radius).dot(xt_mu))
    return [cost, info.reshape((res, res))]
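# _cross_kernels and _k are defined elsewhere in this file; for reference,
# a plausible Gaussian-kernel sketch of their contract (an assumption, not
# the verified implementation):
#
#     def _k(x, y, radius):
#         # (n, 2) x (m, 2) -> (n, m) Gaussian kernel matrix
#         sq = T.sum((x.dimshuffle(0, 'x', 1) - y.dimshuffle('x', 0, 1)) ** 2,
#                    axis=2)
#         return T.exp(-sq / (radius ** 2))
#
#     def _cross_kernels(q, x, radius):
#         return _k(q, q, radius), _k(q, x, radius), _k(x, x, radius)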
total_time = 8  # seconds
BATCH_SIZE = 1
MEMORY_SIZE = 128
CAMERA = "front_camera"

engine.compile(batch_size=BATCH_SIZE)
print "#batch:", BATCH_SIZE
print "#memory:", MEMORY_SIZE
print "#sensors:", engine.num_sensors
print "#motors:", engine.num_motors
print "#cameras:", engine.num_cameras
#engine.randomizeInitialState(rotate_around="spine")

# step 2: build the model, controller and engine for simulation
target = T.TensorConstant(T.fvector,
                          data=np.array([0, 0, 0.9], dtype='float32'))


def build_objectives(states_list):
    # mean over time of the Euclidean distance between body `top_id` and
    # the target, one value per batch element
    positions, velocities, rotations = states_list[:3]
    return T.mean((positions[:, :, top_id, :]
                   - target[None, None, :]).norm(2, axis=2), axis=0)


def build_objectives_test(states_list):
    # same objective, evaluated only on the time steps from step 700 on
    positions, velocities, rotations = states_list[:3]
    return T.mean((positions[700:, :, top_id, :]
                   - target[None, None, :]).norm(2, axis=2), axis=0)


srng = RandomStreams(seed=317070)


def get_randomized_initial_state():
    state = engine.get_initial_state()
    positions, velocities, rotations = state
    if BATCH_SIZE > 1:
def get_camera_image(self, state, camera_name):
    # Render the camera image by ray tracing: do ray-sphere and ray-plane
    # intersections; a cube is found with 6 planes, throwing away the
    # irrelevant intersections.

    # step 1: generate list of rays (1 per pixel)
    #   focal_point (3,)
    #   ray_dir     (px_hor, px_ver, 3)
    #   ray_offset  (px_hor, px_ver, 3)
    camera = self.cameras[camera_name]
    positions, velocities, rotations = (state.positions, state.velocities,
                                        state.rotations)
    ray_dir = camera["ray_direction"]
    ray_offset = camera["ray_offset"]
    parent = camera["parent"]
    px_ver = ray_dir.shape[0]
    px_hor = ray_dir.shape[1]

    # WORKPOINT
    if parent:
        pid = self.objects[parent]
        # rotate and move the camera according to its parent
        ray_dir = theano_convert_model_to_world_coordinate_no_bias(
            ray_dir, rotations[:, pid, :, :])
        ray_offset = theano_convert_model_to_world_coordinate(
            ray_offset, rotations[:, pid, :, :], positions[:, pid, :])
    else:
        ray_dir = ray_dir[None, :, :, :]
        ray_offset = ray_offset[None, :, :, :]

    # step 2a: intersect the rays with all the spheres
    has_spheres = (0 != len(self.sphere_parent))
    #s_relevant = np.ones(shape=(self.batch_size, px_ver, px_hor, self.sphere_parent.shape[0]))
    if has_spheres:
        s_pos_vectors = positions[:, None, None, self.sphere_parent, :]
        s_rot_matrices = rotations[:, self.sphere_parent, :, :]

        L = s_pos_vectors - ray_offset[:, :, :, None, :]
        tca = T.sum(L * ray_dir[:, :, :, None, :],
                    axis=4)  # L.dotProduct(ray_dir)
        #// if (tca < 0) return false;
        d2 = T.sum(L * L, axis=4) - tca * tca
        r2 = self.sphere_radius ** 2
        #if (d2 > radius2) return false;
        s_relevant = (tca > 0) * (d2[:, :, :, :] < r2[None, None, None, :])
        float_s_relevant = T.cast(s_relevant, 'float32')
        thc = T.sqrt(
            (r2[None, None, None, :] - float_s_relevant * d2[:, :, :, :]))
        s_t0 = tca - thc

        Phit = ray_offset[:, :, :, None, :] \
               + s_t0[:, :, :, :, None] * ray_dir[:, :, :, None, :]
        N = (Phit - s_pos_vectors) / self.sphere_radius[None, None, None, :,
                                                        None]
        N = theano_convert_world_to_model_coordinate_no_bias(
            N, s_rot_matrices[:, None, None, :, :, :])

        # tex_y and tex_x in [-1,1]
        s_tex_x = T.arctan2(N[:, :, :, :, 2], N[:, :, :, :, 0]) / np.pi
        s_tex_y = -1. + (2. - eps) * T.arccos(
            T.clip(N[:, :, :, :, 1], -1.0, 1.0)) / np.pi

    # step 2b: intersect the rays with the cubes (cubes=planes)
    # step 2c: intersect the rays with the planes
    has_faces = (0 != len(self.face_parent))
    if has_faces:
        hasparent = [
            i for i, par in enumerate(self.face_parent) if par is not None
        ]
        hasnoparent = [
            i for i, par in enumerate(self.face_parent) if par is None
        ]
        parents = [
            parent for parent in self.face_parent if parent is not None
        ]

        static_fn = numpy_repeat_new_axis(self.face_normal[hasnoparent, :],
                                          self.batch_size)
        static_fp = numpy_repeat_new_axis(self.face_point[hasnoparent, :],
                                          self.batch_size)
        static_ftx = numpy_repeat_new_axis(
            self.face_texture_x[hasnoparent, :], self.batch_size)
        static_fty = numpy_repeat_new_axis(
            self.face_texture_y[hasnoparent, :], self.batch_size)

        if hasparent:
            fn = theano_convert_model_to_world_coordinate_no_bias(
                self.face_normal[None, hasparent, :],
                rotations[:, parents, :, :])
            fn = T.concatenate([static_fn, fn], axis=1)
            fp = theano_convert_model_to_world_coordinate(
                self.face_point[None, hasparent, :],
                rotations[:, parents, :, :], positions[:, parents, :])
            fp = T.concatenate([static_fp, fp], axis=1)
            ftx = theano_convert_model_to_world_coordinate_no_bias(
                self.face_texture_x[None, hasparent, :],
                rotations[:, parents, :, :])
            ftx = T.concatenate([static_ftx, ftx], axis=1)
            fty = theano_convert_model_to_world_coordinate_no_bias(
                self.face_texture_y[None, hasparent, :],
                rotations[:, parents, :, :])
            fty = T.concatenate([static_fty, fty], axis=1)
        else:
            fn = static_fn
            fp = static_fp
            ftx = static_ftx
            fty = static_fty

        # reshuffle the face_texture_indexes to match the reshuffling
        # we did above
        face_indices = hasnoparent + hasparent
        face_texture_index = self.face_texture_index[face_indices]
        face_texture_limited = self.face_texture_limited[face_indices]
        face_colors = self.face_colors[face_indices, :]

        denom = T.sum(fn[:, None, None, :, :] * ray_dir[:, :, :, None, :],
                      axis=4)
        p0l0 = fp[:, None, None, :, :] - ray_offset[:, :, :, None, :]
        p_t0 = T.sum(p0l0 * fn[:, None, None, :, :], axis=4) / (denom + 1e-9)

        Phit = ray_offset[:, :, :, None, :] \
               + p_t0[:, :, :, :, None] * ray_dir[:, :, :, None, :]
        pd = Phit - fp[:, None, None, :, :]
        p_tex_x = T.sum(ftx[:, None, None, :, :] * pd, axis=4)
        p_tex_y = T.sum(fty[:, None, None, :, :] * pd, axis=4)

        # the following only on limited textures
        p_relevant = (p_t0 > 0) * (1 - (1 - (-1 < p_tex_x) * (p_tex_x < 1) *
                                        (-1 < p_tex_y) * (p_tex_y < 1)) *
                                   face_texture_limited)
        p_tex_x = ((p_tex_x + 1) % 2.) - 1
        p_tex_y = ((p_tex_y + 1) % 2.) - 1

    # step 3: find the closest point of intersection for all objects
    # (z-culling)
    if has_spheres and has_faces:
        relevant = T.concatenate([s_relevant, p_relevant],
                                 axis=3).astype('float32')
        tex_x = T.concatenate([s_tex_x, p_tex_x], axis=3)
        tex_y = T.concatenate([s_tex_y, p_tex_y], axis=3)
        tex_t = np.concatenate(
            [self.sphere_texture_index, face_texture_index], axis=0)
        t = T.concatenate([s_t0, p_t0], axis=3)
    elif has_spheres:
        relevant = s_relevant.astype('float32')
        tex_x = s_tex_x
        tex_y = s_tex_y
        tex_t = self.sphere_texture_index
        t = s_t0
    elif has_faces:
        relevant = p_relevant.astype('float32')
        tex_x = p_tex_x
        tex_y = p_tex_y
        tex_t = face_texture_index
        t = p_t0
    else:
        raise NotImplementedError()

    mint = T.min(t * relevant + (1. - relevant) * np.float32(1e9), axis=3)
    relevant *= (t <= mint[:, :, :, None])  # only use the closest object

    # step 4: go into the object's texture and get the corresponding value
    # (see image transform)
    x_size, y_size = self.textures.shape[1] - 1, self.textures.shape[2] - 1
    tex_x = (tex_x + 1) * x_size / 2.
    tex_y = (tex_y + 1) * y_size / 2.
    x_idx = T.floor(tex_x)
    x_wgh = tex_x - x_idx
    y_idx = T.floor(tex_y)
    y_wgh = tex_y - y_idx

    # if the following are -2,147,483,648 or -9,223,372,036,854,775,808,
    # you have NaN's
    x_idx, y_idx = T.cast(x_idx, 'int64'), T.cast(y_idx, 'int64')

    textures = T.TensorConstant(type=T.ftensor4,
                                data=self.textures.astype('float32'),
                                name='textures')
    # bilinear interpolation between the four neighbouring texels
    sample = (   x_wgh  *    y_wgh )[:, :, :, :, None] * textures[tex_t[None, None, None, :], x_idx + 1, y_idx + 1, :] + \
             (   x_wgh  * (1-y_wgh))[:, :, :, :, None] * textures[tex_t[None, None, None, :], x_idx + 1, y_idx    , :] + \
             ((1-x_wgh) *    y_wgh )[:, :, :, :, None] * textures[tex_t[None, None, None, :], x_idx    , y_idx + 1, :] + \
             ((1-x_wgh) * (1-y_wgh))[:, :, :, :, None] * textures[tex_t[None, None, None, :], x_idx    , y_idx    , :]

    # multiply with color of object
    colors = np.concatenate([self.sphere_colors, face_colors], axis=0)
    if np.min(colors) != 1.:  # if the colors are actually used
        sample = colors[None, None, None, :, :] * sample

    # step 5: return this value
    image = T.sum(sample * relevant[:, :, :, :, None], axis=3)

    background_color = camera["background_color"]
    if background_color is not None:
        # find the rays for which no object was relevant; make them the
        # background color
        background = background_color[None, None, None, :] * (
            1 - T.max(relevant[:, :, :, :], axis=3))[:, :, :, None]
        image += background

    # dimshuffle to more closely match the deep learning conventions
    image = T.unbroadcast(image, 0, 1, 2, 3).dimshuffle(0, 3, 2, 1)
    return image
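# The bilinear texture lookup in step 4, shown standalone on a tiny numpy
# texture (hypothetical data, same weighting scheme as above):
def _bilinear_demo():
    tex = np.arange(16.0).reshape(4, 4)  # one-channel 4x4 texture
    tex_x, tex_y = 1.25, 2.5             # continuous texture coordinates
    x_idx, y_idx = int(np.floor(tex_x)), int(np.floor(tex_y))
    x_wgh, y_wgh = tex_x - x_idx, tex_y - y_idx
    sample = (x_wgh * y_wgh * tex[x_idx + 1, y_idx + 1]
              + x_wgh * (1 - y_wgh) * tex[x_idx + 1, y_idx]
              + (1 - x_wgh) * y_wgh * tex[x_idx, y_idx + 1]
              + (1 - x_wgh) * (1 - y_wgh) * tex[x_idx, y_idx])
    print sample  # 7.5, i.e. the texture interpolated at (1.25, 2.5)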
def _infer_ndim_bcast(ndim, shape, *args):
    """
    Infer the number of dimensions from the shape or the other arguments.

    :rtype: (int, variable, tuple) triple, where the variable is an integer
        vector, and the tuple contains Booleans.

    :returns: the first element returned is the inferred number of
        dimensions. The second element is the shape inferred (combining
        symbolic and constant information from shape and args). The third
        element is a broadcasting pattern corresponding to that shape.
    """
    # Find the minimum value of ndim required by the *args
    if args:
        args_ndim = max(arg.ndim for arg in args)
    else:
        args_ndim = 0

    # there is a convention that -1 means the corresponding shape of a
    # potentially-broadcasted symbolic arg
    if (isinstance(shape, (tuple, list)) and
            numpy.all(numpy.asarray(shape) >= 0)):
        bcast = [(s == 1) for s in shape]
        v_shape = tensor.TensorConstant(type=tensor.lvector,
                                        data=theano._asarray(shape,
                                                             dtype='int64'))
        shape_ndim = len(shape)
        if ndim is None:
            ndim = shape_ndim
        else:
            if shape_ndim != ndim:
                raise ValueError(
                    'ndim should be equal to len(shape), but\n',
                    'ndim = %s, len(shape) = %s, shape = %s' %
                    (ndim, shape_ndim, shape))
    elif isinstance(shape, (tuple, list)):
        # there is a convention that -1 means the corresponding shape of a
        # potentially-broadcasted symbolic arg
        #
        # This case combines together symbolic and non-symbolic shape
        # information
        if ndim is None:
            ndim = args_ndim
        else:
            ndim = max(args_ndim, ndim)
        ndim = max(args_ndim, len(shape))
        shape = [-1] * (ndim - len(shape)) + list(shape)
        bcast = []
        pre_v_shape = []
        for i, s in enumerate(shape):
            if hasattr(s, 'type'):  # s is symbolic
                bcast.append(False)  # todo - introspect further
                pre_v_shape.append(s)
            else:
                if s >= 0:
                    pre_v_shape.append(tensor.as_tensor_variable(s))
                    bcast.append((s == 1))
                elif s == -1:
                    n_a_i = 0
                    for a in args:
                        # ndim: _ _ _ _ _ _
                        # ashp:     s0 s1 s2 s3
                        #                 i
                        if i >= ndim - a.ndim:
                            n_a_i += 1
                            a_i = i + a.ndim - ndim
                            if not a.broadcastable[a_i]:
                                pre_v_shape.append(a.shape[a_i])
                                bcast.append(False)
                                break
                    else:
                        if n_a_i == 0:
                            raise ValueError(
                                'Auto-shape of -1 must overlap '
                                'with the shape of one of the '
                                'broadcastable inputs')
                        else:
                            pre_v_shape.append(tensor.as_tensor_variable(1))
                            bcast.append(True)
                else:
                    raise ValueError('negative shape', s)
        # post-condition: shape may still contain both symbolic and
        # non-symbolic things
        v_shape = tensor.stack(*pre_v_shape)

    elif shape is None:
        # The number of drawn samples will be determined automatically,
        # but we need to know ndim
        if not args:
            raise TypeError('_infer_ndim_bcast cannot infer shape without'
                            ' either shape or args')
        template = reduce(lambda a, b: a + b, args)
        v_shape = template.shape
        bcast = template.broadcastable
        ndim = template.ndim

    else:
        v_shape = tensor.as_tensor_variable(shape)
        if ndim is None:
            ndim = tensor.get_vector_length(v_shape)
        bcast = [False] * ndim

    if (not (v_shape.dtype.startswith('int') or
             v_shape.dtype.startswith('uint'))):
        raise TypeError('shape must be an integer vector or list',
                        v_shape.dtype)

    if args_ndim > ndim:
        raise ValueError(
            'ndim should be at least as big as required by args value',
            (ndim, args_ndim), args)

    assert ndim == len(bcast)
    return ndim, tensor.cast(v_shape, 'int32'), tuple(bcast)
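# Usage sketch for _infer_ndim_bcast (illustrative names, not library
# code): combine a partially-known shape with a symbolic argument; -1
# entries are filled in from the argument's shape.
#
#     avg = tensor.fmatrix('avg')          # symbolic (m, n) argument
#     ndim, v_shape, bcast = _infer_ndim_bcast(None, (-1, 5), avg)
#     # ndim == 2; v_shape[0] is taken from avg.shape[0];
#     # bcast == (False, False)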