def main(sess):
  """Train a neural controller (W1, b1) for a 2D soft robot to reach a goal.

  Runs an endless SGD loop: sample a random goal, simulate 150 steps,
  differentiate through the simulator, apply one gradient step, and append
  the L2 goal distance to a log file.

  Args:
    sess: TensorFlow session used for graph construction and execution.

  NOTE(review): relies on many module-level names not visible in this chunk
  (batch_size, num_groups, W1, b1, multi_target, config, lr, gamma,
  goal_pos, goal_range, ...) — confirm against the file header.
  """
  t = time.time()
  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

  # Define your controller here
  def controller(state):
    # Observation: per-group mean position and velocity plus the goal
    # (goal is normalized relative to goal_pos/goal_range in multi-target
    # mode) -> 6 numbers per group.
    controller_inputs = []
    for i in range(num_groups):
      mask = particle_mask(i * group_num_particles,
                           (i + 1) * group_num_particles)[:, None, :] * (
                               1.0 / group_num_particles)
      pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
      vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
      controller_inputs.append(pos)
      controller_inputs.append(vel)
      if multi_target:
        controller_inputs.append((goal - goal_pos) / goal_range)
      else:
        controller_inputs.append(goal)
    # Batch, dim
    controller_inputs = tf.concat(controller_inputs, axis=1)
    assert controller_inputs.shape == (batch_size, 6 * num_groups), controller_inputs.shape
    controller_inputs = controller_inputs[:, :, None]
    assert controller_inputs.shape == (batch_size, 6 * num_groups, 1)
    # Batch, 6 * num_groups, 1
    # Single affine layer + tanh -> one bounded actuation per muscle group.
    intermediate = tf.matmul(
        W1[None, :, :] + tf.zeros(shape=[batch_size, 1, 1]),
        controller_inputs)
    # Batch, #actuations, 1
    assert intermediate.shape == (batch_size, len(actuations), 1)
    assert intermediate.shape[2] == 1
    intermediate = intermediate[:, :, 0]
    # Batch, #actuations
    actuation = tf.tanh(intermediate + b1[None, :]) * actuation_strength
    debug = {
        'controller_inputs': controller_inputs[:, :, 0],
        'actuation': actuation
    }
    total_actuation = 0
    zeros = tf.zeros(shape=(batch_size, num_particles))
    for i, group in enumerate(actuations):
      act = actuation[:, i:i + 1]
      assert len(act.shape) == 2
      mask = particle_mask_from_group(group)
      act = act * mask
      # First PK stress here
      act = make_matrix2d(zeros, zeros, zeros, act)
      # Convert to Kirchhoff stress
      total_actuation = total_actuation + act
    return total_actuation, debug

  res = (40, 40)
  bc = get_bounding_box_bc(res)
  if config == 'B':
    # Configuration B anchors the bottom grid rows.
    bc[0][:, :, :7] = -1  # Sticky
    bc[1][:, :, :7] = 0  # Sticky
  sim = Simulation(
      dt=0.005,
      num_particles=num_particles,
      grid_res=res,
      gravity=gravity,
      controller=controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess)
  print("Building time: {:.4f}s".format(time.time() - t))
  # Symbolic handle into the state's debug dict; after sim.run it carries
  # the last step's controller inputs, hence the `final_*` names
  # (presumed from usage — confirm against Simulation's documentation).
  final_state = sim.initial_state['debug']['controller_inputs']
  s = head * 6  # slice offset of the observed group (6 numbers per group)
  final_position = final_state[:, s:s + 2]
  final_velocity = final_state[:, s + 2:s + 4]
  loss1 = tf.reduce_mean(tf.reduce_sum((final_position - goal)**2, axis=1))
  loss2 = tf.reduce_mean(tf.reduce_sum(final_velocity**2, axis=1))
  loss = loss1 + gamma * loss2
  # Seed particles: a sample_density x sample_density lattice per group.
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          scale = 0.2
          u = ((x + 0.5) / sample_density * group_sizes[i][0] +
               offset[0]) * scale + 0.2
          v = ((y + 0.5) / sample_density * group_sizes[i][1] +
               offset[1]) * scale + 0.1
          initial_positions[b].append([u, v])
  assert len(initial_positions[0]) == num_particles
  initial_positions = np.array(initial_positions).swapaxes(1, 2)
  sess.run(tf.global_variables_initializer())
  initial_state = sim.get_initial_state(
      position=np.array(initial_positions), youngs_modulus=10)
  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.set_initial_state(initial_state=initial_state)
  sym = sim.gradients_sym(loss, variables=trainables)
  sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
  sim.add_vector_visualization(
      pos=final_position, vector=final_velocity, color=(0, 0, 1), scale=50)
  sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)
  if multi_target:
    fout = open('multi_target_{}.log'.format(lr), 'w')
  else:
    fout = open('single_target_{}.log'.format(lr), 'w')
  # Optimization loop
  for it in range(100000):
    t = time.time()
    goal_input = (
        (np.random.random([batch_size, 2]) - 0.5) * goal_range + goal_pos)
    print('train...')
    memo = sim.run(
        initial_state=initial_state,
        num_steps=150,
        iteration_feed_dict={goal: goal_input},
        loss=loss)
    grad = sim.eval_gradients(sym=sym, memo=memo)
    # NOTE(review): new assign ops are created every iteration, which grows
    # the TF1 graph without bound — consider the placeholder-fed pattern
    # used by the other demos in this file.
    gradient_descent = [
        v.assign(v - lr * g) for v, g in zip(trainables, grad)
    ]
    sess.run(gradient_descent)
    print('Iter {:5d} time {:.3f} loss {}'.format(it, time.time() - t,
                                                  memo.loss))
    loss_cal = memo.loss
    if False:  #i % 5 == 0:
      sim.visualize(memo, batch=0, interval=5)
      # sim.visualize(memo, batch = 1)
    print('L2:', loss_cal**0.5)
    print(it, 'L2 distance: ', loss_cal**0.5, file=fout)
# NOTE(review): unmatched triple-quote below begins a commented-out region
# whose closing quote lies outside this chunk.
'''
def generate_sim():  #utility function for ppo
  """Build a (80, 40)-grid simulation plus loss tensors for PPO-style training.

  Returns:
    (initial_state, sim, loss, loss_obs) where `loss` is the forward-progress
    reward `loss_fwd` and `loss_obs` is the full controller-input
    observation tensor.

  NOTE(review): relies on module-level names not visible in this chunk
  (goal, sess, num_groups, head, ...). In particular `actuation` is read
  inside `controller` but never defined here — this function may belong to
  a commented-out region; confirm before treating it as live code.
  """
  t = time.time()

  # Define your controller here
  def controller(state):
    # Observation: per-group mean position/velocity plus the goal.
    controller_inputs = []
    for i in range(num_groups):
      mask = particle_mask(i * group_num_particles,
                           (i + 1) * group_num_particles)[:, None, :] * (
                               1.0 / group_num_particles)
      pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
      vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
      controller_inputs.append(pos)
      controller_inputs.append(vel)
      controller_inputs.append(goal)
    # Batch, dim
    controller_inputs = tf.concat(controller_inputs, axis=1)
    assert controller_inputs.shape == (batch_size, 6 * num_groups), controller_inputs.shape
    controller_inputs = controller_inputs[:, :, None]
    assert controller_inputs.shape == (batch_size, 6 * num_groups, 1)
    # Batch, 6 * num_groups, 1
    #IPython.embed()
    debug = {
        'controller_inputs': controller_inputs[:, :, 0],
        # NOTE(review): `actuation` is undefined in this scope.
        'actuation': actuation
    }
    total_actuation = 0
    zeros = tf.zeros(shape=(batch_size, num_particles))
    for i, group in enumerate(actuations):
      act = actuation[:, i:i + 1]
      assert len(act.shape) == 2
      mask = particle_mask_from_group(group)
      act = act * mask
      # First PK stress here
      act = make_matrix2d(zeros, zeros, zeros, act)
      # Convert to Kirchhoff stress
      total_actuation = total_actuation + act
    return total_actuation, debug

  res = (80, 40)
  bc = get_bounding_box_bc(res)
  dt = 0.005
  sim = Simulation(
      dt=dt,
      num_particles=num_particles,
      grid_res=res,
      dx=1.0 / res[1],
      gravity=gravity,
      controller=controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess,
      scale=20)
  print("Building time: {:.4f}s".format(time.time() - t))
  final_state = sim.initial_state['debug']['controller_inputs']
  s = head * 6  # slice offset of the observed group (6 numbers per group)
  final_position = final_state[:, s:s + 2]
  final_velocity = final_state[:, s + 2:s + 4]
  loss1 = tf.reduce_mean(tf.reduce_sum((final_position - goal)**2, axis=1))
  loss2 = tf.reduce_mean(tf.reduce_sum(final_velocity**2, axis=1))
  loss_x = tf.reduce_mean(tf.reduce_sum(final_position[0, 0]))
  loss_y = tf.reduce_mean(tf.reduce_sum(final_position[0, 1]))
  loss_obs = final_state
  # x-velocity of the observed group scaled by dt: forward progress.
  loss_fwd = tf.reduce_mean(
      tf.reduce_sum(final_state[:, s + 2:s + 3], axis=1)) * dt
  loss = loss_fwd  #really, the reward forward
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          scale = 0.2
          u = ((x + 0.5) / sample_density * group_sizes[i][0] +
               offset[0]) * scale + 0.2
          v = ((y + 0.5) / sample_density * group_sizes[i][1] +
               offset[1]) * scale + 0.1
          initial_positions[b].append([u, v])
  assert len(initial_positions[0]) == num_particles
  initial_positions = np.array(initial_positions).swapaxes(1, 2)
  sess.run(tf.global_variables_initializer())
  initial_state = sim.get_initial_state(
      position=np.array(initial_positions), youngs_modulus=10)
  #trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
  sim.add_vector_visualization(
      pos=final_position, vector=final_velocity, color=(0, 0, 1), scale=50)
  return initial_state, sim, loss, loss_obs
def main(sess):
  """Co-optimize an open-loop actuation sequence and per-particle stiffness.

  Phrased as a constrained problem solved with pygmo/NLopt SLSQP: minimize
  actuation effort subject to position/velocity/acceleration losses staying
  inside a small ball (`goal_ball`).

  Args:
    sess: TensorFlow session.

  NOTE(review): depends on module-level names not visible in this chunk
  (actuation_seq, num_steps, num_acts, num_links, iter_, pg, ly,
  flatten_vectors, ...).
  """
  t = time.time()
  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

  # Define your controller here
  def controller(state):
    # Observation: per-group mean position, velocity and acceleration plus
    # the goal -> 8 numbers per group.
    controller_inputs = []
    for i in range(num_groups):
      mask = particle_mask(i * group_num_particles,
                           (i + 1) * group_num_particles)[:, None, :] * (
                               1.0 / group_num_particles)
      pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
      vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
      accel = tf.reduce_sum(mask * state.acceleration, axis=2, keepdims=False)
      controller_inputs.append(pos)
      controller_inputs.append(vel)
      controller_inputs.append(goal)
      controller_inputs.append(accel)
    # Batch, dim
    controller_inputs = tf.concat(controller_inputs, axis=1)
    assert controller_inputs.shape == (batch_size, 8 * num_groups), controller_inputs.shape
    controller_inputs = controller_inputs[:, :, None]
    assert controller_inputs.shape == (batch_size, 8 * num_groups, 1)
    # Open-loop control: look up the actuation for the current time slice
    # from the trainable sequence (no neural network here).
    actuation = tf.expand_dims(
        actuation_seq[0, (state.step_count - 1) // (num_steps // num_acts), :],
        0)
    debug = {
        'controller_inputs': controller_inputs[:, :, 0],
        'actuation': actuation,
        'acceleration': state.acceleration,
        'velocity': state.velocity
    }
    total_actuation = 0
    zeros = tf.zeros(shape=(batch_size, num_particles))
    for i, group in enumerate(actuations):
      act = actuation[:, i:i + 1]
      assert len(act.shape) == 2
      mask = particle_mask_from_group(group)
      act = act * mask
      # First PK stress here
      act = make_matrix2d(zeros, zeros, zeros, act)
      # Convert to Kirchhoff stress
      total_actuation = total_actuation + act
    return total_actuation, debug

  res = (30, 30)
  bc = get_bounding_box_bc(res)
  # Anchor the bottom grid rows so the robot base stays fixed.
  bc[0][:, :, :5] = -1  # Sticky
  bc[1][:, :, :5] = 0  # Sticky
  sim = Simulation(
      dt=0.0025,
      num_particles=num_particles,
      grid_res=res,
      gravity=gravity,
      controller=controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess)
  print("Building time: {:.4f}s".format(time.time() - t))
  final_state = sim.initial_state['debug']['controller_inputs']
  final_acceleration = sim.initial_state['debug']['acceleration']
  final_velocity_all = sim.initial_state['debug']['velocity']
  s = head * 8  # slice offset of the observed group (8 numbers per group)
  final_position = final_state[:, s:s + 2]
  final_velocity = final_state[:, s + 2:s + 4]
  final_accel = final_state[:, s + 6:s + 8]
  gamma = 0.0
  loss_position = tf.reduce_sum((final_position - goal)**2)
  loss_velocity = tf.reduce_mean(final_velocity_all**2) / 10.0
  loss_act = tf.reduce_sum(actuation_seq**2.0) / 10000.0
  loss_zero = tf.Variable(0.0, trainable=False)  # constant stand-in loss
  #loss_accel = tf.reduce_mean(final_acceleration ** 2.0) / 10000.0
  loss_accel = loss_zero  # acceleration loss currently disabled
  #IPython.embed()
  #acceleration_constraint = tf.reduce_sum(final_acceleration, axis=1)
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          scale = 0.2
          u = ((x + 0.5) / sample_density * group_sizes[i][0] +
               offset[0]) * scale + 0.2
          v = ((y + 0.5) / sample_density * group_sizes[i][1] +
               offset[1]) * scale + 0.1
          initial_positions[b].append([u, v])
  assert len(initial_positions[0]) == num_particles
  initial_positions = np.array(initial_positions).swapaxes(1, 2)
  # Per-particle stiffness is trainable (the "design" variables).
  youngs_modulus = tf.Variable(
      10.0 * tf.ones(shape=[1, 1, num_particles], dtype=tf.float32),
      trainable=True)
  initial_state = sim.get_initial_state(
      position=np.array(initial_positions),
      youngs_modulus=tf.identity(youngs_modulus))
  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sess.run(tf.global_variables_initializer())
  sim.set_initial_state(initial_state=initial_state)
  # One symbolic gradient per loss term so each can be evaluated separately.
  sym_pos = sim.gradients_sym(loss_position, variables=trainables)
  sym_vel = sim.gradients_sym(loss_velocity, variables=trainables)
  sym_act = sim.gradients_sym(loss_act, variables=trainables)
  sym_zero = sim.gradients_sym(loss_zero, variables=trainables)
  sym_accel = sim.gradients_sym(loss_accel, variables=trainables)
  #sym_acc = [sim.gradients_sym(acceleration, variables=trainables) for acceleration in acceleration_constraint]
  #sym_acc = tf.map_fn(lambda x : sim.gradients_sym(x, variables=trainables), acceleration_constraint)
  #acc_flat = flatten_vectors([final_acceleration])
  #sym_acc = tf.map_fn((lambda x : sim.gradients_sym(x, variables=trainables)), acc_flat)
  #IPython.embed()
  sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
  sim.add_vector_visualization(
      pos=final_position, vector=final_velocity, color=(0, 0, 1), scale=50)
  sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)
  goal_input = np.array(
      [[
          0.7 + (random.random() - 0.5) * goal_range * 2,
          0.5 + (random.random() - 0.5) * goal_range
      ] for _ in range(batch_size)],
      dtype=np.float32)

  def eval_sim(loss_tensor, sym_, need_grad=True):
    # One forward rollout; optionally also evaluate gradients.
    memo = sim.run(
        initial_state=initial_state,
        num_steps=num_steps,
        iteration_feed_dict={goal: goal_input},
        loss=loss_tensor)
    if need_grad:
      grad = sim.eval_gradients(sym=sym_, memo=memo)
    else:
      grad = None
    return memo.loss, grad, memo

  def flatten_trainables():
    # All trainable variables concatenated into one flat tensor.
    return tf.concat(
        [tf.squeeze(ly.flatten(trainable)) for trainable in trainables], 0)

  def assignment_run(xs):
    sess.run([trainable.assign(x) for x, trainable in zip(xs, trainables)])

  t = time.time()
  #loss_val, grad, memo = eval_sim(loss_position, sym_pos)
  #IPython.embed()
  #Begin optimization

  def assignment_helper(x):
    # Scatter a flat pygmo decision vector back into the TF variables.
    assignments = []
    idx = 0
    x = x.astype(np.float32)
    for v in trainables:
      #first, get count:
      var_cnt = tf.size(v).eval()
      assignments += [v.assign(tf.reshape(x[idx:idx + var_cnt], v.shape))]
      idx += var_cnt
    sess.run(assignments)

  class RobotProblem:
    """pygmo user-defined problem wrapping the simulator."""

    def __init__(self, use_act):
      # use_act selects effort (loss_act) vs constant-zero objective.
      self.use_act = use_act

    goal_ball = 0.0001  # constraint tolerance radius

    def fitness(self, x):
      # pygmo layout: [objective, inequality constraints...].
      assignment_helper(x)
      if self.use_act:
        loss_act_val, _, _ = eval_sim(loss_act, sym_act, need_grad=False)
      else:
        loss_act_val, _, _ = eval_sim(loss_zero, sym_zero, need_grad=False)
      loss_pos_val, _, _ = eval_sim(loss_position, sym_pos, need_grad=False)
      loss_accel_val, _, _ = eval_sim(loss_accel, sym_accel, need_grad=False)
      c1, _, memo = eval_sim(loss_velocity, sym_vel, need_grad=False)
      global iter_
      sim.visualize(memo, show=False, folder="arm_log/it{:04d}".format(iter_))
      iter_ += 1
      print('loss pos', loss_pos_val)
      print('loss vel', c1)
      print('loss accel', loss_accel_val)
      #IPython.embed()
      return [
          loss_act_val.astype(np.float64),
          loss_pos_val.astype(np.float64) - self.goal_ball,
          c1.astype(np.float64) - self.goal_ball,
          loss_accel_val.astype(np.float64) - self.goal_ball
      ]

    def get_nic(self):
      # Three inequality constraints (position, velocity, acceleration).
      return 3

    def get_nec(self):
      return 0

    def gradient(self, x):
      # Gradients concatenated in the same order as fitness().
      # NOTE(review): each eval_sim re-simulates — four rollouts per call.
      assignment_helper(x)
      _, grad_position, _ = eval_sim(loss_position, sym_pos)
      _, grad_velocity, _ = eval_sim(loss_velocity, sym_vel)
      _, grad_accel, _ = eval_sim(loss_accel, sym_accel)
      if self.use_act:
        _, grad_act, _ = eval_sim(loss_act, sym_act)
      else:
        _, grad_act, _ = eval_sim(loss_zero, sym_zero)
      return np.concatenate([
          flatten_vectors(grad_act).eval().astype(np.float64),
          flatten_vectors(grad_position).eval().astype(np.float64),
          flatten_vectors(grad_velocity).eval().astype(np.float64),
          flatten_vectors(grad_accel).eval().astype(np.float64)
      ])
      #return flatten_vectors(grad).eval().astype(np.float64)

    def get_bounds(self):
      #actuation
      lb = []
      ub = []
      acts = trainables[0]
      lb += [-1.0 / num_links] * tf.size(acts).eval()
      ub += [1.0 / num_links] * tf.size(acts).eval()
      designs = trainables[1]
      lb += [3] * tf.size(designs).eval()
      ub += [40] * tf.size(designs).eval()
      return (lb, ub)

  #IPython.embed()
  uda = pg.nlopt("slsqp")
  #uda = ppnf.snopt7(screen_output = False, library = "/home/aespielberg/snopt/lib/libsnopt7.so")
  algo = pg.algorithm(uda)
  #algo.extract(pg.nlopt).local_optimizer = pg.nlopt('lbfgs')
  algo.extract(pg.nlopt).maxeval = 50
  algo.set_verbosity(1)
  # Phase 1: feasibility only (constant-zero objective).
  udp = RobotProblem(False)
  bounds = udp.get_bounds()
  mean = (np.array(bounds[0]) + np.array(bounds[1])) / 2.0
  num_vars = len(mean)
  prob = pg.problem(udp)
  pop = pg.population(prob, size=1)
  #TODO: initialize both parts different here
  acts = trainables[0]
  designs = trainables[1]
  std_act = np.ones(tf.size(acts).eval()) * 0.1
  std_young = np.ones(tf.size(designs).eval()) * 0.0
  #IPython.embed()
  std = np.concatenate([std_act, std_young])
  #act_part = np.random.normal(scale=0.1, loc=mean, size=(tf.size(acts).eval(),))
  #young_part = 10.0 * tf.size(designs).eval()
  pop.set_x(0, np.random.normal(scale=std, loc=mean, size=(num_vars, )))
  #IPython.embed()
  pop.problem.c_tol = [1e-6] * prob.get_nc()
  #pop.problem.c_tol = [1e-4] * prob.get_nc()
  pop.problem.f_tol_rel = [100000.0]
  #IPython.embed()
  pop = algo.evolve(pop)
  IPython.embed()  # drops into an interactive shell after phase 1
  #IPython.embed()
  #We need to refactor this for real
  old_x = pop.champion_x
  # NOTE(review): everything below is unreachable because of this assert.
  assert False
  # Phase 2: minimize actuation effort, warm-started from phase 1.
  udp = RobotProblem(True)
  prob = pg.problem(udp)
  pop = pg.population(prob, size=1)
  pop.set_x(0, old_x)
  pop.problem.c_tol = [1e-6] * prob.get_nc()
  #pop.problem.f_tol = [1e-6]
  pop.problem.f_tol_rel = [1e-4]
  pop = algo.evolve(pop)
  #now a second time
  # NOTE(review): `loss` is undefined in this function and eval_sim requires
  # a `sym_` argument — this call would raise if ever reached.
  _, _, memo = eval_sim(loss)
  sim.visualize(memo)
def main(sess):
  """Toy demo: optimize a block's shared initial velocity so its center of
  mass lands on a fixed goal after `steps` MPM steps.

  Args:
    sess: TensorFlow session used to build and run the simulation graph.

  Side effects: mutates the TF default graph, runs a 100-iteration
  gradient-descent loop, and prints per-iteration timing and loss.
  """
  batch_size = 1
  gravity = (0, -1)
  N = 10
  num_particles = N * N
  steps = 150
  dt = 1e-2
  goal_range = 0.15  # unused here; kept for parity with sibling demos
  res = (30, 30)
  bc = get_bounding_box_bc(res)
  lr = 1e-2
  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')
  sim = Simulation(
      dt=dt,
      num_particles=num_particles,
      grid_res=res,
      bc=bc,
      gravity=gravity,
      m_p=1,
      V_p=1,
      E=10,
      nu=0.3,
      sess=sess)
  position = np.zeros(shape=(batch_size, num_particles, 2))
  # The optimized quantity: one shared initial (vx, vy) for all particles.
  velocity_ph = tf.Variable([0.2, 0.3], trainable=True)
  velocity = velocity_ph[None, :, None] + tf.zeros(
      shape=[batch_size, 2, num_particles], dtype=tf.float32)
  # Seed an N x N particle lattice near the left wall.
  for b in range(batch_size):
    for i in range(N):
      for j in range(N):
        position[b, i * N + j] = ((i * 0.5 + 3) / 30, (j * 0.5 + 12.75) / 30)
  position = np.array(position).swapaxes(1, 2)  # -> (batch, dim, particles)
  sess.run(tf.global_variables_initializer())
  initial_state = sim.get_initial_state(position=position, velocity=velocity)
  final_position = sim.initial_state.center_of_mass()
  loss = tf.reduce_sum((final_position - goal)**2)
  sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)
  sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.set_initial_state(initial_state=initial_state)
  sym = sim.gradients_sym(loss, variables=trainables)
  goal_input = np.array([[0.7, 0.3]], dtype=np.float32)

  # Build the SGD update ops ONCE, fed through placeholders.  The original
  # created fresh `assign` ops inside the loop, which leaks graph nodes on
  # every iteration in TF1 (same placeholder pattern as the other demos in
  # this file).
  grad_ph = [
      tf.placeholder(shape=v.shape, dtype=tf.float32) for v in trainables
  ]
  gradient_descent = [
      v.assign(v - lr * g) for v, g in zip(trainables, grad_ph)
  ]

  for i in range(100):
    t = time.time()
    memo = sim.run(
        initial_state=initial_state,
        num_steps=steps,
        iteration_feed_dict={goal: goal_input},
        loss=loss)
    grad = sim.eval_gradients(sym, memo)
    sess.run(gradient_descent, feed_dict=dict(zip(grad_ph, grad)))
    print('iter {:5d} time {:.3f} loss {:.4f}'.format(
        i, time.time() - t, memo.loss))
import time from simulation import Simulation, get_bounding_box_bc import tensorflow as tf import numpy as np from IPython import embed batch_size = 1 gravity = (0, 0) N = 10 group_particles = N * N * 2 num_particles = group_particles * 2 steps = 100 dt = 5e-3 goal_range = 0.15 res = (100, 100) bc = get_bounding_box_bc(res) lr = 5e-1 def main(sess): goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal') sim = Simulation(dt=dt, num_particles=num_particles, grid_res=res, bc=bc, gravity=gravity, E=1, m_p=1,
def main(sess):
  """3D walker demo: epoch loop of simulate / backprop / clipped SGD steps.

  Args:
    sess: TensorFlow session.

  NOTE(review): `controller=None` below disables the locally defined
  controller, and an early `sim.run` references `initial_state`,
  `goal_input` and `loss` before they are defined (NameError if executed)
  — looks like misplaced benchmarking code; confirm before running.
  """
  t = time.time()
  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 3], name='goal')

  # Define your controller here
  def controller(state):
    # Observation: per-group mean position and velocity plus the normalized
    # goal -> 9 numbers per group in 3D.
    controller_inputs = []
    for i in range(num_groups):
      mask = particle_mask(i * group_num_particles,
                           (i + 1) * group_num_particles)[:, None, :] * (
                               1.0 / group_num_particles)
      pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
      vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
      controller_inputs.append(pos)
      controller_inputs.append(vel)
      controller_inputs.append(
          (goal - goal_pos) / np.maximum(goal_range, 1e-5))
    # Batch, dim
    controller_inputs = tf.concat(controller_inputs, axis=1)
    assert controller_inputs.shape == (batch_size, 9 * num_groups), controller_inputs.shape
    controller_inputs = controller_inputs[:, :, None]
    assert controller_inputs.shape == (batch_size, 9 * num_groups, 1)
    # Batch, 6 * num_groups, 1
    intermediate = tf.matmul(
        W1[None, :, :] + tf.zeros(shape=[batch_size, 1, 1]),
        controller_inputs)
    # Batch, #actuations, 1
    assert intermediate.shape == (batch_size, len(actuations), 1)
    assert intermediate.shape[2] == 1
    intermediate = intermediate[:, :, 0]
    # Batch, #actuations
    actuation = tf.tanh(intermediate + b1[None, :]) * actuation_strength
    debug = {
        'controller_inputs': controller_inputs[:, :, 0],
        'actuation': actuation
    }
    total_actuation = 0
    zeros = tf.zeros(shape=(batch_size, num_particles))
    for i, group in enumerate(actuations):
      act = actuation[:, i:i + 1]
      assert len(act.shape) == 2
      mask = particle_mask_from_group(group)
      act = act * mask
      # 3x3 actuation stress with only the middle (y,y) entry active.
      act = make_matrix3d(zeros, zeros, zeros, zeros, act, zeros, zeros,
                          zeros, zeros)
      total_actuation = total_actuation + act
    return total_actuation, debug

  res = (60, 30, 30)
  bc = get_bounding_box_bc(res)
  sim = Simulation(
      dt=0.007,
      num_particles=num_particles,
      grid_res=res,
      dx=1.0 / res[1],
      gravity=gravity,
      controller=None,  #controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess,
      E=15,
      part_size=10)
  print("Building time: {:.4f}s".format(time.time() - t))
  tt = time.time()
  # NOTE(review): this run uses initial_state/goal_input/loss before they
  # are defined below — NameError if executed; likely dead benchmark code.
  memo = sim.run(
      initial_state=initial_state,
      num_steps=400,
      iteration_feed_dict={goal: goal_input},
      loss=loss)
  print('forward', time.time() - tt)
  tt = time.time()
  final_state = sim.initial_state['debug']['controller_inputs']
  s = head * 9  # slice offset of the observed group (9 numbers per group)
  final_position = final_state[:, s:s + 3]
  final_velocity = final_state[:, s + 3:s + 6]
  loss1 = tf.reduce_mean(tf.reduce_sum((final_position - goal)**2, axis=1))
  loss2 = tf.reduce_mean(tf.reduce_sum(final_velocity**2, axis=1))
  loss = loss1 + gamma * loss2
  # Seed particles: a cubic lattice per group.
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          for z in range(sample_density):
            scale = 0.2
            u = ((x + 0.5) / sample_density * group_sizes[i][0] +
                 offset[0]) * scale + 0.2
            v = ((y + 0.5) / sample_density * group_sizes[i][1] +
                 offset[1]) * scale + 0.1
            w = ((z + 0.5) / sample_density * group_sizes[i][2] +
                 offset[2]) * scale + 0.1
            initial_positions[b].append([u, v, w])
  assert len(initial_positions[0]) == num_particles
  initial_positions = np.array(initial_positions).swapaxes(1, 2)
  sess.run(tf.global_variables_initializer())
  initial_state = sim.get_initial_state(
      position=np.array(initial_positions), youngs_modulus=10)
  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.set_initial_state(initial_state=initial_state)
  tt = time.time()
  sym = sim.gradients_sym(loss, variables=trainables)
  print('sym', time.time() - tt)
  gx, gy, gz = goal_range
  pos_x, pos_y, pos_z = goal_pos
  # A single random goal batch (list of length 1 -> one "training set").
  goal_train = [
      np.array(
          [[
              pos_x + (random.random() - 0.5) * gx,
              pos_y + (random.random() - 0.5) * gy,
              pos_z + (random.random() - 0.5) * gz
          ] for _ in range(batch_size)],
          dtype=np.float32) for __ in range(1)
  ]
  vis_id = list(range(batch_size))
  random.shuffle(vis_id)
  # Gradients are fed through placeholders so the assign ops are built once
  # (avoids growing the TF1 graph inside the training loop).
  grad_ph = [
      tf.placeholder(shape=v.shape, dtype=tf.float32) for v in trainables
  ]
  gradient_descent = [
      v.assign(v - lr * g) for v, g in zip(trainables, grad_ph)
  ]
  # Optimization loop
  for e in range(200):
    t = time.time()
    print('Epoch {:5d}, learning rate {}'.format(e, lr))
    loss_cal = 0.
    print('train...')
    for it, goal_input in enumerate(goal_train):
      tt = time.time()
      memo = sim.run(
          initial_state=initial_state,
          num_steps=400,
          iteration_feed_dict={goal: goal_input},
          loss=loss)
      print('forward', time.time() - tt)
      tt = time.time()
      grad = sim.eval_gradients(sym=sym, memo=memo)
      print('backward', time.time() - tt)
      for i, g in enumerate(grad):
        print(i, np.mean(np.abs(g)))
      grad = [np.clip(g, -1, 1) for g in grad]  # clip to stabilize training
      grad_feed_dict = {}
      for gp, g in zip(grad_ph, grad):
        grad_feed_dict[gp] = g
      sess.run(gradient_descent, feed_dict=grad_feed_dict)
      print('Iter {:5d} time {:.3f} loss {}'.format(it, time.time() - t,
                                                    memo.loss))
      loss_cal = loss_cal + memo.loss
    # Disabled per-epoch visualization (kept as a string-literal comment):
    '''
    if e % 1 == 0:
      sim.visualize(memo, batch=random.randrange(batch_size), export=None,
                    show=True, interval=5,
                    folder='walker3d_demo/{:04d}/'.format(e))
    '''
    #exp.export()
    print('train loss {}'.format(loss_cal / len(goal_train)))
def main(sess):
  """2D reaching demo with three optimizer back ends.

  Depending on module-level flags (not visible in this chunk):
    * use_pygmo  - two-phase constrained optimization via pygmo/NLopt SLSQP;
    * use_bfgs   - hand-rolled BFGS, optionally with a Wolfe line search;
    * otherwise  - plain gradient descent.

  Args:
    sess: TensorFlow session.

  NOTE(review): original formatting was lost; statement nesting in the BFGS
  section was reconstructed from token order — verify against the original.
  """
  t = time.time()
  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

  # Define your controller here
  def controller(state):
    # Observation: per-group mean position and velocity plus the goal.
    controller_inputs = []
    for i in range(num_groups):
      mask = particle_mask(i * group_num_particles,
                           (i + 1) * group_num_particles)[:, None, :] * (
                               1.0 / group_num_particles)
      pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
      vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
      controller_inputs.append(pos)
      controller_inputs.append(vel)
      controller_inputs.append(goal)
    # Batch, dim
    controller_inputs = tf.concat(controller_inputs, axis=1)
    assert controller_inputs.shape == (batch_size, 6 * num_groups), controller_inputs.shape
    controller_inputs = controller_inputs[:, :, None]
    assert controller_inputs.shape == (batch_size, 6 * num_groups, 1)
    # Batch, 6 * num_groups, 1
    if nn_control:
      # Closed-loop: single affine layer + tanh.
      intermediate = tf.matmul(
          W1[None, :, :] + tf.zeros(shape=[batch_size, 1, 1]),
          controller_inputs)
      # Batch, #actuations, 1
      assert intermediate.shape == (batch_size, len(actuations), 1)
      assert intermediate.shape[2] == 1
      intermediate = intermediate[:, :, 0]
      # Batch, #actuations
      actuation = tf.tanh(intermediate + b1[None, :]) * actuation_strength
    else:
      #IPython.embed()
      # Open-loop: read the actuation for the current time slice.
      actuation = tf.expand_dims(
          actuation_seq[0, state.step_count // (num_steps // num_acts), :],
          0)
    debug = {
        'controller_inputs': controller_inputs[:, :, 0],
        'actuation': actuation
    }
    total_actuation = 0
    zeros = tf.zeros(shape=(batch_size, num_particles))
    for i, group in enumerate(actuations):
      act = actuation[:, i:i + 1]
      assert len(act.shape) == 2
      mask = particle_mask_from_group(group)
      act = act * mask
      # First PK stress here
      act = make_matrix2d(zeros, zeros, zeros, act)
      # Convert to Kirchhoff stress
      total_actuation = total_actuation + act
    return total_actuation, debug

  res = (30, 30)
  bc = get_bounding_box_bc(res)
  if config == 'B':
    bc[0][:, :, :5] = -1  # Sticky
    bc[1][:, :, :5] = 0  # Sticky
  sim = Simulation(
      dt=0.0025,
      num_particles=num_particles,
      grid_res=res,
      gravity=gravity,
      controller=controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess)
  print("Building time: {:.4f}s".format(time.time() - t))
  final_state = sim.initial_state['debug']['controller_inputs']
  s = head * 6  # slice offset of the observed group (6 numbers per group)
  final_position = final_state[:, s:s + 2]
  final_velocity = final_state[:, s + 2:s + 4]
  gamma = 0.0
  loss1 = tf.reduce_sum((final_position - goal)**2)
  loss2 = tf.reduce_sum(final_velocity**2)
  loss_velocity = loss2
  loss_act = tf.reduce_sum(actuation_seq**2.0)
  loss_zero = tf.reduce_sum(actuation_seq * 0.0)  # constant-zero objective
  loss = loss1 + gamma * loss2
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          scale = 0.2
          u = ((x + 0.5) / sample_density * group_sizes[i][0] +
               offset[0]) * scale + 0.2
          v = ((y + 0.5) / sample_density * group_sizes[i][1] +
               offset[1]) * scale + 0.1
          initial_positions[b].append([u, v])
  assert len(initial_positions[0]) == num_particles
  initial_positions = np.array(initial_positions).swapaxes(1, 2)
  # Per-particle stiffness is trainable (the "design" variables).
  youngs_modulus = tf.Variable(
      10.0 * tf.ones(shape=[1, 1, num_particles], dtype=tf.float32),
      trainable=True)
  initial_state = sim.get_initial_state(
      position=np.array(initial_positions),
      youngs_modulus=tf.identity(youngs_modulus))
  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  if use_bfgs:
    # One BFGS Hessian approximation per trainable variable.
    B = [
        tf.Variable(tf.eye(tf.size(trainable)), trainable=False)
        for trainable in trainables
    ]
  sess.run(tf.global_variables_initializer())
  sim.set_initial_state(initial_state=initial_state)
  sym = sim.gradients_sym(loss, variables=trainables)
  sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
  sim.add_vector_visualization(
      pos=final_position, vector=final_velocity, color=(0, 0, 1), scale=50)
  sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)
  if config == 'A':
    goal_input = np.array(
        [[
            0.5 + (random.random() - 0.5) * goal_range * 2,
            0.6 + (random.random() - 0.5) * goal_range
        ] for _ in range(batch_size)],
        dtype=np.float32)
  elif config == 'B':
    goal_input = np.array(
        [[
            0.65 + (random.random() - 0.5) * goal_range * 2,
            0.55 + (random.random() - 0.5) * goal_range
        ] for _ in range(batch_size)],
        dtype=np.float32)
  # Optimization loop
  #IPython.embed()
  #In progress code
  '''
  memo = sim.run(
      initial_state=initial_state,
      num_steps=num_steps,
      iteration_feed_dict={goal: goal_input},
      loss=loss)
  IPython.embed()
  def loss_callback():
    memo = sim.run(
        initial_state=initial_state,
        num_steps=num_steps,
        iteration_feed_dict={goal: goal_input},
        loss=loss)
    return loss
  '''
  # Wolfe line-search constants (sufficient decrease / curvature).
  c1 = 1e-4
  c2 = 0.9

  def eval_sim(loss_tensor):
    # One forward rollout plus gradient evaluation.
    memo = sim.run(
        initial_state=initial_state,
        num_steps=num_steps,
        iteration_feed_dict={goal: goal_input},
        loss=loss_tensor)
    grad = sim.eval_gradients(sym=sym, memo=memo)
    return memo.loss, grad, memo

  def flatten_trainables():
    return tf.concat(
        [tf.squeeze(ly.flatten(trainable)) for trainable in trainables], 0)

  def flatten_vectors(vectors):
    return tf.concat(
        [tf.squeeze(ly.flatten(vector)) for vector in vectors], 0)

  def assignment_run(xs):
    sess.run([trainable.assign(x) for x, trainable in zip(xs, trainables)])

  def f_and_grad_step(step_size, x, delta_x):
    # Evaluate f and grad at x + step_size * delta_x, then restore x.
    old_x = [x_i.eval() for x_i in x]
    assignment_run([
        x_i + step_size * delta_x_i for x_i, delta_x_i in zip(x, delta_x)
    ])  #take step
    # NOTE(review): assigning the local name `loss` while also passing it to
    # eval_sim makes `loss` local -> UnboundLocalError when called; the
    # argument was presumably meant to be the enclosing `loss` tensor.
    loss, grad, _ = eval_sim(loss)
    assignment_run(old_x)  #revert
    return loss, grad

  def wolfe_1(delta_x, new_f, current_f, current_grad, step_size):
    # Armijo sufficient-decrease condition.
    valid = new_f <= current_f + c1 * step_size * tf.tensordot(
        flatten_vectors(current_grad), flatten_vectors(delta_x), 1)
    return valid.eval()

  def wolfe_2(delta_x, new_grad, current_grad, step_size):
    # Strong curvature condition.
    valid = np.abs(
        tf.tensordot(flatten_vectors(new_grad), flatten_vectors(delta_x),
                     1).eval()) <= -c2 * tf.tensordot(
                         flatten_vectors(current_grad),
                         flatten_vectors(delta_x), 1).eval()
    return valid

  def zoom(a_min, a_max, search_dirs, current_f, current_grad):
    # Bisection "zoom" phase of the Wolfe line search.
    while True:
      a_mid = (a_min + a_max) / 2.0
      print('a_min: ', a_min, 'a_max: ', a_max, 'a_mid: ', a_mid)
      step_loss_min, step_grad_min = f_and_grad_step(a_min, trainables,
                                                     search_dirs)
      step_loss, step_grad = f_and_grad_step(a_mid, trainables, search_dirs)
      valid_1 = wolfe_1(search_dirs, step_loss, current_f, current_grad,
                        a_mid)
      valid_2 = wolfe_2(search_dirs, step_grad, current_grad, a_mid)
      if not valid_1 or step_loss >= step_loss_min:
        a_max = a_mid
      else:
        if valid_2:
          return a_mid
        if tf.tensordot(
            flatten_vectors(step_grad), flatten_vectors(search_dirs),
            1) * (a_max - a_min) >= 0:
          a_max = a_min
        a_min = a_mid

  loss_val, grad, memo = eval_sim(
      loss
  )  #TODO: this is to get dimensions, find a better way to do this without simming
  old_g_flat = [None] * len(grad)
  old_v_flat = [None] * len(grad)
  t = time.time()
  loss_val, grad, memo = eval_sim(loss)
  #BFGS update:
  #IPython.embed()
  if use_pygmo:

    def assignment_helper(x):
      # Scatter a flat pygmo decision vector back into the TF variables.
      assignments = []
      idx = 0
      x = x.astype(np.float32)
      for v in trainables:
        #first, get count:
        var_cnt = tf.size(v).eval()
        assignments += [v.assign(tf.reshape(x[idx:idx + var_cnt], v.shape))]
        idx += var_cnt
      sess.run(assignments)

    class RobotProblem:
      """pygmo user-defined problem wrapping the simulator."""

      def __init__(self, use_act):
        # use_act selects effort (loss_act) vs constant-zero objective.
        self.use_act = use_act

      goal_ball = 0.002  # constraint tolerance radius

      def fitness(self, x):
        # pygmo layout: [objective, inequality constraints...].
        assignment_helper(x)
        if self.use_act:
          loss_act_val, _, _ = eval_sim(loss_act)
        else:
          loss_act_val, _, _ = eval_sim(loss_zero)
        loss_val, _, _ = eval_sim(loss)
        # NOTE(review): this local c1 shadows the Wolfe constant c1.
        c1, _, memo = eval_sim(loss_velocity)
        sim.visualize(memo)
        return [
            loss_act_val.astype(np.float64),
            loss_val.astype(np.float64) - self.goal_ball,
            c1.astype(np.float64) - self.goal_ball
        ]

      def get_nic(self):
        # Two inequality constraints (position, velocity).
        return 2

      def get_nec(self):
        return 0

      def gradient(self, x):
        # Gradients concatenated in the same order as fitness().
        assignment_helper(x)
        _, grad, _ = eval_sim(loss)
        _, grad_velocity, _ = eval_sim(loss_velocity)
        _, grad_act, _ = eval_sim(loss_act)
        return np.concatenate([
            flatten_vectors(grad_act).eval().astype(np.float64),
            flatten_vectors(grad).eval().astype(np.float64),
            flatten_vectors(grad_velocity).eval().astype(np.float64)
        ])
        #return flatten_vectors(grad).eval().astype(np.float64)

      def get_bounds(self):
        #actuation
        lb = []
        ub = []
        acts = trainables[0]
        lb += [-5] * tf.size(acts).eval()
        ub += [5] * tf.size(acts).eval()
        designs = trainables[1]
        lb += [5] * tf.size(designs).eval()
        ub += [20] * tf.size(designs).eval()
        return (lb, ub)

    #IPython.embed()
    uda = pg.nlopt("slsqp")
    #uda = ppnf.snopt7(screen_output = False, library = "/home/aespielberg/snopt/lib/libsnopt7.so")
    algo = pg.algorithm(uda)
    #algo.extract(pg.nlopt).local_optimizer = pg.nlopt('lbfgs')
    algo.extract(pg.nlopt).maxeval = 20
    algo.set_verbosity(1)
    # Phase 1: feasibility only (constant-zero objective).
    udp = RobotProblem(False)
    bounds = udp.get_bounds()
    mean = (np.array(bounds[0]) + np.array(bounds[1])) / 2.0
    num_vars = len(mean)
    prob = pg.problem(udp)
    pop = pg.population(prob, size=1)
    pop.set_x(0, np.random.normal(scale=0.3, loc=mean, size=(num_vars, )))
    pop.problem.c_tol = [1e-4] * prob.get_nc()
    #pop.problem.c_tol = [1e-4] * prob.get_nc()
    pop.problem.f_tol_rel = [100000.0]
    #IPython.embed()
    pop = algo.evolve(pop)
    IPython.embed()  # interactive inspection after phase 1
    #IPython.embed()
    #We need to refactor this for real
    old_x = pop.champion_x
    # Phase 2: minimize actuation effort, warm-started from phase 1.
    udp = RobotProblem(True)
    prob = pg.problem(udp)
    pop = pg.population(prob, size=1)
    pop.set_x(0, old_x)
    pop.problem.c_tol = [1e-4] * prob.get_nc()
    #pop.problem.f_tol = [1e-6]
    pop.problem.f_tol_rel = [1e-4]
    pop = algo.evolve(pop)
    #now a second time
    _, _, memo = eval_sim(loss)
    sim.visualize(memo)
    return

  for i in range(1000000):
    if use_bfgs:
      bfgs = [None] * len(grad)
      B_update = [None] * len(grad)
      search_dirs = [None] * len(grad)
      #TODO: for now, assuming there is only one trainable and one grad for ease
      for v, g, idx in zip(trainables, grad, range(len(grad))):
        g_flat = ly.flatten(g)
        v_flat = ly.flatten(v)
        # NOTE(review): `==` on a tf.Variable is not an identity test;
        # presumably `B[idx] is None` was intended.
        if B[idx] == None:
          B[idx] = tf.eye(tf.size(v_flat))
        if i > 0:
          # Rank-two BFGS update of the approximate Hessian.
          y_flat = tf.squeeze(g_flat - old_g_flat[idx])
          s_flat = tf.squeeze(v_flat - old_v_flat[idx])
          B_s_flat = tf.tensordot(B[idx], s_flat, 1)
          term_1 = -tf.tensordot(B_s_flat, tf.transpose(B_s_flat),
                                 0) / tf.tensordot(s_flat, B_s_flat, 1)
          term_2 = tf.tensordot(y_flat, y_flat, 0) / tf.tensordot(
              y_flat, s_flat, 1)
          B_update[idx] = B[idx].assign(B[idx] + term_1 + term_2)
          sess.run([B_update[idx]])
        # Reset a nearly singular B to identity and fall back to steepest
        # descent; otherwise take the Newton-like direction -B^{-1} g.
        if tf.abs(tf.matrix_determinant(B[idx])).eval() < 1e-6:
          sess.run([B[idx].assign(tf.eye(tf.size(v_flat)))])
          search_dir = -tf.transpose(g_flat)
        else:
          #search_dir = -tf.matrix_solve_ls(B[idx],tf.transpose(g_flat), l2_regularizer=0.0, fast=True) #adding regularizer for stability
          search_dir = -tf.matmul(
              tf.linalg.inv(B[idx]),
              tf.transpose(g_flat))  #TODO: inverse bad,speed htis up
        search_dir_reshape = tf.reshape(search_dir, g.shape)
        search_dirs[idx] = search_dir_reshape
        old_g_flat[idx] = g_flat
        old_v_flat[idx] = v_flat.eval()
        #TODO: B upate
      #Now it's linesearch time
      if wolfe_search:
        a_max = 0.1
        a_1 = a_max / 2.0
        a_0 = 0.0
        iterate = 1
        while True:
          step_loss, step_grad = f_and_grad_step(a_1, trainables,
                                                 search_dirs)
          print(a_1)
          valid_1 = wolfe_1(search_dirs, step_loss, loss_val, grad, a_1)
          valid_2 = wolfe_2(search_dirs, step_grad, grad, a_1)
          print('wolfe 1: ', valid_1, 'wolfe 2: ', valid_2)
          if (not valid_1) or (iterate > 1 and step_loss > loss_val):
            print('cond1')
            # NOTE(review): no `break` after this zoom — control falls
            # through; a standard Wolfe search would stop here.
            a = zoom(a_0, a_1, search_dirs, loss_val, grad)
          if valid_2:
            print('cond2')
            a = a_1
            break
          if tf.tensordot(
              flatten_vectors(step_grad), flatten_vectors(search_dirs),
              1).eval() >= 0:
            print('cond3')
            # NOTE(review): current_f/current_grad are zoom() parameter
            # names, undefined in this scope — NameError if reached.
            a = zoom(a_1, a_0, search_dirs, current_f, current_grad)
            break
          print('no cond')
          temp = a_1
          a_1 = (a_1 + a_max) / 2.0
          a_0 = temp
          iterate += 1
          if iterate > 5:  #close enough
            a = a_1
            break
      else:
        a = lr
      for v, idx in zip(trainables, range(len(grad))):
        print('final a ', a)
        bfgs[idx] = v.assign(v + search_dirs[idx] * a)
      sess.run(bfgs)
      print('stepped!!')
    else:
      gradient_descent = [
          v.assign(v - lr * g) for v, g in zip(trainables, grad)
      ]
      sess.run(gradient_descent)
    print('iter {:5d} time {:.3f} loss {:.4f}'.format(i, time.time() - t,
                                                      memo.loss))
    if i % 1 == 0:
      sim.visualize(memo)
  #in progress code
# NOTE(review): unmatched triple-quote below begins a commented-out region
# whose closing quote lies outside this chunk.
'''
def main(sess):
  """Build a differentiable walker simulation and run its training loop.

  Constructs a `Simulation` whose controller is a single linear layer
  (W1, b1) mapping per-group averaged particle positions/velocities plus the
  normalized goal offset to per-group actuation stresses, then repeatedly
  runs forward simulation and gradient evaluation over sampled goals.

  Args:
    sess: active TensorFlow session used to build and run the graph.

  NOTE(review): relies on module-level globals defined elsewhere in this file
  (batch_size, num_groups, group_num_particles, W1, b1, actuations,
  actuation_strength, goal_pos, goal_range, head, gamma, sample_density,
  group_offsets, group_sizes, num_particles, gravity, lr, ...).
  """
  t = time.time()

  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

  # Define your controller here
  def controller(state):
    """Linear state-feedback controller; returns (Kirchhoff stress, debug)."""
    controller_inputs = []
    for i in range(num_groups):
      # Mean position/velocity over the particles of group i.
      mask = particle_mask(i * group_num_particles,
                           (i + 1) * group_num_particles)[:, None, :] * (
                               1.0 / group_num_particles)
      pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
      vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
      controller_inputs.append(pos)
      controller_inputs.append(vel)
      # Goal offset normalized by goal_range; epsilon guards a zero range.
      controller_inputs.append((goal - goal_pos) / np.maximum(goal_range, 1e-5))
    # Batch, dim
    controller_inputs = tf.concat(controller_inputs, axis=1)
    assert controller_inputs.shape == (batch_size, 6 * num_groups), controller_inputs.shape
    controller_inputs = controller_inputs[:, :, None]
    assert controller_inputs.shape == (batch_size, 6 * num_groups, 1)
    # Batch, 6 * num_groups, 1
    intermediate = tf.matmul(
        W1[None, :, :] + tf.zeros(shape=[batch_size, 1, 1]), controller_inputs)
    # Batch, #actuations, 1
    assert intermediate.shape == (batch_size, len(actuations), 1)
    assert intermediate.shape[2] == 1
    intermediate = intermediate[:, :, 0]
    # Batch, #actuations
    actuation = tf.tanh(intermediate + b1[None, :]) * actuation_strength
    debug = {
        'controller_inputs': controller_inputs[:, :, 0],
        'actuation': actuation
    }
    total_actuation = 0
    zeros = tf.zeros(shape=(batch_size, num_particles))
    for i, group in enumerate(actuations):
      act = actuation[:, i:i + 1]
      assert len(act.shape) == 2
      mask = particle_mask_from_group(group)
      act = act * mask
      # First PK stress here: actuate only the (1, 1) matrix entry.
      act = make_matrix2d(zeros, zeros, zeros, act)
      # Convert to Kirchhoff stress
      # BUGFIX: removed dead assignment `F = state['deformation_gradient']` --
      # the value was never used, and state is accessed via attributes
      # (state.position / state.velocity) everywhere else in this file.
      total_actuation = total_actuation + act
    return total_actuation, debug

  res = (80, 40)
  bc = get_bounding_box_bc(res)

  sim = Simulation(
      dt=0.005,
      num_particles=num_particles,
      grid_res=res,
      dx=1.0 / res[1],
      gravity=gravity,
      controller=controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess,
      scale=20,
      part_size=1)
  print("Building time: {:.4f}s".format(time.time() - t))

  # The controller's debug output of the *final* step doubles as the readout
  # of the head group's averaged position/velocity.
  final_state = sim.initial_state['debug']['controller_inputs']
  s = head * 6
  final_position = final_state[:, s:s + 2]
  final_velocity = final_state[:, s + 2:s + 4]
  # Reward forward progress of the head (negative x-position) plus a damped
  # terminal-velocity penalty.
  loss1 = tf.reduce_mean(tf.reduce_sum(-final_position[:, 0]))
  loss2 = tf.reduce_mean(tf.reduce_sum(final_velocity**2, axis=1))
  saver = tf.train.Saver()
  loss = loss1 + gamma * loss2

  # Seed particles on a regular sample_density x sample_density lattice
  # inside each group's box, scaled into the unit domain.
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          scale = 0.2
          u = ((x + 0.5) / sample_density * group_sizes[i][0] +
               offset[0]) * scale + 0.2
          v = ((y + 0.5) / sample_density * group_sizes[i][1] +
               offset[1]) * scale + 0.1
          initial_positions[b].append([u, v])
  assert len(initial_positions[0]) == num_particles
  # (batch, particle, dim) -> (batch, dim, particle)
  initial_positions = np.array(initial_positions).swapaxes(1, 2)
  sess.run(tf.global_variables_initializer())
  initial_state = sim.get_initial_state(
      position=np.array(initial_positions), youngs_modulus=10)

  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.set_initial_state(initial_state=initial_state)

  tt = time.time()
  sym = sim.gradients_sym(loss, variables=trainables)
  print('sym', time.time() - tt)

  #sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
  #sim.add_vector_visualization(pos=final_position, vector=final_velocity, color=(0, 0, 1), scale=50)
  #sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)

  gx, gy = goal_range
  pos_x, pos_y = goal_pos
  goal_train = [
      np.array([[
          pos_x + (random.random() - 0.5) * gx,
          pos_y + (random.random() - 0.5) * gy
      ] for _ in range(batch_size)],
               dtype=np.float32) for __ in range(1)
  ]

  vis_id = list(range(batch_size))
  random.shuffle(vis_id)

  grad_ph = [
      tf.placeholder(shape=v.shape, dtype=tf.float32) for v in trainables
  ]

  # Optimization loop
  # NOTE(review): gradients are evaluated below but no update op is ever run
  # in this visible span (grad_ph is created yet unused) -- confirm whether
  # the parameter-update step was truncated from this version.
  tt0 = time.time()
  for e in range(50):
    tt = time.time()
    for it, goal_input in enumerate(goal_train):
      memo = sim.run(
          initial_state=initial_state,
          num_steps=200,
          iteration_feed_dict={goal: goal_input},
          loss=loss)
      grad = sim.eval_gradients(sym=sym, memo=memo)
    print(f'Time in epoch {e} is {np.round(time.time() - tt,3)}. Total: {np.round(time.time() - tt0,3)}')
def main(sess):
  """Run an open-loop demo: sinusoidal actuation with a rotating start state.

  The controller ignores the simulated state apart from the step counter and
  actuates every group with the same 2*sin(0.1*step) signal; the body is
  seeded with a rotational initial velocity field and simply visualized.

  Args:
    sess: active TensorFlow session used to build and run the graph.

  NOTE(review): relies on module-level globals defined elsewhere in this file
  (batch_size, num_groups, actuations, num_particles, sample_density,
  group_offsets, group_sizes, gravity, goal_pos, goal_range, head, lr, exp).
  """
  t = time.time()

  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

  # Define your controller here
  def controller(state):
    """Open-loop sinusoidal actuation; returns (Kirchhoff stress, dummy)."""
    actuation = 2 * np.ones(shape=(batch_size, num_groups)) * tf.sin(
        0.1 * tf.cast(state.get_evaluated()['step_count'], tf.float32))
    total_actuation = 0
    zeros = tf.zeros(shape=(batch_size, num_particles))
    for i, group in enumerate(actuations):
      act = actuation[:, i:i + 1] * 2
      assert len(act.shape) == 2
      mask = particle_mask_from_group(group)
      act = act * mask
      # First PK stress here: actuate only the (1, 1) matrix entry.
      act = make_matrix2d(zeros, zeros, zeros, act)
      # Convert to Kirchhoff stress
      total_actuation = total_actuation + act
    return total_actuation, 1

  res = (80, 40)
  bc = get_bounding_box_bc(res)

  sim = Simulation(
      dt=0.005,
      num_particles=num_particles,
      grid_res=res,
      dx=1.0 / res[1],
      gravity=gravity,
      controller=controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess,
      scale=20)
  print("Building time: {:.4f}s".format(time.time() - t))

  s = head * 6

  initial_positions = [[] for _ in range(batch_size)]
  initial_velocity = np.zeros(shape=(batch_size, 2, num_particles))
  for b in range(batch_size):
    c = 0
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          scale = 0.2
          u = ((x + 0.5) / sample_density * group_sizes[i][0] +
               offset[0]) * scale + 0.2
          v = ((y + 0.5) / sample_density * group_sizes[i][1] +
               offset[1]) * scale + 0.1
          initial_positions[b].append([u, v])
          # BUGFIX: the counter originally advanced once per *group* and the
          # assignment hard-coded batch index 0, so this rotational velocity
          # field (v = omega x r about the lattice center) was written to a
          # single, repeatedly overwritten slot. Advance per particle and
          # index the current batch instead.
          initial_velocity[b, :, c] = (2 * (y - sample_density / 2),
                                       -2 * (x - sample_density / 2))
          c += 1
  assert len(initial_positions[0]) == num_particles
  # (batch, particle, dim) -> (batch, dim, particle)
  initial_positions = np.array(initial_positions).swapaxes(1, 2)
  sess.run(tf.global_variables_initializer())
  initial_state = sim.get_initial_state(
      position=np.array(initial_positions),
      youngs_modulus=10,
      velocity=initial_velocity)

  sim.set_initial_state(initial_state=initial_state)

  gx, gy = goal_range
  pos_x, pos_y = goal_pos
  goal_train = [
      np.array([[
          pos_x + (random.random() - 0.5) * gx,
          pos_y + (random.random() - 0.5) * gy
      ] for _ in range(batch_size)],
               dtype=np.float32) for __ in range(1)
  ]

  vis_id = list(range(batch_size))
  random.shuffle(vis_id)

  # Optimization loop (forward-only: nothing is trained in this demo).
  for i in range(100000):
    t = time.time()
    print('Epoch {:5d}, learning rate {}'.format(i, lr))

    print('train...')
    for it, goal_input in enumerate(goal_train):
      tt = time.time()
      memo = sim.run(
          initial_state=initial_state,
          num_steps=400,
          iteration_feed_dict={goal: goal_input},
      )
      print('forward', time.time() - tt)
      tt = time.time()
      print('backward', time.time() - tt)

      sim.visualize(memo,
                    batch=random.randrange(batch_size),
                    export=exp,
                    show=True,
                    interval=4)
def main(sess):
  """Train a 2-D walker: linear controller, SGD via gradient placeholders.

  Builds the simulation graph, derives symbolic gradients of a
  forward-progress loss, and applies a gradient-descent assign op fed through
  placeholders. Saves a checkpoint and visualizes one batch every epoch.

  Args:
    sess: active TensorFlow session used to build and run the graph.

  NOTE(review): depends on module-level globals defined elsewhere in this
  file (batch_size, num_groups, group_num_particles, feature_dim_per_group,
  W1, b1, actuations, actuation_strength, goal_pos, goal_range, head, gamma,
  sample_density, group_offsets, group_sizes, num_particles, gravity, lr).
  """
  t = time.time()

  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

  # Define your controller here
  def controller(state):
    # Linear state-feedback controller: per-group mean position/velocity ->
    # tanh-squashed actuation. Returns (Kirchhoff stress, debug dict).
    controller_inputs = []
    # Computed but currently unused -- the goal feature append below is
    # commented out, so the controller is goal-blind in this version.
    goal_feature = (goal - goal_pos) / np.maximum(goal_range, 1e-5)
    for i in range(num_groups):
      # Mean position/velocity over the particles of group i.
      mask = particle_mask(i * group_num_particles,
                           (i + 1) * group_num_particles)[:, None, :] * (
                               1.0 / group_num_particles)
      pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
      vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
      controller_inputs.append(pos)
      controller_inputs.append(vel)
      #controller_inputs.append(goal_feature)
    # Batch, dim
    controller_inputs = tf.concat(controller_inputs, axis=1)
    assert controller_inputs.shape == (batch_size, feature_dim_per_group * num_groups), controller_inputs.shape
    controller_inputs = controller_inputs[:, :, None]
    assert controller_inputs.shape == (batch_size, feature_dim_per_group * num_groups, 1)
    # Batch, 6 * num_groups, 1
    intermediate = tf.matmul(
        W1[None, :, :] + tf.zeros(shape=[batch_size, 1, 1]), controller_inputs)
    # Batch, #actuations, 1
    assert intermediate.shape == (batch_size, len(actuations), 1)
    assert intermediate.shape[2] == 1
    intermediate = intermediate[:, :, 0]
    # Batch, #actuations
    actuation = tf.tanh(intermediate + b1[None, :]) * actuation_strength
    debug = {
        'controller_inputs': controller_inputs[:, :, 0],
        'actuation': actuation
    }
    total_actuation = 0
    zeros = tf.zeros(shape=(batch_size, num_particles))
    for i, group in enumerate(actuations):
      act = actuation[:, i:i + 1]
      assert len(act.shape) == 2
      mask = particle_mask_from_group(group)
      act = act * mask
      total_actuation = total_actuation + act
    # Single matrix build after summing scalar actuations (only the (1, 1)
    # entry is actuated), unlike the per-group matrix build in other variants.
    total_actuation = make_matrix2d(zeros, zeros, zeros, total_actuation)
    return total_actuation, debug

  res = (80, 40)
  bc = get_bounding_box_bc(res)

  sim = Simulation(
      dt=0.005,
      num_particles=num_particles,
      grid_res=res,
      dx=1.0 / res[1],
      gravity=gravity,
      controller=controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess,
      scale=20,
      part_size=10)
  print("Building time: {:.4f}s".format(time.time() - t))

  # Readout of the head group's averaged position/velocity from the
  # controller's debug output.
  final_state = sim.initial_state['debug']['controller_inputs']
  s = head * feature_dim_per_group
  final_position = final_state[:, s:s + 2]
  final_velocity = final_state[:, s + 2:s + 4]
  # Reward forward progress of the head (negative x-position) plus a damped
  # terminal-velocity penalty.
  loss1 = tf.reduce_mean(tf.reduce_sum(-final_position[:, 0]))
  loss2 = tf.reduce_mean(tf.reduce_sum(final_velocity**2, axis=1))
  saver = tf.train.Saver()
  loss = loss1 + gamma * loss2

  # Seed particles on a regular lattice inside each group's box.
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          scale = 0.2
          u = ((x + 0.5) / sample_density * group_sizes[i][0] +
               offset[0]) * scale + 0.2
          v = ((y + 0.5) / sample_density * group_sizes[i][1] +
               offset[1]) * scale + 0.1
          initial_positions[b].append([u, v])
  assert len(initial_positions[0]) == num_particles
  # (batch, particle, dim) -> (batch, dim, particle)
  initial_positions = np.array(initial_positions).swapaxes(1, 2)
  sess.run(tf.global_variables_initializer())
  initial_state = sim.get_initial_state(
      position=np.array(initial_positions), youngs_modulus=10)

  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.set_initial_state(initial_state=initial_state)

  tt = time.time()
  sym = sim.gradients_sym(loss, variables=trainables)
  print('sym', time.time() - tt)

  #sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
  sim.add_vector_visualization(pos=final_position, vector=final_velocity, color=(0, 0, 1), scale=50)
  sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)

  gx, gy = goal_range
  pos_x, pos_y = goal_pos
  goal_train = [
      np.array([[
          pos_x + (random.random() - 0.5) * gx,
          pos_y + (random.random() - 0.5) * gy
      ] for _ in range(batch_size)],
               dtype=np.float32) for __ in range(1)
  ]

  vis_id = list(range(batch_size))
  random.shuffle(vis_id)

  # Gradients are computed outside the graph (eval_gradients) and fed back in
  # through placeholders so a plain assign op can apply the SGD step.
  grad_ph = [
      tf.placeholder(shape=v.shape, dtype=tf.float32) for v in trainables
  ]
  gradient_descent = [
      v.assign(v - lr * g) for v, g in zip(trainables, grad_ph)
  ]

  # Optimization loop
  for e in range(200):
    t = time.time()
    print('Epoch {:5d}, learning rate {}'.format(e, lr))

    loss_cal = 0.
    print('train...')
    for it, goal_input in enumerate(goal_train):
      tt = time.time()
      memo = sim.run(
          initial_state=initial_state,
          num_steps=800,
          iteration_feed_dict={goal: goal_input},
          loss=loss)
      print('# *** forward', time.time() - tt)
      tt = time.time()
      grad = sim.eval_gradients(sym=sym, memo=memo)
      print('# *** eval_gradients', time.time() - tt)
      tt = time.time()
      grad_feed_dict = {}
      for gp, g in zip(grad_ph, grad):
        grad_feed_dict[gp] = g
      sess.run(gradient_descent, feed_dict=grad_feed_dict)
      print('gradient_descent', time.time() - tt)
      print('Iter {:5d} time {:.3f} loss {}'.format(
          it, time.time() - t, memo.loss))
      loss_cal = loss_cal + memo.loss
    save_path = saver.save(sess, "./models/walker_2d.ckpt")
    print("Model saved in path: %s" % save_path)
    sim.visualize(memo,
                  batch=random.randrange(batch_size),
                  export=None,
                  show=True,
                  interval=4)
    print('train loss {}'.format(loss_cal / len(goal_train)))
def main(sess):
  """Crawler3d demo: 3-D simulation with a zero controller; train or eval.

  In evaluate mode, restores a checkpoint and runs a long forward rollout for
  visualization. In train mode, runs forward/backward passes and applies
  clipped gradient-descent updates, checkpointing every epoch.

  Args:
    sess: active TensorFlow session used to build and run the graph.

  NOTE(review): depends on module-level globals defined elsewhere in this
  file (batch_size, num_particles, evaluate, gravity, head, sample_density,
  group_offsets, group_sizes, goal_pos, goal_range, lr).
  """
  t = time.time()

  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 3], name='goal')

  # Define your controller here
  def controller(state):
    # Zero actuation everywhere; debug tensors are placeholder zeros.
    actuations = tf.zeros(shape=(1, 3, 3, num_particles))
    debug = {'controller_inputs': tf.zeros(shape=(1, 10, 10)), 'actuation': actuations}
    return actuations, debug

  # Longer domain in x when evaluating (room to crawl).
  res = (60 + 100 * int(evaluate), 30, 30)
  bc = get_bounding_box_bc(res)

  sim = Simulation(
      dt=0.005,
      num_particles=num_particles,
      grid_res=res,
      dx=1.0 / res[1],
      gravity=gravity,
      controller=controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess,
      E=25,
      damping=0.001 * evaluate,
      part_size=10)
  print("Building time: {:.4f}s".format(time.time() - t))

  # NOTE(review): final_state and s are computed but never used below.
  final_state = sim.initial_state['debug']['controller_inputs']
  s = head * 9

  # NOTE(review): loss1 depends only on the `goal` placeholder, not on any
  # simulated state, so gradients w.r.t. trainables are degenerate. Other
  # variants in this file use (final_position - goal)**2 -- confirm whether
  # this was intentionally stubbed out.
  loss1 = tf.reduce_mean(tf.reduce_sum((goal) ** 2, axis = 1))
  loss = loss1

  # Seed particles on a regular 3-D lattice inside each group's box.
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          for z in range(sample_density):
            scale = 0.2
            u = ((x + 0.5) / sample_density * group_sizes[i][0] +
                 offset[0]) * scale + 0.2
            v = ((y + 0.5) / sample_density * group_sizes[i][1] +
                 offset[1]) * scale + 0.1
            w = ((z + 0.5) / sample_density * group_sizes[i][2] +
                 offset[2]) * scale + 0.1
            initial_positions[b].append([u, v, w])
  assert len(initial_positions[0]) == num_particles
  # (batch, particle, dim) -> (batch, dim, particle)
  initial_positions = np.array(initial_positions).swapaxes(1, 2)
  sess.run(tf.global_variables_initializer())
  initial_state = sim.get_initial_state(
      position=np.array(initial_positions), youngs_modulus=10)

  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.set_initial_state(initial_state=initial_state)

  sym = sim.gradients_sym(loss, variables=trainables)

  gx, gy, gz = goal_range
  pos_x, pos_y, pos_z = goal_pos
  goal_train = [np.array(
      [[pos_x + (random.random() - 0.5) * gx,
        pos_y + (random.random() - 0.5) * gy,
        pos_z + (random.random() - 0.5) * gz
        ] for _ in range(batch_size)],
      dtype=np.float32) for __ in range(1)]

  vis_id = list(range(batch_size))
  random.shuffle(vis_id)

  # Optimization loop
  saver = tf.train.Saver()

  if evaluate:
    '''evaluate'''
    # Restore a trained checkpoint and do a single long rollout.
    saver.restore(sess, "crawler3d_demo/0014/data.ckpt")
    tt = time.time()
    memo = sim.run(
        initial_state=initial_state,
        num_steps=1800,
        iteration_feed_dict={goal: goal_train[0]},
        loss=loss)
    print('forward', time.time() - tt)

    fn = 'crawler3d_demo/eval'
    sim.visualize(memo,
                  batch=random.randrange(batch_size),
                  export=None,
                  show=True,
                  interval=5,
                  folder=fn)
    return

  for e in range(100000):
    t = time.time()
    print('Epoch {:5d}, learning rate {}'.format(e, lr))

    loss_cal = 0.
    print('train...')
    for it, goal_input in enumerate(goal_train):
      tt = time.time()
      memo = sim.run(
          initial_state=initial_state,
          num_steps=400,
          iteration_feed_dict={goal: goal_input},
          loss=loss)
      print('forward', time.time() - tt)
      tt = time.time()
      grad = sim.eval_gradients(sym=sym, memo=memo)
      print('backward', time.time() - tt)

      # Log gradient magnitudes, then clip element-wise for stability.
      for i, g in enumerate(grad):
        print(i, np.mean(np.abs(g)))
      grad = [np.clip(g, -1, 1) for g in grad]

      # SGD step built fresh each iteration from the numpy gradients.
      gradient_descent = [
          v.assign(v - lr * g) for v, g in zip(trainables, grad)
      ]
      sess.run(gradient_descent)
      print('Iter {:5d} time {:.3f} loss {}'.format(
          it, time.time() - t, memo.loss))
      loss_cal = loss_cal + memo.loss

    fn = 'crawler3d_demo/{:04d}/'.format(e)
    saver.save(sess, "{}/data.ckpt".format(fn))
    sim.visualize(memo,
                  batch=random.randrange(batch_size),
                  export=None,
                  show=True,
                  interval=5,
                  folder=fn)
    #exp.export()
    print('train loss {}'.format(loss_cal / len(goal_train)))
def main(sess):
  """Demo: a disc of particles spun by an external rotational force field.

  Particles are laid out in concentric rings, a force controller applies a
  tangential (rotational) force about the center of mass, and the rollout is
  visualized. A goal placeholder and symbolic gradients are built but no
  optimization step is performed.

  Args:
    sess: active TensorFlow session used to build and run the graph.
  """
  batch_size = 1
  gravity = (0, -1)
  # gravity = (0, 0)
  num_rings = 5
  ring_spacing = 0.2
  outer_radius = (num_rings - 1) * ring_spacing
  density_growth = 1.6
  num_particles = int(((num_rings - 1) * density_growth + 1)**2)
  steps = 1000
  dt = 5e-3
  goal_range = 0.15
  res = (45, 30)
  bc = get_bounding_box_bc(res)
  lr = 1e-2

  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

  def F_controller(state):
    """Tangential force field: rotate offsets from the center of mass."""
    offsets = state.position - state.center_of_mass()[:, :, None]
    # 90-degree rotation of each offset vector: (x, y) -> (y, -x).
    tangential = tf.stack([offsets[:, 1], -offsets[:, 0]], axis=1)
    # T = tf.cast(state.step_count // 100 % 2, dtype = tf.float32) * 2 - 1
    return tangential * 10  # * T

  sim = Simulation(
      dt=dt,
      num_particles=num_particles,
      grid_res=res,
      bc=bc,
      gravity=gravity,
      m_p=1,
      V_p=1,
      E=10,
      nu=0.3,
      sess=sess,
      use_visualize=True,
      F_controller=F_controller)

  position = np.zeros(shape=(batch_size, num_particles, 2))
  # velocity_ph = tf.constant([0.2, 0.3])
  velocity_ph = tf.constant([0, 0], dtype=tf.float32)
  velocity = velocity_ph[None, :, None] + tf.zeros(
      shape=[batch_size, 2, num_particles], dtype=tf.float32)

  random.seed(123)
  # Distribute particles over concentric rings so that the cumulative count
  # through ring i equals int((density_growth * i + 1)**2).
  for b in range(batch_size):
    center_x, center_y = 5, 4
    particle_idx = 0
    cumulative = 0
    for ring in range(num_rings):
      target = int((density_growth * ring + 1)**2)
      ring_count, cumulative = target - cumulative, target
      print(ring_count)
      angle_step = 2 * np.pi / ring_count
      radius = outer_radius / (num_rings - 1) * ring
      # Random phase per ring so rings are not aligned.
      angle = np.pi * 2 * np.random.random()
      for _ in range(ring_count):
        angle += angle_step
        px = np.cos(angle) * radius
        py = np.sin(angle) * radius
        position[b, particle_idx] = ((center_x + px) / 30, (center_y + py) / 30)
        particle_idx += 1
  # (batch, particle, dim) -> (batch, dim, particle)
  position = np.array(position).swapaxes(1, 2)

  sess.run(tf.global_variables_initializer())
  initial_state = sim.get_initial_state(position=position, velocity=velocity)

  final_position = sim.initial_state.center_of_mass()
  loss = tf.reduce_sum((final_position - goal)**2)
  sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)
  sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)

  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.set_initial_state(initial_state=initial_state)
  sym = sim.gradients_sym(loss, variables=trainables)

  goal_input = np.array([[0.7, 0.3]], dtype=np.float32)

  memo = sim.run(
      initial_state=initial_state,
      num_steps=steps,
      iteration_feed_dict={goal: goal_input},
      loss=loss)

  # Interactive display; the original also carried a dead export branch
  # (sim.visualize(memo, show=False, interval=1, export=exp)).
  sim.visualize(memo, show=True, interval=2)