Ejemplo n.º 1
0
def main(sess):
    """Train a linear soft-body controller to drive a tracked group to a goal.

    Builds a one-layer (W1/b1 + tanh) controller over per-group mean
    position/velocity features plus the goal, simulates 150 steps per
    iteration, and optimizes the controller weights by plain gradient
    descent on the tracked group's final distance to the goal (plus a
    gamma-weighted velocity penalty).

    NOTE(review): relies on module-level globals not visible here
    (batch_size, num_groups, group_num_particles, multi_target, goal_pos,
    goal_range, W1, b1, actuation_strength, actuations, num_particles,
    config, gravity, head, gamma, group_offsets, group_sizes,
    sample_density, lr) — confirm against the full file.

    Args:
        sess: open TF1 tf.Session used for graph construction and training.
    """
    t = time.time()

    # Per-batch goal position, fed fresh each training iteration.
    goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

    # Define your controller here
    def controller(state):
        """Map simulation state -> (total actuation stress, debug dict)."""
        controller_inputs = []
        for i in range(num_groups):
            # Averaging mask over the particles of group i.
            mask = particle_mask(i * group_num_particles,
                                 (i + 1) * group_num_particles)[:, None, :] * (
                                     1.0 / group_num_particles)
            pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
            vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
            controller_inputs.append(pos)
            controller_inputs.append(vel)
            if multi_target:
                # Feed the goal normalized to its sampling range.
                controller_inputs.append((goal - goal_pos) / goal_range)
            else:
                controller_inputs.append(goal)
        # Batch, dim
        controller_inputs = tf.concat(controller_inputs, axis=1)
        assert controller_inputs.shape == (batch_size, 6 *
                                           num_groups), controller_inputs.shape
        controller_inputs = controller_inputs[:, :, None]
        assert controller_inputs.shape == (batch_size, 6 * num_groups, 1)
        # Batch, 6 * num_groups, 1
        # Single linear layer; W1 is broadcast across the batch via the
        # zero-valued batch tensor.
        intermediate = tf.matmul(
            W1[None, :, :] + tf.zeros(shape=[batch_size, 1, 1]),
            controller_inputs)
        # Batch, #actuations, 1
        assert intermediate.shape == (batch_size, len(actuations), 1)
        assert intermediate.shape[2] == 1
        intermediate = intermediate[:, :, 0]
        # Batch, #actuations
        actuation = tf.tanh(intermediate + b1[None, :]) * actuation_strength
        debug = {
            'controller_inputs': controller_inputs[:, :, 0],
            'actuation': actuation
        }
        total_actuation = 0
        zeros = tf.zeros(shape=(batch_size, num_particles))
        for i, group in enumerate(actuations):
            act = actuation[:, i:i + 1]
            assert len(act.shape) == 2
            mask = particle_mask_from_group(group)
            act = act * mask
            # First PK stress here
            act = make_matrix2d(zeros, zeros, zeros, act)
            # Convert to Kirchhoff stress
            total_actuation = total_actuation + act
        return total_actuation, debug

    res = (40, 40)
    bc = get_bounding_box_bc(res)

    if config == 'B':
        # Config 'B': make the bottom rows of the grid sticky.
        bc[0][:, :, :7] = -1  # Sticky
        bc[1][:, :, :7] = 0  # Sticky

    sim = Simulation(dt=0.005,
                     num_particles=num_particles,
                     grid_res=res,
                     gravity=gravity,
                     controller=controller,
                     batch_size=batch_size,
                     bc=bc,
                     sess=sess)
    print("Building time: {:.4f}s".format(time.time() - t))

    # Slice the tracked group's mean position/velocity out of the
    # controller-input debug tensor (6 features per group).
    final_state = sim.initial_state['debug']['controller_inputs']
    s = head * 6

    final_position = final_state[:, s:s + 2]
    final_velocity = final_state[:, s + 2:s + 4]
    loss1 = tf.reduce_mean(tf.reduce_sum((final_position - goal)**2, axis=1))
    loss2 = tf.reduce_mean(tf.reduce_sum(final_velocity**2, axis=1))

    loss = loss1 + gamma * loss2

    # Sample particle positions on a regular grid per group.
    initial_positions = [[] for _ in range(batch_size)]
    for b in range(batch_size):
        for i, offset in enumerate(group_offsets):
            for x in range(sample_density):
                for y in range(sample_density):
                    scale = 0.2
                    u = ((x + 0.5) / sample_density * group_sizes[i][0] +
                         offset[0]) * scale + 0.2
                    v = ((y + 0.5) / sample_density * group_sizes[i][1] +
                         offset[1]) * scale + 0.1
                    initial_positions[b].append([u, v])
    assert len(initial_positions[0]) == num_particles
    # (batch, particle, dim) -> (batch, dim, particle)
    initial_positions = np.array(initial_positions).swapaxes(1, 2)

    sess.run(tf.global_variables_initializer())

    initial_state = sim.get_initial_state(position=np.array(initial_positions),
                                          youngs_modulus=10)

    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    sim.set_initial_state(initial_state=initial_state)

    # Symbolic gradients of the loss w.r.t. the controller weights.
    sym = sim.gradients_sym(loss, variables=trainables)
    sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
    sim.add_vector_visualization(pos=final_position,
                                 vector=final_velocity,
                                 color=(0, 0, 1),
                                 scale=50)

    sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)

    if multi_target:
        fout = open('multi_target_{}.log'.format(lr), 'w')
    else:
        fout = open('single_target_{}.log'.format(lr), 'w')

    # Optimization loop
    for it in range(100000):
        t = time.time()

        # Draw a fresh goal uniformly inside the goal box each iteration.
        goal_input = ((np.random.random([batch_size, 2]) - 0.5) * goal_range +
                      goal_pos)

        print('train...')
        memo = sim.run(initial_state=initial_state,
                       num_steps=150,
                       iteration_feed_dict={goal: goal_input},
                       loss=loss)
        grad = sim.eval_gradients(sym=sym, memo=memo)
        # Plain SGD step applied in-graph via Variable.assign.
        gradient_descent = [
            v.assign(v - lr * g) for v, g in zip(trainables, grad)
        ]
        sess.run(gradient_descent)
        print('Iter {:5d} time {:.3f} loss {}'.format(it,
                                                      time.time() - t,
                                                      memo.loss))
        loss_cal = memo.loss
        # Visualization is disabled; flip the constant below to re-enable.
        if False:  #i % 5 == 0:
            sim.visualize(memo, batch=0, interval=5)
            # sim.visualize(memo, batch = 1)

        print('L2:', loss_cal**0.5)
        print(it, 'L2 distance: ', loss_cal**0.5, file=fout)
        '''
Ejemplo n.º 2
0
def generate_sim():
    """Build a Simulation plus a forward-progress reward (utility for PPO).

    Returns:
        (initial_state, sim, loss, loss_obs): the simulator's initial
        state, the simulator, the forward-velocity reward tensor used as
        the loss, and the full controller-input tensor as the observation.

    NOTE(review): reads module-level globals (sess, goal, batch_size,
    num_groups, group_num_particles, actuations, num_particles, gravity,
    head, group_offsets, group_sizes, sample_density) — confirm they are
    defined before this is called.
    """
    t = time.time()

    # Define your controller here

    def controller(state):
        controller_inputs = []
        for i in range(num_groups):
            # Averaging mask over the particles of group i.
            mask = particle_mask(i * group_num_particles,
                                 (i + 1) * group_num_particles)[:, None, :] * (
                                     1.0 / group_num_particles)
            pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
            vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
            controller_inputs.append(pos)
            controller_inputs.append(vel)
            controller_inputs.append(goal)
        # Batch, dim
        controller_inputs = tf.concat(controller_inputs, axis=1)
        assert controller_inputs.shape == (batch_size, 6 *
                                           num_groups), controller_inputs.shape
        controller_inputs = controller_inputs[:, :, None]
        assert controller_inputs.shape == (batch_size, 6 * num_groups, 1)
        # Batch, 6 * num_groups, 1
        #IPython.embed()

        # NOTE(review): `actuation` is referenced below but never defined in
        # this controller (the layer that computed it appears to have been
        # removed) — this raises NameError when the controller runs. TODO:
        # restore the actuation computation.
        debug = {
            'controller_inputs': controller_inputs[:, :, 0],
            'actuation': actuation
        }
        total_actuation = 0
        zeros = tf.zeros(shape=(batch_size, num_particles))
        for i, group in enumerate(actuations):
            act = actuation[:, i:i + 1]
            assert len(act.shape) == 2
            mask = particle_mask_from_group(group)
            act = act * mask
            # First PK stress here
            act = make_matrix2d(zeros, zeros, zeros, act)
            # Convert to Kirchhoff stress
            total_actuation = total_actuation + act
        return total_actuation, debug

    res = (80, 40)
    bc = get_bounding_box_bc(res)
    dt = 0.005

    sim = Simulation(dt=dt,
                     num_particles=num_particles,
                     grid_res=res,
                     dx=1.0 / res[1],
                     gravity=gravity,
                     controller=controller,
                     batch_size=batch_size,
                     bc=bc,
                     sess=sess,
                     scale=20)
    print("Building time: {:.4f}s".format(time.time() - t))

    # Tracked group's features from the controller-input debug tensor
    # (6 features per group: pos, vel, goal).
    final_state = sim.initial_state['debug']['controller_inputs']
    s = head * 6

    final_position = final_state[:, s:s + 2]
    final_velocity = final_state[:, s + 2:s + 4]
    loss1 = tf.reduce_mean(tf.reduce_sum((final_position - goal)**2, axis=1))
    loss2 = tf.reduce_mean(tf.reduce_sum(final_velocity**2, axis=1))

    # NOTE(review): loss1/loss2/loss_x/loss_y are computed but unused here.
    loss_x = tf.reduce_mean(tf.reduce_sum(final_position[0, 0]))
    loss_y = tf.reduce_mean(tf.reduce_sum(final_position[0, 1]))

    loss_obs = final_state
    # Mean x-velocity of the tracked group, scaled by dt.
    loss_fwd = tf.reduce_mean(
        tf.reduce_sum(final_state[:, s + 2:s + 3], axis=1)) * dt

    loss = loss_fwd  #really, the reward forward

    # Sample particle positions on a regular grid per group.
    initial_positions = [[] for _ in range(batch_size)]
    for b in range(batch_size):
        for i, offset in enumerate(group_offsets):
            for x in range(sample_density):
                for y in range(sample_density):
                    scale = 0.2
                    u = ((x + 0.5) / sample_density * group_sizes[i][0] +
                         offset[0]) * scale + 0.2
                    v = ((y + 0.5) / sample_density * group_sizes[i][1] +
                         offset[1]) * scale + 0.1
                    initial_positions[b].append([u, v])
    assert len(initial_positions[0]) == num_particles
    # (batch, particle, dim) -> (batch, dim, particle)
    initial_positions = np.array(initial_positions).swapaxes(1, 2)

    sess.run(tf.global_variables_initializer())

    initial_state = sim.get_initial_state(position=np.array(initial_positions),
                                          youngs_modulus=10)

    #trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

    sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
    sim.add_vector_visualization(pos=final_position,
                                 vector=final_velocity,
                                 color=(0, 0, 1),
                                 scale=50)

    return initial_state, sim, loss, loss_obs
Ejemplo n.º 3
0
def main(sess):
    """Co-optimize an open-loop actuation sequence and per-particle stiffness.

    Poses the control/design problem as a constrained NLP solved with
    pygmo + NLopt SLSQP: decision variables are the trainable TF variables
    (actuation sequence and Young's modulus field), the objective is
    actuation effort, and position/velocity/acceleration losses enter as
    inequality constraints.

    NOTE(review): depends on module-level globals not visible here
    (batch_size, num_groups, group_num_particles, actuation_seq,
    num_steps, num_acts, actuations, num_particles, gravity, head,
    goal_range, group_offsets, group_sizes, sample_density, num_links,
    iter_, flatten_vectors, pg, IPython, ly) — confirm against the full
    file.

    Args:
        sess: open TF1 tf.Session.
    """
    t = time.time()

    goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

    # Define your controller here
    def controller(state):
        # Open-loop controller: the state features are collected only for
        # debugging/losses; the actuation is read from the trainable
        # `actuation_seq`.
        controller_inputs = []
        for i in range(num_groups):
            # Averaging mask over the particles of group i.
            mask = particle_mask(i * group_num_particles,
                                 (i + 1) * group_num_particles)[:, None, :] * (
                                     1.0 / group_num_particles)
            pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
            vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
            accel = tf.reduce_sum(mask * state.acceleration,
                                  axis=2,
                                  keepdims=False)
            controller_inputs.append(pos)
            controller_inputs.append(vel)
            controller_inputs.append(goal)
            controller_inputs.append(accel)
        # Batch, dim
        controller_inputs = tf.concat(controller_inputs, axis=1)
        assert controller_inputs.shape == (batch_size, 8 *
                                           num_groups), controller_inputs.shape
        controller_inputs = controller_inputs[:, :, None]
        assert controller_inputs.shape == (batch_size, 8 * num_groups, 1)

        # Piecewise-constant actuation: one row of actuation_seq per
        # (num_steps // num_acts)-step segment.
        actuation = tf.expand_dims(
            actuation_seq[0, (state.step_count - 1) //
                          (num_steps // num_acts), :], 0)
        debug = {
            'controller_inputs': controller_inputs[:, :, 0],
            'actuation': actuation,
            'acceleration': state.acceleration,
            'velocity': state.velocity
        }
        total_actuation = 0
        zeros = tf.zeros(shape=(batch_size, num_particles))
        for i, group in enumerate(actuations):
            act = actuation[:, i:i + 1]
            assert len(act.shape) == 2
            mask = particle_mask_from_group(group)
            act = act * mask
            # First PK stress here
            act = make_matrix2d(zeros, zeros, zeros, act)
            # Convert to Kirchhoff stress
            total_actuation = total_actuation + act
        return total_actuation, debug

    res = (30, 30)
    bc = get_bounding_box_bc(res)

    # Make the bottom rows of the grid sticky (anchors the robot).
    bc[0][:, :, :5] = -1  # Sticky
    bc[1][:, :, :5] = 0  # Sticky

    sim = Simulation(dt=0.0025,
                     num_particles=num_particles,
                     grid_res=res,
                     gravity=gravity,
                     controller=controller,
                     batch_size=batch_size,
                     bc=bc,
                     sess=sess)
    print("Building time: {:.4f}s".format(time.time() - t))

    # Debug tensors from the simulation state (8 features per group).
    final_state = sim.initial_state['debug']['controller_inputs']
    final_acceleration = sim.initial_state['debug']['acceleration']
    final_velocity_all = sim.initial_state['debug']['velocity']
    s = head * 8

    final_position = final_state[:, s:s + 2]
    final_velocity = final_state[:, s + 2:s + 4]
    final_accel = final_state[:, s + 6:s + 8]
    gamma = 0.0
    loss_position = tf.reduce_sum((final_position - goal)**2)
    loss_velocity = tf.reduce_mean(final_velocity_all**2) / 10.0
    loss_act = tf.reduce_sum(actuation_seq**2.0) / 10000.0
    # Constant-zero loss used to disable a term without changing the API.
    loss_zero = tf.Variable(0.0, trainable=False)

    #loss_accel = tf.reduce_mean(final_acceleration ** 2.0) / 10000.0
    loss_accel = loss_zero
    #IPython.embed()

    #acceleration_constraint = tf.reduce_sum(final_acceleration, axis=1)

    # Sample particle positions on a regular grid per group.
    initial_positions = [[] for _ in range(batch_size)]
    for b in range(batch_size):
        for i, offset in enumerate(group_offsets):
            for x in range(sample_density):
                for y in range(sample_density):
                    scale = 0.2
                    u = ((x + 0.5) / sample_density * group_sizes[i][0] +
                         offset[0]) * scale + 0.2
                    v = ((y + 0.5) / sample_density * group_sizes[i][1] +
                         offset[1]) * scale + 0.1
                    initial_positions[b].append([u, v])
    assert len(initial_positions[0]) == num_particles
    initial_positions = np.array(initial_positions).swapaxes(1, 2)

    # Per-particle Young's modulus is itself a decision variable.
    youngs_modulus = tf.Variable(
        10.0 * tf.ones(shape=[1, 1, num_particles], dtype=tf.float32),
        trainable=True)
    initial_state = sim.get_initial_state(
        position=np.array(initial_positions),
        youngs_modulus=tf.identity(youngs_modulus))

    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

    sess.run(tf.global_variables_initializer())

    sim.set_initial_state(initial_state=initial_state)

    # Symbolic gradients for each objective/constraint term.
    sym_pos = sim.gradients_sym(loss_position, variables=trainables)
    sym_vel = sim.gradients_sym(loss_velocity, variables=trainables)
    sym_act = sim.gradients_sym(loss_act, variables=trainables)
    sym_zero = sim.gradients_sym(loss_zero, variables=trainables)
    sym_accel = sim.gradients_sym(loss_accel, variables=trainables)

    #sym_acc = [sim.gradients_sym(acceleration, variables=trainables) for acceleration in acceleration_constraint]
    #sym_acc = tf.map_fn(lambda x : sim.gradients_sym(x, variables=trainables), acceleration_constraint)
    #acc_flat = flatten_vectors([final_acceleration])
    #sym_acc = tf.map_fn((lambda x : sim.gradients_sym(x, variables=trainables)), acc_flat)
    #IPython.embed()

    sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
    sim.add_vector_visualization(pos=final_position,
                                 vector=final_velocity,
                                 color=(0, 0, 1),
                                 scale=50)

    sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)

    # One fixed random goal batch used for the whole optimization.
    goal_input = np.array([[
        0.7 + (random.random() - 0.5) * goal_range * 2, 0.5 +
        (random.random() - 0.5) * goal_range
    ] for _ in range(batch_size)],
                          dtype=np.float32)

    def eval_sim(loss_tensor, sym_, need_grad=True):
        # Run the full simulation once; optionally also its gradients.
        memo = sim.run(initial_state=initial_state,
                       num_steps=num_steps,
                       iteration_feed_dict={goal: goal_input},
                       loss=loss_tensor)
        if need_grad:
            grad = sim.eval_gradients(sym=sym_, memo=memo)
        else:
            grad = None
        return memo.loss, grad, memo

    def flatten_trainables():
        # Concatenate all trainables into one flat decision vector.
        return tf.concat(
            [tf.squeeze(ly.flatten(trainable)) for trainable in trainables], 0)

    def assignment_run(xs):
        # Assign one value per trainable, in order.
        sess.run([trainable.assign(x) for x, trainable in zip(xs, trainables)])

    t = time.time()

    #loss_val, grad, memo = eval_sim(loss_position, sym_pos)

    #IPython.embed()

    #Begin optimization

    def assignment_helper(x):
        # Scatter a flat decision vector x back into the trainables.
        assignments = []
        idx = 0
        x = x.astype(np.float32)
        for v in trainables:
            #first, get count:
            var_cnt = tf.size(v).eval()
            assignments += [
                v.assign(tf.reshape(x[idx:idx + var_cnt], v.shape))
            ]
            idx += var_cnt
        sess.run(assignments)

    class RobotProblem:
        """pygmo user-defined problem: minimize actuation effort subject to
        position/velocity/acceleration inequality constraints."""

        def __init__(self, use_act):
            self.use_act = use_act

        # Tolerance ball subtracted from each constraint value.
        goal_ball = 0.0001

        def fitness(self, x):
            # Returns [objective, c1, c2, c3] in pygmo's expected order.
            assignment_helper(x)
            if self.use_act:
                loss_act_val, _, _ = eval_sim(loss_act,
                                              sym_act,
                                              need_grad=False)
            else:
                loss_act_val, _, _ = eval_sim(loss_zero,
                                              sym_zero,
                                              need_grad=False)
            loss_pos_val, _, _ = eval_sim(loss_position,
                                          sym_pos,
                                          need_grad=False)
            loss_accel_val, _, _ = eval_sim(loss_accel,
                                            sym_accel,
                                            need_grad=False)
            c1, _, memo = eval_sim(loss_velocity, sym_vel, need_grad=False)
            global iter_
            sim.visualize(memo,
                          show=False,
                          folder="arm_log/it{:04d}".format(iter_))
            iter_ += 1
            print('loss pos', loss_pos_val)
            print('loss vel', c1)
            print('loss accel', loss_accel_val)
            #IPython.embed()
            return [
                loss_act_val.astype(np.float64),
                loss_pos_val.astype(np.float64) - self.goal_ball,
                c1.astype(np.float64) - self.goal_ball,
                loss_accel_val.astype(np.float64) - self.goal_ball
            ]

        def get_nic(self):
            # Number of inequality constraints.
            return 3

        def get_nec(self):
            # Number of equality constraints.
            return 0

        def gradient(self, x):
            # Objective gradient followed by constraint gradients, flattened
            # into one float64 vector as pygmo expects.
            assignment_helper(x)
            _, grad_position, _ = eval_sim(loss_position, sym_pos)
            _, grad_velocity, _ = eval_sim(loss_velocity, sym_vel)
            _, grad_accel, _ = eval_sim(loss_accel, sym_accel)
            if self.use_act:
                _, grad_act, _ = eval_sim(loss_act, sym_act)
            else:
                _, grad_act, _ = eval_sim(loss_zero, sym_zero)
            return np.concatenate([
                flatten_vectors(grad_act).eval().astype(np.float64),
                flatten_vectors(grad_position).eval().astype(np.float64),
                flatten_vectors(grad_velocity).eval().astype(np.float64),
                flatten_vectors(grad_accel).eval().astype(np.float64)
            ])
            #return flatten_vectors(grad).eval().astype(np.float64)

        def get_bounds(self):
            #actuation
            # Box bounds: actuations in [-1/num_links, 1/num_links],
            # Young's modulus values in [3, 40].
            lb = []
            ub = []
            acts = trainables[0]
            lb += [-1.0 / num_links] * tf.size(acts).eval()
            ub += [1.0 / num_links] * tf.size(acts).eval()
            designs = trainables[1]
            lb += [3] * tf.size(designs).eval()
            ub += [40] * tf.size(designs).eval()

            return (lb, ub)

    #IPython.embed()
    uda = pg.nlopt("slsqp")
    #uda = ppnf.snopt7(screen_output = False, library = "/home/aespielberg/snopt/lib/libsnopt7.so")
    algo = pg.algorithm(uda)
    #algo.extract(pg.nlopt).local_optimizer = pg.nlopt('lbfgs')

    algo.extract(pg.nlopt).maxeval = 50
    algo.set_verbosity(1)
    udp = RobotProblem(False)
    bounds = udp.get_bounds()
    mean = (np.array(bounds[0]) + np.array(bounds[1])) / 2.0
    num_vars = len(mean)
    prob = pg.problem(udp)
    pop = pg.population(prob, size=1)

    #TODO: initialize both parts different here
    acts = trainables[0]
    designs = trainables[1]

    # Random init: perturb the actuation part only; keep the stiffness
    # part exactly at the bound midpoint (std 0).
    std_act = np.ones(tf.size(acts).eval()) * 0.1
    std_young = np.ones(tf.size(designs).eval()) * 0.0
    #IPython.embed()
    std = np.concatenate([std_act, std_young])
    #act_part =  np.random.normal(scale=0.1, loc=mean, size=(tf.size(acts).eval(),))
    #young_part = 10.0 * tf.size(designs).eval()

    pop.set_x(0, np.random.normal(scale=std, loc=mean, size=(num_vars, )))
    #IPython.embed()

    pop.problem.c_tol = [1e-6] * prob.get_nc()
    #pop.problem.c_tol = [1e-4] * prob.get_nc()
    pop.problem.f_tol_rel = [100000.0]
    #IPython.embed()
    pop = algo.evolve(pop)
    IPython.embed()

    #IPython.embed() #We need to refactor this for real
    old_x = pop.champion_x
    # NOTE(review): the `assert False` below aborts here (unless run with
    # -O) and makes the second optimization phase below dead code.
    assert False
    udp = RobotProblem(True)
    prob = pg.problem(udp)
    pop = pg.population(prob, size=1)
    pop.set_x(0, old_x)
    pop.problem.c_tol = [1e-6] * prob.get_nc()
    #pop.problem.f_tol = [1e-6]
    pop.problem.f_tol_rel = [1e-4]
    pop = algo.evolve(pop)

    #now a second time

    # NOTE(review): unreachable, and also broken — `loss` is never defined
    # in this function and eval_sim requires a second positional argument.
    _, _, memo = eval_sim(loss)
    sim.visualize(memo)
Ejemplo n.º 4
0
def main(sess):
    """Optimize an initial velocity so a particle block reaches a goal.

    A single trainable 2-vector is broadcast to every particle as the
    initial velocity; gradient descent through the differentiable
    simulator pulls the block's final center of mass toward a fixed goal.

    Args:
        sess: open TF1 tf.Session used for graph construction and training.
    """
    batch_size = 1
    gravity = (0, -1)
    N = 10
    num_particles = N * N
    steps = 150
    dt = 1e-2
    goal_range = 0.15
    res = (30, 30)
    bc = get_bounding_box_bc(res)

    lr = 1e-2

    goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

    sim = Simulation(dt=dt,
                     num_particles=num_particles,
                     grid_res=res,
                     bc=bc,
                     gravity=gravity,
                     m_p=1,
                     V_p=1,
                     E=10,
                     nu=0.3,
                     sess=sess)

    # Trainable 2-vector, broadcast to every particle as initial velocity.
    velocity_ph = tf.Variable([0.2, 0.3], trainable=True)
    velocity = velocity_ph[None, :, None] + tf.zeros(
        shape=[batch_size, 2, num_particles], dtype=tf.float32)

    # Lay out an N x N block of particles, then swap to the simulator's
    # (batch, dim, particle) layout.
    position = np.zeros(shape=(batch_size, num_particles, 2))
    for b in range(batch_size):
        for row in range(N):
            for col in range(N):
                position[b, row * N + col] = ((row * 0.5 + 3) / 30,
                                              (col * 0.5 + 12.75) / 30)
    position = np.array(position).swapaxes(1, 2)

    sess.run(tf.global_variables_initializer())

    initial_state = sim.get_initial_state(position=position, velocity=velocity)

    # Loss: squared distance of the final center of mass to the goal.
    final_position = sim.initial_state.center_of_mass()
    loss = tf.reduce_sum((final_position - goal)**2)
    sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)
    sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)

    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    sim.set_initial_state(initial_state=initial_state)

    sym = sim.gradients_sym(loss, variables=trainables)

    goal_input = np.array([[0.7, 0.3]], dtype=np.float32)

    for i in range(100):
        start = time.time()
        memo = sim.run(initial_state=initial_state,
                       num_steps=steps,
                       iteration_feed_dict={goal: goal_input},
                       loss=loss)

        grad = sim.eval_gradients(sym, memo)
        # Plain SGD update applied through Variable.assign ops.
        updates = [
            v.assign(v - lr * g) for v, g in zip(trainables, grad)
        ]
        sess.run(updates)
        print('iter {:5d} time {:.3f} loss {:.4f}'.format(
            i,
            time.time() - start, memo.loss))
Ejemplo n.º 5
0
import time
from simulation import Simulation, get_bounding_box_bc
import tensorflow as tf
import numpy as np
from IPython import embed

# Module-level simulation configuration shared by main() below.
batch_size = 1
gravity = (0, 0)  # gravity disabled in this example
N = 10
group_particles = N * N * 2  # particles per group
num_particles = group_particles * 2  # two groups in total
steps = 100
dt = 5e-3  # simulation time step
goal_range = 0.15
res = (100, 100)  # background grid resolution
bc = get_bounding_box_bc(res)  # bounding-box boundary condition

lr = 5e-1  # gradient-descent learning rate


def main(sess):

    goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

    sim = Simulation(dt=dt,
                     num_particles=num_particles,
                     grid_res=res,
                     bc=bc,
                     gravity=gravity,
                     E=1,
                     m_p=1,
Ejemplo n.º 6
0
def main(sess):
  """Train a 3-D soft-walker's linear controller by gradient descent.

  Builds a one-layer (W1/b1 + tanh) controller over per-group mean
  position/velocity features plus the normalized goal, then runs an
  epoch loop of simulate / differentiate / clipped-SGD updates.

  NOTE(review): relies on module-level globals not visible here
  (batch_size, num_groups, group_num_particles, goal_pos, goal_range,
  W1, b1, actuation_strength, actuations, num_particles, gravity, head,
  gamma, group_offsets, group_sizes, sample_density, lr) — confirm
  against the full file.

  Args:
      sess: open TF1 tf.Session used for graph construction and training.
  """
  t = time.time()

  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 3], name='goal')

  # Define your controller here
  def controller(state):
    """Map simulation state -> (total actuation stress, debug dict)."""
    controller_inputs = []
    for i in range(num_groups):
      # Averaging mask over the particles of group i.
      mask = particle_mask(i * group_num_particles,
                           (i + 1) * group_num_particles)[:, None, :] * (
                               1.0 / group_num_particles)
      pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
      vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
      controller_inputs.append(pos)
      controller_inputs.append(vel)
      # Goal normalized to its sampling range (clamped to avoid div-by-0).
      controller_inputs.append((goal - goal_pos) / np.maximum(goal_range, 1e-5))
    # Batch, dim
    controller_inputs = tf.concat(controller_inputs, axis=1)
    assert controller_inputs.shape == (batch_size, 9 * num_groups), controller_inputs.shape
    controller_inputs = controller_inputs[:, :, None]
    assert controller_inputs.shape == (batch_size, 9 * num_groups, 1)
    # Batch, 9 * num_groups, 1
    # Single linear layer; W1 broadcast across the batch.
    intermediate = tf.matmul(W1[None, :, :] +
                             tf.zeros(shape=[batch_size, 1, 1]), controller_inputs)
    # Batch, #actuations, 1
    assert intermediate.shape == (batch_size, len(actuations), 1)
    assert intermediate.shape[2] == 1
    intermediate = intermediate[:, :, 0]
    # Batch, #actuations
    actuation = tf.tanh(intermediate + b1[None, :]) * actuation_strength
    debug = {'controller_inputs': controller_inputs[:, :, 0], 'actuation': actuation}
    total_actuation = 0
    zeros = tf.zeros(shape=(batch_size, num_particles))
    for i, group in enumerate(actuations):
      act = actuation[:, i:i + 1]
      assert len(act.shape) == 2
      mask = particle_mask_from_group(group)
      act = act * mask
      # Actuation stress acts on the center entry of the 3x3 matrix only.
      act = make_matrix3d(zeros, zeros, zeros, zeros, act, zeros, zeros, zeros, zeros)
      total_actuation = total_actuation + act
    return total_actuation, debug

  res = (60, 30, 30)
  bc = get_bounding_box_bc(res)

  sim = Simulation(
      dt=0.007,
      num_particles=num_particles,
      grid_res=res,
      dx=1.0 / res[1],
      gravity=gravity,
      controller=None,  # NOTE(review): controller disabled — pass `controller` to enable
      batch_size=batch_size,
      bc=bc,
      sess=sess,
      E=15,
      part_size=10)
  print("Building time: {:.4f}s".format(time.time() - t))

  # NOTE(fix): a premature sim.run(...) that used to sit here referenced
  # initial_state, goal_input and loss before any of them were defined,
  # raising NameError at runtime. It was removed; the identical run
  # happens inside the training loop below.

  # Tracked group's features (9 per group: pos, vel, normalized goal).
  final_state = sim.initial_state['debug']['controller_inputs']
  s = head * 9

  final_position = final_state[:, s:s + 3]
  final_velocity = final_state[:, s + 3:s + 6]
  loss1 = tf.reduce_mean(tf.reduce_sum((final_position - goal)**2, axis=1))
  loss2 = tf.reduce_mean(tf.reduce_sum(final_velocity**2, axis=1))

  loss = loss1 + gamma * loss2

  # Sample particle positions on a regular 3-D grid per group.
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          for z in range(sample_density):
            scale = 0.2
            u = ((x + 0.5) / sample_density * group_sizes[i][0] + offset[0]
                ) * scale + 0.2
            v = ((y + 0.5) / sample_density * group_sizes[i][1] + offset[1]
                ) * scale + 0.1
            w = ((z + 0.5) / sample_density * group_sizes[i][2] + offset[2]
                 ) * scale + 0.1
            initial_positions[b].append([u, v, w])
  assert len(initial_positions[0]) == num_particles
  # (batch, particle, dim) -> (batch, dim, particle)
  initial_positions = np.array(initial_positions).swapaxes(1, 2)

  sess.run(tf.global_variables_initializer())

  initial_state = sim.get_initial_state(
      position=np.array(initial_positions), youngs_modulus=10)

  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.set_initial_state(initial_state=initial_state)

  tt = time.time()
  sym = sim.gradients_sym(loss, variables=trainables)
  print('sym', time.time() - tt)

  # One batch of random goals inside the goal box (a single training item).
  gx, gy, gz = goal_range
  pos_x, pos_y, pos_z = goal_pos
  goal_train = [np.array(
    [[pos_x + (random.random() - 0.5) * gx,
      pos_y + (random.random() - 0.5) * gy,
      pos_z + (random.random() - 0.5) * gz
      ] for _ in range(batch_size)],
    dtype=np.float32) for __ in range(1)]

  vis_id = list(range(batch_size))
  random.shuffle(vis_id)
  # Gradients are fed back through placeholders so they can be clipped in
  # numpy before the assign ops run.
  grad_ph = [
      tf.placeholder(shape=v.shape, dtype=tf.float32) for v in trainables
  ]
  gradient_descent = [
      v.assign(v - lr * g) for v, g in zip(trainables, grad_ph)
  ]

  # Optimization loop
  for e in range(200):
    t = time.time()
    print('Epoch {:5d}, learning rate {}'.format(e, lr))

    loss_cal = 0.
    print('train...')
    for it, goal_input in enumerate(goal_train):
      tt = time.time()
      memo = sim.run(
          initial_state=initial_state,
          num_steps=400,
          iteration_feed_dict={goal: goal_input},
          loss=loss)
      print('forward', time.time() - tt)
      tt = time.time()
      grad = sim.eval_gradients(sym=sym, memo=memo)
      print('backward', time.time() - tt)

      for i, g in enumerate(grad):
        print(i, np.mean(np.abs(g)))
      # Clip gradients elementwise before applying the update.
      grad = [np.clip(g, -1, 1) for g in grad]

      grad_feed_dict = {}
      for gp, g in zip(grad_ph, grad):
        grad_feed_dict[gp] = g
      sess.run(gradient_descent, feed_dict=grad_feed_dict)
      print('Iter {:5d} time {:.3f} loss {}'.format(
          it, time.time() - t, memo.loss))
      loss_cal = loss_cal + memo.loss

    print('train loss {}'.format(loss_cal / len(goal_train)))
Ejemplo n.º 7
0
def main(sess):
    """Build the 2-D soft-robot simulation and optimize controller/design.

    Depending on module-level flags, optimization is done either via
    pygmo/NLopt SLSQP (``use_pygmo``), a hand-rolled BFGS with an optional
    Wolfe-condition line search (``use_bfgs`` / ``wolfe_search``), or plain
    gradient descent.  Relies on many module-level globals defined elsewhere
    in this file (batch_size, num_groups, actuations, W1, b1, lr, config,
    nn_control, actuation_seq, num_steps, num_acts, goal_range, ...).

    Args:
        sess: an active TensorFlow session used for the whole run.
    """
    t = time.time()

    goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

    # Define your controller here
    def controller(state):
        # Per-group mean position/velocity plus the goal form the inputs of a
        # single tanh layer producing one actuation per actuator group.
        controller_inputs = []
        for i in range(num_groups):
            mask = particle_mask(i * group_num_particles,
                                 (i + 1) * group_num_particles)[:, None, :] * (
                                     1.0 / group_num_particles)
            pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
            vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
            controller_inputs.append(pos)
            controller_inputs.append(vel)
            controller_inputs.append(goal)
        # Batch, dim
        controller_inputs = tf.concat(controller_inputs, axis=1)
        assert controller_inputs.shape == (batch_size, 6 *
                                           num_groups), controller_inputs.shape
        controller_inputs = controller_inputs[:, :, None]
        assert controller_inputs.shape == (batch_size, 6 * num_groups, 1)
        # Batch, 6 * num_groups, 1
        if nn_control:
            intermediate = tf.matmul(
                W1[None, :, :] + tf.zeros(shape=[batch_size, 1, 1]),
                controller_inputs)
            # Batch, #actuations, 1
            assert intermediate.shape == (batch_size, len(actuations), 1)
            assert intermediate.shape[2] == 1
            intermediate = intermediate[:, :, 0]
            # Batch, #actuations
            actuation = tf.tanh(intermediate +
                                b1[None, :]) * actuation_strength
        else:
            # Open-loop mode: index a trainable per-step actuation table.
            #IPython.embed()
            actuation = tf.expand_dims(
                actuation_seq[0,
                              state.step_count // (num_steps // num_acts), :],
                0)
        debug = {
            'controller_inputs': controller_inputs[:, :, 0],
            'actuation': actuation
        }
        total_actuation = 0
        zeros = tf.zeros(shape=(batch_size, num_particles))
        for i, group in enumerate(actuations):
            act = actuation[:, i:i + 1]
            assert len(act.shape) == 2
            mask = particle_mask_from_group(group)
            act = act * mask
            # First PK stress here
            act = make_matrix2d(zeros, zeros, zeros, act)
            # Convert to Kirchhoff stress
            total_actuation = total_actuation + act
        return total_actuation, debug

    res = (30, 30)
    bc = get_bounding_box_bc(res)

    if config == 'B':
        bc[0][:, :, :5] = -1  # Sticky
        bc[1][:, :, :5] = 0  # Sticky

    sim = Simulation(dt=0.0025,
                     num_particles=num_particles,
                     grid_res=res,
                     gravity=gravity,
                     controller=controller,
                     batch_size=batch_size,
                     bc=bc,
                     sess=sess)
    print("Building time: {:.4f}s".format(time.time() - t))

    # The controller's debug inputs expose per-group (pos, vel, goal) slices;
    # `head` selects which group's features define the loss.
    final_state = sim.initial_state['debug']['controller_inputs']
    s = head * 6

    final_position = final_state[:, s:s + 2]
    final_velocity = final_state[:, s + 2:s + 4]
    gamma = 0.0  # velocity penalty currently disabled in `loss`
    loss1 = tf.reduce_sum((final_position - goal)**2)
    loss2 = tf.reduce_sum(final_velocity**2)
    loss_velocity = loss2
    loss_act = tf.reduce_sum(actuation_seq**2.0)
    loss_zero = tf.reduce_sum(actuation_seq * 0.0)

    loss = loss1 + gamma * loss2

    # Sample particle positions on a regular grid inside each group's box.
    initial_positions = [[] for _ in range(batch_size)]
    for b in range(batch_size):
        for i, offset in enumerate(group_offsets):
            for x in range(sample_density):
                for y in range(sample_density):
                    scale = 0.2
                    u = ((x + 0.5) / sample_density * group_sizes[i][0] +
                         offset[0]) * scale + 0.2
                    v = ((y + 0.5) / sample_density * group_sizes[i][1] +
                         offset[1]) * scale + 0.1
                    initial_positions[b].append([u, v])
    assert len(initial_positions[0]) == num_particles
    initial_positions = np.array(initial_positions).swapaxes(1, 2)

    # Per-particle stiffness is itself a trainable (design optimization).
    youngs_modulus = tf.Variable(
        10.0 * tf.ones(shape=[1, 1, num_particles], dtype=tf.float32),
        trainable=True)
    initial_state = sim.get_initial_state(
        position=np.array(initial_positions),
        youngs_modulus=tf.identity(youngs_modulus))

    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    if use_bfgs:
        # One approximate-Hessian matrix per trainable variable.
        B = [
            tf.Variable(tf.eye(tf.size(trainable)), trainable=False)
            for trainable in trainables
        ]

    sess.run(tf.global_variables_initializer())

    sim.set_initial_state(initial_state=initial_state)

    sym = sim.gradients_sym(loss, variables=trainables)
    sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
    sim.add_vector_visualization(pos=final_position,
                                 vector=final_velocity,
                                 color=(0, 0, 1),
                                 scale=50)

    sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)

    if config == 'A':
        goal_input = np.array([[
            0.5 + (random.random() - 0.5) * goal_range * 2, 0.6 +
            (random.random() - 0.5) * goal_range
        ] for _ in range(batch_size)],
                              dtype=np.float32)
    elif config == 'B':
        goal_input = np.array([[
            0.65 + (random.random() - 0.5) * goal_range * 2, 0.55 +
            (random.random() - 0.5) * goal_range
        ] for _ in range(batch_size)],
                              dtype=np.float32)
    # Optimization loop
    #IPython.embed()
    #In progress code
    '''
  memo = sim.run(
        initial_state=initial_state,
        num_steps=num_steps,
        iteration_feed_dict={goal: goal_input},
        loss=loss)
  IPython.embed()
  
  
  def loss_callback():
    memo = sim.run(
        initial_state=initial_state,
        num_steps=num_steps,
        iteration_feed_dict={goal: goal_input},
        loss=loss)
    
    return loss
  '''

    # Wolfe-condition constants (sufficient decrease / curvature).
    c1 = 1e-4
    c2 = 0.9

    def eval_sim(loss_tensor):
        # Forward-simulate once; return (loss value, gradients, memo).
        memo = sim.run(initial_state=initial_state,
                       num_steps=num_steps,
                       iteration_feed_dict={goal: goal_input},
                       loss=loss_tensor)
        grad = sim.eval_gradients(sym=sym, memo=memo)
        return memo.loss, grad, memo

    def flatten_trainables():
        return tf.concat(
            [tf.squeeze(ly.flatten(trainable)) for trainable in trainables], 0)

    def flatten_vectors(vectors):
        return tf.concat(
            [tf.squeeze(ly.flatten(vector)) for vector in vectors], 0)

    def assignment_run(xs):
        sess.run([trainable.assign(x) for x, trainable in zip(xs, trainables)])

    def f_and_grad_step(step_size, x, delta_x):
        # Evaluate loss/grad at x + step_size * delta_x, then restore x.
        old_x = [x_i.eval() for x_i in x]
        assignment_run([
            x_i + step_size * delta_x_i for x_i, delta_x_i in zip(x, delta_x)
        ])  #take step
        # Fixed: the original wrote `loss, grad, _ = eval_sim(loss)`, which
        # made `loss` a function-local and raised UnboundLocalError before
        # eval_sim could read the outer loss tensor.
        step_loss, step_grad, _ = eval_sim(loss)
        assignment_run(old_x)  #revert
        return step_loss, step_grad

    def wolfe_1(delta_x, new_f, current_f, current_grad, step_size):
        # Sufficient-decrease (Armijo) condition.
        valid = new_f <= current_f + c1 * step_size * tf.tensordot(
            flatten_vectors(current_grad), flatten_vectors(delta_x), 1)
        return valid.eval()

    def wolfe_2(delta_x, new_grad, current_grad, step_size):
        # Strong curvature condition.
        valid = np.abs(
            tf.tensordot(flatten_vectors(new_grad), flatten_vectors(delta_x),
                         1).eval()) <= -c2 * tf.tensordot(
                             flatten_vectors(current_grad),
                             flatten_vectors(delta_x), 1).eval()
        return valid

    def zoom(a_min, a_max, search_dirs, current_f, current_grad):
        # Bisection "zoom" phase of the Wolfe line search.
        while True:
            a_mid = (a_min + a_max) / 2.0
            print('a_min: ', a_min, 'a_max: ', a_max, 'a_mid: ', a_mid)
            step_loss_min, step_grad_min = f_and_grad_step(
                a_min, trainables, search_dirs)
            step_loss, step_grad = f_and_grad_step(a_mid, trainables,
                                                   search_dirs)
            valid_1 = wolfe_1(search_dirs, step_loss, current_f, current_grad,
                              a_mid)
            valid_2 = wolfe_2(search_dirs, step_grad, current_grad, a_mid)
            if not valid_1 or step_loss >= step_loss_min:
                a_max = a_mid
            else:
                if valid_2:
                    return a_mid
                # Fixed: added .eval() — branching on a raw graph tensor
                # raises TypeError in TF1 graph mode (cf. the identical
                # check in the outer line-search loop, which does .eval()).
                if tf.tensordot(flatten_vectors(step_grad),
                                flatten_vectors(search_dirs),
                                1).eval() * (a_max - a_min) >= 0:
                    a_max = a_min
                a_min = a_mid

    loss_val, grad, memo = eval_sim(
        loss
    )  #TODO: this is to get dimensions, find a better way to do this without simming
    old_g_flat = [None] * len(grad)
    old_v_flat = [None] * len(grad)

    t = time.time()

    loss_val, grad, memo = eval_sim(loss)

    #BFGS update:
    #IPython.embed()

    if use_pygmo:

        def assignment_helper(x):
            # Scatter pygmo's flat decision vector back into the trainables.
            assignments = []
            idx = 0
            x = x.astype(np.float32)
            for v in trainables:
                #first, get count:
                var_cnt = tf.size(v).eval()
                assignments += [
                    v.assign(tf.reshape(x[idx:idx + var_cnt], v.shape))
                ]
                idx += var_cnt
            sess.run(assignments)

        class RobotProblem:
            """pygmo user-defined problem: minimize actuation effort subject
            to final position/velocity lying within `goal_ball`."""

            def __init__(self, use_act):
                self.use_act = use_act

            goal_ball = 0.002

            def fitness(self, x):
                assignment_helper(x)
                if self.use_act:
                    loss_act_val, _, _ = eval_sim(loss_act)
                else:
                    loss_act_val, _, _ = eval_sim(loss_zero)
                loss_val, _, _ = eval_sim(loss)
                # Renamed from `c1`, which shadowed the Wolfe constant above.
                loss_vel_val, _, memo = eval_sim(loss_velocity)
                sim.visualize(memo)
                return [
                    loss_act_val.astype(np.float64),
                    loss_val.astype(np.float64) - self.goal_ball,
                    loss_vel_val.astype(np.float64) - self.goal_ball
                ]

            def get_nic(self):
                # Two inequality constraints (position ball, velocity ball).
                return 2

            def get_nec(self):
                # No equality constraints.
                return 0

            def gradient(self, x):
                assignment_helper(x)
                _, grad, _ = eval_sim(loss)
                _, grad_velocity, _ = eval_sim(loss_velocity)
                _, grad_act, _ = eval_sim(loss_act)
                return np.concatenate([
                    flatten_vectors(grad_act).eval().astype(np.float64),
                    flatten_vectors(grad).eval().astype(np.float64),
                    flatten_vectors(grad_velocity).eval().astype(np.float64)
                ])
                #return flatten_vectors(grad).eval().astype(np.float64)

            def get_bounds(self):
                #actuation
                lb = []
                ub = []
                acts = trainables[0]
                lb += [-5] * tf.size(acts).eval()
                ub += [5] * tf.size(acts).eval()
                designs = trainables[1]
                lb += [5] * tf.size(designs).eval()
                ub += [20] * tf.size(designs).eval()

                return (lb, ub)

        #IPython.embed()
        uda = pg.nlopt("slsqp")
        #uda = ppnf.snopt7(screen_output = False, library = "/home/aespielberg/snopt/lib/libsnopt7.so")
        algo = pg.algorithm(uda)
        #algo.extract(pg.nlopt).local_optimizer = pg.nlopt('lbfgs')

        algo.extract(pg.nlopt).maxeval = 20
        algo.set_verbosity(1)
        # Phase 1: feasibility only (zero actuation objective).
        udp = RobotProblem(False)
        bounds = udp.get_bounds()
        mean = (np.array(bounds[0]) + np.array(bounds[1])) / 2.0
        num_vars = len(mean)
        prob = pg.problem(udp)
        pop = pg.population(prob, size=1)
        pop.set_x(0, np.random.normal(scale=0.3, loc=mean, size=(num_vars, )))
        pop.problem.c_tol = [1e-4] * prob.get_nc()
        #pop.problem.c_tol = [1e-4] * prob.get_nc()
        pop.problem.f_tol_rel = [100000.0]
        #IPython.embed()
        pop = algo.evolve(pop)
        IPython.embed()

        #IPython.embed() #We need to refactor this for real
        # Phase 2: warm-start from phase 1 and minimize actuation effort.
        old_x = pop.champion_x
        udp = RobotProblem(True)
        prob = pg.problem(udp)
        pop = pg.population(prob, size=1)
        pop.set_x(0, old_x)
        pop.problem.c_tol = [1e-4] * prob.get_nc()
        #pop.problem.f_tol = [1e-6]
        pop.problem.f_tol_rel = [1e-4]
        pop = algo.evolve(pop)

        #now a second time

        _, _, memo = eval_sim(loss)
        sim.visualize(memo)
        return

    for i in range(1000000):

        if use_bfgs:
            bfgs = [None] * len(grad)
            B_update = [None] * len(grad)
            search_dirs = [None] * len(grad)
            #TODO: for now, assuming there is only one trainable and one grad for ease
            for v, g, idx in zip(trainables, grad, range(len(grad))):
                g_flat = ly.flatten(g)
                v_flat = ly.flatten(v)
                # Fixed: was `B[idx] == None`; identity test is intended
                # (equality on a tf.Variable is operator-overloaded).
                if B[idx] is None:
                    B[idx] = tf.eye(tf.size(v_flat))
                if i > 0:
                    # Rank-two BFGS update of the approximate Hessian.
                    y_flat = tf.squeeze(g_flat - old_g_flat[idx])
                    s_flat = tf.squeeze(v_flat - old_v_flat[idx])
                    B_s_flat = tf.tensordot(B[idx], s_flat, 1)
                    term_1 = -tf.tensordot(B_s_flat, tf.transpose(B_s_flat),
                                           0) / tf.tensordot(
                                               s_flat, B_s_flat, 1)
                    term_2 = tf.tensordot(y_flat, y_flat, 0) / tf.tensordot(
                        y_flat, s_flat, 1)
                    B_update[idx] = B[idx].assign(B[idx] + term_1 + term_2)
                    sess.run([B_update[idx]])

                if tf.abs(tf.matrix_determinant(B[idx])).eval() < 1e-6:
                    # Near-singular Hessian estimate: reset to identity and
                    # fall back to steepest descent for this step.
                    sess.run([B[idx].assign(tf.eye(tf.size(v_flat)))])
                    search_dir = -tf.transpose(g_flat)
                else:
                    #search_dir = -tf.matrix_solve_ls(B[idx],tf.transpose(g_flat), l2_regularizer=0.0, fast=True) #adding regularizer for stability
                    search_dir = -tf.matmul(
                        tf.linalg.inv(B[idx]),
                        tf.transpose(g_flat))  #TODO: inverse bad,speed htis up
                search_dir_reshape = tf.reshape(search_dir, g.shape)
                search_dirs[idx] = search_dir_reshape
                old_g_flat[idx] = g_flat
                old_v_flat[idx] = v_flat.eval()
                #TODO: B upate

            #Now it's linesearch time
            if wolfe_search:
                a_max = 0.1
                a_1 = a_max / 2.0
                a_0 = 0.0

                iterate = 1
                while True:
                    step_loss, step_grad = f_and_grad_step(
                        a_1, trainables, search_dirs)
                    print(a_1)
                    valid_1 = wolfe_1(search_dirs, step_loss, loss_val, grad,
                                      a_1)
                    valid_2 = wolfe_2(search_dirs, step_grad, grad, a_1)
                    print('wolfe 1: ', valid_1, 'wolfe 2: ', valid_2)
                    if (not valid_1) or (iterate > 1 and step_loss > loss_val):
                        print('cond1')
                        a = zoom(a_0, a_1, search_dirs, loss_val, grad)
                        # Fixed: without this break the zoom result was
                        # discarded and the search kept looping (all other
                        # termination branches break).
                        break
                    if valid_2:
                        print('cond2')
                        a = a_1
                        break
                    if tf.tensordot(flatten_vectors(step_grad),
                                    flatten_vectors(search_dirs),
                                    1).eval() >= 0:
                        print('cond3')
                        # Fixed: `current_f`/`current_grad` were undefined
                        # here (NameError); the current iterate's values are
                        # `loss_val`/`grad`, as used in cond1.
                        a = zoom(a_1, a_0, search_dirs, loss_val, grad)
                        break
                    print('no cond')
                    temp = a_1
                    a_1 = (a_1 + a_max) / 2.0
                    a_0 = temp
                    iterate += 1
                    if iterate > 5:
                        #close enough
                        a = a_1
                        break
            else:
                a = lr
            for v, idx in zip(trainables, range(len(grad))):
                print('final a ', a)
                bfgs[idx] = v.assign(v + search_dirs[idx] * a)
            sess.run(bfgs)
            print('stepped!!')
        else:
            gradient_descent = [
                v.assign(v - lr * g) for v, g in zip(trainables, grad)
            ]
            sess.run(gradient_descent)

        print('iter {:5d} time {:.3f} loss {:.4f}'.format(
            i,
            time.time() - t, memo.loss))
        if i % 1 == 0:
            sim.visualize(memo)

    # in progress code
    # Fixed: a dangling, unterminated triple-quote followed here in the
    # original and made the module unparsable; removed.
Example #8
0
def main(sess):
  """Benchmark forward simulation and gradient evaluation of the walker.

  Builds the simulation graph, then runs 50 epochs of forward simulation
  plus gradient evaluation over the sampled goals, timing each epoch.
  Relies on module-level globals (batch_size, num_groups, W1, b1, gamma,
  goal_pos, goal_range, group_offsets, sample_density, head, ...).

  Args:
      sess: an active TensorFlow session used for the whole run.
  """
  t = time.time()

  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

  # Define your controller here
  def controller(state):
    # Per-group mean position/velocity plus a normalized goal feature feed
    # a single tanh layer producing one actuation per actuator group.
    controller_inputs = []
    for i in range(num_groups):
      mask = particle_mask(i * group_num_particles,
                           (i + 1) * group_num_particles)[:, None, :] * (
                               1.0 / group_num_particles)
      pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
      vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
      controller_inputs.append(pos)
      controller_inputs.append(vel)
      # Clamp the denominator so a zero goal_range cannot divide by zero.
      controller_inputs.append((goal - goal_pos) / np.maximum(goal_range, 1e-5))
    # Batch, dim
    controller_inputs = tf.concat(controller_inputs, axis=1)
    assert controller_inputs.shape == (batch_size, 6 * num_groups), controller_inputs.shape
    controller_inputs = controller_inputs[:, :, None]
    assert controller_inputs.shape == (batch_size, 6 * num_groups, 1)
    # Batch, 6 * num_groups, 1
    intermediate = tf.matmul(W1[None, :, :] +
                             tf.zeros(shape=[batch_size, 1, 1]), controller_inputs)
    # Batch, #actuations, 1
    assert intermediate.shape == (batch_size, len(actuations), 1)
    assert intermediate.shape[2] == 1
    intermediate = intermediate[:, :, 0]
    # Batch, #actuations
    actuation = tf.tanh(intermediate + b1[None, :]) * actuation_strength
    debug = {'controller_inputs': controller_inputs[:, :, 0], 'actuation': actuation}
    total_actuation = 0
    zeros = tf.zeros(shape=(batch_size, num_particles))
    for i, group in enumerate(actuations):
      act = actuation[:, i:i+1]
      assert len(act.shape) == 2
      mask = particle_mask_from_group(group)
      act = act * mask
      # First PK stress here
      act = make_matrix2d(zeros, zeros, zeros, act)
      # Convert to Kirchhoff stress
      # (Fixed: removed an unused `F = state['deformation_gradient']` lookup
      # that was never read.)
      total_actuation = total_actuation + act
    return total_actuation, debug
  
  res = (80, 40)
  bc = get_bounding_box_bc(res)
  
  sim = Simulation(
      dt=0.005,
      num_particles=num_particles,
      grid_res=res,
      dx=1.0 / res[1],
      gravity=gravity,
      controller=controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess,
      scale=20,
      part_size = 1)
  print("Building time: {:.4f}s".format(time.time() - t))

  # `head` selects which group's (pos, vel) slice defines the loss.
  final_state = sim.initial_state['debug']['controller_inputs']
  s = head * 6
  
  final_position = final_state[:, s:s+2]
  final_velocity = final_state[:, s + 2: s + 4]
  # Reward forward (x) progress, penalize residual velocity.
  loss1 = tf.reduce_mean(tf.reduce_sum(-final_position[:, 0]))
  loss2 = tf.reduce_mean(tf.reduce_sum(final_velocity ** 2, axis = 1))

  saver = tf.train.Saver()

  loss = loss1 + gamma * loss2

  # Sample particle positions on a regular grid inside each group's box.
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          scale = 0.2
          u = ((x + 0.5) / sample_density * group_sizes[i][0] + offset[0]
              ) * scale + 0.2
          v = ((y + 0.5) / sample_density * group_sizes[i][1] + offset[1]
              ) * scale + 0.1
          initial_positions[b].append([u, v])
  assert len(initial_positions[0]) == num_particles
  initial_positions = np.array(initial_positions).swapaxes(1, 2)

  sess.run(tf.global_variables_initializer())

  initial_state = sim.get_initial_state(
      position=np.array(initial_positions), youngs_modulus=10)

  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.set_initial_state(initial_state=initial_state)
  
  tt = time.time()
  sym = sim.gradients_sym(loss, variables=trainables)
  print('sym', time.time() - tt)
  #sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
  #sim.add_vector_visualization(pos=final_position, vector=final_velocity, color=(0, 0, 1), scale=50)
  #sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)

  gx, gy = goal_range
  pos_x, pos_y = goal_pos
  goal_train = [np.array(
    [[pos_x + (random.random() - 0.5) * gx,
      pos_y + (random.random() - 0.5) * gy] for _ in range(batch_size)],
    dtype=np.float32) for __ in range(1)]

  vis_id = list(range(batch_size))
  random.shuffle(vis_id)
  grad_ph = [
      tf.placeholder(shape = v.shape, dtype = tf.float32) for v in trainables
  ]
  # Optimization loop
  # NOTE(review): gradients are computed but never applied (grad_ph is
  # unused) — this loop is a pure forward+backward timing benchmark.
  tt0 = time.time()
  for e in range(50):
    tt = time.time()
    for it, goal_input in enumerate(goal_train):
      memo = sim.run(
          initial_state=initial_state,
          num_steps=200,
          iteration_feed_dict={goal: goal_input},
          loss=loss)
      grad = sim.eval_gradients(sym=sym, memo=memo)
    print(f'Time in epoch {e} is {np.round(time.time() - tt,3)}. Total: {np.round(time.time() - tt0,3)}')
Example #9
0
def main(sess):
    """Run an open-loop, sinusoidally-actuated demo and visualize it.

    No learning happens here: the controller has no trainable parameters,
    and the loop only re-runs the forward simulation and renders it.
    Relies on module-level globals (batch_size, num_particles, num_groups,
    actuations, group_offsets, sample_density, goal_pos, goal_range, lr,
    exp, gravity, ...).

    Args:
        sess: an active TensorFlow session used for the whole run.
    """
    t = time.time()

    # Fed each iteration but never read by this open-loop controller.
    goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

    # Define your controller here
    def controller(state):
        # Open-loop control: every group receives the same sinusoid of the
        # simulation step count; no state feedback.
        actuation = 2 * np.ones(shape=(batch_size, num_groups)) * tf.sin(
            0.1 * tf.cast(state.get_evaluated()['step_count'], tf.float32))
        total_actuation = 0
        zeros = tf.zeros(shape=(batch_size, num_particles))
        for i, group in enumerate(actuations):
            act = actuation[:, i:i + 1] * 2
            assert len(act.shape) == 2
            mask = particle_mask_from_group(group)
            act = act * mask
            # First PK stress here
            act = make_matrix2d(zeros, zeros, zeros, act)
            # Convert to Kirchhoff stress
            total_actuation = total_actuation + act
        # `1` stands in for the usual debug dict (unused downstream here).
        return total_actuation, 1

    res = (80, 40)
    bc = get_bounding_box_bc(res)

    sim = Simulation(dt=0.005,
                     num_particles=num_particles,
                     grid_res=res,
                     dx=1.0 / res[1],
                     gravity=gravity,
                     controller=controller,
                     batch_size=batch_size,
                     bc=bc,
                     sess=sess,
                     scale=20)
    print("Building time: {:.4f}s".format(time.time() - t))

    # Computed but unused in this demo.
    s = head * 6

    # Sample particle positions on a regular grid inside each group's box
    # and give particles an initial swirl velocity.
    initial_positions = [[] for _ in range(batch_size)]
    initial_velocity = np.zeros(shape=(batch_size, 2, num_particles))
    for b in range(batch_size):
        c = 0
        for i, offset in enumerate(group_offsets):
            c += 1
            for x in range(sample_density):
                for y in range(sample_density):
                    scale = 0.2
                    u = ((x + 0.5) / sample_density * group_sizes[i][0] +
                         offset[0]) * scale + 0.2
                    v = ((y + 0.5) / sample_density * group_sizes[i][1] +
                         offset[1]) * scale + 0.1
                    initial_positions[b].append([u, v])
                    # NOTE(review): `c` is only incremented once per group
                    # (outside the x/y loops), so this repeatedly overwrites
                    # the same particle's velocity; batch index 0 is also
                    # used instead of `b`. Looks like it should be a
                    # per-particle counter with index `b` — confirm intent.
                    initial_velocity[0, :, c] = (2 * (y - sample_density / 2),
                                                 -2 * (x - sample_density / 2))
    assert len(initial_positions[0]) == num_particles
    initial_positions = np.array(initial_positions).swapaxes(1, 2)

    sess.run(tf.global_variables_initializer())

    initial_state = sim.get_initial_state(position=np.array(initial_positions),
                                          youngs_modulus=10,
                                          velocity=initial_velocity)

    sim.set_initial_state(initial_state=initial_state)

    # Random goals are sampled for API compatibility; the controller
    # ignores them.
    gx, gy = goal_range
    pos_x, pos_y = goal_pos
    goal_train = [
        np.array([[
            pos_x + (random.random() - 0.5) * gx, pos_y +
            (random.random() - 0.5) * gy
        ] for _ in range(batch_size)],
                 dtype=np.float32) for __ in range(1)
    ]

    vis_id = list(range(batch_size))
    random.shuffle(vis_id)

    # Optimization loop
    # NOTE(review): despite the name, nothing is optimized — each iteration
    # only re-simulates and visualizes.
    for i in range(100000):
        t = time.time()
        print('Epoch {:5d}, learning rate {}'.format(i, lr))

        print('train...')
        for it, goal_input in enumerate(goal_train):
            tt = time.time()
            memo = sim.run(
                initial_state=initial_state,
                num_steps=400,
                iteration_feed_dict={goal: goal_input},
            )
            print('forward', time.time() - tt)
            tt = time.time()
            # NOTE(review): `tt` was just reset, so this prints ~0 — no
            # backward pass actually runs in this demo.
            print('backward', time.time() - tt)

            sim.visualize(memo,
                          batch=random.randrange(batch_size),
                          export=exp,
                          show=True,
                          interval=4)
Example #10
0
def main(sess):
    """Train the 2-D walker controller with gradient descent, checkpointing.

    Builds the simulation graph, then runs 200 epochs of forward simulation
    + manual gradient-descent updates on the sampled goals, saving a
    checkpoint and visualizing one batch element after each epoch.
    Relies on module-level globals (batch_size, num_groups,
    feature_dim_per_group, W1, b1, lr, gamma, head, goal_pos, goal_range,
    group_offsets, sample_density, ...).

    Args:
        sess: an active TensorFlow session used for the whole run.
    """
    t = time.time()

    # Fed each iteration; the goal feature is currently commented out of
    # the controller inputs, so this only parameterizes goal sampling.
    goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

    # Define your controller here
    def controller(state):
        controller_inputs = []
        # Normalized goal feature (clamped denominator); currently unused.
        goal_feature = (goal - goal_pos) / np.maximum(goal_range, 1e-5)
        for i in range(num_groups):
            # Per-group mean position/velocity as controller features.
            mask = particle_mask(i * group_num_particles,
                                 (i + 1) * group_num_particles)[:, None, :] * (
                                     1.0 / group_num_particles)
            pos = tf.reduce_sum(mask * state.position, axis=2, keepdims=False)
            vel = tf.reduce_sum(mask * state.velocity, axis=2, keepdims=False)
            controller_inputs.append(pos)
            controller_inputs.append(vel)
            #controller_inputs.append(goal_feature)
        # Batch, dim
        controller_inputs = tf.concat(controller_inputs, axis=1)
        assert controller_inputs.shape == (batch_size, feature_dim_per_group *
                                           num_groups), controller_inputs.shape
        controller_inputs = controller_inputs[:, :, None]
        assert controller_inputs.shape == (batch_size,
                                           feature_dim_per_group * num_groups,
                                           1)
        # Batch, 6 * num_groups, 1
        intermediate = tf.matmul(
            W1[None, :, :] + tf.zeros(shape=[batch_size, 1, 1]),
            controller_inputs)
        # Batch, #actuations, 1
        assert intermediate.shape == (batch_size, len(actuations), 1)
        assert intermediate.shape[2] == 1
        intermediate = intermediate[:, :, 0]
        # Batch, #actuations
        actuation = tf.tanh(intermediate + b1[None, :]) * actuation_strength
        debug = {
            'controller_inputs': controller_inputs[:, :, 0],
            'actuation': actuation
        }
        total_actuation = 0
        zeros = tf.zeros(shape=(batch_size, num_particles))
        for i, group in enumerate(actuations):
            act = actuation[:, i:i + 1]
            assert len(act.shape) == 2
            mask = particle_mask_from_group(group)
            act = act * mask
            total_actuation = total_actuation + act
        # Assemble the summed scalar actuations into a 2x2 stress matrix.
        total_actuation = make_matrix2d(zeros, zeros, zeros, total_actuation)
        return total_actuation, debug

    res = (80, 40)
    bc = get_bounding_box_bc(res)

    sim = Simulation(dt=0.005,
                     num_particles=num_particles,
                     grid_res=res,
                     dx=1.0 / res[1],
                     gravity=gravity,
                     controller=controller,
                     batch_size=batch_size,
                     bc=bc,
                     sess=sess,
                     scale=20,
                     part_size=10)
    print("Building time: {:.4f}s".format(time.time() - t))

    # `head` selects which group's (pos, vel) slice defines the loss.
    final_state = sim.initial_state['debug']['controller_inputs']
    s = head * feature_dim_per_group

    final_position = final_state[:, s:s + 2]
    final_velocity = final_state[:, s + 2:s + 4]
    # Reward forward (x) progress, penalize residual velocity.
    loss1 = tf.reduce_mean(tf.reduce_sum(-final_position[:, 0]))
    loss2 = tf.reduce_mean(tf.reduce_sum(final_velocity**2, axis=1))

    saver = tf.train.Saver()

    loss = loss1 + gamma * loss2

    # Sample particle positions on a regular grid inside each group's box.
    initial_positions = [[] for _ in range(batch_size)]
    for b in range(batch_size):
        for i, offset in enumerate(group_offsets):
            for x in range(sample_density):
                for y in range(sample_density):
                    scale = 0.2
                    u = ((x + 0.5) / sample_density * group_sizes[i][0] +
                         offset[0]) * scale + 0.2
                    v = ((y + 0.5) / sample_density * group_sizes[i][1] +
                         offset[1]) * scale + 0.1
                    initial_positions[b].append([u, v])
    assert len(initial_positions[0]) == num_particles
    initial_positions = np.array(initial_positions).swapaxes(1, 2)

    sess.run(tf.global_variables_initializer())

    initial_state = sim.get_initial_state(position=np.array(initial_positions),
                                          youngs_modulus=10)

    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    sim.set_initial_state(initial_state=initial_state)

    tt = time.time()
    sym = sim.gradients_sym(loss, variables=trainables)
    print('sym', time.time() - tt)
    #sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)
    sim.add_vector_visualization(pos=final_position,
                                 vector=final_velocity,
                                 color=(0, 0, 1),
                                 scale=50)

    sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)

    # Sample one batch of random goals around goal_pos.
    gx, gy = goal_range
    pos_x, pos_y = goal_pos
    goal_train = [
        np.array([[
            pos_x + (random.random() - 0.5) * gx, pos_y +
            (random.random() - 0.5) * gy
        ] for _ in range(batch_size)],
                 dtype=np.float32) for __ in range(1)
    ]

    vis_id = list(range(batch_size))
    random.shuffle(vis_id)
    # Gradients are fed through placeholders so the update ops can be built
    # once and reused every iteration.
    grad_ph = [
        tf.placeholder(shape=v.shape, dtype=tf.float32) for v in trainables
    ]
    gradient_descent = [
        v.assign(v - lr * g) for v, g in zip(trainables, grad_ph)
    ]

    # Optimization loop
    for e in range(200):
        t = time.time()
        print('Epoch {:5d}, learning rate {}'.format(e, lr))

        loss_cal = 0.
        print('train...')
        for it, goal_input in enumerate(goal_train):
            tt = time.time()
            memo = sim.run(initial_state=initial_state,
                           num_steps=800,
                           iteration_feed_dict={goal: goal_input},
                           loss=loss)
            print('# *** forward', time.time() - tt)
            tt = time.time()
            grad = sim.eval_gradients(sym=sym, memo=memo)
            print('# *** eval_gradients', time.time() - tt)
            tt = time.time()

            # Apply one gradient-descent step via the prebuilt assign ops.
            grad_feed_dict = {}
            for gp, g in zip(grad_ph, grad):
                grad_feed_dict[gp] = g
            sess.run(gradient_descent, feed_dict=grad_feed_dict)
            print('gradient_descent', time.time() - tt)
            print('Iter {:5d} time {:.3f} loss {}'.format(
                it,
                time.time() - t, memo.loss))
            loss_cal = loss_cal + memo.loss
        save_path = saver.save(sess, "./models/walker_2d.ckpt")
        print("Model saved in path: %s" % save_path)
        sim.visualize(memo,
                      batch=random.randrange(batch_size),
                      export=None,
                      show=True,
                      interval=4)
        print('train loss {}'.format(loss_cal / len(goal_train)))
Example #11
0
def main(sess):
  """Train (or evaluate) a 3D crawler simulation with a placeholder controller.

  Builds a `Simulation` whose controller emits zero actuation, samples a
  lattice of initial particle positions, then either restores a checkpoint
  and visualizes one long rollout (`evaluate`), or runs a gradient-descent
  training loop, checkpointing and visualizing every iteration.

  Args:
    sess: an open TensorFlow session used for graph construction and runs.
  """
  t = time.time()

  # Per-batch 3D goal position, fed at every simulation iteration.
  goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 3], name='goal')

  # Define your controller here
  def controller(state):
    # Placeholder controller: zero actuation for every particle.
    actuations = tf.zeros(shape=(1, 3, 3, num_particles))
    debug = {'controller_inputs': tf.zeros(shape=(1, 10, 10)),
             'actuation': actuations}
    return actuations, debug

  # Longer grid along x when evaluating, so a longer rollout fits the domain.
  res = (60 + 100 * int(evaluate), 30, 30)
  bc = get_bounding_box_bc(res)

  sim = Simulation(
      dt=0.005,
      num_particles=num_particles,
      grid_res=res,
      dx=1.0 / res[1],
      gravity=gravity,
      controller=controller,
      batch_size=batch_size,
      bc=bc,
      sess=sess,
      E=25, damping=0.001 * evaluate, part_size=10)
  print("Building time: {:.4f}s".format(time.time() - t))

  # NOTE(review): this loss depends only on the `goal` placeholder, never on
  # the simulation state, so its gradient w.r.t. the trainables is zero.
  # Preserved as-is to keep behavior; confirm against the original intent.
  loss = tf.reduce_mean(tf.reduce_sum(goal ** 2, axis=1))

  # Seed particles on a regular sample_density^3 lattice per group, scaled
  # into the simulation domain with a small offset off the floor/walls.
  initial_positions = [[] for _ in range(batch_size)]
  for b in range(batch_size):
    for i, offset in enumerate(group_offsets):
      for x in range(sample_density):
        for y in range(sample_density):
          for z in range(sample_density):
            scale = 0.2
            u = ((x + 0.5) / sample_density * group_sizes[i][0] + offset[0]
                 ) * scale + 0.2
            v = ((y + 0.5) / sample_density * group_sizes[i][1] + offset[1]
                 ) * scale + 0.1
            w = ((z + 0.5) / sample_density * group_sizes[i][2] + offset[2]
                 ) * scale + 0.1
            initial_positions[b].append([u, v, w])
  assert len(initial_positions[0]) == num_particles
  # (batch, particle, dim) -> (batch, dim, particle): the layout sim expects.
  initial_positions = np.array(initial_positions).swapaxes(1, 2)

  sess.run(tf.global_variables_initializer())

  initial_state = sim.get_initial_state(
      position=initial_positions, youngs_modulus=10)

  trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  sim.set_initial_state(initial_state=initial_state)

  sym = sim.gradients_sym(loss, variables=trainables)

  gx, gy, gz = goal_range
  pos_x, pos_y, pos_z = goal_pos
  # One batch of goals, uniform in a box of size goal_range around goal_pos.
  goal_train = [np.array(
      [[pos_x + (random.random() - 0.5) * gx,
        pos_y + (random.random() - 0.5) * gy,
        pos_z + (random.random() - 0.5) * gz
        ] for _ in range(batch_size)],
      dtype=np.float32) for __ in range(1)]

  vis_id = list(range(batch_size))
  random.shuffle(vis_id)

  # Build the gradient-descent ops ONCE, outside the training loop, and feed
  # gradients through placeholders. Re-creating `v.assign(...)` ops on every
  # iteration (as this loop previously did) grows the TF graph without bound.
  grad_ph = [
      tf.placeholder(shape=v.shape, dtype=tf.float32) for v in trainables
  ]
  gradient_descent = [
      v.assign(v - lr * g) for v, g in zip(trainables, grad_ph)
  ]

  saver = tf.train.Saver()

  if evaluate:
    '''evaluate'''
    # Restore a trained checkpoint, run one long rollout, and visualize it.
    saver.restore(sess, "crawler3d_demo/0014/data.ckpt")
    tt = time.time()
    memo = sim.run(
        initial_state=initial_state,
        num_steps=1800,
        iteration_feed_dict={goal: goal_train[0]},
        loss=loss)
    print('forward', time.time() - tt)

    fn = 'crawler3d_demo/eval'
    sim.visualize(memo, batch=random.randrange(batch_size), export=None,
                  show=True, interval=5, folder=fn)
    return

  # Optimization loop
  for e in range(100000):
    t = time.time()
    print('Epoch {:5d}, learning rate {}'.format(e, lr))

    loss_cal = 0.
    print('train...')
    for it, goal_input in enumerate(goal_train):
      tt = time.time()
      memo = sim.run(
          initial_state=initial_state,
          num_steps=400,
          iteration_feed_dict={goal: goal_input},
          loss=loss)
      print('forward', time.time() - tt)
      tt = time.time()
      grad = sim.eval_gradients(sym=sym, memo=memo)
      print('backward', time.time() - tt)

      for i, g in enumerate(grad):
        print(i, np.mean(np.abs(g)))
      # Element-wise gradient clipping to stabilize the updates.
      grad = [np.clip(g, -1, 1) for g in grad]

      sess.run(gradient_descent,
               feed_dict=dict(zip(grad_ph, grad)))
      print('Iter {:5d} time {:.3f} loss {}'.format(
          it, time.time() - t, memo.loss))
      loss_cal = loss_cal + memo.loss
      # No trailing slash: the saved path now matches the restore path used
      # in the `evaluate` branch ("crawler3d_demo/NNNN/data.ckpt").
      fn = 'crawler3d_demo/{:04d}'.format(e)
      saver.save(sess, "{}/data.ckpt".format(fn))
      sim.visualize(memo, batch=random.randrange(batch_size), export=None,
                    show=True, interval=5, folder=fn)

    print('train loss {}'.format(loss_cal / len(goal_train)))
Ejemplo n.º 12
0
def main(sess):
    """Simulate a spinning disc of particles and measure distance to a goal.

    Lays out `num_particles` on concentric rings, applies a tangential force
    field (a constant torque) via `F_controller`, runs the simulation for
    `steps` steps, and uses the squared distance between the body's center of
    mass and a goal point as the loss. Visualizes the rollout at the end.

    Args:
        sess: an open TensorFlow session used for graph construction and runs.
    """
    batch_size = 1
    gravity = (0, -1)
    # gravity = (0, 0)
    # Ring layout parameters: N rings, radial spacing dR, outer radius R;
    # dC controls how many particles each successive ring adds.
    N = 5
    dR = 0.2
    R = (N - 1) * dR
    dC = 1.6
    num_particles = int(((N - 1) * dC + 1)**2)
    steps = 1000
    dt = 5e-3
    goal_range = 0.15
    res = (45, 30)
    bc = get_bounding_box_bc(res)

    lr = 1e-2

    # Per-batch 2D goal position fed at run time.
    goal = tf.placeholder(dtype=tf.float32, shape=[batch_size, 2], name='goal')

    def F_controller(state):
        # Offset of every particle from the body's center of mass.
        F = state.position - state.center_of_mass()[:, :, None]
        # Rotate the offsets by 90 degrees -> tangential force field, i.e.
        # a torque that spins the body about its center of mass.
        F = tf.stack([F[:, 1], -F[:, 0]], axis=1)
        # T = tf.cast(state.step_count // 100 % 2, dtype = tf.float32) * 2 - 1
        return F * 10  #  * T

    sim = Simulation(dt=dt,
                     num_particles=num_particles,
                     grid_res=res,
                     bc=bc,
                     gravity=gravity,
                     m_p=1,
                     V_p=1,
                     E=10,
                     nu=0.3,
                     sess=sess,
                     use_visualize=True,
                     F_controller=F_controller)
    position = np.zeros(shape=(batch_size, num_particles, 2))

    # Zero initial velocity for every particle (broadcast over the batch).
    # velocity_ph = tf.constant([0.2, 0.3])
    velocity_ph = tf.constant([0, 0], dtype=tf.float32)
    velocity = velocity_ph[None, :, None] + tf.zeros(
        shape=[batch_size, 2, num_particles], dtype=tf.float32)
    random.seed(123)
    for b in range(batch_size):
        # Disc center in grid units (positions are divided by 30 below).
        dx, dy = 5, 4
        cnt = 0
        las = 0
        for i in range(N):
            # l = particles on ring i: successive differences of (dC*i+1)^2.
            l = int((dC * i + 1)**2)
            l, las = l - las, l
            print(l)
            dth = 2 * np.pi / l
            dr = R / (N - 1) * i
            # Random phase per ring so the rings are not radially aligned.
            theta = np.pi * 2 * np.random.random()
            for j in range(l):
                theta += dth
                x, y = np.cos(theta) * dr, np.sin(theta) * dr
                position[b, cnt] = ((dx + x) / 30, (dy + y) / 30)
                cnt += 1

    # (batch, particle, dim) -> (batch, dim, particle).
    position = np.array(position).swapaxes(1, 2)

    sess.run(tf.global_variables_initializer())

    initial_state = sim.get_initial_state(position=position, velocity=velocity)

    # NOTE(review): named "final" but computed from sim.initial_state; the
    # simulation presumably rebinds this state during the rollout — confirm.
    final_position = sim.initial_state.center_of_mass()
    loss = tf.reduce_sum((final_position - goal)**2)
    # Red marker: body's center of mass; green marker: the goal point.
    sim.add_point_visualization(pos=final_position, color=(1, 0, 0), radius=3)
    sim.add_point_visualization(pos=goal, color=(0, 1, 0), radius=3)

    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    sim.set_initial_state(initial_state=initial_state)

    sym = sim.gradients_sym(loss, variables=trainables)

    goal_input = np.array([[0.7, 0.3]], dtype=np.float32)

    memo = sim.run(initial_state=initial_state,
                   num_steps=steps,
                   iteration_feed_dict={goal: goal_input},
                   loss=loss)

    if True:
        sim.visualize(memo, show=True, interval=2)
    else:
        # NOTE(review): dead branch; `exp` is not defined in this scope.
        sim.visualize(memo, show=False, interval=1, export=exp)