Ejemplo n.º 1
0
    def fit(self,
            sess,
            inputs,
            outputs,
            var_list=None,
            spo_config=None,
            feed_dict=None,
            **kwargs):
        """Fit the active model state via MAP estimation.

        Args:
            sess: Session handed to the optimizer's ``minimize`` call.
            inputs: Rank-2 array fed to the 'inputs/old' reference.
            outputs: Rank-2 array fed to the 'outputs/old' reference.
            var_list: Variables to optimize; defaults to every
                ``tf.Variable`` found in ``self.state.values()``.
            spo_config: Overrides applied on top of ``self.spo_config``.
            feed_dict: Extra feeds; the input/output feeds are added to a
                copy of it.
            **kwargs: Forwarded to ``self.log_likelihood`` through the
                node's kwargs.

        Returns:
            Tuple of the variables that were handed to the optimizer.
        """
        assert inputs.ndim == outputs.ndim == 2, 'Tensor rank should be 2'
        extra_feeds = dict() if feed_dict is None else feed_dict

        # Default to all <tf.Variable> entries of the model state
        if var_list is None:
            var_list = (candidate for candidate in self.state.values()
                        if isinstance(candidate, tf.Variable))
        var_list = tuple(var_list)

        # Build/retrieve the negative log likelihood node
        inputs_ref = self.get_or_create_ref('inputs/old',
                                            [None, inputs.shape[-1]])
        outputs_ref = self.get_or_create_ref('outputs/old',
                                             [None, outputs.shape[-1]])
        node_kwargs = dict(kwargs)
        node_kwargs['state_id'] = self.active_state
        node_kwargs['as_negative'] = True
        nll = self.get_or_create_node(group='log_likelihood',
                                      fn=self.log_likelihood,
                                      args=(inputs_ref, outputs_ref),
                                      kwargs=node_kwargs)

        # Work on an updated copy of the Scipy Optimize configuration
        spo_config = self.update_dict(self.spo_config,
                                      spo_config,
                                      as_copy=True)

        # Map each <string> bound key to the first active variable whose
        # name contains it; keys without a match are dropped.
        # TODO: substring matching is too permissive, improve me...
        resolved_bounds = dict()
        for name_fragment, bounds in spo_config.get('var_to_bounds',
                                                    {}).items():
            matched = next(
                (var for var in var_list if name_fragment in var.name), None)
            if matched is not None:
                resolved_bounds[matched] = bounds
        spo_config['var_to_bounds'] = resolved_bounds

        # Initialize/run the optimizer
        all_feeds = dict(extra_feeds)
        all_feeds[inputs_ref] = inputs
        all_feeds[outputs_ref] = outputs
        optimizer = ScipyOptimizerInterface(nll, var_list, **spo_config)
        optimizer.minimize(sess, all_feeds)
        return var_list
Ejemplo n.º 2
0
class ScipyADAMOptimizerInterface(ADAMOptimizerInterface):
    """Two-stage optimizer: the parent's minimisation, then a SciPy pass."""

    def __init__(self,
                 loss,
                 var_list=None,
                 lr=1e-3,
                 clip_val=10.0,
                 iteration=500,
                 **optimizer_kwargs):
        """Set up the parent optimizer and a SciPy optimizer on one loss.

        All arguments are forwarded positionally to the parent constructor;
        only ``loss`` and ``var_list`` are reused for the SciPy optimizer.
        """
        super().__init__(loss, var_list, lr, clip_val, iteration,
                         **optimizer_kwargs)
        self.scipy_optimizer = ScipyOptimizerInterface(loss, var_list)

    def minimize(self, sess, feed_dict):
        """Run the parent's ``minimize`` and then the SciPy optimizer's."""
        super().minimize(sess, feed_dict)
        self.scipy_optimizer.minimize(sess, feed_dict)
Ejemplo n.º 3
0
def _optimize_zinb(mu, dropout, theta=None):
    """Fit the nodes returned by ``_tf_zinb_zero`` against ``dropout``.

    Minimises the log loss between the prediction built by
    ``_tf_zinb_zero(mu, theta)`` and ``dropout`` (cast to float32) with a
    SciPy optimizer capped at 100 iterations.

    Returns:
        Tuple ``(a, b, t)``: the evaluated ``a`` and ``b`` nodes, and ``t``
        either evaluated (when ``theta`` is None) or passed through as
        returned by ``_tf_zinb_zero``.
    """
    pred, a, b, t = _tf_zinb_zero(mu, theta)
    # Alternative objective kept for reference:
    # loss = tf.reduce_mean(tf.abs(tf_logit(pred) - tf_logit(dropout)))
    labels = dropout.astype('float32')
    loss = tf.losses.log_loss(labels=labels, predictions=pred)

    optimizer = ScipyOptimizerInterface(loss, options={'maxiter': 100})

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        optimizer.minimize(sess)
        ret_a, ret_b = [sess.run(node) for node in (a, b)]
        # t is only a graph node when theta was left to be estimated
        ret_t = sess.run(t) if theta is None else t

    return ret_a, ret_b, ret_t
Ejemplo n.º 4
0
    def fit(self, sess, data, feed_dict, maxiter):
        """Minimise the model loss with SciPy and cache the evaluations.

        Args:
            sess: Session used for optimisation and evaluation.
            data: Mapping passed to ``self.get_pred``; its 'labels' entry
                is the regression target.
            feed_dict: Feeds used for every run that evaluates the loss or
                predictions.
            maxiter: Iteration cap passed to the SciPy optimizer.

        Side effects:
            Populates ``self.losses``, ``self.vars_evals``, the
            ``self.eval_*`` arrays, ``self.r2`` and ``self.final_loss``.
        """
        pred = self.get_pred(data)
        labels = data['labels']
        loss, pred_normed, labels_normed = self.get_loss(pred, labels)
        optimizer = ScipyOptimizerInterface(loss, options={'maxiter': maxiter})

        # The optimizer hands the fetched loss value to the callback, which
        # appends it to the history list.
        self.losses = []
        optimizer.minimize(sess,
                           feed_dict=feed_dict,
                           loss_callback=self.losses.append,
                           fetches=[loss])

        # Snapshot every tracked variable after optimisation
        for var_name, variable in self.vars.items():
            self.vars_evals[var_name] = sess.run(variable)

        evaluated = sess.run([pred, pred_normed, labels, labels_normed],
                             feed_dict=feed_dict)
        (self.eval_pred, self.eval_pred_normed, self.eval_label,
         self.eval_label_normed) = evaluated

        # Index 2 of the linregress result is the correlation coefficient
        regression = stats.linregress(self.eval_pred_normed.flatten(),
                                      self.eval_label_normed.flatten())
        self.r2 = regression[2]**2
        self.final_loss = sess.run(loss, feed_dict=feed_dict)
Ejemplo n.º 5
0
def match(query_full,
          d_query,
          query_2d_full,
          scene,
          intr,
          gap,
          tr_ground,
          scale,
          thresh_log_conf=7.5,
          w_3d=0.01,
          fps=3,
          step_samples=100):
    """Fit candidate scenelets from `scene` into a frame gap of the query.

    Candidate scenelets are cut from `scene` around regularly spaced middle
    frames. For every candidate a ground-plane translation (x, z) and a
    rotation around y are optimized jointly with SciPy so that the
    transformed 3D poses (a) reproject onto the confidence-weighted 2D query
    key-points and (b) stay close to the 3D query poses that exist in the
    gap. Only the candidate with the lowest total loss is returned.

    Args:
        query_full: Query scenelet; read through `get_partial_scenelet`,
            `skeleton.get_rate()` and `get_time(frame_id)`.
        d_query: Query directory; only used by the disabled debug
            visualisation to locate 'origjpg/color_%05d.jpg' images.
        query_2d_full: Source of 2D poses and per-joint confidences
            (`has_pose`, `get_pose`, `has_confidence`, `_confidence`).
        scene: Scenelet candidates are sampled from; its non-wall/floor
            objects are copied, transformed and attached to the result.
        intr: Indexed as `intr[:2, 2:3]`, `intr[0, 0]` and `intr[1, 1]` to
            move between pixel and normalized camera coordinates
            (presumably a 3x3 camera intrinsics matrix -- TODO confirm).
        gap: Pair `(first_frame, last_frame)`, both inclusive.
        tr_ground: Matrix whose `[1, 3]` entry is used as the fixed y
            translation of every candidate.
        scale: Factor applied to the 2D query poses before normalisation.
        thresh_log_conf: Joints with `|log(conf)|` at or above this value
            get zero weight; others are weighted linearly.
        w_3d: Weight of the 3D distance term.
        fps: Passed to `get_partial_scenelet` for the scene candidates and
            used to keep the middle frames away from the sequence ends.
        step_samples: Stride between candidate middle frames.

    Returns:
        An empty list when the scene is too short to yield a candidate,
        otherwise a one-element list `[(reprojection_loss, scenelet)]` for
        the best candidate.
    """
    with_y = False  # optimize for y as well
    np.set_printoptions(suppress=True, linewidth=220)

    pjoin = os.path.join

    len_gap = gap[1] - gap[0] + 1
    query, q_v = get_partial_scenelet(query_full,
                                      start=gap[0],
                                      end=gap[1] + 1,
                                      fps=1)
    q_v_sum = np.sum(q_v)
    q_v_sum_inv = np.float32(1. / q_v_sum)
    # lg.debug("q_v_sum: %s/%s" % (q_v_sum, q_v.size))
    # scene_min_y = scene.skeleton.get_min_y(tr_ground)
    # lg.debug("scene_min_y: %s" % repr(scene_min_y))

    # candidate middle frames, kept len_gap * fps away from both ends
    mid_frames = range(len_gap * fps,
                       scene.skeleton.poses.shape[0] - len_gap * fps,
                       step_samples)
    if not len(mid_frames):
        return []

    scenelets, sc_v = (np.array(e) for e in zip(*[
        get_partial_scenelet(
            scene, mid_frame_id=mid_frame_id, n_frames=len_gap, fps=fps)
        for mid_frame_id in mid_frames
    ]))
    # for i, (scenelet, sc_v_) in enumerate(zip(scenelets, sc_v)):
    #     mn = np.min(scenelet[sc_v_.astype('b1'), 1, :])
    #     scenelets[i, :, 1, :] -= mn
    # mn = np.min(scenelets[i, sc_v_.astype('b1'), 1, :])
    # scenelets = np.array(scenelets, dtype=np.float32)
    # sc_v = np.array(sc_v, dtype=np.int32)
    # print("sc_v: %s" % sc_v)
    # print("q_v: %s" % q_v)

    lg.debug("have %d/%d 3D poses in scenelet, and %d/%d in query" %
             (np.sum(sc_v), sc_v.shape[0], np.sum(q_v), q_v.shape[0]))

    # per-frame 2D key-points and their confidence weights over the gap
    query_2d = np.zeros((len_gap, 2, 16), dtype=np.float32)
    conf_2d = np.zeros((len_gap, 1, 16), dtype=np.float32)
    for lin_id, frame_id in enumerate(range(gap[0], gap[1] + 1)):

        if query_2d_full.has_pose(frame_id):
            query_2d[lin_id, :, :] = query_2d_full.get_pose(frame_id)[:2, :]
        # else:
        #     lg.warning("Query2d_full does not have pose at %d?" % frame_id)

        # im = im_.copy()
        if query_2d_full.has_confidence(frame_id):
            # print("showing %s" % frame_id)
            for joint, conf in query_2d_full._confidence[frame_id].items():
                log_conf = abs(np.log(conf)) if conf >= 0. else 0.
                # print("conf: %g, log_conf: %g" % (conf, log_conf))
                # if log_conf <= thresh_log_conf:
                #     p2d = scale * query_2d_full.get_joint_3d(joint,
                #                                              frame_id=frame_id)
                #     p2d = (int(round(p2d[0])), int(round(p2d[1])))
                #     cv2.circle(im, center=p2d,
                #                radius=int(round(3)),
                #                color=(1., 1., 1., 0.5), thickness=1)
                # weight falls linearly from 1 to 0 as |log(conf)| grows
                # towards thresh_log_conf
                conf_2d[lin_id, 0, joint] = max(
                    0., (thresh_log_conf - log_conf) / thresh_log_conf)

            # cv2.imshow('im', im)
            # cv2.waitKey(100)
    # while cv2.waitKey() != 27: pass
    # NOTE(review): if no joint received a positive weight, np.max(conf_2d)
    # is 0 and this division yields NaN weights -- confirm callers guarantee
    # at least one confident joint.
    conf_2d /= np.max(conf_2d)

    # scale from Denis' scale to current image size
    query_2d *= scale

    # move to normalized camera coordinates
    query_2d -= intr[:2, 2:3]
    query_2d[:, 0, :] /= intr[0, 0]
    query_2d[:, 1, :] /= intr[1, 1]

    #
    # initialize translation
    #

    # centroid of query poses
    c3d = np.mean(query[q_v.astype('b1'), :, :], axis=(0, 2))
    # estimate scenelet centroids
    sclt_means = np.array([
        np.mean(scenelets[i, sc_v[i, ...].astype('b1'), ...], axis=(0, 2))
        for i in range(scenelets.shape[0])
    ],
                          dtype=np.float32)
    # don't change height
    sclt_means[:, 1] = 0
    scenelets -= sclt_means[:, None, :, None]
    lg.debug("means: %s" % repr(sclt_means.shape))
    # every candidate starts at the query centroid
    if with_y:
        np_translation = np.array([c3d for i in range(scenelets.shape[0])],
                                  dtype=np.float32)
    else:
        np_translation = np.array(
            [c3d[[0, 2]] for i in range(scenelets.shape[0])], dtype=np.float32)
    # initial rotation alternates between 0 and pi across candidates
    np_rotation = np.array(
        [np.pi * (i % 2) for i in range(scenelets.shape[0])],
        dtype=np.float32)[:, None]
    n_cands = np_translation.shape[0]
    graph = tf.Graph()
    with graph.as_default(), tf.device('/gpu:0'):
        # 3D translation
        translation_ = tf.Variable(initial_value=np_translation,
                                   name='translation',
                                   dtype=tf.float32)
        t_y = tf.fill(dims=(n_cands, ),
                      value=(tr_ground[1, 3]).astype(np.float32))
        # t_y = tf.fill(dims=(n_cands,), value=np.float32(0.))
        lg.debug("t_y: %s" % t_y)
        if with_y:
            translation = translation_
        else:
            # insert the fixed y between the optimized x and z
            translation = tf.concat(
                (translation_[:, 0:1], t_y[:, None], translation_[:, 1:2]),
                axis=1)

        lg.debug("translation: %s" % translation)
        # 3D rotation (Euler XYZ)
        rotation = tf.Variable(np_rotation, name='rotation', dtype=tf.float32)
        # lg.debug("rotation: %s" % rotation)

        w = tf.Variable(conf_2d, trainable=False, name='w', dtype=tf.float32)

        pos_3d_in = tf.Variable(query,
                                trainable=False,
                                name='pos_3d_in',
                                dtype=tf.float32)
        # pos_3d_in = tf.constant(query, name='pos_3d_in', dtype=tf.float32)

        pos_2d_in = tf.Variable(query_2d,
                                trainable=False,
                                name='pos_2d_in',
                                dtype=tf.float32)
        # pos_2d_in = tf.constant(query_2d, name='pos_2d_in',
        #                         dtype=tf.float32)

        pos_3d_sclt = tf.Variable(scenelets,
                                  trainable=False,
                                  name='pos_3d_sclt',
                                  dtype=tf.float32)
        # print("pos_3d_sclt: %s" % pos_3d_sclt)

        # rotation around y
        my_zeros = tf.zeros((n_cands, 1), dtype=tf.float32, name='my_zeros')
        # tf.add_to_collection('to_init', my_zeros)
        my_ones = tf.ones((n_cands, 1))
        # tf.add_to_collection('to_init', my_ones)
        c = tf.cos(rotation, 'cos')
        # tf.add_to_collection('to_init', c)
        s = tf.sin(rotation, 'sin')
        # t0 = tf.concat([c, my_zeros, -s], axis=1)
        # t1 = tf.concat([my_zeros, my_ones, my_zeros], axis=1)
        # t2 = tf.concat([s, my_zeros, c], axis=1)
        # transform = tf.stack([t0, t1, t2], axis=2, name="transform")
        # print("t: %s" % transform)
        # row-major entries of one 3x3 matrix per candidate
        transform = tf.concat(
            [c, my_zeros, -s, my_zeros, my_ones, my_zeros, s, my_zeros, c],
            axis=1)
        transform = tf.reshape(transform, ((-1, 3, 3)), name='transform')
        print("t2: %s" % transform)
        # lg.debug("transform: %s" % transform)

        # transform to 3d
        # pos_3d = tf.matmul(transform, pos_3d_sclt) \
        #          + tf.tile(tf.expand_dims(translation, 2),
        #                    [1, 1, int(pos_3d_in.shape[2])])
        # pos_3d = tf.einsum("bjk,bcjd->bcjd", transform, pos_3d_sclt)
        shp = pos_3d_sclt.get_shape().as_list()
        # tile the per-candidate matrix over axes 1 and 3 of pos_3d_sclt so
        # the einsum below can apply it element-wise
        transform_tiled = tf.tile(transform[:, None, :, :, None],
                                  (1, shp[1], 1, 1, shp[3]))
        # print("transform_tiled: %s" % transform_tiled)
        pos_3d = tf.einsum("abijd,abjd->abid", transform_tiled, pos_3d_sclt)
        # print("pos_3d: %s" % pos_3d)
        pos_3d += translation[:, None, :, None]
        #pos_3d = pos_3d_sclt
        # print("pos_3d: %s" % pos_3d)

        # perspective divide
        # pos_2d = tf.divide(
        #     tf.slice(pos_3d, [0, 0, 0], [n_cands, 2, -1]),
        #     tf.slice(pos_3d, [0, 2, 0], [n_cands, 1, -1]))
        pos_2d = tf.divide(pos_3d[:, :, :2, :], pos_3d[:, :, 2:3, :])

        # print("pos_2d: %s" % pos_2d)

        diff = pos_2d - pos_2d_in
        # mask loss by 2d key-point visibility
        # print("w: %s" % w)
        # w_sum = tf.reduce_sum()
        masked = tf.multiply(diff, w)
        # print(masked)
        # loss_reproj = tf.nn.l2_loss(masked)
        # loss_reproj = tf.reduce_sum(tf.square(masked[:, :, 0, :])
        #                             + tf.square(masked[:, :, 1, :]),
        #                             axis=[1, 2])
        masked_sqr = tf.square(masked[:, :, 0, :]) \
                     + tf.square(masked[:, :, 1, :])
        # one reprojection loss per candidate
        loss_reproj = tf.reduce_sum(masked_sqr, axis=[1, 2])
        # lg.debug("loss_reproj: %s" % loss_reproj)

        # distance from existing 3D skeletons
        d_3d = q_v_sum_inv * tf.multiply(pos_3d - query[None, ...],
                                         q_v[None, :, None, None],
                                         name='diff_3d')
        # print(d_3d)

        loss_3d = w_3d * tf.reduce_sum(tf.square(d_3d[:, :, 0, :]) + tf.square(
            d_3d[:, :, 1, :]) + tf.square(d_3d[:, :, 2, :]),
                                       axis=[1, 2],
                                       name='loss_3d_each')
        # print(loss_3d)

        # all candidates are optimized jointly through one scalar loss
        loss = tf.reduce_sum(loss_reproj) + tf.reduce_sum(loss_3d)

        # optimize
        optimizer = ScipyOptimizerInterface(loss,
                                            var_list=[translation_, rotation],
                                            options={'gtol': 1e-12})

    with Timer('solve', verbose=True) as t:
        with tf.Session(graph=graph) as session:
            session.run(tf.global_variables_initializer())
            optimizer.minimize(session)
            o_pos_3d, o_pos_2d, o_masked, o_t, o_r, o_w, o_d_3d, \
                o_loss_reproj, o_loss_3d, o_transform, o_translation = \
                session.run([
                    pos_3d, pos_2d, masked, translation, rotation, w,
                    d_3d, loss_reproj, loss_3d, transform, translation])
            o_masked_sqr = session.run(masked_sqr)
        # o_t, o_r = session.run([translation, rotation])
    # print("pos_3d: %s" % o_pos_3d)
    # print("pos_2d: %s" % o_pos_2d)
    # print("o_loss_reproj: %s, o_loss_3d: %s" % (o_loss_reproj, o_loss_3d))
    # print("t: %s" % o_t)
    # print("r: %s" % o_r)
    # candidate indices ordered by ascending total loss
    chosen = sorted((i for i in range(o_loss_reproj.shape[0])),
                    key=lambda i2: o_loss_reproj[i2] + o_loss_3d[i2])
    lg.info("Best candidate is %d with error %g + %g" %
            (chosen[0], o_loss_reproj[chosen[0]], o_loss_3d[chosen[0]]))
    # print("masked: %s" % o_masked)
    # opp = np.zeros_like(o_pos_3d)
    # for i in range(o_pos_3d.shape[0]):
    #     for j in range(o_pos_3d.shape[1]):
    #         for k in range(16):
    #             opp[i, j, :2, k] = o_pos_3d[i, j, :2, k] / o_pos_3d[i, j, 2:3, k]
    #             # opp[i, j, 0, k] *= intr[0, 0]
    #             # opp[i, j, 1, k] *= intr[1, 1]
    #             # opp[i, j, :2, k] *= intr[1, 1]
    #             a = o_pos_2d[i, j, :, k]
    #             b = opp[i, j, :2, k]
    #             if not np.allclose(a, b):
    #                 print("diff: %s, %s" % (a, b))

    # back from normalized camera coordinates to pixels
    o_pos_2d[:, :, 0, :] *= intr[0, 0]
    o_pos_2d[:, :, 1, :] *= intr[1, 1]
    o_pos_2d += intr[:2, 2:3]

    # for cand_id in range(o_pos_2d.shape[0]):
    # Debug visualisation of the fitted 2D poses; permanently disabled.
    if False:
        # return
        # print("w: %s" % o_w)
        # print("conf_2d: %s" % conf_2d)
        # lg.debug("query_2d[0, 0, ...]: %s" % query_2d[0, 0, ...])
        query_2d[:, 0, :] *= intr[0, 0]
        query_2d[:, 1, :] *= intr[1, 1]
        # lg.debug("query_2d[0, 0, ...]: %s" % query_2d[0, 0, ...])
        query_2d += intr[:2, 2:3]
        # lg.debug("query_2d[0, 0, ...]: %s" % query_2d[0, 0, ...])

        ims = {}
        for cand_id in chosen[:5]:
            lg.debug("starting %s" % cand_id)
            pos_ = o_pos_2d[cand_id, ...]
            for lin_id in range(pos_.shape[0]):
                frame_id = gap[0] + lin_id
                try:
                    im = ims[frame_id].copy()
                except KeyError:
                    p_im = pjoin(d_query, 'origjpg',
                                 "color_%05d.jpg" % frame_id)
                    ims[frame_id] = cv2.imread(p_im)
                    im = ims[frame_id].copy()
                # im = im_.copy()
                for jid in range(pos_.shape[-1]):

                    xy2 = int(round(query_2d[lin_id, 0, jid])), \
                          int(round(query_2d[lin_id, 1, jid]))
                    # print("printing %s" % repr(xy))
                    cv2.circle(im,
                               center=xy2,
                               radius=5,
                               color=(10., 200., 10.),
                               thickness=-1)

                    if o_masked[cand_id, lin_id, 0, jid] > 0 \
                       or o_w[lin_id, 0, jid] > 0:
                        xy = int(round(pos_[lin_id, 0, jid])), \
                             int(round(pos_[lin_id, 1, jid]))
                        # print("printing %s" % repr(xy))
                        cv2.circle(im,
                                   center=xy,
                                   radius=3,
                                   color=(200., 10., 10.),
                                   thickness=-1)
                        cv2.putText(im,
                                    "d2d: %g" %
                                    o_masked_sqr[cand_id, lin_id, jid],
                                    org=((xy2[0] - xy[0]) // 2 + xy[0],
                                         (xy2[1] - xy[1]) // 2 + xy[1]),
                                    fontFace=1,
                                    fontScale=1,
                                    color=(0., 0., 0.))
                        cv2.line(im, xy, xy2, color=(0., 0., 0.))
                        d3d = o_d_3d[cand_id, lin_id, :, jid]
                        d3d_norm = np.linalg.norm(d3d)
                        if d3d_norm > 0.:
                            cv2.putText(
                                im,
                                "%g" % d3d_norm,
                                org=((xy2[0] - xy[0]) // 2 + xy[0] + 10,
                                     (xy2[1] - xy[1]) // 2 + xy[1]),
                                fontFace=1,
                                fontScale=1,
                                color=(0., 0., 255.))

                cv2.putText(im,
                            text="%d::%02d" % (cand_id, lin_id),
                            org=(40, 80),
                            fontFace=1,
                            fontScale=2,
                            color=(255., 255., 255.))

                # pos_2d_ = np.matmul(intr, pos_[lin_id, :2, :] / pos_[lin_id, 2:3, :])
                # for p2d in pos_2d_
                cv2.imshow('im', im)
                cv2.waitKey()
            break

        while cv2.waitKey() != 27:
            pass

    # build the output scenelet for the single best candidate
    out_scenelets = []
    for cand_id in chosen[:1]:
        lg.debug("score of %d is %g + %g = %g" %
                 (cand_id, o_loss_reproj[cand_id], o_loss_3d[cand_id],
                  o_loss_reproj[cand_id] + o_loss_3d[cand_id]))
        scenelet = Scenelet()
        rate = query_full.skeleton.get_rate()
        prev_time = None
        for lin_id, frame_id in enumerate(range(gap[0], gap[1] + 1)):
            time_ = query_full.get_time(frame_id)
            if lin_id and rate is None:
                rate = time_ - prev_time
            # NOTE(review): on the first frame prev_time is None (and rate
            # may be None), so the addition below would raise TypeError if
            # time_ == frame_id there -- confirm this cannot happen.
            if time_ == frame_id:
                time_ = prev_time + rate
            scenelet.skeleton.set_pose(frame_id=frame_id,
                                       pose=o_pos_3d[cand_id, lin_id, :, :],
                                       time=time_)
            prev_time = time_
        # 4x4 homogeneous transform from the fitted rotation and translation
        tr = np.concatenate((np.concatenate(
            (o_transform[cand_id, ...], o_translation[cand_id, None, :].T),
            axis=1), [[0., 0., 0., 1.]]),
                            axis=0)
        # 4x4 translation by minus the candidate centroid (same centring as
        # applied to the scenelet poses before optimisation)
        tr_m = np.concatenate(
            (np.concatenate((np.identity(3), -sclt_means[cand_id, None, :].T),
                            axis=1), [[0., 0., 0., 1.]]),
            axis=0)
        tr = np.matmul(tr, tr_m)
        for oid, ob in scene.objects.items():
            if ob.label in ('wall', 'floor'):
                continue
            ob2 = copy.deepcopy(ob)
            ob2.apply_transform(tr)
            scenelet.add_object(obj_id=oid, scene_obj=ob2, clone=False)
        scenelet.name_scene = scene.name_scene
        out_scenelets.append((o_loss_reproj[cand_id], scenelet))
    return out_scenelets
Ejemplo n.º 6
0
class TRPO():
    """Actor-critic agent with a KL-penalised policy refinement step.

    The constructor resets the default TF graph, builds the actor, the
    critic and the penalty-based TRPO objective, and opens a session and a
    timestamped log file. Call ``learn()`` to train; it closes both the
    session and the log file when it finishes.
    """

    def __init__(self, env, gamma=1, lr=0.01, num_episodes=1000, num_steps=200, KL_delta=10 ** (-4)):
        """Store hyper-parameters, build the graph, open session and log.

        Args:
            env: Environment exposing ``reset()``, ``step(action)``,
                ``render()`` and ``action_space.n``.
            gamma: Discount factor applied to episode rewards.
            lr: Stored on the instance; the optimizers built by this class
                use their own learning rates and do not read it.
            num_episodes: Number of episodes run by ``learn()``.
            num_steps: Maximum number of steps per episode.
            KL_delta: KL threshold used by the TRPO penalty terms.
        """
        #self.env = ArmEnv(size_x=4, size_y=3, cubes_cnt=4, episode_max_length=2000, finish_reward=200,
        #                  action_minus_reward=0.0, tower_target_size=3)
        self.env = env
        self.gamma = gamma
        self.lr = lr
        self.num_episodes = num_episodes
        self.num_steps = num_steps
        self.KL_delta = KL_delta
        self.success_counter = 0
        # roll_trajectory() reads this flag, so it must exist even when the
        # method is called before learn() has run.
        self.learn_flag = False
        self.build_graph()
        self.obs_len = len(self.env.reset())
        self.trajectory = []
        self.total_rew = []
        self.disc = 0
        self.log_file = open('logs_' + str(time.time()) + '.txt', 'w+')

        self.sess = tf.Session()

    def build_graph(self):
        """Reset the default graph and build placeholders and sub-graphs."""
        tf.reset_default_graph()
        self.state = tf.placeholder('float32', shape=[None, len(self.env.reset())], name="STATE")
        self.actions = tf.squeeze(tf.placeholder('int32', name="ACTIONS"))
        self.q_estimation = tf.placeholder('float32', name="Q-EST")
        self.build_actor()
        self.build_critic()
        self.build_trpo_tf()
        #self.build_trpo_SOI()

    # ============================
    # Actor = Policy Approximation
    # ============================
    def build_actor(self):
        """Build the policy network and its weighted cross-entropy loss."""
        self.inp = tf.layers.dense(
            self.state,
            10,
            name="ACTOR_INPUT",
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.initializers.constant(0)
        )

        self.out = tf.layers.dense(
            self.inp,
            self.env.action_space.n,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.initializers.constant(0)
        )

        self.soft_out = tf.nn.softmax(self.out)
        # negative log-likelihood of the taken action, weighted by the
        # critic's estimate
        nl = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.out, labels=self.actions)
        wnl = tf.multiply(nl, self.q_estimation)
        self.loss = tf.reduce_mean(wnl)
        self.opt = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.loss)

    # ======================================
    # Critic = Approximation of Q-function
    # ======================================
    def build_critic(self):
        """Build the critic network and its mean-squared-error loss."""
        self.q_return = tf.placeholder('float32', name="Q-Return")  # sum of rewards on rest of traj
        # NOTE(review): tf.concat's signature is (values, axis); here
        # self.actions lands in the axis position, so the action is likely
        # NOT concatenated onto the state -- confirm intent before changing.
        self.q_inp = tf.layers.dense(
            tf.concat(self.state, self.actions),
            10,
            name="Q-input",
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.initializers.constant(0)
        )

        self.q_out = tf.layers.dense(
            self.q_inp,
            1,
            name="Q-output",
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.initializers.constant(0)
        )

        self.q_loss = tf.losses.mean_squared_error(self.q_out, self.q_return)
        self.q_opt = tf.train.AdamOptimizer(0.01).minimize(self.q_loss)

    def build_trpo_SOI(self):
        """Build the constrained TRPO step solved with SLSQP (unused)."""
        # I use index _k to fix variables in graph
        # Fixed probs on k-th iteration
        self.soft_out_k = tf.placeholder('float32', name="SOFTOUT_K")

        # Fixed advantage on k-th iteration
        self.A_k = tf.placeholder('float32', name="A_K")

        # Number of steps to estimate expectation
        self.N = tf.placeholder('float32', name="number")

        # Advantage function = empirical_return - baseline
        self.A = self.q_return - self.q_out

        # Choosing particular action "actions" and multiply by A_k
        # (an earlier version mistakenly used A instead of A_k)
        trpo_obj = -tf.reduce_mean(self.A_k * tf.gather(tf.exp(self.soft_out - self.soft_out_k), self.actions))

        # KL(soft_out_k, soft_out) should be less than KL_delta
        constraints = [(-self.kl(self.soft_out_k, self.soft_out) + self.KL_delta)]

        # Use ScipyOptimizerInterface (SOI) to solve the constrained task
        self.trpo_opt = SOI(trpo_obj,
                            method='SLSQP',
                            inequalities=constraints,
                            options={'maxiter': 3})

    def apply_trpo_SOI(self, s, a, q_app, soft, r, adv):
        """Run one constrained TRPO step on a single (s, a, r) sample."""
        # feed_dict must be a mapping from tensors to values; the previous
        # list-of-pairs form is not accepted by session.run.
        feed_dict = {self.state: [s],
                     self.soft_out_k: [soft],
                     self.actions: [a],
                     self.q_return: [r],
                     self.q_out: q_app,
                     self.A_k: adv}

        self.trpo_opt.minimize(self.sess, feed_dict=feed_dict)

    def build_trpo_tf(self):
        """Build the penalty-based TRPO objective optimised with Adam."""
        self.beta = tf.placeholder('float32')
        self.eta = tf.placeholder('float32')
        self.learn_rate = tf.placeholder('float32')
        self.learn_rate_value = 0.001

        self.soft_out_k = tf.placeholder('float32', name="SOFTOUT_K")
        # Fixed advantage on k-th iteration
        self.A_k = tf.placeholder('float32', name="A_K")
        self.A = self.q_return - self.q_out #?
        self.D_KL = self.kl(self.soft_out, self.soft_out_k)

        trpo_loss_1 = -tf.reduce_mean(self.A_k * tf.exp(self.soft_out - self.soft_out_k))
        trpo_loss_2 = self.beta * self.D_KL
        # NOTE(review): this hinge is active when D_KL < KL_delta / 2; a
        # penalty for exceeding the threshold would be
        # max(0, D_KL - 2 * KL_delta) -- confirm which one is intended.
        trpo_loss_3 = self.eta * tf.square(tf.maximum(0.0, self.KL_delta - 2 * self.D_KL))

        trpo_total_loss = trpo_loss_1 + trpo_loss_2 + trpo_loss_3
        self.trpo_opt = tf.train.AdamOptimizer(self.learn_rate).minimize(trpo_total_loss)

    def apply_trpo_tf(self, old_policy, advantage, state, actions, num_steps):
        """Run ``num_steps`` penalised updates with an adaptive KL weight.

        ``beta`` grows when the last measured KL exceeds twice ``KL_delta``
        and shrinks when it falls below half of it; the learning rate is
        adjusted once ``beta`` leaves the range [1/30, 30].
        """
        beta = 0.5
        eta = 0.5
        DKL = 0.01
        for i in range(num_steps):
            if DKL > 2 * self.KL_delta:
                beta *= 1.5
                if beta > 30:
                    self.learn_rate_value /= 1.5
            elif DKL < 0.5 * self.KL_delta:
                beta /= 1.05
                if beta < 1./30:
                    self.learn_rate_value *= 1.5

            _, DKL = self.sess.run([self.trpo_opt, self.D_KL], feed_dict={self.A_k: advantage,
                                                          self.soft_out_k: old_policy,
                                                          self.actions: actions,
                                                          self.state: [state],
                                                          self.beta: beta,
                                                          self.eta: eta,
                                                          self.learn_rate: self.learn_rate_value})

    def roll_trajectory(self, episode):
        """Play one episode, recording ``(state, action, reward)`` tuples.

        Actions are sampled uniformly until the first non-zero terminal
        reward has been seen (``self.learn_flag``), and from the policy
        afterwards.
        """
        s = self.env.reset()
        self.trajectory = []
        self.total_rew = []

        for step in range(self.num_steps):
            output = self.sess.run([self.soft_out], feed_dict={self.state: [s]})
            probs = output[0][0]
            n_actions = self.env.action_space.n
            if self.learn_flag:
                sampling_probs = probs
            else:
                sampling_probs = [1. / n_actions] * n_actions
            a = np.random.choice(n_actions, p=sampling_probs)
            #print("probs: ", probs, self.learn_flag, "action: ", a)
            self.log_file.write('probs: ' + str(probs) + '\n')
            new_state, reward, done, _ = self.env.step(a)
            self.total_rew.append(reward)
            self.trajectory.append((s, a, reward))

            if done:
                if reward != 0:
                    self.env.render()
                    self.learn_flag = True
                    print(reward)
                    self.success_counter += 1
                return
            s = new_state

        print('====================== end of episode {} ======================'.format(episode))

    def learn(self):
        """Train for ``self.num_episodes`` episodes and plot success rate.

        The session is closed when training ends, and the log file is
        closed even if training raises.
        """
        self.learn_flag = False
        try:
            with self.sess:
                self.sess.run(tf.global_variables_initializer())

                # Calculate metrics
                self.successes = []
                self.success_episodes = []
                self.slice = 10
                self.success_counter = 0

                for episode in range(self.num_episodes):
                    self.roll_trajectory(episode)
                    disc = self.discount_and_norm_rewards(self.total_rew, self.gamma)
                    for n, st in enumerate(self.trajectory):
                        traj_state = st[0]
                        traj_action = int(st[1])
                        traj_reward = disc[n]

                        #Learning Critic
                        q_approximated, _ = self.sess.run([self.q_out, self.q_opt],
                                                          feed_dict={self.state: [traj_state],
                                                                     self.actions: [traj_action],
                                                                     self.q_return: traj_reward}
                                                          )

                        #Learning Actor
                        _, soft, adv = self.sess.run([self.opt, self.soft_out, self.A],
                                                feed_dict={self.state: [traj_state],
                                                           self.actions: [traj_action],
                                                           self.q_estimation: q_approximated,
                                                           self.q_return: traj_reward}
                                                )

                        #Optimization Actor-Parameters
                        #self.apply_trpo_SOI(traj_state, traj_action, q_approximated, soft, traj_reward, adv)
                        if episode > 500:
                            self.apply_trpo_tf(soft, adv, traj_state, traj_action, 20)

                    if episode % self.slice == 0:
                        print("Episode: ", episode)
                        print("Successes to all: ", self.success_counter / self.slice)
                        self.success_episodes.append(episode)
                        self.successes.append(self.success_counter / self.slice)
                        self.success_counter = 0

                plt.plot(self.success_episodes, self.successes)
                plt.show()
        finally:
            self.log_file.close()
        print(self.successes)

    @staticmethod
    def kl(p, q):
        """Return sum(p * log(p / q)) as a TF tensor."""
        return tf.reduce_sum(tf.multiply(p, tf.log(p / q)))

    @staticmethod
    def kl_num(p, q):
        """Return sum(p * log(p / q)) computed with NumPy."""
        return np.sum(np.multiply(p, np.log(p/q)))

    @staticmethod
    def to_cat(a, n):
        """Return a length-``n`` one-hot integer array with a 1 at ``a``."""
        return np.array([1 if a == i else 0 for i in range(n)])

    @staticmethod
    def discount_and_norm_rewards(episode_rewards, gamma):
        """Return the discounted reward-to-go for every step of an episode.

        Despite the name, no normalisation is applied. The result is always
        float64 so that fractional discounted sums are not truncated when
        the rewards are integers.
        """
        discounted_episode_rewards = np.zeros_like(episode_rewards,
                                                   dtype=np.float64)
        cumulative = 0.0
        for t in reversed(range(len(episode_rewards))):
            cumulative = cumulative * gamma + episode_rewards[t]
            discounted_episode_rewards[t] = cumulative
        return discounted_episode_rewards
Ejemplo n.º 7
0
class PINN:
    """Fully connected tanh network fitted with SciPy's L-BFGS-B (TF1 graph mode).

    The constructor builds a dedicated ``tf.Graph`` containing the weights,
    the ``X``/``U`` placeholders, the prediction ``U_hat`` and the loss
    ``loss_U + regularization_param * loss_dU``. It then opens a session on
    that graph, runs the variable initializer and finalizes the graph.

    ``_get_loss_du`` returns 0.0 in this class, so the extra loss term is
    inactive here (presumably an override point for subclasses).
    """

    def __init__(
            self,
            layers: List[int],
            lower_bound: np.ndarray,
            upper_bound: np.ndarray,
            dtype=tf.float32,
            regularization_param=1.0):
        """Store the configuration and build the graph and session.

        Args:
            layers: Layer widths; ``layers[0]`` is the input width and
                ``layers[-1]`` the output width.
            lower_bound: Lower bound used to rescale the inputs to [-1, 1].
            upper_bound: Upper bound used to rescale the inputs to [-1, 1].
            dtype: TensorFlow dtype of placeholders, weights and biases.
            regularization_param: Multiplier applied to ``loss_dU``.
        """
        self.layers = layers
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound
        self.dtype = dtype
        self.regularization_param = regularization_param

        self._build_net()

    def cleanup(self):
        """Close the session and drop the reference to it."""
        self.sess.close()

        del self.sess

    def _build_net(self):
        """Create the graph, loss, optimizer and an initialized session."""
        with tf.Graph().as_default() as g:

            self._init_variables()

            self._init_placeholders()

            self.U_hat = self.__NN(self.X)
            self.loss_U = self._get_loss(self.U, self.U_hat)
            self.loss_dU = self._get_loss_du()

            self.loss = self.loss_U + self.regularization_param * self.loss_dU

            # ftol/gtol are set to machine epsilon; maxiter/maxfun are the
            # iteration and function-evaluation limits passed to SciPy.
            self.optimizer_BFGS = ScipyOptimizerInterface(
                self.loss,
                method='L-BFGS-B',
                options={'maxiter': 50000,
                         'maxfun': 50000,
                         'maxcor': 50,
                         'maxls': 50,
                         'ftol': 1.0 * np.finfo(float).eps,
                         'gtol': 1.0 * np.finfo(float).eps})

            init = tf.global_variables_initializer()

            self.sess = tf.Session(graph=g)

        self.sess.run(init)
        # No ops can be added to the graph after this point.
        self.sess.graph.finalize()

    def _init_variables(self):
        """Create the weight and bias variables for all layers."""
        self.weights, self.biases = self.__init_NN(self.layers)

    def _init_placeholders(self):
        """Create the input (``X``) and target (``U``) placeholders."""
        self.X = tf.placeholder(self.dtype, shape=[None, self.layers[0]])
        self.U = tf.placeholder(self.dtype, shape=[None, self.layers[-1]])

    def _get_loss(self, U, U_hat):
        """Return the mean squared difference between ``U`` and ``U_hat``."""
        return tf.reduce_mean(tf.square(U - U_hat))

    def _get_loss_du(self):
        """Return the additional loss term; constant 0.0 in this class."""
        return 0.0

    def __init_NN(self, layers):
        """Build one weight matrix and one zero bias row per layer transition.

        Args:
            layers: Layer widths, as passed to the constructor.

        Returns:
            Tuple ``(weights, biases)`` of lists of ``tf.Variable``.
        """
        weights = []
        biases = []
        for fan_in, fan_out in zip(layers[:-1], layers[1:]):
            W = self.__xavier_init(size=[fan_in, fan_out])
            b = tf.Variable(
                tf.zeros([1, fan_out], dtype=self.dtype), dtype=self.dtype)
            weights.append(W)
            biases.append(b)
        return weights, biases

    def __xavier_init(self, size):
        """Create a weight variable drawn from a truncated normal.

        The standard deviation is ``sqrt(2 / (in_dim + out_dim))``.

        Args:
            size: Two-element sequence ``[in_dim, out_dim]``.

        Returns:
            ``tf.Variable`` of shape ``[in_dim, out_dim]`` and dtype
            ``self.dtype``.
        """
        in_dim = size[0]
        out_dim = size[1]
        xavier_stddev = np.sqrt(2/(in_dim + out_dim))

        # Sample directly in self.dtype: the default float32 sample cannot
        # be used as the initial value of a variable of a different dtype.
        return tf.Variable(tf.truncated_normal(
            [in_dim, out_dim],
            stddev=xavier_stddev,
            dtype=self.dtype), dtype=self.dtype)

    def __NN(self, X):
        """Apply the network to ``X``.

        The input is first rescaled linearly so that ``lower_bound`` maps
        to -1 and ``upper_bound`` maps to +1. Every layer but the last
        applies ``tanh``; the last layer is affine.

        Args:
            X: Input tensor.

        Returns:
            Output tensor of the final affine layer.
        """
        Z = 2.0*(X - self.lower_bound) / \
            (self.upper_bound - self.lower_bound) - 1.0

        for W, b in zip(self.weights[:-1], self.biases[:-1]):
            Z = tf.tanh(tf.add(tf.matmul(Z, W), b))

        W = self.weights[-1]
        b = self.biases[-1]
        U = tf.add(tf.matmul(Z, W), b)

        return U

    def train_BFGS(self, X, U):
        """Minimize the loss with L-BFGS-B, feeding ``X`` and ``U``."""
        self.optimizer_BFGS.minimize(self.sess, {self.X: X, self.U: U})

    def predict(self, X):
        """Evaluate ``U_hat`` for the inputs ``X`` and return the result."""
        return self.sess.run(self.U_hat, {self.X: X})
Ejemplo n.º 8
0
def optimize_path(skel_ours,
                  skel_ours_2d,
                  images,
                  intrinsics,
                  path_skel,
                  ground_rot,
                  shape_orig=None,
                  use_huber=False,
                  weight_smooth=0.01,
                  show=False,
                  frames_ignore=None,
                  resample=True,
                  depth_init=10.,
                  p_constraints=None,
                  smooth_mode=SmoothMode.ACCEL):
    """Optimize 3D path so that it matches the 2D corresponding observations.

    A TensorFlow graph is built with per-frame ``translation`` and
    ``rotation`` variables, and a ``ScipyOptimizerInterface`` minimizes a
    visibility-masked re-projection loss, plus (when ``weight_smooth > 0``)
    a smoothness term on the averaged hip displacement and (when
    ``p_constraints`` is given) a weighted 3D constraint term. Afterwards
    ``skel_ours.poses`` is overwritten with the optimized 3D positions.

    Args:
        skel_ours (Skeleton):
            3D skeleton from LFD.
        skel_ours_2d (Skeleton):
            2D feature points from LFD.
        images (dict):
            Color images for debug, keyed by frame_ids.
        intrinsics:
            Camera intrinsics, indexed like a matrix: ``[:2, 2]`` is
            subtracted from the 2D points and ``[0, 0]`` / ``[1, 1]`` divide
            x / y (presumably a 3x3 pinhole matrix -- TODO confirm).
        path_skel (str):
            Path of input file from LFD on disk. Not read in this
            function body.
        ground_rot:
            Indexable with three entries. Entries 1 and 2 are asserted to
            be close to 0; entry 0 is converted from degrees to radians
            and used as the initial camera angle.
        shape_orig (tuple):
            Height and width of original images before LFD scaled them.
            Overlay images are only produced when this is not None.
        use_huber (bool):
            If True, the 2D residual is computed with ``huber_loss``
            instead of a plain difference. (Previously documented as
            deprecated.)
        weight_smooth (float):
            Smoothness term weight. The smoothness term is only added to
            the loss when this is > 0.
        show (bool):
            Show debug visualizations.
        frames_ignore (set):
            Deprecated. Not read in this function body.
        resample (bool):
            Not read in this function body. Previously documented as: fill
            in missing poses by interpolating using Blender's IK.
        depth_init (float):
            Initial depth for LFD poses; the z translation of each frame
            starts at this value plus uniform noise of up to +-25% of it.
        p_constraints (str):
            Path to 3D constraints scenelet file.
        smooth_mode (SmoothMode):
            Smooth velocity or acceleration.

    Returns:
        tuple: ``(skel_ours, out_images, intrinsics)`` where ``out_images``
        maps frame ids to debug overlay images (empty when ``shape_orig``
        is None).

    Raises:
        AssertionError: If ``ground_rot`` has a non-zero second or third
            entry, or the optimized poses do not match the input shape.
        RuntimeError: If ``smooth_mode`` is neither VELOCITY nor ACCEL.
    """

    # scale 2D detections to canonical camera coordinates
    np_poses_2d = \
        skel_ours_2d.poses[:, :2, :] \
        - np.expand_dims(intrinsics[:2, 2], axis=1)
    np_poses_2d[:, 0, :] /= intrinsics[0, 0]
    np_poses_2d[:, 1, :] /= intrinsics[1, 1]

    # Initial per-frame translation: y = -1, z = depth_init plus uniform
    # noise in [-0.25, 0.25] * depth_init. Rotation starts at zero.
    n_frames = skel_ours.poses.shape[0]
    np_translation = np.zeros(shape=(n_frames, 3), dtype=np.float32)
    np_translation[:, 1] = -1.
    np_translation[:, 2] = \
        np.random.uniform(-depth_init * 0.25, depth_init * 0.25,
                          np_translation.shape[0]) \
        + depth_init
    np_rotation = np.zeros(shape=(n_frames, 3), dtype=np.float32)

    frame_ids = np.array(skel_ours.get_frames(), dtype=np.float32)
    np_visibility = skel_ours_2d.get_confidence_matrix(frame_ids=frame_ids,
                                                       dtype='f4')

    # Optional 3D constraints: joints with confidence > 0.5 in the
    # constraints scenelet get a mask weight and a target position.
    if p_constraints is not None:
        sclt_cnstr = Scenelet.load(p_constraints)
        np_cnstr_mask = np.zeros(shape=(len(frame_ids),
                                        Joint.get_num_joints()),
                                 dtype=np.float32)
        np_cnstr = np.zeros(shape=(len(frame_ids), 3, Joint.get_num_joints()),
                            dtype=np.float32)
        for frame_id, confs in sclt_cnstr.confidence.items():
            lin_id = None
            for j, conf in confs.items():
                if conf > 0.5:
                    # Look up the row of this frame lazily, once per frame.
                    if lin_id is None:
                        lin_id = next(
                            lin_id_
                            for lin_id_, frame_id_ in enumerate(frame_ids)
                            if frame_id_ == frame_id)
                    np_cnstr_mask[lin_id, j] = conf
                    np_cnstr[lin_id, :, j] = \
                        sclt_cnstr.skeleton.get_joint_3d(
                          joint_id=j, frame_id=frame_id)
    else:
        np_cnstr_mask = None
        np_cnstr = None

    # dt: frame-id difference between consecutive frames; the inverses are
    # the multipliers used for the velocity / acceleration terms below.
    spans = skel_ours.get_actor_empty_frames()
    dt = frame_ids[1:].astype(np.float32) \
         - frame_ids[:-1].astype(np.float32)
    dt_pos_inv = np.reciprocal(dt, dtype=np.float32)
    dt_vel_inv = np.divide(np.float32(2.), dt[1:] + dt[:-1])
    # ensure smoothness weight multipliers are not affected by
    # actor-transitions
    if skel_ours.n_actors > 1 and len(spans):
        for lin_id in range(len(dt)):
            frame_id0 = frame_ids[lin_id]
            frame_id1 = frame_ids[lin_id + 1]
            span = next((span_ for span_ in spans if span_[0] == frame_id0),
                        None)
            if span is not None:
                assert frame_id1 == span[1], "No"
                # NOTE(review): dt_vel_inv has one entry fewer than dt, so
                # lin_id == len(dt) - 1 would index past its end, and
                # lin_id == 0 makes lin_id - 1 wrap around to the last
                # entry -- confirm spans cannot start at those positions.
                dt[lin_id] = 0.
                dt_pos_inv[lin_id] = 0.
                dt_vel_inv[lin_id] = 0.
                dt_vel_inv[lin_id - 1] = 1. / dt[lin_id - 1]

    forwards = np.array([
        skel_ours.get_forward(frame_id, estimate_ok=True, k=0)
        for frame_id in skel_ours.get_frames()
    ])
    # from alignment import get_angle
    # xs = np.hstack((
    # np.ones(shape=(len(forwards), 1)),
    # np.zeros(shape=(len(forwards), 2))
    # ))
    # print(xs.shape)
    print(forwards.shape)
    # NOTE(review): unit_x is not referenced again in this function.
    unit_x = np.array((1., 0., 0.))
    np_angles = [-np.arctan2(forward[2], forward[0]) for forward in forwards]
    print(forwards, np_angles)
    # ank_diff = \
    #     np.exp(
    #        -2. * np.max(
    #           [
    #               np.linalg.norm(
    #                  (skel_ours.poses[1:, :, joint]
    #                   - skel_ours.poses[:-1, :, joint]).T
    #                  * dt_pos_inv, axis=0
    #               ).astype(np.float32)
    #               for joint in {Joint.LANK, Joint.RANK}
    #           ],
    #           axis=0
    #        )
    #     )
    # assert ank_diff.shape == (skel_ours.poses.shape[0]-1,), \
    #     "Wrong shape: %s" % repr(ank_diff.shape)

    # cam_angle = [np.deg2rad(-8.)]
    assert np.isclose(ground_rot[1], 0.) and np.isclose(ground_rot[2], 0.), \
        "Assumed only x rotation"
    # assert ground_rot[0] <= 0, "Negative means looking down, why looking up?"
    cam_angle = [np.deg2rad(ground_rot[0])]
    # assert False, "Fixed angle!"
    # `devices` is a one-element set, so the loop body below runs once.
    device_name = '/gpu:0' if tf.test.is_gpu_available() else '/cpu:0'
    devices = {device_name}
    for device in devices:
        with Timer(device, verbose=True):
            graph = tf.Graph()
            with graph.as_default(), tf.device(device):
                # Visibility is tiled twice along axis 1 so it can mask
                # both 2D coordinates of the residual.
                tf_visibility = tf.Variable(np.tile(np_visibility, (1, 2, 1)),
                                            name='visibility',
                                            trainable=False,
                                            dtype=tf.float32)
                tf_dt_pos_inv = \
                    tf.Variable(np.tile(dt_pos_inv, (1, 3)).reshape(-1, 3),
                                name='dt_pos_inv', trainable=False,
                                dtype=tf.float32)
                tf_dt_vel_inv = \
                    tf.constant(np.tile(dt_vel_inv, (1, 3)).reshape(-1, 3),
                                name='dt_vel_inv', dtype=tf.float32)

                # input data
                pos_3d_in = tf.Variable(skel_ours.poses.astype(np.float32),
                                        trainable=False,
                                        name='pos_3d_in',
                                        dtype=tf.float32)
                pos_2d_in = tf.Variable(np_poses_2d.astype(np.float32),
                                        trainable=False,
                                        name='pos_2d_in',
                                        dtype=tf.float32)

                # Camera angle about x. It is not listed in the optimizer's
                # var_list below, so only translation and rotation are
                # handed to the optimizer.
                params_camera = tf.Variable(initial_value=cam_angle,
                                            dtype=tf.float32,
                                            trainable=True)

                cam_sn = tf.sin(params_camera)
                cam_cs = tf.cos(params_camera)
                transform_camera = tf.reshape(tf.stack([
                    1., 0., 0., 0., 0., cam_cs[0], cam_sn[0], 0., 0.,
                    -cam_sn[0], cam_cs[0], 0., 0., 0., 0., 1.
                ],
                                                       axis=0),
                                              shape=(4, 4))

                # 3D translation
                translation = tf.Variable(np_translation, name='translation')
                # 3D rotation (Euler XYZ)
                rotation = tf.Variable(np_rotation, name='rotation')
                # fw_angles is only referenced by commented-out code below.
                fw_angles = tf.Variable(np_angles, name='angles')

                # rotation around y (only column 1 of `rotation` is used)
                my_zeros = tf.zeros((n_frames, 1))
                my_ones = tf.ones((n_frames, 1))
                c = tf.cos(tf.slice(rotation, [0, 1], [n_frames, 1]))
                s = tf.sin(tf.slice(rotation, [0, 1], [n_frames, 1]))
                t0 = tf.concat([c, my_zeros, -s, my_zeros], axis=1)
                t1 = tf.concat([my_zeros, my_ones, my_zeros, my_zeros], axis=1)
                t2 = tf.concat([s, my_zeros, c, my_zeros], axis=1)
                t3 = tf.concat([my_zeros, my_zeros, my_zeros, my_ones], axis=1)
                transform = tf.stack([t0, t1, t2, t3],
                                     axis=2,
                                     name="transform")

                # Apply the camera transform to each per-frame transform
                # and keep the upper-left 3x3 part.
                transform = tf.einsum('ij,ajk->aik', transform_camera,
                                      transform)[:, :3, :3]

                # transform to 3d
                pos_3d = tf.matmul(transform, pos_3d_in) \
                    + tf.tile(tf.expand_dims(translation, 2),
                              [1, 1, int(pos_3d_in.shape[2])])

                # constraints
                loss_cnstr = None
                if np_cnstr is not None:
                    constraints = tf.Variable(np_cnstr,
                                              trainable=False,
                                              name='constraints',
                                              dtype=tf.float32)
                    constraints_mask = tf.Variable(np_cnstr_mask,
                                                   trainable=False,
                                                   name='constraints_mask',
                                                   dtype=tf.float32)
                    cnstr_diff = tf.reduce_sum(tf.squared_difference(
                        pos_3d, constraints),
                                               axis=1,
                                               name='constraints_difference')
                    cnstr_diff_masked = tf.multiply(
                        constraints_mask,
                        cnstr_diff,
                        name='constraints_difference_masked')
                    loss_cnstr = tf.reduce_sum(cnstr_diff_masked,
                                               name='constraints_loss')

                # perspective divide
                pos_2d = tf.divide(
                    tf.slice(pos_3d, [0, 0, 0], [n_frames, 2, -1]),
                    tf.slice(pos_3d, [0, 2, 0], [n_frames, 1, -1]))

                if use_huber:
                    diff = huber_loss(pos_2d_in, pos_2d, 1.)
                    masked = diff * tf_visibility
                    loss_reproj = tf.nn.l2_loss(masked)
                    lg.info("Doing huber on reprojection, NOT translation")
                else:
                    # re-projection loss
                    diff = pos_2d - pos_2d_in
                    # mask loss by 2d key-point visibility
                    masked = diff * tf_visibility
                    loss_reproj = tf.nn.l2_loss(masked)
                    lg.info("NOT doing huber")

                sys.stderr.write(
                    "TODO: Move huber to translation, not reconstruction\n")

                # translation smoothness
                dx = tf.multiply(
                    x=0.5,
                    y=tf.add(
                        pos_3d[1:, :, Joint.LHIP] - pos_3d[:-1, :, Joint.LHIP],
                        pos_3d[1:, :, Joint.RHIP] - pos_3d[:-1, :, Joint.RHIP],
                    ),
                    name="average_hip_displacement_3d")
                tf_velocity = tf.multiply(dx, tf_dt_pos_inv)

                # Difference of consecutive z displacements, scaled by
                # the z column of dt_vel_inv.
                tf_acceleration_z = tf.multiply(x=dx[1:, 2:3] - dx[:-1, 2:3],
                                                y=tf_dt_vel_inv[:, 2:3],
                                                name="acceleration_z")

                if smooth_mode == SmoothMode.VELOCITY:
                    # if GT, use full smoothness to fix 2-frame flicker
                    if np_cnstr is not None:
                        print('Smoothing all velocity!')
                        loss_transl_smooth = \
                            weight_smooth * tf.nn.l2_loss(tf_velocity)
                    else:  # Normal mode, don't oversmooth screen-space
                        loss_transl_smooth = \
                            weight_smooth * tf.nn.l2_loss(tf_velocity[:, 2:3])
                elif smooth_mode == SmoothMode.ACCEL:
                    loss_transl_smooth = \
                        weight_smooth * tf.nn.l2_loss(tf_acceleration_z)
                else:
                    raise RuntimeError(
                        'Unknown smooth mode: {}'.format(smooth_mode))

                # Per-frame squared acceleration, only needed for the
                # debug plots.
                if show:
                    sqr_accel_z = weight_smooth * tf.square(tf_acceleration_z)

                if weight_smooth > 0.:
                    lg.info("Smoothing in time!")
                    loss = loss_reproj + loss_transl_smooth
                else:
                    lg.warning("Not smoothing!")
                    loss = loss_reproj

                # The constraint term is weighted by a fixed factor of 1000.
                if loss_cnstr is not None:
                    loss += 1000 * loss_cnstr

                # hip0 = tf.nn.l2_normalize(pos_3d[:-1, :, Joint.RHIP] - pos_3d[:-1, :, Joint.LHIP])
                # hip1 = tf.nn.l2_normalize(pos_3d[1:, :, Joint.RHIP] - pos_3d[1:, :, Joint.RHIP])
                # dots = tf.reduce_sum(tf.multiply(hip0, hip1), axis=1)
                # print(dots)
                # loss_dot = tf.nn.l2_loss(1. - dots)
                # loss_ang = fw_angles + rotation[:, 1]
                # print(loss_ang)
                # loss_ang = tf.square(loss_ang[1:] - loss_ang[:-1])
                # print(loss_ang)
                # two_pi_sqr = tf.constant((2. * 3.14159)**2., dtype=tf.float32)
                # print(two_pi_sqr)
                # loss_ang = tf.reduce_mean(tf.where(loss_ang > two_pi_sqr, loss_ang - two_pi_sqr, loss_ang))
                # print(loss_ang)
                # loss += loss_ang

                #
                # optimize
                #
                # Only translation and rotation are optimized; rotation is
                # bounded to [-pi/2, pi/2].
                optimizer = ScipyOptimizerInterface(
                    loss,
                    var_list=[translation, rotation],
                    options={'gtol': 1e-12},
                    var_to_bounds={rotation: (-np.pi / 2., np.pi / 2.)})

            with tf.Session(graph=graph) as session:
                session.run(tf.global_variables_initializer())

                optimizer.minimize(session)
                # Fetch the optimized values and debug tensors.
                np_pos_3d_out, np_pos_2d_out, np_transl_out, np_masked, \
                np_acceleration, np_loss_transl_smooth, np_dt_vel = \
                    session.run([pos_3d, pos_2d, translation, masked,
                                 tf_acceleration_z, loss_transl_smooth,
                                 tf_dt_vel_inv])
                if show:
                    o_sqr_accel_z = session.run(sqr_accel_z)
                o_vel = session.run(tf_velocity)
                o_dx = session.run(dx)
                o_rot = session.run(rotation)
                # o_dx, o_dx2 = session.run([accel_bak, acceleration2])
                # assert np.allclose(o_dx, o_dx2), "no"
                o_cam = session.run(fetches=[params_camera])
                print("camera angle: %s" % np.rad2deg(o_cam[0]))
                # o_losses = session.run([loss_reproj, loss_transl_smooth, loss_dot, loss_ang])
                o_losses = session.run([loss_reproj, loss_transl_smooth])
                print('losses: {}'.format(o_losses))
                # o_dots = session.run(dots)
                # with open('tmp/dots.txt', 'w') as fout:
                #     fout.write('\n'.join((str(e) for e in o_dots.tolist())))

    # The code that would fill fixed_frames is commented out, so the list
    # stays empty and the re-optimization branch below is never entered.
    fixed_frames = []
    # for lin_frame_id in range(np_transl_out.shape[0]):
    #     if np_transl_out[lin_frame_id, 2] < 0.:
    #         print("Correcting frame_id %d: %s"
    #               % (skel_ours.get_lin_id_for_frame_id(lin_frame_id),
    #                  np_transl_out[lin_frame_id, :]))
    #         if lin_frame_id > 0:
    #             np_transl_out[lin_frame_id, :] = np_transl_out[lin_frame_id-1, :]
    #         else:
    #             np_transl_out[lin_frame_id, :] = np_transl_out[lin_frame_id+1, :]
    #         fixed_frames.append(lin_frame_id)

    # debug_forwards(skel_ours.poses, np_pos_3d_out, o_rot, forwards, np_angles)

    # z_jumps = np_pos_3d_out[1:, 2, Joint.PELV] - np_pos_3d_out[:-1, 2, Joint.PELV]
    # out = scipy.stats.mstats.winsorize(z_jumps, limits=1.)
    # plt.figure()
    # plt.plot(pos_3d[:, 2, Joint.PELV])
    # plt.show()
    # sys.exit(0)
    # diff = np.linalg.norm(out - displ, axis=1)
    if len(fixed_frames):
        print("Re-optimizing...")
        # NOTE(review): this feeds the translation values into `transform`;
        # it looks like `translation` may have been intended -- confirm
        # before re-enabling the code that fills fixed_frames.
        with tf.Session(graph=graph) as session:
            np_pos_3d_out, np_pos_2d_out, np_transl_out = \
                session.run(fetches=[pos_3d, pos_2d, translation],
                            feed_dict={transform: np_transl_out})

    if show:
        # Hard-coded frame window used by the debug plots below.
        lim_fr = [105, 115, 135]
        fig = plt.figure()
        accel_thr = 0.  # np.percentile(o_sqr_accel_z, 25)

        ax = plt.subplot2grid((2, 2), (0, 0), colspan=2)
        # print("np_masked:%s" % np_masked)
        # plt.plot(np_masked[:, )
        ax.plot(np.linalg.norm(np_acceleration[lim_fr[0]:lim_fr[1]], axis=1),
                '--o',
                label='accel')
        ax.add_artist(Line2D([0, len(o_sqr_accel_z)], [accel_thr, accel_thr]))
        # plt.plot(np_dt_vel[:, 0], label='dt velocity')
        # plt.plot(np.linalg.norm(np_f_accel, axis=1), '--x', label='f_accel')
        # plt.plot(ank_diff, label='ank_diff')
        ax.plot(o_sqr_accel_z[lim_fr[0]:lim_fr[1] + 1],
                '--x',
                label='loss accel_z')
        ax.legend()

        ax2 = plt.subplot2grid((2, 2), (1, 0), aspect='equal')
        ax2.plot(np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 0, Joint.PELV],
                 np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 2, Joint.PELV], '--x')
        for i, vel in enumerate(o_vel):
            if not (lim_fr[0] <= i <= lim_fr[1]):
                continue

            p0 = np_pos_3d_out[i + 1, [0, 2], Joint.PELV]
            p1 = np_pos_3d_out[i, [0, 2], Joint.PELV]
            ax2.annotate(
                "%f = ((%g - %g) + (%g - %g)) * %g = %g" %
                (vel[2], np_pos_3d_out[i + 1, 2, Joint.LHIP],
                 np_pos_3d_out[i, 2, Joint.LHIP], np_pos_3d_out[i + 1, 2,
                                                                Joint.RHIP],
                 np_pos_3d_out[i, 2, Joint.RHIP], np_dt_vel[i, 2], o_dx[i, 2]),
                xy=((p0[0] + p1[0]) / 2., (p0[1] + p1[1]) / 2.))
        ax2.set_title('velocities')

        ax1 = plt.subplot2grid((2, 2), (1, 1), aspect='equal')
        ax1.plot(np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 0, Joint.PELV],
                 np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 2, Joint.PELV], '--x')
        for i, lacc in enumerate(o_sqr_accel_z):
            if not (lim_fr[0] <= i <= lim_fr[1]):
                continue
            if lacc > accel_thr:
                p0 = np_pos_3d_out[i + 1, [0, 2], Joint.PELV]
                ax1.annotate("%.3f" % np_acceleration[i], xy=(p0[0], p0[1]))
                ax.annotate("%.3f" % np.log10(lacc),
                            xy=(i - lim_fr[0], abs(np_acceleration[i])))
        ax1.set_title('accelerations')

        plt.show()

    # Undo the normalization applied at the top of the function: scale by
    # intrinsics[0, 0] / [1, 1] and add intrinsics[0, 2] / [1, 2] back, for
    # both the optimized projections and the input detections.
    np.set_printoptions(linewidth=200)
    np_pos_2d_out[:, 0, :] *= intrinsics[0, 0]
    np_pos_2d_out[:, 1, :] *= intrinsics[1, 1]
    np_pos_2d_out[:, 0, :] += intrinsics[0, 2]
    np_pos_2d_out[:, 1, :] += intrinsics[1, 2]

    np_poses_2d[:, 0, :] *= intrinsics[0, 0]
    np_poses_2d[:, 1, :] *= intrinsics[1, 1]
    np_poses_2d[:, 0, :] += intrinsics[0, 2]
    np_poses_2d[:, 1, :] += intrinsics[1, 2]

    # Debug overlays: for each visible joint draw a line between the
    # optimized projection and the detection, plus one circle for each.
    out_images = {}
    if shape_orig is not None:
        frames_2d = skel_ours_2d.get_frames()
        for frame_id2 in frames_2d:
            try:
                lin_frame_id = skel_ours_2d.get_lin_id_for_frame_id(frame_id2)
            except KeyError:
                lin_frame_id = None
            frame_id = skel_ours_2d.mod_frame_id(frame_id=frame_id2)

            im = None
            if frame_id in out_images:
                # Reuse the canvas already drawn on for this frame id.
                im = out_images[frame_id]
            elif len(images):
                if frame_id not in images:
                    lg.warning("Not enough images, the video was probably cut "
                               "after LiftingFromTheDeep was run.")
                    continue
                im = copy.deepcopy(images[frame_id])
                im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
            else:
                # NOTE(review): 'i1' is a signed 8-bit dtype while colors up
                # to 200 are drawn below -- confirm 'u1' was not intended.
                im = np.zeros(
                    (shape_orig[0].astype(int), shape_orig[1].astype(int), 3),
                    dtype='i1')
            if lin_frame_id is not None:
                for jid in range(np_pos_2d_out.shape[2]):
                    if skel_ours_2d.is_visible(frame_id2, jid):
                        p2d = tuple(np_pos_2d_out[lin_frame_id, :,
                                                  jid].astype(int).tolist())
                        p2d_det = tuple(np_poses_2d[lin_frame_id, :,
                                                    jid].astype(int).tolist())
                        cv2.line(im,
                                 p2d,
                                 p2d_det,
                                 color=(100, 100, 100),
                                 thickness=3)
                        cv2.circle(im,
                                   p2d,
                                   radius=3,
                                   color=(0, 0, 200),
                                   thickness=-1)
                        cv2.circle(im,
                                   p2d_det,
                                   radius=3,
                                   color=(0, 200, 0),
                                   thickness=-1)
            out_images[frame_id] = im
            # cv2.imshow("Out", im)
            # cv2.waitKey(50)

        # Disabled debug visualization (never executed).
        if False:
            # visualize
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            for frame_id in range(0, np_pos_3d_out.shape[0], 1):
                j = Joint.PELV
                ax.scatter(np_pos_3d_out[frame_id, 0, j],
                           np_pos_3d_out[frame_id, 2, j],
                           -np_pos_3d_out[frame_id, 1, j],
                           marker='o')
            # smallest = np_pos_3d_out.min()
            # largest = np_pos_3d_out.max()
            ax.set_xlim3d(-5., 5.)
            ax.set_xlabel('x')
            ax.set_ylim3d(-5., 5.)
            ax.set_ylabel('y')
            ax.set_zlim3d(-5., 5.)
            ax.set_zlabel('z')

    # Disabled debug visualization (never executed).
    if False:
        # visualize
        fig = plt.figure()
        ax = fig.gca(projection='3d')
        for frame_id in range(0, np_pos_3d_out.shape[0], 1):
            for j in range(np_pos_3d_out.shape[2]):
                ax.scatter(np_pos_3d_out[frame_id, 0, j],
                           np_pos_3d_out[frame_id, 2, j],
                           -np_pos_3d_out[frame_id, 1, j],
                           marker='o')
        # smallest = np_pos_3d_out.min()
        # largest = np_pos_3d_out.max()
        ax.set_xlim3d(-5., 5.)
        ax.set_xlabel('x')
        ax.set_ylim3d(-5., 5.)
        ax.set_ylabel('y')
        ax.set_zlim3d(-5., 5.)
        ax.set_zlabel('z')
    # NOTE(review): this plt.show() runs regardless of `show`.
    plt.show()

    # The optimized poses must have the same shape as the input poses
    # before they replace them on the skeleton.
    assert all(a == b
               for a, b in zip(skel_ours.poses.shape, np_pos_3d_out.shape)), \
        "no"
    skel_ours.poses = np_pos_3d_out
    return skel_ours, out_images, intrinsics
Ejemplo n.º 9
0
                    #Learning Actor
                    _, soft, adv = sess.run(
                        [opt, soft_out, A],
                        feed_dict={
                            state: [ss],
                            actions: [aa],
                            q_estimation: q_approximated,
                            q_return: qq
                        })
                    # Skip 10 episodes before TRPO to avoid noise in estimating the advantage function
                    if episode > 10:
                        feed_dict = [[state, [ss]], [soft_out_k, [soft]],
                                     [A_k, [adv]], [actions, [aa]],
                                     [N, [n + episode + 1]]]
                        #Calling optimizer
                        trpo_opt.minimize(sess, feed_dict=feed_dict)
                        print(soft)
                        print("adv: ", adv, " = ", qq, " - ", q_approximated)

                if episode % slice == 0:
                    print("Episode: ", episode)
                    print("Successes to all: ", success_counter / slice)
                    success_episodes.append(episode)
                    successes.append(success_counter / slice)
                    success_counter = 0

    plt.plot(success_episodes, successes)
    plt.show()
    print(successes)
except KeyboardInterrupt:
    plt.plot(success_episodes, successes)
Ejemplo n.º 10
0
# Mean squared error between prediction and target, scaled by 1/(2*n_samples).
cost = tf.reduce_mean(tf.pow(pred-Y, 2))/(2*n_samples)

# Minimize the cost with SciPy's BFGS through the TensorFlow interface.
# (A GradientDescentOptimizer used to be created here and was immediately
# overwritten by this assignment, so it has been dropped.)
optimizer = ScipyOptimizerInterface(cost, options={ 'maxiter': 100}, method='BFGS')

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
# Start training
with tf.Session() as sess:
    sess.run(init)

    # Fit all training data
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            # One full SciPy minimization per training sample. The former
            # `sess.run(cost)` after this call evaluated the cost without
            # feeding X/Y and discarded the result, so it was removed.
            optimizer.minimize(sess, feed_dict={X: x, Y: y} )
        #Display logs per epoch step
        # NOTE: `True or ...` makes this log every epoch; display_step is
        # effectively ignored. Kept as-is to preserve the output.
        if True or (epoch+1) % display_step == 0:
            # Sum of the per-sample costs over the whole training set.
            c=0
            for (x, y) in zip(train_X, train_Y):
                c+= sess.run(cost, feed_dict={X: x, Y:y})
            print ("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), \
                "W=", sess.run(W), "b=", sess.run(b))

    print ("Optimization Finished!")

    #Graphic display
    plt.plot(train_X, train_Y, 'ro', label='Original data')
Ejemplo n.º 11
0
def tsne(X,
         perplexity=50,
         dim=2,
         theta=0.5,
         knn_method='knnparallel',
         pca_dim=50,
         exag=12.,
         exag_iter=250,
         max_iter=1000,
         verbose=False,
         print_iter=50,
         lr=200.,
         init_momentum=0.5,
         final_momentum=0.8,
         save_snapshots=False,
         optimizer='momentum',
         tf_optimizer='AdamOptimizer',
         seed=42):
    """Compute a t-SNE embedding of the rows of ``X`` with TensorFlow.

    The input is mean-centred (on a copy; the caller's array is left
    untouched), optionally reduced with PCA, turned into the affinity
    matrix ``P`` by ``x2p`` and then embedded by minimising ``tsne_op``.

    Args:
        X: 2-D array of samples (rows) by features (columns).
        perplexity: Forwarded to ``x2p``.
        dim: Number of columns of the embedding ``Y``.
        theta: Accepted for interface compatibility; not used in this
            function.
        knn_method: Forwarded to ``x2p`` as ``method``.
        pca_dim: ``n_components`` for the PCA pre-processing step, or
            ``None`` to skip PCA.
        exag: Initial value of the factor multiplying ``P``.
        exag_iter: Step at which that factor is reset to 1 (and, for the
            ``'momentum'`` optimizer, the momentum is switched to
            ``final_momentum``).
        max_iter: Maximum number of optimisation iterations.
        verbose: Forwarded to ``x2p``; also enables progress printing in
            the ``'momentum'``/``'tensorflow'`` loops.
        print_iter: Interval (in steps) for recording/printing progress
            and for taking snapshots.
        lr: Learning rate for ``'momentum'`` and ``'tensorflow'``.
        init_momentum: Momentum used before ``exag_iter``.
        final_momentum: Momentum used from ``exag_iter`` on.
        save_snapshots: If true, intermediate embeddings are stored in
            ``result['snapshots']``.
        optimizer: One of ``'momentum'``, ``'tensorflow'`` or ``'bfgs'``.
        tf_optimizer: Name of the ``tf.train`` optimizer class used when
            ``optimizer == 'tensorflow'``.
        seed: Graph-level TensorFlow random seed.

    Returns:
        dict with keys ``'P'``, ``'exag_iter'``, ``'print_iter'``,
        ``'loss'`` and ``'Y'`` (the final embedding), plus ``'PCA'`` when
        ``pca_dim`` is not ``None`` and ``'snapshots'`` when
        ``save_snapshots`` is true.

    Raises:
        AssertionError: If ``optimizer`` is not a supported option.
    """
    # Validate before doing any work.
    assert optimizer in (
        'momentum', 'tensorflow',
        'bfgs'), 'Available options: momentum, tensorflow and bfgs'

    # Centre a copy: an in-place `X -= ...` would silently modify the
    # caller's array and fails outright for integer-typed input.
    X = X - X.mean(axis=0)
    N = X.shape[0]
    result = {}

    if pca_dim is not None:
        result['PCA'] = PCA(n_components=pca_dim)
        X = result['PCA'].fit_transform(X)

    P = x2p(X, perplexity=perplexity, method=knn_method, verbose=verbose)
    result['P'] = P
    result['exag_iter'] = exag_iter
    result['print_iter'] = print_iter
    result['loss'] = []
    if save_snapshots:
        result['snapshots'] = []

    tf.reset_default_graph()
    tf.set_random_seed(seed)

    with tf.Session() as sess:
        stddev = 1. if optimizer == 'bfgs' else 0.01
        Y = tf.Variable(
            tf.random_normal((N, dim), stddev=stddev, dtype=X.dtype))
        exag_var = tf.Variable(exag, dtype=P.dtype)

        if isinstance(P, sp.sparse.csr_matrix):
            loss = tsne_op((P.indptr, P.indices, P.data * exag_var), Y)
        else:
            loss = tsne_op(P * exag_var, Y)

        # Build the auxiliary assign ops exactly once. Creating them inside
        # a per-step callback adds new nodes to the graph on every step.
        zero_mean = tf.assign(Y, Y - tf.reduce_mean(Y, axis=0))
        stop_exag = tf.assign(exag_var, 1.)

        if optimizer == 'bfgs':
            step = 1

            def step_callback(Y_var):
                """Per-step hook: report, stop exaggeration, re-centre Y."""
                nonlocal step
                if step % print_iter == 0:
                    # NOTE: printed regardless of `verbose`, unlike the
                    # iterative branch below (kept as in the original).
                    print('Step: %d, error: %.16f' %
                          (step, result['loss'][-1]))
                    if save_snapshots:
                        result['snapshots'].append(
                            Y_var.reshape((N, dim)).copy())
                if step == exag_iter:
                    sess.run(stop_exag)

                #zero mean
                sess.run(zero_mean)
                step += 1

            def loss_callback(err):
                """Record every loss value reported by the optimizer."""
                result['loss'].append(err)

            opt = ScipyOptimizerInterface(loss,
                                          var_list=[Y],
                                          method='L-BFGS-B',
                                          options={
                                              'eps': 1.,
                                              'gtol': 0.,
                                              'ftol': 0.,
                                              'disp': False,
                                              'maxiter': max_iter,
                                              'maxls': 100
                                          })
            tf.global_variables_initializer().run()
            opt.minimize(sess,
                         fetches=[loss],
                         loss_callback=loss_callback,
                         step_callback=step_callback)
            Y_final = Y.eval()

        else:
            if optimizer == 'tensorflow':
                opt = getattr(tf.train, tf_optimizer)(learning_rate=lr)
                update = opt.minimize(loss, var_list=[Y])
            else:
                # Hand-written momentum update with per-element gains.
                mom_var = tf.Variable(init_momentum, dtype=X.dtype)
                uY = tf.Variable(tf.zeros((N, dim), dtype=X.dtype))
                gains = tf.Variable(tf.ones((N, dim), dtype=X.dtype))
                dY = tf.gradients(loss, [Y])[0]

                # Shrink the gain where gradient and velocity agree in
                # sign, grow it otherwise, then clamp from below.
                gains = tf.assign(
                    gains,
                    tf.where(tf.equal(tf.sign(dY), tf.sign(uY)), gains * .8,
                             gains + .2))

                gains = tf.assign(gains, tf.maximum(gains, 0.01))
                uY = tf.assign(uY, mom_var * uY - lr * gains * dY)

                update = tf.assign_add(Y, uY)

            tf.global_variables_initializer().run()

            t = time.time()
            for i in range(1, max_iter + 1):
                if i == exag_iter:
                    if optimizer == 'momentum':
                        sess.run(tf.assign(mom_var, final_momentum))
                    sess.run(stop_exag)

                sess.run(update)
                sess.run(zero_mean)

                if i % print_iter == 0:
                    kl = loss.eval()
                    result['loss'].append(kl)
                    if verbose:
                        print('Step: %d, error: %f (in %f sec.)' %
                              (i, kl, (time.time() - t)))
                        t = time.time()
                    if save_snapshots:
                        result['snapshots'].append(Y.eval())
            Y_final = Y.eval()

    result['Y'] = Y_final
    return result
Ejemplo n.º 12
0
                      cont_img,
                      styl_img,
                      cont_layers,
                      styl_layers,
                      cont_weights,
                      styl_weights,
                      alpha,
                      beta)

##############
## TRAINING ##
##############

with tf.Session(graph=model.graph) as sess:
    # Give every variable its initial value before optimising.
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    # L-BFGS-B over the total loss, capped at `num_steps` iterations.
    lbfgs_options = {'maxiter': num_steps}
    optimizer = ScipyOptimizerInterface(model.total_loss,
                                        method="L-BFGS-B",
                                        options=lbfgs_options)

    # Extra tensors requested from the optimizer via `fetches`.
    watched = [
        model.styl_loss, model.cont_loss, model.total_loss,
        model.styl_loss_list, model.cont_loss_list,
        model.gen_cont_act, model.gen_styl_act,
        model.styl_act, model.cont_act,
        model.image,
    ]
    on_step = model.step_callback(img_shape, save_per_step)
    on_loss = model.loss_callback()
    optimizer.minimize(sess,
                       fetches=watched,
                       step_callback=on_step,
                       loss_callback=on_loss)

    # Read back the optimised image, post-process it and write it out.
    result_array = utils.img_postprocess(sess.run(model.image))
    utils.save_image(folder, result_array)
print("Style Transfer Complete.")
Ejemplo n.º 13
0
    # Compute loss computational graphs
    loss_op = build_loss(input_tensor, layers)

    # Minimise using the L-BFGS optimiser
    optimizer = ScipyOptimizerInterface(loss_op, options={'maxfun': 20},
                                        var_list=[pastiche_tensor])
    
    # Perform style transfer by optimising the loss
    with tf.Session() as sess:
        # Init variables
        sess.run(tf.global_variables_initializer())
        
        n_iterations = 10
        for i in range(n_iterations):
            print('Iteration:', i)
            start_time = time.time()

            # Optimise loss using optimizer
            optimizer.minimize(sess)

            # Display progress
            current_loss =  sess.run(loss_op)
            print("loss: ", current_loss)
            end_time = time.time()
            print('Iteration %d completed in %ds' % (i, end_time - start_time))
            
            pastiche = sess.run(pastiche_tensor)
            pastiche_img = deprocess_image(pastiche)
            pastiche_img.save("pastiche/{}.jpg".format(i))

Ejemplo n.º 14
0
# Op that assigns every variable its initial value
init = tf.global_variables_initializer()
# Run the whole training inside one session
with tf.Session() as sess:
    sess.run(init)

    # One pass over the training set per epoch
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            batch_feed = {x: batch_xs, y: batch_ys}
            # Run the optimizer on this batch, then measure the cost on
            # the same batch.
            optimizer.minimize(sess, feed_dict=batch_feed)
            c = sess.run(cost, feed_dict=batch_feed)
            # Accumulate the mean cost over the epoch
            avg_cost += c / total_batch
        # Only report every `display_step` epochs
        if (epoch + 1) % display_step != 0:
            continue
        print("Epoch:", format(epoch + 1, '04d'), "cost=",
              format(avg_cost, '.9f'))

    print("Optimization Finished!")

    # Test model: compare predicted and true class indices
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
Ejemplo n.º 15
0
                delta_pl = tf.matmul(p, child.pl)

                # set partial likelihood
                try:
                    node.pl *= delta_pl
                except AttributeError:
                    node.pl = delta_pl
            if node.level() == 0:
                # compute size-wise likelihoods
                pvec = tf.matmul(node.pl, tf.constant(freq, shape=(len(codons),1)),
                                 transpose_a=True)
                # and full likelihood
                lnL = tf.reduce_sum(tf.log(pvec))


    # parameter boundaries
    bounds=[(0.05, 20), (0.05, 20)] + [[1e-9, 100.]] * len(edges)
    optimizer = ScipyOptimizerInterface(
        -lnL, bounds=bounds, method='L-BFGS-B', options={'disp': True})

    with tf.Session() as session:
        # initialize starting values
        session.run(tf.global_variables_initializer())

        
        print('starting lnL', lnL.eval())
        optimizer.minimize(session)
        print('final lnL', lnL.eval())
        print('w=%s, k=%s' % (w.eval(), k.eval()))
Ejemplo n.º 16
0
    def run(self):
        """Run the style transfer and save the resulting image.

        Loads the content and style images, builds the network and the
        combined loss on a trainable ``image`` variable, optimises it with
        either Adam or L-BFGS (per ``self._optimizer``) and finally writes
        the result to ``self._output_image``.

        Raises:
            ValueError: If ``self._optimizer`` is neither ``'Adam'`` nor
                ``'L-BFGS'``.
        """
        content = self._load_image(self._content_image)
        height = content.shape[1]
        width = content.shape[2]
        # The style image is loaded at the content image's height/width.
        style = self._load_image(self._style_image, size=(height, width))

        print("Content shape: ", content.shape)
        print("Style shape: ", style.shape)

        # The optimised variable starts out as the style image.
        image = tf.Variable(style,
                            dtype=tf.float32,
                            validate_shape=False,
                            name='image')
        self._output_shape = content.shape
        self._build_vgg19(image)
        self._add_gramians()

        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        with tf.Session(config=session_config) as sess:
            # Variables need values before the loss helpers use the session.
            sess.run(tf.global_variables_initializer())
            with tf.name_scope('losses'):
                style_losses = self._setup_style_losses(sess, image, style)
                content_losses = self._setup_content_losses(
                    sess, image, content)

                losses = content_losses + style_losses

                if self._hist_weight > 0:
                    with tf.name_scope('histogram'), tf.device('/cpu:0'):
                        hist_loss = self._setup_histogram_loss(
                            image, style, sess)
                    losses += hist_loss

                # tv loss expects explicit shape
                image.set_shape(content.shape)
                if self._tv_weight:
                    tv_loss = tf.image.total_variation(image[0])
                    tv_loss_weighted = tf.multiply(tv_loss,
                                                   self._tv_weight,
                                                   name='tv_loss')
                    losses += tv_loss_weighted
                loss = tf.foldl(add, losses, name='loss')

            # Choose the optimizer
            use_adam = self._optimizer == 'Adam'
            if not use_adam and self._optimizer != 'L-BFGS':
                raise ValueError("Unknown optimization method")

            if use_adam:
                # Built before the initializer below so that any variables
                # it adds are initialised as well.
                train_op = tf.train.AdamOptimizer(10).minimize(loss)

            # Shared start-up: (re)initialise, set the starting image and
            # report it once before optimising.
            sess.run(tf.global_variables_initializer())
            self._set_initial_image(sess, image, content)
            self._step_callback(sess.run(image))

            if use_adam:
                for step in range(self._num_iterations):
                    _, loss_value, current = sess.run([train_op, loss, image])
                    self._step_callback(current)
                    print("Iteration: {:3d}\tLoss = {:.6f}".format(
                        step, loss_value))
            else:
                lbfgs_options = {
                    'maxiter': self._num_iterations,
                    'disp': self._print_iter,
                }
                lbfgs = ScipyOptimizerInterface(loss,
                                                options=lbfgs_options,
                                                method='L-BFGS-B')
                lbfgs.minimize(sess, step_callback=self._step_callback)

            self._save_image(self._output_image, sess.run(image))
Ejemplo n.º 17
0
    def optimize(self, X, Y, method='L-BFGS-B', callback=None,
            maxiter=1000, **kw):
        """Optimize the model by maximising the log likelihood.

        Maximises the sum of the log likelihood given X & Y and any
        priors with respect to any free variables.

        Args:
            X (np.ndarray | tf.Tensor): The training inputs.
            Y (np.ndarray | tf.Tensor): The training outputs.
            method (tf.train.Optimizer | str): The means by which to
                optimise. If `method` is a string, it will be passed as
                the `method` argument to the initialiser of
                `tf.contrib.opt.ScipyOptimizerInterface`. Else, it
                will be treated as an instance of `tf.train.Optimizer`
                and its `.minimize()` method will be used as the training
                step.
            callback (Callable[[np.ndarray], ...]): A function that will
                be called at each optimization step with the current value
                of the variable vector (a vector constructed by flattening
                the free state of each free `Param` and then concatenating
                them in the order the `Param`\\ s are returned by `.params`).
            maxiter (int): The maximum number of iterations of the optimizer.
            **kw: Additional keyword arguments are passed through to the
                optimizer.

        Returns:
            (scipy.OptimizeResult) The result of the optimisation.

        Examples:
            Let's construct a very simple model for demonstration
            purposes. It has two (scalar) parameters, `.a` and `.b`,
            which are constrained to be positive, and its likelihood is
            `10 - a - b`, regardless of X and Y.

            >>> import numbers
            >>> import numpy as np
            >>> from overrides import overrides
            >>> from gptf import Param, ParamAttributes, transforms
            >>> class Example(Model, ParamAttributes):
            ...     def __init__(self, a, b):
            ...         assert isinstance(a, numbers.Number)
            ...         assert isinstance(b, numbers.Number)
            ...         super().__init__()
            ...         self.a = Param(a, transform=transforms.Exp(0.))
            ...         self.b = Param(b, transform=transforms.Exp(0.))
            ...     @tf_method()
            ...     @overrides
            ...     def build_log_likelihood(self, X, Y):
            ...         return 10. - self.a.tensor - self.b.tensor

            We won't care about the values of X and Y.

            >>> X = Y = np.array([0.])

            .. rubric:: TensorFlow optimizers

            We can optimise the parameters of the model using a TensorFlow
            optimizer like so:

            >>> m = Example(3., 4.)
            >>> opt = tf.train.GradientDescentOptimizer(learning_rate=1)
            >>> m.optimize(X, Y, opt)  # use None for X, Y
            message: 'Finished iterations.'
            success: True
                  x: array([..., ...])

            After the optimisation, both parameters are optimised
            towards 0, but are still positive. The constraints on the
            parameters have been respected.

            >>> print("m.a: {:.3f}".format(np.asscalar(m.a.value)))
            m.a: 0.001
            >>> print("m.b: {:.3f}".format(np.asscalar(m.b.value)))
            m.b: 0.001

            If we fix a parameter, it is not optimized:

            >>> m.a = 5.
            >>> m.b = 1.
            >>> m.b.fixed = True
            >>> m.optimize(X, Y, opt)
            message: 'Finished iterations.'
            success: True
                  x: array([...])
            >>> print("m.a: {:.3f}".format(np.asscalar(m.a.value)))
            m.a: 0.001
            >>> print("m.b: {:.3f}".format(np.asscalar(m.b.value)))
            m.b: 1.000

            .. rubric:: SciPy optimizers

            We can optimise the parameters of the model using a SciPy
            optimizer by providing a string value for `method`:

            >>> m = Example(3., 4.)
            >>> m.optimize(X, Y, 'L-BFGS-B', disp=False, ftol=.0001)
            message: 'SciPy optimizer completed successfully.'
            success: True
                  x: array([..., ...])

            As for TensorFlow optimizers, after the optimisation both
            parameters are optimised towards 0, but are still positive.
            The constraints on the parameters have been respected.

            >>> print("m.a: {:.3f}".format(np.asscalar(m.a.value)))
            m.a: 0.000
            >>> print("m.b: {:.3f}".format(np.asscalar(m.b.value)))
            m.b: 0.000

            If we fix a parameter, it is not optimized:

            >>> m.a = 5.
            >>> m.b = 1.
            >>> m.b.fixed = True
            >>> m.optimize(X, Y, 'L-BFGS-B', disp=False, ftol=.0001)
            message: 'SciPy optimizer completed successfully.'
            success: True
                  x: array([...])
            >>> print("m.a: {:.3f}".format(np.asscalar(m.a.value)))
            m.a: 0.000
            >>> print("m.b: {:.3f}".format(np.asscalar(m.b.value)))
            m.b: 1.000

            .. rubric:: Miscellaneous

            Optimisation still works, even with weird device contexts and
            session targets.

            >>> # set up a distributed execution environment
            >>> clusterdict = \\
            ...     { 'worker': ['localhost:2226']
            ...     , 'master': ['localhost:2227']
            ...     }
            >>> spec = tf.train.ClusterSpec(clusterdict)
            >>> worker = tf.train.Server(spec, job_name='worker', task_index=0)
            >>> worker.start()
            >>> master = tf.train.Server(spec, job_name='master', task_index=0)
            >>> # change m's device context
            >>> # we're about to do weird things with op placement, and we
            >>> # don't want it in the default graph where it can mess with
            >>> # other doctests, so change m's tf_graph as well.
            >>> m.tf_graph = tf.Graph()
            >>> m.tf_device = '/job:worker/task:0'
            >>> m.tf_session_target = master.target

            TensorFlow:

            >>> m.a = 4.5
            >>> m.optimize(X, Y, opt)
            message: 'Finished iterations.'
            success: True
                  x: array([...])
            >>> print("m.a: {:.3f}".format(np.asscalar(m.a.value)))
            m.a: 0.001
            >>> print("m.b: {:.3f}".format(np.asscalar(m.b.value)))
            m.b: 1.000

            SciPy:

            >>> m.a = 4.5
            >>> m.optimize(X, Y, 'L-BFGS-B', disp=False, ftol=.0001)
            message: 'SciPy optimizer completed successfully.'
            success: True
                  x: array([...])
            >>> print("m.a: {:.3f}".format(np.asscalar(m.a.value)))
            m.a: 0.001
            >>> print("m.b: {:.3f}".format(np.asscalar(m.b.value)))
            m.b: 1.000

        """
        assert len(X.shape) >= 1
        assert len(Y.shape) >= 1
        # The compiled loss is cached per tensor (when a tensor is given) or
        # per trailing data shape (when an array is given).
        X_key = X if isinstance(X, tf.Tensor) else X.shape[1:]
        Y_key = Y if isinstance(Y, tf.Tensor) else Y.shape[1:]
        key = ("_Model__loss", X_key, Y_key)
        if key not in self.cache:
            # when we build placeholders, take the first dimension to be
            # of no value.
            X_tensor = (X if isinstance(X, tf.Tensor) else
                        tf.placeholder(X.dtype, (None,) + X.shape[1:]))
            Y_tensor = (Y if isinstance(Y, tf.Tensor) else
                        tf.placeholder(Y.dtype, (None,) + Y.shape[1:]))
            self.cache[key] = (self._compile_loss(X_tensor, Y_tensor),
                               X_tensor, Y_tensor)
        loss, X_tensor, Y_tensor = self.cache[key]

        # Work on a copy: if `self.feed_dict` hands back a shared dict, the
        # per-call placeholder -> data entries must not be written into it.
        feed_dict = dict(self.feed_dict or {})
        if not isinstance(X, tf.Tensor): feed_dict[X_tensor] = X
        if not isinstance(Y, tf.Tensor): feed_dict[Y_tensor] = Y

        # Only non-fixed parameters are optimised; `free_state` is their
        # flattened concatenation, used for callbacks and the result.
        variables = [p.free_state for p in self.params if not p.fixed]
        variables = utils.unique(variables)
        free_state = tf.concat([tf.reshape(v, [-1]) for v in variables], 0)

        with self.get_session() as sess:
            success = True
            try:
                if isinstance(method, str):
                    message = "SciPy optimizer completed successfully."
                    options = {'maxiter': maxiter, 'disp': True}
                    options.update(kw)
                    optimizer = ScipyOptimizerInterface(
                        loss, var_list=variables, method=method,
                        options=options
                    )
                    # Use the session entered above rather than asking for
                    # another one with a second `get_session()` call.
                    optimizer.minimize(sess, feed_dict,
                            step_callback=callback)
                else:
                    # treat method as TensorFlow optimizer.
                    message = "Finished iterations."
                    opt_step = method.minimize(loss, var_list=variables, **kw)
                    for _ in range(maxiter):
                        sess.run(opt_step, feed_dict=feed_dict)
                        if callback is not None:
                            callback(sess.run(free_state))
            except KeyboardInterrupt:
                # Ctrl-C stops the optimisation early but still reports the
                # state reached so far.
                success = False
                message = "Keyboard interrupt."

            return OptimizeResult\
                    ( x=sess.run(free_state)
                    , success=success
                    , message=message
                    )
Ejemplo n.º 18
0
class Base(object):
    """Base class for models fitted with SciPy's L-BFGS-B through TensorFlow.

    Subclasses override :meth:`build_model` to return a dict of tensors
    that contains at least the key ``'cost'``. The constructor builds the
    placeholders and the model, wraps the cost in a
    ``ScipyOptimizerInterface`` and opens a session with initialised
    variables.

    ``settings`` is a dict; this class reads ``'max_iter'`` and
    ``'n_input'``, and ``'filter_length'`` where noted.
    """

    def __init__(self, settings):
        """Build placeholders, model, optimizer and session from ``settings``."""
        self.settings = settings
        self.placeholder = self.build_placeholder()
        self.model = self.build_model()
        self.optimizer = ScipyOptimizerInterface(self.model['cost'],
                                                 method='L-BFGS-B',
                                                 options={
                                                     'maxiter':
                                                     self.settings['max_iter'],
                                                     'disp':
                                                     True
                                                 })
        init = tf.global_variables_initializer()
        self.sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=False))
        self.sess.run(init)

    def g(self, h, cost='l1'):
        """Return the scalar penalty of ``h`` for the named ``cost``.

        Supported names: ``'logcosh'``, ``'l1'``, ``'l1_approx'``, ``'exp'``.

        Raises:
            ValueError: If ``cost`` is not one of the supported names
                (previously this fell through and returned ``None``).
        """
        if cost == 'logcosh':
            return tf.reduce_sum(0.5 * tf.log(tf.cosh(2 * h)))

        if cost == 'l1':
            return tf.reduce_sum(tf.abs(h))

        if cost == 'l1_approx':
            # Smooth surrogate of |h|: sqrt(h^2 + small constant).
            return tf.reduce_sum(tf.sqrt(tf.constant(10e-8) + tf.pow(h, 2)))

        if cost == 'exp':
            return tf.reduce_sum(-tf.exp(-tf.pow(h, 2) / 2.0))

        raise ValueError(
            "Unknown cost %r; expected one of 'logcosh', 'l1', "
            "'l1_approx', 'exp'" % (cost,))

    def kl_divergence(self, x, y):  # Kullback-Leibler divergence
        """Return ``log(x) - log(y) + y / x - 1`` element-wise."""
        return tf.log(x) - tf.log(y) + (y / x) - 1.0

    def filter2toeplitz(self,
                        conv_filter):  # converts filter to Toeplitz matrix
        """Expand ``conv_filter`` into an ``n_input`` x ``n_input`` matrix.

        For every shift ``t`` in ``range(n_input)`` a length-``n_input``
        row is built from the filter moved right by ``t`` positions,
        zero-padded on both sides and truncated at the end. The rows are
        stacked and transposed, so the shifted filters end up as columns.
        The filter's length is read from its static shape.
        """
        n_input = self.settings['n_input']
        len_h = conv_filter.get_shape().as_list()[0]
        toplitz = list()
        for t in range(n_input):
            if t == 0:
                # Unshifted filter followed by trailing zeros.
                toplitz.append(
                    tf.concat([
                        conv_filter,
                        tf.zeros(n_input - t - len_h)
                    ], 0))

            elif t + len_h < n_input:
                # Filter fits entirely: leading zeros, filter, trailing zeros.
                toplitz.append(
                    tf.concat([
                        tf.zeros(t), conv_filter,
                        tf.zeros(n_input - t - len_h)
                    ], 0))

            else:
                # Filter reaches the end: keep only the part that fits.
                toplitz.append(
                    tf.concat([
                        tf.zeros(t),
                        conv_filter[0:n_input - t]
                    ], 0))

        H = tf.transpose(
            tf.reshape(tf.concat(toplitz, 0), (n_input, n_input)))
        return H

    def toeplitz2filter(self, H):  # converts Toeplitz matrix to filter
        """Average column windows of ``H`` back into a single filter.

        With ``hw = filter_length // 2``, the window ``H[c - hw:c + hw, c]``
        (``2 * hw`` entries) is taken for every column ``c`` in
        ``range(hw, n_input - (hw + 1))`` and the windows are averaged.
        """
        # Floor division: a float half-width (what `/` yields on Python 3)
        # cannot be used in `range` or as a slice bound.
        hw = self.settings['filter_length'] // 2
        hrft = [
            H[c - hw:c + hw, c]
            for c in range(hw, self.settings['n_input'] - (hw + 1))
        ]

        return tf.add_n(hrft) / (float(len(hrft)))

    def build_model(self):  # overwrite this function
        """Return the model dict; subclasses must override this.

        The base implementation returns ``None``.
        """
        pass

    def build_placeholder(self):
        """Create the float32 input placeholders, keyed by name.

        Always creates ``'x'``, ``'GM'`` and ``'MASK'``; ``'t'`` (of length
        ``filter_length``) is added only when ``settings`` has that key.
        """
        placeholder = dict()
        placeholder['x'] = tf.placeholder(tf.float32)
        placeholder['GM'] = tf.placeholder(tf.float32)
        placeholder['MASK'] = tf.placeholder(tf.float32)
        if 'filter_length' in self.settings:
            placeholder['t'] = tf.placeholder(tf.float32,
                                              [self.settings['filter_length']])

        return placeholder

    def _make_feed_dict(self, input_dict):
        """Map every placeholder to the ``input_dict`` value of the same key."""
        return {
            tensor: input_dict[key]
            for key, tensor in self.placeholder.items()
        }

    def fit(self, input_dict):
        """Run the optimizer on ``input_dict`` and return ``self``.

        ``input_dict`` must provide a value for every placeholder key.
        """
        self.optimizer.minimize(self.sess,
                                feed_dict=self._make_feed_dict(input_dict))
        return self

    def get_params(self, input_dict):
        """Evaluate the whole model dict on ``input_dict`` and return it.

        This also closes the session and resets the default graph, so the
        instance cannot be used for further fitting or evaluation
        afterwards.
        """
        out = self.sess.run(self.model,
                            feed_dict=self._make_feed_dict(input_dict))
        self.sess.close()
        tf.reset_default_graph()
        return out
Ejemplo n.º 19
0
class FMClassifier(TFPicklingBase, ClassifierMixin, BaseEstimator):
    """Factorization machine classifier.

    Parameters
    ----------
    rank : int, optional
        Rank of the underlying low-rank representation.
    batch_size : int, optional
        The batch size for learning and prediction. If there are fewer
        examples than the batch size during fitting, then the number of
        examples will be used instead.
    n_epochs : int, optional
        The number of epochs (iterations through the training data) when
        fitting with a TensorFlow optimizer. Not used when `solver` is a
        string.
    random_state: int, RandomState instance or None, optional
        If int, the random number generator seed. If RandomState instance,
        the random number generator itself. If None, then `np.random` will be
        used.
    lambda_v : float, optional
        L2 regularization strength for the low-rank embedding.
    lambda_beta : float, optional
        L2 regularization strength for the linear coefficients.
    init_scale : float, optional
        Standard deviation of random normal initialization.
    solver : a subclass of `tf.train.Optimizer` or str, optional
        Solver to use. If a string is passed, then the corresponding solver
        from `scipy.optimize.minimize` is used.
    solver_kwargs : dict, optional
        Additional keyword arguments to pass to `solver` upon construction
        (or, for a string `solver`, the `options` dict handed to the SciPy
        optimizer interface). See the TensorFlow documentation for possible
        options. Typically, one would want to set the `learning_rate`.

    Attributes
    ----------
    n_dims_ : int
        Number of input dimensions.
    classes_ : array
        Classes from the data.
    n_classes_ : int
        Number of classes.
    is_sparse_ : bool
        Whether a model taking sparse input was fit.
    graph_ : tf.Graph
        Graph holding the model; created on the first (partial) fit.
    """
    def __init__(self,
                 rank=8,
                 batch_size=64,
                 n_epochs=5,
                 random_state=None,
                 lambda_v=0.0,
                 lambda_beta=0.0,
                 solver=tf.train.AdadeltaOptimizer,
                 init_scale=0.1,
                 solver_kwargs=None):
        self.rank = rank
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.random_state = random_state
        self.lambda_v = lambda_v
        self.lambda_beta = lambda_beta
        self.solver = solver
        self.init_scale = init_scale
        self.solver_kwargs = solver_kwargs

    def _set_up_graph(self):
        """Initialize TF objects (needed before fitting or restoring)."""

        # Input values.  Sparse input is fed as (indices, values, shape) and
        # assembled into a SparseTensor; x2 holds the element-wise squares.
        if self.is_sparse_:
            self._x_inds = tf.placeholder(tf.int64, [None, 2], "x_inds")
            self._x_vals = tf.placeholder(tf.float32, [None], "x_vals")
            self._x_shape = tf.placeholder(tf.int64, [2], "x_shape")
            self._x = tf.sparse_reorder(
                tf.SparseTensor(self._x_inds, self._x_vals, self._x_shape))
            x2 = tf.sparse_reorder(
                tf.SparseTensor(self._x_inds, self._x_vals * self._x_vals,
                                self._x_shape))
            matmul = tf.sparse_tensor_dense_matmul
        else:
            self._x = tf.placeholder(tf.float32, [None, self.n_dims_], "x")
            x2 = self._x * self._x
            matmul = tf.matmul

        # Binary problems use float targets (sigmoid loss); multiclass
        # problems use integer class indices (sparse softmax loss).
        if self._output_size == 1:
            self._y = tf.placeholder(tf.float32, [None], "y")
        else:
            self._y = tf.placeholder(tf.int32, [None], "y")

        with tf.variable_scope("fm"):
            self._v = tf.get_variable(
                "v", [self.rank, self.n_dims_, self._output_size])
            self._beta = tf.get_variable("beta",
                                         [self.n_dims_, self._output_size])
            self._beta0 = tf.get_variable("beta0", [self._output_size])

        # Pairwise interaction term:
        #   0.5 * sum_f ((x . v_f)^2 - (x^2 . v_f^2))
        vx = tf.stack(
            [matmul(self._x, self._v[i, :, :]) for i in range(self.rank)],
            axis=-1)
        v2 = self._v * self._v
        v2x2 = tf.stack([matmul(x2, v2[i, :, :]) for i in range(self.rank)],
                        axis=-1)
        int_term = 0.5 * tf.reduce_sum(tf.square(vx) - v2x2, axis=-1)
        self._logit_y_proba \
            = self._beta0 + matmul(self._x, self._beta) + int_term

        if self._output_size == 1:
            # NOTE: squeezing every unit dimension turns a one-example batch
            # into a scalar; predict_proba compensates with np.atleast_1d.
            self._logit_y_proba = tf.squeeze(self._logit_y_proba)
            self._obj_func = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=self._logit_y_proba, labels=self._y))
            self._y_proba = tf.sigmoid(self._logit_y_proba)
        else:
            self._obj_func = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self._logit_y_proba, labels=self._y))
            self._y_proba = tf.nn.softmax(self._logit_y_proba)

        if self.lambda_v > 0:
            self._obj_func \
                += self.lambda_v * tf.reduce_sum(tf.square(self._v))

        if self.lambda_beta > 0:
            self._obj_func \
                += self.lambda_beta * tf.reduce_sum(tf.square(self._beta))

        solver_kwargs = self.solver_kwargs if self.solver_kwargs else {}
        if isinstance(self.solver, str):
            from tensorflow.contrib.opt import ScipyOptimizerInterface

            self._train_step = ScipyOptimizerInterface(
                self._obj_func,
                method=self.solver,
                options=solver_kwargs)
        else:
            self._train_step = self.solver(**solver_kwargs).minimize(
                self._obj_func)

    def _make_feed_dict(self, X, y):
        """Map the graph's placeholders to the given data.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Input data; must be sparse iff ``self.is_sparse_``.
        y : numpy array
            Targets, cast to float32 (binary) or int32 (multiclass).

        Returns
        -------
        dict
            Feed dictionary for ``session.run``.
        """
        if self.is_sparse_:
            # Sort the non-zero coordinates by (row, column).
            x_inds = np.vstack(X.nonzero())
            x_srt = np.lexsort(x_inds[::-1, :])
            x_inds = x_inds[:, x_srt].T.astype(np.int64)
            # Flatten with reshape(-1) rather than np.squeeze so that a
            # single non-zero entry still yields a 1-d array matching the
            # [None]-shaped placeholder.
            x_vals = np.array(
                X[x_inds[:, 0], x_inds[:, 1]]).reshape(-1).astype(np.float32)
            x_shape = np.array(X.shape).astype(np.int64)
            feed_dict = {
                self._x_inds: x_inds,
                self._x_vals: x_vals,
                self._x_shape: x_shape
            }
        else:
            feed_dict = {self._x: X.astype(np.float32)}

        if self._output_size == 1:
            feed_dict[self._y] = y.astype(np.float32)
        else:
            feed_dict[self._y] = y.astype(np.int32)

        return feed_dict

    def _check_data(self, X):
        """check input data

        Raises an error if number of features doesn't match.
        If the estimator has not yet been fitted, then do nothing.
        """

        if self._is_fitted:
            if X.shape[1] != self.n_dims_:
                raise ValueError("Number of features in the input data does "
                                 "not match the number assumed by the "
                                 "estimator!")

    def __getstate__(self):
        """Return the picklable state: base-class state plus own attributes."""
        # Handles TF persistence
        state = super(FMClassifier, self).__getstate__()

        # Add attributes of this estimator
        state.update(
            dict(rank=self.rank,
                 batch_size=self.batch_size,
                 n_epochs=self.n_epochs,
                 random_state=self.random_state,
                 lambda_v=self.lambda_v,
                 lambda_beta=self.lambda_beta,
                 solver=self.solver,
                 init_scale=self.init_scale,
                 solver_kwargs=self.solver_kwargs))

        # Add fitted attributes if the model has been fitted.
        if self._is_fitted:
            state['n_dims_'] = self.n_dims_
            state['_random_state'] = self._random_state
            state['_enc'] = self._enc
            state['classes_'] = self.classes_
            state['n_classes_'] = self.n_classes_
            state['_output_size'] = self._output_size
            state['is_sparse_'] = self.is_sparse_

        return state

    def fit(self, X, y):
        """Fit the classifier from scratch.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Training data.
        y : numpy array [n_samples]
            Targets.

        Returns
        -------
        self : returns an instance of self.
        """
        _LOGGER.info("Fitting %s", re.sub(r"\s+", r" ", repr(self)))

        # Mark the model as not fitted (i.e., not fully initialized based on
        # the data).
        self._is_fitted = False

        # Call partial fit, which will initialize and then train the model.
        return self.partial_fit(X, y)

    def partial_fit(self, X, y, classes=None, monitor=None):
        """Fit the classifier, initializing the model on the first call.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Training data.
        y : numpy array [n_samples]
            Targets.
        classes : array, shape (n_classes,)
            Classes to be used across calls to partial_fit.  If not set in the
            first call, it will be inferred from the given targets. If
            subsequent calls include additional classes, they will fail.
        monitor : callable, optional
            The monitor is called after each iteration with the current
            iteration, a reference to the estimator, and a dictionary with
            {'loss': loss_value} representing the loss calculated by the
            objective function at this iteration.
            If the callable returns True the fitting procedure is stopped.
            The monitor can be used for various things such as computing
            held-out estimates, early stopping, model introspection,
            and snapshotting. Only called when `solver` is not a string.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If the targets are neither binary nor multiclass, or if the
            number of features differs from a previous call.
        """

        X, y = check_X_y(X, y, accept_sparse='csr')

        # check target type
        target_type = type_of_target(y)
        if target_type not in ['binary', 'multiclass']:
            # Raise an error, as in
            # sklearn.utils.multiclass.check_classification_targets.
            raise ValueError("Unknown label type: %r" % y)

        # Initialize the model if it hasn't been already by a previous call.
        if not self._is_fitted:
            self._random_state = check_random_state(self.random_state)
            assert self.batch_size > 0, "batch_size <= 0"

            self.n_dims_ = X.shape[1]

            if classes is not None:
                self._enc = LabelEncoder().fit(classes)
            else:
                self._enc = LabelEncoder().fit(y)

            self.classes_ = self._enc.classes_
            self.n_classes_ = len(self.classes_)

            # Two (or fewer) classes need a single logit; otherwise one
            # logit per class.
            if self.n_classes_ <= 2:
                self._output_size = 1
            else:
                self._output_size = self.n_classes_

            self.is_sparse_ = bool(sp.issparse(X))

            # Instantiate the graph.  TensorFlow seems easier to use by just
            # adding to the default graph, and as_default lets you temporarily
            # set a graph to be treated as the default graph.
            self.graph_ = tf.Graph()
            with self.graph_.as_default():
                tf.set_random_seed(self._random_state.randint(0, 10000000))

                tf.get_variable_scope().set_initializer(
                    tf.random_normal_initializer(stddev=self.init_scale))

                # Not defined in this class; presumably provided by
                # TFPicklingBase and expected to call _set_up_graph and
                # create self._session -- TODO confirm.
                self._build_tf_graph()

                # Initialize model parameters.
                self._session.run(tf.global_variables_initializer())

            # Set an attribute to mark this as at least partially fitted.
            self._is_fitted = True

        # Check input data against internal data.
        # Raises an error on failure.
        self._check_data(X)

        # transform targets
        if sp.issparse(y):
            y = y.toarray()
        y = self._enc.transform(y)

        # Train the model with the given data.
        with self.graph_.as_default():
            if not isinstance(self.solver, str):
                n_examples = X.shape[0]
                indices = np.arange(n_examples)

                for epoch in range(self.n_epochs):
                    self._random_state.shuffle(indices)
                    for start_idx in range(0, n_examples, self.batch_size):
                        max_ind = min(start_idx + self.batch_size, n_examples)
                        batch_ind = indices[start_idx:max_ind]
                        feed_dict = self._make_feed_dict(
                            X[batch_ind], y[batch_ind])
                        obj_val, _ = self._session.run(
                            [self._obj_func, self._train_step],
                            feed_dict=feed_dict)
                        _LOGGER.debug("objective: %.4f, epoch: %d, idx: %d",
                                      obj_val, epoch, start_idx)

                    # Reports the objective of the last batch of the epoch.
                    _LOGGER.info("objective: %.4f, epoch: %d, idx: %d",
                                 obj_val, epoch, start_idx)

                    if monitor:
                        stop_early = monitor(epoch, self, {'loss': obj_val})
                        if stop_early:
                            _LOGGER.info(
                                "stopping early due to monitor function.")
                            return self
            else:
                # SciPy solvers optimize over the full data set at once.
                feed_dict = self._make_feed_dict(X, y)
                self._train_step.minimize(self._session, feed_dict=feed_dict)

        return self

    def predict_log_proba(self, X):
        """Compute the log of the class probabilities.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Data.

        Returns
        -------
        numpy array [n_samples, n_classes]
            Element-wise log of `predict_proba(X)`.

        Raises
        ------
        NotFittedError
            If the model has not been fitted.
        """
        if not self._is_fitted:
            raise NotFittedError("Call fit before predict_log_proba!")
        return np.log(self.predict_proba(X))

    def predict_proba(self, X):
        """Compute the class probabilities.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Data.

        Returns
        -------
        numpy array [n_samples, n_classes]
            Probabilities. For a single-output (binary) model the columns
            are ``1 - p`` and ``p``.

        Raises
        ------
        NotFittedError
            If the model has not been fitted.
        """

        if not self._is_fitted:
            raise NotFittedError("Call fit before predict_proba!")

        X = check_array(X, accept_sparse='csr')

        # Check input data against internal data.
        # Raises an error on failure.
        self._check_data(X)

        # Compute probabilities in batches.
        probs = []
        start_idx = 0
        n_examples = X.shape[0]
        with self.graph_.as_default():
            while start_idx < n_examples:
                X_batch = \
                    X[start_idx:min(start_idx + self.batch_size, n_examples)]
                # The targets do not influence the probabilities; feed a
                # dummy y with one entry per example in the batch.
                feed_dict = self._make_feed_dict(
                    X_batch, np.zeros(X_batch.shape[0]))
                start_idx += self.batch_size
                # atleast_1d: a one-example batch comes back as a scalar in
                # the binary case (the logits are squeezed), and
                # np.concatenate rejects zero-dimensional arrays.
                probs.append(
                    np.atleast_1d(
                        self._y_proba.eval(session=self._session,
                                           feed_dict=feed_dict)))

        probs = np.concatenate(probs, axis=0)
        if probs.ndim == 1:
            return np.column_stack([1.0 - probs, probs])
        else:
            return probs

    def predict(self, X):
        """Compute the predicted class.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Data.

        Returns
        -------
        numpy array [n_samples]
            Predicted class.

        Raises
        ------
        NotFittedError
            If the model has not been fitted.
        """
        if not self._is_fitted:
            raise NotFittedError("Call fit before predict!")
        return self.classes_[self.predict_proba(X).argmax(axis=1)]
Ejemplo n.º 20
0
class FMRegressor(TFPicklingBase, RegressorMixin, BaseEstimator):
    """Factorization machine regressor.

    Parameters
    ----------
    rank : int, optional
        Rank of the underlying low-rank representation.
    batch_size : int, optional
        The batch size for learning and prediction. If there are fewer
        examples than the batch size during fitting, then the number of
        examples will be used instead.
    n_epochs : int, optional
        The number of epochs (iterations through the training data) when
        fitting with a TensorFlow optimizer. Not used when `solver` is a
        string.
    random_state: int, RandomState instance or None, optional
        If int, the random number generator seed. If RandomState instance,
        the random number generator itself. If None, then `np.random` will be
        used.
    lambda_v : float, optional
        L2 regularization strength for the low-rank embedding.
    lambda_beta : float, optional
        L2 regularization strength for the linear coefficients.
    init_scale : float, optional
        Standard deviation of random normal initialization.
    solver : a subclass of `tf.train.Optimizer` or str, optional
        Solver to use. If a string is passed, then the corresponding solver
        from `scipy.optimize.minimize` is used.
    solver_kwargs : dict, optional
        Additional keyword arguments to pass to `solver` upon construction
        (or, for a string `solver`, the `options` dict handed to the SciPy
        optimizer interface). See the TensorFlow documentation for possible
        options. Typically, one would want to set the `learning_rate`.

    Attributes
    ----------
    n_dims_ : int
        Number of input dimensions.
    is_sparse_ : bool
        Whether a model taking sparse input was fit.
    target_mean_ : float
        Mean of the targets seen when the model was initialized.
    target_sd_ : float
        Standard deviation of the targets seen when the model was
        initialized.
    graph_ : tf.Graph
        Graph holding the model; created on the first (partial) fit.
    """
    def __init__(self,
                 rank=8,
                 batch_size=64,
                 n_epochs=5,
                 random_state=None,
                 lambda_v=0.0,
                 lambda_beta=0.0,
                 solver=tf.train.AdadeltaOptimizer,
                 init_scale=0.1,
                 solver_kwargs=None):
        self.rank = rank
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.random_state = random_state
        self.lambda_v = lambda_v
        self.lambda_beta = lambda_beta
        self.solver = solver
        self.init_scale = init_scale
        self.solver_kwargs = solver_kwargs

    def _set_up_graph(self):
        """Initialize TF objects (needed before fitting or restoring)."""

        # Input values.  Sparse input is fed as (indices, values, shape) and
        # assembled into a SparseTensor; x2 holds the element-wise squares.
        if self.is_sparse_:
            self._x_inds = tf.placeholder(tf.int64, [None, 2], "x_inds")
            self._x_vals = tf.placeholder(tf.float32, [None], "x_vals")
            self._x_shape = tf.placeholder(tf.int64, [2], "x_shape")
            self._x = tf.sparse_reorder(
                tf.SparseTensor(self._x_inds, self._x_vals, self._x_shape))
            x2 = tf.sparse_reorder(
                tf.SparseTensor(self._x_inds, self._x_vals * self._x_vals,
                                self._x_shape))
            matmul = tf.sparse_tensor_dense_matmul
        else:
            self._x = tf.placeholder(tf.float32, [None, self.n_dims_], "x")
            x2 = self._x * self._x
            matmul = tf.matmul

        # tf.float32 for consistency with the other placeholders (the
        # previous np.float32 resolved to the same TF dtype).
        self._sample_weight = \
            tf.placeholder(tf.float32, [None], "sample_weight")

        self._y = tf.placeholder(tf.float32, [None], "y")

        with tf.variable_scope("fm"):
            self._v = tf.get_variable(
                "v", [self.rank, self.n_dims_, self._output_size])
            self._beta = tf.get_variable("beta",
                                         [self.n_dims_, self._output_size])
            self._beta0 = tf.get_variable("beta0", [self._output_size])

        # Pairwise interaction term:
        #   0.5 * sum_f ((x . v_f)^2 - (x^2 . v_f^2))
        vx = tf.stack(
            [matmul(self._x, self._v[i, :, :]) for i in range(self.rank)],
            axis=-1)
        v2 = self._v * self._v
        v2x2 = tf.stack([matmul(x2, v2[i, :, :]) for i in range(self.rank)],
                        axis=-1)
        int_term = 0.5 * tf.reduce_sum(tf.square(vx) - v2x2, axis=-1)
        self._y_hat \
            = self._beta0 + matmul(self._x, self._beta) + int_term

        # NOTE: squeezing every unit dimension turns a one-example batch into
        # a scalar; predict compensates with np.atleast_1d.
        self._y_hat = tf.squeeze(self._y_hat)

        # Weighted mean of the squared errors over the batch.
        squared_error = tf.square(self._y - self._y_hat)
        self._obj_func = tf.divide(
            tf.reduce_sum(tf.multiply(squared_error, self._sample_weight)),
            tf.reduce_sum(self._sample_weight))

        if self.lambda_v > 0:
            self._obj_func \
                += self.lambda_v * tf.reduce_sum(tf.square(self._v))

        if self.lambda_beta > 0:
            self._obj_func \
                += self.lambda_beta * tf.reduce_sum(tf.square(self._beta))

        solver_kwargs = self.solver_kwargs if self.solver_kwargs else {}
        if isinstance(self.solver, str):
            from tensorflow.contrib.opt import ScipyOptimizerInterface

            self._train_step = ScipyOptimizerInterface(
                self._obj_func,
                method=self.solver,
                options=solver_kwargs)
        else:
            self._train_step = self.solver(**solver_kwargs).minimize(
                self._obj_func)

    def _make_feed_dict(self, X, y, sample_weight=None):
        """Map the graph's placeholders to the given data.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Input data; must be sparse iff ``self.is_sparse_``.
        y : numpy array
            Targets, cast to float32.
        sample_weight : numpy array [n_samples], optional
            Per-sample weights; all ones when omitted.

        Returns
        -------
        dict
            Feed dictionary for ``session.run``.
        """
        if self.is_sparse_:
            # Sort the non-zero coordinates by (row, column).
            x_inds = np.vstack(X.nonzero())
            x_srt = np.lexsort(x_inds[::-1, :])
            x_inds = x_inds[:, x_srt].T.astype(np.int64)
            # Flatten with reshape(-1) rather than np.squeeze so that a
            # single non-zero entry still yields a 1-d array matching the
            # [None]-shaped placeholder.
            x_vals = np.array(
                X[x_inds[:, 0], x_inds[:, 1]]).reshape(-1).astype(np.float32)
            x_shape = np.array(X.shape).astype(np.int64)
            feed_dict = {
                self._x_inds: x_inds,
                self._x_vals: x_vals,
                self._x_shape: x_shape
            }
        else:
            feed_dict = {self._x: X.astype(np.float32)}

        if sample_weight is None:
            feed_dict[self._sample_weight] = np.ones(X.shape[0])
        else:
            feed_dict[self._sample_weight] = sample_weight

        feed_dict[self._y] = y.astype(np.float32)

        return feed_dict

    def _check_data(self, X):
        """check input data

        Raises an error if number of features doesn't match.
        If the estimator has not yet been fitted, then do nothing.
        """

        if self._is_fitted:
            if X.shape[1] != self.n_dims_:
                raise ValueError("Number of features in the input data does "
                                 "not match the number assumed by the "
                                 "estimator!")

    def _fit_target(self, y):
        """Store the mean and standard deviation of the targets.

        These let the model train on standardized y while still making
        predictions on the original scale. Warns when the targets have no
        variance.
        """

        self.target_mean_ = np.mean(y)

        self.target_sd_ = np.std(y - self.target_mean_)
        if self.target_sd_ <= 0:
            warn("No variance in regression targets.")

    def _transform_target(self, y):
        """Standardize targets with the stored mean and standard deviation.

        Only centers the targets when the stored standard deviation is not
        positive.
        """

        y_centered = y - self.target_mean_

        if self.target_sd_ <= 0:
            return y_centered

        return y_centered / self.target_sd_

    def __getstate__(self):
        """Return the picklable state: base-class state plus own attributes."""
        # Handles TF persistence
        state = super(FMRegressor, self).__getstate__()

        # Add attributes of this estimator
        state.update(
            dict(rank=self.rank,
                 batch_size=self.batch_size,
                 n_epochs=self.n_epochs,
                 random_state=self.random_state,
                 lambda_v=self.lambda_v,
                 lambda_beta=self.lambda_beta,
                 solver=self.solver,
                 init_scale=self.init_scale,
                 solver_kwargs=self.solver_kwargs))

        # Add fitted attributes if the model has been fitted.
        if self._is_fitted:
            state['n_dims_'] = self.n_dims_
            state['_random_state'] = self._random_state
            state['_output_size'] = self._output_size
            state['is_sparse_'] = self.is_sparse_
            state['target_mean_'] = self.target_mean_
            state['target_sd_'] = self.target_sd_

        return state

    def fit(self, X, y, monitor=None, sample_weight=None):
        """Fit the regressor from scratch.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Training data.
        y : numpy array [n_samples]
            Outcome.
        monitor : callable, optional
            The monitor is called after each iteration with the current
            iteration, a reference to the estimator, and a dictionary with
            {'loss': loss_value} representing the loss calculated by the
            objective function at this iteration.
            If the callable returns True the fitting procedure is stopped.
            The monitor can be used for various things such as computing
            held-out estimates, early stopping, model introspection,
            and snapshotting.
        sample_weight : numpy array of shape [n_samples,]
            Per-sample weights. Re-scale the loss per sample.
            Higher weights force the estimator to put more emphasis
            on these samples. Sample weights are normalized per-batch.

        Returns
        -------
        self : returns an instance of self.
        """
        _LOGGER.info("Fitting %s", re.sub(r"\s+", r" ", repr(self)))

        # Mark the model as not fitted (i.e., not fully initialized based on
        # the data).
        self._is_fitted = False

        # Call partial fit, which will initialize and then train the model.
        return self.partial_fit(X,
                                y,
                                monitor=monitor,
                                sample_weight=sample_weight)

    def partial_fit(self, X, y, monitor=None, sample_weight=None):
        """Fit the regressor, initializing the model on the first call.

        The target mean and standard deviation used for standardization are
        computed from the data of the initializing call only.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Training data.
        y : numpy array [n_samples]
            Outcome.
        monitor : callable, optional
            The monitor is called after each iteration with the current
            iteration, a reference to the estimator, and a dictionary with
            {'loss': loss_value} representing the loss calculated by the
            objective function at this iteration.
            If the callable returns True the fitting procedure is stopped.
            The monitor can be used for various things such as computing
            held-out estimates, early stopping, model introspection,
            and snapshotting. Only called when `solver` is not a string.
        sample_weight : numpy array of shape [n_samples,]
            Per-sample weights. Re-scale the loss per sample.
            Higher weights force the estimator to put more emphasis
            on these samples. Sample weights are normalized per-batch.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        ValueError
            If the number of features differs from a previous call.
        """

        X, y = check_X_y(X, y, accept_sparse='csr', multi_output=False)

        if sample_weight is not None:
            sample_weight = check_array(sample_weight, ensure_2d=False)

        # Initialize the model if it hasn't been already by a previous call.
        if not self._is_fitted:
            self._random_state = check_random_state(self.random_state)
            assert self.batch_size > 0, "batch_size <= 0"
            self._fit_target(y)
            y = self._transform_target(y)

            self.n_dims_ = X.shape[1]
            self._output_size = 1

            self.is_sparse_ = bool(sp.issparse(X))

            # Instantiate the graph.  TensorFlow seems easier to use by just
            # adding to the default graph, and as_default lets you temporarily
            # set a graph to be treated as the default graph.
            self.graph_ = tf.Graph()
            with self.graph_.as_default():
                tf.set_random_seed(self._random_state.randint(0, 10000000))

                tf.get_variable_scope().set_initializer(
                    tf.random_normal_initializer(stddev=self.init_scale))

                # Not defined in this class; presumably provided by
                # TFPicklingBase and expected to call _set_up_graph and
                # create self._session -- TODO confirm.
                self._build_tf_graph()

                # Initialize model parameters.
                self._session.run(tf.global_variables_initializer())

            # Set an attribute to mark this as at least partially fitted.
            self._is_fitted = True
        else:
            y = self._transform_target(y)

        # Check input data against internal data.
        # Raises an error on failure.
        self._check_data(X)

        # transform targets
        if sp.issparse(y):
            y = y.toarray()

        # Train the model with the given data.
        with self.graph_.as_default():
            if not isinstance(self.solver, str):
                n_examples = X.shape[0]
                indices = np.arange(n_examples)

                for epoch in range(self.n_epochs):
                    self._random_state.shuffle(indices)
                    for start_idx in range(0, n_examples, self.batch_size):
                        max_ind = min(start_idx + self.batch_size, n_examples)
                        batch_ind = indices[start_idx:max_ind]

                        if sample_weight is None:
                            batch_sample_weight = None
                        else:
                            batch_sample_weight = sample_weight[batch_ind]

                        feed_dict = self._make_feed_dict(
                            X[batch_ind],
                            y[batch_ind],
                            sample_weight=batch_sample_weight)
                        obj_val, _ = self._session.run(
                            [self._obj_func, self._train_step],
                            feed_dict=feed_dict)
                        _LOGGER.debug("objective: %.4f, epoch: %d, idx: %d",
                                      obj_val, epoch, start_idx)

                    # Reports the objective of the last batch of the epoch.
                    _LOGGER.info("objective: %.4f, epoch: %d, idx: %d",
                                 obj_val, epoch, start_idx)

                    if monitor:
                        stop_early = monitor(epoch, self, {'loss': obj_val})
                        if stop_early:
                            _LOGGER.info(
                                "stopping early due to monitor function.")
                            return self
            else:
                # SciPy solvers optimize over the full data set at once.
                feed_dict = self._make_feed_dict(X,
                                                 y,
                                                 sample_weight=sample_weight)
                self._train_step.minimize(self._session, feed_dict=feed_dict)

        return self

    def predict(self, X):
        """Predict expected y values.

        Parameters
        ----------
        X : numpy array or sparse matrix [n_samples, n_features]
            Data.

        Returns
        -------
        numpy array [n_samples]
            Estimated regression predictions.

        Raises
        ------
        NotFittedError
            If the model has not been fitted.
        """

        if not self._is_fitted:
            raise NotFittedError("Call fit before predict!")

        X = check_array(X, accept_sparse='csr')

        # Check input data against internal data.
        # Raises an error on failure.
        self._check_data(X)

        # Compute predictions in batches.
        yhat = []
        start_idx = 0
        n_examples = X.shape[0]
        with self.graph_.as_default():
            while start_idx < n_examples:
                X_batch = \
                    X[start_idx:min(start_idx + self.batch_size, n_examples)]
                # The targets do not influence the predictions; feed a dummy
                # y with one entry per example in the batch.
                feed_dict = self._make_feed_dict(
                    X_batch, np.zeros(X_batch.shape[0]))
                start_idx += self.batch_size
                # atleast_1d: a one-example batch comes back as a scalar
                # because the predictions are squeezed.
                yhat.append(
                    np.atleast_1d(
                        self._y_hat.eval(session=self._session,
                                         feed_dict=feed_dict)))

        yhat = np.concatenate(yhat, axis=0)

        # Put the prediction back on the scale of the target values
        # (cf. _transform_target).
        if self.target_sd_ > 0.0:
            yhat *= self.target_sd_
        yhat += self.target_mean_

        return yhat
def minimize_task(config,
                  sess,
                  task,
                  num_starts=None,
                  num_options=None,
                  scope='minimize_task',
                  reuse=None,
                  spo_config=None,
                  dtype=None,
                  rng=None):
    '''
    Estimate task minimum using multi-start L-BFGS-B.

    A random sweep over the unit hypercube picks the `num_starts` best
    candidates, which are then refined jointly with Scipy's L-BFGS-B
    (bounded to [0, 1] unless `spo_config` overrides `var_to_bounds`).

    Parameters:
        config : Object providing `seed`, `num_starts_fmin` and
            `num_options_fmin`; each is read only when the matching
            argument is None.
        sess (tf.Session) : Session used for all evaluations.
        task : Object providing `dtype`, `input_dim` and
            `tensorflow(inputs, noisy=..., stop_gradient=...)`.
        num_starts (int) : Number of simultaneous starting points.
        num_options (int) : Budget of random candidates for the sweep. At
            least one batch of `num_starts` candidates is always drawn.
        scope (str) : Variable scope holding the optimised inputs.
        reuse : Forwarded to `tf.variable_scope`.
        spo_config (dict) : Extra keyword arguments for
            ScipyOptimizerInterface; these take precedence over the defaults.
        dtype : Dtype of the optimised inputs; defaults to `task.dtype`.
        rng (numpy.random.RandomState) : Source of the random candidates.

    Returns:
        (x_min, f_min) : Best input location found and its task value.
    '''
    if rng is None:
        rng = npr.RandomState(config.seed)
    if dtype is None:
        dtype = task.dtype
    if spo_config is None:
        spo_config = dict()
    if num_starts is None:
        num_starts = config.num_starts_fmin
    if num_options is None:
        num_options = config.num_options_fmin

    with tf.variable_scope(scope, reuse=reuse):
        # Build minimization target
        shape = [num_starts, task.input_dim]
        inputs_var = tf.get_variable(
            'inputs',
            shape=shape,
            dtype=dtype,
            initializer=tf.random_uniform_initializer())

        task_op = task.tensorflow(inputs_var, noisy=False, stop_gradient=False)
        loss_op = tf.reduce_mean(task_op)

        # Find starting positions via initial random sweep. The first batch
        # is drawn unconditionally so that x_mins is always defined, even
        # when num_options < num_starts.
        counter, x_mins, f_mins = 0, None, None
        while x_mins is None or counter + num_starts <= num_options:
            inputs = rng.rand(*shape)
            outputs = np.squeeze(sess.run(task_op, {inputs_var: inputs}))
            if x_mins is None:
                x_mins, f_mins = inputs, outputs
            else:
                # Keep the num_starts best of (previous best + new batch)
                inputs = np.vstack([x_mins, inputs])
                outputs = np.hstack([f_mins, outputs])
                argmins = np.argpartition(outputs, num_starts - 1)[:num_starts]
                x_mins, f_mins = inputs[argmins], outputs[argmins]
            counter += num_starts

        # Initialize the variable FIRST and only then write the sweep result.
        # Running the initializer after the assignment would replace the
        # selected starting positions with fresh random values.
        _ = sess.run(tf.variables_initializer([inputs_var]))
        _ = sess.run(tf.assign(inputs_var, x_mins))

        # Initialize task optimizer
        spo_config =\
        {
          'method' : 'L-BFGS-B',
          'var_list' : [inputs_var],
          'var_to_bounds' : {inputs_var : (0, 1)},
          'options' : {'maxiter' : 1024},
          **spo_config, #user-specified settings take precedence
        }
        optimizer = ScipyOptimizerInterface(loss_op, **spo_config)

        # Run task optimizer, starting from the values currently held by
        # inputs_var (i.e. the sweep result)
        _ = optimizer.minimize(sess)

        # Evaluate task at optimized input locations
        inputs, outputs = sess.run([inputs_var, task_op])
        argmin = np.argmin(outputs)
        x_min = inputs[argmin]
        f_min = outputs[argmin, 0]
        return x_min, f_min
Ejemplo n.º 22
0
                                          method='L-BFGS-B',
                                          options={"maxiter": 300}
                                          )

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for i in range(iterations):
        # coordinate descent
        sess.run([update_tau])

        [sess.run([train_ibp]) for _ in range(5)]

        train_gp.minimize(sess)

        e_llike = sess.run(elbo)
        print('Iter {} : EBLO = {:.2f}'.format(i, e_llike))

    train_gp_refine.minimize(sess)

    z_eval = sess.run(z)
    z_eval = np.concatenate(z_eval)
    K_eval = sess.run(K)
    K_star_eval = sess.run(K_star)
    K_star_star_eval = sess.run(K_star_star)
    noise_eval = sess.run(noise)

    sio.savemat("./seizure_exp_{}.mat".format(exp),
                {
Ejemplo n.º 23
0
# Least-squares fit of a linear model with ScipyOptimizerInterface, compared
# against numpy's closed-form solver.
# NOTE(review): X, Y, n, x and y are not defined in this fragment; presumably
# they come from earlier notebook cells (X/Y placeholders, x/y numpy data).

# Coefficient variable named "beta" with shape [n, 1], zero-initialised.
M = tf.get_variable("beta", [n, 1], "float", initializer=tf.zeros_initializer)



# Linear prediction and sum-of-squared-errors objective.
y_pred = tf.matmul(X, M )
loss = tf.reduce_sum(tf.pow(Y-y_pred, 2))

# No method is passed, so the optimizer's default method is used.
optim = ScipyOptimizerInterface(loss, [M])


# In[76]:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # y is given a trailing axis (y[:, None]) before being fed to Y.
    optim.minimize(sess, feed_dict={X:x, Y:y[:,None]})
    # Read back the fitted coefficients.
    M_st = sess.run(M)
#     ans = sess.run(loss, feed_dict={X: x, Y: y})


# In[77]:

# Fitted coefficients from TensorFlow, printed as a row.
print(M_st.T)


# In[78]:

# Reference solution from numpy's least-squares solver.
print(np.linalg.lstsq(x,y)[0])


print("We can see that tensorflow is giving nearly identical results to numpy.")
Ejemplo n.º 24
0
class PINN_Base:
    def __init__(self,
                 lower_bound: List[float],
                 upper_bound: List[float],
                 layers: List[int],
                 df_multiplier=1.0,
                 dtype=tf.float32,
                 use_differential_points=True,
                 use_collocation_residual=True,
                 use_dynamic_learning_rate=False,
                 optimizer_kwargs=None,
                 session_config=None,
                 add_grad_ops=False):
        '''
        Inherit from this class to construct a Physics informed neural network (PINN)
        The computation graph is constructed at initialization and then finalized.

        Parameters:
            lower_bound (List[float]) : Lower bound on the input domain for each coordinate
            upper_bound (List[float]) : Upper bound on the input domain for each coordinate
            layers (List[int]) : List describing the input/output domain of MLP used by the PINN.
                List should have the form [ input_dimension, layer_width*, output_dimension]
                where layer_width* is 0 or more numbers representing the width of each fully connected layer.
                For instance, [2,20,20,20,20,1] is an MLP with a 2d input domain, 4 fully connected layers of
                width 20 and a scalar output domain.
            df_multiplier (float) : Value which multiplies the PINN residual portion of the loss.
                < 1.0 means that the effect of the residual will be reduced
                > 1.0 means that the effect of the residual will be magnified
            dtype (tf.dtype) : Data type to use for *all* computations.
                Warning! tf.float64 is often a bit more accurate but much slower!
            use_differential_points (bool) : Whether to use a separate set of differential points (X_df)
                when calculating the residual. Setting this to false switches us from the "boundary-value" paradigm
                to the "noisy-sensor" paradigm.
            use_collocation_residual (bool) : Determines if we have a 2-term or 3-term loss (see _loss for more details)
            use_dynamic_learning_rate (bool) : When using a first order optimizer, determines whether the
                learning rate is changeable after graph compilation
            optimizer_kwargs (Optional[dict]) : Keyword arguments passed to the first order optimizer
                when no dynamic learning rate is used. None (the default) means no extra arguments.
            session_config : Passed as the `config` argument of tf.Session by _init_session.
                This may be used to force device placement or limit the number of threads that may be used.
            add_grad_ops (bool) : If true, adds operations to compute the gradient outside of an optimizer.
                See _init_grad_ops for more details
        '''

        self.lower_bound = np.array(lower_bound)
        self.upper_bound = np.array(upper_bound)

        self.layers = layers

        self.dtype = dtype
        self.use_differential_points = use_differential_points

        self.use_collocation_residual = use_collocation_residual
        # A fresh dict per instance: a `{}` default in the signature would be
        # one object shared by every instance created without the argument.
        self.optimizer_kwargs = {} if optimizer_kwargs is None else optimizer_kwargs
        self.session_config = session_config
        self.add_grad_ops = add_grad_ops
        self.use_dynamic_learning_rate = use_dynamic_learning_rate

        self.df_multiplier = df_multiplier

        # Each model owns its graph; building it also opens the session.
        self.graph = tf.Graph()
        self._build_graph()

    def _build_graph(self):
        '''
        Builds the full computation graph, finalizes it and opens the session.
        Each _method is meant to be an integration point modifiable by subclasses
        '''

        with self.graph.as_default():

            self._init_placeholders()
            self._init_params()

            self.U_hat = self._forward(self.X)

            # The second forward pass only exists when separate differential
            # points are in use.
            if self.use_differential_points:
                self.U_hat_df = self._forward(self.X_df)
            else:
                self.U_hat_df = None

            self.loss = self._loss(self.U_hat, self.U_hat_df)

            if self.add_grad_ops:
                self._init_grad_ops()

            self._init_optimizers()

            # The initializer must be created before the graph is finalized,
            # since no op can be added afterwards.
            self.init = tf.global_variables_initializer()
            self.graph.finalize()

        # Create the session through _init_session so that session_config is
        # honoured for the first session too, not only after reset_session().
        self._init_session()

    def _init_placeholders(self):
        '''Create the placeholders through which training data enters the graph.'''

        input_shape = [None, self.get_input_dim()]

        # Points with known solution values
        self.X = tf.placeholder(self.dtype, shape=input_shape)
        # Optional second set of points, created only when requested
        if self.use_differential_points:
            self.X_df = tf.placeholder(self.dtype, shape=input_shape)
        self.U = tf.placeholder(self.dtype,
                                shape=[None, self.get_output_dim()])

        # Scalar learning-rate input, or None when the rate is fixed
        self.learning_rate = (tf.placeholder(self.dtype, shape=())
                              if self.use_dynamic_learning_rate else None)

    def _init_params(self):
        '''Create the trainable variables (weights and biases) of the network.'''

        network_weights, network_biases = self._init_NN(self.layers)
        self.weights = network_weights
        self.biases = network_biases

    def _init_session(self):
        '''
        Open a session on self.graph and run the variable initializer.
        session_config is passed as `config` only when it is not None.
        '''

        session_kwargs = {'graph': self.graph}
        if self.session_config is not None:
            session_kwargs['config'] = self.session_config

        self.sess = tf.Session(**session_kwargs)
        self.sess.run(self.init)

    def _forward(self, X):
        '''
        Computes U = F(X)

        Parameters:
            X (tf.tensor) : Input tensor fed through the network

        Returns:
            The network output for X. As a side effect, the per-layer
            activations are stored in self.activations when X is the
            placeholder self.X.
        '''

        U, activations = self._NN(X, self.weights, self.biases)

        # By convention we only store intermediate for a single forward-pass.
        # Use an identity test: `==` on tensors may be an element-wise op
        # whose result has no well-defined truth value.
        if X is self.X:
            self.activations = activations

        return U

    def _init_optimizers(self):
        '''
        Create the training ops for self.loss:
        an L-BFGS-B optimizer (optimizer_BFGS) and an Adam step (optimizer_Adam).
        '''

        # Both tolerances are set to machine epsilon
        machine_eps = 1.0 * np.finfo(float).eps
        lbfgs_options = {
            'maxiter': 50000,
            'maxfun': 50000,
            'maxcor': 50,
            'maxls': 50,
            'gtol': machine_eps,
            'ftol': machine_eps
        }
        self.optimizer_BFGS = ScipyOptimizerInterface(self.loss,
                                                      method='L-BFGS-B',
                                                      options=lbfgs_options)

        # With a learning-rate placeholder the rate is fed at run time and
        # optimizer_kwargs is not used; otherwise optimizer_kwargs configures Adam.
        if self.learning_rate is None:
            adam = tf.train.AdamOptimizer(**self.optimizer_kwargs)
        else:
            adam = tf.train.AdamOptimizer(self.learning_rate)
        self.optimizer_Adam = adam.minimize(self.loss)

    def _loss(self, U_hat, U_hat_df):
        '''
            Assemble the training loss from the data-fit term and the
            residual term(s). Also stores the individual terms on self
            (mse, loss_residual and, when used, loss_residual_differential).
        '''

        # Data-fit term on the points with known values
        self.mse = tf.reduce_mean(tf.square(self.U - U_hat))
        # PINN residual evaluated on those same points
        self.loss_residual = tf.reduce_mean(
            tf.square(self._residual_collocation(U_hat)))

        # Without differential points the loss has just these two terms
        if not self.use_differential_points:
            return self.mse + self.df_multiplier * self.loss_residual

        # PINN residual on the separate set of points, X_df
        self.loss_residual_differential = tf.reduce_mean(
            tf.square(self._residual_differential(U_hat_df)))

        # Two point sets give two residuals; optionally keep both (3-term loss)
        if self.use_collocation_residual:
            return self.mse + self.loss_residual + self.df_multiplier * self.loss_residual_differential

        return self.mse + self.df_multiplier * self.loss_residual_differential

    def _residual(self, u, x, u_true=None):
        '''
            Residual of the differential equation; the hook that subclasses
            override to define a PINN for a *particular* equation.
            This base implementation is a placeholder that returns 0.0.

            Parameters:
                u (tf.tensor) : Predicted value of the differential equation
                x (tf.tensor) : Input to the differential equation
                u_true (Optional[tf.tensor]) : True value of u, if known. It can be
                    helpful to use it in the residual in place of the network
                    prediction u.
        '''

        # Placeholder value: override with the actual differential equation
        return 0.0

    def _residual_collocation(self, U_hat):
        '''
            Residual evaluated at X, where U is known (self.U is passed on as u_true).
            Note that this is ignored if use_differential_points=True
            and use_collocation_residual=False
        '''
        collocation_residual = self._residual(U_hat, self.X, self.U)
        return collocation_residual

    def _residual_differential(self, U_hat_df):
        '''
            Residual evaluated at X_df, where U is unknown (no u_true is passed).
            Ignored if use_differential_points=False
        '''
        differential_residual = self._residual(U_hat_df, self.X_df)
        return differential_residual

    def _NN(self, X, weights, biases):
        '''
            Forward pass of an MLP with tanh hidden layers and a linear output layer.
            Original code based on (Raissi, 2018)

            Returns:
                (Y, activations) where activations lists the scaled input
                followed by the output of every hidden layer.
        '''
        # Rescale the inputs from [lower_bound, upper_bound] to [-1, 1]
        # to help with optimization
        domain_width = self.upper_bound - self.lower_bound
        hidden = 2.0 * (X - self.lower_bound) / domain_width - 1.0

        # Keep every intermediate for visualization, etc
        activations = [hidden]

        # All layers but the last are followed by tanh
        for idx, layer_weight in enumerate(weights[:-1]):
            layer_bias = biases[idx]
            hidden = tf.tanh(tf.add(tf.matmul(hidden, layer_weight), layer_bias))
            activations.append(hidden)

        # Final layer is purely affine
        output = tf.add(tf.matmul(hidden, weights[-1]), biases[-1])

        return output, activations

    def _xavier_init(self, size):
        '''
        Create a weight variable for a layer of the given size [in_dim, out_dim],
        drawn from a truncated normal with stddev sqrt(2 / (in_dim + out_dim)).
        Code based on (Raissi, 2018)
        '''

        fan_in, fan_out = size[0], size[1]
        std = np.sqrt(2 / (fan_in + fan_out))

        initial_value = tf.truncated_normal([fan_in, fan_out],
                                            stddev=std,
                                            dtype=self.dtype)
        return tf.Variable(initial_value, dtype=self.dtype)

    def _layer_initializer(self, size):
        '''Create the weight variable for one layer; delegates to _xavier_init.'''
        layer_weights = self._xavier_init(size)
        return layer_weights

    def _bias_initializer(self, width):
        '''Create a zero-initialized bias variable of shape [1, width].'''
        zero_bias = tf.zeros([1, width], dtype=self.dtype)
        return tf.Variable(zero_bias, dtype=self.dtype)

    def _init_NN(self, layers: List[int]):
        '''
        Initialize the weights and biases for the MLP with given structure

        Parameters:
            layers (List[int]) : See the same param in __init__

        Returns:
            (weights, biases) : Two lists with one variable per pair of
                consecutive entries in layers.
        '''

        weights = []
        biases = []
        # Each pair of consecutive widths defines one fully connected layer
        for in_dim, out_dim in zip(layers[:-1], layers[1:]):
            weights.append(self._layer_initializer([in_dim, out_dim]))
            biases.append(self._bias_initializer(out_dim))

        return weights, biases

    def _init_grad_ops(self):
        '''
            Adds ops for querying the gradient of the loss with respect to the
            trainable parameters outside of an optimization context
            (self.grads_flat), plus ops computing the Hessian-vector product Hv
            for a vector fed through self.hessian_vector (self.hessian_matvec).
        '''

        # get_all_weight_variables() returns groups of variables;
        # flatten them into a single list of variables
        param_array = [
            variable
            for group in self.get_all_weight_variables()
            for variable in group
        ]

        # The flattened parameter vector is not part of the computation of the
        # loss, so differentiate per variable first and flatten afterwards.
        per_variable_grads = tf.gradients(self.loss, param_array)
        self.grads_flat = tf.concat(
            [tf.reshape(g, [-1]) for g in per_variable_grads], axis=0)

        # v in Hv
        self.hessian_vector = tf.placeholder(self.dtype,
                                             shape=self.grads_flat.shape)

        # As long as v is independent of L, grad ((grad L).T v) = Hv
        grad_dot_v = tf.reduce_sum(self.grads_flat * self.hessian_vector)
        self.hessian_matvec = tf.gradients(grad_dot_v, param_array)

    def get_input_dim(self):
        '''Input dimension of the network: the first entry of layers.'''
        input_dim = self.layers[0]
        return input_dim

    def get_output_dim(self):
        '''
        Output dimension of the network: the last entry of layers.
        Note that most of this code has only been tested for 1d output dimensions
        '''
        output_dim = self.layers[-1]
        return output_dim

    def reset_session(self):
        '''
        Close the current session and open a new one on the same graph,
        which re-runs the variable initializer. The graph is not rebuilt,
        so this is faster for multiple trials with identical architecture.
        '''
        old_session = self.sess
        old_session.close()
        self._init_session()

    def cleanup(self):
        '''
        Drop the reference to the graph and close the session.
        Not sure if this is actually needed. I believe
        Tensorflow has gotten better about not leaking memory at this point.
        '''
        # The attribute is removed first, so self.graph is unavailable afterwards
        del self.graph
        session = self.sess
        session.close()

    def get_all_weights(self):
        '''
        Current values of all trainable parameters as a list of list of lists.
        These are the results of tf.sess.run, not the variable ops themselves.
        Inheriting classes should override this function, not get_weights(),
        to return additional parameters.
        '''
        current_values = self.get_weights()
        return current_values

    def get_all_weight_variables(self):
        '''
        The tf.Variable objects of all trainable parameters, as a list
        with the same shape as in get_all_weights: [weights, biases]
        '''
        variable_groups = [self.weights, self.biases]
        return variable_groups

    def get_weights(self):
        '''Evaluate the weight and bias variables in the current session.'''
        fetches = [self.weights, self.biases]
        return self.sess.run(fetches)

    def get_loss(self, X, U, X_df=None):
        '''
        Evaluate the loss for the given data.

        Parameters:
            X : Values fed to the X placeholder
            U : Values fed to the U placeholder
            X_df : Values fed to the X_df placeholder. Only used when
                use_differential_points=True; may be omitted otherwise.
        '''

        feed_dict = {self.X: X, self.U: U}
        if self.use_differential_points:
            feed_dict[self.X_df] = X_df

        return self.sess.run(self.loss, feed_dict)

    def get_loss_collocation(self, X, U=None, X_df=None):
        '''
        Evaluate self.loss with X fed to the X placeholder.

        Parameters:
            X : Values fed to the X placeholder
            U (optional) : Values fed to the U placeholder. The mse term of
                the loss reads U, so evaluation fails unless it is supplied.
            X_df (optional) : Values fed to the X_df placeholder; only used
                when use_differential_points=True.
        '''
        # X used to be ignored and the loss was run without any feeds
        feed_dict = {self.X: X}
        if U is not None:
            feed_dict[self.U] = U
        if self.use_differential_points and X_df is not None:
            feed_dict[self.X_df] = X_df

        return self.sess.run(self.loss, feed_dict)

    def get_loss_residual(self, X, U=None):
        '''
        Evaluate the collocation residual term (self.loss_residual) at X.

        Parameters:
            X : Values fed to the X placeholder
            U (optional) : Values fed to the U placeholder. Needed only if the
                residual implementation makes use of u_true.
        '''
        # X used to be ignored and the residual was run without any feeds
        feed_dict = {self.X: X}
        if U is not None:
            feed_dict[self.U] = U

        return self.sess.run(self.loss_residual, feed_dict)

    def get_activations(self, X, layer=None):
        '''
        Evaluate the stored activations for input X.

        Parameters:
            X : Values fed to the X placeholder
            layer (Optional[int]) : Index into self.activations. When None,
                all activations are returned as a list.
        '''
        # Compare against None explicitly: index 0 is falsy but is a valid layer
        if layer is not None:
            return self.sess.run(self.activations[layer], {self.X: X})

        return self.sess.run(self.activations, {self.X: X})

    def _size_of_variable_list(self, variable_list):
        '''
            Total number of scalar entries held by a list of tf.Variable objects.
            The variables are evaluated in the session and their sizes summed.
        '''
        sizes = [value.size for value in self.sess.run(variable_list)]
        return np.sum(sizes)

    def _count_params(self):
        '''
            Number of parameters in self.weights plus self.biases.
        '''
        n_weights = self._size_of_variable_list(self.weights)
        n_biases = self._size_of_variable_list(self.biases)
        total = n_weights + n_biases

        return total

    def get_architecture_description(self):
        '''
        Describe the model as a dict with its architecture name, parameter
        count, a copy of the layer list and a dtype label.
        '''
        # Any dtype other than tf.float32 is labelled "float64"
        if self.dtype == tf.float32:
            dtype_label = "float32"
        else:
            dtype_label = "float64"

        description = {}
        description["arch_name"] = "base"
        description["n_params"] = self._count_params()
        description["shape"] = self.layers[:]
        description["dtype"] = dtype_label
        return description

    def get_version(self):
        '''Version string of the imported TensorFlow module.'''
        tf_version = tf.__version__
        return tf_version

    def train_BFGS(self,
                   X,
                   U,
                   X_df=None,
                   print_loss=True,
                   custom_fetches=None):
        '''
            Run the L-BFGS-B optimizer on the given data until it terminates.

            Parameters:
                X (np.ndarray) : (N,d_in) array of domain points
                U (np.ndarray) : (N,d_out) array of solution points such that U = F(X)
                X_df (Optional[np.ndarray]) : (M,d_in) array of domain points where U is
                    unknown but the PINN residual should still be evaluated
                print_loss (bool) : Whether to print the loss to stdout during training.
                    Takes precedence over custom_fetches.
                custom_fetches (List) : Ops from the computation graph to fetch at each training step.
                    Only used when print_loss is False.

            Returns:
                The fetched values if custom_fetches were used, otherwise nothing.
        '''

        # TODO: Support printing loss and doing custom fetching at the same time

        feeds = {self.X: X, self.U: U}
        if self.use_differential_points:
            feeds[self.X_df] = X_df

        # Loss printing wins over custom fetches
        if print_loss:
            self.optimizer_BFGS.minimize(self.sess,
                                         feeds,
                                         fetches=[self.loss],
                                         loss_callback=util.bfgs_callback)
            return None

        # Plain optimization without any callback
        if custom_fetches is None:
            self.optimizer_BFGS.minimize(self.sess, feeds)
            return None

        # The callback receives the fetched values; `fetched` is what is returned
        fetched, on_fetch = util.make_fetches_callback()
        self.optimizer_BFGS.minimize(self.sess,
                                     feeds,
                                     fetches=custom_fetches,
                                     loss_callback=on_fetch)
        return fetched

    def train_Adam(self,
                   X: np.ndarray,
                   U: np.ndarray,
                   X_df=None,
                   epochs=2000,
                   learning_rate=1e-3):
        '''
            Run `epochs` full-batch Adam steps, reporting the loss on a progress bar.

            Parameters:
                X (np.ndarray) : (N,d_in) array of domain points
                U (np.ndarray) : (N,d_out) array of solution points such that U = F(X)
                X_df (Optional[np.ndarray]) : (M,d_in) array of domain points where U is
                    unknown but the PINN residual should still be evaluated.
                epochs (int) : Number of epochs to train for
                learning_rate (float) : If use_dynamic_learning_rate=True, this will
                    be the learning rate used by the optimizer
        '''

        feeds = {self.X: X, self.U: U}
        if self.use_differential_points:
            feeds[self.X_df] = X_df

        # The learning rate is only fed when its placeholder exists
        if self.learning_rate is not None:
            feeds[self.learning_rate] = learning_rate

        fetches = [self.optimizer_Adam, self.loss]
        progbar = Progbar(epochs)
        for step in range(1, epochs + 1):
            _, loss = self.sess.run(fetches, feeds)
            progbar.update(step, [("loss", loss)])

    def train_Adam_batched(self, X, U, X_df=None, batch_size=128, epochs=10):
        '''
            Mini-batch Adam training; delegates to _train_stochastic_optimizer
            with the Adam training op.

            Parameters:
                X (np.ndarray) : (N,d_in) array of domain points
                U (np.ndarray) : (N,d_out) array of solution points such that U = F(X)
                X_df (Optional[np.ndarray]) : (M,d_in) array of domain points where U is
                    unknown but the PINN residual should still be evaluated.
                epochs (int) : Number of epochs to train for
                batch_size (int) : Mini-Batch size for stochastic training
        '''
        # TODO: Integrate with dynamic learning rate

        adam_step = self.optimizer_Adam
        self._train_stochastic_optimizer(adam_step, X, U, X_df, batch_size,
                                         epochs)

    def _train_stochastic_optimizer(self,
                                    optimizer_opp,
                                    X,
                                    U,
                                    X_df=None,
                                    batch_size=128,
                                    epochs=10):
        '''
            Generic custom training loop for stochastic optimizers.
            Replace optimizer_opp with e.g. RMSProp.minimize() for a different
            stochastic optimizer.

            Parameters:
                optimizer_opp : Training op run once per mini-batch
                X : Array of domain points (sliced along the first axis)
                U : Array of solution values matching X
                X_df (optional) : Array of differential points. Required, and
                    at least as long as X, when use_differential_points=True.
                batch_size (int) : Number of rows per mini-batch
                epochs (int) : Number of passes over the data
        '''

        if self.use_differential_points:
            assert (X_df is not None)

            assert (X_df.shape[0] >= X.shape[0])

        # NOTE(review): "loss_full" is declared stateful but no metric with
        # that name is ever reported below - confirm intent.
        progbar = Progbar(epochs, stateful_metrics=["loss_full"])
        for epoch in range(epochs):

            # X and U are shuffled together so rows stay paired
            X_s, U_s = shuffle(X, U)

            # An epoch covers X_df when it is given, otherwise X
            if X_df is not None:
                X_df_s = shuffle(X_df)
                dataset_size = X_df.shape[0]
            else:
                dataset_size = X.shape[0]

            # b_c is the batch offset into X_s/U_s; it wraps around because
            # X may be shorter than X_df
            b_c = 0
            for b in range(0, dataset_size, batch_size):

                # NOTE(review): this branches on X_df rather than on
                # use_differential_points; self.X_df only exists when the
                # latter is True - confirm callers never pass X_df otherwise.
                if X_df is not None:
                    b_c_last = b_c
                    b_c = b % X_s.shape[0]

                    # X and X_df are typically different sizes,
                    # so we shuffle them at different times
                    # (a decreasing offset means X_s has wrapped around)
                    if b_c_last > b_c:
                        X_s, U_s = shuffle(X, U)
                    X_b = X_s[b_c:(b_c + batch_size), :]
                    U_b = U_s[b_c:(b_c + batch_size), :]
                    X_df_b = X_df_s[b:(b + batch_size), :]
                    feed_dict = {self.X: X_b, self.U: U_b, self.X_df: X_df_b}
                else:
                    X_b = X_s[b:(b + batch_size), :]
                    U_b = U_s[b:(b + batch_size), :]
                    feed_dict = {self.X: X_b, self.U: U_b}

                _, loss = self.sess.run([optimizer_opp, self.loss], feed_dict)

            # NOTE(review): this full-data feed_dict is built but never used;
            # presumably it was meant for evaluating a full-batch loss.
            if X_df is not None:
                feed_dict = {self.X: X, self.U: U, self.X_df: X_df}
            else:
                feed_dict = {self.X: X, self.U: U}

            # Reports the loss of the last mini-batch of the epoch
            progbar.update(epoch + 1, [("loss", loss)])

    def predict(self, X):
        '''Evaluate the network output U_hat with X fed to the X placeholder.'''
        feeds = {self.X: X}
        return self.sess.run(self.U_hat, feeds)

    def get_hessian_matvec(self, v, X, U, X_df):
        '''
            Evaluate the Hessian-vector product Hv for the given vector v
            and data, returned as a single vector.
        '''

        feeds = {self.hessian_vector: v, self.X: X, self.U: U}
        if self.use_differential_points:
            feeds[self.X_df] = X_df

        # The op yields one array per parameter; unwrap them into one vector
        per_param_products = self.sess.run(self.hessian_matvec, feeds)
        return util.unwrap(per_param_products)

    def get_hessian(self, X, U, X_df):
        '''
            Get the full hessian by repeated Hessian-vector products,
            one per unit vector. Since PINNs are often small, this is feasible.
            Warning! This operation scales quadratically in time and space!

            Uses the ops created by _init_grad_ops (grads_flat,
            hessian_vector, hessian_matvec).

            Parameters:
                X : Values fed to the X placeholder
                U : Values fed to the U placeholder
                X_df : Values fed to the X_df placeholder; only used when
                    use_differential_points=True

            Returns:
                (n, n) numpy array, where n is the length of grads_flat. The
                lower triangle is a copy of the upper one, so the result is
                exactly symmetric.
        '''

        print(
            "Warning, trying to calculate the full Hessian is infeasible for large networks!"
        )

        feed_dict = {self.X: X, self.U: U}
        if self.use_differential_points:
            feed_dict[self.X_df] = X_df

        # We use repeated runs to avoid adding gradient ops for every
        # element of the hessian
        n = int(self.grads_flat.shape[0])
        H = np.empty((n, n))
        progbar = Progbar(n)
        for i in range(n):
            # Feeding the i-th unit vector yields the i-th row of H
            vec = np.zeros(n, dtype=np.float32)
            vec[i] = 1.0
            feed_dict[self.hessian_vector] = vec
            h_row = self.sess.run(self.hessian_matvec, feed_dict)
            h_row = util.unwrap(h_row)
            H[i, :] = h_row[:]
            progbar.update(i + 1)

        # Explicitly symmetrize (copy the upper triangle onto the lower one)
        # so that e.g. eigenvalues are always real. The right-hand side is
        # materialized before the assignment, so no entry is read after
        # being overwritten.
        lower = np.tril_indices(n, -1)
        H[lower] = H.T[lower]
        return H