Example #1
def nac_complex_single_layer(x_in, out_units, epsilon = 0.000001):

    '''
    :param x_in: input feature vector
    :param out_units: number of output units of the cell
    :param epsilon: small value to avoid log(0); not used by this asinh-based variant
    :return: associated weight matrix and output tensor
    '''

    in_shape = x_in.shape[1]

    W_hat = tf.get_variable(shape=[in_shape, out_units],
                            initializer= tf.initializers.random_uniform(minval=-2, maxval=2),
                            trainable=True, name="W_hat2")

    M_hat = tf.get_variable(shape=[in_shape, out_units],
                            initializer=tf.initializers.random_uniform(minval=-2, maxval=2),
                            trainable=True, name="M_hat2")

    W = tf.nn.tanh(W_hat) * tf.nn.sigmoid(M_hat)

    # Express the input features in asinh (log-like) space to learn complex functions
    x_modified = tf.asinh(x_in)

    m = tf.sinh( tf.matmul(x_modified, W) )

    return m, W
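A minimal smoke test for the cell above; the two-feature placeholder, the single output unit, and the session setup are illustrative assumptions (TF 1.x graph mode, matching the get_variable/placeholder style used throughout this page):

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 2])
y, W = nac_complex_single_layer(x, out_units=1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Untrained W_hat/M_hat give arbitrary outputs; in practice a loss on y
    # (e.g. squared error against x[:, 0] * x[:, 1]) would drive training.
    print(sess.run(y, feed_dict={x: np.array([[2.0, 3.0]], dtype=np.float32)}))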
Example #2
def arcsinh(x, alpha=1):
    """
    arcsinh

    See Also: logtanh
    """
    return tf.asinh(x * alpha) / alpha
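As a quick note on the scaling above: dividing by alpha keeps the slope at the origin equal to 1, while alpha controls where the log-like compression sets in,

\[
\frac{\operatorname{asinh}(\alpha x)}{\alpha} \approx
\begin{cases}
x, & |\alpha x| \ll 1,\\[2pt]
\dfrac{\operatorname{sign}(x)\,\ln\!\big(2\alpha |x|\big)}{\alpha}, & |\alpha x| \gg 1.
\end{cases}
\]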
Example #3
def call_with_critical_point_scanner(f, *args):
    """Calls f(scanner, *args) in TensorFlow session-context.

    Here, `scanner` will be a function with signature
    scanner(seed:int, scale:float) -> (potential, stationarity, pos_vector).

    The function `scanner` can only perform a scan when called from within
    the TF session-context that is set up by this function.
    """
    graph = tf.Graph()
    with graph.as_default():
        t_input = tf.placeholder(tf.float64, shape=[70])
        t_v70 = tf.Variable(initial_value=numpy.zeros([70]),
                            trainable=True,
                            dtype=tf.float64)
        op_assign_input = tf.assign(t_v70, t_input)
        d = tf_so8_sugra_potential(t_v70)
        t_potential = d['potential']
        t_stationarity = tf_so8_sugra_stationarity(d['a1'], d['a2'])
        opt = contrib_opt.ScipyOptimizerInterface(tf.asinh(t_stationarity),
                                                  options=dict(maxiter=500))
        with tf.Session() as sess:
            sess.run([tf.global_variables_initializer()])

            def scanner(seed, scale):
                rng = numpy.random.RandomState(seed)
                v70 = rng.normal(scale=scale, size=[70])
                sess.run([op_assign_input], feed_dict={t_input: v70})
                opt.minimize(session=sess)
                n_ret = sess.run([t_potential, t_stationarity, t_v70])
                return n_ret

            return f(scanner, *args)
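A hedged usage sketch: only the `f(scanner, *args)` calling convention comes from the docstring above; the `scan_seeds` helper and the seed/scale values are made up for illustration.

def scan_seeds(scanner, num_seeds, scale):
    # Try a few seeds and keep the critical point with the smallest stationarity.
    best = None
    for seed in range(num_seeds):
        potential, stationarity, v70 = scanner(seed, scale)
        if best is None or stationarity < best[1]:
            best = (potential, stationarity, v70)
    return best

best_potential, best_stationarity, best_v70 = call_with_critical_point_scanner(
    scan_seeds, 5, 0.1)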
Example #4
def arcsinh(x, alpha=1):
    """
    arcsinh

    See Also: logtanh
    """
    return tf.asinh(x * alpha) / alpha
Example #5
    def __call__(self, input, reuse=False, is_training=False):

        with tf.variable_scope(self.name):
            # setup layer
            x = input
            input_size = input.shape[1].value

            g = tf.get_variable(
                "_w_g_", [input_size, self.output_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=.01),
                trainable=is_training)

            wt = tf.get_variable(
                "_w_wt_", [input_size, self.output_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=.01),
                trainable=is_training)

            mt = tf.get_variable(
                "_w_mt_", [input_size, self.output_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=.01),
                trainable=is_training)

            with tf.variable_scope('nac_w'):
                w = tf.multiply(tf.tanh(wt), tf.sigmoid(mt))

            with tf.variable_scope('simple_nac'):
                a = tf.matmul(x, w)

            with tf.variable_scope('complex_nac'):
                # m  = tf.exp( self._mult_div_nac( tf.log( tf.abs( x ) + 1e-10 ) ) )
                m = tf.sinh(tf.matmul(tf.asinh(x), w))

            with tf.variable_scope('math_gate'):
                gc = tf.sigmoid(tf.matmul(x, g))

            with tf.variable_scope('result'):
                x = (gc * a) + ((1 - gc) * m)

            # activation
            if self.act is not None:
                x = self.act(x)

            # setup dropout
            if self.dropout > 0 and is_training:
                x = tf.layers.dropout(inputs=x, rate=self.dropout)

            if not reuse: self.layer = x

            print(x)
            return x
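In symbols, the layer above is a NALU-style gated cell in which asinh/sinh take the place of the usual log/exp pair (kept in the commented-out line), so zero and negative inputs stay well-defined. With weights \(W=\tanh(W_t)\odot\sigma(M_t)\) (`wt`, `mt`) and gate \(g=\sigma(xG)\) (`gc`),

\[
a = xW,\qquad m=\sinh\!\big(\operatorname{asinh}(x)\,W\big),\qquad y = g\odot a + (1-g)\odot m .
\]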
Example #6
  def _forward_log_det_jacobian(self, x):
    # y = sinh((arcsinh(x) + skewness) * tailweight)
    # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
    # dy/dx
    # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)

    # This is computed inside the log to avoid catastrophic cancellations
    # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1).
    return (tf.log(
        tf.cosh((tf.asinh(x) + self.skewness) * self.tailweight)
        # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
        # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x).
        / _sqrtx2p1(x)) + tf.log(self.tailweight))
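Spelling the comment out: with \(y=\sinh\big((\operatorname{asinh}(x)+s)\,t\big)\), where \(s\) is the skewness and \(t\) the tailweight,

\[
\frac{dy}{dx}=\frac{t\,\cosh\!\big((\operatorname{asinh}(x)+s)\,t\big)}{\sqrt{x^2+1}},
\qquad
\log\frac{dy}{dx}=\log\cosh\!\big((\operatorname{asinh}(x)+s)\,t\big)-\tfrac12\log\!\big(x^2+1\big)+\log t,
\]

which is exactly what the return statement evaluates, with `_sqrtx2p1(x)` standing in for \(\sqrt{x^2+1}\) and the division taken inside the log to avoid the cancellations the comment mentions.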
Example #7
    def __call__(self, input):
        """
        Performs forward propagation for the NAC cell

        :param input: a tensorflow input tensor
        :return: the outputs of the forward propagation
        """
        g = tf.sigmoid(tf.matmul(self._g, input))
        a = self._add_sub_nac(input)
        m = tf.sinh(self._mult_div_nac(tf.asinh((input))))
        y = tf.multiply(g, a) + tf.multiply(1 - g, m)

        return y
Example #8
    def _forward_log_det_jacobian(self, x):
        # y = sinh((arcsinh(x) + skewness) * tailweight)
        # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
        # dy/dx
        # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)

        # This is computed inside the log to avoid catastrophic cancellations
        # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1).
        return (tf.log(
            tf.cosh((tf.asinh(x) + self.skewness) * self.tailweight)
            # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
            # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x).
            / _sqrtx2p1(x)) + tf.log(self.tailweight))
Example #9
  def _inverse_log_det_jacobian(self, y):
    # x = sinh(arcsinh(y) / tailweight - skewness)
    # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
    # dx/dy
    # = cosh(arcsinh(y) / tailweight - skewness)
    #     / (tailweight * sqrt(y**2 + 1))

    # This is computed inside the log to avoid catastrophic cancellations
    # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(x**2 + 1).
    return (tf.log(
        tf.cosh(tf.asinh(y) / self.tailweight - self.skewness)
        # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
        # where (arcsinh(x) / tailweight) - skewness ~= arcsinh(x).
        / _sqrtx2p1(y)) - tf.log(self.tailweight))
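The inverse direction is the mirror image: with \(x=\sinh\big(\operatorname{asinh}(y)/t-s\big)\),

\[
\log\frac{dx}{dy}=\log\cosh\!\big(\operatorname{asinh}(y)/t-s\big)-\tfrac12\log\!\big(y^2+1\big)-\log t .
\]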
Example #10
    def _inverse_log_det_jacobian(self, y):
        # x = sinh(arcsinh(y) / tailweight - skewness)
        # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
        # dx/dy
        # = cosh(arcsinh(y) / tailweight - skewness)
        #     / (tailweight * sqrt(y**2 + 1))

        # This is computed inside the log to avoid catastrophic cancellations
        # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(x**2 + 1).
        return (tf.log(
            tf.cosh(tf.asinh(y) / self.tailweight - self.skewness)
            # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
            # where (arcsinh(x) / tailweight) - skewness ~= arcsinh(x).
            / _sqrtx2p1(y)) - tf.log(self.tailweight))
Example #11
def get_scanner(output_path, maxiter=1000, stationarity_threshold=1e-7):
    """Obtains a basic TensorFlow-based scanner for extremal points."""
    graph = tf.Graph()
    with graph.as_default():
        tf_scalar_evaluator = get_tf_scalar_evaluator()
        t_input = tf.compat.v1.placeholder(tf.float64, shape=[70])
        t_v70 = tf.Variable(initial_value=numpy.zeros([70]),
                            trainable=True,
                            dtype=tf.float64)
        op_assign_input = tf.compat.v1.assign(t_v70, t_input)
        sinfo = tf_scalar_evaluator(tf.cast(t_v70, tf.complex128))
        t_potential = sinfo.potential
        #
        t_stationarity = sinfo.stationarity
        op_opt = contrib_opt.ScipyOptimizerInterface(
            tf.asinh(t_stationarity), options={'maxiter': maxiter})

        #
        def scanner(seed, scale=0.1, num_iterations=1):
            results = collections.defaultdict(list)
            rng = numpy.random.RandomState(seed)
            with graph.as_default():
                with tf.compat.v1.Session() as sess:
                    sess.run([tf.compat.v1.global_variables_initializer()])
                    for n in range(num_iterations):
                        v70 = rng.normal(scale=scale, size=[70])
                        sess.run([op_assign_input], feed_dict={t_input: v70})
                        op_opt.minimize(sess)
                        n_pot, n_stat, n_v70 = sess.run(
                            [t_potential, t_stationarity, t_v70])
                        if n_stat <= stationarity_threshold:
                            results[S_id(n_pot)].append(
                                (n, n_pot, n_stat, list(n_v70)))
                            # Overwrite output at every iteration.
                            if output_path is not None:
                                tmp_out = output_path + '.tmp'
                                with open(tmp_out, 'w') as h:
                                    h.write('n=%4d: p=%.12g s=%.12g\n' %
                                            (n, n_pot, n_stat))
                                    h.write(pprint.pformat(dict(results)))
                                os.rename(tmp_out, output_path)
            return dict(results)

        #
        return scanner
Example #12
def transform_input_tf(flux, input_mode, distmod, is_hsc):
    base = 27.0 if is_hsc else 27.5

    tmp = flux

    if 'scaled' in input_mode:
        m = tf.maximum(tf.reduce_max(tmp), 1.0)
        tmp = tmp / m

    if 'magnitude' in input_mode:
        v = tf.asinh(tmp * 0.5)
        if 'absolute' in input_mode:
            # c = tf.constant(2.5 * np.log10(np.e), dtype=tf.float32)
            # tmp = 27.5 - v * c - distmod
            tmp = base - 2.5 * v / tf.math.log(10.0) - distmod
        else:
            tmp = base - 2.5 * v / tf.math.log(10.0)
    return tmp
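The 'magnitude' branch above is an asinh (softened) magnitude: writing \(f\) for the possibly rescaled flux `tmp`, the returned value is

\[
m = \text{base} - \frac{2.5}{\ln 10}\,\operatorname{asinh}\!\Big(\frac{f}{2}\Big) - \text{distmod},
\]

with the distance-modulus term dropped when 'absolute' is not requested. Since \(\operatorname{asinh}(f/2)\to\ln f\) for large \(f\), this agrees with the ordinary \(\text{base} - 2.5\log_{10} f\) magnitude for bright sources while remaining finite and smooth at zero or slightly negative flux.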
Example #13
def hyp_mlr(incoming, before_mlr_dim, num_classes, radius=1.0, reuse=tf.AUTO_REUSE,
            scope='HyperbolicMLR', mlr_geom='hyp'):
    """
    Multi-logistic regression in hyperbolic space.

    :param incoming: incoming tensor with shape [batch_size x before_mlr_dim]
    :param before_mlr_dim: last dimension of the incoming tensor
    :param num_classes: number of output classes
    :param radius: radius of the Poincaré ball
    :param scope: scope for the operation
    :param mlr_geom: 'eucl' or 'hyp'; geometry used to compute the logits
    :return: logits tensor of shape [batch_size x num_classes]
    """

    with tf.variable_scope(scope, reuse=reuse):
        A_mlr = []
        P_mlr = []
        logits_list = []
        for cl in range(num_classes):
            A_mlr.append(tf.get_variable('A_mlr' + str(cl),
                                         dtype=tf.float32,
                                         shape=[1, before_mlr_dim],
                                         initializer=tf.contrib.layers.xavier_initializer()))

            P_mlr.append(tf.get_variable('P_mlr' + str(cl),
                                         dtype=tf.float32,
                                         shape=[1, before_mlr_dim],
                                         initializer=tf.constant_initializer(0.0)))

            if mlr_geom == 'eucl':
                logits_list.append(tf.reshape(hyp_ops.tf_dot(-P_mlr[cl] + incoming, A_mlr[cl]), [-1]))

            elif mlr_geom == 'hyp':
                minus_p_plus_x = hyp_ops.tf_mob_add(-P_mlr[cl], incoming, radius)
                norm_a = hyp_ops.tf_norm(A_mlr[cl])
                lambda_px = hyp_ops.tf_lambda_x(minus_p_plus_x, radius)
                px_dot_a = hyp_ops.tf_dot(minus_p_plus_x, tf.nn.l2_normalize(A_mlr[cl]))

                logit = 2. / np.sqrt(radius) * norm_a * tf.asinh(np.sqrt(radius) * px_dot_a * lambda_px)
                logits_list.append(tf.reshape(logit, [-1]))

        logits = tf.stack(logits_list, axis=1)

    return logits
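In symbols, the 'hyp' branch above computes, per class \(k\), writing \(c\) for the `radius` argument and assuming `hyp_ops.tf_lambda_x` is the usual Poincaré conformal factor \(\lambda_c(v)=2/(1-c\lVert v\rVert^2)\) and `tf_mob_add` is Möbius addition \(\oplus_c\):

\[
\text{logit}_k(x) = \frac{2}{\sqrt{c}}\,\lVert a_k\rVert\,
\operatorname{asinh}\!\Big(\sqrt{c}\,\big\langle v_k,\tfrac{a_k}{\lVert a_k\rVert}\big\rangle\,\lambda_c(v_k)\Big),
\qquad v_k = (-p_k)\oplus_c x .
\]

This is the same hyperbolic MLR logit expression that reappears in Examples #16 and #25 below.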
Example #14
 def init_vars(self):
     
     # All calcs done in self.session
     self.session = (tf.get_default_session() if tf.get_default_session() else tf.InteractiveSession())
     # All dynamical equations kept in assignments
     self.assignments_forces = tuple()
     self.assignments_points = tuple()
     
     self.dt0 = 1e-5 #tf.Variable(1e-5, dtype=tf.float32)
     self.dt = tf.Variable(1e-5, dtype=tf.float32)
     self.dt_ = tf.placeholder(tf.float32)
     
     # initialize variables
     self.it_num = 0
     self.t = 0
     self.tv = [] # to save volume evolution
     self.th_mean = mean(self.params['links']['thickness']* self.link_weights )
     self.nrad_mean = mean(self.params['nodes']['radius'])
     self.r_min = float32(min(self.nrad_mean, self.th_mean)/10.)
     self.f_mild = lambda x: self.r_min * tf.asinh( x / self.r_min )
     
     # for assignments
     self.asg = {}
Example #15
    def __init__(
            self,
            pts=[],
            edg=[],
            fixed=False,
            JSON=None,
            keep_paths=False,
            PAIRS=PAIRS,
            POW=POW,
            POWn=POWn,
            POW_SN=POW_SN,  # power extra factors of r_i+r_j in node repulsion
            **kw):
        """ 
        pts: node positions
        edg: edge list
        fixed: if nodes should remain fixed
        JSON: if network should be loaded from a json file (specify file paths or file object)
        keep_paths: whether to keep original link trajectories inside JSON file
        
        kw: 
        links = {'k': <spring constant>, 
                'thickness': <# or list of thicknesses>,
                'amplitude': <of repulsive force>,
                'ce': <cooling exponent for sim. annealing>,
                'Temp0': <initial temperature as a fraction of thickness>,
                'segs': <# of segments on each link>}
        nodes = {'amplitude': <of repulsive force>,
                'radius': <range of repulsive Gaussian force>}
        net.points contains all link points. The link-link interaction matrix
        should now be part of the net.links object, not the individual links. This allows massive
        vectorization of all interactions.
        
        net.links:
            Now we have a single net_links object that contains info of all links.
            All that separates them is the net_links.idx dict which indexes which points in
            net_links.points belong to which link.
            
            net_links also contains all methods for interactions between links.
        """
        tt.tic()
        # All calcs done in self.session
        self.session = tf.InteractiveSession()
        # All dynamical equations kept in assignments
        self.assignments = tuple()
        self.params = {
            'links': {
                'k': 1e1,
                'amplitude': 5e2,
                'thickness': .1,
                'Temp0': .5,
                'ce': 1000,
                'segs': 5,
                'weighted': 0
            },
            'nodes': {
                'amplitude': 5e2,
                'radius': 1.,
                'weighted': 0
            },
        }

        self.gnam = 'E-ELF-sim'

        self.dt0 = 1e-5  #tf.Variable(1e-5, dtype=tf.float32)
        self.dt = tf.Variable(1e-5, dtype=tf.float32)
        self.dt_ = tf.placeholder(tf.float32)

        if JSON:
            self.get_JSON(JSON, kw)

        self.it_num = 0
        kwl = (kw['links'] if 'links' in kw else {})
        kwn = (kw['nodes'] if 'nodes' in kw else {})
        self.params['links'].update(kwl)
        self.params['nodes'].update(kwn)

        self.fixed = fixed
        self.keep_paths = keep_paths
        self.PAIRS = PAIRS
        self.params['POW'] = self.POW = POW
        self.params['POWn'] = self.POWn = POWn
        self.params['POW_SN'] = self.POW_SN = POW_SN

        if not JSON:
            self.pts = array(pts)  #tf.Variable(pts, dtype=tf.float32)
            self.elist = array(
                edg, dtype=int32)[:, :2]  #tf.Variable(edg, dtype=tf.float32)
            self.link_weights = (array(edg)[:, 2]
                                 if self.params['links']['weighted'] else
                                 array([1] * len(self.elist)))
            if 'labels' in self.params['nodes']:
                self.make_link_labels()

        self.it_num = 0
        self.t = 0
        self.tv = []  # to save volume evolution
        self.curr_keys = []
        # initialize variables
        self.next_binning = 0  # it_num of next binning
        self.bin_size = 10  # initial default factor
        #self.links.bin_rand(self.bin_size)
        #self.th_mean = mean(self.links.thickness) #mean(self.params['links']['thickness'])
        self.th_mean = mean(self.params['links']['thickness'] *
                            self.link_weights)
        self.nrad_mean = mean(self.params['nodes']['radius'])
        self.r_min = float32(min(self.nrad_mean, self.th_mean) / 10.)
        self.f_mild = lambda x: self.r_min * tf.asinh(x / self.r_min)

        #self._init(**kw)
        # for assignments
        self.asg = {}

        tt.toc()
        print "Making links...",
        self.net_links(net=self, **self.params['links'])  #(**kwl)
        tt.toc()
        print "Making nodes...",
        self.net_nodes(net=self, **self.params['nodes'])  #(**kwn)

        # we'll have variable thicknesses
        #self.gnam += '-th%.3g-r%.3g'%(self.params['links']['thickness'],self.params['nodes']['radius'])

        tt.toc()
        print "initializing global variables...",
        init = tf.global_variables_initializer()
        self.session.run(init)
        tt.toc()
        print "Initial binning..."
        self.rebin()
        tt.toc()
        print "setup: dt...",
        self.setup_dt()
        tt.toc()
        print "setup: volume...",
        self.vol = tf.reduce_sum(vec_len(self.links.dp))
        #self.vol = tf.reduce_sum([self.links.lens(l) for l in self.links.idx])
        tt.toc()
        print "setup: dynamics...",
        # Define the computation group and iteration steps
        self.step = tf.group(*self.assignments)
        tt.toc()
        print "setup: dynamics 2...",
        #self.dyn = ['dt']+(['N'] if not self.fixed else [])+['NL','LF_Int']
        self.dyn = (['N'] if not self.fixed else []) + ['NL', 'LF_Int'
                                                        ]  #+ ['dt']
        self.step2 = tf.group(*[self.asg[k][0] for k in self.dyn])
        self.step3 = tf.group(*self.asg['dt'])
        tt.toc()
        print "Done!",
Example #16
    def construct_execution_graph(self):

        # Collect vars separately. Word embeddings are not used here.
        eucl_vars = []
        hyp_vars = []

        ################## word embeddings ###################

        # Initialize word embeddings close to 0, to have average norm equal to word_init_avg_norm.
        maxval = (3. * (word_init_avg_norm**2) / (2. * word_dim))**(1. / 3)
        initializer = tf.random_uniform_initializer(minval=-maxval,
                                                    maxval=maxval,
                                                    dtype=dtype)
        self.embeddings = tf.get_variable(
            'embeddings',
            dtype=dtype,
            shape=[len(self.word_to_id), word_dim],
            initializer=initializer)

        if inputs_geom == 'eucl':
            eucl_vars += [self.embeddings]

        ################## RNNs for sentence embeddings ###################

        if cell_type == 'TFrnn':
            assert sent_geom == 'eucl'
            cell_class = lambda h_dim: tf.contrib.rnn.BasicRNNCell(h_dim)
        elif cell_type == 'TFgru':
            assert sent_geom == 'eucl'
            cell_class = lambda h_dim: tf.contrib.rnn.GRUCell(h_dim)
        elif cell_type == 'TFlstm':
            assert sent_geom == 'eucl'
            cell_class = lambda h_dim: tf.contrib.rnn.BasicLSTMCell(h_dim)
        elif cell_type == 'rnn' and sent_geom == 'eucl':
            cell_class = lambda h_dim: rnn_impl.EuclRNN(h_dim, dtype=dtype)
        elif cell_type == 'gru' and sent_geom == 'eucl':
            cell_class = lambda h_dim: rnn_impl.EuclGRU(h_dim, dtype=dtype)
        elif cell_type == 'rnn' and sent_geom == 'hyp':
            cell_class = lambda h_dim: rnn_impl.HypRNN(
                num_units=h_dim,
                inputs_geom=inputs_geom,
                bias_geom=bias_geom,
                c_val=c_val,
                non_lin=cell_non_lin,
                fix_biases=fix_biases,
                fix_matrices=fix_matrices,
                matrices_init_eye=matrices_init_eye,
                dtype=dtype)
        elif cell_type == 'gru' and sent_geom == 'hyp':
            cell_class = lambda h_dim: rnn_impl.HypGRU(
                num_units=h_dim,
                inputs_geom=inputs_geom,
                bias_geom=bias_geom,
                c_val=c_val,
                non_lin=cell_non_lin,
                fix_biases=fix_biases,
                fix_matrices=fix_matrices,
                matrices_init_eye=matrices_init_eye,
                dtype=dtype)
        else:
            logger.error('Not valid cell type: %s and sent_geom %s' %
                         (cell_type, sent_geom))
            exit()

        # RNN 1
        with tf.variable_scope(cell_type + '1'):
            word_embeddings_1 = tf.nn.embedding_lookup(
                self.embeddings, self.word_ids_1)  # bs x num_w_s1 x dim

            cell_1 = cell_class(hidden_dim)
            initial_state_1 = cell_1.zero_state(batch_size, dtype)
            outputs_1, state_1 = tf.nn.dynamic_rnn(
                cell=cell_1,
                inputs=word_embeddings_1,
                dtype=dtype,
                initial_state=initial_state_1,
                sequence_length=self.num_words_1)
            if cell_type == 'TFlstm':
                self.sent_1 = state_1[1]
            else:
                self.sent_1 = state_1

            sent1_norm = util.tf_norm(self.sent_1)

        # RNN 2
        with tf.variable_scope(cell_type + '2'):
            word_embeddings_2 = tf.nn.embedding_lookup(self.embeddings,
                                                       self.word_ids_2)
            # tf.summary.scalar('word_emb2', tf.reduce_mean(tf.norm(word_embeddings_2, axis=2)))

            cell_2 = cell_class(hidden_dim)
            initial_state_2 = cell_2.zero_state(batch_size, dtype)
            outputs_2, state_2 = tf.nn.dynamic_rnn(
                cell=cell_2,
                inputs=word_embeddings_2,
                dtype=dtype,
                initial_state=initial_state_2,
                sequence_length=self.num_words_2)
            if cell_type == 'TFlstm':
                self.sent_2 = state_2[1]
            else:
                self.sent_2 = state_2

            sent2_norm = util.tf_norm(self.sent_2)

        tf.summary.scalar('RNN/word_emb1',
                          tf.reduce_mean(tf.norm(word_embeddings_1, axis=2)))
        tf.summary.scalar('RNN/sent1', tf.reduce_mean(sent1_norm))
        tf.summary.scalar('RNN/sent2', tf.reduce_mean(sent2_norm))

        eucl_vars += cell_1.eucl_vars + cell_2.eucl_vars
        if sent_geom == 'hyp':
            hyp_vars += cell_1.hyp_vars + cell_2.hyp_vars

        ## Compute d(s1, s2)
        if sent_geom == 'eucl':
            d_sq_s1_s2 = util.tf_euclid_dist_sq(self.sent_1, self.sent_2)
        else:
            d_sq_s1_s2 = util.tf_poinc_dist_sq(self.sent_1,
                                               self.sent_2,
                                               c=c_val)

        ##### Some summaries:

        # For summaries and debugging, we need these:
        pos_labels = tf.reshape(tf.cast(self.label_placeholder, tf.float64),
                                [-1, 1])
        neg_labels = 1. - pos_labels
        weights_pos_labels = pos_labels / tf.reduce_sum(pos_labels)
        weights_neg_labels = neg_labels / tf.reduce_sum(neg_labels)

        ################## first feed forward layer ###################

        # Define variables for the first feed-forward layer: W1 * s1 + W2 * s2 + b + bd * d(s1,s2)
        W_ff_s1 = tf.get_variable(
            'W_ff_s1',
            dtype=dtype,
            shape=[hidden_dim, before_mlr_dim],
            initializer=tf.contrib.layers.xavier_initializer())

        W_ff_s2 = tf.get_variable(
            'W_ff_s2',
            dtype=dtype,
            shape=[hidden_dim, before_mlr_dim],
            initializer=tf.contrib.layers.xavier_initializer())

        b_ff = tf.get_variable('b_ff',
                               dtype=dtype,
                               shape=[1, before_mlr_dim],
                               initializer=tf.constant_initializer(0.0))

        b_ff_d = tf.get_variable('b_ff_d',
                                 dtype=dtype,
                                 shape=[1, before_mlr_dim],
                                 initializer=tf.constant_initializer(0.0))

        eucl_vars += [W_ff_s1, W_ff_s2]
        if ffnn_geom == 'eucl' or bias_geom == 'eucl':
            eucl_vars += [b_ff]
            if additional_features == 'dsq':
                eucl_vars += [b_ff_d]
        else:
            hyp_vars += [b_ff]
            if additional_features == 'dsq':
                hyp_vars += [b_ff_d]

        if ffnn_geom == 'eucl' and sent_geom == 'hyp':  # Sentence embeddings are Euclidean after log, except the proper distance (Eucl or hyp) is kept!
            self.sent_1 = util.tf_log_map_zero(self.sent_1, c_val)
            self.sent_2 = util.tf_log_map_zero(self.sent_2, c_val)

        ####### Build output_ffnn #######
        if ffnn_geom == 'eucl':
            output_ffnn = tf.matmul(self.sent_1, W_ff_s1) + tf.matmul(
                self.sent_2, W_ff_s2) + b_ff
            if additional_features == 'dsq':  # [u, v, d(u,v)^2]
                output_ffnn = output_ffnn + d_sq_s1_s2 * b_ff_d

        else:
            assert sent_geom == 'hyp'
            ffnn_s1 = util.tf_mob_mat_mul(W_ff_s1, self.sent_1, c_val)
            ffnn_s2 = util.tf_mob_mat_mul(W_ff_s2, self.sent_2, c_val)
            output_ffnn = util.tf_mob_add(ffnn_s1, ffnn_s2, c_val)

            hyp_b_ff = b_ff
            if bias_geom == 'eucl':
                hyp_b_ff = util.tf_exp_map_zero(b_ff, c_val)
            output_ffnn = util.tf_mob_add(output_ffnn, hyp_b_ff, c_val)

            if additional_features == 'dsq':  # [u, v, d(u,v)^2]
                hyp_b_ff_d = b_ff_d
                if bias_geom == 'eucl':
                    hyp_b_ff_d = util.tf_exp_map_zero(b_ff_d, c_val)

                output_ffnn = util.tf_mob_add(
                    output_ffnn,
                    util.tf_mob_scalar_mul(d_sq_s1_s2, hyp_b_ff_d, c_val),
                    c_val)

        if ffnn_geom == 'eucl':
            output_ffnn = util.tf_eucl_non_lin(output_ffnn,
                                               non_lin=ffnn_non_lin)
        else:
            output_ffnn = util.tf_hyp_non_lin(output_ffnn,
                                              non_lin=ffnn_non_lin,
                                              hyp_output=(mlr_geom == 'hyp'
                                                          and dropout == 1.0),
                                              c=c_val)
        # Mobius dropout
        if dropout < 1.0:
            # If we are here, then output_ffnn should be Euclidean.
            output_ffnn = tf.nn.dropout(output_ffnn,
                                        keep_prob=self.dropout_placeholder)
            if (mlr_geom == 'hyp'):
                output_ffnn = util.tf_exp_map_zero(output_ffnn, c_val)

        ################## MLR ###################
        # output_ffnn is batch_size x before_mlr_dim

        A_mlr = []
        P_mlr = []
        logits_list = []
        for cl in range(num_classes):
            A_mlr.append(
                tf.get_variable(
                    'A_mlr' + str(cl),
                    dtype=dtype,
                    shape=[1, before_mlr_dim],
                    initializer=tf.contrib.layers.xavier_initializer()))
            eucl_vars += [A_mlr[cl]]

            P_mlr.append(
                tf.get_variable('P_mlr' + str(cl),
                                dtype=dtype,
                                shape=[1, before_mlr_dim],
                                initializer=tf.constant_initializer(0.0)))

            if mlr_geom == 'eucl':
                eucl_vars += [P_mlr[cl]]
                logits_list.append(
                    tf.reshape(
                        util.tf_dot(-P_mlr[cl] + output_ffnn, A_mlr[cl]),
                        [-1]))

            elif mlr_geom == 'hyp':
                hyp_vars += [P_mlr[cl]]
                minus_p_plus_x = util.tf_mob_add(-P_mlr[cl], output_ffnn,
                                                 c_val)
                norm_a = util.tf_norm(A_mlr[cl])
                lambda_px = util.tf_lambda_x(minus_p_plus_x, c_val)
                px_dot_a = util.tf_dot(minus_p_plus_x,
                                       tf.nn.l2_normalize(A_mlr[cl]))
                logit = 2. / np.sqrt(c_val) * norm_a * tf.asinh(
                    np.sqrt(c_val) * px_dot_a * lambda_px)
                logits_list.append(tf.reshape(logit, [-1]))

        self.logits = tf.stack(logits_list, axis=1)

        self.argmax_idx = tf.argmax(self.logits, axis=1, output_type=tf.int32)

        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.label_placeholder, logits=self.logits))
        tf.summary.scalar('classif/unreg_loss', self.loss)

        if reg_beta > 0.0:
            assert num_classes == 2
            distance_regularizer = tf.reduce_mean(
                (tf.cast(self.label_placeholder, dtype=dtype) - 0.5) *
                d_sq_s1_s2)

            self.loss = self.loss + reg_beta * distance_regularizer

        self.acc = tf.reduce_mean(
            tf.to_float(tf.equal(self.argmax_idx, self.label_placeholder)))
        tf.summary.scalar('classif/accuracy', self.acc)

        ######################################## OPTIMIZATION ######################################
        all_updates_ops = []

        ###### Update Euclidean parameters using Adam.
        optimizer_euclidean_params = tf.train.AdamOptimizer(learning_rate=1e-3)
        eucl_grads = optimizer_euclidean_params.compute_gradients(
            self.loss, eucl_vars)
        capped_eucl_gvs = [(tf.clip_by_norm(grad, 1.), var)
                           for grad, var in eucl_grads]  ###### Clip gradients
        all_updates_ops.append(
            optimizer_euclidean_params.apply_gradients(capped_eucl_gvs))

        ###### Update Hyperbolic parameters, i.e. word embeddings and some biases in our case.
        def rsgd(v, riemannian_g, learning_rate):
            if hyp_opt == 'rsgd':
                return util.tf_exp_map_x(v,
                                         -self.burn_in_factor * learning_rate *
                                         riemannian_g,
                                         c=c_val)
            else:
                # Use approximate RSGD based on a simple retraction.
                updated_v = v - self.burn_in_factor * learning_rate * riemannian_g
                # Projection op after SGD update. Need to make sure embeddings are inside the unit ball.
                return util.tf_project_hyp_vecs(updated_v, c_val)

        if inputs_geom == 'hyp':
            grads_and_indices_hyp_words = tf.gradients(self.loss,
                                                       self.embeddings)
            grads_hyp_words = grads_and_indices_hyp_words[0].values
            repeating_indices = grads_and_indices_hyp_words[0].indices
            unique_indices, idx_in_repeating_indices = tf.unique(
                repeating_indices)
            agg_gradients = tf.unsorted_segment_sum(
                grads_hyp_words, idx_in_repeating_indices,
                tf.shape(unique_indices)[0])

            agg_gradients = tf.clip_by_norm(agg_gradients,
                                            1.)  ######## Clip gradients
            unique_word_emb = tf.nn.embedding_lookup(
                self.embeddings, unique_indices)  # no repetitions here

            riemannian_rescaling_factor = util.riemannian_gradient_c(
                unique_word_emb, c=c_val)
            rescaled_gradient = riemannian_rescaling_factor * agg_gradients

            all_updates_ops.append(
                tf.scatter_update(
                    self.embeddings, unique_indices,
                    rsgd(unique_word_emb, rescaled_gradient,
                         lr_words)))  # Updated rarely

        if len(hyp_vars) > 0:
            hyp_grads = tf.gradients(self.loss, hyp_vars)
            capped_hyp_grads = [
                tf.clip_by_norm(grad, 1.) for grad in hyp_grads
            ]  ###### Clip gradients

            for i in range(len(hyp_vars)):
                riemannian_rescaling_factor = util.riemannian_gradient_c(
                    hyp_vars[i], c=c_val)
                rescaled_gradient = riemannian_rescaling_factor * capped_hyp_grads[
                    i]
                all_updates_ops.append(
                    tf.assign(hyp_vars[i],
                              rsgd(hyp_vars[i], rescaled_gradient,
                                   lr_ffnn)))  # Updated frequently

        self.all_optimizer_var_updates_op = tf.group(*all_updates_ops)

        self.summary_merged = tf.summary.merge_all()
        self.test_summary_writer = tf.summary.FileWriter(
            os.path.join(root_path, 'tb_28may/' + tensorboard_name + '/'))
Example #17
    def __call__(self, x, is_training=False, filters=None):

        if filters is not None:
            self.output_size = filters

        with tf.variable_scope(self.name):

            # kernel shape
            k = [ self.kernel, self.kernel, x.shape[-1], self.output_size ] if type(self.kernel) is int \
                else [ self.kernel[0], self.kernel[1], x.shape[-1], self.output_size ]

            # kernel
            gt = tf.get_variable(
                "_w_gt_2d",
                k,
                # initializer = tf.contrib.layers.xavier_initializer(),
                initializer=tf.truncated_normal_initializer(stddev=0.02),
                trainable=is_training)

            wt = tf.get_variable(
                "_w_wt_2d",
                k,
                # initializer = tf.contrib.layers.xavier_initializer(),
                initializer=tf.truncated_normal_initializer(stddev=0.02),
                trainable=is_training)

            mt = tf.get_variable(
                "_w_mt_2d",
                k,
                # initializer = tf.contrib.layers.xavier_initializer(),
                initializer=tf.truncated_normal_initializer(stddev=0.02),
                trainable=is_training)

            strides = ( 1, self.stride, self.stride, 1 ) if type(self.stride) is int \
                    else ( 1, self.stride[0], self.stride[1], 1 )

            with tf.variable_scope('nac_w'):
                w = tf.multiply(tf.tanh(wt), tf.sigmoid(mt))

            with tf.variable_scope('simple_nac'):
                a = tf.nn.conv2d(x, w, strides, padding=self.padding)

            with tf.variable_scope('complex_nac'):
                # m = tf.exp( tf.nn.conv2d( tf.log( tf.abs( x ) + 1e-10 ), w, strides, padding = self.padding ) )
                m = tf.sinh(
                    tf.nn.conv2d(tf.asinh(x), w, strides,
                                 padding=self.padding))

            with tf.variable_scope('math_gate'):
                gc = tf.nn.sigmoid(
                    tf.nn.conv2d(x, gt, strides, padding=self.padding))

            with tf.variable_scope('result'):
                x = (gc * a) + ((1 - gc) * m)

            # if self.bias:
            #     b = tf.compat.v1.get_variable( '_b_',
            #                                 [ 1, 1, 1, self.output_size ],
            #                                 initializer = tf.constant_initializer( 0.0 ),
            #                                 trainable = is_training )
            #     x += b

            # batch normalization
            if self.bn:
                x = bn(x, is_training=is_training, name="_bn")

            # activation
            if self.act is not None:
                x = self.act(x, name="_act")

            # setup dropout
            if self.dropout > 0 and is_training:
                x = dropout(x, self.dropout, name="_dp")

        print(x)
        return x
Example #18
def arcsinh(x, alpha=1):
    return tf.asinh(x * alpha) / alpha
Example #19
def arsinh(x):
    result = tf.asinh(x)
    return result
Example #20
 def _inverse(self, y):
     return tf.sinh(tf.asinh(y) / self.tailweight - self.skewness)
Example #21
# NumPy implementation
numpy_per = []
tf_per = []

for array in array_sizes:
    test_matrix = np.random.random((array, array, 2))

    start = time.time()
    test_result = np.arcsinh(test_matrix)
    end = time.time()

    numpy_per.append(end - start)

    tf_input = tf.constant(test_matrix, dtype=tf.float64)

    tf_start = time.time()
    tf_result = tf.asinh(tf_input)
    tf_end = time.time()

    tf_per.append(tf_end - tf_start)
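    # Caveat: if this runs under graph-mode TF 1.x, the tf.asinh() call above only
    # adds an op to the graph, so tf_per measures graph construction rather than
    # the numerical work; timing the computation itself would require a session
    # run (under eager TF 2.x the call does execute immediately).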

for i in range(len(array_sizes)):
    print('Size:', array_sizes[i], ' numpy performance:',
          "%.5f" % numpy_per[i], ' tensorflow performance:',
          "%.5f" % tf_per[i])
    #print('tensorflow performance :', tf_per)
plt.figure(figsize=(5, 5))
plt.plot(array_sizes, numpy_per, color='green', label='numpy')
plt.plot(array_sizes, tf_per, color='blue', label='tensorflow')
plt.title('Tensorflow vs Numpy : Asinh')
plt.savefig('Asinh.png')
Example #22
def reashu(t):
    return nn.where(t < 0, 0, asinh(t))
Example #23
 def eta(self):
     return tf.asinh(self.pz / self.pt())
Example #24
 def cn(x):
     """compressive nonlinearity."""
     return tf.asinh(4. * x) / 4.
Example #25
    def create(labels, embeddings, **kwargs):

        word_vec = embeddings['word']
        char_vec = embeddings['char']
        model = HyperbolicRNNModel()
        model.sess = kwargs.get('sess', tf.Session())

        model.mxlen = kwargs.get('maxs', 100)
        model.maxw = kwargs.get('maxw', 100)

        hsz = int(kwargs['hsz'])
        pdrop = kwargs.get('dropout', 0.5)
        pdrop_in = kwargs.get('dropin', 0.0)
        rnntype = kwargs.get('rnntype', 'blstm')
        print(rnntype)
        layers = kwargs.get('layers', 1)
        model.labels = labels
        model.crf = bool(kwargs.get('crf', False))
        model.crf_mask = bool(kwargs.get('crf_mask', False))
        model.span_type = kwargs.get('span_type')
        model.proj = bool(kwargs.get('proj', False))
        model.feed_input = bool(kwargs.get('feed_input', False))
        model.activation_type = kwargs.get('activation', 'tanh')

        char_dsz = char_vec.dsz
        nc = len(labels)
        model.x = kwargs.get(
            'x', tf.placeholder(tf.int32, [None, model.mxlen], name="x"))
        model.xch = kwargs.get(
            'xch',
            tf.placeholder(tf.int32, [None, model.mxlen, model.maxw],
                           name="xch"))
        model.y = kwargs.get(
            'y', tf.placeholder(tf.int64, [None, model.mxlen], name="y"))
        model.lengths = kwargs.get(
            'lengths', tf.placeholder(tf.int32, [None], name="lengths"))
        model.pkeep = kwargs.get('pkeep',
                                 tf.placeholder(tf.float64, name="pkeep"))
        model.pdrop_value = pdrop
        model.pdropin_value = pdrop_in
        model.word_vocab = {}

        inputs_geom = kwargs.get("inputs_geom", "hyp")
        bias_geom = kwargs.get("bias_geom", "hyp")
        ffnn_geom = kwargs.get("ffnn_geom", "hyp")
        sent_geom = kwargs.get("sent_geom", "hyp")
        mlr_geom = kwargs.get("mlr_geom", "hyp")
        c_val = kwargs.get("c_val", 1.0)
        cell_non_lin = kwargs.get("cell_non_lin",
                                  "id")  #"id/relu/tanh/sigmoid."
        ffnn_non_lin = kwargs.get("ffnn_non_lin", "id")
        cell_type = kwargs.get("cell_type", 'rnn')
        lr_words = kwargs.get("lw_words", 0.01)
        lr_ffnn = kwargs.get("lr_ffnn", 0.01)
        optimizer = kwargs.get("optimizer", "rsgd")
        eucl_clip = kwargs.get("eucl_clip", 1.0)
        hyp_clip = kwargs.get("hyp_clip", 1.0)
        before_mlr_dim = kwargs.get("before_mlr_dim", nc)
        learn_softmax = kwargs.get("learn_softmax", True)
        batch_sz = 10

        print("C_val:", c_val)

        eucl_vars = []
        hyp_vars = []

        if word_vec is not None:
            model.word_vocab = word_vec.vocab

        # model.char_vocab = char_vec.vocab
        seed = np.random.randint(10e8)
        if word_vec is not None:
            # word_embeddings = embed(model.x, len(word_vec.vocab), word_vec.dsz,
            #                         initializer=tf.constant_initializer(word_vec.weights, dtype=tf.float32, verify_shape=True))
            with tf.variable_scope("LUT"):
                W = tf.get_variable("W",
                                    dtype=tf.float64,
                                    initializer=tf.constant_initializer(
                                        word_vec.weights,
                                        dtype=tf.float64,
                                        verify_shape=True),
                                    shape=[len(word_vec.vocab), word_vec.dsz],
                                    trainable=True)
                # e0 = tf.scatter_update(W, tf.constant(0, dtype=tf.int32, shape=[1]), tf.zeros(shape=[1, word_vec.dsz]))
                # with tf.control_dependencies([W]):
                word_embeddings = tf.nn.embedding_lookup(W, model.x)

        # Wch = tf.Variable(tf.constant(char_vec.weights, dtype=tf.float32), name="Wch")
        # ce0 = tf.scatter_update(Wch, tf.constant(0, dtype=tf.int32, shape=[1]), tf.zeros(shape=[1, char_dsz]))

        # word_char, _ = pool_chars(model.xch, Wch, ce0, char_dsz, **kwargs)
        # joint = word_char if word_vec is None else tf.concat(values=[word_embeddings, word_char], axis=2)
        # word_embeddings = tf.Print(word_embeddings, [word_embeddings], message="embeddings")

        embedseq = word_embeddings

        # embedseq = tf.nn.dropout(word_embeddings, model.pkeep)
        # if (mlr_geom == 'hyp'):
        #     embedseq = util.tf_exp_map_zero(embedseq, c_val)

        if cell_type == 'rnn' and sent_geom == 'eucl':
            cell_class = lambda h_dim: tf.contrib.rnn.BasicRNNCell(h_dim)
        if cell_type == 'rnn' and sent_geom == 'hyp':
            cell_class = lambda h_dim, layer: LorentzRNN(
                num_units=h_dim,
                inputs_geom=inputs_geom,
                bias_geom=bias_geom,
                c_val=c_val,
                non_lin=cell_non_lin,
                fix_biases=False,
                fix_matrices=False,
                matrices_init_eye=False,
                dtype=tf.float64,
                layer=layer)
        # elif cell_type == 'gru' and sent_geom == 'hyp':
        #     cell_class = lambda h_dim, layer: rnn_impl.HypGRU(num_units=h_dim,
        #                                                inputs_geom=inputs_geom,
        #                                                bias_geom=bias_geom,
        #                                                c_val=c_val,
        #                                                non_lin=cell_non_lin,
        #                                                fix_biases=False,
        #                                                fix_matrices=False,
        #                                                matrices_init_eye=False,
        #                                                dtype=tf.float64,
        #                                                layer=layer)
        # elif cell_type == 'lstm' and sent_geom == 'hyp':
        #     cell_class = lambda h_dim, layer: rnn_impl.HypLSTM(num_units=h_dim,
        #                                                inputs_geom=inputs_geom,
        #                                                bias_geom=bias_geom,
        #                                                c_val=c_val,
        #                                                non_lin=cell_non_lin,
        #                                                fix_biases=False,
        #                                                fix_matrices=False,
        #                                                matrices_init_eye=False,
        #                                                dtype=tf.float64,
        #                                                layer=layer)
        rnnout = embedseq
        for i in range(layers):
            with tf.variable_scope('rnnLayers', reuse=tf.AUTO_REUSE):
                if rnntype == 'rnn':
                    cell = cell_class(hsz, i)
                    initial_state = cell.zero_state(batch_sz, tf.float64)

                    # rnnout = tf.contrib.rnn.DropoutWrapper(cell)
                    rnnout, state = tf.nn.dynamic_rnn(cell,
                                                    rnnout, \
                                                    sequence_length=model.lengths,
                                                    initial_state=initial_state,
                                                    dtype=tf.float64)

                    eucl_vars += cell.eucl_vars
                    if sent_geom == 'hyp':
                        hyp_vars += cell.hyp_vars

                elif rnntype == 'bi':
                    cell_1 = cell_class(hsz, i)
                    cell_2 = cell_class(hsz, i)

                    init_fw = cell_1.zero_state(batch_sz, tf.float64)
                    init_bw = cell_2.zero_state(batch_sz, tf.float64)

                    rnnout, state = tf.nn.bidirectional_dynamic_rnn(
                        cell_1,
                        cell_2,
                        rnnout,
                        initial_state_fw=init_fw,
                        initial_state_bw=init_bw,
                        sequence_length=model.lengths,
                        dtype=tf.float64)
                    rnnout = tf.concat(axis=2, values=rnnout)

                    eucl_vars += cell_1.eucl_vars + cell_2.eucl_vars
                    if sent_geom == 'hyp':
                        hyp_vars += cell_1.hyp_vars + cell_2.hyp_vars

                else:
                    cell = cell_class(hsz)

                    # rnnout = tf.contrib.rnn.DropoutWrapper(cell)
                    rnnout, state = tf.nn.dynamic_rnn(
                        cell,
                        rnnout,
                        sequence_length=model.lengths,
                        dtype=tf.float64)

                    eucl_vars += cell.eucl_vars
                    if sent_geom == 'hyp':
                        hyp_vars += cell.hyp_vars

        # rnnout = tf.Print(rnnout, [rnnout], message="rnnout")

        tf.summary.histogram('RNN/rnnout', rnnout)

        # # Converts seq to tensor, back to (B,T,W)
        hout = rnnout.get_shape()[-1]
        print(rnnout.get_shape())
        # # Flatten from [B x T x H] - > [BT x H]
        with tf.variable_scope("fc"):
            rnnout_bt_x_h = tf.reshape(rnnout, [-1, hout])
            # rnnout_bt_x_h = tf.Print(rnnout_bt_x_h, [rnnout_bt_x_h], message="rnnout_bt_x_h")

            ################## first feed forward layer ###################

            # Define variables for the first feed-forward layer: W1 * s1 + W2 * s2 + b + bd * d(s1,s2)
            W_ff_s1 = tf.get_variable(
                'W_ff_s1',
                dtype=tf.float64,
                shape=[hout,
                       before_mlr_dim],  # 400, 20 -- 20 number of classes
                initializer=tf.contrib.layers.xavier_initializer(
                    dtype=tf.float64))

            tf.summary.histogram("W_ff_s1", W_ff_s1)

            # b_ff = tf.get_variable('b_ff',
            #                        dtype=tf.float64,
            #                        shape=[1, before_mlr_dim],
            #                        initializer=tf.constant_initializer(0.0))

            # # TODO(MB): ffn should be in hyperbolic space, no?
            eucl_vars += [W_ff_s1]

            # hyp_vars += [b_ff]

            # #### treat W as an update in tangent space
            # # ffnn_s1 = rnnout_bt_x_h + W_ff_s1 + b_ff
            # # cheat for now. i don't know how to multiply these together first
            ffnn_s1 = lorentz.tf_mink_dot_matrix(rnnout_bt_x_h,
                                                 tf.transpose(W_ff_s1))
            # ffnn_s1 = W_ff_s1 +  dotp * rnnout_bt_x_h
            # #### embed back into minkowski space
            # ffnn_s1 = lorentz.tf_exp_map_x(rnnout_bt_x_h, ffnn_s1, c_val)

            # print('ffnn', ffnn_s1.get_shape())
            # tf.summary.histogram("ffnn_s1", ffnn_s1)

            output_ffnn = util.tf_hyp_non_lin(
                ffnn_s1,
                non_lin=ffnn_non_lin,
                hyp_output=True,  #(mlr_geom == 'hyp'),
                c=c_val)
        tf.summary.histogram("output_ffnn", output_ffnn)
        # output_ffnn = tf.Print(output_ffnn, [output_ffnn], message="output_ffnn")
        # output_ffnn = dotp

        # ################## MLR ###################
        # # output_ffnn is batch_size x before_mlr_dim
        if not learn_softmax:
            probs = output_ffnn
        else:
            print("learning softmax in hyperbolic space")
            A_mlr = []
            P_mlr = []
            logits_list = []
            dtype = tf.float64

            print('output shape', output_ffnn.get_shape())

            with tf.variable_scope("hyper_softmax"):
                for cl in range(nc):
                    with tf.variable_scope('mlp'):
                        A_mlr.append(
                            tf.get_variable('A_mlr' + str(cl),
                                            dtype=dtype,
                                            shape=[1, before_mlr_dim],
                                            initializer=tf.contrib.layers.
                                            xavier_initializer()))
                        eucl_vars += [A_mlr[cl]]

                        P_mlr.append(
                            tf.get_variable(
                                'P_mlr' + str(cl),
                                dtype=dtype,
                                shape=[1, before_mlr_dim],
                                initializer=tf.constant_initializer(0.0)))

                        if mlr_geom == 'eucl':
                            eucl_vars += [P_mlr[cl]]
                            logits_list.append(
                                tf.reshape(
                                    util.tf_dot(-P_mlr[cl] + output_ffnn,
                                                A_mlr[cl]), [-1]))

                        elif mlr_geom == 'hyp':
                            hyp_vars += [P_mlr[cl]]
                            minus_p_plus_x = util.tf_mob_add(
                                -P_mlr[cl], output_ffnn, c_val)
                            norm_a = util.tf_norm(A_mlr[cl])
                            lambda_px = util.tf_lambda_x(minus_p_plus_x, c_val)
                            # blow-- P+X == [10, 20] tensor. A_mlr is also [10,20]. px_dot_a is [10, 1]
                            px_dot_a = util.tf_dot(
                                minus_p_plus_x, tf.nn.l2_normalize(A_mlr[cl]))
                            logit = 2. / np.sqrt(c_val) * norm_a * tf.asinh(
                                np.sqrt(c_val) * px_dot_a * lambda_px)

                            logits_list.append(logit)

            probs = tf.stack(logits_list, axis=1)

        print("probs shape", probs.get_shape())
        model.probs = tf.reshape(probs, [-1, model.mxlen, nc])
        print("reshaped probs", model.probs.get_shape())
        tf.summary.histogram("probs", model.probs)

        model.best = tf.argmax(model.probs, 2)

        model.loss = model.create_loss()

        # model.best = tf.argmax(model.probs, axis=1, output_type=tf.int32)
        #     ######################################## OPTIMIZATION ######################################
        all_updates_ops = []

        #     ###### Update Euclidean parameters using Adam.
        optimizer_euclidean_params = tf.train.AdamOptimizer(learning_rate=1e-3)
        eucl_grads = optimizer_euclidean_params.compute_gradients(
            model.loss, eucl_vars)
        capped_eucl_gvs = [(tf.clip_by_norm(grad, eucl_clip), var)
                           for grad, var in eucl_grads]  ###### Clip gradients
        all_updates_ops.append(
            optimizer_euclidean_params.apply_gradients(capped_eucl_gvs))

        ###### Update Hyperbolic parameters, i.e. word embeddings and some biases in our case.
        def rsgd(v, riemannian_g, learning_rate):
            if optimizer == 'rsgd':
                return lorentz.tf_exp_map_x(v,
                                            -model.burn_in_factor *
                                            learning_rate * riemannian_g,
                                            c=c_val)
            else:
                # Use approximate RSGD based on a simple retraction.
                updated_v = v - model.burn_in_factor * learning_rate * riemannian_g
                # Projection op after SGD update. Need to make sure embeddings are inside the unit ball.
                return util.tf_project_hyp_vecs(updated_v, c_val)

        if inputs_geom == 'hyp':
            grads_and_indices_hyp_words = tf.gradients(model.loss, W)
            grads_hyp_words = grads_and_indices_hyp_words[0].values
            # grads_hyp_words = tf.Print(grads_hyp_words, [grads_hyp_words], message="grads_hyp_words")

            repeating_indices = grads_and_indices_hyp_words[0].indices

            unique_indices, idx_in_repeating_indices = tf.unique(
                repeating_indices)
            # unique_indices = tf.Print(unique_indices, [unique_indices], message="unique_indices")
            # idx_in_repeating_indices = tf.Print(idx_in_repeating_indices, [idx_in_repeating_indices], message="idx_in_repeating_indices")

            agg_gradients = tf.unsorted_segment_sum(
                grads_hyp_words, idx_in_repeating_indices,
                tf.shape(unique_indices)[0])

            agg_gradients = tf.clip_by_norm(agg_gradients,
                                            hyp_clip)  ######## Clip gradients
            # agg_gradients = tf.Print(agg_gradients, [agg_gradients], message="agg_gradients")

            unique_word_emb = tf.nn.embedding_lookup(
                W, unique_indices)  # no repetitions here
            # unique_word_emb = tf.Print(unique_word_emb, [unique_word_emb], message="unique_word_emb")

            riemannian_rescaling_factor = util.riemannian_gradient_c(
                unique_word_emb, c=c_val)
            # riemannian_rescaling_factor = tf.Print(riemannian_rescaling_factor, [riemannian_rescaling_factor], message="rescl factor")
            rescaled_gradient = riemannian_rescaling_factor * agg_gradients
            # rescaled_gradient = tf.Print(rescaled_gradient, [rescaled_gradient], message="rescl gradient")
            all_updates_ops.append(
                tf.scatter_update(
                    W, unique_indices,
                    rsgd(unique_word_emb, rescaled_gradient,
                         lr_words)))  # Updated rarely

        if len(hyp_vars) > 0:
            hyp_grads = tf.gradients(model.loss, hyp_vars)
            capped_hyp_grads = [
                tf.clip_by_norm(grad, hyp_clip) for grad in hyp_grads
            ]  ###### Clip gradients

            for i in range(len(hyp_vars)):
                riemannian_rescaling_factor = util.riemannian_gradient_c(
                    hyp_vars[i], c=c_val)
                rescaled_gradient = riemannian_rescaling_factor * capped_hyp_grads[
                    i]
                all_updates_ops.append(
                    tf.assign(hyp_vars[i],
                              rsgd(hyp_vars[i], rescaled_gradient,
                                   lr_ffnn)))  # Updated frequently

        model.all_optimizer_var_updates_op = tf.group(*all_updates_ops)
        print("all ops: ", model.all_optimizer_var_updates_op)

        model.summary_merged = tf.summary.merge_all()

        model.test_summary_writer = tf.summary.FileWriter(
            './runs/hyper/' + str(os.getpid()), model.sess.graph)

        return model
Example #26
 def _forward(self, x):
   return tf.sinh((tf.asinh(x) + self.skewness) * self.tailweight)
Example #27
def stash_old(x, lamb=1.1613326990732873, alpha=0.6521334159737763):
    return _tf.where(x <= 0.0, 2.0 * _tf.tanh(alpha * x),
                     lamb * _tf.asinh(2.0 * alpha * x / lamb))
Example #28
 def _forward(self, x):
     return tf.sinh((tf.asinh(x) + self.skewness) * self.tailweight)
Example #29
def stash(x, lamb=1.1613855392326946, alpha=0.6520042387583171):
    return _tf.where(x <= 0.0, 2.0 * _tf.tanh(alpha * x),
                     lamb * _tf.asinh(2.0 * alpha * x / lamb))
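A quick check that the two branches of `stash` (and `stash_old`) join smoothly at the origin: both vanish at \(x=0\), and

\[
\frac{d}{dx}\,2\tanh(\alpha x)\Big|_{0}=2\alpha,
\qquad
\frac{d}{dx}\,\lambda\operatorname{asinh}\!\Big(\frac{2\alpha x}{\lambda}\Big)\Big|_{0}
=\frac{2\alpha}{\sqrt{(2\alpha x/\lambda)^2+1}}\Bigg|_{0}=2\alpha,
\]

so the piecewise activation is \(C^1\) at zero for any choice of the \(\lambda,\alpha\) constants.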
Example #30
# network parameter : biases #
ml_p_b1 = tf.Variable( tf.zeros( [ ml_h_hidden ] ) )
ml_p_b2 = tf.Variable( tf.zeros( [ ml_h_input  ] ) )

##
##   script - network topology
##

# network topology : input layer #
ml_g_input = tf.placeholder( tf.float32, [ None, ml_h_input ] )

# network topology : hidden layer #
ml_g_hidden = tf.nn.relu( tf.add( tf.matmul( ml_g_input, ml_p_w1 ), ml_p_b1 ) )

# network topology : output layer #
ml_g_output = tf.asinh( tf.add( tf.matmul( ml_g_hidden, ml_p_w2 ), ml_p_b2 ) )

##
##   script - network sub-topology
##

# network topology : input layer #
ml_s1_input = tf.placeholder( tf.float32, [ None, ml_h_input ] )

# network topology : output layer #
ml_s1_output =  tf.nn.relu( tf.add( tf.matmul( ml_s1_input, ml_p_w1 ),ml_p_b1 ) )

# network topology : input layer #
ml_s2_input = tf.placeholder( tf.float32, [ None, ml_h_hidden ] )

# network topology : output layer #
Example #31
import tensorflow as tf

sess = tf.InteractiveSession()

t = tf.constant([1.8, 2.2])

target = tf.asinh(t).eval()

print(target)
Example #32
  def call(self, inputs):
    self.call_weights()
    if (not isinstance(inputs, ed.RandomVariable) and
        not isinstance(self.kernel, ed.RandomVariable) and
        not isinstance(self.bias, ed.RandomVariable)):
      return super(DenseDVI, self).call(inputs)
    inputs_mean, inputs_variance, inputs_covariance = get_moments(inputs)
    kernel_mean, kernel_variance, _ = get_moments(self.kernel)
    if self.use_bias:
      bias_mean, _, bias_covariance = get_moments(self.bias)

    # E[outputs] = E[inputs] * E[kernel] + E[bias]
    mean = tf.tensordot(inputs_mean, kernel_mean, [[-1], [0]])
    if self.use_bias:
      mean = tf.nn.bias_add(mean, bias_mean)

    # Cov = E[inputs**2] Cov(kernel) + E[W]^T Cov(inputs) E[W] + Cov(bias)
    # For first term, assume Cov(kernel) = 0 on off-diagonals so we only
    # compute diagonal term.
    covariance_diag = tf.tensordot(inputs_variance + inputs_mean**2,
                                   kernel_variance, [[-1], [0]])
    # Compute quadratic form E[W]^T Cov E[W] from right-to-left. First is
    #  [..., features, features], [features, units] -> [..., features, units].
    cov_w = tf.tensordot(inputs_covariance, kernel_mean, [[-1], [0]])
    # Next is [..., features, units], [features, units] -> [..., units, units].
    w_cov_w = tf.tensordot(cov_w, kernel_mean, [[-2], [0]])
    covariance = w_cov_w
    if self.use_bias:
      covariance += bias_covariance
    covariance = tf.matrix_set_diag(
        covariance, tf.matrix_diag_part(covariance) + covariance_diag)

    if self.activation in (tf.keras.activations.relu, tf.nn.relu):
      # Compute activation's moments with variable names from Wu et al. (2018).
      variance = tf.matrix_diag_part(covariance)
      scale = tf.sqrt(variance)
      mu = mean / (scale + tf.keras.backend.epsilon())
      mean = scale * soft_relu(mu)

      pairwise_variances = (tf.expand_dims(variance, -1) *
                            tf.expand_dims(variance, -2))  # [..., units, units]
      rho = covariance / tf.sqrt(pairwise_variances +
                                 tf.keras.backend.epsilon())
      rho = tf.clip_by_value(rho,
                             -1. / (1. + tf.keras.backend.epsilon()),
                             1. / (1. + tf.keras.backend.epsilon()))
      s = covariance / (rho + tf.keras.backend.epsilon())
      mu1 = tf.expand_dims(mu, -1)  # [..., units, 1]
      mu2 = tf.matrix_transpose(mu1)  # [..., 1, units]
      a = (soft_relu(mu1) * soft_relu(mu2) +
           rho * tfp.distributions.Normal(0., 1.).cdf(mu1) *
           tfp.distributions.Normal(0., 1.).cdf(mu2))
      gh = tf.asinh(rho)
      bar_rho = tf.sqrt(1. - rho**2)
      gr = gh + rho / (1. + bar_rho)
      # Include numerically stable versions of gr and rho when multiplying or
      # dividing them. The sign of gr*rho and rho/gr is always positive.
      safe_gr = tf.abs(gr) + 0.5 * tf.keras.backend.epsilon()
      safe_rho = tf.abs(rho) + tf.keras.backend.epsilon()
      exp_negative_q = gr / (2. * math.pi) * tf.exp(
          -safe_rho / (2. * safe_gr * (1 + bar_rho)) +
          (gh - rho) / (safe_gr * safe_rho) * mu1 * mu2)
      covariance = s * (a + exp_negative_q)
    elif self.activation not in (tf.keras.activations.linear, None):
      raise NotImplementedError('Activation is {}. Deterministic variational '
                                'inference is only available if activation is '
                                'ReLU or None.'.format(self.activation))

    return ed.MultivariateNormalFullCovariance(mean, covariance)
Example #33
 def _inverse(self, y):
   return tf.sinh(tf.asinh(y) / self.tailweight - self.skewness)
Example #34
def ashlu(t):
    return nn.where(t < 0, asinh(t), t)
Example #35
def self_normalizing_asinh(x):
    return 1.256734802399369 * tf.asinh(x)