Example #1
File: gat.py Project: qss2012/RGAT
    def setup(self,
              layer_no,
              input_feat_mat,
              hid_units,
              nb_features,
              nb_nodes,
              training,
              attn_drop,
              ffd_drop,
              adj_mat,
              n_heads,
              activation=tf.nn.elu,
              residual=False,
              concat=True):

        self.W = [
            utils.weights_creation(layer_no=layer_no,
                                   i=i,
                                   nb_features=nb_features,
                                   hid_units=hid_units,
                                   n_heads=n_heads)
            for i in range(len(adj_mat))
        ]

        tf.summary.histogram(str(layer_no) + "relation", self.W)

        self.H_all_rel = []
        for i, rel_mat in enumerate(adj_mat):
            H = []
            for j in range(n_heads):

                W_r = self.W[i][j]  # FxF'

                if len(input_feat_mat) == 1 and layer_no == 1:
                    h_pr = tf.sparse_tensor_dense_matmul(
                        input_feat_mat[0], W_r)  # N*F' matrix
                else:
                    h_pr = tf.matmul(input_feat_mat[i], W_r)

                h_pr = h_pr[np.newaxis]
                with tf.name_scope(str(layer_no) + 'sp_attn'):
                    f_1 = tf.layers.conv1d(h_pr, 1, 1)
                    f_2 = tf.layers.conv1d(h_pr, 1, 1)

                    f_1 = tf.squeeze(f_1, axis=0)
                    f_2 = tf.squeeze(f_2, axis=0)
                    tf.summary.histogram("f1", f_1)
                    tf.summary.histogram("f2", f_2)

                    logits = tf.sparse_add(rel_mat * f_1,
                                           rel_mat * tf.transpose(f_2))

                    lrelu = tf.SparseTensor(indices=logits.indices,
                                            values=tf.nn.leaky_relu(
                                                logits.values),
                                            dense_shape=logits.dense_shape)

                    coefs = tf.sparse_softmax(lrelu)
                    tf.summary.histogram("logits", logits.values)

                    tf.summary.histogram("coefs", coefs.values)

                    if attn_drop != 0.0:
                        coefs = tf.SparseTensor(indices=coefs.indices,
                                                values=tf.nn.dropout(
                                                    coefs.values,
                                                    1 - attn_drop),
                                                dense_shape=coefs.dense_shape)
                    if ffd_drop != 0.0:
                        h_pr = tf.nn.dropout(h_pr, 1 - ffd_drop)

                    h_pr = tf.squeeze(h_pr, axis=0)
                    h_prime_weighted = tf.sparse_tensor_dense_matmul(
                        coefs, h_pr)
                    tf.summary.histogram("h_pr_w", h_prime_weighted)
                    h_prime_weighted = tf.contrib.layers.bias_add(
                        h_prime_weighted)
                    F_ = activation(h_prime_weighted)
                    H.append(F_)

            if concat:
                self.H_all_rel.append(tf.concat(H, axis=1))

            else:
                self.H_all_rel.append(tf.add_n(H) / n_heads)

        return self.H_all_rel
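Note: the attention block above follows the sparse GAT pattern (per-edge logits kept on the adjacency's sparsity pattern, softmax over each node's neighbours). A minimal, self-contained sketch of that pattern, assuming TF 1.x and a toy 3-node graph (all names here are illustrative):

import tensorflow as tf

N, F_out = 3, 4
h = tf.random_normal([N, F_out])  # projected node features (the h_pr above)
f_1 = tf.random_normal([N, 1])    # per-source attention scores
f_2 = tf.random_normal([N, 1])    # per-target attention scores
adj = tf.SparseTensor(indices=[[0, 1], [1, 0], [1, 2], [2, 1]],
                      values=tf.ones(4, dtype=tf.float32),
                      dense_shape=[N, N])

# logits[i, j] = f_1[i] + f_2[j], materialised only on existing edges
logits = tf.sparse_add(adj * f_1, adj * tf.transpose(f_2))
lrelu = tf.SparseTensor(logits.indices,
                        tf.nn.leaky_relu(logits.values),
                        logits.dense_shape)
coefs = tf.sparse_softmax(lrelu)               # row-wise softmax over neighbours
out = tf.sparse_tensor_dense_matmul(coefs, h)  # attention-weighted aggregation

with tf.Session() as sess:
    print(sess.run(out).shape)  # (3, 4)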
Example #2
    def K(self, X1, X2=None):
        r"""
        Vectorized kernel calc.
        Following notation from Beck (2017), i.e have tensors S,D,Kpp,Kp
        Input is two tensors of shape (# strings , # characters)
        and we calc the pair-wise kernel calcs between the elements (i.e n kern calcs for two lists of length n)
        D is the tensor than unrolls the recursion and allows vecotrizaiton
        """

        # Turn our inputs into lists of integers using one-hot embedding
        # first split up strings and pad to fixed length and prep for gpu
        # pad until all have length of self.maxlen
        X1 = tf.strings.split(tf.squeeze(X1, 1)).to_tensor(
            "PAD", shape=[None, self.maxlen])
        X1 = self.table.lookup(X1)
        if X2 is None:
            X2 = X1
            self.symmetric = True
        else:
            self.symmetric = False
            X2 = tf.strings.split(tf.squeeze(X2, 1)).to_tensor(
                "PAD", shape=[None, self.maxlen])
            X2 = self.table.lookup(X2)
        # keep track of original input sizes
        X1_shape = tf.shape(X1)[0]
        X2_shape = tf.shape(X2)[0]

        # prep the decay tensor D
        self.D = self._precalc()

        # turn into one-hot  i.e. shape (# strings, #characters+1, alphabet size)
        X1 = tf.one_hot(X1, self.alphabet_size + 1, dtype=tf.float64)
        X2 = tf.one_hot(X2, self.alphabet_size + 1, dtype=tf.float64)
        # remove the ones in the first column that encode the padding (i.e. we don't want them to count as a match)
        X1 = X1[:, :, 1:]
        X2 = X2[:, :, 1:]

        # get indices of all possible pairings from X1 and X2
        # this way allows the maximum number of kernel calcs to be squished onto the GPU (rather than just doing individual rows of the gram matrix)
        indicies_2, indicies_1 = tf.meshgrid(tf.range(0,
                                                      tf.shape(X2)[0]),
                                             tf.range(0,
                                                      tf.shape(X1)[0]))
        indicies = tf.concat(
            [tf.reshape(indicies_1, (-1, 1)),
             tf.reshape(indicies_2, (-1, 1))],
            axis=1)
        # if symmetric then only calc upper matrix (fill in rest later)
        if self.symmetric:
            indicies = tf.boolean_mask(
                indicies, tf.greater_equal(indicies[:, 1], indicies[:, 0]))

        X1_full = tf.gather(X1, indicies[:, 0], axis=0)
        X2_full = tf.gather(X2, indicies[:, 1], axis=0)

        if not self.symmetric:
            # also need to calculate some extra kernel evals for the normalization terms
            X1_full = tf.concat([X1_full, X1, X2], 0)
            X2_full = tf.concat([X2_full, X1, X2], 0)

        # make similarity matrix
        self.sim = tf.linalg.matmul(
            self.W, self.W, transpose_b=True) + tf.linalg.diag(self.kappa)
        self.sim = self.sim / tf.math.maximum(tf.reduce_max(self.sim), 1)

        # Make S: the similarity tensor of shape (# strings, #characters, # characters)
        S = tf.matmul(tf.matmul(X1_full, self.sim),
                      tf.transpose(X2_full, perm=(0, 2, 1)))

        # store squared match coef
        match_sq = tf.square(self.match_decay)

        # initialize final kernel results
        k = tf.zeros((tf.shape(S)[0]), dtype=tf.float64)
        # initialize Kp for dynamic programming
        Kp = tf.ones(shape=tf.stack([tf.shape(S)[0], self.maxlen,
                                     self.maxlen]),
                     dtype=tf.float64)

        # need to do 1st step
        Kp_temp = tf.multiply(S, Kp)
        Kp_temp = tf.reduce_sum(Kp_temp, -1)
        Kp_temp = tf.reduce_sum(Kp_temp, -1)
        Kp_temp = Kp_temp * match_sq
        # add to kernel result
        k = Kp_temp * self.order_coefs[0]

        # do all remaining steps
        for i in tf.range(self.max_subsequence_length - 1):
            Kp_temp = tf.multiply(S, Kp)
            Kp_temp = match_sq * Kp_temp
            Kp_temp = tf.matmul(Kp_temp, self.D)
            # save part required for next dynamic programming step
            Kp = tf.matmul(self.D, Kp_temp, transpose_a=True)
            Kp_temp = tf.multiply(S, Kp)
            Kp_temp = tf.reduce_sum(Kp_temp, -1)
            Kp_temp = tf.reduce_sum(Kp_temp, -1)
            Kp_temp = Kp_temp * match_sq
            # add to kernel result
            k += Kp_temp * self.order_coefs[i + 1]

        k = tf.expand_dims(k, 1)

        # put results into the right places in the gram matrix and normalize
        if self.symmetric:
            # if symmetric then only put in top triangle (inc diag)
            mask = tf.linalg.band_part(
                tf.ones((X1_shape, X2_shape), dtype=tf.int64), 0, -1)
            non_zero = tf.not_equal(mask, tf.constant(0, dtype=tf.int64))
            # Extracting the indices of upper triangle elements
            indices = tf.where(non_zero)
            out = tf.SparseTensor(indices,
                                  tf.squeeze(k),
                                  dense_shape=tf.cast((X1_shape, X2_shape),
                                                      dtype=tf.int64))
            k_results = tf.sparse.to_dense(out)
            # add in missing elements (lower diagonal)
            k_results = k_results + tf.linalg.set_diag(
                tf.transpose(k_results), tf.zeros(X1_shape, dtype=tf.float64))
            # normalise
            X_diag_Ks = tf.linalg.diag_part(k_results)
            norm = tf.tensordot(X_diag_Ks, X_diag_Ks, axes=0)
            k_results = tf.divide(k_results, tf.sqrt(norm))
        else:
            # otherwise can just reshape into gram matrix
            # but first take extra kernel calcs off end of k

            # COULD SPEED THIS UP FOR PREDICTIONS, AS MANY NORM TERMS ALREADY IN GRAM

            X_diag_Ks = tf.reshape(
                k[X1_shape * X2_shape:X1_shape * X2_shape + X1_shape], (-1, ))

            X2_diag_Ks = tf.reshape(k[-X2_shape:], (-1, ))

            k = k[0:X1_shape * X2_shape]
            k_results = tf.reshape(k, [X1_shape, X2_shape])
            # normalise
            norm = tf.tensordot(X_diag_Ks, X2_diag_Ks, axes=0)
            k_results = tf.divide(k_results, tf.sqrt(norm))

        return k_results
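Note: the closing tf.divide is the standard kernel normalisation \hat{k}(s,t) = k(s,t) / \sqrt{k(s,s)\,k(t,t)}; the tensordot with axes=0 forms the outer product of the two diagonal vectors, so each gram entry is divided by the corresponding pair of self-kernel terms.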
Example #3
item_emb = tf.Variable(tf.random_normal([item_count, dimension], stddev=0.01),
                       name='item_emb')
user_attri_emb = tf.Variable(tf.random_normal([user_count, 20], stddev=0.01),
                             name='user_attri_emb')
item_trans_w = tf.Variable(tf.random_normal([20, 20], stddev=0.01),
                           name='item_trans_w')
item_trans_b = tf.Variable(tf.zeros([20]), name='item_trans_b')
item_attri_input = tf.placeholder("float32", [None, 20])
item_attri_emb = tf.matmul(item_attri_input, item_trans_w)  #+ item_trans_b
inference_w = tf.Variable(tf.random_normal([52, 20], stddev=0.01),
                          name='inference_w')
inference_b = tf.Variable(tf.zeros([20]), name='inference_b')

adjacent_matrix = tf.SparseTensor(
    indices=A_indexs,
    values=A_values,
    dense_shape=[user_count + item_count,
                 user_count + item_count])  #[m+n, m+n]
initial_user_emb = tf.concat([user_emb, user_attri_emb], 1)  #[m, d1+d2]
initial_item_emb = tf.concat([item_emb, item_attri_emb], 1)  #[n, d1+d2]
feature_matrix_layer0 = tf.concat([initial_user_emb, initial_item_emb],
                                  0)  #[m+n, d1+d2]
neighbor_matrix_layer1 = tf.sparse_tensor_dense_matmul(adjacent_matrix,
                                                       feature_matrix_layer0)
feature_matrix_layer1 = tf.add(feature_matrix_layer0, neighbor_matrix_layer1)
neighbor_matrix_layer2 = tf.sparse_tensor_dense_matmul(adjacent_matrix,
                                                       feature_matrix_layer1)
feature_matrix_layer2 = tf.add(feature_matrix_layer1, neighbor_matrix_layer2)
final_user_emb, final_item_emb = tf.split(feature_matrix_layer2,
                                          [user_count, item_count], 0)
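Note: each propagation layer above computes H^{(l+1)} = H^{(l)} + A\,H^{(l)} = (I + A)\,H^{(l)} for l = 0, 1, where A is the sparse (m+n)x(m+n) adjacency over the joint user-item graph, so the final embeddings mix features from up to two-hop neighbourhoods.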
Example #4
    def _test_set_intersection_3d(self, dtype, invalid_indices=False):
        if invalid_indices:
            indices = tf.constant(
                [
                    [0, 1, 0],
                    [0, 1, 1],  # 0,1
                    [1, 0, 0],  # 1,0
                    [1, 1, 0],
                    [1, 1, 1],
                    [1, 1, 2],  # 1,1
                    [0, 0, 0],
                    [0, 0, 2],  # 0,0
                    # 2,0
                    [2, 1, 1]  # 2,1
                    # 3,*
                ],
                tf.int64)
        else:
            indices = tf.constant(
                [
                    [0, 0, 0],
                    [0, 0, 2],  # 0,0
                    [0, 1, 0],
                    [0, 1, 1],  # 0,1
                    [1, 0, 0],  # 1,0
                    [1, 1, 0],
                    [1, 1, 1],
                    [1, 1, 2],  # 1,1
                    # 2,0
                    [2, 1, 1]  # 2,1
                    # 3,*
                ],
                tf.int64)
        sp_a = tf.SparseTensor(
            indices,
            _constant(
                [
                    1,
                    9,  # 0,0
                    3,
                    3,  # 0,1
                    1,  # 1,0
                    9,
                    7,
                    8,  # 1,1
                    # 2,0
                    5  # 2,1
                    # 3,*
                ],
                dtype),
            tf.constant([4, 2, 3], tf.int64))
        sp_b = tf.SparseTensor(
            tf.constant(
                [
                    [0, 0, 0],
                    [0, 0, 3],  # 0,0
                    # 0,1
                    [1, 0, 0],  # 1,0
                    [1, 1, 0],
                    [1, 1, 1],  # 1,1
                    [2, 0, 1],  # 2,0
                    [2, 1, 1],  # 2,1
                    [3, 0, 0],  # 3,0
                    [3, 1, 0]  # 3,1
                ],
                tf.int64),
            _constant(
                [
                    1,
                    3,  # 0,0
                    # 0,1
                    3,  # 1,0
                    7,
                    8,  # 1,1
                    2,  # 2,0
                    5,  # 2,1
                    4,  # 3,0
                    4  # 3,1
                ],
                dtype),
            tf.constant([4, 2, 4], tf.int64))

        if invalid_indices:
            with self.assertRaisesRegexp(tf.OpError, "out of order"):
                self._set_intersection(sp_a, sp_b)
        else:
            expected_indices = [
                [0, 0, 0],  # 0,0
                # 0,1
                # 1,0
                [1, 1, 0],
                [1, 1, 1],  # 1,1
                # 2,0
                [2, 1, 0],  # 2,1
                # 3,*
            ]
            expected_values = _values(
                [
                    1,  # 0,0
                    # 0,1
                    # 1,0
                    7,
                    8,  # 1,1
                    # 2,0
                    5,  # 2,1
                    # 3,*
                ],
                dtype)
            expected_shape = [4, 2, 2]
            expected_counts = [
                [
                    1,  # 0,0
                    0  # 0,1
                ],
                [
                    0,  # 1,0
                    2  # 1,1
                ],
                [
                    0,  # 2,0
                    1  # 2,1
                ],
                [
                    0,  # 3,0
                    0  # 3,1
                ]
            ]

            # Sparse to sparse.
            intersection = self._set_intersection(sp_a, sp_b)
            self._assert_set_operation(expected_indices,
                                       expected_values,
                                       expected_shape,
                                       intersection,
                                       dtype=dtype)
            self.assertAllEqual(expected_counts,
                                self._set_intersection_count(sp_a, sp_b))

            # NOTE: sparse_to_dense doesn't support uint8 and uint16.
            if dtype not in [tf.uint8, tf.uint16]:
                # Dense to sparse.
                a = tf.cast(tf.sparse_to_dense(
                    sp_a.indices,
                    sp_a.shape,
                    sp_a.values,
                    default_value="-1" if dtype == tf.string else -1),
                            dtype=dtype)
                intersection = self._set_intersection(a, sp_b)
                self._assert_set_operation(expected_indices,
                                           expected_values,
                                           expected_shape,
                                           intersection,
                                           dtype=dtype)
                self.assertAllEqual(expected_counts,
                                    self._set_intersection_count(a, sp_b))

                # Dense to dense.
                b = tf.cast(tf.sparse_to_dense(
                    sp_b.indices,
                    sp_b.shape,
                    sp_b.values,
                    default_value="-2" if dtype == tf.string else -2),
                            dtype=dtype)
                intersection = self._set_intersection(a, b)
                self._assert_set_operation(expected_indices,
                                           expected_values,
                                           expected_shape,
                                           intersection,
                                           dtype=dtype)
                self.assertAllEqual(expected_counts,
                                    self._set_intersection_count(a, b))
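Note: a minimal usage sketch of the op under test, assuming TF 1.x and dense inputs (set membership is taken along the last dimension):

import tensorflow as tf

a = tf.constant([[1, 9, 3], [5, 7, -1]], tf.int64)
b = tf.constant([[9, 4, 3], [7, 7, -1]], tf.int64)
inter = tf.sets.set_intersection(a, b)  # SparseTensor of per-row intersections
size = tf.sets.set_size(inter)          # per-row intersection counts

with tf.Session() as sess:
    print(sess.run(tf.sparse.to_dense(inter)))  # [[3 9] [-1 7]]
    print(sess.run(size))                       # [2 2]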
Example #5
    def test_graphattnet_attnlayer_selfloop(self):

        gcn_graph = get_graph_test()
        node_dim = gcn_graph.X.shape[1]
        edge_dim = gcn_graph.E.shape[1] - 2
        nb_class = gcn_graph.Y.shape[1]

        gcn_model = GraphAttNet(node_dim,
                                nb_class,
                                num_layers=1,
                                learning_rate=0.01,
                                node_indim=8,
                                nb_attention=1)
        gcn_model.create_model()

        Wa = tf.eye(node_dim)
        va = tf.ones([2, node_dim])
        # self.Ssparse, self.Tsparse
        alphas, nH = gcn_model.simple_graph_attention_layer(
            gcn_model.node_input,
            Wa,
            va,
            gcn_model.Ssparse,
            gcn_model.Tsparse,
            gcn_model.Aind,
            gcn_model.Sshape,
            gcn_model.nb_edge,
            gcn_model.dropout_p_attn,
            gcn_model.dropout_p_node,
            add_self_loop=True)
        alphas_shape = tf.shape(alphas)

        node_indices = tf.range(gcn_model.Sshape[0])
        # Sparse identity
        # Debug
        id_indices = tf.stack([node_indices, node_indices], axis=1)
        val = tf.squeeze(tf.matmul(gcn_model.node_input, va, transpose_b=True))
        spI = tf.SparseTensor(
            indices=id_indices,
            values=val,
            dense_shape=[gcn_model.Sshape[0], gcn_model.Sshape[0]])

        init = tf.global_variables_initializer()
        #AI=tf.sparse_add(alphas,spI)

        graph = gcn_graph
        with tf.Session() as session:
            session.run([init])

            print('### Graph', graph.X.shape, graph.F.shape[0])
            # print(graph.Sind)
            # print(graph.Tind)
            nb_node = graph.X.shape[0]
            Aind = np.array(np.stack([graph.Sind[:, 0], graph.Tind[:, 1]],
                                     axis=-1),
                            dtype='int64')
            print("Adjacency Indices:", Aind.shape, Aind)
            feed_batch = {
                gcn_model.nb_node:
                graph.X.shape[0],
                gcn_model.nb_edge:
                graph.F.shape[0],
                gcn_model.node_input:
                graph.X,
                gcn_model.Ssparse:
                np.array(graph.Sind, dtype='int64'),
                gcn_model.Sshape:
                np.array([graph.X.shape[0], graph.F.shape[0]], dtype='int64'),
                gcn_model.Tsparse:
                np.array(graph.Tind, dtype='int64'),
                gcn_model.Aind:
                Aind,
                # self.F: graph.F,
                gcn_model.y_input:
                graph.Y,
                # self.dropout_p_H: self.dropout_rate_H,
                gcn_model.dropout_p_node:
                0.0,
                gcn_model.dropout_p_attn:
                0.0,
            }
            [c_alphas, c_nH, c_alphas_shape,
             spI] = session.run([alphas, nH, alphas_shape, spI],
                                feed_dict=feed_batch)
            print('alphas', c_alphas, c_alphas_shape)
            print('spI', spI)
            #print('AI',AI)
            sp_mat = sp.coo_matrix(
                (c_alphas.values,
                 (c_alphas.indices[:, 0], c_alphas.indices[:, 1])),
                shape=(nb_node, nb_node))
            Att_dense = sp_mat.todense()
            print(Att_dense)
            self.assertTrue(c_alphas_shape[0] == 3)
            self.assertTrue(c_alphas_shape[1] == 3)

            self.assertTrue(Att_dense[0, 2] == 0)
Example #6
    def _build(self, inp):
        """Applies a graph convolution operation to an input tensor

        Parameters
        ----------
        inp : tf.Tensor
            input tensor to be convolved

        Returns
        -------
        tf.Tensor
            convolved tensor

        """
        assert len(inp.get_shape().as_list()) == 3, 'Graph Convolutional Layer needs 3D input.'

        self.in_shape = tuple(inp.get_shape().as_list())
        if self.in_filters is None:
            self.in_filters = self.in_shape[-1]
        assert self.in_filters == self.in_shape[-1], 'Convolution was built for different number of input filters'

        N, M, self.in_filters = inp.get_shape()
        N, M, Fin = int(N), int(M), int(self.in_filters)
        # Rescale Laplacian and store as a TF sparse tensor. Copy to not modify the shared L.
        L = scipy.sparse.csr_matrix(self.L)
        L = self.rescale_L(L, lmax=2)
        L = L.tocoo()
        indices = np.column_stack((L.row, L.col))
        L = tf.SparseTensor(indices, L.data, L.shape)
        L = tf.sparse_reorder(L)
        # Transform to Chebyshev basis
        x0 = tf.transpose(inp, perm=[1, 2, 0])  # M x Fin x N
        x0 = tf.reshape(x0, [M, Fin * N])  # M x Fin*N
        x = tf.expand_dims(x0, 0)  # 1 x M x Fin*N

        def concat(x, x_):
            x_ = tf.expand_dims(x_, 0)  # 1 x M x Fin*N
            return tf.concat([x, x_], 0)  # K x M x Fin*N

        # recursive computation of the filters
        if self.K > 1:
            x1 = tf.sparse_tensor_dense_matmul(L, x0)
            x = concat(x, x1)
        for k in range(2, self.K):
            x2 = 2 * tf.sparse_tensor_dense_matmul(L, x1) - x0  # M x Fin*N
            x = concat(x, x2)
            x0, x1 = x1, x2
        x = tf.reshape(x, [self.K, M, Fin, N])  # K x M x Fin x N
        x = tf.transpose(x, perm=[3, 1, 2, 0])  # N x M x Fin x K
        x = tf.reshape(x, [N * M, Fin * self.K])  # N*M x Fin*K

        # Filter: Fin*out_filters filters of order K, i.e. one filterbank per feature pair.
        w_shape = [Fin * self.K, self.out_filters]
        initial = tf.truncated_normal_initializer(0, 0.1)
        self._w = tf.get_variable('w', shape=w_shape, dtype=tf.float32, initializer=initial,
                                  collections=self.WEIGHT_COLLECTIONS)
        self.variables.append(self._w)

        x = tf.matmul(x, self._w)  # N*M x out_filters
        x = tf.reshape(x, [N, M, self.out_filters])  # N x M x out_filters

        if self.bias == 'b1':
            b_shape = [1, 1, self.out_filters]
        elif self.bias == 'b2':
            b_shape = [1, M, self.out_filters]

        self._b = tf.get_variable("b", shape=b_shape, initializer=tf.constant_initializer(),
                                  collections=self.BIAS_COLLECTIONS)
        outp = x + self._b

        return outp
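Note: the x0/x1/x2 loop above is the Chebyshev three-term recurrence on the rescaled Laplacian \tilde{L}: \bar{x}_0 = x, \bar{x}_1 = \tilde{L}x, and \bar{x}_k = 2\tilde{L}\,\bar{x}_{k-1} - \bar{x}_{k-2} for k \ge 2. The K basis tensors are stacked and then mixed by the learned filter weights w.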
Example #7
    def K(self, X1, X2=None):
        r"""
        Vectorized kernel calc.
        Following notation from Beck (2017), i.e have tensors S,D,Kpp,Kp
        Input is two tensors of shape (# strings , # characters)
        and we calc the pair-wise kernel calcs between the elements (i.e n kern calcs for two lists of length n)
        D is the tensor than unrolls the recursion and allows vecotrizaiton
        """

        # Turn our inputs into lists of integers using one-hot embedding
        # first split up strings and pad to fixed length and prep for gpu
        # pad until all have length of self.maxlen
        # turn into one-hot  i.e. shape (# strings, #characters+1, alphabet size)
        X1 = tf.strings.split(tf.squeeze(X1, 1)).to_tensor(
            "PAD", shape=[None, self.maxlen])
        X1 = self.table.lookup(X1)
        # keep track of original input sizes
        X1_shape = tf.shape(X1)[0]
        X1 = tf.one_hot(X1, self.alphabet_size + 1, dtype=tf.float64)
        if X2 is None:
            X2 = X1
            X2_shape = X1_shape
            self.symmetric = True
        else:
            self.symmetric = False
            X2 = tf.strings.split(tf.squeeze(X2, 1)).to_tensor(
                "PAD", shape=[None, self.maxlen])
            X2 = self.table.lookup(X2)
            X2_shape = tf.shape(X2)[0]
            X2 = tf.one_hot(X2, self.alphabet_size + 1, dtype=tf.float64)

        # prep the decay tensors
        self._precalc()

        # combine all target strings and remove the ones in the first column that encode the padding (i.e. we don't want them to count as a match)
        X_full = tf.concat([X1, X2], 0)[:, :, 1:]

        # get indices of all possible pairings from X1 and X2
        # this way allows the maximum number of kernel calcs to be squished onto the GPU (rather than just doing individual rows of the gram matrix)
        indicies_2, indicies_1 = tf.meshgrid(
            tf.range(0, X1_shape), tf.range(X1_shape,
                                            tf.shape(X_full)[0]))
        indicies = tf.concat(
            [tf.reshape(indicies_1, (-1, 1)),
             tf.reshape(indicies_2, (-1, 1))],
            axis=1)
        if self.symmetric:
            # if symmetric then only calc upper matrix (fill in rest later)
            indicies = tf.boolean_mask(
                indicies,
                tf.greater_equal(indicies[:, 1] + X1_shape, indicies[:, 0]))
        else:
            # if not symmetric need to calculate some extra kernel evals for the normalization later on
            indicies = tf.concat([
                indicies,
                tf.tile(tf.expand_dims(tf.range(tf.shape(X_full)[0]), 1),
                        (1, 2))
            ], 0)

        # make similarity matrix
        self.sim = tf.linalg.diag(self.kappa)
        #self.sim = self.sim/tf.math.maximum(tf.reduce_max(self.sim),1)

        # make kernel calcs in batches
        num_batches = tf.cast(tf.math.ceil(
            tf.shape(indicies)[0] / self.batch_size),
                              dtype=tf.int32)
        k_split = tf.TensorArray(tf.float64,
                                 size=num_batches,
                                 clear_after_read=False,
                                 infer_shape=False)

        # iterate through batches
        for j in tf.range(num_batches):
            # collect strings for this batch
            indicies_batch = indicies[self.batch_size * j:self.batch_size *
                                      (j + 1)]
            X_batch = tf.gather(X_full, indicies_batch[:, 0], axis=0)
            X2_batch = tf.gather(X_full, indicies_batch[:, 1], axis=0)

            # collect results for the batch
            result = self.kernel_calc(X_batch, X2_batch)
            k_split = k_split.write(j, result)

        # combine batch results
        k = tf.expand_dims(k_split.concat(), 1)
        k_split.close()

        # put results into the right places in the gram matrix and normalize
        if self.symmetric:
            # if symmetric then only put in top triangle (inc diag)
            mask = tf.linalg.band_part(
                tf.ones((X1_shape, X2_shape), dtype=tf.int64), 0, -1)
            non_zero = tf.not_equal(mask, tf.constant(0, dtype=tf.int64))

            # Extracting the indices of upper triangle elements
            indices = tf.where(non_zero)
            out = tf.SparseTensor(indices,
                                  tf.squeeze(k),
                                  dense_shape=tf.cast((X1_shape, X2_shape),
                                                      dtype=tf.int64))
            k_results = tf.sparse.to_dense(out)

            # add in missing elements (lower diagonal)
            k_results = k_results + tf.linalg.set_diag(
                tf.transpose(k_results), tf.zeros(X1_shape, dtype=tf.float64))

            # normalise
            X_diag_Ks = tf.linalg.diag_part(k_results)
            norm = tf.tensordot(X_diag_Ks, X_diag_Ks, axes=0)
            k_results = tf.divide(k_results, tf.sqrt(norm))
        else:
            # otherwise can just reshape into gram matrix
            # but first take extra kernel calcs off end of k and use them to normalise
            X_diag_Ks = tf.reshape(
                k[X1_shape * X2_shape:X1_shape * X2_shape + X1_shape], (-1, ))
            X2_diag_Ks = tf.reshape(k[-X2_shape:], (-1, ))
            k = k[0:X1_shape * X2_shape]
            k_results = tf.transpose(tf.reshape(k, [X2_shape, X1_shape]))

            # normalise
            norm = tf.tensordot(X_diag_Ks, X2_diag_Ks, axes=0)
            k_results = tf.divide(k_results, tf.sqrt(norm))

        return k_results
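Note: the batching here accumulates per-batch results in a TensorArray inside an autograph loop. A minimal sketch of that pattern, assuming TF 2.x with autograph (as the tf.range loops above imply); tf.square is a hypothetical stand-in for self.kernel_calc:

import tensorflow as tf

@tf.function
def batched_apply(data, batch_size=4):
    num_batches = tf.cast(
        tf.math.ceil(tf.shape(data)[0] / batch_size), tf.int32)
    acc = tf.TensorArray(tf.float64, size=num_batches, infer_shape=False)
    for j in tf.range(num_batches):
        batch = data[batch_size * j:batch_size * (j + 1)]
        acc = acc.write(j, tf.square(batch))  # per-batch "kernel" computation
    return acc.concat()

print(batched_apply(tf.random.normal([10], dtype=tf.float64)))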
Example #8
    threshold_v = tf.constant(35.0)

with tf.name_scope('neurons'):
    v = tf.Variable(tf.fill([n], resting_v), name="potential")
    u = tf.Variable(b * c, name="recovery")

with tf.name_scope('synapses'):
    n_s = tf.constant(100, name='n_s', dtype=tf.int32)
    prepost = tf.Variable(tf.random_uniform([tf.cast(n_s, tf.int64), 2],
                                            0,
                                            tf.cast(n, tf.int64),
                                            dtype=tf.int64),
                          dtype=tf.int64)
    s = tf.SparseTensor(
        prepost,
        tf.fill([n_s], 1),
        dense_shape=[tf.cast(n, tf.int64),
                     tf.cast(n, tf.int64)])
    w = tf.constant(30.0)

with tf.name_scope('input'):
    rand_n = tf.Variable(tf.random_uniform([n], 0, 2, dtype=tf.int32))
    rand_i = tf.Variable(tf.random_uniform([n], 0, 10.0, dtype=tf.float32))
    i_in = tf.Variable(tf.cast(rand_n, tf.float32) * rand_i)
    i_op = tf.assign(v, v + i_in)

with tf.name_scope('fire'):
    fire_log = tf.Variable(tf.zeros([n], dtype=tf.int32), dtype=tf.int32)
    firing_op = s * tf.cast(tf.greater(v, 30.0), tf.int32)
    potentiate_op = tf.assign(
        v,
Example #9
    iterator = dataset.make_one_shot_iterator()
    next_element = iterator.get_next()
    return next_element


tf.set_random_seed(123)
global_step = tf.get_variable('global_step', [],
                              initializer=tf.constant_initializer(0),
                              trainable=False)

next_element = build_dataset(filenames)
label, features = next_element['label'], next_element['features']
values = tf.strings.to_hash_bucket_strong(features.values, num_buckets,
                                          [1234, 5678])
feature_ids = tf.SparseTensor(indices=features.indices,
                              values=values,
                              dense_shape=features.dense_shape)

weight_initer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
embeddings = tf.get_variable('embeddings',
                             shape=(num_buckets, emb_dim),
                             dtype=tf.float32,
                             initializer=weight_initer)
emb = tf.nn.embedding_lookup_sparse(embeddings,
                                    feature_ids,
                                    sp_weights=None,
                                    combiner='mean',
                                    max_norm=None,
                                    name=None)
model = LR(emb_dim, use_bias=True)
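Note: the hashing-plus-sparse-lookup pattern above, reduced to a toy example, assuming TF 1.x (two rows with a variable number of string features; all names here are illustrative):

import tensorflow as tf

num_buckets, emb_dim = 1000, 8
features = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                           values=["f1", "f2", "f3"],
                           dense_shape=[2, 2])
hashed = tf.strings.to_hash_bucket_strong(features.values, num_buckets,
                                          [1234, 5678])
feature_ids = tf.SparseTensor(features.indices, hashed, features.dense_shape)
embeddings = tf.get_variable('emb_demo',
                             shape=(num_buckets, emb_dim),
                             initializer=tf.truncated_normal_initializer(
                                 mean=0.0, stddev=0.01))
emb = tf.nn.embedding_lookup_sparse(embeddings,
                                    feature_ids,
                                    sp_weights=None,
                                    combiner='mean')  # shape (2, emb_dim)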
Example #10
def model(features, labels, mode, params):
    batch_size = params['batch_size']
    doc_max_length = params['doc_max_length']
    embed_dim = params['embed_dim']
    conf = params['config']

    if 'doc_indices' in features:
        doc = tf.SparseTensor(indices=features['doc_indices'],
                              values=features['doc_values'],
                              dense_shape=(batch_size * doc_max_length, 64))
        doc_net = cdssm_tower(mode, doc, features['doc_length'],
                              doc_max_length, embed_dim, 'doc', params,
                              conf['activation'])
    else:
        bert_config = r'bert\xlm_bert_convert_dis_query_layer3\xlm_config_dis.json'
        doc_net = bert(bert_config, mode, params['hidden_units'][-1],
                       features['doc_ids'], features['doc_mask'],
                       features['doc_type'], conf['activation'],
                       params['init_checkpoint'])
    con_net = content_net(features['content'], doc_net.shape[-1], mode,
                          conf['activation'])
    tf.summary.histogram('doc_net', doc_net)
    tf.summary.histogram('con_net', con_net)

    # l2-normalize
    if conf['query_l2']:
        doc_net = tf.nn.l2_normalize(doc_net,
                                     axis=1,
                                     epsilon=1e-3,
                                     name='l2_normalize_query')
    if conf['con_l2']:
        con_net = tf.nn.l2_normalize(con_net,
                                     axis=1,
                                     epsilon=1e-3,
                                     name='l2_normalize_content')

    if mode == tf.estimator.ModeKeys.EVAL:
        # split multiple (2) tests:
        doc_tests = tf.split(doc_net, 2, 0)
        con_tests = tf.split(con_net, 2, 0)
        doc_net = doc_tests[0]
        con_net = con_tests[0]
        doc_tests = doc_tests[1:]
        con_tests = con_tests[1:]
    # similarity matrix
    similarities = conf['sfunc'](doc_net, con_net)

    # loss
    loss, similarity, accuracy, loss_name = conf['loss'](mode, params,
                                                         similarities,
                                                         conf['version'])

    total_loss = loss + tf.add_n(tf.losses.get_regularization_losses())

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'url': features['url'],
            'cos_sim': similarity,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    tf.summary.scalar(loss_name, loss)
    tf.summary.scalar("accuracy", accuracy)

    if mode == tf.estimator.ModeKeys.EVAL:
        metrics = {
            loss_name: tf.metrics.mean(loss),
            'accuracy': tf.metrics.mean(accuracy)
        }
        for i, (doc_net_, con_net_) in enumerate(zip(doc_tests, con_tests)):
            similarities_ = conf['sfunc'](doc_net_, con_net_)
            loss_, similarity_, accuracy_, loss_name_ = conf['loss'](
                mode, params, similarities_, conf['version'])
            metrics['accuracy_gdi{}'.format(i)] = tf.metrics.mean(accuracy_)
        return tf.estimator.EstimatorSpec(mode,
                                          loss=total_loss,
                                          eval_metric_ops=metrics)
    """
    if params['init_checkpoint']:
        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names) = get_assignment_map_from_checkpoint(tvars, params['init_checkpoint'])
        tf.train.init_from_checkpoint( params['init_checkpoint'], assignment_map)
    """
    # Create training op.
    assert mode == tf.estimator.ModeKeys.TRAIN
    with tf.variable_scope('train_op'):
        global_step = tf.train.get_global_step()
        learning_rate = tf.train.exponential_decay(
            learning_rate=params['starter_learning_rate'],
            global_step=global_step,
            decay_steps=params['stepvalue'],
            decay_rate=params['gamma'],
            staircase=True)
        if 'optimizer' in conf:
            optimizer = conf['optimizer'](learning_rate)
        else:
            optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.minimize(total_loss, global_step=global_step)
    tf.summary.scalar("learning_rate", learning_rate)
    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
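Note: when query_l2/con_l2 are enabled, both towers are unit-length, so if conf['sfunc'] is a plain dot product the similarity reduces to the cosine, \mathrm{sim}(d, c) = d \cdot c / (\lVert d \rVert\,\lVert c \rVert); the epsilon in l2_normalize only guards against division by zero.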
Example #11
    def build_trt_forward_pass_graph(self,
                                     input_tensors,
                                     gpu_id=0,
                                     checkpoint=None):
        """Wrapper around _build_forward_pass_graph which converts the graph
        using TF-TRT."""
        import tensorflow.contrib.tensorrt as trt
        # Default parameters
        trt_params = {
            "batch_size_per_gpu": 64,
            "trt_max_workspace_size_bytes": (4096 << 20) - 1000,
            "trt_precision_mode": "FP32",
            "trt_minimum_segment_size": 10,
            "trt_is_dynamic_op": True,
            "trt_maximum_cached_engines": 1
        }
        # Update params from user config
        for key in trt_params:
            if key in self.params:
                trt_params[key] = self.params[key]
        # Create temporary graph which will contain the native TF graph
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        temp_graph = tf.Graph()
        input_map = {}
        # We have to deconstruct SparseTensors into their 3 internal tensors
        # (indices, values, dense_shape). This maps each tensor name to a list
        # of all 3 tensor names in its SparseTensor.
        output_sparse_tensor_map = {}
        with temp_graph.as_default() as tf_graph:
            with tf.Session(config=tf_config) as tf_sess:
                # Create temporary input placeholders used to build native TF graph
                input_placeholders = {'source_tensors': []}
                for i, original_input in enumerate(
                        input_tensors['source_tensors']):
                    name = 'input_map_%d' % i
                    input_placeholders['source_tensors'].append(
                        tf.placeholder(shape=original_input.shape,
                                       dtype=original_input.dtype,
                                       name=name))
                    # And map it back to original input
                    input_map[name] = original_input
                # Build native graph
                loss, outputs = self._build_forward_pass_graph(
                    input_placeholders, gpu_id=gpu_id)
                # Gather output tensors
                output_node_names = []
                output_node_names_and_ports = []
                for x in outputs:
                    if isinstance(x, tf.SparseTensor):
                        components = [
                            x.indices.name, x.values.name, x.dense_shape.name
                        ]
                        fetch_names = [
                            tensor.split(':')[0] for tensor in components
                        ]
                        # Remove duplicates (i.e. if SparseTensor is output of one node)
                        fetch_names = list(set(fetch_names))
                        output_node_names.extend(fetch_names)
                        output_node_names_and_ports.extend(components)
                        # Add all components to map so SparseTensor can be
                        # reconstructed from tensor components which will be
                        # outputs of new graph
                        for tensor in components:
                            output_sparse_tensor_map[tensor] = components
                    else:
                        output_node_names.append(x.name.split(':')[0])
                        output_node_names_and_ports.append(x.name)
                # Restore checkpoint here because we have to freeze the graph
                tf_saver = tf.train.Saver()
                tf_saver.restore(save_path=checkpoint, sess=tf_sess)
                frozen_graph = tf.graph_util.convert_variables_to_constants(
                    tf_sess,
                    tf_sess.graph_def,
                    output_node_names=output_node_names)
                num_nodes = len(frozen_graph.node)
                print('Converting graph using TensorFlow-TensorRT...')
                frozen_graph = trt.create_inference_graph(
                    input_graph_def=frozen_graph,
                    outputs=output_node_names,
                    max_batch_size=trt_params["batch_size_per_gpu"],
                    max_workspace_size_bytes=trt_params[
                        "trt_max_workspace_size_bytes"],
                    precision_mode=trt_params["trt_precision_mode"],
                    minimum_segment_size=trt_params[
                        "trt_minimum_segment_size"],
                    is_dynamic_op=trt_params["trt_is_dynamic_op"],
                    maximum_cached_engines=trt_params[
                        "trt_maximum_cached_engines"])
                # Remove unused inputs from input_map.
                inputs_to_remove = []
                for k in input_map:
                    if k not in [node.name for node in frozen_graph.node]:
                        inputs_to_remove.append(k)
                for k in inputs_to_remove:
                    del input_map[k]
                print('Total node count before and after TF-TRT conversion:',
                      num_nodes, '->', len(frozen_graph.node))
                print(
                    'TRT node count:',
                    len([
                        1 for n in frozen_graph.node
                        if str(n.op) == 'TRTEngineOp'
                    ]))
        # Perform calibration for INT8 precision mode
        if self.params.get("trt_precision_mode", "FP32").upper() == 'INT8':
            with tf.Session(config=tf_config) as tf_sess:
                calib_graph = frozen_graph
                num_iterations = 10
                print('Calibrating INT8...')
                outputs = tf.import_graph_def(
                    calib_graph,
                    input_map=input_map,
                    return_elements=output_node_names_and_ports,
                    name='')
                self._num_objects_per_step = [
                    self._get_num_objects_per_step(worker_id)
                    for worker_id in range(self.num_gpus)
                ]
                results_per_batch = iterate_data(self,
                                                 tf_sess,
                                                 compute_loss=False,
                                                 mode='infer',
                                                 verbose=False,
                                                 num_steps=num_iterations)
                frozen_graph = trt.calib_graph_to_infer_graph(calib_graph)
                del calib_graph
                print('INT8 graph created.')
                print('Nodes INT8:', len(frozen_graph.node))
        # Import TRT converted graph to default graph, mapping it to the
        # original input tensors.
        outputs = tf.import_graph_def(
            frozen_graph,
            input_map=input_map,
            return_elements=output_node_names_and_ports,
            name='')
        # Reconstruct SparseTensors
        final_outputs = []
        for tensor in outputs:
            if tensor.name in output_sparse_tensor_map:
                component_names = output_sparse_tensor_map[tensor.name]
                # Find tensors in outputs for components
                component_tensors = [[x for x in outputs if x.name == name][0]
                                     for name in component_names]
                # Remove all components from outputs so we don't create
                # duplicates of this SparseTensor
                for x in component_tensors:
                    if x in outputs:
                        outputs.remove(x)
                final_outputs.append(tf.SparseTensor(*component_tensors))
            else:
                final_outputs.append(tensor)
        return loss, final_outputs
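Note: the SparseTensor round trip above, in isolation (TF 1.x assumed): a SparseTensor cannot be named as a graph output directly, so it travels through freezing and import_graph_def as its three component tensors and is rebuilt afterwards.

import tensorflow as tf

sp = tf.SparseTensor(indices=[[0, 0]], values=[1.0], dense_shape=[2, 2])
components = [sp.indices, sp.values, sp.dense_shape]  # plain dense tensors
rebuilt = tf.SparseTensor(*components)                # reconstruct after import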
Example #12
    def build_model(self):
        with tf.device('/gpu:0'):
            with tf.variable_scope('deephawkes') as scope:
                with tf.variable_scope('embedding'):
                    x_vector = tf.nn.dropout(tf.nn.embedding_lookup(self.embedding, self.x), 
                                             self.dropout_prob)
                    # (total_number of sequence, n_steps, n_input)
                with tf.variable_scope('RNN'):
                    x_vector = tf.transpose(x_vector, [1,0,2])
                    # (n_steps, total_number of sequence, n_input)
                    x_vector = tf.reshape(x_vector, [-1, self.n_input])
                    # (n_steps*total_number of sequence, n_input)


                    # Split to get a list of 'n_steps' tensors of shape (n_sequences*batch_size, n_input)
                    x_vector = tf.split(x_vector, self.n_steps, 0)

                    outputs, _ = rnn.static_rnn(self.gru_fw_cell, x_vector, dtype=tf.float32)

                    hidden_states = tf.transpose(tf.stack(outputs), [1, 0, 2])
                    # (total_number of sequence, n_steps, n_hidden_gru)

                    # filter according to the length
                    hidden_states = tf.reshape(hidden_states,[-1,2*self.n_hidden_gru])
                    #   (total_number of sequence*n_step, 2*n_hidden_gru)

                    rnn_index = tf.reshape(self.rnn_index,[-1,1])
                    #   (total_number of sequence*n_step,1)


                    hidden_states = tf.multiply(rnn_index,hidden_states)
                    #   (total_number of sequence*n_step, 2*n_hidden_gru)

                    hidden_states = tf.reshape(hidden_states,[-1,self.n_steps,2*self.n_hidden_gru])
                    #   (total_number of sequence,n_step,2*n_hidden_gru)

                    hidden_states = tf.reduce_sum(hidden_states, reduction_indices=[1])
                    #   (total_number of sequence,2*n_hidden_gru)

                with tf.variable_scope('SumPooling'):
                    # sumpooling

                    time_weight = tf.reshape(self.time_weight,[-1,1])
                    #   (n_time_interval,1)
                    #   time_interval_index    (total_number of sequence,n_time_interval)
                    time_weight = tf.matmul(self.time_interval_index,time_weight)
                    #   (total_number of sequence,1)

                    hidden_graph_value = tf.multiply(time_weight,hidden_states)
                    #   (total_number of sequence,2*n_hidden_gru)

                    hidden_graph_value = tf.reshape(hidden_graph_value,[-1])
                    #   (total_number of sequence*2*n_hidden_gru)

                    hidden_graph = tf.SparseTensor(indices = self.x_indict, values=hidden_graph_value, dense_shape=[self.batch_size, self.n_sequences, 2 * self.n_hidden_gru])

                    hidden_graph = tf.sparse_reduce_sum(hidden_graph, axis=1)
                    # self.batch_size, 2 * self.n_hidden_gru
        
                with tf.variable_scope('dense'):
                    dense1 = self.activation(tf.add(tf.matmul(hidden_graph, self.weights['dense1']), self.biases['dense1']))
                    dense2 = self.activation(tf.add(tf.matmul(dense1, self.weights['dense2']), self.biases['dense2']))
                    pred = self.activation(tf.add(tf.matmul(dense2, self.weights['out']), self.biases['out']))
                    print(pred.get_shape())
                return pred
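Note: the SumPooling scope above scatters weighted hidden states into a sparse (batch, sequence, feature) tensor and sums out the sequence axis. The same step in isolation, assuming TF 1.x and toy values:

import tensorflow as tf

vals = tf.constant([1., 2., 3.])
idx = [[0, 0, 0], [0, 1, 0], [1, 0, 0]]  # (batch, sequence, feature) positions
sp = tf.SparseTensor(indices=idx, values=vals, dense_shape=[2, 2, 1])
pooled = tf.sparse_reduce_sum(sp, axis=1)  # dense (2, 1): [[3.], [3.]]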
Example #13
def main():
  # Change these for different models
  FEATURE_SIZE = 124
  LABEL_SIZE = 2
  TRAIN_TFRECORDS_FILE = "data/a8a_train.libsvm.tfrecords"
  VALIDATE_TFRECORDS_FILE = "data/a8a_test.libsvm.tfrecords"

  learning_rate = FLAGS.learning_rate
  epoch_number = FLAGS.epoch_number
  thread_number = FLAGS.thread_number
  batch_size = FLAGS.batch_size
  validate_batch_size = FLAGS.validate_batch_size
  min_after_dequeue = FLAGS.min_after_dequeue
  capacity = thread_number * batch_size + min_after_dequeue
  mode = FLAGS.mode
  checkpoint_dir = FLAGS.checkpoint_dir
  if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
  tensorboard_dir = FLAGS.tensorboard_dir
  if not os.path.exists(tensorboard_dir):
    os.makedirs(tensorboard_dir)

  def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    return serialized_example

  # Read TFRecords files for training
  filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(TRAIN_TFRECORDS_FILE),
      num_epochs=epoch_number)
  serialized_example = read_and_decode(filename_queue)
  batch_serialized_example = tf.train.shuffle_batch(
      [serialized_example],
      batch_size=batch_size,
      num_threads=thread_number,
      capacity=capacity,
      min_after_dequeue=min_after_dequeue)
  features = tf.parse_example(batch_serialized_example,
                              features={
                                  "label": tf.FixedLenFeature(
                                      [], tf.float32),
                                  "ids": tf.VarLenFeature(tf.int64),
                                  "values": tf.VarLenFeature(tf.float32),
                              })
  batch_labels = features["label"]
  batch_ids = features["ids"]
  batch_values = features["values"]

  # Read TFRecords file for validation
  validate_filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(VALIDATE_TFRECORDS_FILE),
      num_epochs=epoch_number)
  validate_serialized_example = read_and_decode(validate_filename_queue)
  validate_batch_serialized_example = tf.train.shuffle_batch(
      [validate_serialized_example],
      batch_size=validate_batch_size,
      num_threads=thread_number,
      capacity=capacity,
      min_after_dequeue=min_after_dequeue)
  validate_features = tf.parse_example(
      validate_batch_serialized_example,
      features={
          "label": tf.FixedLenFeature(
              [], tf.float32),
          "ids": tf.VarLenFeature(tf.int64),
          "values": tf.VarLenFeature(tf.float32),
      })
  validate_batch_labels = validate_features["label"]
  validate_batch_ids = validate_features["ids"]
  validate_batch_values = validate_features["values"]

  # Define the model
  input_units = FEATURE_SIZE
  hidden1_units = 128
  hidden2_units = 32
  hidden3_units = 8
  output_units = LABEL_SIZE

  def full_connect(inputs, weights_shape, biases_shape, is_train=True):
    with tf.device('/cpu:0'):
      weights = tf.get_variable("weights",
                                weights_shape,
                                initializer=tf.random_normal_initializer())
      biases = tf.get_variable("biases",
                               biases_shape,
                               initializer=tf.random_normal_initializer())
      layer = tf.matmul(inputs, weights) + biases

      if FLAGS.enable_bn and is_train:
        mean, var = tf.nn.moments(layer, axes=[0])
        scale = tf.get_variable("scale",
                                biases_shape,
                                initializer=tf.random_normal_initializer())
        shift = tf.get_variable("shift",
                                biases_shape,
                                initializer=tf.random_normal_initializer())
        layer = tf.nn.batch_normalization(layer, mean, var, shift, scale,
                                          FLAGS.bn_epsilon)
    return layer

  def sparse_full_connect(sparse_ids,
                          sparse_values,
                          weights_shape,
                          biases_shape,
                          is_train=True):
    with tf.device('/cpu:0'):
      weights = tf.get_variable("weights",
                                weights_shape,
                                initializer=tf.random_normal_initializer())
      biases = tf.get_variable("biases",
                               biases_shape,
                               initializer=tf.random_normal_initializer())
    return tf.nn.embedding_lookup_sparse(weights,
                                         sparse_ids,
                                         sparse_values,
                                         combiner="sum") + biases

  def full_connect_relu(inputs, weights_shape, biases_shape, is_train=True):
    return tf.nn.relu(full_connect(inputs, weights_shape, biases_shape,
                                   is_train))

  def dnn_inference(sparse_ids, sparse_values, is_train=True):
    with tf.variable_scope("layer1"):
      sparse_layer = sparse_full_connect(sparse_ids, sparse_values,
                                         [input_units, hidden1_units],
                                         [hidden1_units], is_train)
      layer = tf.nn.relu(sparse_layer)
    with tf.variable_scope("layer2"):
      layer = full_connect_relu(layer, [hidden1_units, hidden2_units],
                                [hidden2_units], is_train)
    with tf.variable_scope("layer3"):
      layer = full_connect_relu(layer, [hidden2_units, hidden3_units],
                                [hidden3_units], is_train)
    if FLAGS.enable_dropout and is_train:
      layer = tf.nn.dropout(layer, FLAGS.dropout_keep_prob)
    with tf.variable_scope("output"):
      layer = full_connect(layer, [hidden3_units, output_units],
                           [output_units], is_train)
    return layer

  def lr_inference(sparse_ids, sparse_values, is_train=True):
    with tf.variable_scope("logistic_regression"):
      layer = sparse_full_connect(sparse_ids, sparse_values,
                                  [input_units, output_units], [output_units])
    return layer

  def wide_and_deep_inference(sparse_ids, sparse_values, is_train=True):
    return lr_inference(sparse_ids, sparse_values, is_train) + dnn_inference(
        sparse_ids, sparse_values, is_train)

  def inference(sparse_ids, sparse_values, is_train=True):
    print("Use the model: {}".format(FLAGS.model))
    if FLAGS.model == "lr":
      return lr_inference(sparse_ids, sparse_values, is_train)
    elif FLAGS.model == "dnn":
      return dnn_inference(sparse_ids, sparse_values, is_train)
    elif FLAGS.model == "wide_and_deep":
      return wide_and_deep_inference(sparse_ids, sparse_values, is_train)
    else:
      print("Unknown model, exit now")
      exit(1)

  logits = inference(batch_ids, batch_values, True)
  batch_labels = tf.to_int64(batch_labels)
  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits,
                                                                 batch_labels)
  loss = tf.reduce_mean(cross_entropy, name='loss')

  print("Use the optimizer: {}".format(FLAGS.optimizer))
  if FLAGS.optimizer == "sgd":
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  elif FLAGS.optimizer == "momentum":
    # optimizer = tf.train.MomentumOptimizer(learning_rate)
    print("Not support optimizer: {} yet, exit now".format(FLAGS.optimizer))
    exit(1)
  elif FLAGS.optimizer == "adadelta":
    optimizer = tf.train.AdadeltaOptimizer(learning_rate)
  elif FLAGS.optimizer == "adagrad":
    optimizer = tf.train.AdagradOptimizer(learning_rate)
  elif FLAGS.optimizer == "adam":
    optimizer = tf.train.AdamOptimizer(learning_rate)
  elif FLAGS.optimizer == "ftrl":
    optimizer = tf.train.FtrlOptimizer(learning_rate)
  elif FLAGS.optimizer == "rmsprop":
    optimizer = tf.train.RMSPropOptimizer(learning_rate)
  else:
    print("Unknow optimizer: {}, exit now".format(FLAGS.optimizer))
    exit(1)

  with tf.device('/cpu:0'):
    global_step = tf.Variable(0, name='global_step', trainable=False)
  train_op = optimizer.minimize(loss, global_step=global_step)

  tf.get_variable_scope().reuse_variables()

  # Define accuracy op for train data
  train_accuracy_logits = inference(batch_ids, batch_values, False)
  train_softmax = tf.nn.softmax(train_accuracy_logits)
  train_correct_prediction = tf.equal(
      tf.argmax(train_softmax, 1), batch_labels)
  train_accuracy = tf.reduce_mean(tf.cast(train_correct_prediction,
                                          tf.float32))

  # Define auc op for train data
  batch_labels = tf.cast(batch_labels, tf.int32)
  sparse_labels = tf.reshape(batch_labels, [-1, 1])
  derived_size = tf.shape(batch_labels)[0]
  indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
  concated = tf.concat(1, [indices, sparse_labels])
  outshape = tf.pack([derived_size, LABEL_SIZE])
  new_train_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
  _, train_auc = tf.contrib.metrics.streaming_auc(train_softmax,
                                                  new_train_batch_labels)

  # Define accuracy op for validate data
  validate_accuracy_logits = inference(validate_batch_ids,
                                       validate_batch_values, False)
  validate_softmax = tf.nn.softmax(validate_accuracy_logits)
  validate_batch_labels = tf.to_int64(validate_batch_labels)
  validate_correct_prediction = tf.equal(
      tf.argmax(validate_softmax, 1), validate_batch_labels)
  validate_accuracy = tf.reduce_mean(tf.cast(validate_correct_prediction,
                                             tf.float32))

  # Define auc op for validate data
  validate_batch_labels = tf.cast(validate_batch_labels, tf.int32)
  sparse_labels = tf.reshape(validate_batch_labels, [-1, 1])
  derived_size = tf.shape(validate_batch_labels)[0]
  indices = tf.reshape(tf.range(0, derived_size, 1), [-1, 1])
  concated = tf.concat(1, [indices, sparse_labels])
  outshape = tf.pack([derived_size, LABEL_SIZE])
  new_validate_batch_labels = tf.sparse_to_dense(concated, outshape, 1.0, 0.0)
  _, validate_auc = tf.contrib.metrics.streaming_auc(validate_softmax,
                                                     new_validate_batch_labels)

  # Define inference op
  sparse_index = tf.placeholder(tf.int64, [None, 2])
  sparse_ids = tf.placeholder(tf.int64, [None])
  sparse_values = tf.placeholder(tf.float32, [None])
  sparse_shape = tf.placeholder(tf.int64, [2])
  inference_ids = tf.SparseTensor(sparse_index, sparse_ids, sparse_shape)
  inference_values = tf.SparseTensor(sparse_index, sparse_values, sparse_shape)
  inference_logits = inference(inference_ids, inference_values, False)
  inference_softmax = tf.nn.softmax(inference_logits)
  inference_op = tf.argmax(inference_softmax, 1)

  # Initialize saver and summary
  checkpoint_file = checkpoint_dir + "/checkpoint.ckpt"
  steps_to_validate = FLAGS.steps_to_validate
  tf.scalar_summary("loss", loss)
  tf.scalar_summary("train_accuracy", train_accuracy)
  tf.scalar_summary("train_auc", train_auc)
  tf.scalar_summary("validate_accuracy", validate_accuracy)
  tf.scalar_summary("validate_auc", validate_auc)
  saver = tf.train.Saver()
  keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1])
  keys = tf.identity(keys_placeholder)

  # Create session to run
  with tf.Session() as sess:
    summary_op = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter(tensorboard_dir, sess.graph)
    sess.run(tf.initialize_all_variables())
    sess.run(tf.initialize_local_variables())

    if mode == "train":
      ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
      if ckpt and ckpt.model_checkpoint_path:
        print("Continue training from the model {}".format(
            ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)

      # Get coordinator and run queues to read data
      coord = tf.train.Coordinator()
      threads = tf.train.start_queue_runners(coord=coord, sess=sess)

      start_time = datetime.datetime.now()
      try:
        while not coord.should_stop():
          _, loss_value, step = sess.run([train_op, loss, global_step])

          if step % steps_to_validate == 0:
            train_accuracy_value, train_auc_value, validate_accuracy_value, validate_auc_value, summary_value = sess.run(
                [train_accuracy, train_auc, validate_accuracy, validate_auc,
                 summary_op])
            end_time = datetime.datetime.now()
            print(
                "[{}] Step: {}, loss: {}, train_acc: {}, train_auc: {}, valid_acc: {}, valid_auc: {}".format(
                    end_time - start_time, step, loss_value,
                    train_accuracy_value, train_auc_value,
                    validate_accuracy_value, validate_auc_value))

            writer.add_summary(summary_value, step)
            saver.save(sess, checkpoint_file, global_step=step)
            start_time = end_time
      except tf.errors.OutOfRangeError:
        print("Done training after reading all data")
        print("Exporting trained model to {}".format(FLAGS.model_path))
        model_exporter = exporter.Exporter(saver)
        model_exporter.init(
            sess.graph.as_graph_def(),
            named_graph_signatures={
                'inputs': exporter.generic_signature({"keys": keys_placeholder,
                                                      "indexs": sparse_index,
                                                      "ids": sparse_ids,
                                                      "values": sparse_values,
                                                      "shape": sparse_shape}),
                'outputs': exporter.generic_signature(
                    {"keys": keys,
                     "softmax": inference_softmax,
                     "prediction": inference_op})
            })
        model_exporter.export(FLAGS.model_path,
                              tf.constant(FLAGS.export_version), sess)
      finally:
        coord.request_stop()

      # Wait for threads to exit
      coord.join(threads)

    elif mode == "export":
      print("Start to export model directly")

      # Load the checkpoint files
      ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
      if ckpt and ckpt.model_checkpoint_path:
        print("Load the model from {}".format(ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)
      else:
        print("No checkpoint found, exit now")
        exit(1)

      # Export the model files
      print("Exporting trained model to {}".format(FLAGS.model_path))
      model_exporter = exporter.Exporter(saver)
      model_exporter.init(
          sess.graph.as_graph_def(),
          named_graph_signatures={
              'inputs': exporter.generic_signature({"keys": keys_placeholder,
                                                    "indexs": sparse_index,
                                                    "ids": sparse_ids,
                                                    "values": sparse_values,
                                                    "shape": sparse_shape}),
              'outputs': exporter.generic_signature(
                  {"keys": keys,
                   "softmax": inference_softmax,
                   "prediction": inference_op})
          })
      model_exporter.export(FLAGS.model_path,
                            tf.constant(FLAGS.export_version), sess)

    elif mode == "inference":
      print("Start to run inference")
      start_time = datetime.datetime.now()

      inference_result_file_name = "./inference_result.txt"
      inference_test_file_name = "./data/a8a_test.libsvm"
      labels = []
      feature_ids = []
      feature_values = []
      feature_index = []
      ins_num = 0
      for line in open(inference_test_file_name, "r"):
        tokens = line.split(" ")
        labels.append(int(tokens[0]))

        feature_num = 0
        for feature in tokens[1:]:
          feature_id, feature_value = feature.split(":")
          feature_ids.append(int(feature_id))
          feature_values.append(float(feature_value))
          feature_index.append([ins_num, feature_num])
          feature_num += 1
        ins_num += 1

      ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
      if ckpt and ckpt.model_checkpoint_path:
        print("Use the model {}".format(ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)
      else:
        print("No model found, exit now")
        exit(1)

      prediction, prediction_softmax = sess.run(
          [inference_op, inference_softmax],
          feed_dict={sparse_index: feature_index,
                     sparse_ids: feature_ids,
                     sparse_values: feature_values,
                     sparse_shape: [ins_num, FEATURE_SIZE]})

      end_time = datetime.datetime.now()
      print("[{}] Inference result: {}".format(end_time - start_time,
                                               prediction))

      # Compute accuracy
      label_number = len(labels)
      correct_label_number = 0
      for i in range(label_number):
        if labels[i] == prediction[i]:
          correct_label_number += 1
      accuracy = float(correct_label_number) / label_number

      # Compute auc
      expected_labels = np.array(labels)
      predict_labels = prediction_softmax[:, 0]
      fpr, tpr, thresholds = metrics.roc_curve(expected_labels,
                                               predict_labels,
                                               pos_label=0)
      auc = metrics.auc(fpr, tpr)
      print("For inference data, accuracy: {}, auc: {}".format(accuracy, auc))

      # Save inference result into file
      np.savetxt(inference_result_file_name, prediction, delimiter=",")
      print("Save result to file: {}".format(inference_result_file_name))
Example #14
    def _parse_function(self, sequence_example_proto):
        """Parse a SequenceExample in the AutoDL/TensorFlow format.

        Args:
          sequence_example_proto: a SequenceExample with "x_dense_input" or sparse
              input representation.
        Returns:
          A list of tensors. For the first edition of the AutoDL challenge, returns a
              pair `(features, labels)` where `features` is a Tensor of shape
                [sequence_size, row_count, col_count, num_channels]
              and `labels` a Tensor of shape
                [output_dim, ]
        """
        sequence_features = {}
        for i in range(self.metadata_.get_bundle_size()):
            if self.metadata_.is_sparse(i):
                sequence_features[self._feature_key(
                    i, "sparse_col_index")] = tf.VarLenFeature(tf.int64)
                sequence_features[self._feature_key(
                    i, "sparse_row_index")] = tf.VarLenFeature(tf.int64)
                sequence_features[self._feature_key(
                    i, "sparse_value")] = tf.VarLenFeature(tf.float32)
            elif self.metadata_.is_compressed(i):
                sequence_features[self._feature_key(
                    i, "compressed")] = tf.VarLenFeature(tf.string)
            else:
                sequence_features[self._feature_key(
                    i, "dense_input")] = tf.FixedLenSequenceFeature(
                    self.metadata_.get_tensor_size(i), dtype=tf.float32)
        print('sequence_features')
        print(sequence_features)
        contexts, features = tf.parse_single_sequence_example(
            sequence_example_proto,
            context_features={
                "label_index": tf.VarLenFeature(tf.int64),
                "label_score": tf.VarLenFeature(tf.float32)
            },
            sequence_features=sequence_features)
        print('features')
        print(features)
        sample = []
        for i in range(self.metadata_.get_bundle_size()):
            key_dense = self._feature_key(i, "dense_input")
            row_count, col_count = self.metadata_.get_matrix_size(i)
            num_channels = self.metadata_.get_num_channels(i)
            sequence_size = self.metadata_.get_sequence_size()
            fixed_matrix_size = row_count > 0 and col_count > 0
            row_count = row_count if row_count > 0 else None
            col_count = col_count if col_count > 0 else None
            if key_dense in features:
                f = features[key_dense]
                if not fixed_matrix_size:
                    raise ValueError("To parse dense data, the tensor shape should " +
                                     "be known but got {} instead..." \
                                     .format((sequence_size, row_count, col_count)))
                f = tf.reshape(f, [sequence_size, row_count, col_count, num_channels])
                sample.append(f)

            sequence_size = sequence_size if sequence_size > 0 else None
            key_compressed = self._feature_key(i, "compressed")
            if key_compressed in features:
                compressed_images = features[key_compressed].values
                decompress_image_func = \
                    lambda x: dataset_utils.decompress_image(x, num_channels=num_channels)
                # `images` here is a 4D-tensor of shape [T, H, W, C], some of which
                # might be unknown
                images = tf.map_fn(
                    decompress_image_func,
                    compressed_images, dtype=tf.float32)
                images.set_shape([sequence_size, row_count, col_count, num_channels])
                sample.append(images)

            key_sparse_val = self._feature_key(i, "sparse_value")
            if key_sparse_val in features:
                key_sparse_col = self._feature_key(i, "sparse_col_index")
                key_sparse_row = self._feature_key(i, "sparse_row_index")
                sparse_col = features[key_sparse_col].values
                sparse_row = features[key_sparse_row].values
                sparse_val = features[key_sparse_val]
                indices = sparse_val.indices
                indices = tf.concat([
                    tf.reshape(indices[:, 0], [-1, 1]),
                    tf.reshape(sparse_row, [-1, 1]),
                    tf.reshape(sparse_col, [-1, 1])
                ], 1)
                sparse_tensor = tf.sparse_reorder(
                    tf.SparseTensor(
                        indices, sparse_val.values,
                        [sequence_size, row_count, col_count]))
                # TODO: see how we can keep sparse tensors instead of
                # returning dense ones.
                tensor = tf.sparse_tensor_to_dense(sparse_tensor)
                tensor = tf.reshape(tensor,
                                    [sequence_size, row_count, col_count, 1])
                sample.append(tensor)

        labels = tf.sparse_to_dense(
            contexts["label_index"].values,
            (self.metadata_.get_output_size(),),
            contexts["label_score"].values,
            validate_indices=False)
        # sparse_tensor = tf.sparse.SparseTensor(indices=(contexts["label_index"].values,),
        #                                       values=contexts["label_score"].values,
        #                                       dense_shape=(self.metadata_.get_output_size(),))
        # labels = tf.sparse.to_dense(sparse_tensor, validate_indices=False)
        sample.append(labels)
        return sample
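One caveat on the commented-out tf.sparse.SparseTensor variant above: SparseTensor expects a rank-2 [nnz, ndims] index matrix even for a 1-D dense shape, so the label indices need a reshape first. A minimal standalone sketch of that variant, with dummy tensors standing in for the parsed context features (an illustration, not the project's code):

    import tensorflow as tf

    label_index = tf.constant([2, 5], dtype=tf.int64)  # stands in for contexts["label_index"].values
    label_score = tf.constant([1.0, 0.5])              # stands in for contexts["label_score"].values
    output_size = 10                                   # stands in for self.metadata_.get_output_size()
    sparse_labels = tf.sparse.SparseTensor(
        indices=tf.reshape(label_index, [-1, 1]),      # rank-2 indices, one row per nonzero
        values=label_score,
        dense_shape=[output_size])
    labels = tf.sparse.to_dense(sparse_labels, validate_indices=False)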
Example #15
    def testTwoThreads(self):
        with self.test_session() as sess:
            # Two threads, the first generates (0..24, "a").
            num_a = 25
            zero64 = tf.constant(0, dtype=tf.int64)
            examples = tf.Variable(zero64)
            counter = examples.count_up_to(num_a)
            sparse_counter = tf.SparseTensor(
                indices=tf.reshape(zero64, [1, 1]),
                values=tf.pack([tf.cast(counter, tf.float32)]),
                shape=[1])

            # The second generates (99, "b") 35 times and then stops.
            num_b = 35
            ninety_nine = tf.train.limit_epochs(
                tf.constant(99, dtype=tf.int64), num_b)
            sparse_ninety_nine = tf.SparseTensor(
                indices=tf.reshape(zero64, [1, 1]),
                values=tf.pack([tf.cast(ninety_nine, tf.float32)]),
                shape=[1])

            # These get joined together and grouped into batches of 5.
            batch_size = 5
            batched = tf.train.shuffle_batch_join(
                [[counter, sparse_counter, "a"],
                 [ninety_nine, sparse_ninety_nine, "b"]],
                batch_size=batch_size,
                capacity=32,
                min_after_dequeue=16,
                seed=223607)

            tf.initialize_all_variables().run()
            threads = tf.train.start_queue_runners()

            # Should see the "a" and "b" threads mixed together.
            all_a = []
            seen_b = 0
            saw_both = 0
            num_batches = (num_a + num_b) // batch_size
            for i in range(num_batches):
                results = sess.run(batched)
                tf.logging.info("Batch %d: %s", i, results[0])
                self.assertEqual(len(results[0]), batch_size)
                self.assertEqual(len(results[2]), batch_size)
                self.assertAllEqual(results[0], results[1].values)
                self.assertAllEqual(
                    results[1].indices,
                    np.vstack((np.arange(batch_size), np.zeros(batch_size))).T)
                self.assertAllEqual(results[1].shape, [batch_size, 1])
                which_a = [i for i, s in enumerate(results[2]) if s == b"a"]
                which_b = [i for i, s in enumerate(results[2]) if s == b"b"]
                self.assertEqual(len(which_a) + len(which_b), batch_size)
                if which_a and which_b: saw_both += 1
                all_a.extend([results[0][i] for i in which_a])
                seen_b += len(which_b)
                self.assertAllEqual([99] * len(which_b),
                                    [results[0][i] for i in which_b])

            # Some minimum level of mixing of the results of both threads.
            self.assertGreater(saw_both, 1)

            # Saw all the items from "a", but scrambled.
            self.assertItemsEqual(all_a, range(num_a))
            deltas = [all_a[i + 1] - all_a[i] for i in range(len(all_a) - 1)]
            self.assertFalse(all(d == deltas[0] for d in deltas))
            self.assertEqual(seen_b, num_b)

            # Reached the limit.
            with self.assertRaises(tf.errors.OutOfRangeError):
                sess.run(batched)
            for thread in threads:
                thread.join()
Example #16
    def __init__(self,
                 A,
                 X,
                 L,
                 K=1,
                 p_val=0.10,
                 p_test=0.05,
                 p_nodes=0.0,
                 n_hidden=None,
                 max_iter=2000,
                 tolerance=100,
                 scale=False,
                 seed=0,
                 verbose=True):
        """
        Parameters
        ----------
        A : scipy.sparse.spmatrix
            Sparse unweighted adjacency matrix
        X : scipy.sparse.spmatrix
            Sparse attribute matrix
        L : int
            Dimensionality of the node embeddings
        K : int
            Maximum distance to consider
        p_val : float
            Percent of edges in the validation set, 0 <= p_val < 1
        p_test : float
            Percent of edges in the test set, 0 <= p_test < 1
        p_nodes : float
            Percent of nodes to hide (inductive learning), 0 <= p_nodes < 1
        n_hidden : list(int)
            A list specifying the size of each hidden layer, default n_hidden=[512]
        max_iter : int
            Maximum number of epochs for which to run gradient descent
        tolerance : int
            Used for early stopping. Number of epochs to wait for the score to improve on the validation set
        scale : bool
            Whether to apply the up-scaling terms.
        seed : int
            Random seed used to split the edges into train-val-test set
        verbose : bool
            Verbosity.
        """
        tf.reset_default_graph()
        tf.set_random_seed(seed)
        np.random.seed(seed)

        X = X.astype(np.float32)

        # completely hide some nodes from the network for inductive evaluation
        if p_nodes > 0:
            A = self.__setup_inductive(A, X, p_nodes)
        else:
            self.X = tf.SparseTensor(*sparse_feeder(X))
            self.feed_dict = None
            # self.X = tf.sparse_placeholder(tf.float32)
            # self.feed_dict = {self.X: sparse_feeder(X)}

        self.N, self.D = X.shape
        self.L = L
        self.max_iter = max_iter
        self.tolerance = tolerance
        self.scale = scale
        self.verbose = verbose

        if n_hidden is None:
            n_hidden = [512]
        self.n_hidden = n_hidden

        # hold out some validation and/or test edges
        # pre-compute the hops for each node for more efficient sampling
        if p_val + p_test > 0:
            train_ones, val_ones, val_zeros, test_ones, test_zeros = train_val_test_split_adjacency(
                A=A,
                p_val=p_val,
                p_test=p_test,
                seed=seed,
                neg_mul=1,
                every_node=True,
                connected=False,
                undirected=(A != A.T).nnz == 0)
            A_train = edges_to_sparse(train_ones, self.N)
            hops = get_hops(A_train, K)
        else:
            hops = get_hops(A, K)
        scale_terms = {
            h if h != -1 else max(hops.keys()) + 1:
            hops[h].sum(1).A1 if h != -1 else hops[1].shape[0] -
            hops[h].sum(1).A1
            for h in hops
        }
        self.__build()
        self.__dataset_generator(hops, scale_terms)
        self.__build_loss()

        # setup the validation set for easy evaluation
        if p_val > 0:
            val_edges = np.row_stack((val_ones, val_zeros))
            self.neg_val_energy = -self.energy_kl(val_edges)
            self.val_ground_truth = A[val_edges[:, 0], val_edges[:, 1]].A1
            self.val_early_stopping = True
        else:
            self.val_early_stopping = False

        # setup the test set for easy evaluation
        if p_test > 0:
            test_edges = np.row_stack((test_ones, test_zeros))
            self.neg_test_energy = -self.energy_kl(test_edges)
            self.test_ground_truth = A[test_edges[:, 0], test_edges[:, 1]].A1

        # setup the inductive test set for easy evaluation
        if p_nodes > 0:
            self.neg_ind_energy = -self.energy_kl(self.ind_pairs)
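The sparse_feeder helper used above is not shown in this example; a common implementation (a sketch under that assumption) converts a scipy sparse matrix into the (indices, values, shape) triple that tf.SparseTensor(*...) unpacks:

    import numpy as np
    import scipy.sparse as sp

    def sparse_feeder(M):
        # Return (indices, values, shape) so that tf.SparseTensor(*sparse_feeder(M)) works.
        M = sp.coo_matrix(M, dtype=np.float32)
        indices = np.vstack((M.row, M.col)).T  # [nnz, 2] index matrix
        return indices, M.data, M.shape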
Example #17
def dense_to_sparse(tensor):
    tensor = tf.convert_to_tensor(tensor)
    indices = tf.where(tf.not_equal(tensor, tf.constant(0, tensor.dtype)))
    values = tf.gather_nd(tensor, indices)
    shape = tf.shape(tensor, out_type=tf.int64)
    return tf.SparseTensor(indices, values, shape)
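A quick usage sketch: dense_to_sparse round-trips with tf.sparse.to_dense for tensors whose implicit zeros are exactly 0:

    import tensorflow as tf

    dense = tf.constant([[0.0, 2.0], [3.0, 0.0]])
    sp_tensor = dense_to_sparse(dense)
    restored = tf.sparse.to_dense(sp_tensor)  # element-wise equal to `dense`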
Example #18
    def inference(self):
        """
        forward propagation
        :return: labels for each sample
        """
        v = tf.Variable(tf.truncated_normal(shape=[self.p, self.k],
                                            mean=0,
                                            stddev=0.01),
                        dtype='float32')

        # Factorization Machine
        with tf.variable_scope('FM'):
            b = tf.get_variable('bias',
                                shape=[2],
                                initializer=tf.zeros_initializer())
            w1 = tf.get_variable('w1',
                                 shape=[self.p, 2],
                                 initializer=tf.truncated_normal_initializer(
                                     mean=0, stddev=1e-2))
            # shape of [None, 2]
            self.linear_terms = tf.add(tf.matmul(self.X, w1), b)

            # shape of [None, 1]
            self.interaction_terms = tf.multiply(
                0.5,
                tf.reduce_mean(tf.subtract(
                    tf.pow(tf.matmul(self.X, v), 2),
                    tf.matmul(tf.pow(self.X, 2), tf.pow(v, 2))),
                               1,
                               keep_dims=True))
            # shape of [None, 2]
            self.y_fm = tf.add(self.linear_terms, self.interaction_terms)

        # three-hidden-layer neural network, network shape of (200-200-200)
        with tf.variable_scope('DNN', reuse=False):
            # embedding layer
            # zeros_cv = tf.constant([])
            # oh_cv_v =  tf.gather(v, self.oh_feature_inds)
            # for feat in config.CV_COLS:
            #     if self.cv_feature_dict[feat].values:
            #         oh_cv_v = tf.concat([oh_cv_v,zeros_cv],0)
            #     else:
            #         temp_cv = tf.gather(v, self.cv_feature_dict[feat].values)
            #         temp_cv = tf.reduce_mean(temp_cv, 0)
            #         #a/sum(self.cv_feature_dict[feat].values)
            #         oh_cv_v = tf.concat([oh_cv_v,temp_cv],0)
            #1.temp = tf.multiply(v,self.X,0?1)
            #embedding = tf.segment_mean(tf.multiply(v,tf.transpose(self.X,perm = [1,0])),self.field_inds)
            maxcol_zero = tf.get_variable('zeor_line',
                                          shape=[1, self.k],
                                          initializer=tf.zeros_initializer())
            v_concat = tf.concat([v, maxcol_zero], 0)
            sp_tensor = tf.SparseTensor(
                indices=self.sp_inds,
                values=self.sp_value,
                dense_shape=[
                    self.all_field_cnt * self.X.shape[1].value, self.max_cols
                ])
            embedding = tf.nn.embedding_lookup_sparse(v_concat,
                                                      sp_tensor,
                                                      None,
                                                      combiner='mean')
            y_embedding_input = tf.reshape(embedding,
                                           [-1, self.all_field_cnt * self.k])
            # first hidden layer
            w1 = tf.get_variable('w1_dnn',
                                 shape=[self.all_field_cnt * self.k, 200],
                                 initializer=tf.truncated_normal_initializer(
                                     mean=0, stddev=1e-2))
            b1 = tf.get_variable('b1_dnn',
                                 shape=[200],
                                 initializer=tf.constant_initializer(0.001))
            y_hidden_l1 = tf.nn.relu(tf.matmul(y_embedding_input, w1) + b1)
            # second hidden layer
            w2 = tf.get_variable('w2',
                                 shape=[200, 200],
                                 initializer=tf.truncated_normal_initializer(
                                     mean=0, stddev=1e-2))
            b2 = tf.get_variable('b2',
                                 shape=[200],
                                 initializer=tf.constant_initializer(0.001))
            y_hidden_l2 = tf.nn.relu(tf.matmul(y_hidden_l1, w2) + b2)
            # third hidden layer
            w3 = tf.get_variable('w3',
                                 shape=[200, 200],
                                 initializer=tf.truncated_normal_initializer(
                                     mean=0, stddev=1e-2))
            b3 = tf.get_variable('b3',
                                 shape=[200],
                                 initializer=tf.constant_initializer(0.001))
            y_hidden_l3 = tf.nn.relu(tf.matmul(y_hidden_l2, w3) + b3)
            # output layer
            w_out = tf.get_variable(
                'w_out',
                shape=[200, 2],
                initializer=tf.truncated_normal_initializer(mean=0,
                                                            stddev=1e-2))
            b_out = tf.get_variable('b_out',
                                    shape=[2],
                                    initializer=tf.constant_initializer(0.001))
            self.y_dnn = tf.nn.relu(tf.matmul(y_hidden_l3, w_out) + b_out)
        # add FM output and DNN output
        self.y_out = tf.add(self.y_fm, self.y_dnn)
        self.y_out_prob = tf.nn.softmax(self.y_out)
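The interaction term above relies on the standard factorization-machine identity: the sum of <v_i, v_j> * x_i * x_j over feature pairs i < j equals 0.5 * sum_f((Xv)_f^2 - (X^2 v^2)_f). Note the code takes reduce_mean over the k factors rather than reduce_sum, an extra 1/k scaling chosen by the author. A small numpy check of the identity itself:

    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.normal(size=5)       # one sample, p = 5 features
    v = rng.normal(size=(5, 3))  # k = 3 latent factors
    pairwise = sum(np.dot(v[i], v[j]) * x[i] * x[j]
                   for i in range(5) for j in range(i + 1, 5))
    identity = 0.5 * np.sum(np.dot(x, v) ** 2 - np.dot(x ** 2, v ** 2))
    assert np.allclose(pairwise, identity)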
Example #19
def octree_bilinear_v3(pts, data, octree, depth):
    with tf.variable_scope('octree_linear'):
        mask = tf.constant([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1],
                            [1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 1, 1]],
                           dtype=tf.float32)
        if octree_key64:
            masku = tf.constant([
                0, 4294967296, 65536, 4295032832, 1, 4294967297, 65537,
                4295032833
            ],
                                dtype=tf.int64)
        else:
            masku = tf.constant([0, 65536, 256, 65792, 1, 65537, 257, 65793],
                                dtype=tf.int32)

        maskc = 1 - mask

        xyzf, ids = tf.split(pts, [3, 1], 1)
        xyzf = xyzf - 0.5  # since the value is defined on the center of each voxel
        xyzi = tf.floor(xyzf)  # the integer part  (N, 3)
        frac = xyzf - xyzi  # the fraction part (N, 3)

        key = tf.cast(tf.concat([xyzi, ids], axis=1), dtype=tf_uints)
        key = tf.cast(octree_encode_key(key), dtype=tf_intk)
        # Cast the key to `int32` since the `add` below does not support `uint64`
        # The side effect is that the batch_size must be smaller than 128
        key = tf.expand_dims(key, 1) + masku  # (N, 8),
        key = tf.cast(tf.reshape(key, [-1]), dtype=tf_uintk)

        idx = octree_search_key(key, octree, depth)  # (N*8,)
        flgs = idx > -1  # filtering flags
        idx = tf.boolean_mask(idx, flgs)

        npt = tf.shape(xyzi)[0]
        ids = tf.reshape(tf.range(npt), [-1, 1])
        ids = tf.reshape(tf.tile(ids, [1, 8]), [-1])  # (N*8,)
        ids = tf.boolean_mask(ids, flgs)

        frac = maskc - tf.expand_dims(frac, axis=1)
        weight = tf.abs(tf.reshape(tf.reduce_prod(frac, axis=2), [-1]))
        weight = tf.boolean_mask(weight, flgs)

        indices = tf.concat([tf.expand_dims(ids, 1),
                             tf.expand_dims(idx, 1)], 1)
        indices = tf.cast(indices, tf.int64)
        data = tf.squeeze(data, [0, 3])  # (C, H)
        h = tf.shape(data)[1]
        mat = tf.SparseTensor(indices=indices,
                              values=weight,
                              dense_shape=[npt, h])

        # channel, max_channel = int(data.shape[0]), 512
        # if channel > max_channel:
        #   num = channel // max_channel
        #   remain = channel % max_channel
        #   splits = [max_channel] * num
        #   if remain != 0:
        #     splits.append(remain)
        #     num += 1
        #   output_split = [None] * num
        #   data_split = tf.split(data, splits, axis=0)
        #   for i in range(num):
        #     with tf.name_scope('mat_%d' % i):
        #       output_split[i] = tf.sparse.sparse_dense_matmul(
        #           mat, data_split[i], adjoint_a=False, adjoint_b=True)
        #   output = tf.concat(output_split, axis=1)
        # else:
        #   output = tf.sparse.sparse_dense_matmul(mat, data, adjoint_a=False, adjoint_b=True)

        output = tf.sparse.sparse_dense_matmul(mat,
                                               data,
                                               adjoint_a=False,
                                               adjoint_b=True)
        norm = tf.sparse.sparse_dense_matmul(mat, tf.ones([h, 1]))
        output = tf.div(output, norm + 1.0e-10)  # avoid division by zero
        output = tf.expand_dims(tf.expand_dims(tf.transpose(output), 0), -1)
    return output
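The weight computation above is plain trilinear interpolation: each of the 8 neighbouring voxels gets the product, over the three axes, of either frac or (1 - frac), selected by its corner mask, and the 8 weights sum to 1. A numpy sketch of the same computation for a single point:

    import numpy as np

    mask = np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1],
                     [1, 0, 0], [1, 0, 1], [1, 1, 0], [1, 1, 1]], dtype=np.float32)
    maskc = 1 - mask
    frac = np.array([0.25, 0.5, 0.75], dtype=np.float32)      # fractional part of one point
    weights = np.abs(np.prod(maskc - frac[None, :], axis=1))  # 8 corner weights
    assert np.isclose(weights.sum(), 1.0)                     # they form a partition of unity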
Example #20
    def DoOneRun(self,
                 run_id,
                 rf_number,
                 nn_replication,
                 prefix='',
                 seed=0,
                 batch_count=1):
        batch_size = self.config.batch_size

        self.config.rf_number = rf_number
        self.config.rf_file_name = ('features_' + prefix + '_' +
                                    str(rf_number) + '_' + str(run_id) +
                                    '.pkl')
        srf = rf.GenerateOrLoadRF(self.config, seed=run_id + 2718281828 + seed)

        if isinstance(nn_replication, (list, tuple)):
            self.skeleton.SetReplication(nn_replication)
        else:
            self.skeleton.SetReplication(
                [int(x * nn_replication) for x in self.original_replication])
        with tf.Graph().as_default(), tf.Session('') as sess:
            examples = self.get_inputs(batch_size)

            # Calculate the exact gram matrix for the batch
            gram = tf.reshape(kf.Kernel(self.skeleton, examples, examples),
                              [batch_size, batch_size])

            # Calculate the approximate gram matrix using a neural net
            rep, _ = NN.NeuralNet(self.skeleton, self.config, examples)
            srep = tf.squeeze(rep)
            approx_gram = tf.matmul(srep, tf.transpose(srep))

            # Normalize the approximate gram matrix so that the norm of
            # each element is 1.
            norms = tf.reshape(tf.sqrt(tf.diag_part(approx_gram)), [-1, 1])
            nn_gram = tf.div(approx_gram, tf.matmul(norms,
                                                    tf.transpose(norms)))

            # Compute the approximate gram matrix using random features
            parameters = tf.constant(
                np.zeros((rf_number,
                          self.config.number_of_classes)).astype(np.float32))
            rand_features = tf.SparseTensor(srf.features[0], srf.features[1],
                                            srf.features[2])
            _, rf_vectors = rf.RandomFeaturesGraph(
                self.skeleton, self.config.number_of_classes, examples,
                rf_number, rand_features, parameters, srf.weights)
            rf_gram = tf.matmul(rf_vectors, rf_vectors, transpose_b=True)
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)
            RF_K_stat = Stat()
            NN_K_stat = Stat()
            for i in range(batch_count):
                gram_np, nn_gram_np, rf_gram_np, approx_gram_np = sess.run(
                    [gram, nn_gram, rf_gram, approx_gram])
                RF_K_stat.AddToStat(gram_np, rf_gram_np)
                NN_K_stat.AddToStat(gram_np, nn_gram_np)
            coord.request_stop()
            coord.join(threads)
            return NN_K_stat, RF_K_stat
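The nn_gram normalization above is cosine normalization of a Gram matrix: dividing entry (i, j) by sqrt(G_ii * G_jj) makes every diagonal entry 1. A numpy sketch:

    import numpy as np

    rng = np.random.default_rng(1)
    srep = rng.normal(size=(4, 3))                # stands in for the squeezed net output
    gram = srep @ srep.T                          # approximate Gram matrix
    norms = np.sqrt(np.diag(gram)).reshape(-1, 1)
    nn_gram = gram / (norms @ norms.T)            # unit diagonal
    assert np.allclose(np.diag(nn_gram), 1.0)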
Example #21
    def _test_set_size_3d(self, dtype, invalid_indices=False):
        if invalid_indices:
            indices = tf.constant(
                [
                    [0, 1, 0],
                    [0, 1, 1],  # 0,1
                    [1, 0, 0],  # 1,0
                    [1, 1, 0],
                    [1, 1, 1],
                    [1, 1, 2],  # 1,1
                    [0, 0, 0],
                    [0, 0, 2],  # 0,0
                    # 2,0
                    [2, 1, 1]  # 2,1
                ],
                tf.int64)
        else:
            indices = tf.constant(
                [
                    [0, 0, 0],
                    [0, 0, 2],  # 0,0
                    [0, 1, 0],
                    [0, 1, 1],  # 0,1
                    [1, 0, 0],  # 1,0
                    [1, 1, 0],
                    [1, 1, 1],
                    [1, 1, 2],  # 1,1
                    # 2,0
                    [2, 1, 1]  # 2,1
                ],
                tf.int64)

        sp = tf.SparseTensor(
            indices,
            _constant(
                [
                    1,
                    9,  # 0,0
                    3,
                    3,  # 0,1
                    1,  # 1,0
                    9,
                    7,
                    8,  # 1,1
                    # 2,0
                    5  # 2,1
                ],
                dtype),
            tf.constant([3, 2, 3], tf.int64))

        if invalid_indices:
            with self.assertRaisesRegexp(tf.OpError, "out of order"):
                self._set_size(sp)
        else:
            self.assertAllEqual(
                [
                    [
                        2,  # 0,0
                        1
                    ],  # 0,1
                    [
                        1,  # 1,0
                        3
                    ],  # 1,1
                    [
                        0,  # 2,0
                        1
                    ]  # 2,1
                ],
                self._set_size(sp))
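The _set_size helper is not shown here; presumably it wraps tf.sets.size (tf.sets.set_size in older releases), which counts distinct values along the last dimension of a SparseTensor. A minimal sketch under that assumption:

    import tensorflow as tf

    sp_tensor = tf.sparse.SparseTensor(
        indices=[[0, 0, 0], [0, 0, 1], [0, 1, 0]],
        values=tf.constant([7, 7, 3], tf.int64),  # the duplicate 7 in row (0, 0) counts once
        dense_shape=[1, 2, 2])
    sizes = tf.sets.size(sp_tensor)               # dense [1, 2] tensor: [[1, 1]]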
Example #22
def tr_te_dataset(data_tr,
                  data_te,
                  batch_size,
                  mode="basic",
                  user_info=None,
                  vad_userId_map=None,
                  n2v_vectors=None,
                  train_data=None,
                  userId_map=None):
    # https://www.tensorflow.org/performance/performance_guide suggests something is wrong here,
    # because GPU usage usually hovers near 0, which is very disappointing; hopefully this can be
    # sped up considerably.
    # This takes the outputs of data_tr and data_te and turns them into datasets we can sample from.

    # The one open question is how exactly to do the "masking" part in here.

    # The way it works: load_train_data just loads training data, while load_tr_te_data has
    # goal-vectors as well. Those are the ones that get dropped out, so this should be fine.

    assert data_tr is not None
    assert data_te is not None
    assert batch_size is not None

    data_tr = data_tr.astype(np.float32)
    data_tr_coo = data_tr.tocoo()

    data_te = data_te.astype(np.float32)
    data_te_coo = data_te.tocoo()

    n_items = data_tr_coo.shape[1]
    if mode == "one_hot":
        assert user_info is not None
        assert vad_userId_map is not None
        vad_user_info_cut = user_info[user_info['userId'].isin(
            vad_userId_map.values())]
        vad_user_info_cut = vad_user_info_cut.set_index(keys='userId')
        vad_user_info_matrix = vad_user_info_cut.loc[
            vad_userId_map.values()].values
        vad_user_info_matrix = vad_user_info_matrix.astype(np.float32)

        vad_data_tr = np.concatenate((data_tr.todense(), vad_user_info_matrix),
                                     axis=1)
        vad_data_tr = tf.convert_to_tensor(vad_data_tr)
        vad_data_te = np.concatenate((data_te.todense(), vad_user_info_matrix),
                                     axis=1)
        vad_data_te = tf.convert_to_tensor(vad_data_te)

        samples_tr = tf.data.Dataset.from_tensor_slices(vad_data_tr)
        samples_te = tf.data.Dataset.from_tensor_slices(vad_data_te)
        dataset = tf.data.Dataset.zip(
            (samples_tr, samples_te)).shuffle(10000).batch(batch_size,
                                                           drop_remainder=True)

        expected_shape = tf.TensorShape(
            [batch_size, n_items + vad_user_info_cut.shape[1]])
    elif mode == "node2vec":
        assert n2v_vectors is not None
        assert train_data is not None
        assert userId_map is not None
        n2v_tr = get_weighted_sum(n2v_vectors, data_tr.todense(),
                                  train_data.todense(), userId_map)
        vad_data_tr = np.concatenate((data_tr.todense(), n2v_tr), axis=1)
        vad_data_tr = tf.convert_to_tensor(vad_data_tr)
        n2v_te = get_weighted_sum(n2v_vectors, data_te.todense(),
                                  train_data.todense(), userId_map)
        vad_data_te = np.concatenate((data_te.todense(), n2v_te), axis=1)
        vad_data_te = tf.convert_to_tensor(vad_data_te)

        samples_tr = tf.data.Dataset.from_tensor_slices(vad_data_tr)
        samples_te = tf.data.Dataset.from_tensor_slices(vad_data_te)
        dataset = tf.data.Dataset.zip(
            (samples_tr, samples_te)).shuffle(10000).batch(batch_size,
                                                           drop_remainder=True)

        expected_shape = tf.TensorShape(
            [batch_size, n_items + n2v_vectors.vectors.shape[1]])
    elif mode == "node2vec_user_info":
        assert n2v_vectors is not None
        assert user_info is not None
        assert userId_map is not None
        assert vad_userId_map is not None
        user_info_cut = user_info[user_info['userId'].isin(
            userId_map.values())]
        user_info_cut = user_info_cut.set_index(keys='userId')
        user_info_matrix = user_info_cut.loc[userId_map.values()].values
        user_info_matrix = user_info_matrix.astype(np.float32)

        vad_user_info_cut = user_info[user_info['userId'].isin(
            vad_userId_map.values())]
        vad_user_info_cut = vad_user_info_cut.set_index(keys='userId')
        vad_user_info_matrix = vad_user_info_cut.loc[
            vad_userId_map.values()].values
        vad_user_info_matrix = vad_user_info_matrix.astype(np.float32)

        n2v = get_weighted_sum(n2v_vectors, vad_user_info_matrix,
                               user_info_matrix, userId_map)
        vad_data_tr = np.concatenate((data_tr.todense(), n2v), axis=1)
        vad_data_tr = tf.convert_to_tensor(vad_data_tr)
        vad_data_te = np.concatenate((data_te.todense(), n2v), axis=1)
        vad_data_te = tf.convert_to_tensor(vad_data_te)

        samples_tr = tf.data.Dataset.from_tensor_slices(vad_data_tr)
        samples_te = tf.data.Dataset.from_tensor_slices(vad_data_te)
        dataset = tf.data.Dataset.zip(
            (samples_tr, samples_te)).shuffle(10000).batch(batch_size,
                                                           drop_remainder=True)

        expected_shape = tf.TensorShape(
            [batch_size, n_items + n2v_vectors.vectors.shape[1]])
    else:
        indices = np.mat([data_tr_coo.row, data_tr_coo.col]).transpose()
        sparse_data_tr = tf.SparseTensor(indices, data_tr_coo.data,
                                         data_tr_coo.shape)

        indices = np.mat([data_te_coo.row, data_te_coo.col]).transpose()
        sparse_data_te = tf.SparseTensor(indices, data_te_coo.data,
                                         data_te_coo.shape)

        samples_tr = tf.data.Dataset.from_tensor_slices(sparse_data_tr)
        samples_te = tf.data.Dataset.from_tensor_slices(sparse_data_te)

        # A shuffle buffer of 10000 may be too large to sample from well; it is also unclear how it interacts with batching.
        dataset = tf.data.Dataset.zip(
            (samples_tr, samples_te)).shuffle(10000).batch(batch_size,
                                                           drop_remainder=True)

        dataset = dataset.map(lambda x, y: (tf.sparse_tensor_to_dense(x),
                                            tf.sparse_tensor_to_dense(y)))

        expected_shape = tf.TensorShape([batch_size, n_items])

    dataset = dataset.apply(
        tf.contrib.data.assert_element_shape((expected_shape, expected_shape)))

    # dataset = dataset.skip(15)

    return dataset
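The else branch above is the standard scipy-COO-to-SparseTensor conversion; a minimal standalone sketch of the same pattern, yielding one sparse row per dataset element:

    import numpy as np
    import scipy.sparse as sp
    import tensorflow as tf

    mat = sp.random(6, 4, density=0.3, format='coo', dtype=np.float32)
    indices = np.mat([mat.row, mat.col]).transpose()  # [nnz, 2], as in the code above
    sparse_data = tf.SparseTensor(indices, mat.data, mat.shape)
    dataset = tf.data.Dataset.from_tensor_slices(sparse_data)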
Example #23
    def _test_sparse_set_difference_3d(self, dtype, invalid_indices=False):
        if invalid_indices:
            indices = tf.constant(
                [
                    [0, 1, 0],
                    [0, 1, 1],  # 0,1
                    [1, 0, 0],  # 1,0
                    [1, 1, 0],
                    [1, 1, 1],
                    [1, 1, 2],  # 1,1
                    [0, 0, 0],
                    [0, 0, 2],  # 0,0
                    # 2,0
                    [2, 1, 1]  # 2,1
                    # 3,*
                ],
                tf.int64)
        else:
            indices = tf.constant(
                [
                    [0, 0, 0],
                    [0, 0, 2],  # 0,0
                    [0, 1, 0],
                    [0, 1, 1],  # 0,1
                    [1, 0, 0],  # 1,0
                    [1, 1, 0],
                    [1, 1, 1],
                    [1, 1, 2],  # 1,1
                    # 2,0
                    [2, 1, 1]  # 2,1
                    # 3,*
                ],
                tf.int64)
        sp_a = tf.SparseTensor(
            indices,
            _constant(
                [
                    1,
                    9,  # 0,0
                    3,
                    3,  # 0,1
                    1,  # 1,0
                    9,
                    7,
                    8,  # 1,1
                    # 2,0
                    5  # 2,1
                    # 3,*
                ],
                dtype),
            tf.constant([4, 2, 3], tf.int64))
        sp_b = tf.SparseTensor(
            tf.constant(
                [
                    [0, 0, 0],
                    [0, 0, 3],  # 0,0
                    # 0,1
                    [1, 0, 0],  # 1,0
                    [1, 1, 0],
                    [1, 1, 1],  # 1,1
                    [2, 0, 1],  # 2,0
                    [2, 1, 1],  # 2,1
                    [3, 0, 0],  # 3,0
                    [3, 1, 0]  # 3,1
                ],
                tf.int64),
            _constant(
                [
                    1,
                    3,  # 0,0
                    # 0,1
                    3,  # 1,0
                    7,
                    8,  # 1,1
                    2,  # 2,0
                    5,  # 2,1
                    4,  # 3,0
                    4  # 3,1
                ],
                dtype),
            tf.constant([4, 2, 4], tf.int64))

        if invalid_indices:
            with self.assertRaisesRegexp(tf.OpError, "out of order"):
                self._set_difference(sp_a, sp_b, False)
            with self.assertRaisesRegexp(tf.OpError, "out of order"):
                self._set_difference(sp_a, sp_b, True)
        else:
            # a-b
            expected_indices = [
                [0, 0, 0],  # 0,0
                [0, 1, 0],  # 0,1
                [1, 0, 0],  # 1,0
                [1, 1, 0],  # 1,1
                # 2,*
                # 3,*
            ]
            expected_values = _values(
                [
                    9,  # 0,0
                    3,  # 0,1
                    1,  # 1,0
                    9,  # 1,1
                    # 2,*
                    # 3,*
                ],
                dtype)
            expected_shape = [4, 2, 1]
            expected_counts = [
                [
                    1,  # 0,0
                    1  # 0,1
                ],
                [
                    1,  # 1,0
                    1  # 1,1
                ],
                [
                    0,  # 2,0
                    0  # 2,1
                ],
                [
                    0,  # 3,0
                    0  # 3,1
                ]
            ]

            difference = self._set_difference(sp_a, sp_b, True)
            self._assert_set_operation(expected_indices,
                                       expected_values,
                                       expected_shape,
                                       difference,
                                       dtype=dtype)
            self.assertAllEqual(expected_counts,
                                self._set_difference_count(sp_a, sp_b))

            # b-a
            expected_indices = [
                [0, 0, 0],  # 0,0
                # 0,1
                [1, 0, 0],  # 1,0
                # 1,1
                [2, 0, 0],  # 2,0
                # 2,1
                [3, 0, 0],  # 3,0
                [3, 1, 0]  # 3,1
            ]
            expected_values = _values(
                [
                    3,  # 0,0
                    # 0,1
                    3,  # 1,0
                    # 1,1
                    2,  # 2,0
                    # 2,1
                    4,  # 3,0
                    4,  # 3,1
                ],
                dtype)
            expected_shape = [4, 2, 1]
            expected_counts = [
                [
                    1,  # 0,0
                    0  # 0,1
                ],
                [
                    1,  # 1,0
                    0  # 1,1
                ],
                [
                    1,  # 2,0
                    0  # 2,1
                ],
                [
                    1,  # 3,0
                    1  # 3,1
                ]
            ]

            difference = self._set_difference(sp_a, sp_b, False)
            self._assert_set_operation(expected_indices,
                                       expected_values,
                                       expected_shape,
                                       difference,
                                       dtype=dtype)
            self.assertAllEqual(expected_counts,
                                self._set_difference_count(sp_a, sp_b, False))
Example #24
def parse_csv_line(line, vocabulary, config):
    # tf.decode_csv converts CSV records to tensors; it does not read CSV files!
    # The standard procedure for reading any file is tf.data.TextLineDataset.
    # After reading the file into a tensor (NUM_LINES x 1), we interpret the tensor as CSV format.
    # Each line in that tensor is a scalar string, which means we assume every row of the tensor
    # (corresponding to a line in the file) has multiple columns delimited by the specified delimiter.
    # The output we get is a tensor (NUM_LINES, NUM_COLUMNS).
    fields = tf.decode_csv(line, config['data']['csv_column_defaults'])

    # Note that INPUT_CSV_COLUMNS is (1 x NUM_COLUMNS) while fields is (NUM_LINES, NUM_COLUMNS)
    # So zipping gives NUM_COLUMNS tuples (COLUMN_NAME, (NUM_LINES x 1)), from which we create a dict
    features = dict(zip(config['data']['csv_columns'], fields))

    # Split string into characters
    # IMPORTANT NOTE: tf.string_split returns a SparseTensor of rank 2,
    # the strings split according to the delimiter. Read more about how SparseTensors are represented
    text = tf.string_split([features[config['data']['csv_columns'][0]]],
                           delimiter="")

    # Once we have character SparseTensors, we need to encode the characters as numbers.
    # The traditional way is one-hot encoding, or one-hot encoding plus an embedding matrix.
    # With one-hot encoding + embedding matrix you are basically selecting a row of the embedding
    # matrix, so to make this faster, TensorFlow expects the input to an embedding layer to be the
    # index of that row instead of a one-hot vector to be multiplied with the embedding matrix.
    # So we maintain a Vocabulary where every character we care about has a 1-to-1 associated number.
    # This looks like a map operation, for which TensorFlow has tf.map_fn.

    # Now note that SparseTensors do not support all the usual Tensor operations.
    # To use tf.map_fn on a SparseTensor, we have to create a new SparseTensor in the following way.

    # Also note that embedding layer will expect indexes of dtype tf.int64
    # Also, the vocabulary dict stores values as int64

    text_idx = tf.SparseTensor(
        text.indices,
        tf.map_fn(vocabulary.text2idx, text.values, dtype=tf.int64),
        text.dense_shape)

    # We have to convert this SparseTensor back to dense to support future operations
    text_idx = tf.sparse_tensor_to_dense(text_idx)  # Shape - (1, T)
    text_idx = tf.squeeze(text_idx)  # Shape - (T,)

    # We also require the length of every input sequence as an input to the model.
    # This is because we will create batches of variable-length inputs,
    # where all sequences are forced to the same length by padding at the end with 0s.
    # This batch will be passed to a dynamic RNN, which will use the sequence lengths
    # to mask the outputs appropriately. The RNN will still be unrolled to the common length.
    # This method enables us to do mini-batch SGD on variable-length inputs.

    input_sequence_lengths = tf.size(text_idx)  # Scalar

    # We are done with processing the text (which is our input to Tacotron).
    # Let's move on to the audio (which will be our targets).
    # This part is standard code for obtaining MFCCs from audio as given in the TF documentation.
    # You can read more about Fourier transforms, spectrograms and MFCCs to get an idea.

    audio_binary = tf.read_file(features[config['data']['csv_columns'][1]])

    # Sample rate used in paper is 16000, channel count should be 1 for tacotron 2
    # STFT configuration values specified in paper
    waveform = ffmpeg.decode_audio(
        audio_binary,
        file_format='wav',
        samples_per_second=config['data']['wav_sample_rate'],
        channel_count=1)

    stfts = tf.contrib.signal.stft(tf.transpose(waveform),
                                   frame_length=config['data']['frame_length'],
                                   frame_step=config['data']['frame_step'],
                                   fft_length=config['data']['fft_length'])
    magnitude_spectrograms = tf.abs(stfts)
    num_spectrogram_bins = magnitude_spectrograms.shape[-1].value

    # These are to be set according to human speech. Values specified in the paper
    lower_edge_hertz, upper_edge_hertz, num_mel_bins = config['data']['lower_edge_hertz'], \
                                                       config['data']['upper_edge_hertz'], \
                                                       config['data']['num_mel_bins']

    linear_to_mel_weight_matrix = tf.contrib.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, config['data']['wav_sample_rate'],
        lower_edge_hertz, upper_edge_hertz)
    mel_spectrograms = tf.tensordot(magnitude_spectrograms,
                                    linear_to_mel_weight_matrix, 1)

    mel_spectrograms = tf.squeeze(
        mel_spectrograms)  # Removes all dimensions that are 1

    # This finishes processing of audio
    # Now we build the targets and inputs to the decoder

    # We append a frame of 0s at the end of targets to signal end of target
    end_tensor = tf.tile([[0.0]],
                         multiples=[1, tf.shape(mel_spectrograms)[-1]])
    targets = tf.concat([mel_spectrograms, end_tensor], axis=0)

    # We append a frame of 0s at the start of decoder_inputs to set input at t=1
    start_tensor = tf.tile([[0.0]],
                           multiples=[1, tf.shape(mel_spectrograms)[-1]])
    target_inputs = tf.concat([start_tensor, mel_spectrograms], axis=0)

    # Again, we require the length of every target sequence as an input to the model.
    # This is because we will create batches of variable-length inputs,
    # where all sequences are forced to the same length by padding at the end with 0s.
    # This batch will be passed to a dynamic RNN, which will use the sequence lengths
    # to mask the outputs appropriately. The RNN will still be unrolled to the common length.
    # This method enables us to do mini-batch SGD on variable-length inputs.
    target_sequence_lengths = tf.shape(targets)[0]

    # Now we return the values that our model requires as a dict (just like old feed_dict structure)
    return {
        'inputs': text_idx,
        'targets': targets,
        'input_sequence_lengths': input_sequence_lengths,
        'target_sequence_lengths': target_sequence_lengths,
        'target_inputs': target_inputs,
        'debug_data': waveform
    }
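The vocabulary object above is project-specific and not shown. One way to express the same character-to-id mapping without tf.map_fn is a native lookup table; a sketch under that assumption, in the same TF 1.x style as the code above (in graph mode, run tf.tables_initializer() before using the table):

    import tensorflow as tf

    # Hypothetical three-character vocabulary; ids start at 1, 0 is the out-of-vocabulary default.
    table = tf.lookup.StaticHashTable(
        tf.lookup.KeyValueTensorInitializer(
            tf.constant(['a', 'b', 'c']),
            tf.constant([1, 2, 3], dtype=tf.int64)),
        default_value=tf.constant(0, tf.int64))
    text = tf.string_split(['cab'], delimiter='')          # rank-2 SparseTensor of characters
    text_idx = tf.SparseTensor(text.indices,
                               table.lookup(text.values),  # batched lookup on the dense values
                               text.dense_shape)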
Example #25
def build_sparse_matrix(L):
    L = L.tocoo()
    indices = np.column_stack((L.row, L.col))
    L = tf.SparseTensor(indices, L.data, L.shape)
    return tf.sparse_reorder(L)
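Usage sketch: tf.sparse_reorder puts the indices into canonical row-major order, which most downstream sparse ops require, since COO data carries no ordering guarantee:

    import numpy as np
    import scipy.sparse as sp

    L_mat = sp.random(5, 5, density=0.2, format='csr', dtype=np.float32)
    L_sparse = build_sparse_matrix(L_mat)  # tf.SparseTensor with canonically ordered indices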
Example #26
def random_sparse(shape, nnz):
    max_index = np.prod(shape)
    indices = np.random.choice(max_index, nnz, replace=False)
    indices.sort()
    indices = np.stack(np.unravel_index(indices, shape), axis=-1)
    return tf.SparseTensor(indices, np.random.normal(size=(nnz, )), shape)
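Two details worth noting: np.random.choice with replace=False guarantees distinct positions, and sorting the flat indices before np.unravel_index leaves the [nnz, ndim] indices already in row-major order, so no tf.sparse_reorder is needed. Usage sketch:

    sp_noise = random_sparse([4, 6], nnz=5)  # 5 Gaussian-valued entries at distinct positions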
Example #27
    def sample_body(n,
                    sample,
                    n_produced=0,
                    n_total_drawn=0,
                    eff=1.0,
                    is_sampled=None,
                    weights_scaling=0.):
        eff = tf.reduce_max(input_tensor=[eff, ztf.to_real(1e-6)])

        n_to_produce = n - n_produced

        if isinstance(
                limits,
                EventSpace):  # EXPERIMENTAL(Mayou36): added to test EventSpace
            limits.create_limits(n=n)

        do_print = settings.get_verbosity() > 5
        if do_print:
            print_op = tf.print("Number of samples to produce:", n_to_produce,
                                " with efficiency ", eff,
                                " with total produced ", n_produced,
                                " and total drawn ", n_total_drawn,
                                " with weights scaling", weights_scaling)
        with tf.control_dependencies([print_op] if do_print else []):
            n_to_produce = tf.identity(n_to_produce)
        if dynamic_array_shape:
            n_to_produce = tf.cast(ztf.to_real(n_to_produce) / eff * 1.1,
                                   dtype=tf.int32) + 10  # just to make sure
            # TODO: adjustable efficiency cap for memory efficiency (prevent too many samples at once produced)
            max_produce_cap = tf.cast(8e5, dtype=tf.int32)
            # protect against int overflow (n_to_produce -> negative) by falling back
            # to the cap; otherwise cap the batch size to force serial production
            n_to_produce = tf.where(n_to_produce < 0, max_produce_cap,
                                    tf.minimum(n_to_produce, max_produce_cap))
            new_limits = limits
        else:
            # TODO(Mayou36): add cap for n_to_produce here as well
            if multiple_limits:
                raise WorkInProgressError(
                    "Multiple limits for fixed event space not yet implemented"
                )
            is_not_sampled = tf.logical_not(is_sampled)
            (lower, ), (upper, ) = limits.limits
            lower = tuple(
                tf.boolean_mask(tensor=low, mask=is_not_sampled)
                for low in lower)
            upper = tuple(
                tf.boolean_mask(tensor=up, mask=is_not_sampled)
                for up in upper)
            new_limits = limits.with_limits(limits=((lower, ), (upper, )))
            draw_indices = tf.where(is_not_sampled)

        rnd_sample, thresholds_unscaled, weights, weights_max, n_drawn = sample_and_weights(
            n_to_produce=n_to_produce, limits=new_limits, dtype=dtype)

        n_drawn = tf.cast(n_drawn, dtype=tf.int32)
        if run.numeric_checks:
            assert_op_n_drawn = tf.compat.v1.assert_non_negative(n_drawn)
            tfdeps = [assert_op_n_drawn]
        else:
            tfdeps = []
        with tf.control_dependencies(tfdeps):
            n_total_drawn += n_drawn

            probabilities = prob(rnd_sample)
        shape_rnd_sample = tf.shape(input=rnd_sample)[0]
        if run.numeric_checks:
            assert_prob_rnd_sample_op = tf.compat.v1.assert_equal(
                tf.shape(input=probabilities), shape_rnd_sample)
            tfdeps = [assert_prob_rnd_sample_op]
        else:
            tfdeps = []
        # assert_weights_rnd_sample_op = tf.assert_equal(tf.shape(weights), shape_rnd_sample)
        # print_op = tf.print("shapes: ", tf.shape(weights), shape_rnd_sample, "shapes end")
        with tf.control_dependencies(tfdeps):
            probabilities = tf.identity(probabilities)
        if prob_max is None or weights_max is None:  # TODO(performance): estimate prob_max, after enough estimations -> fix it?
            # TODO(Mayou36): This control dependency is needed because otherwise the max won't be determined
            # correctly. A bug report will be filed (WIP).
            # The behavior is very odd: if we do not force a kind of copy, the `reduce_max` returns
            # a value smaller by a factor of 1e-14
            # with tf.control_dependencies([probabilities]):
            # UPDATE: this works now? Was it just a one-time bug?

            # safety margin, predicting future, improve for small samples?
            weights_maximum = tf.reduce_max(input_tensor=weights)
            weights_clipped = tf.maximum(weights, weights_maximum * 1e-5)
            # prob_weights_ratio = probabilities / weights
            prob_weights_ratio = probabilities / weights_clipped
            # min_prob_weights_ratio = tf.reduce_min(prob_weights_ratio)
            max_prob_weights_ratio = tf.reduce_max(
                input_tensor=prob_weights_ratio)
            ratio_threshold = 50000000.
            # clipping means that we don't scale beyond a certain threshold.
            # To properly account for very small numbers, the threshold should be scaled to match the ratio,
            # but if the weight of a sample is very low (compared to the other weights), this would force the
            # acceptance chance of the other samples to decrease strongly. We introduce a cut here, meaning that
            # any event with an acceptance chance of less than 1 in ratio_threshold will be underestimated.
            # TODO(Mayou36): make ratio_threshold a global setting
            # max_prob_weights_ratio_clipped = tf.minimum(max_prob_weights_ratio,
            #                                             min_prob_weights_ratio * ratio_threshold)
            max_prob_weights_ratio_clipped = max_prob_weights_ratio
            weights_scaling = tf.maximum(
                weights_scaling, max_prob_weights_ratio_clipped * (1 + 1e-2))
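            # at this point weights_scaling >= max(probabilities / weights_clipped), so
            # weights_scaled = weights_scaling * weights >= probabilities (up to the clip):
            # the envelope condition that unbiased accept/reject sampling requires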
        else:
            weights_scaling = prob_max / weights_max
            min_prob_weights_ratio = weights_scaling

        weights_scaled = weights_scaling * weights * (1 + 1e-8)  # numerical epsilon
        random_thresholds = thresholds_unscaled * weights_scaled
        if run.numeric_checks:
            invalid_probs_weights = tf.greater(probabilities, weights_scaled)
            failed_weights = tf.boolean_mask(tensor=weights_scaled,
                                             mask=invalid_probs_weights)
            failed_probs = tf.boolean_mask(tensor=probabilities,
                                           mask=invalid_probs_weights)

            def bias_print():
                tf.print(
                    "HACK WARNING: if the following is NOT empty, your sampling _may_ be biased."
                    " Failed weights:", failed_weights, " failed probs",
                    failed_probs)

            # tf.cond(tf.not_equal(tf.shape(input=failed_weights), [0]), bias_print, lambda: None)

            assert_no_failed_probs = tf.compat.v1.assert_equal(
                tf.shape(input=failed_weights), [0])
            # assert_op = [print_op]
            assert_op = [assert_no_failed_probs]
            # for weights scaled more than ratio_threshold
            # assert_op = [tf.assert_greater_equal(x=weights_scaled, y=probabilities,
            #                                      data=[tf.shape(failed_weights), failed_weights, failed_probs],
            #                                      message="Not all weights are >= probs so the sampling "
            #                                              "will be biased. If a custom `sample_and_weights` "
            #                                              "was used, make sure that either the shape of the "
            #                                      "custom sampler (resp. its weights) overlap better "
            #                                              "or decrease the `max_weight`")]
            #
            # # check disabled (below not added to deps)
            # assert_scaling_op = tf.assert_less(weights_scaling / min_prob_weights_ratio, z.constant(ratio_threshold),
            #                                    data=[weights_scaling, min_prob_weights_ratio],
            #                                    message="The ratio between the probabilities from the pdf and the"
            #                                    f"probability from the sampler is higher "
            #                                    f" than {ratio_threshold}. This will most probably bias the sampling. "
            #                                    f"Use importance sampling or, to disable this check, do"
            #                                    f"zfit.run.numeric_checks = False")
            # assert_op.append(assert_scaling_op)
        else:
            assert_op = []
        with tf.control_dependencies(assert_op):
            take_or_not = probabilities > random_thresholds
        take_or_not = take_or_not[0] if len(
            take_or_not.shape) == 2 else take_or_not
        filtered_sample = tf.boolean_mask(tensor=rnd_sample,
                                          mask=take_or_not,
                                          axis=0)

        n_accepted = tf.shape(input=filtered_sample)[0]
        n_produced_new = n_produced + n_accepted
        if not dynamic_array_shape:
            indices = tf.boolean_mask(tensor=draw_indices, mask=take_or_not)
            current_sampled = tf.sparse.to_dense(tf.SparseTensor(
                indices=indices,
                values=tf.broadcast_to(input=(True, ), shape=(n_accepted, )),
                dense_shape=(tf.cast(n, dtype=tf.int64), )),
                                                 default_value=False)
            is_sampled = tf.logical_or(is_sampled, current_sampled)
            indices = indices[:, 0]
        else:
            indices = tf.range(n_produced, n_produced_new)

        sample_new = sample.scatter(indices=tf.cast(indices, dtype=tf.int32),
                                    value=filtered_sample)

        # efficiency (estimate) of how many samples we get
        eff = tf.reduce_max(
            input_tensor=[ztf.to_real(
                n_produced_new), ztf.to_real(1.)]) / tf.reduce_max(
                    input_tensor=[ztf.to_real(n_total_drawn),
                                  ztf.to_real(1.)])
        return n, sample_new, n_produced_new, n_total_drawn, eff, is_sampled, weights_scaling
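# For orientation: the loop body above is one iteration of importance-weighted
# accept/reject sampling. A stripped-down sketch of the core step (added for
# illustration, outside the zfit machinery; `prob_fn`, `sampler_fn` and
# `weight_max` are assumed names):
def accept_reject_once(prob_fn, sampler_fn, n_to_produce, weight_max):
    # draw candidates from the proposal and uniform thresholds in [0, weight_max)
    candidates = sampler_fn(n_to_produce)
    thresholds = tf.random.uniform(shape=[n_to_produce], maxval=weight_max)
    # keep a candidate wherever the target density exceeds its threshold
    return tf.boolean_mask(tensor=candidates,
                           mask=prob_fn(candidates) > thresholds,
                           axis=0)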
Example #28
0
    def testBatchedSparseTensorInferedShapeEnqueueMany(self):
        sparse = tf.SparseTensor(indices=[[0]], values=[1.0], shape=[1])
        self.assertAllEqual(sparse.shape.get_shape().as_list(), [1])
        batched = tf.train.batch([sparse], batch_size=2, enqueue_many=True)
        self.assertAllEqual(batched.shape.get_shape().as_list(), [1])
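# Added note: in current TensorFlow the SparseTensor constructor keyword is
# `dense_shape` rather than `shape`; the equivalent modern construction is:
#     sparse = tf.SparseTensor(indices=[[0]], values=[1.0], dense_shape=[1])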
Example #29
0
def crnn_fn(features, labels, mode, params):
    """
    :param features: dict {
                            'image'
                            'image_width'
                            'corpus'
                            }
    :param labels: labels, flattened (1D) array with encoded labels (one code per character)
    :param mode:
    :param params: dict {
                            'Params'
                        }
    :return:
    """

    parameters = params.get('Params')
    assert isinstance(parameters, Params)

    if mode != tf.estimator.ModeKeys.TRAIN:
        parameters.keep_prob_dropout = 1.0

    conv = deep_cnn(features['image'], (mode == tf.estimator.ModeKeys.TRAIN),
                    summaries=False)

    logprob, raw_pred = deep_bidirectional_lstm(conv,
                                                features['corpus'],
                                                params=parameters,
                                                summaries=False)

    # Compute seq_len from image width
    n_pools = CONST.DIMENSION_REDUCTION_W_POOLING  # 2x2 pooling in dimension W on layer 1 and 2
    seq_len_inputs = tf.divide(
        features['image_width'], n_pools, name='seq_len_input_op') - 1

    predictions_dict = {'prob': logprob, 'raw_predictions': raw_pred}

    if mode != tf.estimator.ModeKeys.PREDICT:
        # Alphabet and codes
        keys = [c for c in parameters.alphabet.encode('latin1')]
        values = parameters.alphabet_codes

        # Convert string label to code label
        with tf.name_scope('str2code_conversion'):
            table_str2int = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(
                    keys, values, key_dtype=tf.int64, value_dtype=tf.int64),
                -1)
            splitted = tf.string_split(labels, delimiter='')
            values_int = tf.cast(
                tf.squeeze(tf.decode_raw(splitted.values, tf.uint8)), tf.int64)
            codes = table_str2int.lookup(values_int)
            codes = tf.cast(codes, tf.int32)
            sparse_code_target = tf.SparseTensor(splitted.indices, codes,
                                                 splitted.dense_shape)

        seq_lengths_labels = tf.bincount(
            tf.cast(sparse_code_target.indices[:, 0],
                    tf.int32),  # array of label lengths
            minlength=tf.shape(predictions_dict['prob'])[1])

        # Loss
        # ----
        # >>> Cannot have longer labels than predictions -> error

        with tf.control_dependencies([
                tf.less_equal(sparse_code_target.dense_shape[1],
                              tf.reduce_max(tf.cast(seq_len_inputs, tf.int64)))
        ]):
            loss_ctc = tf.nn.ctc_loss(
                labels=sparse_code_target,
                inputs=predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                preprocess_collapse_repeated=False,
                ctc_merge_repeated=True,
                ignore_longer_outputs_than_inputs=
                True,  # returns zero gradient in case it happens -> ema loss = NaN
                time_major=True)
            loss_ctc = tf.reduce_mean(loss_ctc)
            loss_ctc = tf.Print(loss_ctc, [loss_ctc], message='* Loss : ')

        global_step = tf.train.get_or_create_global_step()
        # Create an ExponentialMovingAverage object
        ema = tf.train.ExponentialMovingAverage(decay=0.99,
                                                num_updates=global_step,
                                                zero_debias=True)
        # Create the shadow variables, and add op to maintain moving averages
        maintain_averages_op = ema.apply([loss_ctc])
        loss_ema = ema.average(loss_ctc)

        # Train op
        # --------
        if parameters.learning_rate_decay:
            learning_rate = tf.train.exponential_decay(
                parameters.learning_rate,
                global_step,
                parameters.learning_rate_steps,
                parameters.learning_rate_decay,
                staircase=True)
        else:
            learning_rate = tf.constant(parameters.learning_rate)

        if parameters.optimizer == 'ada':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate)
        elif parameters.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate, beta1=0.5,
                epsilon=1e-07)  # at 1e-08 sometimes exploding gradient
        elif parameters.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(learning_rate)
        else:
            raise NotImplementedError(
                'Unknown optimizer {}'.format(parameters.optimizer))

        if not parameters.train_cnn:
            trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          'deep_bidirectional_lstm')
            print('Training LSTM only')
        else:
            trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        opt_op = optimizer.minimize(loss_ctc,
                                    global_step=global_step,
                                    var_list=trainable)

        with tf.control_dependencies(update_ops + [opt_op]):
            train_op = tf.group(maintain_averages_op)

        # Summaries
        # ---------
        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('losses/ctc_loss', loss_ctc)
    else:
        loss_ctc, train_op = None, None

    if mode in [
            tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT,
            tf.estimator.ModeKeys.TRAIN
    ]:
        with tf.name_scope('code2str_conversion'):
            keys = tf.cast(parameters.alphabet_decoding_codes, tf.int64)
            values = [c for c in parameters.alphabet_decoding]
            table_int2str = tf.contrib.lookup.HashTable(
                tf.contrib.lookup.KeyValueTensorInitializer(keys, values), '?')

            sparse_code_pred, log_probability = tf.nn.ctc_beam_search_decoder(
                predictions_dict['prob'],
                sequence_length=tf.cast(seq_len_inputs, tf.int32),
                merge_repeated=False,
                beam_width=100,
                top_paths=parameters.nb_logprob)
            # confidence value

            predictions_dict['score'] = log_probability

            sequence_lengths_pred = [
                tf.bincount(tf.cast(sparse_code_pred[i].indices[:, 0],
                                    tf.int32),
                            minlength=tf.shape(predictions_dict['prob'])[1])
                for i in range(parameters.top_paths)
            ]

            pred_chars = [
                table_int2str.lookup(sparse_code_pred[i])
                for i in range(parameters.top_paths)
            ]

            list_preds = [
                get_words_from_chars(pred_chars[i].values,
                                     sequence_lengths=sequence_lengths_pred[i])
                for i in range(parameters.top_paths)
            ]

            predictions_dict['words'] = tf.stack(list_preds)

            tf.summary.text('predicted_words',
                            predictions_dict['words'][0][:10])

    # Evaluation ops
    # --------------
    if mode == tf.estimator.ModeKeys.EVAL:
        with tf.name_scope('evaluation'):
            CER = tf.metrics.mean(tf.edit_distance(
                sparse_code_pred[0], tf.cast(sparse_code_target,
                                             dtype=tf.int64)),
                                  name='CER')

            # Convert label codes to decoding alphabet to compare predicted and ground-truth words
            target_chars = table_int2str.lookup(
                tf.cast(sparse_code_target, tf.int64))
            target_words = get_words_from_chars(target_chars.values,
                                                seq_lengths_labels)
            accuracy = tf.metrics.accuracy(target_words,
                                           predictions_dict['words'][0],
                                           name='accuracy')

            eval_metric_ops = {
                'eval/accuracy': accuracy,
                'eval/CER': CER,
            }
            CER = tf.Print(CER, [CER], message='-- CER : ')
            accuracy = tf.Print(accuracy, [accuracy], message='-- Accuracy : ')

    else:
        eval_metric_ops = None

    export_outputs = {
        'predictions': tf.estimator.export.PredictOutput(predictions_dict)
    }

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions_dict,
        loss=loss_ctc,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        scaffold=tf.train.Scaffold()
        # scaffold=tf.train.Scaffold(init_fn=None)  # Specify init_fn to restore from previous model
    )
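# tf.nn.ctc_loss requires labels as a tf.SparseTensor, which is why the string
# labels above are split into `sparse_code_target`. A minimal sketch (added, not
# from the original) of the same conversion when labels arrive as dense integer
# codes padded with a sentinel value:
def dense_labels_to_sparse(labels, pad_value=-1):
    # tf.where returns the [row, col] indices of all non-padding positions
    idx = tf.where(tf.not_equal(labels, pad_value))
    vals = tf.gather_nd(labels, idx)
    return tf.SparseTensor(idx, vals, tf.shape(labels, out_type=tf.int64))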
Example #30
0
def rotate_gconv_kernels(kernel, periodicity=2 * np.pi, diskMask=True):
    """ Rotates the set of SE2 kernels.
        Rotation of SE2 kernels involves planar rotations and a shift in orientation,
        see e.g. the left-regular representation L_g of the roto-translation group on SE(2) images,
        (Eq. 3) of the MICCAI 2018 paper.

        INPUT:
            - kernel, a TensorFlow tensor with expected shape:
                [Height, Width, nbOrientations, ChannelsIN, ChannelsOUT]

        INPUT (optional):
            - periodicity, rotate in total over 2*np.pi or np.pi
            - diskMask, True or False, specifying whether or not to mask the kernels spatially

        OUTPUT:
            - set_of_rotated_kernels, a TensorFlow tensor with dimensions:
                [nbOrientations, Height, Width, nbOrientations, ChannelsIN, ChannelsOUT]
              I.e., for each rotation angle a rotated (shift-twisted) version of the input kernel.
    """

    # Rotation of an SE2 kernel consists of two parts:
    # PART 1. Planar rotation
    # PART 2. A shift in theta direction

    # Unpack the shape of the input kernel
    kernelSizeH, kernelSizeW, orientations_nb, channelsIN, channelsOUT = map(
        int, kernel.shape)

    # PART 1 (planar rotation)
    # Flatten the baseline kernel
    # Resulting shape: [kernelSizeH*kernelSizeW,orientations_nb*channelsIN*channelsOUT]
    #
    kernel_flat = tf.reshape(
        kernel, [kernelSizeH * kernelSizeW, orientations_nb * channelsIN * channelsOUT])

    # Generate a set of rotated kernels via rotation matrix multiplication
    # For efficiency purpose, the rotation matrix is implemented as a sparse matrix object
    # Result: The non-zero indices and weights of the rotation matrix
    idx, vals = MultiRotationOperatorMatrixSparse(
        [kernelSizeH, kernelSizeW],
        orientations_nb,
        periodicity=periodicity,
        diskMask=diskMask)

    # The corresponding sparse rotation matrix
    # Resulting shape: [nbOrientations*kernelSizeH*kernelSizeW,kernelSizeH*kernelSizeW]
    #
    rotOp_matrix = tf.SparseTensor(
        idx, vals,
        [orientations_nb * kernelSizeH * kernelSizeW, kernelSizeH * kernelSizeW])

    # Matrix multiplication (each 2D plane is now rotated)
    # Resulting shape: [nbOrientations*kernelSizeH*kernelSizeW, orientations_nb*channelsIN*channelsOUT]
    #
    kernels_planar_rotated = tf.sparse.sparse_dense_matmul(
        tf.cast(rotOp_matrix, kernel_flat.dtype), kernel_flat)
    kernels_planar_rotated = tf.reshape(
        kernels_planar_rotated, [orientations_nb, kernelSizeH, kernelSizeW, orientations_nb, channelsIN, channelsOUT])

    # PART 2 (shift in theta direction)
    set_of_rotated_kernels = [None] * orientations_nb
    for orientation in range(orientations_nb):
        # [kernelSizeH,kernelSizeW,orientations_nb,channelsIN,channelsOUT]
        kernels_temp = kernels_planar_rotated[orientation]
        # [kernelSizeH,kernelSizeW,channelsIN,channelsOUT,orientations_nb]
        kernels_temp = tf.transpose(a=kernels_temp, perm=[0, 1, 3, 4, 2])
        # [kernelSizeH*kernelSizeW*channelsIN*channelsOUT*orientations_nb]
        kernels_temp = tf.reshape(
            kernels_temp, [kernelSizeH * kernelSizeW * channelsIN * channelsOUT, orientations_nb])
        # Roll along the orientation axis
        roll_matrix = tf.constant(
            np.roll(np.identity(orientations_nb), orientation, axis=1), dtype=tf.float32)
        kernels_temp = tf.matmul(kernels_temp, roll_matrix)
        kernels_temp = tf.reshape(
            kernels_temp, [kernelSizeH, kernelSizeW, channelsIN, channelsOUT, orientations_nb])  # [Nx,Ny,Nin,Nout,Ntheta]
        kernels_temp = tf.transpose(a=kernels_temp, perm=[0, 1, 4, 2, 3])
        set_of_rotated_kernels[orientation] = kernels_temp

    return tf.stack(set_of_rotated_kernels)
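# A tiny standalone check (added for illustration) of the roll-matrix trick in
# PART 2: multiplying by a rolled identity cyclically shifts the orientation axis.
roll_matrix = np.roll(np.identity(4), 1, axis=1)
x = np.arange(4.0)      # orientations [0, 1, 2, 3]
print(x @ roll_matrix)  # [3. 0. 1. 2.]: shifted by one orientation slot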