Ejemplo n.º 1
0
def create_feed_dict(batch, wLabels=True, dtype='train'):
    """
    Assemble the feed dictionary for one batch.

    Parameters
    ----------
    batch:      iterable of (X, Y, DepEdges, SrlEdges) tuples
    wLabels:    if true, the label placeholders are fed from Y
    dtype:      split name; any value other than 'train' feeds 1.0 to both
                dropout placeholders

    Returns
    -------
    feed_dict, para_id_list, entity_list
    """

    def to_sparse_value(adj):
        # Stack .row/.col and transpose to get the index array; values and
        # shape are taken from the matrix unchanged (presumably a scipy COO
        # matrix -- confirm against get_adj).
        return tf.SparseTensorValue(indices=np.array([adj.row, adj.col]).T,
                                    values=adj.data,
                                    dense_shape=adj.shape)

    X, Y, DepEdges, SrlEdges = zip(*batch)

    (x_paragraph, x_sentences, x_entity, x_len_paragraph, x_len_sentences,
     x_len_entity, x_mask_sentence, para_id_list,
     entity_list) = get_placeholder_values(X)

    feed_dict = {
        input_x_paragraph: x_paragraph,
        input_x_sentence: x_sentences,
        input_x_entity: x_entity,
        input_x_len_paragraph: x_len_paragraph,
        input_x_len_sentence: x_len_sentences,
        input_x_len_entity: x_len_entity,
        input_x_mask_sentence: x_mask_sentence,
    }

    if wLabels:
        y_known, y_start, y_end = get_placeholder_labels(Y)
        feed_dict[input_y_known] = y_known
        feed_dict[input_y_start] = y_start
        feed_dict[input_y_end] = y_end

    # Per-sample, per-label adjacency matrices for both edge types.
    de_adj_in, de_adj_out = get_adj(DepEdges, batch_size, paragraph_size,
                                    num_deLabel)
    srl_adj_in, srl_adj_out = get_adj(SrlEdges, batch_size, paragraph_size,
                                      num_srlLabel)

    for sample in range(batch_size):
        for label in range(num_deLabel):
            feed_dict[de_adj_mat_in[sample][label]] = to_sparse_value(
                de_adj_in[sample][label])
            feed_dict[de_adj_mat_out[sample][label]] = to_sparse_value(
                de_adj_out[sample][label])

    for sample in range(batch_size):
        for label in range(num_srlLabel):
            feed_dict[srl_adj_mat_in[sample][label]] = to_sparse_value(
                srl_adj_in[sample][label])
            feed_dict[srl_adj_mat_out[sample][label]] = to_sparse_value(
                srl_adj_out[sample][label])

    # Outside training both dropout placeholders are pinned to 1.0.
    if dtype != 'train':
        feed_dict[dropout] = 1.0
        feed_dict[rec_dropout] = 1.0

    return feed_dict, para_id_list, entity_list
Ejemplo n.º 2
0
def _convert_sparse_matrix_to_sparse_tensor(X, got_limit=False, limit=5):
    """Convert a scipy sparse matrix into a pair of SparseTensorValues.

    Args:
        X: sparse matrix exposing ``tocoo()``.
        got_limit: when true, stored values greater than ``limit`` are
            replaced by ``limit``.
        limit: upper bound applied when ``got_limit`` is true.

    Returns:
        A 2-tuple of ``tf.SparseTensorValue`` sharing the same indices and
        dense shape: the first carries the column index of every stored
        entry as its values, the second carries the (optionally clamped)
        stored data.
    """
    coo = X.tocoo()
    # np.mat was removed in NumPy 2.0; a transposed ndarray holds the same
    # (row, col) pairs.
    indices = np.array([coo.row, coo.col]).T
    values = coo.data
    if got_limit:
        # Clamp on a copy: tocoo() may hand back X itself (or share its
        # buffer), and the caller's matrix must not be modified.
        values = values.copy()
        values[values > limit] = limit
    return (
        tf.SparseTensorValue(indices, coo.col, coo.shape),
        tf.SparseTensorValue(indices, values, coo.shape),
    )
Ejemplo n.º 3
0
    def testMakeOutputDictErrorSparse(self):
        """to_instance_dicts must raise ValueError on malformed sparse fetches."""
        decode_error = 'cannot be decoded by ListColumnRepresentation'
        schema = self.toSchema({'a': tf.VarLenFeature(tf.string)})

        # SparseTensor that cannot be represented as VarLenFeature.
        not_var_len = tf.SparseTensorValue(
            indices=np.array([(0, 2), (0, 4), (0, 8)]),
            values=np.array([10.0, 20.0, 30.0]),
            dense_shape=(1, 20))
        with self.assertRaisesRegexp(ValueError, decode_error):
            impl_helper.to_instance_dicts(schema, {'a': not_var_len})

        # SparseTensor of invalid rank.
        bad_rank = tf.SparseTensorValue(
            indices=np.array([(0, 0, 1), (0, 0, 2), (0, 0, 3)]),
            values=np.array([10.0, 20.0, 30.0]),
            dense_shape=(1, 10, 10))
        with self.assertRaisesRegexp(ValueError, decode_error):
            impl_helper.to_instance_dicts(schema, {'a': bad_rank})

        # SparseTensor with indices that are out of order.
        unordered = tf.SparseTensorValue(
            indices=np.array([(0, 2), (2, 4), (1, 8)]),
            values=np.array([10.0, 20.0, 30.0]),
            dense_shape=(3, 20))
        with self.assertRaisesRegexp(
                ValueError, 'Encountered out-of-order sparse index'):
            impl_helper.to_instance_dicts(schema, {'a': unordered})

        # SparseTensors with different batch dimension sizes.
        two_feature_schema = self.toSchema({
            'a': tf.VarLenFeature(tf.string),
            'b': tf.VarLenFeature(tf.string)
        })
        batch_of_one = tf.SparseTensorValue(indices=np.array([(0, 0)]),
                                            values=np.array([10.0]),
                                            dense_shape=(1, 20))
        batch_of_two = tf.SparseTensorValue(indices=np.array([(0, 0)]),
                                            values=np.array([10.0]),
                                            dense_shape=(2, 20))
        batch_size_error = (
            r'Inconsistent batch sizes: "\w" had batch dimension \d, "\w" had batch'
            r' dimension \d')
        with self.assertRaisesRegexp(ValueError, batch_size_error):
            impl_helper.to_instance_dicts(two_feature_schema, {
                'a': batch_of_one,
                'b': batch_of_two
            })
Ejemplo n.º 4
0
    def create_feed_dict(self, batch, wLabels=True, dtype='train'):
        """
        Creates a feed dictionary for the batch

        Parameters
        ----------
        batch:      iterable of (X, Y, et_idx, ETEdges, DepEdges) tuples
        wLabels:    whether the labels Y are fed as well
        dtype:      split name; any value other than 'train' feeds 1.0 to
                    both dropout placeholders

        Returns
        -------
        feed_dict   Feed dictionary to be fed during sess.run
        """

        def to_sparse_value(adj):
            # Index array is the transposed stack of .row and .col; values
            # and shape are passed through unchanged.
            return tf.SparseTensorValue(
                indices=np.array([adj.row, adj.col]).T,
                values=adj.data,
                dense_shape=adj.shape)

        X, Y, et_idx, ETEdges, DepEdges = zip(*batch)
        x_pad, x_len, et_pad, et_mask, seq_len, max_et = self.pad_dynamic(
            X, et_idx)

        feed_dict = {
            self.input_x: np.array(x_pad),
            self.x_len: np.array(x_len),
        }
        if wLabels:
            feed_dict[self.input_y] = np.array(Y)
        feed_dict[self.et_idx] = np.array(et_pad)
        feed_dict[self.et_mask] = np.array(et_mask)
        feed_dict[self.seq_len] = seq_len
        feed_dict[self.max_et] = max_et

        n_samples = self.p.batch_size
        # max_et + 1(DCT), as noted by the original author.
        et_adj_in, et_adj_out = self.get_adj(ETEdges, n_samples, max_et + 1,
                                             self.num_etLabel)
        de_adj_in, de_adj_out = self.get_adj(DepEdges, n_samples, seq_len,
                                             self.num_deLabel)

        for sample in range(n_samples):
            for label in range(self.num_etLabel):
                feed_dict[self.et_adj_mat_in[sample][label]] = to_sparse_value(
                    et_adj_in[sample][label])
                feed_dict[self.et_adj_mat_out[sample][label]] = to_sparse_value(
                    et_adj_out[sample][label])

            for label in range(self.num_deLabel):
                feed_dict[self.de_adj_mat_in[sample][label]] = to_sparse_value(
                    de_adj_in[sample][label])
                feed_dict[self.de_adj_mat_out[sample][label]] = to_sparse_value(
                    de_adj_out[sample][label])

        # Outside training both dropout placeholders are pinned to 1.0.
        if dtype != 'train':
            feed_dict[self.dropout] = 1.0
            feed_dict[self.rec_dropout] = 1.0

        return feed_dict
Ejemplo n.º 5
0
    def create_feed_dict(self, batch, wLabels=True, dtype='train'):
        """
        Creates a feed dictionary for the batch

        Parameters
        ----------
        batch:      contains a batch of bags, each an
                    (X, Y, et_idx, ETEdges, DepEdges) tuple
        wLabels:    Whether batch contains labels or not
        dtype:      Indicates the split of the data; any value other than
                    'train' feeds 1.0 to both dropout placeholders

        Returns
        -------
        feed_dict   Feed dictionary to be fed during sess.run
        """

        def to_sparse_value(adj):
            # Index array is the transposed stack of .row and .col; values
            # and shape are passed through unchanged.
            return tf.SparseTensorValue(
                indices=np.array([adj.row, adj.col]).T,
                values=adj.data,
                dense_shape=adj.shape)

        X, Y, et_idx, ETEdges, DepEdges = zip(*batch)
        x_pad, x_len, et_pad, et_mask, seq_len, max_et = self.pad_dynamic(
            X, et_idx)

        feed_dict = {
            self.input_x: np.array(x_pad),
            self.x_len: np.array(x_len),
        }
        if wLabels:
            feed_dict[self.input_y] = np.array(Y)
        feed_dict[self.et_idx] = np.array(et_pad)
        feed_dict[self.et_mask] = np.array(et_mask)
        feed_dict[self.seq_len] = seq_len
        feed_dict[self.max_et] = max_et

        n_samples = self.p.batch_size
        # max_et + 1(DCT), as noted by the original author.
        et_adj = self.get_adj(ETEdges, n_samples, max_et + 1,
                              self.num_etLabel)
        de_adj = self.get_adj(DepEdges, n_samples, seq_len, self.num_deLabel)

        for sample in range(n_samples):
            for label in range(self.num_etLabel):
                feed_dict[self.et_adj_mat[sample][label]] = to_sparse_value(
                    et_adj[sample][label])

            for label in range(self.num_deLabel):
                feed_dict[self.de_adj_mat[sample][label]] = to_sparse_value(
                    de_adj[sample][label])

        if dtype != 'train':
            feed_dict[self.dropout] = 1.0
            feed_dict[self.rec_dropout] = 1.0

        return feed_dict
    def test_evaluate(self):
        """Run one training step plus a holdout evaluation, ten times over.

        Each round builds a fresh graph and session and checks that both
        fetched parameters are not None and that the evaluation result is
        not NaN.
        """
        counts = np.array([[1, 2, 1, 0, 0, 0], [0, 1, 2, 1, 0, 0],
                           [0, 0, 1, 2, 1, 0], [0, 0, 0, 1, 2, 1]])
        md = np.array([[1, 2, 3, 4]]).T
        md_holdout = np.array([[1.5, 2.5]]).T
        counts_holdout = np.array([[1, 2, 1, 0, 0, 0], [0, 1, 2, 1, 0, 0]],
                                  dtype=np.float32)
        N = counts.shape[0]
        # D is taken from the holdout table, as in the original sequence of
        # assignments; both tables have the same number of columns.
        D = counts_holdout.shape[1]
        p = md.shape[1]
        table = coo_matrix(counts)
        table_holdout = coo_matrix(counts_holdout)

        # Sparse feeds are plain numpy containers, independent of the graph.
        y_data = tf.SparseTensorValue(
            indices=np.array([table.row, table.col]).T,
            values=table.data,
            dense_shape=(N, D))
        y_holdout = tf.SparseTensorValue(
            indices=np.array([table_holdout.row, table_holdout.col]).T,
            values=table_holdout.data,
            dense_shape=table_holdout.shape)

        opts = Options(batch_size=5,
                       num_neg_samples=3,
                       learning_rate=1e-1,
                       clipping_size=10,
                       beta_mean=0,
                       beta_scale=1,
                       gamma_mean=0,
                       gamma_scale=1)

        for _ in range(10):
            with tf.Graph().as_default(), tf.Session() as sess:
                G_data = tf.constant(md, dtype=tf.float32)
                G_holdout = tf.constant(md_holdout, dtype=tf.float32)

                model = PoissonRegression(opts, sess)
                model.N = N
                # NOTE(review): M is set to the training row count (N), not
                # the holdout row count -- confirm this is intended.
                model.M = N
                model.D = D
                model.p = p
                model.num_nonzero = table.nnz

                batch = model.sample(y_data)
                log_loss = model.loss(G_data, y_data, batch)
                train = model.optimize(log_loss)
                mad = model.evaluate(G_holdout, y_holdout)
                tf.global_variables_initializer().run()
                fetched = sess.run(
                    [train, mad, log_loss, model.qbeta, model.qgamma])
                mad_, beta, gamma = fetched[1], fetched[3], fetched[4]

                self.assertIsNotNone(beta)
                self.assertIsNotNone(gamma)
                # Look at mean absolute error
                self.assertFalse(np.isnan(mad_))
Ejemplo n.º 7
0
    def __init__(self,
                 sess,
                 n,
                 filename,
                 jump_prob=0.05,
                 drop_tol=1e-8,
                 verbose=False):
        """
        Prepares PPR computation based on an LU decomposition.

        Args:
            sess (Session): tensorflow session.
            n (int): Number of nodes.
            filename (str): A csv file denoting the graph.
            jump_prob (float): Jumping probability of PPR.
            drop_tol (float): Drop tolerance passed to the incomplete LU
                factorization. ``None`` selects ``n ** -0.5``; ``0`` switches
                to the complete factorization instead.
            verbose (bool): Prints step messages if True.
        """
        self.alias = 'ludc'
        self.verbose = verbose
        self.pp("initializing")
        self.sess = sess
        self.n = n
        self.c = jump_prob
        damping = 1 - self.c

        # Resolve the tolerance: a zero tolerance means "factorize exactly".
        use_exact = drop_tol is not None and drop_tol == 0
        tolerance = np.power(n, -0.5) if drop_tol is None else drop_tol

        self.pp("reading")
        self.node2index, H = read_matrix(filename,
                                         d=-damping,
                                         add_identity=True)
        self.pp("sorting H")
        self.perm = degree_reverse_rank_perm(H)
        H = reorder_matrix(H, self.perm).tocsc()

        self.pp("computing LU decomposition")
        self.LU = splu(H) if use_exact else spilu(H, drop_tol=tolerance)
        Linv = inv(self.LU.L).tocoo()
        Uinv = inv(self.LU.U).tocoo()

        self.pp("tf init")
        with tf.variable_scope('ppr_lu_decomposition_tf'):
            t_Linv = tf.SparseTensorValue(list(zip(Linv.row, Linv.col)),
                                          Linv.data,
                                          dense_shape=self.LU.L.shape)
            t_Uinv = tf.SparseTensorValue(list(zip(Uinv.row, Uinv.col)),
                                          Uinv.data,
                                          dense_shape=self.LU.U.shape)
            self.t_q = tf.placeholder(tf.float64, shape=[self.n, 1])
            # r = Uinv * (Linv * (c * q)), evaluated via the _sdmm helper.
            scaled_query = self.c * self.t_q
            self.t_r = _sdmm(t_Uinv, _sdmm(t_Linv, scaled_query))
Ejemplo n.º 8
0
    def Train(self, inputs, ArcNode, target, step, nodegraph=0.0, mask=None):
        ''' Run one training step.

        Receives the inputs, the arc-node conversion matrix, the target, the
        step counter and optionally the node-graph indicator and a mask.
        Returns the loss and element 1 of the value fetched for loss_op. '''

        # Re-wrap the fed ArcNode matrix as a SparseTensorValue.
        arcnode_ = tf.SparseTensorValue(indices=ArcNode.indices,
                                        values=ArcNode.values,
                                        dense_shape=ArcNode.dense_shape)
        if self.graph_based:
            # Convert the node-graph matrix to sparse form using its
            # non-zero positions.
            rows, cols = nodegraph.nonzero()
            nodegraph = tf.SparseTensorValue(
                indices=np.stack([rows, cols], axis=1),
                values=nodegraph[rows, cols],
                dense_shape=nodegraph.shape)

        n_rows = ArcNode.dense_shape[0]
        fd = {
            self.NodeGraph: nodegraph,
            self.comp_inp: inputs,
            self.state: np.zeros((n_rows, self.state_dim)),
            self.state_old: np.ones((n_rows, self.state_dim)),
            self.ArcNode: arcnode_,
            self.y: target,
        }
        if self.mask_flag:
            fd[self.mask] = mask

        fetches = [self.train_op, self.loss, self.loss_op]
        if self.tensorboard:
            fetches += [self.merged_all, self.merged_train]
        results = self.session.run(fetches, feed_dict=fd)
        loss, loop = results[1], results[2]

        # Summaries are written every 100 steps only.
        if self.tensorboard and step % 100 == 0:
            self.writer.add_summary(results[3], step)
            self.writer.add_summary(results[4], step)

        return loss, loop[1]
Ejemplo n.º 9
0
 def _SparseTensorValue_3x50(self, indices_dtype, values_dtype):
     """Return an (ids, weights) pair of SparseTensorValues.

     Both share the same positions and dense shape [3, 3]; their values are
     cast to indices_dtype and values_dtype respectively.
     """
     # NOTE: This input is intentionally not sorted to validate the
     # already_sorted flag below.
     positions = np.array([[0, 0], [1, 0], [1, 2], [2, 0], [2, 1], [1, 1]])
     # NB: these are not sorted
     raw_ids = np.array([0, 13, 10, 33, 32, 14])
     raw_weights = np.array([-3, 4, 1, 9, 5, 1])
     dense_shape = np.array([3, 3])

     def build(payload, payload_dtype):
         # Every call makes fresh int64 copies of positions and shape.
         return tf.SparseTensorValue(np.array(positions, np.int64),
                                     np.array(payload, payload_dtype),
                                     np.array(dense_shape, np.int64))

     return build(raw_ids, indices_dtype), build(raw_weights, values_dtype)
Ejemplo n.º 10
0
    def evaluate_simfunc(self, W_sparse_vals):
        """Recompute the values of a sparse matrix with the similarity function.

        For every (i, j) position in W_sparse_vals.indices the similarity
        op is evaluated on rows i and j of self.X_descr. Returns a new
        SparseTensorValue with the same indices and dense shape and the
        freshly computed values.
        """
        if self.eval_get_data is None:
            # Build the evaluation ops once, on first use.
            self.eval_ids_i = tf.placeholder(tf.int32, shape=[None])
            self.eval_ids_j = tf.placeholder(tf.int32, shape=[None])
            self.eval_X_descr = tf.placeholder(tf.float32)

            left = tf.gather(self.eval_X_descr, self.eval_ids_i)
            right = tf.gather(self.eval_X_descr, self.eval_ids_j)
            self.eval_get_data = self._sim_func(X1=left, X2=right)

        ids_i = np.asarray(W_sparse_vals.indices[:, 0]).reshape(-1)
        ids_j = np.asarray(W_sparse_vals.indices[:, 1]).reshape(-1)
        n_entries = ids_i.shape[0]

        data = np.zeros(ids_i.shape)
        start = 0
        # Process the pairs in chunks of at most self.X.shape[0] entries.
        while start < n_entries:
            stop = min(start + self.X.shape[0], n_entries)
            feed = {
                self.W.indices: W_sparse_vals.indices,
                self.W.values: W_sparse_vals.values,
                self.eval_ids_i: ids_i[start:stop],
                self.eval_ids_j: ids_j[start:stop],
                self.eval_X_descr: self.X_descr,
            }
            data[start:stop] = self.sess.run(self.eval_get_data,
                                             feed_dict=feed)
            start = stop

        return tf.SparseTensorValue(W_sparse_vals.indices, data,
                                    W_sparse_vals.dense_shape)
Ejemplo n.º 11
0
    def testCopyTensorsProducesEquivalentTensors(self):
        """Copied tensors must evaluate to the values fed to the originals."""
        dense_input = tf.placeholder(tf.int64, (None, ),
                                     name='my_dense_input')
        sparse_input = tf.sparse_placeholder(tf.int64, name='my_sparse_input')
        tensors = {'dense': dense_input, 'sparse': sparse_input}
        copied_tensors = impl_helper.copy_tensors(tensors)

        with tf.Session() as session:
            dense_value = [1, 2]
            sparse_value = tf.SparseTensorValue(
                indices=[[0, 0], [0, 2], [1, 1]],
                values=[3, 4, 5],
                dense_shape=[2, 3])
            feed = {
                tensors['dense']: dense_value,
                tensors['sparse']: sparse_value
            }
            sample_tensors = session.run(copied_tensors, feed_dict=feed)

            self.assertAllEqual(sample_tensors['dense'], dense_value)
            # Compare the sparse result component by component.
            fetched_sparse = sample_tensors['sparse']
            self.assertAllEqual(fetched_sparse.indices, sparse_value.indices)
            self.assertAllEqual(fetched_sparse.values, sparse_value.values)
            self.assertAllEqual(fetched_sparse.dense_shape,
                                sparse_value.dense_shape)
Ejemplo n.º 12
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Extend the ModelPart feed dict with sparse training targets.

        Raises ValueError when training and the dataset has no series for
        ``self.data_id``. When the series is missing outside training, the
        base feed dict is returned unchanged.
        """
        fd = ModelPart.feed_dict(self, dataset, train)

        sentences = cast(Iterable[List[str]],
                         dataset.maybe_get_series(self.data_id))

        if sentences is None:
            if train:
                raise ValueError("When training, you must feed "
                                 "reference sentences")
            return fd

        time_major_vectors, time_major_paddings = (
            self.vocabulary.sentences_to_tensor(list(sentences),
                                                train_mode=train,
                                                max_len=self.max_length))

        # sentences_to_tensor returns time-major tensors, targets need to
        # be batch-major
        vectors = time_major_vectors.T
        paddings = time_major_paddings.T

        # Sparse representation: keep only positions whose padding weight
        # exceeds 0.5.
        keep = (paddings > 0.5)
        positions = np.stack(np.where(keep), axis=1)

        fd[self.train_targets] = tf.SparseTensorValue(
            indices=positions,
            values=vectors[keep],
            dense_shape=vectors.shape)

        return fd
    def testMaterializeFeaturesWithExcludes(self):
        """A feature named in `excludes` must not appear in the result."""
        example1 = self._makeExample(age=3.0,
                                     language='english',
                                     label=1.0,
                                     slice_key='first_slice')

        sparse_feature = tf.SparseTensorValue(
            indices=[[0, 5], [1, 2], [3, 6]],
            values=[100., 200., 300.],
            dense_shape=[4, 10])
        features = {
            'f': {encoding.NODE_SUFFIX: np.array([1])},
            's': {encoding.NODE_SUFFIX: sparse_feature},
        }
        predictions = {'p': {encoding.NODE_SUFFIX: np.array([2])}}
        labels = {'l': {encoding.NODE_SUFFIX: np.array([3])}}

        fpl = types.FeaturesPredictionsLabels(input_ref=0,
                                              features=features,
                                              predictions=predictions,
                                              labels=labels)
        extracts = {
            constants.INPUT_KEY: example1.SerializeToString(),
            constants.FEATURES_PREDICTIONS_LABELS_KEY: fpl,
        }

        # Exclude the sparse feature 's' and verify it was not materialized.
        result = feature_extractor._MaterializeFeatures(extracts,
                                                        excludes=['s'])
        self.assertFalse('features__s' in result)
Ejemplo n.º 14
0
def as_tf_sparse(a):
    """Convert a to tf.SparseTensorValue

    Args:

      a: input array; anything accepted by ``as_scipy_coo``. A value that
        already is a ``tf.SparseTensorValue`` is returned unchanged.

    Returns:

    SparseTensorValue: converted object.

    Examples:

    >>> # numpy input
    >>> a = np.random.random((3,4))
    >>> a2 = as_tf_sparse(a)
    >>> import tensorflow as tf
    >>> isinstance(a2, tf.SparseTensorValue)
    True
    >>> np.testing.assert_array_equal(a, as_numpy_array(a2))

    >>> a2 is as_tf_sparse(a2)
    True
    """
    _config.assert_has_package('tensorflow')
    import tensorflow as tf
    if isinstance(a, tf.SparseTensorValue):
        return a

    a = as_scipy_coo(a)
    # np.mat was removed in NumPy 2.0; the transposed ndarray contains the
    # same (row, col) pairs the matrix-based expression produced.
    indices = np.array([a.row, a.col]).T
    return tf.SparseTensorValue(indices, a.data, a.shape)
Ejemplo n.º 15
0
def sp2tf(sp_t, shape=None):
    """Convert a sparse matrix to a float32 tf.SparseTensorValue.

    Args:
        sp_t: sparse matrix accepted by ``sparse_to_tuple``.
        shape: optional dense shape that overrides the one reported by
            ``sparse_to_tuple``.

    Returns:
        tf.SparseTensorValue built from elements 0, 1 and 2 of the
        ``sparse_to_tuple`` result, with the values cast to float32.
    """
    t = sparse_to_tuple(sp_t)
    indices, values, dense_shape = t[0], t[1], t[2]

    # Override via a local instead of assigning into `t`: item assignment
    # fails with TypeError if sparse_to_tuple returns an (immutable) tuple.
    # NOTE(review): the return type of sparse_to_tuple is not visible here.
    if shape is not None:
        dense_shape = shape
    return tf.SparseTensorValue(indices, values.astype(np.float32),
                                dense_shape)
Ejemplo n.º 16
0
def load_training_labels(random_images_path):
    """
    Load the labels encoded in image file names as a sparse tensor value.

    The label text of an image is the part of its file name before the
    first '-', with every '#' removed; each character is mapped through
    char_mapping.

    :param random_images_path: iterable of image paths
    :return: tf.SparseTensorValue whose dense shape is
        [number of images, longest label length]
    """
    labels = []
    for path in random_images_path:
        image_name = os.path.split(path)[1]
        text = str(image_name).split('-')[0].replace('#', '')
        labels.append([char_mapping[ch] for ch in text])

    # Longest label; 0 when there are no labels at all.
    max_size = max([len(codes) for codes in labels] + [0])

    # One [row, position] index per character code, in row-major order.
    indices = [[row, pos]
               for row, codes in enumerate(labels)
               for pos in range(len(codes))]
    values = [code for codes in labels for code in codes]
    size = [len(labels), max_size]
    return tf.SparseTensorValue(indices, values, size)
Ejemplo n.º 17
0
def feed_dict_predict(sentence, doc_positive_spt, on_training=True):
    """
    Build the feed dict used for prediction.

    input: sentence is converted to a one-hot representation through
        train_data_set.get_one_hot_from_sentence; doc_positive_spt is fed
        as-is to the positive-document input; on_training is fed to the
        on_train placeholder.
    returns: dict mapping the query input, the positive-document input and
        on_train to their values.
    """
    # Original author's note (translated): insert a function here that turns
    # query_in, doc_positive_in and doc_negative_in into one-hot form and
    # then into coo_matrix.
    query_in = query_input_list[0]
    doc_positive_in = doc_positive_input_list[0]

    one_hot = train_data_set.get_one_hot_from_sentence(sentence)
    query = coo_matrix(one_hot)

    # np.float (an alias of the builtin float) was removed in NumPy 1.24;
    # np.float64 is the dtype it resolved to.
    query = tf.SparseTensorValue(
        np.transpose([
            np.array(query.row, dtype=np.int64),
            np.array(query.col, dtype=np.int64)
        ]), np.array(query.data, dtype=np.float64),
        np.array(query.shape, dtype=np.int64))

    return {
        query_in: query,
        doc_positive_in: doc_positive_spt,
        on_train: on_training
    }
Ejemplo n.º 18
0
 def shape_test_stack(self, feat):
     """Check that feat.stack keeps every value of every stacked input.

     NTESTS random binary arrays of shape feat.shape are converted to sparse
     form, fed through the stacked feature, and each (value, index) pair of
     input i is looked up in the result under leading index i.
     """
     # Seed from the feature key so each feature gets reproducible data.
     np.random.seed(abs(myhash(feat.key)) % (2**31 - 1))
     phs = [
         feat.get_placeholder_and_feature(batch=False)[1]
         for _ in range(self.NTESTS)
     ]
     vals_dense = [
         np.random.binomial(1, 0.1, size=feat.shape)
         for _ in range(self.NTESTS)
     ]
     vals_sparse = []
     for A in vals_dense:
         idx, vals = basefeat.np_dense_to_sparse(A)
         vals_sparse.append(
             tf.SparseTensorValue(np.stack(idx, -1), vals, feat.shape))
     X = feat.stack(phs)
     X_ = self.sess.run(X, feed_dict=dict(zip(phs, vals_sparse)))
     total_len = sum(len(v.values) for v in vals_sparse)
     run_len = len(X_.values)
     msg = 'Differing number of values: {} vs {}'.format(run_len, total_len)
     self.assertEqual(run_len, total_len, msg)
     for i, v in enumerate(vals_sparse):
         for vval, vinds in zip(v.values, v.indices):
             value_found = False
             for xval, xinds in zip(X_.values, X_.indices):
                 if xinds[0] == i and np.allclose(xinds[1:], vinds):
                     msg = 'Values {} and {} not equal with inds {}'.format(
                         vval, xval, xinds)
                     # Pass the message along; it used to be built and
                     # then discarded.
                     self.assertEqual(xval, vval, msg)
                     value_found = True
             self.assertTrue(value_found,
                             msg='Values {}, indices {} not found'.format(
                                 vval, vinds))
Ejemplo n.º 19
0
def sparse_to_tensor(value):
    """Build a tensorflow SparseTensorValue from a matrix exposing
    ``row``, ``col``, ``data`` and ``shape`` (e.g. a scipy COO matrix)."""
    # Turn row and col into column vectors and lay them side by side so
    # that every line of `indices` is one (row, col) pair.
    row_column = np.reshape(value.row, (-1, 1))
    col_column = np.reshape(value.col, (-1, 1))
    indices = np.hstack((row_column, col_column))
    return tf.SparseTensorValue(indices, value.data, value.shape)
Ejemplo n.º 20
0
    def _merge_models(self, m1, m2):
        """Concatenate two model dicts along their first dimension.

        Sparse entries ('context', 'features') are merged by appending the
        entries of m2 below those of m1; dense entries ('seq_len', 'label',
        'forloeb') are concatenated on axis 0; the remaining known keys
        ('test', 'doc_ids', 'text_len') are set to None.

        Neither m1 nor m2 is modified.
        """
        sparse_merges = ['context', 'features']
        dense_merges = ['seq_len', 'label', 'forloeb']
        ignores = ['test', 'doc_ids', 'text_len']

        new_model = {}

        for key in sparse_merges:
            first, second = m1[key], m2[key]
            first_rows = first.dense_shape[0]
            new_shape = np.concatenate(
                [[first_rows + second.dense_shape[0]], first.dense_shape[1:]],
                axis=0)
            # Shift a copy: adding the offset in place would corrupt the
            # indices stored in m2 for every later use.
            shifted = np.array(second.indices)
            shifted[:, 0] += first_rows
            new_indices = np.concatenate([first.indices, shifted], axis=0)
            new_values = np.concatenate([first.values, second.values],
                                        axis=0)

            new_model[key] = tf.SparseTensorValue(new_indices, new_values,
                                                  new_shape)
        for key in dense_merges:
            new_model[key] = np.concatenate([m1[key], m2[key]], axis=0)
        for key in ignores:
            new_model[key] = None

        return new_model
def batch_to_feed_dict(batch, is_train, add_noise=False):
    '''
    Create the dictionary that is fed into the Session.run(..) calls.
    :param batch: tuple (features, phns, seq_len, wav)
    :param is_train: selects the training values for keep_prob and noise_gate
    :param add_noise: when true, gd_noise is fed FLAGS.gd_noise instead of 0
    :return: feed dictionary
    '''
    features_np, phns_np, seq_len_np, wav_np = batch

    # Pad every waveform to the longest one so they stack into one array.
    longest_wav = max(len(w) for w in wav_np)
    wav_np = np.stack([pad_vector(w, longest_wav) for w in wav_np], axis=0)

    n_batch, n_time, _ = features_np.shape
    # Per sequence: weight 1 / seq_len on the first seq_len steps, 0 after.
    # The mask is built from the feature length before any repetition.
    relevance_mask_np = np.array([
        (np.arange(n_time) < seq_len_np[i]) / seq_len_np[i]
        for i in range(n_batch)
    ])

    if FLAGS.n_repeat > 1:
        # Extend sequences with the repeat in time
        features_np = np.repeat(features_np, FLAGS.n_repeat, axis=1)

    # Shape is re-read because the time axis may have been extended.
    n_batch, n_time, _ = features_np.shape
    phns_labels = tf.SparseTensorValue(phns_np['indices'], phns_np['values'],
                                       [n_batch, n_time])

    feed = {
        features: features_np,
        phns: phns_labels,
        seq_len: seq_len_np,
        weighted_relevant_mask: relevance_mask_np,
        keep_prob: FLAGS.drop_out_probability if is_train else 1.,
        batch_size: n_batch,
        noise_gate: 1. if is_train else 0.,
        audio: wav_np,
        gd_noise: FLAGS.gd_noise if add_noise else 0.
    }
    return feed
Ejemplo n.º 22
0
    def _run_graph(self, sess, qq, hh, tt, mdb, to_fetch):
        """Feed queries, heads, tails and the matrix database; run to_fetch.

        Each query q is expanded to num_step entries: q repeated
        num_step - 1 times followed by an end marker (num_query, or a list
        of num_word copies of num_vocab when queries are language).

        Returns whatever sess.run returns for to_fetch.
        """
        feed = {}
        if not self.query_is_language:
            feed[self.queries] = [[q] * (self.num_step - 1) + [self.num_query]
                                  for q in qq]
        else:
            feed[self.queries] = [[q] * (self.num_step - 1) +
                                  [[self.num_vocab] * self.num_word]
                                  for q in qq]

        feed[self.heads] = hh
        feed[self.tails] = tt

        # Example layout of one database entry, from the original author:
        #
        # In[232]: data.matrix_db[0][0][:7]
        # Out[232]:
        # [[0, 0],
        #  [2675, 2698],
        #  [2268, 2274],
        #  [2713, 1240],
        #  [1978, 2013],
        #  [2417, 1881],
        #  [2068, 701]]
        #
        # In[233]: data.matrix_db[0][1][:7]
        # Out[233]: [0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
        #
        # In[234]: data.matrix_db[0][2]
        # Out[234]: [3007, 3007]
        #
        # AJAY NOTE: constructs a sparse tensor from indices, values and
        # dense shape for every relation (by 2 is for symmetric relations).
        # `range` and `//` keep this working on Python 3, where `xrange` is
        # gone and `/` yields a float; on Python 2 the result is the same.
        for r in range(self.num_operator // 2):
            feed[self.database[r]] = tf.SparseTensorValue(*mdb[r])
        fetches = to_fetch
        graph_output = sess.run(fetches, feed)
        return graph_output
    def _evaluate(self, sess, source='test'):
        """Evaluate tail prediction over all batches of the given source.

        For each batch the prediction scores are sorted in descending order;
        the resulting index rankings and the (head, query, tail) triples are
        handed to kg.calc_metrics, whose second result is returned.
        """
        kg = self.kg
        hp = self.hparams

        all_triples = []
        all_rankings = []

        for batch in kg.get_eval_batch(hp.batch_size, source=source):
            head, query, tail = kg.get_triples_from_batch(batch)
            kg_inputs = kg.get_kg(batch)

            # Each query is repeated n_steps - 1 times, then END_RELATION
            # is appended.
            query_steps = [[q] * (hp.n_steps - 1) + kg.END_RELATION
                           for q in query]
            feed = {
                self.head_input: head,
                self.tail_input: tail,
                self.query_sequence: query_steps
            }
            for r in range(kg.n_relations):
                feed[self.kg_inputs[r]] = tf.SparseTensorValue(*kg_inputs[r])

            _, scores = sess.run([self.total_loss, self.prediction],
                                 feed_dict=feed)

            # Negate so that argsort ranks the highest score first.
            all_rankings.append(np.argsort(-scores))
            all_triples.append(np.stack([head, query, tail], axis=-1))

        idx_tail_pred = np.concatenate(all_rankings, 0).tolist()
        triples_eval = np.concatenate(all_triples, 0).tolist()
        _, eval_tail, _ = kg.calc_metrics(triples_eval, idx_tail_pred)

        return eval_tail
Ejemplo n.º 24
0
def convert_sparse_matrix_to_sparse_tensor(X):
    """Convert a SciPy-style sparse matrix into a ``tf.SparseTensorValue``.

    Adapted from
    https://stackoverflow.com/questions/40896157/scipy-sparse-csr-matrix-to-tensorflow-sparsetensor-mini-batch-gradient-descent

    Args:
        X: Object exposing ``tocoo()`` whose result has ``row``, ``col``,
            ``data`` and ``shape`` attributes (e.g. a ``scipy.sparse``
            matrix).

    Returns:
        A ``tf.SparseTensorValue`` whose indices are an ``(nnz, 2)`` array of
        ``[row, col]`` pairs, whose values are ``coo.data`` and whose dense
        shape is ``coo.shape``.
    """
    coo = X.tocoo()
    # np.stack replaces the former np.mat call: np.mat was removed in
    # NumPy 2.0, and a plain ndarray of index pairs is all that is needed.
    indices = np.stack([coo.row, coo.col], axis=1)
    return tf.SparseTensorValue(indices, coo.data, coo.shape)
Ejemplo n.º 25
0
def convert2SparseTensorValue(list_labels):
    """Pack a batch of variable-length label sequences into a sparse value.

    Args:
        list_labels: Batch-major sequence of label sequences; entry ``i``
            holds the labels of sample ``i``.

    Returns:
        A ``tf.SparseTensorValue`` with one ``[sample, position]`` index per
        label, the labels flattened sample by sample as values, and a dense
        shape of ``[number of samples, longest sequence length]``.
    """
    batch_rows = len(list_labels)
    longest = max(len(labels) for labels in list_labels)

    # Coordinates and values are emitted in the same sample-major order so
    # that the n-th index addresses the n-th value.
    coords = [
        [row, col]
        for row, labels in enumerate(list_labels)
        for col in range(len(labels))
    ]
    flat_labels = [label for labels in list_labels for label in labels]

    return tf.SparseTensorValue(indices=coords,
                                values=flat_labels,
                                dense_shape=[batch_rows, longest])
    def Apply(self):
        """Send the fed audio batch to the serving stub and decode the reply.

        Reads the audio, audio-length and id arrays from ``self.feed_dict``,
        issues a ``Predict`` request for the ``deepspeech2`` model, rebuilds
        the sparse decoded sequence from the response, and stores the first
        entry of the first result of ``Deepspeech2.model.infer`` in
        ``self.final_result``. ``self.inputs`` and ``self.outputs`` are set
        along the way.
        """
        feeds = self.feed_dict
        audio = feeds[self.model.get_data_layer().input_tensors["source_tensors"][0]]
        audio_length = feeds[self.model.get_data_layer().input_tensors["source_tensors"][1]]
        x_id = feeds[self.model.get_data_layer().input_tensors["source_ids"][0]]

        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'deepspeech2'
        request.model_spec.signature_name = 'predict_output'

        # Every request input is the fed array serialised with its own shape.
        for input_name, array in (('audio', audio),
                                  ('audio_length', audio_length),
                                  ('x_id', x_id)):
            request.inputs[input_name].CopyFrom(
                tf.contrib.util.make_tensor_proto(array,
                                                  shape=list(array.shape)))

        # NOTE(review): 10.0 is presumably the RPC timeout in seconds -
        # confirm against the stub's Predict signature.
        response = self.istub.Predict(request, 10.0)

        self.inputs = Deepspeech2.model.get_data_layer().input_tensors

        # The response carries the three components of a sparse tensor as
        # separate outputs; convert each back to an ndarray.
        indices, values, dense_shape = [
            tensor_util.MakeNdarray(response.outputs[key])
            for key in ('indices_decoded_sequence',
                        'values_decoded_sequence',
                        'dense_shape_decoded_sequence')
        ]
        self.outputs = [
            tf.SparseTensorValue(indices=indices,
                                 values=values,
                                 dense_shape=dense_shape)
        ]

        self.final_result = Deepspeech2.model.infer(self.inputs,
                                                    self.outputs)[0][0]
Ejemplo n.º 27
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the feed dictionary for one dataset batch.

        ``self.train_mode`` is always fed. When the dataset provides the
        series ``self.data_id``, its sentences are converted by the
        vocabulary and fed to ``self.train_targets`` as a sparse value.

        Args:
            dataset: Dataset queried with
                ``get_series(self.data_id, allow_none=True)``.
            train: Value fed to ``self.train_mode`` and passed to the
                vocabulary as ``train_mode``.

        Returns:
            The populated feed dictionary.
        """
        sentences = cast(Iterable[List[str]],
                         dataset.get_series(self.data_id, allow_none=True))

        fd = {self.train_mode: train}  # type: FeedDict
        if sentences is None:
            # No target series in this dataset: only the mode flag is fed.
            return fd

        time_major_ids, time_major_pad = self.vocabulary.sentences_to_tensor(
            list(sentences), train_mode=train)

        # sentences_to_tensor returns time-major tensors, targets need to
        # be batch-major
        batch_ids = time_major_ids.T
        batch_pad = time_major_pad.T

        # Sparse representation: keep only the positions where the padding
        # array exceeds 0.5.
        keep = (batch_pad > 0.5)
        fd[self.train_targets] = tf.SparseTensorValue(
            indices=np.stack(np.where(keep), axis=1),
            values=batch_ids[keep],
            dense_shape=batch_ids.shape)

        return fd
Ejemplo n.º 28
0
    def testFeedDenseReshapeSemantics(self):
        """Check that SparseReshape on a fed value matches a dense reshape."""
        with self.test_session(use_gpu=False) as sess:
            # 2^4 * 3^4 * 5^4 = 810k total elements; the prime factors are
            # distributed randomly over the axes of each shape.
            factors = np.array([2] * 4 + [3] * 4 + [5] * 4)

            def random_shape():
                # Draw a rank in [2, 7), assign every factor to a random
                # axis, and take the product of the factors on each axis.
                rank = np.random.randint(2, 7)
                axis_of_factor = np.random.randint(rank, size=factors.shape)
                return [
                    np.prod(factors[axis_of_factor == axis])
                    for axis in range(rank)
                ]

            orig_shape = random_shape()
            new_shape = random_shape()

            # Randomly sparsify: entries below 0.5 are the stored elements.
            orig_dense = np.random.uniform(size=orig_shape)
            orig_keep = orig_dense < 0.5
            orig_indices = np.transpose(np.nonzero(orig_keep))
            orig_values = orig_dense[orig_keep]

            # Expected result: reshape densely, then sparsify the same way.
            new_dense = np.reshape(orig_dense, new_shape)
            new_keep = new_dense < 0.5
            expected_indices = np.transpose(np.nonzero(new_keep))
            expected_values = new_dense[new_keep]

            sp_input = self._SparseTensorPlaceholder()
            input_val = tf.SparseTensorValue(orig_indices, orig_values,
                                             orig_shape)
            sp_output = tf.sparse_reshape(sp_input, new_shape)

            output_val = sess.run(sp_output, {sp_input: input_val})
            self.assertAllEqual(output_val.indices, expected_indices)
            self.assertAllEqual(output_val.values, expected_values)
            self.assertAllEqual(output_val.shape, new_shape)
Ejemplo n.º 29
0
        def feed_step(batch, label, dtype="train"):
            """Run one train / evaluate / test step on a sparse batch.

            Args:
                batch: Sparse matrix exposing ``tocoo()``; fed to
                    ``model.input_x`` as a ``tf.SparseTensorValue``.
                label: Labels for the batch; reshaped to a single column and
                    fed to ``model.input_y``.
                dtype: One of ``"train"``, ``"evaluate"`` or ``"test"``.

            Returns:
                ``(step, loss, auc)`` for ``"train"`` and ``(loss, auc)`` for
                ``"evaluate"``. ``"test"`` prints its metrics and returns
                ``None``, as does any other ``dtype`` value.
            """
            # Convert once instead of calling tocoo() again for every field.
            coo = batch.tocoo()
            # np.stack replaces np.mat, which was removed in NumPy 2.0; the
            # result is the same (nnz, 2) layout of [row, col] pairs.
            indices = np.stack([coo.row, coo.col], axis=1)
            feed_dict = {
                model.input_x: tf.SparseTensorValue(indices, coo.data,
                                                    coo.shape),
                model.input_y: np.reshape(label, [-1, 1])
            }
            if dtype == "train":
                train_operation = [
                    model.train_step, model.global_step, model.loss,
                    model.auc_score, model.summary_op
                ]
                _, step, loss_val, auc_val, merged = sess.run(
                    train_operation, feed_dict=feed_dict)
                writer.add_summary(merged, step)
                return step, loss_val, auc_val

            elif dtype == "evaluate":
                loss_val, auc_val = sess.run([model.loss, model.auc_score],
                                             feed_dict=feed_dict)
                return loss_val, auc_val

            elif dtype == "test":
                logit_val = sess.run([model.logit], feed_dict=feed_dict)[0]
                # Round once; the thresholded metrics all share the same
                # predictions, while AUC uses the raw model outputs.
                predicted = [round(x) for x in logit_val]
                print("accuracy:", accuracy_score(label, predicted))
                print("precision:", precision_score(label, predicted))
                print("recall:", recall_score(label, predicted))
                print("f1:", f1_score(label, predicted))
                print("auc:", roc_auc_score(label, logit_val))
Ejemplo n.º 30
0
    def _SparseTensorValue_5x6(self):
        """Return a fixed sparse value of dense shape 5x6 with six entries."""
        indices = np.array(
            [[0, 0], [1, 0], [1, 3], [1, 4], [3, 2], [3, 3]],
            dtype=np.int64)
        values = np.array([0, 10, 13, 14, 32, 33], dtype=np.float64)
        dense_shape = np.array([5, 6], dtype=np.int64)
        return tf.SparseTensorValue(indices, values, dense_shape)