Example #1
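Note that each snippet below is a constructor lifted from a larger model class; in the original source files these rely on the usual imports, roughly: numpy as np, tensorflow as tf, sparse_operation_kit as sok, and List / Dict from typing.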
    def __init__(self,
                 vocab_sizes: List[int],
                 embedding_vec_size: int,
                 num_gpus=1,
                 **kwargs):
        super(SOKEmbedding, self).__init__(**kwargs)
        self._vocab_sizes = vocab_sizes
        self._embedding_vec_size = embedding_vec_size

        # Per-feature offsets (prefix sum of vocabulary sizes); with
        # use_hashtable=False below, these let all features map into disjoint
        # ranges of one shared embedding table.
        prefix_sum = []
        offset = 0
        for i in range(len(vocab_sizes)):
            prefix_sum.append(offset)
            offset += self._vocab_sizes[i]
        prefix_sum = np.array(prefix_sum, dtype=np.int64).reshape(1, -1)
        self._vocab_prefix_sum = tf.constant(prefix_sum)
        print('[Info] Total vocabulary size:', offset)

        self._sok_embedding = sok.All2AllDenseEmbedding(
            max_vocabulary_size_per_gpu=int(offset / num_gpus + 1),
            embedding_vec_size=self._embedding_vec_size,
            slot_num=len(self._vocab_sizes),
            nnz_per_slot=1,
            dynamic_input=True,
            use_hashtable=False,
        )
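Only the constructor is shown above. A plausible companion call(), sketched below, would add the per-feature offsets before the lookup; it assumes (as dynamic_input=True suggests) that the SOK layer takes a flattened 1-D int64 key tensor and returns one embedding_vec_size vector per key. The shapes and the training flag are illustrative, not from the original source.

    # Hypothetical forward pass for Example #1 (not part of the original snippet).
    def call(self, inputs, training=False):
        # inputs: [batch_size, num_features] int64 categorical IDs.
        global_ids = inputs + self._vocab_prefix_sum       # shift each feature into its own ID range
        global_ids = tf.reshape(global_ids, [-1])          # dynamic_input=True implies 1-D keys
        vectors = self._sok_embedding(global_ids, training=training)
        # Assumed output: one vector per key; regroup the per-sample vectors.
        return tf.reshape(
            vectors, [-1, len(self._vocab_sizes) * self._embedding_vec_size])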
Example #2
    def __init__(self, max_vocabulary_size_per_gpu, embedding_vec_size,
                 slot_num, nnz_per_slot, num_dense_layers, num_dense_units,
                 **kwargs):
        super(SOKDenseDemo, self).__init__(**kwargs)

        self.max_vocabulary_size_per_gpu = max_vocabulary_size_per_gpu
        self.slot_num = slot_num
        self.nnz_per_slot = nnz_per_slot
        self.num_dense_layers = num_dense_layers
        self.embedding_vec_size = embedding_vec_size

        self.embedding_layer = sok.All2AllDenseEmbedding(
            max_vocabulary_size_per_gpu=self.max_vocabulary_size_per_gpu,
            embedding_vec_size=self.embedding_vec_size,
            slot_num=self.slot_num,
            nnz_per_slot=self.nnz_per_slot)

        self.dense_layers = []
        for _ in range(self.num_dense_layers):
            layer = tf.keras.layers.Dense(units=num_dense_units,
                                          activation='relu')
            self.dense_layers.append(layer)

        self.out_layer = tf.keras.layers.Dense(units=1,
                                               activation=None,
                                               kernel_initializer='ones',
                                               bias_initializer='zeros')
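A construction-only usage sketch; the argument values are made up for illustration, and the 'ones'/'zeros' initializers on the output layer suggest this model is aimed at reproducible demo or correctness runs rather than training quality.

# Hypothetical instantiation of SOKDenseDemo (values are illustrative).
model = SOKDenseDemo(max_vocabulary_size_per_gpu=1024,
                     embedding_vec_size=16,
                     slot_num=10,
                     nnz_per_slot=1,
                     num_dense_layers=2,
                     num_dense_units=256)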
Example #3
    def __init__(self,
                 max_vocabulary_size_per_gpu,
                 embedding_vec_size,
                 slot_num,
                 nnz_per_slot,
                 num_dense_layers,
                 use_sok=False,
                 **kwargs):
        super(DemoModel, self).__init__(**kwargs)

        self.max_vocabulary_size_per_gpu = max_vocabulary_size_per_gpu
        self.embedding_vec_size = embedding_vec_size
        self.slot_num = slot_num
        self.nnz_per_slot = nnz_per_slot
        self.num_dense_layers = num_dense_layers
        self.use_sok = use_sok

        if self.use_sok:
            self.embedding_layer = sok.All2AllDenseEmbedding(
                max_vocabulary_size_per_gpu=self.max_vocabulary_size_per_gpu,
                embedding_vec_size=self.embedding_vec_size,
                slot_num=self.slot_num,
                nnz_per_slot=self.nnz_per_slot)
        else:
            self.embedding_layer = HashtableEmbedding(
                max_vocabulary_size=self.max_vocabulary_size_per_gpu,
                embedding_vec_size=self.embedding_vec_size)

        self.dense_layers = list()
        for _ in range(self.num_dense_layers):
            layer = tf.keras.layers.Dense(units=1024, activation='relu')
            self.dense_layers.append(layer)

        self.out_layer = tf.keras.layers.Dense(units=1, activation=None)
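The use_sok switch suggests this model exists so the SOK embedding and a plain TensorFlow HashtableEmbedding (defined elsewhere in the original project) can be compared behind identical dense towers. A hedged sketch of how the two variants might be built, with made-up argument values:

# Hypothetical side-by-side construction (values are illustrative).
sok_model = DemoModel(max_vocabulary_size_per_gpu=8192, embedding_vec_size=4,
                      slot_num=10, nnz_per_slot=4, num_dense_layers=5,
                      use_sok=True)
tf_model = DemoModel(max_vocabulary_size_per_gpu=8192, embedding_vec_size=4,
                     slot_num=10, nnz_per_slot=4, num_dense_layers=5,
                     use_sok=False)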
Example #4
    def __init__(self,
                 max_vocabulary_size_per_gpu,
                 embedding_vec_size,
                 slot_num,
                 nnz_per_slot,
                 use_hashtable=True,
                 key_dtype=None,
                 embedding_initializer=None,
                 **kwargs):
        super(SOKDenseDemo, self).__init__(**kwargs)

        self.max_vocabulary_size_per_gpu = max_vocabulary_size_per_gpu
        self.slot_num = slot_num
        self.nnz_per_slot = nnz_per_slot
        self.embedding_vec_size = embedding_vec_size

        self.embedding_layer = sok.All2AllDenseEmbedding(
            max_vocabulary_size_per_gpu=self.max_vocabulary_size_per_gpu,
            embedding_vec_size=self.embedding_vec_size,
            slot_num=self.slot_num,
            nnz_per_slot=self.nnz_per_slot,
            use_hashtable=use_hashtable,
            key_dtype=key_dtype,
            embedding_initializer=embedding_initializer)

        self.dense_layer = tf.keras.layers.Dense(units=1,
                                                 activation=None,
                                                 kernel_initializer="ones",
                                                 bias_initializer="zeros")
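This variant simply forwards use_hashtable, key_dtype, and embedding_initializer to sok.All2AllDenseEmbedding. A construction-only sketch with assumed values (the initializer choice is illustrative, not from the original source):

# Hypothetical instantiation exercising the pass-through options.
model = SOKDenseDemo(max_vocabulary_size_per_gpu=1024,
                     embedding_vec_size=8,
                     slot_num=10,
                     nnz_per_slot=1,
                     use_hashtable=False,
                     embedding_initializer=tf.keras.initializers.Ones())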
Example #5
    def __init__(self,
                 max_vocabulary_size_per_gpu,
                 embedding_vec_size,
                 slot_num,
                 nnz_per_slot,
                 use_hashtable=True,
                 dynamic_input=False,
                 num_of_dense_layers=5,
                 key_dtype=None,
                 embedding_initializer=None,
                 **unused):
        super(SOKDemo, self).__init__()

        self._max_vocabulary_size_per_gpu = max_vocabulary_size_per_gpu
        self._embedding_vec_size = embedding_vec_size
        self._slot_num = slot_num
        self._nnz_per_slot = nnz_per_slot
        self._use_hashtable = use_hashtable
        self._dynamic_input = dynamic_input
        self._num_of_dense_layers = num_of_dense_layers

        # embedding_vec_size and slot_num are expected to be per-table lists of
        # equal length; one All2AllDenseEmbedding is created for each pair.
        if (isinstance(self._embedding_vec_size, list)
                or isinstance(self._embedding_vec_size, tuple)):
            if len(self._embedding_vec_size) != len(self._slot_num):
                raise ValueError(
                    "The length of embedding_vec_size must be equal to that of "
                    "slot_num")

        self._embedding_num = len(self._embedding_vec_size)
        # Prefix sum of slot counts: element i is the first slot index owned by
        # embedding table i.
        self._slot_num_prefix_sum = [0 for _ in range(self._embedding_num + 1)]
        for i in range(1, self._embedding_num + 1):
            self._slot_num_prefix_sum[i] = self._slot_num_prefix_sum[
                i - 1] + self._slot_num[i - 1]

        self.embedding_layers = list()
        for i in range(self._embedding_num):
            embedding_layer = sok.All2AllDenseEmbedding(
                max_vocabulary_size_per_gpu=self._max_vocabulary_size_per_gpu,
                embedding_vec_size=self._embedding_vec_size[i],
                slot_num=self._slot_num[i],
                nnz_per_slot=self._nnz_per_slot,
                use_hashtable=self._use_hashtable,
                dynamic_input=self._dynamic_input,
                key_dtype=key_dtype,
                embedding_initializer=embedding_initializer)
            self.embedding_layers.append(embedding_layer)

        self.dense_layers = list()
        for _ in range(self._num_of_dense_layers):
            layer = tf.keras.layers.Dense(units=1024,
                                          activation="relu",
                                          kernel_initializer="ones",
                                          bias_initializer="zeros")
            self.dense_layers.append(layer)

        self.out_layer = tf.keras.layers.Dense(units=1,
                                               activation=None,
                                               kernel_initializer="ones",
                                               bias_initializer="zeros")
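The constructor above builds one All2AllDenseEmbedding per slot group but does not show the forward pass. The sketch below is one plausible call(), assuming the default dynamic_input=False so each layer receives a [batch, slots, nnz_per_slot] slice selected with _slot_num_prefix_sum; the output shapes are assumptions, not taken from the original source.

    # Hypothetical forward pass for Example #5 (not part of the original snippet).
    def call(self, inputs, training=False):
        # inputs: [batch_size, sum(slot_num), nnz_per_slot] categorical keys.
        vectors = []
        for i, embedding_layer in enumerate(self.embedding_layers):
            begin = self._slot_num_prefix_sum[i]
            end = self._slot_num_prefix_sum[i + 1]
            out = embedding_layer(inputs[:, begin:end, :], training=training)
            # Assumed shape: [batch, slot_num[i], nnz_per_slot, embedding_vec_size[i]].
            vectors.append(tf.reshape(out, [tf.shape(out)[0], -1]))
        hidden = tf.concat(vectors, axis=1)
        for dense_layer in self.dense_layers:
            hidden = dense_layer(hidden)
        return self.out_layer(hidden)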
Example #6
    def __init__(self,
                 max_vocabulary_size_per_gpu,
                 embedding_vec_size_list,
                 slot_num_list,
                 nnz_per_slot_list,
                 num_dense_layers,
                 dynamic_input=False,
                 use_hashtable=True,
                 **kwargs):
        super(SOKDenseModel, self).__init__(**kwargs)

        self.max_vocabulary_size_per_gpu = max_vocabulary_size_per_gpu
        self.embedding_vec_size_list = embedding_vec_size_list
        self.slot_num_list = slot_num_list
        self.nnz_per_slot_list = nnz_per_slot_list
        self.num_dense_layers = num_dense_layers
        self.dynamic_input = dynamic_input

        if (len(slot_num_list) != len(nnz_per_slot_list)
                or len(slot_num_list) != len(embedding_vec_size_list)):
            raise ValueError("The length of embedding_vec_size_list, slot_num_list "
                             "and nnz_per_slot_list must be equal.")

        self.embedding_num = len(self.embedding_vec_size_list)
        self.slot_num_prefix_sum = [0 for _ in range(self.embedding_num + 1)]
        for i in range(1, self.embedding_num + 1):
            self.slot_num_prefix_sum[i] = self.slot_num_prefix_sum[
                i - 1] + self.slot_num_list[i - 1]

        self.embedding_layers = list()
        for i in range(self.embedding_num):
            embedding_layer = sok.All2AllDenseEmbedding(
                max_vocabulary_size_per_gpu=self.max_vocabulary_size_per_gpu,
                embedding_vec_size=self.embedding_vec_size_list[i],
                slot_num=self.slot_num_list[i],
                nnz_per_slot=self.nnz_per_slot_list[i],
                dynamic_input=self.dynamic_input,
                use_hashtable=use_hashtable)
            self.embedding_layers.append(embedding_layer)

        self.dense_layers = list()
        for _ in range(self.num_dense_layers):
            layer = tf.keras.layers.Dense(units=1024,
                                          activation="relu",
                                          kernel_initializer="ones",
                                          bias_initializer="zeros")
            self.dense_layers.append(layer)

        self.out_layer = tf.keras.layers.Dense(units=1,
                                               activation=None,
                                               kernel_initializer="ones",
                                               bias_initializer="zeros")
Example #7
    def __init__(self, vocab_sizes: Dict[str, int], embedding_vec_size: int,
                 **kwargs):
        super(SOKEmbedding, self).__init__(**kwargs)
        self._vocab_sizes = vocab_sizes
        self._embedding_vec_size = embedding_vec_size

        # Sort the feature names so the per-feature offsets are deterministic
        # regardless of the dict's insertion order.
        self._sorted_keys = sorted(self._vocab_sizes.keys())
        self._vocab_prefix_sum = dict()
        offset = 0
        for key in self._sorted_keys:
            self._vocab_prefix_sum[key] = offset
            offset += self._vocab_sizes[key]
        self._vocab_prefix_sum["total"] = offset

        self._sok_embedding = sok.All2AllDenseEmbedding(
            max_vocabulary_size_per_gpu=int(self._vocab_prefix_sum["total"] /
                                            0.75),
            embedding_vec_size=self._embedding_vec_size,
            slot_num=len(self._vocab_sizes),
            nnz_per_slot=1,
            use_hashtable=False)