예제 #1
0
    def __init__(self, feature):
        """Set up the encoder: defaults first, then feature overrides,
        then the embedding layer built from the final settings."""
        super().__init__(feature)

        # Default hyperparameters; any of them may be replaced by values
        # from the feature definition via overwrite_defaults() below.
        default_params = {
            'vocab': [],
            'embedding_size': 50,
            'representation': 'dense',
            'embeddings_trainable': True,
            'pretrained_embeddings': None,
            'embeddings_on_cpu': False,
            'dropout': False,
            'initializer': None,
            'regularize': True,
        }
        for param_name, param_value in default_params.items():
            setattr(self, param_name, param_value)

        _ = self.overwrite_defaults(feature)

        # Build the embedding layer from the (possibly overridden) settings.
        self.embed = Embed(
            vocab=self.vocab,
            embedding_size=self.embedding_size,
            representation=self.representation,
            embeddings_trainable=self.embeddings_trainable,
            pretrained_embeddings=self.pretrained_embeddings,
            embeddings_on_cpu=self.embeddings_on_cpu,
            dropout=self.dropout,
            initializer=self.initializer,
            regularize=self.regularize,
        )
예제 #2
0
 def __init__(self,
              embedding_size=10,
              embeddings_on_cpu=False,
              dropout=False,
              fc_layers=None,
              num_fc_layers=0,
              fc_size=10,
              norm=None,
              activation='relu',
              initializer=None,
              regularize=True,
              **kwargs):
     """Build the sub-modules of the encoder: a single linear unit for the
     year, one embedding per calendar component (month, day, weekday,
     day-of-year, hour, minute, second) and a fully connected stack on top.

         :param embedding_size: it is the maximum embedding size, the actual
                size will be `min(vocabulary_size, embedding_size)`
                for `dense` representations and exactly `vocabulary_size`
                for the `sparse` encoding, where `vocabulary_size` is
                the number of different strings appearing in the training set
                in the column the feature is named after (plus 1 for `<UNK>`).
         :type embedding_size: Integer
         :param embeddings_on_cpu: by default embedding matrices are stored
                on GPU memory if a GPU is used, as it allows
                for faster access, but in some cases the embedding matrix
                may be really big and this parameter forces the placement
                of the embedding matrix in regular memory and the CPU is used
                to resolve them, slightly slowing down the process
                as a result of data transfer between CPU and GPU memory.
         :type embeddings_on_cpu: Boolean
         :param dropout: determines if there should be a dropout layer before
                returning the encoder output.
         :type dropout: Boolean
         :param fc_layers: list of dictionaries, one per fully connected
                layer, passed through to `FCStack`; overrides the defaults
                below for the layers it describes.
         :type fc_layers: List
         :param num_fc_layers: number of fully connected layers in the
                final stack (used when `fc_layers` is `None`).
         :type num_fc_layers: Integer
         :param fc_size: default size of each layer of the final
                fully connected stack.
         :type fc_size: Integer
         :param norm: default normalization applied in the final
                fully connected stack.
         :param activation: default activation function of the final
                fully connected stack.
         :type activation: str
         :param initializer: the initializer to use. If `None`, the default
                initializer of each variable is used (`glorot_uniform`
                in most cases). Options are: `constant`, `identity`, `zeros`,
                 `ones`, `orthogonal`, `normal`, `uniform`,
                 `truncated_normal`, `variance_scaling`, `glorot_normal`,
                 `glorot_uniform`, `xavier_normal`, `xavier_uniform`,
                 `he_normal`, `he_uniform`, `lecun_normal`, `lecun_uniform`.
                 Alternatively it is possible to specify a dictionary with
                 a key `type` that identifies the type of initializer and
                 other keys for its parameters, e.g.
                 `{type: normal, mean: 0, stddev: 0}`.
                 To know the parameters of each initializer, please refer to
                 TensorFlow's documentation.
         :type initializer: str
         :param regularize: if `True` the embedding weights are added to
                the set of weights that get regularized by a regularization
                loss (if the `regularization_lambda` in `training`
                is greater than 0).
         :type regularize: Boolean
     """
     # The year is treated as a continuous value: project it with a single
     # linear (no activation, no norm) unit instead of an embedding.
     self.year_fc = FCStack(num_layers=1,
                            default_fc_size=1,
                            default_activation=None,
                            default_norm=None,
                            default_dropout=dropout,
                            default_regularize=regularize,
                            default_initializer=initializer)

     # Keyword arguments shared by every calendar-component embedding.
     embed_kwargs = dict(representation='dense',
                         embeddings_trainable=True,
                         pretrained_embeddings=None,
                         embeddings_on_cpu=embeddings_on_cpu,
                         dropout=dropout,
                         initializer=initializer,
                         regularize=regularize)

     # One embedding per calendar component; the second element is the
     # vocabulary cardinality (the component values "0".."cardinality-1").
     calendar_components = (('embed_month', 12),
                            ('embed_day', 31),
                            ('embed_weekday', 7),
                            ('embed_yearday', 366),
                            ('embed_hour', 24),
                            ('embed_minute', 60),
                            ('embed_second', 60))
     for attr_name, cardinality in calendar_components:
         setattr(self, attr_name,
                 Embed([str(i) for i in range(cardinality)],
                       embedding_size,
                       **embed_kwargs))

     # Final fully connected stack combining all component representations.
     self.fc_stack = FCStack(layers=fc_layers,
                             num_layers=num_fc_layers,
                             default_fc_size=fc_size,
                             default_activation=activation,
                             default_norm=norm,
                             default_dropout=dropout,
                             default_regularize=regularize,
                             default_initializer=initializer)