Example #1
    def __init__(self, config, embedding=None):
        self._config = config
        self.emb_size = config.emb_size
        self.vocab_size = config.vocab_size
        self.n_classes = config.n_classes
        self.max_seq_len = config.max_sequence_length
        self.mlp_units = config.mlp_units

        if embedding is None:
            self.embedding_table = tf.get_variable("embedding", shape=[self.vocab_size, self.emb_size])
        else:
            self.embedding_table = tf.get_variable(name="embedding", shape=[self.vocab_size, self.emb_size],
                                         initializer=tf.constant_initializer(embedding))

        # self.emb_initializer = tf.random_normal_initializer(stddev=0.1)
        # self.fc1 = layer.get_fc_layer(self.n_classes, initial_type='normal', activation=config.fc_activation_1)

        # Inputs
        self.sequence = tf.placeholder(tf.int32, [None, self.max_seq_len], name='sequence')
        self.sequence_length = tf.placeholder(tf.int32, [None, 1], name='sequence_length')

        self.label = tf.placeholder(tf.float32, [None, None], name='label')

        self.is_train = tf.placeholder(tf.bool, name='is_train')
        self.fc_drop = config.fc_drop

        params = self.get_param(config)
        self.transformer = transformer.Transformer(params=params, train=self.is_train)

        # Fetch OPs
        self.logits_op = self.logits()
        self.loss_op = self.loss()
        self.result_op = self.result()
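
A minimal session-level usage sketch for the classifier above (the wrapper class name TransformerClassifier and the batch size of 8 are assumptions made for illustration; config is the same config object the constructor expects, and the placeholder names and fetch ops come from the example itself):

import numpy as np
import tensorflow as tf

model = TransformerClassifier(config)  # hypothetical name for the class defined above
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    logits, loss = sess.run(
        [model.logits_op, model.loss_op],
        feed_dict={
            model.sequence: np.zeros([8, config.max_sequence_length], np.int32),
            model.sequence_length: np.full([8, 1], config.max_sequence_length, np.int32),
            model.label: np.zeros([8, config.n_classes], np.float32),
            model.is_train: False,  # disables dropout inside the transformer
        })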
Example #2
def create_transformer(args, input_vocab_size, target_vocab_size):
    hyperparameters = {
        "input_vocab_size": input_vocab_size + 1,
        "target_vocab_size": target_vocab_size + 1,
        "pe_input": input_vocab_size + 1,
        "pe_target": target_vocab_size + 1,
        **read_json_file(args.hyperparameters),
    }
    return transformer.Transformer(**hyperparameters), hyperparameters
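
A short sketch of how this helper is typically driven (the body of read_json_file and the JSON keys shown are assumptions; note that because the JSON dict is unpacked last, any vocab-size or positional-encoding keys it contains override the values computed above):

import json
import types

def read_json_file(path):  # assumed helper; the example imports it from elsewhere
    with open(path) as f:
        return json.load(f)

# hyperparameters.json might contain, e.g.:
# {"num_layers": 4, "d_model": 256, "num_heads": 8, "dff": 512, "rate": 0.1}
args = types.SimpleNamespace(hyperparameters="hyperparameters.json")
model, hparams = create_transformer(args, input_vocab_size=8000, target_vocab_size=8000)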
Example #3
    def create_model(self):
        return transformer.Transformer(
            num_layers=4,
            num_heads=8,
            dff=512,
            d_model=256,
            input_vocab_size=VOCAB_SIZE + 1,
            target_vocab_size=VOCAB_SIZE + 1,
            pe_input=VOCAB_SIZE + 1,
            pe_target=VOCAB_SIZE + 1,
            rate=0.1,
        )
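
The constructor arguments here (pe_input, pe_target, rate) match the TensorFlow text-tutorial style Transformer. Assuming that class's call signature, a forward pass would look roughly as follows; mask construction is elided and all shapes and vocabulary sizes are illustrative:

import tensorflow as tf
import transformer

VOCAB_SIZE = 8000  # illustrative value
model = transformer.Transformer(
    num_layers=4, num_heads=8, dff=512, d_model=256,
    input_vocab_size=VOCAB_SIZE + 1, target_vocab_size=VOCAB_SIZE + 1,
    pe_input=VOCAB_SIZE + 1, pe_target=VOCAB_SIZE + 1, rate=0.1)

inp = tf.zeros([2, 40], dtype=tf.int64)  # encoder token ids
tar = tf.zeros([2, 39], dtype=tf.int64)  # decoder token ids (shifted right)
logits, attention_weights = model(inp, tar, False, None, None, None)
# logits: [2, 39, VOCAB_SIZE + 1]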
Example #4
def create_transformer_pretrained(args, input_vocab_size, target_vocab_size):
    demi_bert_args = copy.deepcopy(args)
    demi_bert_args.model = "demi-bert"
    demi_bert = find(demi_bert_args, input_vocab_size, target_vocab_size)
    demi_bert.load("2")
    hyperparameters = {
        "input_vocab_size": input_vocab_size + 1,
        "target_vocab_size": target_vocab_size,
        "pe_input": input_vocab_size + 1,
        "pe_target": target_vocab_size,
        **read_json_file(args.hyperparameters),
    }
    args.model = "transformer"
    transformer_ = transformer.Transformer(**hyperparameters)
    transformer_.encoder = demi_bert.encoder
    transformer_.title += "-pretrained"
    return transformer_, hyperparameters
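
This helper warm-starts only the encoder: the pretrained demi-bert run supplies the encoder weights, while the decoder is initialised from scratch. A call sketch (the concrete args values and file names are assumptions; find, load("2"), and the "-pretrained" title suffix come from the example):

import types

args = types.SimpleNamespace(model="transformer", hyperparameters="hyperparameters.json")
model, hparams = create_transformer_pretrained(args, input_vocab_size=8000, target_vocab_size=8000)
# model.encoder now holds the demi-bert encoder; model.title ends with "-pretrained"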
Example #5
    def __init__(self, cfg):
        super(PLUS_TFM, self).__init__()
        self.transformer = tfm.Transformer(cfg)

        # masked language modeling (decoder is shared with embedding layer)
        self.fc_lm = nn.Linear(cfg.hidden_dim, cfg.hidden_dim)
        self.norm_lm = tfm.LayerNorm(cfg)
        embed_weight = self.transformer.embed.tok_embed.weight
        n_vocab, n_dim = embed_weight.size()
        self.decoder = nn.Linear(n_dim, n_vocab, bias=False)
        self.decoder.weight = embed_weight
        self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))

        # classification
        if cfg.num_classes is not None:
            self.drop_cls = nn.Dropout(cfg.dropout)
            self.cls = nn.Linear(cfg.hidden_dim, cfg.num_classes)
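
The masked-LM head above ties its output projection to the token-embedding matrix (self.decoder.weight = embed_weight) and adds a separate bias vector. A minimal sketch of how such a head is usually applied to the transformer's hidden states (this function is an illustration, not PLUS_TFM's actual forward method):

import torch
import torch.nn.functional as F

def lm_logits(model, hidden):                     # hidden: [batch, seq_len, hidden_dim]
    h = F.gelu(model.fc_lm(hidden))               # intermediate projection + nonlinearity
    h = model.norm_lm(h)                          # layer norm before the tied projection
    return model.decoder(h) + model.decoder_bias  # [batch, seq_len, n_vocab] token logits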
Example #6
    def __init__(self, config, embedding=None):
        self._config = config
        self.emb_size = config.emb_size
        self.vocab_size = config.vocab_size
        self.n_classes = config.n_classes
        self.rnn_units = config.rnn_units
        self.window_size = config.window_size
        self.real_window_size = config.window_size
        self.mlp_units = config.mlp_units
        self.batch_size = config.batch_size

        self.filter_size = [3, 5, 7]
        if config.window_size is not None:
            self.filter_size = [config.window_size]
        self.filter_num = 128

        self.max_seq_len = config.max_sequence_length

        self.emb_initializer = tf.random_uniform_initializer(minval=-0.01,
                                                             maxval=0.01)
        # self.emb_initializer = layer.xavier_initializer(self._config.xavier_factor, seed=config.fixedrng)
        #
        if embedding is None:
            self.embedding_table = tf.get_variable(
                "embedding",
                shape=[self.vocab_size, self.emb_size],
                initializer=self.emb_initializer)
        else:
            self.embedding_table = tf.get_variable(
                name="embedding",
                shape=[self.vocab_size, self.emb_size],
                initializer=tf.constant_initializer(embedding))
        # Inputs
        self.sequence = tf.placeholder(tf.int32,
                                       [self.batch_size, self.max_seq_len],
                                       name='sequence')

        self.sequence_length = tf.placeholder(tf.int32, [self.batch_size, 1],
                                              name='sequence_length')
        self.label = tf.placeholder(tf.float32, [self.batch_size, None],
                                    name='label')

        self.is_train = tf.placeholder(
            tf.bool, name="is_train")  # for batch normalization

        self.rnn_keep_prob = tf.cond(
            self.is_train, lambda: tf.constant(1.0 - config.rnn_drop),
            lambda: tf.constant(1.0))
        self.fc_drop = config.fc_drop
        self.attention_drop = config.attention_drop

        if config.attention_type is not None:
            if config.encode_type is None:
                logging.info("must choose an encode type")
                exit()
            elif config.encode_type == "transformer":
                params = self.get_param(config)
                self.encoder_func = transformer.Transformer(
                    params=params, train=self.is_train)

        if self._config.feature_type == 'rnn':
            with tf.name_scope('feature_extractor_rnn'):
                # self.rnn_cell = tf.nn.rnn_cell.GRUCell(num_units=self.rnn_units,
                #                                        kernel_initializer=layer.xavier_initializer(
                #                                            self._config.xavier_factor, seed=config.fixedrng))
                self.rnn_cell = tf.nn.rnn_cell.GRUCell(
                    num_units=self.rnn_units,
                    kernel_initializer=tf.initializers.orthogonal())
                self.rnn_cell = tf.nn.rnn_cell.DropoutWrapper(
                    self.rnn_cell,
                    input_keep_prob=self.rnn_keep_prob,
                    output_keep_prob=self.rnn_keep_prob,
                    state_keep_prob=1.0)
                self.fixedrng = np.random.RandomState(config.fixedrng)
                self.u = layer.weight_variable(
                    [self.mlp_units, self.n_classes],
                    level="u",
                    factor=config.xavier_factor)
                self.W = layer.weight_variable(
                    [self.mlp_units, self.mlp_units],
                    level="w",
                    factor=config.xavier_factor)
                self.WC = layer.weight_variable(
                    [self.mlp_units, self.mlp_units],
                    level="wc",
                    factor=config.xavier_factor)
            self.logits_op = self.logits_drnn()
        elif self._config.feature_type == 'cnn':
            self.logits_op = self.logits_cnn_1d()
        elif self._config.feature_type == "dpcnn":
            self.logits_op = self.logits_cnn_dp()
        else:
            raise NotImplementedError
        self.loss_op = self.loss()
        self.result_op = self.result()
        self.loss_encode_op = self.loss_encoder()
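
A training-step sketch for the model above (the optimizer choice and the zero-filled batches are assumptions; model is an instance of the class defined in this example and config the same config object passed to its constructor):

import numpy as np
import tensorflow as tf

train_op = tf.train.AdamOptimizer(1e-3).minimize(model.loss_op)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, loss = sess.run(
        [train_op, model.loss_op],
        feed_dict={
            model.sequence: np.zeros([config.batch_size, config.max_sequence_length], np.int32),
            model.sequence_length: np.full([config.batch_size, 1], config.max_sequence_length, np.int32),
            model.label: np.zeros([config.batch_size, config.n_classes], np.float32),
            model.is_train: True,  # enables RNN dropout via rnn_keep_prob and batch-norm training mode
        })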