def __init__(self, config, embedding=None):
    self._config = config
    self.emb_size = config.emb_size
    self.vocab_size = config.vocab_size
    self.n_classes = config.n_classes
    self.max_seq_len = config.max_sequence_length
    self.mlp_units = config.mlp_units
    if embedding is None:
        self.embedding_table = tf.get_variable("embedding", shape=[self.vocab_size, self.emb_size])
    else:
        self.embedding_table = tf.get_variable(name="embedding",
                                               shape=[self.vocab_size, self.emb_size],
                                               initializer=tf.constant_initializer(embedding))
    # self.emb_initializer = tf.random_normal_initializer(stddev=0.1)
    # self.fc1 = layer.get_fc_layer(self.n_classes, initial_type='normal', activation=config.fc_activation_1)

    # Inputs
    self.sequence = tf.placeholder(tf.int32, [None, self.max_seq_len], name='sequence')
    self.sequence_length = tf.placeholder(tf.int32, [None, 1], name='sequence_length')
    self.label = tf.placeholder(tf.float32, [None, None], name='label')
    self.is_train = tf.placeholder(tf.bool, name='is_train')
    self.fc_drop = config.fc_drop

    params = self.get_param(config)
    self.transformer = transformer.Transformer(params=params, train=self.is_train)

    # Fetch OPs
    self.logits_op = self.logits()
    self.loss_op = self.loss()
    self.result_op = self.result()
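# Hedged, self-contained sketch (an assumption, not from the source above): the
# constructor relies on the TF1 placeholder + feed_dict pattern. This toy graph
# shows that pattern in isolation, using tf.compat.v1 so it still runs on TF 2.x;
# all names and shapes here are illustrative.
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

vocab_size, emb_size, max_seq_len = 100, 16, 8
sequence = tf.placeholder(tf.int32, [None, max_seq_len], name="sequence")
embedding_demo = tf.get_variable("embedding_demo", shape=[vocab_size, emb_size])
embedded = tf.nn.embedding_lookup(embedding_demo, sequence)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.randint(0, vocab_size, size=(2, max_seq_len))
    # fetch the embedded batch by feeding the placeholder, as the model's
    # loss_op / logits_op would be fetched with a full feed_dict
    print(sess.run(embedded, feed_dict={sequence: batch}).shape)  # (2, 8, 16)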
def create_transformer(args, input_vocab_size, target_vocab_size):
    hyperparameters = {
        "input_vocab_size": input_vocab_size + 1,
        "target_vocab_size": target_vocab_size + 1,
        "pe_input": input_vocab_size + 1,
        "pe_target": target_vocab_size + 1,
        **read_json_file(args.hyperparameters),
    }
    return transformer.Transformer(**hyperparameters), hyperparameters
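# Hedged sketch (assumption): the JSON file referenced by args.hyperparameters
# most likely carries the remaining constructor arguments seen in create_model()
# below, for example:
#
#   {"num_layers": 4, "num_heads": 8, "dff": 512, "d_model": 256, "rate": 0.1}
#
# A minimal stand-in for read_json_file, in case it is just a thin json wrapper;
# this is illustrative and not necessarily the project's implementation.
import json

def read_json_file(path):
    with open(path) as f:
        return json.load(f)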
def create_model(self):
    return transformer.Transformer(
        num_layers=4,
        num_heads=8,
        dff=512,
        d_model=256,
        input_vocab_size=VOCAB_SIZE + 1,
        target_vocab_size=VOCAB_SIZE + 1,
        pe_input=VOCAB_SIZE + 1,
        pe_target=VOCAB_SIZE + 1,
        rate=0.1,
    )
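# Hedged sketch (assumption): transformer.Transformer here looks like the
# TensorFlow tutorial model, whose call typically expects padding and look-ahead
# masks alongside the encoder/decoder token ids. These self-contained helpers
# show one common way such masks are built; the project may ship its own.
import tensorflow as tf

def create_padding_mask(seq):
    # 1.0 where the token id is 0 (padding), broadcastable over attention logits
    mask = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return mask[:, tf.newaxis, tf.newaxis, :]  # (batch, 1, 1, seq_len)

def create_look_ahead_mask(size):
    # upper-triangular mask that hides future positions from the decoder
    return 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)

print(create_padding_mask(tf.constant([[7, 6, 0, 0]])).shape)  # (1, 1, 1, 4)
print(create_look_ahead_mask(3).numpy())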
def create_transformer_pretrained(args, input_vocab_size, target_vocab_size):
    demi_bert_args = copy.deepcopy(args)
    demi_bert_args.model = "demi-bert"
    demi_bert = find(demi_bert_args, input_vocab_size, target_vocab_size)
    demi_bert.load("2")
    hyperparameters = {
        "input_vocab_size": input_vocab_size + 1,
        "target_vocab_size": target_vocab_size,
        "pe_input": input_vocab_size + 1,
        "pe_target": target_vocab_size,
        **read_json_file(args.hyperparameters),
    }
    args.model = "transformer"
    transformer_ = transformer.Transformer(**hyperparameters)
    transformer_.encoder = demi_bert.encoder
    transformer_.title += "-pretrained"
    return transformer_, hyperparameters
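# Hedged, self-contained sketch (assumption): the function above reuses a
# pretrained encoder by assigning it onto a freshly built model. The same
# attribute-swap idea in miniature with plain Keras objects; all names and
# layer sizes here are illustrative, not from the project.
import tensorflow as tf

class TinySeq2Seq(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.encoder = tf.keras.layers.Dense(8)
        self.decoder = tf.keras.layers.Dense(4)

    def call(self, x):
        return self.decoder(self.encoder(x))

pretrained = TinySeq2Seq()
fresh = TinySeq2Seq()
_ = pretrained(tf.zeros((1, 4)))  # build the pretrained variables
_ = fresh(tf.zeros((1, 4)))

# mirrors `transformer_.encoder = demi_bert.encoder`
fresh.encoder = pretrained.encoder
print(fresh.encoder is pretrained.encoder)  # True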
def __init__(self, cfg):
    super(PLUS_TFM, self).__init__()
    self.transformer = tfm.Transformer(cfg)

    # masked language modeling (decoder is shared with embedding layer)
    self.fc_lm = nn.Linear(cfg.hidden_dim, cfg.hidden_dim)
    self.norm_lm = tfm.LayerNorm(cfg)
    embed_weight = self.transformer.embed.tok_embed.weight
    n_vocab, n_dim = embed_weight.size()
    self.decoder = nn.Linear(n_dim, n_vocab, bias=False)
    self.decoder.weight = embed_weight
    self.decoder_bias = nn.Parameter(torch.zeros(n_vocab))

    # classification
    if cfg.num_classes is not None:
        self.drop_cls = nn.Dropout(cfg.dropout)
        self.cls = nn.Linear(cfg.hidden_dim, cfg.num_classes)
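# Hedged, self-contained sketch (assumption): a minimal illustration of the
# weight tying used above, where the masked-LM output projection shares its
# weight matrix with the token embedding. Dimensions are illustrative.
import torch
import torch.nn as nn

n_vocab, n_dim = 100, 16
tok_embed = nn.Embedding(n_vocab, n_dim)

decoder = nn.Linear(n_dim, n_vocab, bias=False)
decoder.weight = tok_embed.weight        # tied: same Parameter object
decoder_bias = nn.Parameter(torch.zeros(n_vocab))

hidden = torch.randn(2, 5, n_dim)        # (batch, seq_len, hidden)
logits = decoder(hidden) + decoder_bias  # (batch, seq_len, n_vocab)
print(logits.shape, decoder.weight is tok_embed.weight)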
def __init__(self, config, embedding=None):
    self._config = config
    self.emb_size = config.emb_size
    self.vocab_size = config.vocab_size
    self.n_classes = config.n_classes
    self.rnn_units = config.rnn_units
    self.window_size = config.window_size
    self.real_window_size = config.window_size
    self.mlp_units = config.mlp_units
    self.batch_size = config.batch_size
    self.filter_size = [3, 5, 7]
    if config.window_size is not None:
        self.filter_size = [config.window_size]
    self.filter_num = 128
    self.max_seq_len = config.max_sequence_length
    self.emb_initializer = tf.random_uniform_initializer(minval=-0.01, maxval=0.01)
    # self.emb_initializer = layer.xavier_initializer(self._config.xavier_factor, seed=config.fixedrng)
    #
    if embedding is None:
        self.embedding_table = tf.get_variable(
            "embedding", shape=[self.vocab_size, self.emb_size],
            initializer=self.emb_initializer)
    else:
        self.embedding_table = tf.get_variable(
            name="embedding", shape=[self.vocab_size, self.emb_size],
            initializer=tf.constant_initializer(embedding))

    # Inputs
    self.sequence = tf.placeholder(tf.int32, [self.batch_size, self.max_seq_len], name='sequence')
    self.sequence_length = tf.placeholder(tf.int32, [self.batch_size, 1], name='sequence_length')
    self.label = tf.placeholder(tf.float32, [self.batch_size, None], name='label')
    self.is_train = tf.placeholder(
        tf.bool, name="is_train")  # for batch normalization
    self.rnn_keep_prob = tf.cond(
        self.is_train,
        lambda: tf.constant(1.0 - config.rnn_drop),
        lambda: tf.constant(1.0))
    self.fc_drop = config.fc_drop
    self.attention_drop = config.attention_drop

    if config.attention_type is not None:
        if config.encode_type is None:
            logging.info("must choose an encode type")
            exit()
        elif config.encode_type == "transformer":
            params = self.get_param(config)
            self.encoder_func = transformer.Transformer(
                params=params, train=self.is_train)

    if self._config.feature_type == 'rnn':
        with tf.name_scope('feature_extractor_rnn'):
            # self.rnn_cell = tf.nn.rnn_cell.GRUCell(num_units=self.rnn_units,
            #     kernel_initializer=layer.xavier_initializer(
            #         self._config.xavier_factor, seed=config.fixedrng))
            self.rnn_cell = tf.nn.rnn_cell.GRUCell(
                num_units=self.rnn_units,
                kernel_initializer=tf.initializers.orthogonal())
            self.rnn_cell = tf.nn.rnn_cell.DropoutWrapper(
                self.rnn_cell,
                input_keep_prob=self.rnn_keep_prob,
                output_keep_prob=self.rnn_keep_prob,
                state_keep_prob=1.0)
            self.fixedrng = np.random.RandomState(config.fixedrng)
            self.u = layer.weight_variable(
                [self.mlp_units, self.n_classes], level="u", factor=config.xavier_factor)
            self.W = layer.weight_variable(
                [self.mlp_units, self.mlp_units], level="w", factor=config.xavier_factor)
            self.WC = layer.weight_variable(
                [self.mlp_units, self.mlp_units], level="wc", factor=config.xavier_factor)
            self.logits_op = self.logits_drnn()
    elif self._config.feature_type == 'cnn':
        self.logits_op = self.logits_cnn_1d()
    elif self._config.feature_type == "dpcnn":
        self.logits_op = self.logits_cnn_dp()
    else:
        raise NotImplementedError

    self.loss_op = self.loss()
    self.result_op = self.result()
    self.loss_encode_op = self.loss_encoder()
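# Hedged, self-contained sketch (assumption) of the tf.cond pattern above: the
# RNN dropout keep probability switches between training and inference based on
# the is_train placeholder. Uses the tf.compat.v1 graph API so it runs on TF 2.x;
# the rnn_drop value is illustrative.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

rnn_drop = 0.3
is_train = tf.placeholder(tf.bool, name="is_train")
rnn_keep_prob = tf.cond(is_train,
                        lambda: tf.constant(1.0 - rnn_drop),
                        lambda: tf.constant(1.0))

with tf.Session() as sess:
    print(sess.run(rnn_keep_prob, feed_dict={is_train: True}))   # 0.7 (dropout active)
    print(sess.run(rnn_keep_prob, feed_dict={is_train: False}))  # 1.0 (no dropout)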