def __init__(self, input_size, hidden_size):
    super(Dl4mtEncoder, self).__init__()

    self.gru = RNN(type="gru", batch_first=True, input_size=input_size,
                   hidden_size=hidden_size, bidirectional=True)
def __init__(self, generator, **config):
    super().__init__()

    self.generator = generator
    self.gru = RNN(type="gru", batch_first=True, input_size=config['d_model'],
                   hidden_size=config['d_model'])
    self.linear = nn.Linear(config['d_model'], config['d_word_vec'])
def __init__(self, feature_size=768, hidden_size=512, dropout_rate=0.1, **kwargs):
    super(QE_PAIR, self).__init__()

    # Use PAD
    self.gru = RNN(type="gru", batch_first=True, input_size=feature_size,
                   hidden_size=hidden_size, bidirectional=True)
    self.lstm = RNN(type="lstm", batch_first=True, input_size=feature_size,
                    hidden_size=hidden_size, bidirectional=True)
    self.lstm_src = RNN(type="lstm", batch_first=True, input_size=feature_size,
                        hidden_size=hidden_size, bidirectional=True)
    self.lstm_mt = RNN(type="lstm", batch_first=True, input_size=feature_size,
                       hidden_size=hidden_size, bidirectional=True)

    self.w = nn.Linear(2 * hidden_size, 1)
    my_init.default_init(self.w.weight)
    self.w_all = nn.Linear(2 * 2 * hidden_size, 1)
    my_init.default_init(self.w_all.weight)
    self.w_1 = nn.Linear(2 * hidden_size, 1)
    my_init.default_init(self.w_1.weight)
    self.w_2 = nn.Linear(2 * hidden_size, 1)
    my_init.default_init(self.w_2.weight)

    self.dropout = nn.Dropout(dropout_rate)
    self.sigmoid = nn.Sigmoid()
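# Hedged sketch (an assumption, not QE_PAIR's actual forward()): one plausible way the
# pooled bi-RNN summaries of source and MT feed the sigmoid scoring head defined above.
# `src_summary` / `mt_summary` (each of shape (batch, 2 * hidden_size)) are illustrative names.
import torch

def qe_pair_score(src_summary, mt_summary, w_all, dropout, sigmoid):
    feature = torch.cat([src_summary, mt_summary], dim=-1)  # (batch, 2 * 2 * hidden_size)
    return sigmoid(w_all(dropout(feature))).squeeze(-1)     # (batch,) quality score in (0, 1)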
def __init__(self, n_src_words, n_trg_words, d_word_vec, d_model, dropout=0.0, **kwargs):
    super(TransDiscriminator, self).__init__()

    # the embeddings are pre-trained and used without a dropout layer
    self.src_embedding = Embeddings(num_embeddings=n_src_words, embedding_dim=d_word_vec,
                                    dropout=dropout, add_position_embedding=False)
    self.trg_embedding = Embeddings(num_embeddings=n_trg_words, embedding_dim=d_word_vec,
                                    dropout=dropout, add_position_embedding=False)
    if not kwargs["update_embedding"]:
        for param in self.src_embedding.parameters():
            param.requires_grad = False
        for param in self.trg_embedding.parameters():
            param.requires_grad = False

    self.src_gru = RNN(type="gru", batch_first=True, input_size=d_word_vec,
                       hidden_size=d_model, bidirectional=True)
    self.trg_gru = RNN(type="gru", batch_first=True, input_size=d_word_vec,
                       hidden_size=d_model, bidirectional=True)

    # twice the bi-GRU dimension (src and trg summaries concatenated)
    self.layer_norm = nn.LayerNorm(d_model * 4, elementwise_affine=True)
    # whether (x, y) is a translation pair
    self.ffn = nn.Linear(in_features=4 * d_model, out_features=2)
    self.dropout = nn.Dropout(dropout)
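# Hedged sketch (an assumption, not the repository's forward()): how the two bi-GRU
# summaries are typically combined into the 4 * d_model feature expected by `layer_norm`
# and `ffn` above. `src_summary` / `trg_summary` are the concatenated forward/backward
# final states, each of shape (batch, 2 * d_model); the names are illustrative.
import torch

def pair_logits(src_summary, trg_summary, layer_norm, ffn, dropout):
    feature = torch.cat([src_summary, trg_summary], dim=-1)  # (batch, 4 * d_model)
    feature = layer_norm(feature)
    return ffn(dropout(feature))                             # (batch, 2): translation pair or not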
def __init__(self, d_model, n_head, feature_size=1024, hidden_size=512, dropout=0.0, **kwargs):
    super(QE_ATTENTION, self).__init__()

    self.ctx_attn = MultiHeadedAttention(head_count=n_head, model_dim=d_model,
                                         dropout=dropout, dim_per_head=None)

    # Use PAD
    self.gru = RNN(type="gru", batch_first=True, input_size=feature_size,
                   hidden_size=hidden_size, bidirectional=True)
    self.lstm = RNN(type="lstm", batch_first=True, input_size=feature_size,
                    hidden_size=hidden_size, bidirectional=True)

    self.w = nn.Linear(2 * hidden_size, 1)
    my_init.default_init(self.w.weight)

    self.dropout = nn.Dropout(dropout)
    self.sigmoid = nn.Sigmoid()
def __init__(self, n_words, input_size, hidden_size):
    super(Encoder, self).__init__()

    # Use PAD
    self.embeddings = Embeddings(num_embeddings=n_words, embedding_dim=input_size,
                                 dropout=0.0, add_position_embedding=False)
    self.gru = RNN(type="gru", batch_first=True, input_size=input_size,
                   hidden_size=hidden_size, bidirectional=True)
def __init__(self, victim_configs, victim_model_path, input_dim, d_model, dropout=0.0, **kwargs):
    super().__init__(victim_configs, victim_model_path, dropout)

    self.src_gru = RNN(type="gru", batch_first=True, input_size=input_dim,
                       hidden_size=d_model, bidirectional=True)
    self.trg_gru = RNN(type="gru", batch_first=True, input_size=input_dim,
                       hidden_size=d_model, bidirectional=True)
    self.dropout = nn.Dropout(dropout, inplace=True)
    self.layer_norm = nn.LayerNorm(d_model * 4, elementwise_affine=True)
    # single-layer binary classification head
    self.ffn = nn.Linear(in_features=4 * d_model, out_features=2)

    self._reset_parameters()
def __init__(self, d_word_vec=512, d_model=256, limit_dist=0.1, dropout=0.0, reparam_noise=1e-6):
    super(Rephraser, self).__init__()

    self.input_size = d_word_vec
    self.action_dim = d_word_vec  # actions are modifications on embeddings
    self.hidden_size = d_model
    self.action_range = limit_dist  # action range
    self.reparam_noise = reparam_noise
    self.dropout_rate = dropout
    self.dropout = nn.Dropout(dropout)
    self.log_std_bound = [-5, 4]  # default log std bound

    # current sequence as ctx features
    self.src_gru = RNN(type="gru", batch_first=True, input_size=self.input_size,
                       hidden_size=self.hidden_size, bidirectional=True)

    # Linears for input step features: current embeddings, avg_seqs as ctx
    self.ctx_linear = nn.Linear(in_features=2 * self.hidden_size,
                                out_features=self.hidden_size)
    self.input_linear = nn.Linear(in_features=self.input_size,
                                  out_features=self.hidden_size)
    # layer norm for the input features
    self.rephrase_LN = nn.LayerNorm(self.hidden_size, elementwise_affine=True)

    # outputs: actor policy distribution
    # Gaussian policy: mean and std
    self.rephraser_linear_base_mu = nn.Linear(in_features=self.hidden_size,
                                              out_features=self.hidden_size)
    self.rephraser_linear_mu = nn.Linear(in_features=self.hidden_size,
                                         out_features=self.action_dim)
    self.rephraser_linear_base_log_sig = nn.Linear(in_features=self.hidden_size,
                                                   out_features=self.hidden_size)
    self.rephraser_linear_log_sig = nn.Linear(in_features=self.hidden_size,
                                              out_features=self.action_dim)

    # # intrinsic curiosity module
    # self.icm = IntrinsicPredictor(
    #     d_model=self.hidden_size, action_dim=self.action_dim, dropout=dropout)

    # initialize parameters
    self._reset_parameters()
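# Hedged sketch (an assumption, not Rephraser's actual sampling code): how the mean /
# log-std heads above are typically turned into a tanh-squashed Gaussian action, using
# `log_std_bound`, `action_range` (limit_dist), and `reparam_noise` for numerical stability.
# `sample_action` and its tensor arguments are illustrative names.
import torch

def sample_action(mu, log_sigma, log_std_bound=(-5, 4), action_range=0.1, reparam_noise=1e-6):
    log_sigma = torch.clamp(log_sigma, *log_std_bound)   # keep the std in a sane range
    sigma = log_sigma.exp()
    eps = torch.randn_like(mu)                           # reparameterization trick
    raw_action = mu + sigma * eps
    action = torch.tanh(raw_action) * action_range       # squash into (-limit_dist, limit_dist)
    # log-prob with the tanh change-of-variables correction
    log_prob = torch.distributions.Normal(mu, sigma).log_prob(raw_action)
    log_prob = log_prob - torch.log(action_range * (1 - torch.tanh(raw_action) ** 2) + reparam_noise)
    return action, log_prob.sum(dim=-1, keepdim=True)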
def __init__(self, d_word_vec=512, d_model=256, limit_dist=0.1, dropout=0.0, reparam_noise=1e-6):
    super(CriticNet, self).__init__()

    self.input_size = d_word_vec
    self.hidden_size = d_model
    self.limit_dist = limit_dist
    self.reparam_noise = reparam_noise
    self.dropout = nn.Dropout(dropout)

    # current sequences as ctx
    self.src_gru = RNN(type="gru", batch_first=True, input_size=self.input_size,
                       hidden_size=self.hidden_size, bidirectional=True)

    # Linears for input step features
    self.ctx_linear = nn.Linear(in_features=2 * self.hidden_size,
                                out_features=self.hidden_size)
    self.input_linear = nn.Linear(in_features=self.input_size,
                                  out_features=self.hidden_size)
    self.action_linear = nn.Linear(in_features=self.input_size,
                                   out_features=self.hidden_size)
    self.critic_LN = nn.LayerNorm(self.hidden_size, elementwise_affine=True)

    # double-Q trick with two critics (sharing input features);
    # Q(s_t, a_t) is taken as the smaller of the two
    self.critic1_linear_base = nn.Linear(in_features=self.hidden_size,
                                         out_features=self.hidden_size)
    self.critic1_linear = nn.Linear(in_features=self.hidden_size, out_features=1)
    self.critic2_linear_base = nn.Linear(in_features=self.hidden_size,
                                         out_features=self.hidden_size)
    self.critic2_linear = nn.Linear(in_features=self.hidden_size, out_features=1)

    # initialize parameters
    self._reset_parameters()
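# Hedged sketch (an assumption, not CriticNet's code): the clipped double-Q target that
# the two critic heads above enable, in the usual soft actor-critic form. Function and
# argument names are illustrative.
import torch

def clipped_double_q_target(q1_next, q2_next, next_log_prob, reward, done, gamma=0.99, alpha=0.2):
    q_next = torch.min(q1_next, q2_next) - alpha * next_log_prob  # pessimistic of the two critics
    return reward + gamma * (1.0 - done) * q_next                 # bootstrap only for non-terminal steps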
def __init__(self, victim_configs, victim_model_path, trg_vocab_emb, input_dim, d_model,
             dropout=0.0, **kwargs):
    """
    :param victim_configs: build trg_emb from the victim.
    :param victim_model_path: build trg_emb from the victim.
    :param trg_vocab_emb: target vocabulary embedding matrix.
    :param input_dim: word embedding dim.
    :param d_model: encoding dimension.
    :param dropout: redundant parameter in the Annunciater base class.
    :param sample_amount: save memory by sampling only sample_amount tokens from the vocab (larger -> better).
    :param kwargs: provides sample_amount and optionally density_temperature.
    """
    super().__init__(victim_configs, victim_model_path, dropout)

    # the perturbed emb should possess better high-level features indicating targets
    self.trg_vocab_emb = trg_vocab_emb
    self.sample_amount = kwargs["sample_amount"]
    if "density_temperature" in kwargs:
        self.density_temperature = kwargs["density_temperature"]
    else:
        # to scale the density
        self.density_temperature = self.sample_amount ** 0.5

    # src encoding
    self.src_gru = RNN(type="gru", batch_first=True, input_size=input_dim,
                       hidden_size=d_model, bidirectional=True)
    self.LN = nn.LayerNorm(d_model * 2, elementwise_affine=True)

    # prediction layer for inner-product similarity (density ratio)
    self.scorer_ffn = nn.Linear(in_features=2 * d_model, out_features=input_dim)
    # the "reference" for the density ratio: either trg_emb directly or the
    # victim encoder's representation of the original src

    self._reset_parameters()  # init parameters
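# Hedged sketch (an assumption, not the repository's forward()): the inner-product
# density-ratio scoring implied by `scorer_ffn` and `density_temperature` above.
# `src_repr` (batch, 2 * d_model) and `sampled_trg_emb` (sample_amount, input_dim)
# are illustrative names.
import torch

def density_ratio_scores(src_repr, scorer_ffn, sampled_trg_emb, density_temperature):
    query = scorer_ffn(src_repr)                              # (batch, input_dim)
    return query @ sampled_trg_emb.t() / density_temperature  # (batch, sample_amount) similarities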
def __init__(self, n_words, action_space=2, action_roll_steps=1,
             d_word_vec=512, d_model=256, dropout=0.0, **kwargs):
    super(Attacker, self).__init__()

    self.action_roll_steps = action_roll_steps
    self.action_space = action_space
    self.input_size = d_word_vec
    self.hidden_size = d_model

    self.src_embedding = Embeddings(num_embeddings=n_words, embedding_dim=self.input_size,
                                    dropout=dropout, add_position_embedding=False)
    # label representation
    self.src_gru = RNN(type="gru", batch_first=True, input_size=self.input_size,
                       hidden_size=self.hidden_size, bidirectional=True)

    # inputs: current input, avg_seqs as ctx
    self.ctx_linear = nn.Linear(in_features=2 * self.hidden_size,
                                out_features=self.hidden_size)
    self.input_linear = nn.Linear(in_features=self.input_size,
                                  out_features=self.hidden_size)
    # layer norm for inputs feature
    self.layer_norm = nn.LayerNorm(self.hidden_size, elementwise_affine=True)

    # outputs: actor distribution and critic value
    self.attacker_linear = nn.Linear(in_features=self.hidden_size,
                                     out_features=self.action_space)
    self.critic_linear = nn.Linear(in_features=self.hidden_size, out_features=1)

    self.dropout = nn.Dropout(dropout)
    self._reset_parameters()
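# Hedged sketch (an assumption, not Attacker's actual forward()): turning the shared hidden
# feature into the actor distribution over `action_space` and the critic value produced by
# the two heads above. Names are illustrative.
import torch

def actor_critic_heads(hidden, attacker_linear, critic_linear):
    logits = attacker_linear(hidden)                       # (batch, action_space)
    dist = torch.distributions.Categorical(logits=logits)  # discrete attack policy
    value = critic_linear(hidden).squeeze(-1)              # (batch,) state-value estimate
    return dist, value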