def __init__(self, config):
    super(Model, self).__init__()
    self.config = config
    self.bilstm1 = nn.LSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                           bidirectional=True, batch_first=True)
    self.bilstm2 = nn.LSTM(input_size=config.emb_size, hidden_size=config.hidden_size,
                           bidirectional=True, batch_first=True)
    self.linear1 = nn.Linear(2 * config.hidden_size, 2 * config.hidden_size)
    self.linear2 = nn.Linear(2 * config.hidden_size, 2 * config.hidden_size)
    self.linear3 = nn.Linear(2 * config.hidden_size, 2 * config.hidden_size)
    self.fuse_linear1 = nn.Linear(4 * config.hidden_size, 2 * config.hidden_size)
    self.fuse_linear2 = nn.Linear(4 * config.hidden_size, 2 * config.hidden_size)
    self.bilinear = nn.Bilinear(2 * config.hidden_size, 2 * config.hidden_size, 1)
    self.align_linear1 = nn.Linear(2 * config.hidden_size, 1)
    self.align_linear2 = nn.Linear(2 * config.hidden_size, 1)
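# --- Added sketch (not from the model above) ---
# A minimal, self-contained illustration of the shape contract every snippet in
# this collection relies on: nn.Bilinear(in1, in2, out) scores a *pair* of
# inputs, computing y = x1^T A x2 + b row by row. The hidden size (64) and
# batch size (8) below are arbitrary values chosen for the demo.
import torch
import torch.nn as nn

hidden_size = 64
bilinear = nn.Bilinear(2 * hidden_size, 2 * hidden_size, 1)

x1 = torch.randn(8, 2 * hidden_size)  # e.g. a batch of question encodings
x2 = torch.randn(8, 2 * hidden_size)  # e.g. a batch of candidate encodings
score = bilinear(x1, x2)              # shape (8, 1): one scalar score per pair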
def __init__(self, args, rank_mode=False):
    super(InteractionAggregator, self).__init__()
    self.hidden_size = args.hidden_size
    self.position_enc = nn.Embedding.from_pretrained(
        get_sinusoid_encoding_table(11, self.hidden_size, padding_idx=0), freeze=True)
    self.tanh = nn.Tanh()
    self.relu = nn.ReLU(inplace=False)
    self.softmax = nn.Softmax(dim=1)
    self.rank_mode = rank_mode
    self.M = nn.Bilinear(self.hidden_size, self.hidden_size, self.hidden_size)
    nn.init.xavier_normal_(self.M.weight)
    self.w_attn1 = nn.Linear(self.hidden_size, self.hidden_size)
    nn.init.xavier_normal_(self.w_attn1.weight)
    if self.rank_mode:
        self.w_attn2 = nn.Linear(self.hidden_size, self.hidden_size)
        nn.init.xavier_normal_(self.w_attn2.weight)
def __init__(self, word_vocab, rel_vocab, config):
    super(RelationRanking, self).__init__()
    self.config = config
    rel1_vocab, rel2_vocab = rel_vocab
    self.word_embed = Embeddings(word_vec_size=config.d_word_embed, dicts=word_vocab)
    self.rel1_embed = Embeddings(word_vec_size=config.d_rel_embed, dicts=rel1_vocab)
    self.rel2_embed = Embeddings(word_vec_size=config.d_rel_embed, dicts=rel2_vocab)
    # print(self.rel_embed.word_lookup_table.weight.data)
    # TODO: the initialization of rel_embed still needs to be revised:
    # rel_embed.lookup_table.weight.data.normal_(0, 0.1)
    if self.config.rnn_type.lower() == 'gru':
        self.rnn = nn.GRU(input_size=config.d_word_embed, hidden_size=config.d_hidden,
                          num_layers=config.n_layers, dropout=config.dropout_prob,
                          bidirectional=config.birnn, batch_first=True)
    else:
        self.rnn = nn.LSTM(input_size=config.d_word_embed, hidden_size=config.d_hidden,
                           num_layers=config.n_layers, dropout=config.dropout_prob,
                           bidirectional=config.birnn, batch_first=True)
    self.dropout = nn.Dropout(p=config.dropout_prob)
    seq_in_size = config.d_hidden
    if self.config.birnn:
        seq_in_size *= 2
    self.question_attention = MLPWordSeqAttention(input_size=config.d_rel_embed,
                                                  seq_size=seq_in_size)
    self.bilinear = nn.Bilinear(seq_in_size, config.d_rel_embed, 1, bias=False)
    self.seq_out = nn.Sequential(
        # nn.BatchNorm1d(seq_in_size),
        self.dropout,
        nn.Linear(seq_in_size, config.d_rel_embed))
def __init__(self, args):
    super(NeuralNetwork, self).__init__()
    self.args = args
    self.patience = 0
    self.init_clip_max_norm = 5.0
    self.optimizer = None
    self.best_result = [0, 0, 0, 0, 0, 0]
    self.metrics = Metrics(self.args.score_file_path)
    self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    self.bert_config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        finetuning_task="classification", num_labels=1)
    self.bert_tokenizer = BertTokenizer.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case)
    special_tokens_dict = {
        'eos_token': '[eos]',
        'additional_special_tokens': ['[soe]', '[eoe]']
    }
    num_added_toks = self.bert_tokenizer.add_special_tokens(special_tokens_dict)
    self.bert_model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool('.ckpt' in args.model_name_or_path),
        config=self.bert_config)
    self.bert_model.resize_token_embeddings(len(self.bert_tokenizer))
    # Move to the selected device instead of calling .cuda() unconditionally,
    # so the model also initializes on CPU-only machines.
    self.bert_model = self.bert_model.to(self.device)
    self.attn = nn.Linear(768, 768)
    self.rnn = nn.GRU(input_size=768, hidden_size=200, num_layers=1,
                      batch_first=True, bidirectional=False)
    self.bilinear = nn.Bilinear(768, 768, 1)
def __init__(self, inshape, kernelsize, hiddensize):
    super(CNN_LSTM_predictor, self).__init__()
    # shape parameters:
    self.batch_size, self.n_filters = inshape
    self.lstm_input = seq_len = 20 - kernelsize + 1
    # layers
    self.cnnPrice = nn.Conv1d(1, self.n_filters, kernelsize)
    self.cnnVolume = nn.Conv1d(1, self.n_filters, kernelsize)
    self.lstmPrice = nn.LSTM(self.lstm_input, self.lstm_input)
    self.lstmVolume = nn.LSTM(self.lstm_input, self.lstm_input)
    bilin_size = seq_len * self.n_filters
    self.Bilin = nn.Bilinear(bilin_size, bilin_size, 36)
    self.output_layer = nn.Sequential(
        nn.Dropout(p=0.5),  # explained later
        nn.Linear(36, 1)
    )
def __init__(self, config):
    super(StructuredAttention, self).__init__()
    # self.use_gpu = config.use_gpu
    # self.device = torch.device("cuda" if config.use_gpu else "cpu")
    self.device = config.device
    self.bidirectional = config.rnn_bidir
    self.sem_dim_size = None
    if config.rnn_bidir:
        self.sem_dim_size = 2 * config.sem_dim_size
    else:
        self.sem_dim_size = config.sem_dim_size
    self.rnn_cell_size = config.rnn_cell_size
    if self.bidirectional:
        self.rnn_cell_size = self.rnn_cell_size * 2
    self.str_dim_size = self.rnn_cell_size - self.sem_dim_size
    self.pytorch_version = "stable"
    # print("Setting pytorch " + self.pytorch_version + " version for Structured Attention")

    self.tp_linear = nn.Linear(self.str_dim_size, self.str_dim_size, bias=True)
    torch.nn.init.xavier_uniform_(self.tp_linear.weight)
    nn.init.constant_(self.tp_linear.bias, 0)

    self.tc_linear = nn.Linear(self.str_dim_size, self.str_dim_size, bias=True)
    torch.nn.init.xavier_uniform_(self.tc_linear.weight)
    nn.init.constant_(self.tc_linear.bias, 0)

    self.fi_linear = nn.Linear(self.str_dim_size, 1, bias=False)
    torch.nn.init.xavier_uniform_(self.fi_linear.weight)

    self.bilinear = nn.Bilinear(self.str_dim_size, self.str_dim_size, 1, bias=False)
    torch.nn.init.xavier_uniform_(self.bilinear.weight)

    self.exparam = nn.Parameter(torch.Tensor(1, 1, self.sem_dim_size))
    torch.nn.init.xavier_uniform_(self.exparam)

    self.fzlinear = nn.Linear(3 * self.sem_dim_size, self.sem_dim_size, bias=True)
    torch.nn.init.xavier_uniform_(self.fzlinear.weight)
    nn.init.constant_(self.fzlinear.bias, 0)

    self.tanh = nn.Tanh()
    self.relu = nn.ReLU()
    self.leak_relu = nn.LeakyReLU()
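# --- Added sketch (assumed usage, not this module's actual forward pass) ---
# A Bilinear(d, d, 1, bias=False) layer like self.bilinear above is often used
# to score every (parent, child) token pair for structured attention by
# broadcasting the sequence against itself. All sizes here are illustrative.
import torch
import torch.nn as nn

batch, seq_len, d = 2, 6, 16
u = torch.randn(batch, seq_len, d)                  # structural token features
pair_scorer = nn.Bilinear(d, d, 1, bias=False)

parents = u.unsqueeze(2).expand(-1, -1, seq_len, -1).contiguous()   # (B, L, L, d)
children = u.unsqueeze(1).expand(-1, seq_len, -1, -1).contiguous()  # (B, L, L, d)
edge_scores = pair_scorer(parents, children).squeeze(-1)            # (B, L, L)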
def __init__(self, dim_embeddings, dim_rnn=128, num_layers=2, dropout_rate=0,
             similarity='inner', pooling='avg'):
    super(RnnAttentionNet, self).__init__()
    self.dim_embeddings = dim_embeddings
    self.dim_rnn = dim_rnn
    self.dim_encoded = 2 * dim_rnn
    self.num_layers = num_layers
    self.dropout_rate = dropout_rate
    self.similarity = similarity
    self.pooling = pooling

    self.rnn_context = nn.GRU(input_size=dim_embeddings, hidden_size=dim_rnn,
                              num_layers=num_layers, batch_first=True,
                              dropout=dropout_rate, bidirectional=True)
    # self.rnn_option = nn.GRU(input_size=dim_embeddings, hidden_size=dim_rnn,
    #                          num_layers=num_layers, batch_first=True,
    #                          dropout=dropout_rate, bidirectional=True)
    self.rnn_attn_context = nn.GRU(input_size=4 * self.dim_encoded, hidden_size=dim_rnn,
                                   num_layers=1, batch_first=True,
                                   dropout=dropout_rate, bidirectional=True)
    # self.rnn_attn_option = nn.GRU(input_size=4 * self.dim_encoded, hidden_size=dim_rnn,
    #                               num_layers=num_layers, batch_first=True,
    #                               dropout=dropout_rate, bidirectional=True)

    if self.similarity == 'cosine' or self.similarity == 'inner':
        self.bi_fc = nn.Bilinear(self.dim_encoded, self.dim_encoded, 1)
    elif self.similarity == 'trilinear':
        self.co_attn = COAttention(d_model=self.dim_encoded, dropout=dropout_rate)
        self.fc_co = nn.Linear(self.dim_encoded, 1)
    else:
        raise ValueError(f"Invalid Similarity: {self.similarity}")
def __init__(self, opts):
    super(GRN16, self).__init__()
    self.vocab_size = opts.vocab_size  # default 10000
    self.r = opts.r  # 1, 2
    self.emb = nn.Embedding(self.vocab_size, 50)
    self.blstm = nn.LSTM(50, 50, batch_first=True, bidirectional=True)
    self.gate = nn.Linear(200, self.r)
    self.H = nn.Bilinear(100, 100, self.r, bias=False)
    self.V = nn.Linear(200, self.r, bias=False)
    self.b = Parameter(torch.zeros(1, 2))
    self.v = nn.Linear(self.r, 1)
    self.maxpool2d = nn.MaxPool2d([3, 3])
    self.linear1 = nn.Linear(17 * 17, 50)  # [50, 50] --> (3, 3) maxpool --> [17, 17]
    self.linear2 = nn.Linear(50, 2)
def __init__(self, dim_in: int, dropout: float, score_type: str = 'dot') -> None:
    super(Score_Net, self).__init__()
    self.dim_in = dim_in
    self.dropout = dropout
    self.score_type = score_type
    self.Dropout = nn.Dropout(dropout)
    self.head = nn.Parameter(T(1, dim_in))
    self.tail = nn.Parameter(T(1, dim_in))
    self.layernorm = nn.LayerNorm(dim_in)
    if score_type == 'bilinear':
        self.func: Callable[[T, T], T] = nn.Bilinear(dim_in, dim_in, 1)
    else:
        self.func: Callable[[T, T], T] = torch.bmm
    self.init_para()
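# --- Added sketch (assumed usage, not the repository's forward pass) ---
# Score_Net stores either nn.Bilinear or torch.bmm in self.func, but the two
# callables expect differently shaped inputs. The lines below show the shape
# handling each branch would need; dim_in and the batch size are illustrative.
import torch
import torch.nn as nn

dim_in, batch = 32, 4
h = torch.randn(batch, dim_in)  # "head" representations
t = torch.randn(batch, dim_in)  # "tail" representations

# bilinear branch: nn.Bilinear consumes (batch, dim) inputs directly
score_bilinear = nn.Bilinear(dim_in, dim_in, 1)(h, t)              # (batch, 1)

# dot branch: torch.bmm needs explicit (batch, 1, dim) x (batch, dim, 1)
score_dot = torch.bmm(h.unsqueeze(1), t.unsqueeze(2)).squeeze(-1)  # (batch, 1)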
def __init__(self, dim_hid: int, score_type: str, bidirectional: bool = False) -> None:
    super(Predic_Net, self).__init__()
    self.dim_hid = dim_hid
    self.score_type = score_type
    self.bidirectional = bidirectional
    if score_type == 'bilinear':
        self.func: Callable[[T, T], T] = nn.Bilinear(dim_hid, dim_hid, 1)
    elif score_type == 'dot':
        self.func: Callable[[T, T], T] = torch.bmm
    elif score_type == 'denselinear':
        self.func = nn.Linear(4 * dim_hid, 2)
    elif score_type == 'linear':
        self.func = nn.Linear(2 * dim_hid, 2)
    self.init_para()
    self.norm_factor = np.sqrt(dim_hid)
    self.layernorm = nn.LayerNorm(dim_hid)
def __init__(self, config, embedding):
    super(rc_cnn_dailmail, self).__init__()
    self.dict_embedding = nn.Embedding(num_embeddings=config.dict_num,
                                       embedding_dim=100, _weight=embedding)
    # self.dict_embedding.weight.requires_grad = False
    self.bilinear = nn.Bilinear(config.hidden_size * 2, config.hidden_size * 2, 1)
    self.lstm1 = nn.GRU(config.input_size, config.hidden_size, bidirectional=True,
                        batch_first=True, dropout=config.dropout)
    self.lstm2 = nn.GRU(config.input_size, config.hidden_size, bidirectional=True,
                        batch_first=True, dropout=config.dropout)
    self.linear = nn.Linear(config.hidden_size * 2, config.eneity_num)
def __init__(self, config):
    super(CNNBaseline, self).__init__(config)
    self.bert = BertModel(config)
    self.init_weights()
    self._embedding = self.bert.embeddings.word_embeddings

    filter_sizes = [2, 3, 4, 5]
    num_filters = 36
    embedding_size = 768
    self._convs = nn.ModuleList([
        nn.Conv2d(1, num_filters, (K, embedding_size)) for K in filter_sizes
    ])
    self._dropout = nn.Dropout(0.1)
    self._linear = nn.Bilinear(len(filter_sizes) * num_filters,
                               len(filter_sizes) * num_filters, 1)
    self.apply(self.init_esim_weights)
def __init__(self, vocab, vocab_size, hidden_size, dropout, slots, gating_dict, shared_emb):
    super(Generator, self).__init__()
    self.slots = slots
    self.vocab = vocab
    self.vocab_size = vocab_size
    self.nb_gate = len(gating_dict)
    self.hidden_size = hidden_size
    self.gating_dict = gating_dict
    self.embedding = shared_emb  # token embedding matrix shared with encoders
    self.dropout_layer = nn.Dropout(dropout)
    self.bilinear = nn.Bilinear(self.hidden_size, self.hidden_size, 1)
    self.gru = nn.GRU(hidden_size, hidden_size, dropout=dropout)
    self.W_ratio = nn.Linear(3 * hidden_size, 1)  # W_1
    self.softmax = nn.Softmax(dim=1)
    self.sigmoid = nn.Sigmoid()
def __init__(self, input_dim1, input_dim2, output_dim,
             tauM=20, tauAdp_inital=100, tau_initializer='normal',
             tauM_inital_std=5, tauAdp_inital_std=5,
             is_adaptive=1, device='cpu'):
    super(spike_Bidense, self).__init__()
    self.input_dim1 = input_dim1
    self.input_dim2 = input_dim2
    self.output_dim = output_dim
    self.is_adaptive = is_adaptive
    self.device = device

    self.dense = nn.Bilinear(input_dim1, input_dim2, output_dim)
    self.tau_m = nn.Parameter(torch.Tensor(self.output_dim))
    self.tau_adp = nn.Parameter(torch.Tensor(self.output_dim))

    if tau_initializer == 'normal':
        nn.init.normal_(self.tau_m, tauM, tauM_inital_std)
        nn.init.normal_(self.tau_adp, tauAdp_inital, tauAdp_inital_std)
    elif tau_initializer == 'multi_normal':
        self.tau_m = multi_normal_initilization(self.tau_m, tauM, tauM_inital_std)
        self.tau_adp = multi_normal_initilization(self.tau_adp, tauAdp_inital, tauAdp_inital_std)
def __init__(self, n_in, n_hid, n_out, do_prob=0., bilinear=False, bnorm=True):
    super(MLP, self).__init__()
    self.bilinear = bilinear
    self.bnorm = bnorm
    if bilinear:
        self.fc1 = nn.Bilinear(n_in, n_in, n_hid)
    else:
        self.fc1 = nn.Linear(n_in, n_hid)
    self.fc2 = nn.Linear(n_hid, n_out)
    if bnorm:
        self.bn = nn.BatchNorm1d(n_out)
    self.dropout_prob = do_prob
    self.init_weights()
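# --- Added sketch (an assumption about how fc1 is driven; the MLP's actual
# forward() is not shown above) ---
# With bilinear=True, fc1 takes two inputs and models their pairwise
# interaction; otherwise it takes one. Sizes below are arbitrary.
import torch
import torch.nn as nn

n_in, n_hid = 16, 32
x_a, x_b = torch.randn(10, n_in), torch.randn(10, n_in)

h_linear = nn.Linear(n_in, n_hid)(x_a)                  # single-input path: (10, n_hid)
h_bilinear = nn.Bilinear(n_in, n_in, n_hid)(x_a, x_b)   # pairwise interaction: (10, n_hid)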
def test_metabilinear_params(bias):
    meta_model = MetaBilinear(2, 3, 5, bias=bias)
    model = nn.Bilinear(2, 3, 5, bias=bias)

    params = OrderedDict()
    params['weight'] = torch.randn(5, 2, 3)
    model.weight.data.copy_(params['weight'])
    if bias:
        params['bias'] = torch.randn(5)
        model.bias.data.copy_(params['bias'])

    inputs1 = torch.randn(7, 2)
    inputs2 = torch.randn(7, 3)

    outputs_torchmeta = meta_model(inputs1, inputs2, params=params)
    outputs_nn = model(inputs1, inputs2)

    np.testing.assert_equal(outputs_torchmeta.detach().numpy(),
                            outputs_nn.detach().numpy())
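# --- Added sanity check (not part of the test suite above, just an illustration) ---
# nn.Bilinear's output can be reproduced with an einsum over its
# (out_features, in1_features, in2_features) weight tensor, which is the
# identity that equivalence tests like the one above ultimately rely on.
import torch
import torch.nn as nn

model = nn.Bilinear(2, 3, 5)
x1, x2 = torch.randn(7, 2), torch.randn(7, 3)

manual = torch.einsum('bi,oij,bj->bo', x1, model.weight, x2) + model.bias
torch.testing.assert_close(model(x1, x2), manual)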
def __init__(self, latent_dim, hidden_dim, n_out=1, num_layers=1, activation=nn.Tanh,
             softplus=False, resid=False, expand_coords=False, bilinear=False):
    super(SpatialGenerator, self).__init__()
    self.softplus = softplus
    self.expand_coords = expand_coords

    in_dim = 2
    if expand_coords:
        in_dim = 5  # include squares of coordinates as inputs
    self.coord_linear = nn.Linear(in_dim, hidden_dim)

    self.latent_dim = latent_dim
    if latent_dim > 0:
        self.latent_linear = nn.Linear(latent_dim, hidden_dim, bias=False)
    if latent_dim > 0 and bilinear:
        # include bilinear layer on latent and coordinates
        self.bilinear = nn.Bilinear(in_dim, latent_dim, hidden_dim, bias=False)

    layers = [activation()]
    for _ in range(1, num_layers):
        if resid:
            layers.append(ResidLinear(hidden_dim, hidden_dim, activation=activation))
        else:
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(activation())
    layers.append(nn.Linear(hidden_dim, n_out))
    self.layers = nn.Sequential(*layers)
def __init__(self, input_dims, action_dims):
    super(M1pM1, self).__init__()
    self.input_dims = input_dims
    self.action_dims = action_dims
    self.seq = nn.Sequential(
        nn.Linear(input_dims * 2 + action_dims, input_dims * 4),
        cheese_layer(),
        nn.Linear(input_dims * 4, input_dims * 4),
        cheese_layer(),
        nn.Linear(input_dims * 4, input_dims * 4),
        cheese_layer(),
        nn.Linear(input_dims * 4, input_dims))
    self.seq2 = nn.Sequential(
        nn.Linear(input_dims, input_dims * 4),
        cheese_layer(),
        nn.Linear(input_dims * 4, input_dims * 4),
        cheese_layer(),
        nn.Linear(input_dims * 4, input_dims * 4),
        cheese_layer(),
        nn.Linear(input_dims * 4, input_dims))
    self.bi2 = nn.Bilinear(input_dims, input_dims, input_dims)
def __init__(self, vocab_size, emb_size, hidden_size, dropout_rate,
             tied_embedding=None, enc_attention=False):
    super(DecoderRNN, self).__init__()
    self.GRU = nn.GRU(emb_size, hidden_size, dropout=dropout_rate)
    output_size = hidden_size
    self.enc_attention = enc_attention
    if enc_attention:
        self.bilinear = nn.Bilinear(hidden_size, hidden_size, 1)
        output_size += hidden_size
    self.pre_out = nn.Linear(output_size, emb_size)
    self.out_layer = nn.Linear(emb_size, vocab_size)
    # Tied embedding: the word embedding weights are shared between the encoder and decoder
    if tied_embedding is not None:
        self.out_layer.weight = tied_embedding.weight
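# --- Added sketch (a common pattern for a Bilinear(hidden, hidden, 1) attention
# score; the decoder's real forward() is not shown above, so the tensor names
# dec_state / enc_outputs and all sizes below are assumptions) ---
import torch
import torch.nn as nn
import torch.nn.functional as F

hidden_size, src_len, batch = 128, 12, 4
bilinear = nn.Bilinear(hidden_size, hidden_size, 1)

dec_state = torch.randn(batch, hidden_size)             # current decoder hidden state
enc_outputs = torch.randn(batch, src_len, hidden_size)  # encoder hidden states

# score the decoder state against every encoder position, then build a context vector
queries = dec_state.unsqueeze(1).expand(-1, src_len, -1).contiguous()
attn = F.softmax(bilinear(queries, enc_outputs).squeeze(-1), dim=-1)    # (batch, src_len)
context = torch.bmm(attn.unsqueeze(1), enc_outputs).squeeze(1)          # (batch, hidden_size)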