def __init__(self, vocab_src, vocab_tgt, embed_dim, hidden_size, num_layers,
             dropout, input_feed):
    super(ResponseGenerator, self).__init__()
    self.vocab_src = vocab_src
    self.vocab_tgt = vocab_tgt
    self.encoder_src = LSTMencoder(vocab_src,
                                   embed_dim=embed_dim,
                                   hidden_size=hidden_size // 2,
                                   num_layers=num_layers,
                                   dropout_in=dropout,
                                   dropout_out=dropout,
                                   bidirectional=True,
                                   pretrained_embed=None)
    self.embed_tgt = Embedding(vocab_tgt.size, embed_dim, vocab_tgt.padding_idx)
    self.encoder_ske = LSTMencoder(vocab_tgt,
                                   embed_dim=embed_dim,
                                   hidden_size=hidden_size // 2,
                                   num_layers=num_layers,
                                   dropout_in=dropout,
                                   dropout_out=dropout,
                                   bidirectional=True,
                                   pretrained_embed=self.embed_tgt)
    self.decoder = LSTMdecoder(vocab_tgt,
                               embed_dim=embed_dim,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               dropout_in=dropout,
                               dropout_out=dropout,
                               encoder_hidden_size=hidden_size,
                               pretrained_embed=self.embed_tgt,
                               input_feed=input_feed)
    self.copy = Linear(hidden_size, 1)
    self.generate = Linear(hidden_size, vocab_tgt.size)
def __init__(self, n_head, d_model, d_k, d_v, dropout_dict,
             attention_mechanism="vanilla-attention"):
    super(MultiHeadAttention, self).__init__()
    self.n_head = n_head
    self.d_k = d_k
    self.d_v = d_v
    self.attention_mechanism = attention_mechanism
    if attention_mechanism == "self-attention":
        # self.w_qkv = nn.Sequential(Linear(d_model, 3 * d_model), nn.ReLU())
        self.w_qkv = Linear(d_model, 3 * d_model)
    if attention_mechanism == "vanilla-attention":
        # self.w_q = nn.Sequential(Linear(d_model, d_model), nn.ReLU())
        # self.w_kv = nn.Sequential(Linear(d_model, 2 * d_model), nn.ReLU())
        self.w_q = Linear(d_model, d_model)
        self.w_kv = Linear(d_model, 2 * d_model)
    self.attention = ScaledDotProductAttention(
        d_model, n_head, dropout_dict["attention_dropout"])
    self.layer_norm = LayerNormalization(d_model)
    self.proj = Linear(n_head * d_v, d_model)
    self.residual_dropout = nn.Dropout(dropout_dict['residual_dropout'])
def __init__(self, d_hid, d_inner_hid, dropout_dict):
    super(PositionwiseFeedForward, self).__init__()
    # nn.Linear is faster than nn.Conv1d
    self.conv1 = nn.Sequential(Linear(d_hid, d_inner_hid), nn.ReLU())
    self.conv2 = Linear(d_inner_hid, d_hid)
    self.layer_norm = LayerNormalization(d_hid)
    self.relu_dropout = nn.Dropout(dropout_dict['relu_dropout'])
    self.residual_dropout = nn.Dropout(dropout_dict['residual_dropout'])
def __init__(self, vocabs, word_embed_file, word_embed_dim, char_embed_dim,
             char_filters, char_feat_dim, lstm_hidden_size,
             lstm_dropout=.5, feat_dropout=.5, parameters=None):
    super(LstmCnnFeatGate, self).__init__()
    assert word_embed_dim == char_feat_dim
    self.vocabs = vocabs
    self.label_size = len(self.vocabs['label'])
    # input features
    if parameters is not None:
        self.word_embed = nn.Embedding(parameters['word_embed_num'],
                                       parameters['word_embed_dim'])
    else:
        self.word_embed = load_embedding_from_file(word_embed_file,
                                                   word_embed_dim,
                                                   vocabs['token'],
                                                   vocabs['embed'],
                                                   vocabs['form'],
                                                   padding_idx=C.PAD_INDEX,
                                                   trainable=True)
    self.char_embed = CharCNNFF(len(vocabs['char']), char_embed_dim,
                                char_filters, output_size=char_feat_dim)
    # word_dim = char_dim = feat_dim in this model
    self.word_dim = self.word_embed.embedding_dim
    self.char_dim = self.char_embed.output_size
    self.feat_dim = self.word_dim
    # layers
    self.char_gate = Linear(self.char_dim, self.char_dim, bias=False)
    self.word_gate = Linear(self.word_dim, self.word_dim, bias=False)
    self.gate = Linear(self.feat_dim, self.feat_dim, bias=False)
    self.lstm = LSTM(input_size=self.feat_dim,
                     hidden_size=lstm_hidden_size,
                     batch_first=True,
                     bidirectional=True)
    self.output_linear = Linear(self.lstm.output_size, self.label_size)
    self.crf = CRF(vocabs['label'])
    self.feat_dropout = nn.Dropout(p=feat_dropout)
    self.lstm_dropout = nn.Dropout(p=lstm_dropout)
def parseModel(dictionary, crossval=False):
    layers = []
    for element in dictionary["model"]:
        if "Linear" in element:
            # "Linear(in_size, out_size)" -> in_size, out_size
            in_size, out_size = element[7:-1].split(",")
            in_size = int(in_size)
            out_size = int(out_size)
            if not crossval:
                layers.append(Linear(in_size, out_size, dictionary["xavierGain"]))
            else:
                layers.append(Linear(in_size, out_size))
        elif "LeakyReLU" in element:
            layers.append(LeakyReLU())
        elif "ReLU" in element:
            layers.append(ReLU())
        elif "Tanh" in element:
            layers.append(Tanh())
        elif "Sigmoid" in element:
            layers.append(Sigmoid())
        else:
            print(f'{element} is an invalid argument')
    return Sequential(layers)
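# Usage sketch for parseModel. The "model" list and "xavierGain" key follow the
# parsing logic above; the concrete layer sizes are hypothetical.
example_config = {
    "model": ["Linear(2, 8)", "ReLU", "Linear(8, 2)", "Sigmoid"],
    "xavierGain": 1.0,
}
example_net = parseModel(example_config)                      # Linear layers built with xavierGain
example_net_cv = parseModel(example_config, crossval=True)    # plain Linear layers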
def __init__(self, block, num_blocks, num_classes=10):
    super(ResNet, self).__init__()
    self.in_planes = 16
    # self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
    self.conv1 = Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
                        bias=False, bitwidth=8)
    self.bn1 = nn.BatchNorm2d(16)
    self.alpha1 = nn.Parameter(torch.tensor(10.))
    self.ActFn = ActFn.apply
    self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
    # self.linear = nn.Linear(64, num_classes)
    self.linear = Linear(64, num_classes, bitwidth=8)
    self.apply(_weights_init)
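# Usage sketch (assumption: the repo defines a quantized BasicBlock compatible
# with _make_layer; [3, 3, 3] blocks per stage is the standard CIFAR ResNet-20).
model = ResNet(BasicBlock, [3, 3, 3], num_classes=10)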
def __init__(self, vocab, embed_dim=512, hidden_size=512, num_layers=1,
             dropout_in=0.1, dropout_out=0.1, encoder_hidden_size=512,
             pretrained_embed=None, input_feed=False):
    super(LSTMdecoder, self).__init__()
    self.embed_dim = embed_dim
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.input_feed = input_feed
    if pretrained_embed is not None:
        self.embed_tokens = pretrained_embed
    else:
        self.embed_tokens = Embedding(vocab.size, embed_dim, vocab.padding_idx)
    self.lstm = LSTM(
        input_size=embed_dim + (hidden_size if self.input_feed else 0),
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=self.dropout_out if num_layers > 1 else 0.,
    )
    self.attention_src = GeneralAttention(hidden_size, encoder_hidden_size,
                                          encoder_hidden_size)
    self.proj_with_src = Linear(encoder_hidden_size + hidden_size, hidden_size)
    self.attention_ske = GeneralAttention(hidden_size, hidden_size, hidden_size)
    self.gate_with_ske = Linear(2 * hidden_size, hidden_size)
def step(self):
    # Update the velocity for parameters that require a gradient
    # (therefore only the Linear layers).
    for i in range(len(self.layers)):
        if self.layers[i].param() is not None:
            if isinstance(self.layers[i], Linear):
                if self.momentum != 0:
                    # Velocity update follows the PyTorch SGD formulation:
                    # https://pytorch.org/docs/stable/optim.html
                    # velocity_(t+1) = momentum * velocity_(t) + gradient of layer
                    # parameters_(t+1) = parameters_(t) - lr * velocity_(t+1)
                    #   --> the parameter update itself is done inside the layer (calling step)
                    self.velocity[i] = (
                        self.momentum * self.velocity[i][0] + self.layers[i].param()[0][1],
                        self.momentum * self.velocity[i][1] + self.layers[i].param()[1][1])
                    self.layers[i].step(
                        self.learning_rate * self.velocity[i][0],
                        self.learning_rate * self.velocity[i][1])
                else:
                    self.layers[i].step(self.learning_rate, self.learning_rate)
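# Tiny self-contained illustration of the update rule documented above
# (plain Python scalars, independent of this repo's Linear/step API):
momentum, lr = 0.9, 0.1
velocity, weight, grad = 0.0, 1.0, 0.5
velocity = momentum * velocity + grad   # velocity_(t+1) = momentum * velocity_(t) + grad
weight = weight - lr * velocity         # parameters_(t+1) = parameters_(t) - lr * velocity_(t+1)
assert abs(weight - 0.95) < 1e-12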
import numpy as np
import matplotlib.pyplot as plt

from module import Sequential, Linear, SoftMax, ClassNLLCriterion, sgd_momentum, get_batches, net_image

# Two Gaussian blobs as a toy binary-classification dataset.
N = 500
X1 = 4 * np.random.randn(N, 2) + np.array([2, 2])
X2 = np.random.randn(N, 2) + np.array([-2, -2])
Y = np.concatenate([np.ones(N), np.zeros(N)])[:, None]
Y = np.hstack([Y, 1 - Y])
X = np.vstack([X1, X2])

plt.scatter(X[:, 0], X[:, 1], c=Y[:, 0], edgecolors='none')
plt.show()

net = Sequential()
net.add(Linear(2, 8))
net.add(SoftMax())
net.add(Linear(8, 2))
net.add(SoftMax())
criterion = ClassNLLCriterion()
print(net)

optimizer_config = {'learning_rate': 1e-1, 'momentum': 0.9}
optimizer_state = {}

# Looping params
n_epoch = 20
batch_size = 128
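# Minimal training-loop sketch for the setup above. This assumes the usual
# from-scratch module API (net.zeroGradParameters / forward / backward /
# getParameters / getGradParameters, criterion.forward / backward, and
# sgd_momentum(params, grads, config, state)); adjust to the actual
# signatures exposed by `module` if they differ.
for epoch in range(n_epoch):
    for x_batch, y_batch in get_batches((X, Y), batch_size):
        net.zeroGradParameters()
        predictions = net.forward(x_batch)
        loss = criterion.forward(predictions, y_batch)
        grad_output = criterion.backward(predictions, y_batch)
        net.backward(x_batch, grad_output)
        sgd_momentum(net.getParameters(), net.getGradParameters(),
                     optimizer_config, optimizer_state)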
def __init__(self, vocabs, counters, word_embed_file, word_embed_dim,
             char_embed_dim, char_filters, char_feat_dim, lstm_hidden_size,
             lstm_dropout=0.5, feat_dropout=0.5):
    # TODO: init function for saved model
    super(LstmCnnGate, self).__init__()
    self.vocabs = vocabs
    self.label_size = len(self.vocabs['label'])
    # input features
    self.word_embed = load_embedding_from_file(word_embed_file,
                                               word_embed_dim,
                                               vocabs['token'],
                                               vocabs['embed'],
                                               vocabs['form'],
                                               padding_idx=C.PAD_INDEX,
                                               trainable=True)
    self.char_embed = CharCNNFF(len(vocabs['char']), char_embed_dim,
                                char_filters, output_size=char_feat_dim)
    self.word_dim = self.word_embed.embedding_dim
    self.char_dim = self.char_embed.output_size
    self.feat_dim = self.char_dim
    # layers
    self.lstm = LSTM(input_size=self.feat_dim,
                     hidden_size=lstm_hidden_size,
                     batch_first=True,
                     bidirectional=True)
    self.output_linear = Linear(self.lstm.output_size, self.label_size)
    self.crf = CRF(vocabs['label'])
    self.feat_dropout = nn.Dropout(p=feat_dropout)
    self.lstm_dropout = nn.Dropout(p=lstm_dropout)
    self.lstm_size = self.lstm.output_size
    self.uni_lstm_size = self.lstm_size // 2
    # word representation level
    self.word_gate = Linear(self.word_dim, self.word_dim)
    self.char_gate = Linear(self.word_dim, self.word_dim)
    # feature extraction level
    # context-only feature linear layers
    self.cof_linear_fwd = Linear(self.uni_lstm_size, self.uni_lstm_size)
    self.cof_linear_bwd = Linear(self.uni_lstm_size, self.uni_lstm_size)
    # hidden states gates
    self.hs_gates = nn.ModuleList([
        Linear(self.uni_lstm_size, self.uni_lstm_size),
        Linear(self.uni_lstm_size, self.uni_lstm_size)
    ])
    # context-only feature gates
    self.cof_gates = nn.ModuleList([
        Linear(self.uni_lstm_size, self.uni_lstm_size),
        Linear(self.uni_lstm_size, self.uni_lstm_size)
    ])
def __init__(self, vocabs, counters, word_embed_file, word_embed_dim,
             char_embed_dim, char_filters, char_feat_dim, lstm_hidden_size,
             lstm_dropout=0.5, feat_dropout=0.5, signal_dropout=0,
             ctx_size=5, use_signal=True, parameters=None):
    assert char_feat_dim >= word_embed_dim
    super(LstmCnnDfc, self).__init__()
    self.vocabs = vocabs
    self.label_size = len(self.vocabs['label'])
    self.use_signal = use_signal
    # input features
    if parameters is not None:
        self.word_embed = nn.Embedding(parameters['word_embed_num'],
                                       parameters['word_embed_dim'])
    else:
        self.word_embed = load_embedding_from_file(word_embed_file,
                                                   word_embed_dim,
                                                   vocabs['token'],
                                                   vocabs['embed'],
                                                   vocabs['form'],
                                                   padding_idx=C.PAD_INDEX,
                                                   trainable=True)
    self.char_embed = CharCNNFF(len(vocabs['char']), char_embed_dim,
                                char_filters, output_size=char_feat_dim)
    if use_signal:
        if parameters is not None:
            self.signal_embed = nn.Embedding(parameters['signal_embed_num'],
                                             parameters['signal_embed_dim'])
        else:
            self.signal_embed = build_signal_embed(counters['embed'],
                                                   counters['token'],
                                                   vocabs['token'],
                                                   vocabs['form'])
    self.word_dim = self.word_embed.embedding_dim
    self.char_dim = self.char_embed.output_size
    self.feat_dim = self.char_dim
    self.signal_dim = self.signal_embed.embedding_dim
    self.ctx_size = ctx_size
    # layers
    self.lstm = LSTM(input_size=self.feat_dim,
                     hidden_size=lstm_hidden_size,
                     batch_first=True,
                     bidirectional=True)
    self.output_linear = Linear(self.lstm.output_size, self.label_size)
    self.crf = CRF(vocabs['label'])
    self.feat_dropout = nn.Dropout(p=feat_dropout)
    self.lstm_dropout = nn.Dropout(p=lstm_dropout)
    self.signal_dropout = nn.Dropout(p=signal_dropout)
    self.lstm_size = self.lstm.output_size
    self.uni_lstm_size = self.lstm_size // 2
    # word representation level
    self.word_gates = nn.ModuleList([
        Linear(self.word_dim, self.word_dim),
        Linear(self.word_dim, self.word_dim)
    ])
    self.char_gates = nn.ModuleList([
        Linear(self.word_dim, self.word_dim),
        Linear(self.word_dim, self.word_dim)
    ])
    if use_signal:
        self.signal_gates = nn.ModuleList([
            Linear(self.signal_dim, self.word_dim),
            Linear(self.signal_dim, self.word_dim)
        ])
    # feature extraction level
    # context-only feature linear layers
    self.cof_linear_fwd = Linear(self.uni_lstm_size, self.uni_lstm_size)
    self.cof_linear_bwd = Linear(self.uni_lstm_size, self.uni_lstm_size)
    # hidden states gates
    self.hs_gates = nn.ModuleList(
        [Linear(self.uni_lstm_size, self.uni_lstm_size) for _ in range(4)])
    # context-only feature gates
    self.cof_gates = nn.ModuleList(
        [Linear(self.uni_lstm_size, self.uni_lstm_size) for _ in range(4)])
    if use_signal:
        self.crs_gates = nn.ModuleList([
            Linear(self.signal_dim * (ctx_size + 1), self.uni_lstm_size)
            for _ in range(4)
        ])
for mini_batch_size in [20]:
    for activation in [Tanh()]:
        print('***')
        print('Criterion: {}, mini_batch_size: {}, activation: {}.'.format(
            criterion.name(), mini_batch_size, activation.name()))
        print('***')
        training_time_acc = []
        test_error_acc = []
        for i in tqdm(range(k_fold), leave=False):
            torch.manual_seed(2019)
            model = Sequential([
                Linear(2, 25, activation.name()), activation,
                Linear(25, 25, activation.name()), activation,
                Linear(25, 25, activation.name()), activation,
                Linear(25, 25, activation.name()), activation,
                Linear(25, 2, activation.name())
            ])
            optimizer = SGD(model.param(), lr=lr)
            train_input, train_target, valid_input, valid_target = CV_sets[i]
            train_input, valid_input, test_input_ = standardise_input(
                train_input, valid_input, test_input)
            start = time.time()