def __init__(self):
    # Build dataloaders, vocabulary, and numericalize texts.
    self.databunch = TextClasDataBunch.from_csv(args.data, bs=10, csv_name='data.csv',
                                                pad_first=True, pad_idx=1)

    def build_word_to_idx(idx_to_word):
        '''Build the word_to_idx dictionary from the dataset's idx_to_word vocabulary.'''
        word_to_idx = {}
        for i, word in enumerate(idx_to_word):
            word_to_idx[word] = i
        return word_to_idx

    idx_to_word = self.databunch.vocab.itos
    word_to_idx = build_word_to_idx(idx_to_word)

    models = {}
    models['LSTM'] = LSTM(vocab_size=len(idx_to_word), embedding_dim=300, hidden_size=300,
                          word_to_idx=word_to_idx, glove_path=args.embedding)
    models['GloVe'] = Word_Vector_Model(vocab_size=len(idx_to_word), embedding_dim=300,
                                        word_to_idx=word_to_idx, glove_path=args.embedding)
    models['GRU'] = GRU(vocab_size=len(idx_to_word), embedding_dim=300, hidden_size=300,
                        word_to_idx=word_to_idx, glove_path=args.embedding)
    self.model = models[args.model]
    # self.model = nn.DataParallel(self.model)
    self.device = torch.device("cuda:0")
    self.model.to(self.device)

    self.train_dataloader = self.databunch.train_dl
    self.valid_dataloader = self.databunch.valid_dl
    self.epochs = 20
    self.learning_rate = 0.0001
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
    self.loss_function = nn.CrossEntropyLoss()
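# A minimal training-loop sketch (illustrative, not from the original source) showing how
# the attributes wired up above would typically be used. `trainer` stands for an instance
# of the class this __init__ belongs to, and the (inputs, targets) batch layout is an
# assumption about what the fastai dataloader yields.
def train_one_epoch(trainer):
    trainer.model.train()
    total_loss = 0.0
    for inputs, targets in trainer.train_dataloader:
        inputs, targets = inputs.to(trainer.device), targets.to(trainer.device)
        trainer.optimizer.zero_grad()
        logits = trainer.model(inputs)
        loss = trainer.loss_function(logits, targets)
        loss.backward()
        trainer.optimizer.step()
        total_loss += loss.item()
    return total_loss / len(trainer.train_dataloader)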
def init_model(self):
    '''
    Instantiate the model selected by self.opts.model. Supported values:
    pooling, cnn, multi_channel_cnn, multi_layer_cnn, char_cnn, lstm, gru,
    lstm_cnn, treelstm, cnn_treelstm, lstm_treelstm.
    '''
    if self.opts.model == 'pooling':
        self.model = Pooling(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    elif self.opts.model == 'cnn':
        self.model = CNN(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    elif self.opts.model == 'multi_channel_cnn':
        self.model = Multi_Channel_CNN(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    elif self.opts.model == 'multi_layer_cnn':
        self.model = Multi_Layer_CNN(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    elif self.opts.model == 'char_cnn':
        self.char = True
        self.model = Char_CNN(opts=self.opts, vocab=self.vocab, char_vocab=self.char_vocab,
                              label_vocab=self.label_vocab)
    elif self.opts.model == 'lstm':
        self.model = LSTM(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    elif self.opts.model == 'gru':
        self.model = GRU(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    elif self.opts.model == 'lstm_cnn':
        self.model = LSTM_CNN(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    elif self.opts.model == 'treelstm':
        self.tree = True
        self.model = BatchChildSumTreeLSTM(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    elif self.opts.model == 'cnn_treelstm':
        self.tree = True
        self.model = CNN_TreeLSTM(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    elif self.opts.model == 'lstm_treelstm':
        self.tree = True
        self.model = LSTM_TreeLSTM(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    else:
        raise RuntimeError('unknown model {!r}: please choose a supported model'.format(self.opts.model))
    if self.opts.use_cuda:
        self.model = self.model.cuda()
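# Design note (illustrative, not from the original source): the if/elif chain above can be
# collapsed into a registry dict. The model classes are the ones referenced above; the
# TREE_MODELS set and the helper name are assumptions introduced for this sketch.
MODEL_REGISTRY = {
    'pooling': Pooling, 'cnn': CNN, 'multi_channel_cnn': Multi_Channel_CNN,
    'multi_layer_cnn': Multi_Layer_CNN, 'char_cnn': Char_CNN, 'lstm': LSTM,
    'gru': GRU, 'lstm_cnn': LSTM_CNN, 'treelstm': BatchChildSumTreeLSTM,
    'cnn_treelstm': CNN_TreeLSTM, 'lstm_treelstm': LSTM_TreeLSTM,
}
TREE_MODELS = {'treelstm', 'cnn_treelstm', 'lstm_treelstm'}

def init_model_from_registry(self):
    name = self.opts.model
    if name not in MODEL_REGISTRY:
        raise RuntimeError('unknown model {!r}: please choose a supported model'.format(name))
    self.char = (name == 'char_cnn')
    self.tree = name in TREE_MODELS
    kwargs = dict(opts=self.opts, vocab=self.vocab, label_vocab=self.label_vocab)
    if self.char:
        kwargs['char_vocab'] = self.char_vocab  # Char_CNN takes the extra character vocabulary
    self.model = MODEL_REGISTRY[name](**kwargs)
    if self.opts.use_cuda:
        self.model = self.model.cuda()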
def initialize_model_and_trainer(model_properties, training_properties, datasetloader, device):
    logger.info("Model type is %s", training_properties["learner"])
    if training_properties["learner"] == "text_cnn":
        model = TextCnn(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
    elif training_properties["learner"] == "gru":
        model = GRU(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
    elif training_properties["learner"] == "lstm":
        model = LSTM(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
    elif training_properties["learner"] == "char_cnn":
        model = CharCNN(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
    elif training_properties["learner"] == "vdcnn":
        model = VDCNN(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
    elif training_properties["learner"] == "conv_deconv_cnn":
        model = ConvDeconvCNN(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
    elif training_properties["learner"] == "transformer_google":
        model = TransformerGoogle(model_properties).model.to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
    elif training_properties["learner"] == "lstmcrf":
        # The CRF-based sequence tagger is only meaningful for the NER task and uses its own trainer.
        assert training_properties["task"] == "ner"
        model = LSTMCRF(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_ner_trainer", training_properties, datasetloader, device)
    else:
        raise ValueError("Model is not defined! Available learner values are: 'text_cnn', 'char_cnn', "
                         "'vdcnn', 'gru', 'lstm', 'conv_deconv_cnn', 'transformer_google' and 'lstmcrf'")
    return model, trainer
def initialize_model_and_trainer(model_properties, training_properties, datasetloader, device):
    logger.info("Model type is %s", training_properties["learner"])
    if training_properties["learner"] == "text_cnn":
        model = TextCnn(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
                                          datasetloader.val_iter, datasetloader.test_iter, device)
    elif training_properties["learner"] == "gru":
        model = GRU(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
                                          datasetloader.val_iter, datasetloader.test_iter, device)
    elif training_properties["learner"] == "lstm":
        model = LSTM(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
                                          datasetloader.val_iter, datasetloader.test_iter, device)
    elif training_properties["learner"] == "char_cnn":
        model = CharCNN(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
                                          datasetloader.val_iter, datasetloader.test_iter, device)
    elif training_properties["learner"] == "vdcnn":
        model = VDCNN(model_properties).to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
                                          datasetloader.val_iter, datasetloader.test_iter, device)
    elif training_properties["learner"] == "conv_deconv_cnn":
        # The encoder, decoder, and classifier submodules are trained jointly by a multi-model trainer.
        convDeconvCNN = ConvDeconvCNN(model_properties)
        encoderCNN = convDeconvCNN.encoder.to(device)
        decoderCNN = convDeconvCNN.decoder.to(device)
        classifier = convDeconvCNN.classifier.to(device)
        trainer = Trainer.trainer_factory("multiple_model_trainer", training_properties, datasetloader.train_iter,
                                          datasetloader.val_iter, datasetloader.test_iter, device)
        model = [encoderCNN, decoderCNN, classifier]
    elif training_properties["learner"] == "transformer_google":
        model = TransformerGoogle(model_properties).model.to(device)
        trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
                                          datasetloader.val_iter, datasetloader.test_iter, device)
    elif training_properties["learner"] == "crf":
        model = ConditionalRandomField().to(device)
        trainer = None  # No trainer is wired up for the standalone CRF; callers must handle this case.
    else:
        raise ValueError("Model is not defined! Available learner values are: 'text_cnn', 'char_cnn', "
                         "'vdcnn', 'gru', 'lstm', 'conv_deconv_cnn', 'transformer_google' and 'crf'")
    return model, trainer
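# Usage sketch (illustrative, not from the original source): both variants of
# initialize_model_and_trainer above are driven by plain property dicts. "learner" and
# "task" are the only keys read by the functions themselves; model_properties and
# `datasetloader` (assumed to expose train/val/test iterators) come from the project's
# config and data pipeline and are hypothetical here.
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
training_properties = {"learner": "gru", "task": "classification"}
model_properties = {}  # hypothetical: filled with whatever GRU(model_properties) expects
model, trainer = initialize_model_and_trainer(model_properties, training_properties,
                                              datasetloader, device)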
import torch
import torch.nn as nn
import torch.nn.functional as f

class RIM(nn.Module):
    def __init__(self, input_size=2, output_size=1, st_size=2, hidden_size=2, bounded=-1, lr=.001):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.st_size = st_size
        self.lr = lr
        # GRU here is assumed to be a project-local convolutional GRU cell
        # (it consumes 4-D conv feature maps), not torch.nn.GRU.
        self.conv1 = nn.Conv2d(input_size, hidden_size, kernel_size=5, padding=2, groups=2)
        self.rnn_layer1 = GRU(hidden_size, st_size)
        self.conv2 = nn.Conv2d(st_size, hidden_size, kernel_size=3, padding=1)
        self.rnn_layer2 = GRU(hidden_size, st_size)
        self.conv3 = nn.Conv2d(hidden_size, output_size, kernel_size=3, padding=1)
        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        self.bounded = bounded

    def forward(self, xt, st1, st2, j):
        # On the first step (j == 0) the recurrent states are zero-initialized. The first
        # state inherits xt's shape, which matches the expected state width only when
        # input_size equals st_size (true for the defaults).
        if j == 0:
            st1 = torch.zeros(xt.size())
        out = f.relu(self.conv1(xt))
        # out = nn.BatchNorm2d(out.shape[1])(out)
        st_out1 = self.rnn_layer1(out, st1)
        out = f.relu(self.conv2(st_out1))
        # out = nn.BatchNorm2d(out.shape[1])(out)
        if j == 0:
            st2 = torch.zeros(out.size())
        st_out2 = self.rnn_layer2(out, st2)
        if self.bounded > 0:
            # Optionally clamp the output to [-bounded, bounded].
            out = torch.clamp(self.conv3(st_out2), -self.bounded, self.bounded)
        else:
            out = self.conv3(st_out2)
        return out, st_out1, st_out2

    def backprop(self, loss):
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()

    def loss(self, theta, list_psi_t):
        # self.loss_func and self.weight_func are expected to be assigned externally
        # before loss() is called; they are not defined in __init__.
        loss_t = self.loss_func(theta, list_psi_t)
        return self.weight_func(loss_t)

    def init_hidden(self, batch_dim=1):
        return torch.zeros((batch_dim, self.st_size))
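# Usage sketch (illustrative, not from the original source): rolling the RIM above over a
# short sequence of 2-channel frames, threading the two recurrent states through time.
# Shapes assume the default sizes, and the custom GRU cell is assumed to preserve the
# spatial dimensions of its input.
import torch

model = RIM(input_size=2, hidden_size=2, st_size=2, output_size=1)
frames = torch.randn(8, 4, 2, 16, 16)      # (time, batch, channels, H, W)
st1 = st2 = None                           # ignored at j == 0, when states are zero-initialized
outputs = []
for j, xt in enumerate(frames):
    out, st1, st2 = model(xt, st1, st2, j)
    outputs.append(out)
prediction = torch.stack(outputs)          # (time, batch, 1, 16, 16)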