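# Shared imports assumed by the snippets below (a sketch: the repo's own Util
# and GenericEncoder helpers are referenced throughout but not shown here, and
# `U` is inferred from the U.rnn.pack_padded_sequence call to be torch.nn.utils):
import logging
import math
import pathlib
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils as U
import torch.optim as optim

logger = logging.getLogger(__name__)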
def __init__(self, xdim, edim, hdim, lnum, use_bidirectional=True,
             use_lstm=True, dropout=0.2, **kwargs):
    """RNN encoder

    Arguments:
        xdim {int} -- input feature dimension
        edim {int} -- embedding dimension
        hdim {int} -- hidden vector dimension
        lnum {int} -- number of stacked RNN layers

    Keyword Arguments:
        use_bidirectional {bool} -- if True, it uses bidirectional RNN (default: {True})  # NOQA
        use_lstm {bool} -- if True, it uses LSTM (default: {True})
        dropout {float} -- dropout ratio (default: {0.2})
    """
    super(RecurrentEncoder, self).__init__()
    self.util = Util()
    self.pad_index = self.util.PAD_INDEX
    self.xdim = xdim
    self.edim = edim
    self.hdim = hdim
    self.lnum = lnum
    self.use_bidirectional = use_bidirectional
    self.use_lstm = use_lstm
    self.dropout = dropout
    self.use_cuda = self.util.use_cuda
    self.embedding = self._init_embedding()
    self.rnn = self._init_rnn()
def __init__(self, examples, x_to_index=None, isregression=False):
    self.util = Util()
    self.pad_index = self.util.PAD_INDEX
    self.unk_index = self.util.UNK_INDEX
    # Regroup the parallel input fields: one list of sequences per field.
    X_sets = [[example['Xs'][i] for example in examples]
              for i in range(len(examples[0]['Xs']))]
    self.x_to_index = x_to_index
    if x_to_index is None:
        # Build one token-to-id vocabulary per input field.
        self.x_to_index = []
        for i in range(len(examples[0]['Xs'])):
            xs = [x for X in X_sets[i] for x in X]
            self.x_to_index.append(self._make_index(xs))
    self.Xs = []
    self.raw_Xs = []  # for debug
    for i in range(len(examples[0]['Xs'])):
        self.Xs.append(self._degitize(X_sets[i], self.x_to_index[i]))
        self.raw_Xs.append(X_sets[i])
    # indices
    self.indices = [example['index'] for example in examples]
    if isregression:
        self.ys = [math.log10(example['y']) for example in examples]
    else:
        self.ys = [example['y'] for example in examples]
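# Usage sketch for the constructor above (assumptions: it belongs to the repo's
# dataset class, here called Dataset as a hypothetical name, and
# _make_index/_degitize build and apply token-to-id vocabularies; the field
# values below are made up). Each example carries parallel input fields in
# 'Xs', a gold label 'y', and an 'index':
examples = [
    {'index': 0, 'Xs': [['a', 'b'], ['x']], 'y': 1},
    {'index': 1, 'Xs': [['b'], ['y', 'z']], 'y': 0},
]
train_set = Dataset(examples)  # builds one vocabulary per input field
dev_set = Dataset(examples, x_to_index=train_set.x_to_index)  # reuses them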
class AverageEncoder(GenericEncoder):

    def __init__(
        self,
        xdim,
        edim,
        **kwargs
    ):
        """Averaging word vectors encoder

        Arguments:
            xdim {int} -- input feature dimension
            edim {int} -- embedding dimension
        """
        super(AverageEncoder, self).__init__()
        self.util = Util()
        self.pad_index = self.util.PAD_INDEX
        self.xdim = xdim
        self.edim = edim
        self.use_cuda = self.util.use_cuda
        self.embedding = self._init_embedding()

    def forward(self, X):
        # Embed each sequence and average its word vectors into one vector.
        H = []
        for x in X:
            h = self._embed(self.util.tensorize(x))  # (len(x), edim)
            h = h.mean(dim=0)  # (edim,)
            H.append(h)
        return torch.stack(H, dim=0)  # (batch, edim)
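# Usage sketch (assumptions: GenericEncoder supplies _init_embedding/_embed and
# Util.tensorize turns an id list into a LongTensor; the token ids and sizes
# below are hypothetical):
encoder = AverageEncoder(xdim=1000, edim=50)
H = encoder([[4, 8, 15], [16, 23]])  # -> FloatTensor of shape (2, 50)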
def __init__(self, xdim, edim, dropout=0.2, **kwargs):
    """Averaging word vectors encoder

    Arguments:
        xdim {int} -- input feature dimension
        edim {int} -- embedding dimension

    Keyword Arguments:
        dropout {float} -- dropout ratio (default: {0.2})
    """
    super(AverageEncoder, self).__init__()
    self.util = Util()
    self.pad_index = self.util.PAD_INDEX
    self.xdim = xdim
    self.edim = edim
    self.dropout = dropout
    self.use_cuda = self.util.use_cuda
    self.embedding = self._init_embedding()
class RecurrentEncoder(GenericEncoder):

    def __init__(
        self,
        xdim,
        edim,
        hdim,
        lnum,
        use_bidirectional=True,
        use_lstm=True,
        dropout=0.2,
        **kwargs
    ):
        """RNN encoder

        Arguments:
            xdim {int} -- Size of a vocabulary
            edim {int} -- Dimension of an embedding layer
            hdim {int} -- Dimension of a hidden layer
            lnum {int} -- Number of stacked RNN layers

        Keyword Arguments:
            use_bidirectional {bool} -- Use bidirectional RNN (default: {True})
            use_lstm {bool} -- If True, use LSTM, else GRU (default: {True})
            dropout {float} -- dropout ratio (default: {0.2})
        """
        super(RecurrentEncoder, self).__init__()
        self.util = Util()
        self.pad_index = self.util.PAD_INDEX
        self.xdim = xdim
        self.edim = edim
        self.hdim = hdim
        self.lnum = lnum
        self.use_bidirectional = use_bidirectional
        self.use_lstm = use_lstm
        self.dropout = dropout
        self.use_cuda = self.util.use_cuda
        self.embedding = self._init_embedding()
        self.rnn = self._init_rnn()

    def _init_rnn(self):
        return self._init_lstm() if self.use_lstm else self._init_gru()

    def _init_lstm(self):
        lstm = nn.LSTM(self.edim, self.hdim, num_layers=self.lnum,
                       batch_first=True, bidirectional=self.use_bidirectional,
                       dropout=self.dropout)
        return lstm.cuda() if self.use_cuda else lstm

    def _init_gru(self):
        gru = nn.GRU(self.edim, self.hdim, num_layers=self.lnum,
                     batch_first=True, bidirectional=self.use_bidirectional,
                     dropout=self.dropout)
        return gru.cuda() if self.use_cuda else gru

    def forward(self, X):
        # Sort by length (required by pack_padded_sequence), pad, embed, run
        # the RNN, then restore the original batch order.
        X, indices_before_sort, lengths_after_sort = self._sort(X)
        X = self._pad(X, lengths_after_sort)
        X = self.util.tensorize(X)
        X = self._embed(X)
        X = self._pack(X, lengths_after_sort)
        H = self._rnn(X)
        H = self._cat(H) if self.use_bidirectional else self._view(H)
        H = self._unsort(H, indices_before_sort)
        return H

    def _sort(self, X):
        # Sort sequences by descending length; remember the original indices.
        indices, X = zip(*sorted(enumerate(X), key=lambda x: -len(x[1])))
        return (list(X), self.util.tensorize(list(indices)),
                [len(x) for x in X])

    def _pad(self, X, lengths):
        for i, x in enumerate(X):
            X[i] = x + [self.pad_index] * (max(lengths) - len(X[i]))
        return X

    def _pack(self, X, lengths):
        return U.rnn.pack_padded_sequence(X, lengths, batch_first=True)

    def _rnn(self, X):
        _, H = self.rnn(X)
        # For LSTM the second return value is (h_n, c_n); keep h_n only.
        return H[0] if self.use_lstm else H

    def _cat(self, H):
        # Concatenate the last layer's forward and backward hidden states:
        # (batch, 2 * hdim).
        forward_H = H[-2, :, :]
        backward_H = H[-1, :, :]
        return torch.cat((forward_H, backward_H), 1)

    def _view(self, H):
        # Take the last layer's hidden state: (batch, hdim).
        return H[-1].view(-1, self.hdim)

    def _unsort(self, H, indices):
        # Invert the length-sorting permutation to restore input order.
        _, unsorted_indices = indices.sort()
        return H.index_select(0, unsorted_indices)
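# Usage sketch (assumptions: GenericEncoder supplies _init_embedding/_embed and
# Util.tensorize returns LongTensors; the token ids below are hypothetical):
encoder = RecurrentEncoder(xdim=1000, edim=50, hdim=100, lnum=1)
H = encoder([[4, 8, 15], [16, 23], [42]])
# Bidirectional by default, so H has shape (3, 2 * 100), rows in input order.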
class Model(nn.Module):

    def __init__(
        self,
        encoder_params,
        linear_params,
        epoch_num=100,
        checkpoint_interval=10,
        batch_size=32,
        seed=1,
        save_best_model=True,
    ):
        """Neural network-based classifier

        Arguments:
            encoder_params {Dict[str, Any]} -- encoder parameters
            linear_params {Dict[str, Any]} -- dense layer parameters

        Keyword Arguments:
            epoch_num {int} -- number of epochs (default: {100})
            checkpoint_interval {int} -- it creates a checkpoint every {checkpoint_interval} epochs (default: {10})  # NOQA
            batch_size {int} -- batch size (default: {32})
            seed {int} -- random seed (default: {1})
            save_best_model {bool} -- if True, save the best model on the development set (default: {True})  # NOQA
        """
        super(Model, self).__init__()
        random.seed(seed)
        torch.manual_seed(seed)
        self.util = Util()
        self.epoch_num = epoch_num
        self.checkpoint_interval = checkpoint_interval
        self.batch_size = batch_size
        self.use_cuda = self.util.use_cuda
        self.encoders = self._init_encoders(encoder_params)
        self.linears = self._init_linears(linear_params)
        self.optimizer = optim.SGD(self.parameters(), lr=0.01)
        self.criterion = nn.NLLLoss()
        self._best_dev_accuracy = None
        self._best_epoch = None
        self._log = None
        self._save_best_model = save_best_model

    def _init_encoders(self, encoder_params):
        encoders = []
        for params in encoder_params:
            if params.get('encoder') == 'average':
                encoders.append(AverageEncoder(**params))
            else:
                encoders.append(RecurrentEncoder(**params))
        return nn.ModuleList(encoders)

    def _init_linears(self, linear_params):
        linears = []
        for params in linear_params:
            linear = nn.Linear(params['indim'], params['outdim'])
            linear = linear.cuda() if self.use_cuda else linear
            linears.append(linear)
        return nn.ModuleList(linears)

    def run_training(
        self,
        output_dir,
        training_set,
        development_set=None,
    ):
        """Run training procedure

        Arguments:
            output_dir {str} -- path to output directory
            training_set {Dataset} -- dataset for training

        Keyword Arguments:
            development_set {Dataset} -- dataset for validation (default: {None})  # NOQA
        """
        self._output_dir_path = pathlib.Path(output_dir)
        self._best_dev_accuracy = -float('inf')
        self._best_epoch = 0
        batches = training_set.split(self.batch_size)
        for epoch in range(1, self.epoch_num + 1):
            self.train()
            self._train(batches, epoch)
            if not self._ischeckpoint(epoch):
                continue
            self._save(epoch)
            if development_set is not None:
                dev_accuracy = self.run_evaluation(development_set)
                log_line = 'dev accuracy: {:3.2f}'.format(dev_accuracy)
                log_line += ' epoch: {}'.format(epoch)
                logger.info(log_line)
                if not self.is_best(dev_accuracy):
                    continue
                # When the new model outperforms the previous ones
                log_line = '[new best] dev accuracy: {:3.2f}'.format(dev_accuracy)  # NOQA
                log_line += ' epoch: {}'.format(epoch)
                # Save epoch and performance information
                self._best_epoch = epoch
                self._best_dev_accuracy = dev_accuracy
                if self._save_best_model:
                    self._save('best')
                    logger.debug('Update best model')
                logger.info(log_line)
        log_line = '[best] dev_accuracy: {:3.2f}'.format(self._best_dev_accuracy)  # NOQA
        log_line += ' epoch: {}'.format(self._best_epoch)
        logger.info(log_line)

    def _train(self, batches, epoch):
        random.shuffle(batches)
        loss_sum = 0
        for *Xs, ys in batches:
            self.zero_grad()
            ys_hat = self(Xs)
            ys = self.util.tensorize(ys)
            loss = self.criterion(ys_hat, ys)
            loss.backward()
            self.optimizer.step()
            loss_sum += loss.item()  # .item() avoids keeping the graph alive
        logger.info('epoch {:>3}\tloss {:6.2f}'.format(epoch, loss_sum))

    def _ischeckpoint(self, epoch):
        return epoch % self.checkpoint_interval == 0

    def _save(self, epoch):
        model_path = self._output_dir_path / '{}.model'.format(epoch)
        torch.save(self.state_dict(), model_path.as_posix())

    def test(self, test_set):
        # Shrink the batch size if the test set is smaller than one batch.
        if len(test_set.Xs[0]) < self.batch_size:
            self.batch_size = len(test_set.Xs[0])
        batches = test_set.split(self.batch_size)
        results = self._test(batches)
        return results

    def _test(self, batches):
        results = []
        for *Xs, _ in batches:
            ys_hat = self(Xs)
            results.extend(ys_hat)
        return results

    def forward(self, Xs):
        Hs = []
        for i in range(len(self.encoders)):
            H = self.encoders[i](Xs[i])
            Hs.append(H)
        H = torch.cat(Hs, 1)
        for i in range(len(self.linears)):
            H = self.linears[i](H)
        return F.log_softmax(H, dim=1)

    def run_evaluation(self, test_set):
        self.eval()
        ys_hat = [y_hat.argmax().item() for y_hat in self.test(test_set)]
        X_num = len(test_set.Xs)
        ok = 0.0
        for i in range(len(ys_hat)):
            for j in range(X_num):
                logger.debug("X{}: ".format(j) + str(test_set.Xs[j][i]))
                logger.debug("raw_X{}: ".format(j) + str(test_set.raw_Xs[j][i]))
            logger.debug("y: " + str(test_set.ys[i]))
            logger.debug("y_hat: " + str(ys_hat[i]))
            if ys_hat[i] == test_set.ys[i]:
                ok += 1
        accuracy = ok / len(ys_hat)
        return accuracy

    def is_best(self, dev_accuracy):
        if self._best_dev_accuracy is None:
            return False
        return dev_accuracy > self._best_dev_accuracy
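# Training sketch for the class above (hypothetical parameters; the
# concatenated encoder outputs must match the first linear layer's 'indim',
# and the datasets are instances of the dataset class shown earlier):
model = Model(
    encoder_params=[{'encoder': 'average', 'xdim': 1000, 'edim': 50}],
    linear_params=[{'indim': 50, 'outdim': 2}],
    epoch_num=50,
)
model.run_training('output', train_set, development_set=dev_set)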
class Model(nn.Module):

    def __init__(
        self,
        encoder_params,
        linear_params,
        batch_size=32,
        seed=1,
    ):
        """Neural network-based classifier

        Arguments:
            encoder_params {Dict[str, Any]} -- encoder parameters
            linear_params {Dict[str, Any]} -- dense layer parameters

        Keyword Arguments:
            batch_size {int} -- batch size (default: {32})
            seed {int} -- random seed (default: {1})
        """
        super(Model, self).__init__()
        random.seed(seed)
        torch.manual_seed(seed)
        self.util = Util()
        self.batch_size = batch_size
        self.use_cuda = self.util.use_cuda
        self.encoders = self._init_encoders(encoder_params)
        self.linears = self._init_linears(linear_params)
        self.optimizer = optim.SGD(self.parameters(), lr=0.01)
        self.criterion = nn.NLLLoss()

    def _init_encoders(self, encoder_params):
        encoders = []
        for params in encoder_params:
            if params.get('encoder') == 'average':
                encoders.append(AverageEncoder(**params))
            else:
                encoders.append(RecurrentEncoder(**params))
        return nn.ModuleList(encoders)

    def _init_linears(self, linear_params):
        linears = []
        for params in linear_params:
            linear = nn.Linear(params['indim'], params['outdim'])
            linear = linear.cuda() if self.use_cuda else linear
            linears.append(linear)
        return nn.ModuleList(linears)

    def run_training(
        self,
        output_dir,
        training_set,
        development_set,
        epoch_num=100,
        checkpoint_interval=10,
    ):
        """Run training procedure

        Arguments:
            output_dir {str} -- path to output directory
            training_set {Dataset} -- dataset for training
            development_set {Dataset} -- dataset for development

        Keyword Arguments:
            epoch_num {int} -- number of epochs (default: {100})
            checkpoint_interval {int} -- it creates a checkpoint every {checkpoint_interval} epochs (default: {10})  # NOQA
        """
        self._output_dir_path = pathlib.Path(output_dir)
        best_accuracy, best_epoch = -float('inf'), 0
        batches = training_set.split(self.batch_size)
        for epoch in range(1, epoch_num + 1):
            self.epoch = epoch
            self.train()
            self._train(batches)
            if not self._ischeckpoint(checkpoint_interval):
                continue
            best_accuracy, best_epoch = \
                self._develop(development_set, best_accuracy, best_epoch)
        self._write_log(best_accuracy, best_epoch, '[best]')

    def _train(self, batches):
        random.shuffle(batches)
        loss_sum = 0
        for *Xs, ys in batches:
            self.zero_grad()
            ys_hat = self(Xs)
            ys = self.util.tensorize(ys)
            loss = self.criterion(ys_hat, ys)
            loss.backward()
            self.optimizer.step()
            loss_sum += loss.item()  # .item() avoids keeping the graph alive
        logger.info('epoch {:>3}\tloss {:6.2f}'.format(self.epoch, loss_sum))

    def _ischeckpoint(self, checkpoint_interval):
        return self.epoch % checkpoint_interval == 0

    def _develop(self, development_set, best_accuracy, best_epoch):
        accuracy = self.run_evaluation(development_set)
        self._write_log(accuracy, self.epoch)
        if accuracy > best_accuracy:
            best_accuracy, best_epoch = accuracy, self.epoch
            self._write_log(accuracy, self.epoch, '[new best]')
            self._save('best.model')
        return best_accuracy, best_epoch

    def _save(self, model_file_name: str):
        model_path = self._output_dir_path / model_file_name
        torch.save(self.state_dict(), model_path.as_posix())

    def _write_log(self, accuracy, epoch, note=''):
        log_line = self._set_log_line(accuracy, epoch, note)
        logger.info(log_line)

    def _set_log_line(self, accuracy, epoch, note):
        return '{} dev_accuracy: {:3.2f} epoch: {}'.format(note, accuracy, epoch)  # NOQA

    def test(self, test_set):
        # Shrink the batch size if the test set is smaller than one batch.
        if len(test_set.Xs[0]) < self.batch_size:
            self.batch_size = len(test_set.Xs[0])
        batches = test_set.split(self.batch_size)
        results = self._test(batches)
        return results

    def _test(self, batches):
        results = []
        for *Xs, _ in batches:
            # .cpu() before .numpy() so this also works when use_cuda is True
            ys_hat = self(Xs).detach().cpu().numpy()
            results.extend(ys_hat)
        return results

    def forward(self, Xs):
        Hs = []
        for i in range(len(self.encoders)):
            H = self.encoders[i](Xs[i])
            Hs.append(H)
        H = torch.cat(Hs, 1)
        for i in range(len(self.linears)):
            H = self.linears[i](H)
        return F.log_softmax(H, dim=1)

    def run_evaluation(self, test_set):
        self.eval()
        ys_hat = [y_hat.argmax().item() for y_hat in self.test(test_set)]
        X_num = len(test_set.Xs)
        ok = 0.0
        for i in range(len(ys_hat)):
            for j in range(X_num):
                logger.debug("X{}: ".format(j) + str(test_set.Xs[j][i]))
                logger.debug("raw_X{}: ".format(j) + str(test_set.raw_Xs[j][i]))
            logger.debug("y: " + str(test_set.ys[i]))
            logger.debug("y_hat: " + str(ys_hat[i]))
            if ys_hat[i] == test_set.ys[i]:
                ok += 1
        accuracy = ok / len(ys_hat)
        return accuracy
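# Usage sketch for this variant (hypothetical parameters as before); here
# epoch_num and checkpoint_interval are passed to run_training, not __init__,
# and test() returns per-example log-probabilities as numpy arrays:
model = Model(
    encoder_params=[{'encoder': 'average', 'xdim': 1000, 'edim': 50}],
    linear_params=[{'indim': 50, 'outdim': 2}],
)
model.run_training('output', train_set, dev_set, epoch_num=50,
                   checkpoint_interval=5)
predictions = [scores.argmax() for scores in model.test(test_set)]
accuracy = model.run_evaluation(test_set)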