def __init__(self, vocabs=None, vectorizers=None, model=None, preproc='client'):
    """Initialize the service and build reverse lookup tables.

    :param vocabs: token -> index vocabularies keyed by feature name
    :param vectorizers: vectorizers keyed by feature name
    :param model: the loaded model
    :param preproc: where preprocessing happens (defaults to 'client')
    """
    super().__init__(vocabs, vectorizers, model, preproc)
    # The model always returns indices (no need for `return_labels`)
    self.label_vocab = revlut(self.get_labels())
    self.rev_vocab = {name: revlut(vocab) for name, vocab in self.vocabs.items()}
def __init__(self, vocabs=None, vectorizers=None, model=None, preproc='client'):
    """Initialize the tagger service, honoring the model's label-return preference.

    :param vocabs: token -> index vocabularies keyed by feature name
    :param vectorizers: vectorizers keyed by feature name
    :param model: the loaded model; may expose a `return_labels` flag
    :param preproc: where preprocessing happens (defaults to 'client')
    """
    super().__init__(vocabs, vectorizers, model, preproc)
    # getattr with a default replaces the hasattr/else dance;
    # False keeps the default tagger behavior (model emits indices).
    self.return_labels = getattr(self.model, 'return_labels', False)
    if not self.return_labels:
        # Model emits indices, so build an index -> label map for decoding
        self.label_vocab = revlut(self.get_labels())
    self.rev_vocab = {k: revlut(v) for k, v in self.vocabs.items()}
def __init__(self, model, span_type, verbose):
    """Wrap a tagger model for evaluation with a reverse label lookup.

    :param model: a model exposing a `labels` mapping
    :param span_type: span encoding scheme used when scoring
    :param verbose: when truthy, log the span type being used
    """
    self.model = model
    self.span_type = span_type
    self.verbose = verbose
    # index -> label string map for decoding model output
    self.idx2label = revlut(model.labels)
    if self.verbose:
        logger.info('Setting span type %s', self.span_type)
def __init__(self, model, span_type, verbose):
    """Wrap a tagger model for evaluation with a reverse label lookup.

    :param model: a model exposing a `labels` mapping
    :param span_type: span encoding scheme used when scoring
    :param verbose: when truthy, log the span type being used
    """
    self.model = model
    self.idx2label = revlut(model.labels)
    self.span_type = span_type
    if verbose:
        # Use the module logger with lazy %-args instead of print,
        # consistent with the other initializers in this file.
        logger.info('Setting span type %s', self.span_type)
    self.verbose = verbose
def train(self, checkpoint=None):
    """Train the encoder-decoder, optionally showing decoded validation examples.

    :param checkpoint: (optional) checkpoint to restore training from;
        forwarded to the base task's `train`
    :return: whatever the base task's `train` returns
    """
    num_ex = self.config_params['num_valid_to_show']
    # Reverse lookup tables: index -> token for the source and target vocabs
    rlut1 = revlut(self.feat2src[self.primary_key])
    rlut2 = revlut(self.feat2tgt)
    if num_ex > 0:
        logger.info('Showing examples')
        preproc = self.config_params.get('preproc', {})
        # NOTE(review): assumes preproc config supplies 'show_ex' and 'mxlen'
        # whenever num_valid_to_show > 0 — KeyError otherwise
        show_ex_fn = preproc['show_ex']
        # After each epoch, decode `num_ex` validation examples for inspection
        self.config_params['train']['after_train_fn'] = lambda model: show_ex_fn(model,
                                                                                 self.valid_data, rlut1, rlut2,
                                                                                 self.feat2tgt, preproc['mxlen'],
                                                                                 False, 0, num_ex, reverse=False)
    self.config_params['train']['tgt_rlut'] = rlut2
    return super(EncoderDecoderTask, self).train(checkpoint)
def __init__(self, vocabs=None, vectorizers=None, model=None, preproc='client'):
    """Initialize the tagger service, honoring the model's label-return preference.

    :param vocabs: token -> index vocabularies keyed by feature name
    :param vectorizers: vectorizers keyed by feature name
    :param model: the loaded model; may expose a `return_labels` flag
    :param preproc: where preprocessing happens (defaults to 'client')
    """
    super(TaggerService, self).__init__(vocabs, vectorizers, model, preproc)
    # getattr with a default replaces the hasattr/else dance;
    # False keeps the default tagger behavior (model emits indices).
    self.return_labels = getattr(self.model, 'return_labels', False)
    if not self.return_labels:
        # Model emits indices, so build an index -> label map for decoding
        self.label_vocab = revlut(self.get_labels())
def __init__(self, model, **kwargs):
    """Set up a PyTorch tagger trainer.

    :param model: the tagger model to train (exposes a `labels` mapping)
    :param kwargs: nogpu, clip (gradient clip, default 5), plus optimizer
        settings forwarded to `pytorch_prepare_optimizer`
    """
    super(TaggerTrainerPyTorch, self).__init__()
    self.model = model
    # index -> label string for decoding predictions
    self.idx2label = revlut(self.model.labels)
    self.gpu = not bool(kwargs.get('nogpu', False))
    self.clip = float(kwargs.get('clip', 5))
    self.optimizer, self.scheduler = pytorch_prepare_optimizer(self.model, **kwargs)
    if self.gpu:
        self.model = model.to_gpu()
def __init__(self, model, **kwargs):
    """Set up a DyNet tagger trainer.

    :param model: the tagger model to train (exposes a `labels` mapping)
    :param kwargs: span_type ('iob' default), nogpu, autobatchsz, plus
        optimizer settings forwarded to `optimizer`
    """
    super(TaggerTrainerDyNet, self).__init__()
    self.model = model
    self.labels = model.labels
    # index -> label string for decoding predictions
    self.idx2label = revlut(self.model.labels)
    self.span_type = kwargs.get('span_type', 'iob')
    self.gpu = not bool(kwargs.get('nogpu', False))
    self.autobatchsz = kwargs.get('autobatchsz')
    self.optimizer = optimizer(model, **kwargs)
def __init__(self, model, **kwargs):
    """Set up a PyTorch tagger trainer.

    :param model: the tagger model to train (exposes a `labels` mapping)
    :param kwargs: nogpu, span_type ('iob' default), clip (gradient clip,
        default 5), plus optimizer settings forwarded to
        `pytorch_prepare_optimizer`
    """
    super(TaggerTrainerPyTorch, self).__init__()
    self.gpu = not bool(kwargs.get('nogpu', False))
    # By default support IOB1/IOB2
    self.span_type = kwargs.get('span_type', 'iob')
    # Use the module logger with lazy %-args instead of print,
    # consistent with the other trainers in this file.
    logger.info('Setting span type %s', self.span_type)
    self.model = model
    self.idx2label = revlut(self.model.labels)
    self.clip = float(kwargs.get('clip', 5))
    self.optimizer, self.scheduler = pytorch_prepare_optimizer(self.model, **kwargs)
    if self.gpu:
        self.model = model.to_gpu()
def __init__(self, model, **kwargs):
    """Set up a DyNet tagger trainer.

    :param model: the tagger model to train (exposes a `labels` mapping)
    :param kwargs: span_type ('iob' default), nogpu, autobatchsz, nsteps,
        plus optimizer settings forwarded to `OptimizerManager`
    """
    super(TaggerTrainerDyNet, self).__init__()
    self.model = model
    self.labels = model.labels
    # index -> label string for decoding predictions
    self.idx2label = revlut(self.model.labels)
    self.span_type = kwargs.get('span_type', 'iob')
    self.gpu = not bool(kwargs.get('nogpu', False))
    self.autobatchsz = kwargs.get('autobatchsz')
    self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
    self.optimizer = OptimizerManager(model, **kwargs)
def __init__(self, model, **kwargs):
    """Set up a DyNet tagger trainer.

    :param model: the tagger model to train (exposes a `labels` mapping)
    :param kwargs: span_type ('iob' default), nogpu, autobatchsz, nsteps,
        verbose, plus optimizer settings forwarded to `OptimizerManager`
    """
    super(TaggerTrainerDyNet, self).__init__()
    self.span_type = kwargs.get('span_type', 'iob')
    logger.info('Setting span type %s', self.span_type)
    self.gpu = not bool(kwargs.get('nogpu', False))
    self.verbose = kwargs.get('verbose', False)
    self.model = model
    self.labels = model.labels
    # index -> label string for decoding predictions
    self.idx2label = revlut(self.model.labels)
    self.autobatchsz = kwargs.get('autobatchsz')
    self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
    self.optimizer = OptimizerManager(model, **kwargs)
def __init__(self, model, **kwargs):
    """Set up a PyTorch tagger trainer.

    :param model: the tagger model to train (exposes a `labels` mapping)
    :param kwargs: nogpu, span_type ('iob' default), verbose, clip
        (gradient clip, default 5), nsteps, plus optimizer settings
        forwarded to `OptimizerManager`
    """
    super(TaggerTrainerPyTorch, self).__init__()
    self.gpu = not bool(kwargs.get('nogpu', False))
    # By default support IOB1/IOB2
    self.span_type = kwargs.get('span_type', 'iob')
    self.verbose = kwargs.get('verbose', False)
    logger.info('Setting span type %s', self.span_type)
    self.model = model
    # index -> label string for decoding predictions
    self.idx2label = revlut(self.model.labels)
    self.clip = float(kwargs.get('clip', 5))
    self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
    self.optimizer = OptimizerManager(self.model, **kwargs)
    if self.gpu:
        self.model = model.to_gpu()
def __init__(self, model, **kwargs):
    """Set up a PyTorch tagger trainer.

    :param model: the tagger model to train (exposes a `labels` mapping)
    :param kwargs: nogpu, span_type ('iob' default), verbose, clip
        (gradient clip, default 5), nsteps, plus optimizer settings
        forwarded to `OptimizerManager`
    """
    super(TaggerTrainerPyTorch, self).__init__()
    self.gpu = not bool(kwargs.get('nogpu', False))
    # By default support IOB1/IOB2
    self.span_type = kwargs.get('span_type', 'iob')
    self.verbose = kwargs.get('verbose', False)
    if self.verbose:
        logger.info('Setting span type %s', self.span_type)
    self.model = model
    # index -> label string for decoding predictions
    self.idx2label = revlut(self.model.labels)
    self.clip = float(kwargs.get('clip', 5))
    self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
    self.optimizer = OptimizerManager(self.model, **kwargs)
    if self.gpu:
        self.model = model.to_gpu()
def predict_text(self, tokens, mxlen, maxw, zero_alloc=np.zeros, word_trans_fn=lowercase):
    """Tag a single tokenized sentence and return (token, label) pairs.

    Converts the token list into word/char index arrays, runs the tagger,
    and maps the predicted indices back to label strings.  Only word,
    character and length features are produced here; models that need other
    features should be driven through `predict` directly.

    :param tokens: list of string tokens for one sentence
    :param mxlen: maximum sentence length; extra tokens are dropped
    :param maxw: maximum word length in characters
    :param zero_alloc: allocator for zero-filled arrays (defaults to np.zeros)
    :param word_trans_fn: transform applied to each word before vocab lookup
    :return: list of (token, label) tuples, one per kept token
    """
    words_vocab = self.get_vocab(vocab_type='word')
    chars_vocab = self.get_vocab(vocab_type='char')
    # This might be inefficient if the label space is large
    label_vocab = revlut(self.get_labels())
    xs = zero_alloc((1, mxlen), dtype=int)
    xs_ch = zero_alloc((1, mxlen, maxw), dtype=int)
    lengths = zero_alloc(1, dtype=int)
    keep = min(len(tokens), mxlen)
    lengths[0] = keep
    # Fill word indices and per-word character indices; OOV maps to 0
    for j, word in enumerate(tokens[:mxlen]):
        xs[0, j] = words_vocab.get(word_trans_fn(word), 0)
        for k, ch in enumerate(word[:maxw]):
            xs_ch[0, j, k] = chars_vocab.get(ch, 0)
    indices = self.predict({'x': xs, 'xch': xs_ch, 'lengths': lengths})[0]
    return [(tokens[j], label_vocab[indices[j]]) for j in range(keep)]
def __init__(self, vocabs=None, vectorizers=None, model=None, preproc='client'):
    """Split vocabs and vectorizers into source and target sides.

    The entry keyed 'tgt' is the target side; every other key is treated
    as a source feature.

    :param vocabs: token -> index vocabularies keyed by feature name (must
        include 'tgt')
    :param vectorizers: vectorizers keyed by feature name (must include 'tgt')
    :param model: the loaded model
    :param preproc: where preprocessing happens (defaults to 'client')
    """
    super(EncoderDecoderService, self).__init__(None, None, model, preproc)
    self.tgt_vocab = vocabs.get('tgt')
    self.src_vocabs = {name: vocab for name, vocab in vocabs.items() if name != 'tgt'}
    # index -> token map for decoding generated target indices
    self.tgt_idx_to_token = revlut(self.tgt_vocab)
    self.tgt_vectorizer = vectorizers.get('tgt')
    self.src_vectorizers = {name: vec for name, vec in vectorizers.items() if name != 'tgt'}
def predict_text(self, tokens, mxlen, maxw, zero_alloc=np.zeros, word_trans_fn=lowercase):
    """Tag one tokenized sentence and return (token, label) pairs.

    Converts the token list into word/char index arrays, runs the tagger,
    and maps the predicted indices back to label strings for printing.

    :param tokens: list of string tokens for a single sentence
    :param mxlen: maximum sentence length; extra tokens are dropped
    :param maxw: maximum word length in characters
    :param zero_alloc: allocator for zero-filled arrays (defaults to np.zeros)
    :param word_trans_fn: transform applied to each word before vocab lookup
    :return: list of (token, label) tuples, one per kept token
    """
    words_vocab = self.get_vocab(vocab_type='word')
    chars_vocab = self.get_vocab(vocab_type='char')
    # This might be inefficient if the label space is large
    label_vocab = revlut(self.get_labels())
    xs = zero_alloc((1, mxlen), dtype=int)
    xs_ch = zero_alloc((1, mxlen, maxw), dtype=int)
    lengths = zero_alloc(1, dtype=int)
    keep = min(len(tokens), mxlen)
    lengths[0] = keep
    # Fill word indices and per-word character indices; OOV maps to 0
    for j, word in enumerate(tokens[:mxlen]):
        xs[0, j] = words_vocab.get(word_trans_fn(word), 0)
        for k, ch in enumerate(word[:maxw]):
            xs_ch[0, j, k] = chars_vocab.get(ch, 0)
    indices = self.predict(xs, xs_ch, lengths)[0]
    return [(tokens[j], label_vocab[indices[j]]) for j in range(keep)]
def __init__(self, model, **kwargs):
    """Set up a PyTorch tagger trainer with a hand-picked optimizer.

    :param model: the tagger model to train (exposes `labels`,
        `parameters` and `get_criterion`)
    :param kwargs: nogpu, optim ('adam' default; also adadelta, rmsprop,
        sgd fallback), eta (learning rate, default 0.01), mom (SGD momentum,
        default 0.9), clip (gradient clip, default 5)
    """
    super(TaggerTrainerPyTorch, self).__init__()
    self.gpu = not bool(kwargs.get('nogpu', False))
    optim = kwargs.get('optim', 'adam')
    eta = float(kwargs.get('eta', 0.01))
    mom = float(kwargs.get('mom', 0.9))
    self.clip = float(kwargs.get('clip', 5))
    self.model = model
    # index -> label string for decoding predictions
    self.idx2label = revlut(self.model.labels)
    # Dispatch table of optimizer factories; anything unrecognized
    # falls back to SGD with momentum, as before.
    factories = {
        'adadelta': lambda: torch.optim.Adadelta(model.parameters(), lr=eta),
        'adam': lambda: torch.optim.Adam(model.parameters(), lr=eta),
        'rmsprop': lambda: torch.optim.RMSprop(model.parameters(), lr=eta),
    }
    sgd_default = lambda: torch.optim.SGD(model.parameters(), lr=eta, momentum=mom)
    self.optimizer = factories.get(optim, sgd_default)()
    self.crit = model.get_criterion()
    if self.gpu:
        self.model = model.cuda()
        self.crit.cuda()
def __init__(self, model, **kwargs):
    """Set up a PyTorch tagger trainer (single-device only).

    :param model: the tagger model to train (exposes a `labels` mapping)
    :param kwargs: gpus (clamped to at most 1), span_type ('iob' default),
        verbose, clip (gradient clip, default 5), nsteps, plus optimizer
        settings forwarded to `OptimizerManager`
    """
    super(TaggerTrainerPyTorch, self).__init__()
    self.gpus = int(kwargs.get('gpus', 1))
    # By default support IOB1/IOB2
    self.span_type = kwargs.get('span_type', 'iob')
    self.verbose = kwargs.get('verbose', False)
    logger.info('Setting span type %s', self.span_type)
    self.model = model
    # index -> label string for decoding predictions
    self.idx2label = revlut(self.model.labels)
    self.clip = float(kwargs.get('clip', 5))
    self.optimizer = OptimizerManager(self.model, **kwargs)
    if self.gpus > 1:
        # Multi-GPU isn't implemented for this trainer; clamp to one device
        logger.info("Trainer for PyTorch tagger currently doesnt support multiple GPUs. Setting to 1")
        self.gpus = 1
    if self.gpus < 1:
        logger.warning("Requested training on CPU. This will be slow.")
    else:
        self.model = model.to_gpu()
    self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
def predict_text(self, tokens, **kwargs):
    """Tag a tokenized sentence and return (token, label) pairs.

    Builds a default word/char/length featurizer when the caller does not
    supply one, runs the tagger, and maps predicted indices back to label
    strings.  Models that need other input features should supply a custom
    `featurizer` or call `predict` directly.

    :param tokens: (``list``) A list of tokens
    :keyword featurizer: optional featurizer; defaults to `WordCharLength`
    :keyword mxlen: maximum sentence length (default: `self.mxlen` when
        present, else `len(tokens)`)
    :keyword maxw: maximum word length (default: `self.maxw` when present,
        else the longest token)
    :keyword zero_alloc: allocator for zero-filled arrays (default np.zeros)
    :return: list of (token, label) tuples, one per kept token
    """
    featurizer = kwargs.get('featurizer')
    if featurizer is None:
        mxlen = kwargs.get('mxlen', self.mxlen if hasattr(self, 'mxlen') else len(tokens))
        maxw = kwargs.get('maxw', self.maxw if hasattr(self, 'maxw') else max(len(token) for token in tokens))
        zero_alloc = kwargs.get('zero_alloc', np.zeros)
        featurizer = WordCharLength(self, mxlen, maxw, zero_alloc)
    # This might be inefficient if the label space is large
    label_vocab = revlut(self.get_labels())
    data = featurizer.run(tokens)
    # lengths[0] is the number of tokens actually featurized
    lengths = data['lengths']
    indices = self.predict(data)[0]
    return [(tokens[j], label_vocab[indices[j].item()]) for j in range(lengths[0])]
def __init__(self, *args, **kwargs):
    """Initialize the service and build an index -> token map for the target vocab."""
    super().__init__(*args, **kwargs)
    tgt_key = self.model.tgt_key
    self.idx_to_token = revlut(self.vocabs[tgt_key])
def __init__(self, *args, **kwargs):
    """Initialize the service and build an index -> token map for the target vocab."""
    super(LanguageModelService, self).__init__(*args, **kwargs)
    tgt_key = self.model.tgt_key
    self.idx_to_token = revlut(self.vocabs[tgt_key])
def __init__(self, model):
    """Wrap a model and precompute the index -> label reverse lookup.

    :param model: a model exposing a `labels` mapping
    """
    self.model = model
    # index -> label string for decoding predictions
    self.idx2label = revlut(self.model.labels)
def __init__(self, vocabs=None, vectorizers=None, model=None):
    """Initialize the tagger service and build an index -> label map.

    :param vocabs: token -> index vocabularies keyed by feature name
    :param vectorizers: vectorizers keyed by feature name
    :param model: the loaded model
    """
    super(TaggerService, self).__init__(vocabs, vectorizers, model)
    # index -> label map used to decode predicted indices
    self.label_vocab = revlut(self.get_labels())