def load(cls, path, **kwargs): r""" Loads a parser with data fields and pretrained model parameters. Args: path (str): - a string with the shortcut name of a pretrained parser defined in ``supar.PRETRAINED`` to load from cache or download, e.g., ``'crf-dep-en'``. - a path to a directory containing a pre-trained parser, e.g., `./<path>/model`. kwargs (dict): A dict holding the unconsumed arguments that can be used to update the configurations and initiate the model. Examples: >>> from supar import Parser >>> parser = Parser.load('biaffine-dep-en') >>> parser = Parser.load('./ptb.biaffine.dependency.char') """ args = Config(**locals()) args.device = 'cuda' if torch.cuda.is_available() else 'cpu' if os.path.exists(path): state = torch.load(path) else: state = torch.hub.load_state_dict_from_url(supar.PRETRAINED[path] if path in supar.PRETRAINED else path) cls = supar.PARSER[state['name']] if cls.NAME is None else cls args = state['args'].update(args) model = cls.MODEL(**args) model.load_pretrained(state['pretrained']) model.load_state_dict(state['state_dict'], False) model.to(args.device) transform = state['transform'] return cls(args, model, transform)
def parse(parser):
    parser.add_argument('--conf', '-c', help='path to config file')
    parser.add_argument('--path', '-p', help='path to model file')
    parser.add_argument('--device', '-d', default='-1', help='ID of GPU to use')
    parser.add_argument('--seed', '-s', default=1, type=int, help='seed for generating random numbers')
    parser.add_argument('--threads', '-t', default=16, type=int, help='max num of threads')
    parser.add_argument('--batch-size', default=5000, type=int, help='batch size')
    parser.add_argument('--local_rank', type=int, default=-1, help='node rank for distributed training')
    args, unknown = parser.parse_known_args()
    args, _ = parser.parse_known_args(unknown, args)
    args = Config(**vars(args))

    Parser = args.pop('Parser')
    torch.set_num_threads(args.threads)
    torch.manual_seed(args.seed)
    init_device(args.device, args.local_rank)
    init_logger(logger, f"{args.path}.{args.mode}.log")
    logger.info('\n' + str(args))

    if args.mode == 'train':
        parser = Parser.build(**args)
        parser.train(**args)
    elif args.mode == 'evaluate':
        parser = Parser.load(args.path)
        parser.evaluate(**args)
    elif args.mode == 'predict':
        parser = Parser.load(args.path)
        parser.predict(**args)
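# A hedged sketch of how an entry point might hand a configured ArgumentParser to parse()
# above. The module layout, the imported parser class, and the per-subcommand arguments are
# assumptions for illustration; only the set_defaults(Parser=...)/subparser pattern is implied
# by the code above (parse() pops `Parser` from the config and dispatches on `mode`).
import argparse

from supar import BiaffineDependencyParser

def main():
    parser = argparse.ArgumentParser(description='Create a Biaffine Dependency Parser.')
    parser.set_defaults(Parser=BiaffineDependencyParser)
    subparsers = parser.add_subparsers(title='Commands', dest='mode')
    # each subcommand contributes its own arguments; abbreviated here
    subparser = subparsers.add_parser('train', help='Train a parser.')
    subparser.add_argument('--train', help='path to train file')
    subparser.add_argument('--dev', help='path to dev file')
    subparser.add_argument('--test', help='path to test file')
    subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
    subparsers.add_parser('predict', help='Use a trained parser to make predictions.')
    parse(parser)

if __name__ == '__main__':
    main()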
def load(cls, path, **kwargs): r""" Load data fields and model parameters from a pretrained parser. Args: path (str): - a string with the shortcut name of a pre-trained parser defined in supar.PRETRAINED to load from cache or download, e.g., `crf-dep-en`. - a path to a directory containing a pre-trained parser, e.g., `./<path>/model`. kwargs (dict): A dict holding the unconsumed arguments. Returns: The loaded parser. """ args = Config(**locals()) args.device = 'cuda' if torch.cuda.is_available() else 'cpu' if os.path.exists(path): state = torch.load(path) else: path = supar.PRETRAINED[path] if path in supar.PRETRAINED else path state = torch.hub.load_state_dict_from_url(path) cls = supar.PARSER[state['name']] if cls.NAME is None else cls args = state['args'].update(args) model = cls.MODEL(normalize_paras=not args.em_alg, **args) model.load_pretrained(state['pretrained']) model.load_state_dict(state['state_dict'], False) model.to(args.device) transform = state['transform'] return cls(args, model, transform)
def build(cls, path, min_freq=2, fix_len=20, **kwargs):
    """
    Build a brand-new Parser, including initialization of all data fields and model parameters.

    Args:
        path (str):
            The path of the model to be saved.
        min_freq (int):
            The minimum frequency needed to include a token in the vocabulary. Default: 2.
        fix_len (int):
            The max length of all subword pieces. The excess part of each piece will be truncated.
            Required if using CharLSTM/BERT. Default: 20.
        kwargs (dict):
            A dict holding the unconsumed arguments.

    Returns:
        The created parser.
    """
    args = Config(**locals())
    args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    if os.path.exists(path) and not args.build:
        parser = cls.load(**args)
        parser.model = cls.MODEL(**parser.args)
        parser.model.load_pretrained(parser.WORD.embed).to(args.device)
        return parser

    logger.info("Build the fields")
    WORD = Field('words', pad=pad, unk=unk, lower=True)
    CPOS = Field('tags')
    transform = CoNLL(FORM=WORD, CPOS=CPOS)

    train = Dataset(transform, args.train)
    WORD.build(train, args.min_freq,
               (Embedding.load(args.embed, args.unk) if args.embed else None),
               not_extend_vocab=True)
    # WORD.build(train, args.min_freq)
    CPOS.build(train)
    args.update({
        'n_words': len(WORD.vocab),
        'n_cpos': len(CPOS.vocab),
        'pad_index': WORD.pad_index,
        'unk_index': WORD.unk_index,
    })
    model = cls.MODEL(normalize_paras=not args.em_alg, **args)
    if args.em_alg:
        model.requires_grad_(False)
    # model.load_pretrained(WORD.embed).to(args.device)
    model.to(args.device)
    return cls(args, model, transform)
def parse(parser):
    parser.add_argument('--conf', '-c', help='path to config file')
    parser.add_argument('--path', '-p', help='path to model file')
    parser.add_argument('--device', '-d', default='-1', help='ID of GPU to use')
    parser.add_argument('--seed', '-s', default=1, type=int, help='seed for generating random numbers')
    parser.add_argument('--threads', '-t', default=16, type=int, help='max num of threads')
    parser.add_argument('--batch-size', default=5000, type=int, help='batch size')
    parser.add_argument('--local_rank', type=int, default=-1, help='node rank for distributed training')
    parser.add_argument('--min_freq', default=2, type=int,
                        help='minimum frequency needed to include a token in the vocabulary')
    args, unknown = parser.parse_known_args()
    args, _ = parser.parse_known_args(unknown, args)
    args = Config(**vars(args))

    Parser = args.pop('Parser')
    torch.set_num_threads(args.threads)
    torch.manual_seed(args.seed)
    init_device(args.device, args.local_rank)
    init_logger(logger, f"{args.path}.{args.mode}.log")
    logger.info('\n' + str(args))

    if args.mode == 'train':
        # TODO: double-check what min_freq should be
        parser = Parser.build(**args)
        args.update({'mu': .0, 'nu': 0.95, 'lr': 1e-3, 'weight_decay': 3e-9})
        parser.train(**args)
    elif args.mode == 'evaluate':
        parser = Parser.load(args.path)
        parser.evaluate(**args)
    elif args.mode == 'predict':
        parser = Parser.load(args.path)
        parser.predict(**args)
def evaluate(self, data, buckets=8, batch_size=5000, punct=False, tree=True, proj=False, verbose=True, **kwargs):
    """
    Args:
        data (str):
            The data to be evaluated.
        buckets (int):
            Number of buckets that sentences are assigned to. Default: 8.
        batch_size (int):
            Number of tokens in each batch. Default: 5000.
        punct (bool):
            If False, ignores the punctuation during evaluation. Default: False.
        tree (bool):
            If True, ensures to output well-formed trees. Default: True.
        proj (bool):
            If True, ensures to output projective trees. Default: False.
        verbose (bool):
            If True, increases the output verbosity. Default: True.
        kwargs (dict):
            A dict holding the unconsumed arguments.

    Returns:
        The loss scalar and evaluation results.
    """
    return super().evaluate(**Config().update(locals()))
def train(self, train, dev, test, buckets=32, batch_size=5000, punct=False, tree=False, proj=False, verbose=True, **kwargs): """ Args: train, dev, test (list[list] or str): the train/dev/test data, both list of instances and filename are allowed. buckets (int): Number of buckets that sentences are assigned to. Default: 32. batch_size (int): Number of tokens in each batch. Default: 5000. punct (bool): If False, ignores the punctuations during evaluation. Default: False. tree (bool): If True, ensures to output well-formed trees. Default: False. proj (bool): If True, ensures to output projective trees. Default: False. verbose (bool): If True, increases the output verbosity. Default: True. kwargs (dict): A dict holding the unconsumed arguments. """ return super().train(**Config().update(locals()))
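# A minimal, hypothetical sketch (named ConfigSketch so it does not clash with the real
# supar Config) of the behaviour these thin wrappers rely on: Config().update(locals())
# folds every explicit argument plus the catch-all **kwargs into one flat keyword dict,
# which is then unpacked into the parent-class train/evaluate/predict call.
class ConfigSketch(dict):
    def update(self, kwargs):
        kwargs = dict(kwargs)
        kwargs.update(kwargs.pop('kwargs', {}))  # merge the **kwargs dict into the namespace
        kwargs.pop('self', None)                 # drop the bound instance captured by locals()
        super().update(kwargs)
        return self                              # allows super().train(**ConfigSketch().update(locals()))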
def evaluate(self, data, buckets=8, batch_size=5000, mbr=True,
             delete={'TOP', 'S1', '-NONE-', ',', ':', '``', "''", '.', '?', '!', ''},
             equal={'ADVP': 'PRT'},
             verbose=True, **kwargs):
    """
    Args:
        data (str):
            The data to be evaluated.
        buckets (int):
            Number of buckets that sentences are assigned to. Default: 8.
        batch_size (int):
            Number of tokens in each batch. Default: 5000.
        mbr (bool):
            If True, performs MBR decoding. Default: True.
        delete (set[str]):
            A set of labels that will not be taken into consideration during evaluation.
            Default: {'TOP', 'S1', '-NONE-', ',', ':', '``', "''", '.', '?', '!', ''}.
        equal (dict[str, str]):
            The pairs in the dict are considered equivalent during evaluation.
            Default: {'ADVP': 'PRT'}.
        verbose (bool):
            If True, increases the output verbosity. Default: True.
        kwargs (dict):
            A dict holding the unconsumed arguments.

    Returns:
        The loss scalar and evaluation results.
    """
    return super().evaluate(**Config().update(locals()))
def train(self, train, dev, test, buckets=32, batch_size=5000, punct=False, mbr=True, tree=False, proj=False, partial=False, verbose=True, **kwargs): r""" Args: train/dev/test (list[list] or str): Filenames of the train/dev/test datasets. buckets (int): The number of buckets that sentences are assigned to. Default: 32. batch_size (int): The number of tokens in each batch. Default: 5000. punct (bool): If ``False``, ignores the punctuations during evaluation. Default: ``False``. mbr (bool): If ``True``, returns marginals for MBR decoding. Default: ``True``. tree (bool): If ``True``, ensures to output well-formed trees. Default: ``False``. proj (bool): If ``True``, ensures to output projective trees. Default: ``False``. partial (bool): ``True`` denotes the trees are partially annotated. Default: ``False``. verbose (bool): If ``True``, increases the output verbosity. Default: ``True``. kwargs (dict): A dict holding the unconsumed arguments that can be used to update the configurations for training. """ return super().train(**Config().update(locals()))
def predict(self, data, pred=None, lang=None, buckets=8, batch_size=5000, prob=False, tree=True, proj=False, verbose=True, **kwargs):
    r"""
    Args:
        data (list[list] or str):
            The data for prediction, both a list of instances and filename are allowed.
        pred (str):
            If specified, the predicted results will be saved to the file. Default: ``None``.
        lang (str):
            Language code (e.g., ``en``) or language name (e.g., ``English``) for the text to tokenize.
            ``None`` if tokenization is not required. Default: ``None``.
        buckets (int):
            The number of buckets that sentences are assigned to. Default: 8.
        batch_size (int):
            The number of tokens in each batch. Default: 5000.
        prob (bool):
            If ``True``, outputs the probabilities. Default: ``False``.
        tree (bool):
            If ``True``, ensures to output well-formed trees. Default: ``True``.
        proj (bool):
            If ``True``, ensures to output projective trees. Default: ``False``.
        verbose (bool):
            If ``True``, increases the output verbosity. Default: ``True``.
        kwargs (dict):
            A dict holding unconsumed arguments for updating prediction configs.

    Returns:
        A :class:`~supar.utils.Dataset` object that stores the predicted results.
    """
    return super().predict(**Config().update(locals()))
def __init__(self, n_src_words, n_tgt_words, src_pad_idx, tgt_pad_idx, generator,
             d_word_vec=512, d_model=512, d_inner=2048,
             n_layers=6, n_heads=8, d_k=64, d_v=64,
             dropout=0.1, n_positions=200):
    super().__init__()

    self.args = Config().update(locals())
    self.d_model = d_model
    self.src_pad_idx, self.trg_pad_idx = src_pad_idx, tgt_pad_idx

    self.encoder = Encoder(n_src_words, src_pad_idx, d_word_vec, d_model, d_inner,
                           n_layers, n_heads, d_k, d_v, dropout, n_positions)
    self.decoder = Decoder(n_tgt_words, tgt_pad_idx, d_word_vec, d_model, d_inner,
                           n_layers, n_heads, d_k, d_v, dropout, n_positions)
    self.generator = generator
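# A hedged usage sketch for the constructor above, assuming the enclosing class is named
# Transformer; vocabulary sizes, pad indices, and the generator head below are illustrative
# placeholders rather than values taken from the repository.
import torch.nn as nn

n_src_words, n_tgt_words, pad_idx = 32000, 32000, 0
generator = nn.Linear(512, n_tgt_words)  # projects decoder states to target-vocabulary logits
model = Transformer(n_src_words=n_src_words,
                    n_tgt_words=n_tgt_words,
                    src_pad_idx=pad_idx,
                    tgt_pad_idx=pad_idx,
                    generator=generator,
                    d_model=512, n_layers=6, n_heads=8)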
def train(self, train, dev, test, buckets=32, batch_size=5000, update_steps=1, verbose=True, **kwargs): r""" Args: train/dev/test (list[list] or str): Filenames of the train/dev/test datasets. buckets (int): The number of buckets that sentences are assigned to. Default: 32. batch_size (int): The number of tokens in each batch. Default: 5000. update_steps (int): Gradient accumulation steps. Default: 1. verbose (bool): If ``True``, increases the output verbosity. Default: ``True``. kwargs (dict): A dict holding unconsumed arguments for updating training configs. """ return super().train(**Config().update(locals()))
def evaluate(self, data, buckets=8, batch_size=5000,
             delete={'TOP', 'S1', '-NONE-', ',', ':', '``', "''", '.', '?', '!', ''},
             equal={'ADVP': 'PRT'},
             verbose=True, **kwargs):
    r"""
    Args:
        data (str):
            The data for evaluation, both list of instances and filename are allowed.
        buckets (int):
            The number of buckets that sentences are assigned to. Default: 8.
        batch_size (int):
            The number of tokens in each batch. Default: 5000.
        delete (set[str]):
            A set of labels that will not be taken into consideration during evaluation.
            Default: {'TOP', 'S1', '-NONE-', ',', ':', '``', "''", '.', '?', '!', ''}.
        equal (dict[str, str]):
            The pairs in the dict are considered equivalent during evaluation.
            Default: {'ADVP': 'PRT'}.
        verbose (bool):
            If ``True``, increases the output verbosity. Default: ``True``.
        kwargs (dict):
            A dict holding unconsumed arguments for updating evaluation configs.

    Returns:
        The loss scalar and evaluation results.
    """
    return super().evaluate(**Config().update(locals()))
def predict(self, data, pred=None, buckets=8, batch_size=5000, prob=False, mbr=True, verbose=True, **kwargs):
    """
    Args:
        data (list[list] or str):
            The data to be predicted, both a list of instances and filename are allowed.
        pred (str):
            If specified, the predicted results will be saved to the file. Default: None.
        buckets (int):
            Number of buckets that sentences are assigned to. Default: 8.
        batch_size (int):
            Number of tokens in each batch. Default: 5000.
        prob (bool):
            If True, outputs the probabilities. Default: False.
        mbr (bool):
            If True, performs MBR decoding. Default: True.
        verbose (bool):
            If True, increases the output verbosity. Default: True.
        kwargs (dict):
            A dict holding the unconsumed arguments.

    Returns:
        A Dataset object that stores the predicted results.
    """
    return super().predict(**Config().update(locals()))
def predict(self, data, pred=None, buckets=8, batch_size=5000, prob=False, mbr=True, tree=True, proj=True, verbose=True, **kwargs):
    r"""
    Args:
        data (list[list] or str):
            The data for prediction, both a list of instances and filename are allowed.
        pred (str):
            If specified, the predicted results will be saved to the file. Default: ``None``.
        buckets (int):
            The number of buckets that sentences are assigned to. Default: 8.
        batch_size (int):
            The number of tokens in each batch. Default: 5000.
        prob (bool):
            If ``True``, outputs the probabilities. Default: ``False``.
        mbr (bool):
            If ``True``, returns marginals for MBR decoding. Default: ``True``.
        tree (bool):
            If ``True``, ensures to output well-formed trees. Default: ``True``.
        proj (bool):
            If ``True``, ensures to output projective trees. Default: ``True``.
        verbose (bool):
            If ``True``, increases the output verbosity. Default: ``True``.
        kwargs (dict):
            A dict holding the unconsumed arguments that can be used to update the configurations for prediction.

    Returns:
        A :class:`~supar.utils.Dataset` object that stores the predicted results.
    """
    return super().predict(**Config().update(locals()))
def evaluate(self, data, buckets=8, batch_size=5000, punct=False, tree=True, proj=False, partial=False, verbose=True, **kwargs): r""" Args: data (str): The data for evaluation, both list of instances and filename are allowed. buckets (int): The number of buckets that sentences are assigned to. Default: 32. batch_size (int): The number of tokens in each batch. Default: 5000. punct (bool): If ``False``, ignores the punctuations during evaluation. Default: ``False``. tree (bool): If ``True``, ensures to output well-formed trees. Default: ``False``. proj (bool): If ``True``, ensures to output projective trees. Default: ``False``. partial (bool): ``True`` denotes the trees are partially annotated. Default: ``False``. verbose (bool): If ``True``, increases the output verbosity. Default: ``True``. kwargs (dict): A dict holding the unconsumed arguments that can be used to update the configurations for evaluation. Returns: The loss scalar and evaluation results. """ return super().evaluate(**Config().update(locals()))
def train(self, train, dev, test, buckets=32, batch_size=5000, mbr=True, delete={'TOP', 'S1', '-NONE-', ',', ':', '``', "''", '.', '?', '!', ''}, equal={'ADVP': 'PRT'}, verbose=True, **kwargs): r""" Args: train/dev/test (list[list] or str): Filenames of the train/dev/test datasets. buckets (int): The number of buckets that sentences are assigned to. Default: 32. batch_size (int): The number of tokens in each batch. Default: 5000. mbr (bool): If ``True``, performs MBR decoding. Default: ``True``. delete (set[str]): A set of labels that will not be taken into consideration during evaluation. Default: {'TOP', 'S1', '-NONE-', ',', ':', '``', "''", '.', '?', '!', ''}. equal (dict[str, str]): The pairs in the dict are considered equivalent during evaluation. Default: {'ADVP': 'PRT'}. verbose (bool): If ``True``, increases the output verbosity. Default: ``True``. kwargs (dict): A dict holding the unconsumed arguments that can be used to update the configurations for training. """ return super().train(**Config().update(locals()))
def load(cls, path, reload=False, src='github', checkpoint=False, **kwargs): r""" Loads a parser with data fields and pretrained model parameters. Args: path (str): - a string with the shortcut name of a pretrained model defined in ``supar.MODEL`` to load from cache or download, e.g., ``'biaffine-dep-en'``. - a local path to a pretrained model, e.g., ``./<path>/model``. reload (bool): Whether to discard the existing cache and force a fresh download. Default: ``False``. src (str): Specifies where to download the model. ``'github'``: github release page. ``'hlt'``: hlt homepage, only accessible from 9:00 to 18:00 (UTC+8). Default: ``'github'``. checkpoint (bool): If ``True``, loads all checkpoint states to restore the training process. Default: ``False``. kwargs (dict): A dict holding unconsumed arguments for updating training configs and initializing the model. Examples: >>> from supar import Parser >>> parser = Parser.load('biaffine-dep-en') >>> parser = Parser.load('./ptb.biaffine.dep.lstm.char') """ args = Config(**locals()) args.device = 'cuda' if torch.cuda.is_available() else 'cpu' state = torch.load(path if os.path.exists(path) else download( supar.MODEL[src].get(path, path), reload=reload)) cls = supar.PARSER[state['name']] if cls.NAME is None else cls args = state['args'].update(args) model = cls.MODEL(**args) model.load_pretrained(state['pretrained']) model.load_state_dict(state['state_dict'], False) model.to(args.device) transform = state['transform'] parser = cls(args, model, transform) parser.checkpoint_state_dict = state[ 'checkpoint_state_dict'] if args.checkpoint else None return parser
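# A hedged sketch of restoring a saved run with the checkpoint flag documented above;
# the model path is a placeholder, and how the restored states are consumed downstream
# is an assumption rather than code from the repository.
parser = Parser.load('./ptb.biaffine.dep.lstm.char', checkpoint=True)
# parser.checkpoint_state_dict now holds whatever training states were saved with the model
# (or None when checkpoint=False), which a training loop can use to resume.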
def __init__(self, n_words, n_rels, n_tags=None, n_chars=None, encoder='lstm', feat=['char'], n_embed=100, n_pretrained=100, n_feat_embed=100, n_char_embed=50, n_char_hidden=100, char_pad_index=0, elmo='original_5b', elmo_bos_eos=(True, False), bert=None, n_bert_layers=4, mix_dropout=.0, bert_pooling='mean', bert_pad_index=0, freeze=True, embed_dropout=.33, n_lstm_hidden=400, n_lstm_layers=3, encoder_dropout=.33, n_arc_mlp=500, n_rel_mlp=100, mlp_dropout=.33, scale=0, pad_index=0, unk_index=1, **kwargs): super().__init__(**Config().update(locals())) self.arc_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_arc_mlp, dropout=mlp_dropout) self.arc_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_arc_mlp, dropout=mlp_dropout) self.rel_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_rel_mlp, dropout=mlp_dropout) self.rel_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_rel_mlp, dropout=mlp_dropout) self.arc_attn = Biaffine(n_in=n_arc_mlp, scale=scale, bias_x=True, bias_y=False) self.rel_attn = Biaffine(n_in=n_rel_mlp, n_out=n_rels, bias_x=True, bias_y=True) self.criterion = nn.CrossEntropyLoss()
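# A hedged sketch of how the modules above combine during scoring, written as a
# hypothetical forward_sketch method for illustration. It assumes `x` is the encoder
# output of shape [batch_size, seq_len, n_hidden] and `mask` marks real tokens; it
# mirrors the biaffine-parser design rather than reproducing the exact forward().
def forward_sketch(self, x, mask):
    arc_d, arc_h = self.arc_mlp_d(x), self.arc_mlp_h(x)   # dependent/head views for arcs
    rel_d, rel_h = self.rel_mlp_d(x), self.rel_mlp_h(x)   # dependent/head views for labels
    # [batch_size, seq_len, seq_len]: score of token j heading token i; pad columns masked out
    s_arc = self.arc_attn(arc_d, arc_h).masked_fill_(~mask.unsqueeze(1), float('-inf'))
    # [batch_size, seq_len, seq_len, n_rels]: label scores for every candidate arc
    s_rel = self.rel_attn(rel_d, rel_h).permute(0, 2, 3, 1)
    return s_arc, s_rel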
def __init__(self, n_words, n_labels, n_tags=None, n_chars=None, encoder='lstm', feat=['char'], n_embed=100, n_pretrained=100, n_feat_embed=100, n_char_embed=50, n_char_hidden=100, char_pad_index=0, bert=None, n_bert_layers=4, mix_dropout=.0, bert_pooling='mean', bert_pad_index=0, freeze=True, embed_dropout=.33, n_lstm_hidden=400, n_lstm_layers=3, encoder_dropout=.33, n_span_mlp=500, n_pair_mlp=100, n_label_mlp=100, mlp_dropout=.33, inference='mfvi', max_iter=3, interpolation=0.1, pad_index=0, unk_index=1, **kwargs): super().__init__(**Config().update(locals())) self.span_mlp_l = MLP(n_in=self.args.n_hidden, n_out=n_span_mlp, dropout=mlp_dropout) self.span_mlp_r = MLP(n_in=self.args.n_hidden, n_out=n_span_mlp, dropout=mlp_dropout) self.pair_mlp_l = MLP(n_in=self.args.n_hidden, n_out=n_pair_mlp, dropout=mlp_dropout) self.pair_mlp_r = MLP(n_in=self.args.n_hidden, n_out=n_pair_mlp, dropout=mlp_dropout) self.pair_mlp_b = MLP(n_in=self.args.n_hidden, n_out=n_pair_mlp, dropout=mlp_dropout) self.label_mlp_l = MLP(n_in=self.args.n_hidden, n_out=n_label_mlp, dropout=mlp_dropout) self.label_mlp_r = MLP(n_in=self.args.n_hidden, n_out=n_label_mlp, dropout=mlp_dropout) self.span_attn = Biaffine(n_in=n_span_mlp, bias_x=True, bias_y=False) self.pair_attn = Triaffine(n_in=n_pair_mlp, bias_x=True, bias_y=False) self.label_attn = Biaffine(n_in=n_label_mlp, n_out=n_labels, bias_x=True, bias_y=True) self.inference = (MFVIConstituency if inference == 'mfvi' else LBPConstituency)(max_iter) self.criterion = nn.CrossEntropyLoss()
def predict(self, data, pred=None, buckets=8, batch_size=5000, prob=False, tree=True, proj=False, verbose=True, **kwargs):
    r"""
    Args:
        data (list[list] or str):
            The data for prediction, both a list of instances and filename are allowed.
        pred (str):
            If specified, the predicted results will be saved to the file. Default: ``None``.
        buckets (int):
            The number of buckets that sentences are assigned to. Default: 8.
        batch_size (int):
            The number of tokens in each batch. Default: 5000.
        prob (bool):
            If ``True``, outputs the probabilities. Default: ``False``.
        tree (bool):
            If ``True``, ensures to output well-formed trees. Default: ``True``.
        proj (bool):
            If ``True``, ensures to output projective trees. Default: ``False``.
        verbose (bool):
            If ``True``, increases the output verbosity. Default: ``True``.
        kwargs (dict):
            A dict holding the unconsumed arguments that can be used to update the configurations for prediction.

    Returns:
        A :class:`~supar.utils.Dataset` object that stores the predicted results.
    """
    if kwargs['elmo_options']:
        self.elmo = ElmoEmbedder(kwargs['elmo_options'], kwargs['elmo_weights'], -1)
    else:
        self.efml = EFML(kwargs['elmo_weights'])
    if kwargs['map_method'] == 'vecmap':
        self.mapper = Vecmap(kwargs)
    elif kwargs['map_method'] == 'elmogan':
        self.mapper = Elmogan(kwargs)
    elif kwargs['map_method'] == 'muse':
        self.mapper = Muse(kwargs)
    else:
        self.mapper = None
    return super().predict(**Config().update(locals()))
def evaluate(self, data, buckets=8, batch_size=5000, punct=False, tree=True, proj=False, partial=False, verbose=True, **kwargs): r""" Args: data (str): The data for evaluation, both list of instances and filename are allowed. buckets (int): The number of buckets that sentences are assigned to. Default: 32. batch_size (int): The number of tokens in each batch. Default: 5000. punct (bool): If ``False``, ignores the punctuations during evaluation. Default: ``False``. tree (bool): If ``True``, ensures to output well-formed trees. Default: ``False``. proj (bool): If ``True``, ensures to output projective trees. Default: ``False``. partial (bool): ``True`` denotes the trees are partially annotated. Default: ``False``. verbose (bool): If ``True``, increases the output verbosity. Default: ``True``. kwargs (dict): A dict holding the unconsumed arguments that can be used to update the configurations for evaluation. Returns: The loss scalar and evaluation results. """ if kwargs['elmo_options']: self.elmo = ElmoEmbedder(kwargs['elmo_options'], kwargs['elmo_weights'], -1) else: self.efml = EFML(kwargs['elmo_weights']) if kwargs['map_method'] == 'vecmap': self.mapper = Vecmap(kwargs) #print(self.mapper) elif kwargs['map_method'] == 'elmogan': self.mapper = Elmogan(kwargs) #print(self.mapper) elif kwargs['map_method'] == 'muse': self.mapper = Muse(kwargs) else: self.mapper = None return super().evaluate(**Config().update(locals()))
def __init__(self, n_words, n_cpos, normalize_paras=False, **kwargs):
    super().__init__()

    self.args = Config().update(locals())
    # emission scores, indexed by (word, tag)
    self.E = nn.Parameter(torch.ones(n_words, n_cpos))
    # transition scores between consecutive tags
    self.T = nn.Parameter(torch.ones(n_cpos, n_cpos))
    self.start = nn.Parameter(torch.ones(n_cpos))
    self.end = nn.Parameter(torch.ones(n_cpos))
    self.eps = 1e-6

    # accumulators for the sufficient statistics collected during EM training
    self.gamma_sum = 0
    self.start_sum = 0
    self.end_sum = 0
    self.emit_sum = 0
    self.xi_sum = 0
    self.total_sent = 0

    self.reset_parameters()
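# A hedged, self-contained sketch of how scores like E/T/start/end can be normalized into
# log-probabilities and scored with the forward algorithm. The function name, the
# normalization axes, and treating `end` as a distribution over tags are assumptions for
# illustration, not conventions taken from the repository.
import torch

def hmm_log_likelihood(words, E, T, start, end):
    """Log-likelihood of one untagged sentence under an HMM, computed in log space.

    words: LongTensor [seq_len] of word indices
    E:     [n_words, n_cpos] emission scores; T: [n_cpos, n_cpos] transition scores
    start/end: [n_cpos] boundary scores
    """
    log_emit = E.log_softmax(0)       # p(word | tag): normalize over the word axis
    log_trans = T.log_softmax(-1)     # p(next tag | previous tag)
    log_start = start.log_softmax(-1)
    log_end = end.log_softmax(-1)

    # forward recursion: alpha[j] = log p(words[:t+1], tag_t = j)
    alpha = log_start + log_emit[words[0]]
    for t in range(1, len(words)):
        alpha = torch.logsumexp(alpha.unsqueeze(-1) + log_trans, dim=0) + log_emit[words[t]]
    return torch.logsumexp(alpha + log_end, dim=0)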
def __init__( self, n_words, n_rels, n_tags=None, n_chars=None, encoder='lstm', feat=['char'], n_embed=100, n_pretrained=100, n_feat_embed=100, n_char_embed=50, n_char_hidden=100, char_pad_index=0, bert=None, n_bert_layers=4, mix_dropout=.0, bert_pooling='mean', bert_pad_index=0, freeze=True, embed_dropout=.33, n_lstm_hidden=400, n_lstm_layers=3, encoder_dropout=.33, n_arc_mlp=500, n_rel_mlp=100, mlp_dropout=.33, use_attentions=True, # attention attention_head=0, # attention attention_layer=6, # attention scale=0, pad_index=0, unk_index=1, **kwargs): super().__init__(**Config().update(locals())) self.criterion = nn.BCEWithLogitsLoss()
def evaluate(self, data, buckets=8, batch_size=5000, verbose=True, **kwargs):
    r"""
    Args:
        data (str):
            The data for evaluation, both list of instances and filename are allowed.
        buckets (int):
            The number of buckets that sentences are assigned to. Default: 8.
        batch_size (int):
            The number of tokens in each batch. Default: 5000.
        verbose (bool):
            If ``True``, increases the output verbosity. Default: ``True``.
        kwargs (dict):
            A dict holding the unconsumed arguments that can be used to update the configurations for evaluation.

    Returns:
        The loss scalar and evaluation results.
    """
    return super().evaluate(**Config().update(locals()))
def __init__(self, n_words, n_tags=None, n_chars=None, n_lemmas=None, encoder='lstm', feat=['char'], n_embed=100, n_pretrained=100, n_feat_embed=100, n_char_embed=50, n_char_hidden=100, char_pad_index=0, char_dropout=0, bert=None, n_bert_layers=4, mix_dropout=.0, bert_pooling='mean', bert_pad_index=0, freeze=False, embed_dropout=.33, n_lstm_hidden=400, n_lstm_layers=3, encoder_dropout=.33, pad_index=0, **kwargs): super().__init__() self.args = Config().update(locals()) if encoder != 'bert': self.word_embed = nn.Embedding(num_embeddings=n_words, embedding_dim=n_embed) n_input = n_embed if n_pretrained != n_embed: n_input += n_pretrained if 'tag' in feat: self.tag_embed = nn.Embedding(num_embeddings=n_tags, embedding_dim=n_feat_embed) n_input += n_feat_embed if 'char' in feat: self.char_embed = CharLSTM(n_chars=n_chars, n_embed=n_char_embed, n_hidden=n_char_hidden, n_out=n_feat_embed, pad_index=char_pad_index, dropout=char_dropout) n_input += n_feat_embed if 'lemma' in feat: self.lemma_embed = nn.Embedding(num_embeddings=n_lemmas, embedding_dim=n_feat_embed) n_input += n_feat_embed if 'bert' in feat: self.bert_embed = TransformerEmbedding( model=bert, n_layers=n_bert_layers, n_out=n_feat_embed, pooling=bert_pooling, pad_index=bert_pad_index, dropout=mix_dropout, requires_grad=(not freeze)) n_input += self.bert_embed.n_out self.embed_dropout = IndependentDropout(p=embed_dropout) if encoder == 'lstm': self.encoder = VariationalLSTM(input_size=n_input, hidden_size=n_lstm_hidden, num_layers=n_lstm_layers, bidirectional=True, dropout=encoder_dropout) self.encoder_dropout = SharedDropout(p=encoder_dropout) self.args.n_hidden = n_lstm_hidden * 2 else: self.encoder = TransformerEmbedding(model=bert, n_layers=n_bert_layers, pooling=bert_pooling, pad_index=pad_index, dropout=mix_dropout, requires_grad=True) self.encoder_dropout = nn.Dropout(p=encoder_dropout) self.args.n_hidden = self.encoder.n_out
def __init__(self, n_words, n_feats, n_labels, feat='char', n_embed=100, n_feat_embed=100, n_char_embed=50, bert=None, n_bert_layers=4, max_len=None, mix_dropout=.0, embed_dropout=.33, n_lstm_hidden=400, n_lstm_layers=3, lstm_dropout=.33, n_mlp_span=500, n_mlp_label=100, mlp_dropout=.33, feat_pad_index=0, pad_index=0, unk_index=1, **kwargs): super().__init__() self.args = Config().update(locals()) # the embedding layer self.word_embed = nn.Embedding(num_embeddings=n_words, embedding_dim=n_embed) if feat == 'char': self.feat_embed = CharLSTM(n_chars=n_feats, n_embed=n_char_embed, n_out=n_feat_embed, pad_index=feat_pad_index) elif feat == 'bert': self.feat_embed = BertEmbedding(model=bert, n_layers=n_bert_layers, n_out=n_feat_embed, pad_index=feat_pad_index, max_len=max_len, dropout=mix_dropout) self.n_feat_embed = self.feat_embed.n_out elif feat == 'tag': self.feat_embed = nn.Embedding(num_embeddings=n_feats, embedding_dim=n_feat_embed) else: raise RuntimeError("The feat type should be in ['char', 'bert', 'tag'].") self.embed_dropout = IndependentDropout(p=embed_dropout) # the lstm layer self.lstm = BiLSTM(input_size=n_embed+n_feat_embed, hidden_size=n_lstm_hidden, num_layers=n_lstm_layers, dropout=lstm_dropout) self.lstm_dropout = SharedDropout(p=lstm_dropout) # the MLP layers self.mlp_span_l = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_span, dropout=mlp_dropout) self.mlp_span_r = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_span, dropout=mlp_dropout) self.mlp_label_l = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_label, dropout=mlp_dropout) self.mlp_label_r = MLP(n_in=n_lstm_hidden*2, n_out=n_mlp_label, dropout=mlp_dropout) # the Biaffine layers self.span_attn = Biaffine(n_in=n_mlp_span, bias_x=True, bias_y=False) self.label_attn = Biaffine(n_in=n_mlp_label, n_out=n_labels, bias_x=True, bias_y=True) self.crf = CRFConstituency() self.criterion = nn.CrossEntropyLoss() self.pad_index = pad_index self.unk_index = unk_index
def build(cls, path, min_freq=2, fix_len=20, **kwargs): r""" Build a brand-new Parser, including initialization of all data fields and model parameters. Args: path (str): The path of the model to be saved. min_freq (str): The minimum frequency needed to include a token in the vocabulary. Default: 2. fix_len (int): The max length of all subword pieces. The excess part of each piece will be truncated. Required if using CharLSTM/BERT. Default: 20. kwargs (dict): A dict holding the unconsumed arguments. """ args = Config(**locals()) args.device = 'cuda' if torch.cuda.is_available() else 'cpu' os.makedirs(os.path.dirname(path) or './', exist_ok=True) if os.path.exists(path) and not args.build: parser = cls.load(**args) parser.model = cls.MODEL(**parser.args) parser.model.load_pretrained(parser.WORD.embed).to(args.device) return parser logger.info("Building the fields") WORD = Field('words', pad=PAD, unk=UNK, bos=BOS, eos=EOS, lower=True) TAG, CHAR, ELMO, BERT = None, None, None, None if args.encoder == 'bert': from transformers import (AutoTokenizer, GPT2Tokenizer, GPT2TokenizerFast) t = AutoTokenizer.from_pretrained(args.bert) WORD = SubwordField( 'words', pad=t.pad_token, unk=t.unk_token, bos=t.cls_token or t.cls_token, eos=t.sep_token or t.sep_token, fix_len=args.fix_len, tokenize=t.tokenize, fn=None if not isinstance(t, (GPT2Tokenizer, GPT2TokenizerFast)) else lambda x: ' ' + x) WORD.vocab = t.get_vocab() else: WORD = Field('words', pad=PAD, unk=UNK, bos=BOS, eos=EOS, lower=True) if 'tag' in args.feat: TAG = Field('tags', bos=BOS, eos=EOS) if 'char' in args.feat: CHAR = SubwordField('chars', pad=PAD, unk=UNK, bos=BOS, eos=EOS, fix_len=args.fix_len) if 'elmo' in args.feat: from allennlp.modules.elmo import batch_to_ids ELMO = RawField('elmo') ELMO.compose = lambda x: batch_to_ids(x).to(WORD.device) if 'bert' in args.feat: from transformers import (AutoTokenizer, GPT2Tokenizer, GPT2TokenizerFast) t = AutoTokenizer.from_pretrained(args.bert) BERT = SubwordField( 'bert', pad=t.pad_token, unk=t.unk_token, bos=t.cls_token or t.cls_token, eos=t.sep_token or t.sep_token, fix_len=args.fix_len, tokenize=t.tokenize, fn=None if not isinstance(t, (GPT2Tokenizer, GPT2TokenizerFast)) else lambda x: ' ' + x) BERT.vocab = t.get_vocab() TREE = RawField('trees') CHART = ChartField('charts') transform = Tree(WORD=(WORD, CHAR, ELMO, BERT), POS=TAG, TREE=TREE, CHART=CHART) train = Dataset(transform, args.train) if args.encoder != 'bert': WORD.build( train, args.min_freq, (Embedding.load(args.embed, args.unk) if args.embed else None)) if TAG is not None: TAG.build(train) if CHAR is not None: CHAR.build(train) CHART.build(train) args.update({ 'n_words': len(WORD.vocab) if args.encoder == 'bert' else WORD.vocab.n_init, 'n_labels': len(CHART.vocab), 'n_tags': len(TAG.vocab) if TAG is not None else None, 'n_chars': len(CHAR.vocab) if CHAR is not None else None, 'char_pad_index': CHAR.pad_index if CHAR is not None else None, 'bert_pad_index': BERT.pad_index if BERT is not None else None, 'pad_index': WORD.pad_index, 'unk_index': WORD.unk_index, 'bos_index': WORD.bos_index, 'eos_index': WORD.eos_index }) logger.info(f"{transform}") logger.info("Building the model") model = cls.MODEL(**args).load_pretrained( WORD.embed if hasattr(WORD, 'embed') else None).to(args.device) logger.info(f"{model}\n") return cls(args, model, transform)
def build(cls, path,
          optimizer_args={'lr': 2e-3, 'betas': (.9, .9), 'eps': 1e-12},
          scheduler_args={'gamma': .75**(1/5000)},
          min_freq=2, fix_len=20, **kwargs):
    r"""
    Build a brand-new Parser, including initialization of all data fields and model parameters.

    Args:
        path (str):
            The path of the model to be saved.
        optimizer_args (dict):
            Arguments for creating an optimizer.
        scheduler_args (dict):
            Arguments for creating a scheduler.
        min_freq (int):
            The minimum frequency needed to include a token in the vocabulary. Default: 2.
        fix_len (int):
            The max length of all subword pieces. The excess part of each piece will be truncated.
            Required if using CharLSTM/BERT. Default: 20.
        kwargs (dict):
            A dict holding the unconsumed arguments.
    """
    args = Config(**locals())
    args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    os.makedirs(os.path.dirname(path) or './', exist_ok=True)
    if os.path.exists(path) and not args.build:
        parser = cls.load(**args)
        parser.model = cls.MODEL(**parser.args)
        parser.model.load_pretrained(parser.WORD.embed).to(args.device)
        return parser

    logger.info("Building the fields")
    WORD = Field('words', pad=pad, unk=unk, bos=bos, lower=True)
    if args.feat == 'char':
        FEAT = SubwordField('chars', pad=pad, unk=unk, bos=bos, fix_len=args.fix_len)
    elif args.feat == 'bert':
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(args.bert)
        FEAT = SubwordField('bert',
                            pad=tokenizer.pad_token,
                            unk=tokenizer.unk_token,
                            bos=tokenizer.bos_token or tokenizer.cls_token,
                            fix_len=args.fix_len,
                            tokenize=tokenizer.tokenize)
        FEAT.vocab = tokenizer.get_vocab()
    else:
        FEAT = Field('tags', bos=bos)
    ARC = Field('arcs', bos=bos, use_vocab=False, fn=CoNLL.get_arcs)
    SIB = Field('sibs', bos=bos, use_vocab=False, fn=CoNLL.get_sibs)
    REL = Field('rels', bos=bos)
    if args.feat in ('char', 'bert'):
        transform = CoNLL(FORM=(WORD, FEAT), HEAD=(ARC, SIB), DEPREL=REL)
    else:
        transform = CoNLL(FORM=WORD, CPOS=FEAT, HEAD=(ARC, SIB), DEPREL=REL)

    train = Dataset(transform, args.train)
    WORD.build(train, args.min_freq, (Embedding.load(args.embed, args.unk) if args.embed else None))
    FEAT.build(train)
    REL.build(train)
    args.update({
        'n_words': WORD.vocab.n_init,
        'n_feats': len(FEAT.vocab),
        'n_rels': len(REL.vocab),
        'pad_index': WORD.pad_index,
        'unk_index': WORD.unk_index,
        'bos_index': WORD.bos_index,
        'feat_pad_index': FEAT.pad_index
    })
    logger.info(f"{transform}")

    logger.info("Building the model")
    model = cls.MODEL(**args).load_pretrained(WORD.embed).to(args.device)
    logger.info(f"{model}\n")
    optimizer = Adam(model.parameters(), **optimizer_args)
    scheduler = ExponentialLR(optimizer, **scheduler_args)

    return cls(args, model, transform, optimizer, scheduler)
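# A hedged usage sketch for the build() above; the concrete parser class (assumed here to be
# the second-order dependency parser that owns this build), the file paths, and the feature
# choice are placeholders rather than values taken from the repository.
parser = CRF2oDependencyParser.build('./exp/ptb.crf2o.dep.char/model',
                                     feat='char',
                                     train='data/ptb/train.conllx',
                                     embed='data/glove.6B.100d.txt')
parser.train(train='data/ptb/train.conllx',
             dev='data/ptb/dev.conllx',
             test='data/ptb/test.conllx')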
def __init__( self, n_words, n_rels, n_tags=None, n_chars=None, encoder='lstm', feat=['char'], n_embed=100, n_pretrained=100, n_feat_embed=100, n_char_embed=50, n_char_hidden=100, char_pad_index=0, bert=None, n_bert_layers=4, mix_dropout=.0, bert_pooling='mean', bert_pad_index=0, freeze=True, embed_dropout=.33, n_lstm_hidden=400, n_lstm_layers=3, encoder_dropout=.33, n_arc_mlp=500, n_rel_mlp=100, mlp_dropout=.33, use_attentions=True, # attention attention_head=0, # attention attention_layer=6, # attention scale=0, pad_index=0, unk_index=1, **kwargs): super().__init__(**Config().update(locals())) self.arc_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_arc_mlp, dropout=mlp_dropout) self.arc_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_arc_mlp, dropout=mlp_dropout) self.rel_mlp_d = MLP(n_in=self.args.n_hidden, n_out=n_rel_mlp, dropout=mlp_dropout) self.rel_mlp_h = MLP(n_in=self.args.n_hidden, n_out=n_rel_mlp, dropout=mlp_dropout) self.arc_attn = Biaffine(n_in=n_arc_mlp, scale=scale, bias_x=True, bias_y=False) self.rel_attn = Biaffine(n_in=n_rel_mlp, n_out=n_rels, bias_x=True, bias_y=True) # transformer attention if use_attentions: self.attn_mix = nn.Parameter(torch.randn(1)) self.criterion = nn.CrossEntropyLoss()