Example #1
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         batch_size,
         embed: Embedding,
         mention_feedforward: FeedForward,
         antecedent_feedforward: FeedForward,
         feature_size: int,
         max_span_width: int,
         spans_per_word: float,
         max_antecedents: int,
         lr=1e-3,
         transformer_lr=1e-5,
         adam_epsilon=1e-6,
         weight_decay=0.01,
         warmup_steps=0.1,
         epochs=150,
         grad_norm=None,
         coarse_to_fine: bool = False,
         inference_order: int = 1,
         lexical_dropout: float = 0.2,
         context_layer: LSTMContextualEncoder = None,
         devices=None,
         logger=None,
         seed=None,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
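Every example on this page ends by forwarding its named hyper-parameters through merge_locals_kwargs(locals(), kwargs). The helper itself is not shown here; the following is only a minimal sketch of what such a function plausibly does (drop self, kwargs and __class__ from the captured locals, then overlay the explicit kwargs), so the excludes default and the merge order are assumptions rather than the actual HanLP implementation.

 def merge_locals_kwargs(locals_dict, kwargs,
                         excludes=('self', 'kwargs', '__class__')):
     """Hypothetical sketch of the helper used throughout these examples.

     locals_dict is typically locals() captured at the top of fit(), so it
     holds every named hyper-parameter; keys listed in excludes are dropped
     and the explicit **kwargs are overlaid on top.
     """
     merged = {k: v for k, v in locals_dict.items() if k not in excludes}
     merged.update(kwargs)  # explicit keyword arguments win
     return merged

Under that reading, super().fit(**merge_locals_kwargs(locals(), kwargs)) simply passes every named hyper-parameter, plus any extra keyword arguments, up to the parent fit.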
Example #2
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         transformer=None,
         lr=5e-5,
         transformer_lr=None,
         adam_epsilon=1e-8,
         weight_decay=0,
         warmup_steps=0.1,
         batch_size=32,
         gradient_accumulation=1,
         grad_norm=5.0,
         transformer_grad_norm=None,
         average_subwords=False,
         scalar_mix: Union[ScalarMixWithDropoutBuilder, int] = None,
         word_dropout=None,
         hidden_dropout=None,
         max_sequence_length=None,
         ret_raw_hidden_states=False,
         batch_max_tokens=None,
         epochs=3,
         logger=None,
         devices: Union[float, int, List[int]] = None,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #3
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         transformer,
         max_seq_length=256,
         transformer_dropout=.33,
         d_positional=None,
         n_mlp_arc=500,
         n_mlp_rel=100,
         mlp_dropout=.33,
         optimizer='adamw',
         learning_rate=5e-5,
         learning_rate_transformer=None,
         weight_decay_rate=0,
         epsilon=1e-8,
         clipnorm=None,
         fp16=False,
         warmup_steps_ratio=0,
         arc_loss='binary_crossentropy',
         rel_loss='sparse_categorical_crossentropy',
         metrics=('UF', 'LF'),
         batch_size=3000,
         samples_per_batch=150,
         max_samples_per_batch=None,
         epochs=100,
         tree=False,
         punct=False,
         token_mapping=None,
         enhanced_only=False,
         run_eagerly=False,
         logger=None,
         verbose=True,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #4
    def fit(self,
            trn_data,
            dev_data,
            save_dir,
            embed,
            context_layer,
            batch_size=40,
            batch_max_tokens=700,
            lexical_dropout=0.5,
            dropout=0.2,
            span_width_feature_size=20,
            ffnn_size=150,
            ffnn_depth=2,
            argument_ratio=0.8,
            predicate_ratio=0.4,
            max_arg_width=30,
            mlp_label_size=100,
            enforce_srl_constraint=False,
            use_gold_predicates=False,
            doc_level_offset=True,
            use_biaffine=False,
            lr=1e-3,
            transformer_lr=1e-5,
            adam_epsilon=1e-6,
            weight_decay=0.01,
            warmup_steps=0.1,
            grad_norm=5.0,
            gradient_accumulation=1,
            loss_reduction='sum',
            devices=None,
            logger=None,
            seed=None,
            **kwargs):

        return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #5
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         encoder,
         lr=5e-5,
         transformer_lr=None,
         adam_epsilon=1e-8,
         weight_decay=0,
         warmup_steps=0.1,
         grad_norm=1.0,
         n_mlp_span=500,
         n_mlp_label=100,
         mlp_dropout=.33,
         batch_size=None,
         batch_max_tokens=5000,
         gradient_accumulation=1,
         epochs=30,
         patience=0.5,
         mbr=True,
         sampler_builder=None,
         delete=('', ':', '``', "''", '.', '?', '!', '-NONE-', 'TOP', ',',
                 'S1'),
         equal=(('ADVP', 'PRT'), ),
         no_subcategory=True,
         eval_trn=True,
         transform=None,
         devices=None,
         logger=None,
         seed=None,
         **kwargs):
     if isinstance(equal, tuple):
         equal = dict(equal)
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
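The only local preprocessing in Example #5 is converting the equal tuple of pairs into a dict before forwarding. With the default value that is plain Python, presumably so the evaluator can treat the paired constituent labels as equivalent:

 equal = (('ADVP', 'PRT'),)
 print(dict(equal))  # {'ADVP': 'PRT'}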
Example #6
File: span_bio.py Project: yehuangcn/HanLP
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         embed,
         encoder=None,
         lr=1e-3,
         transformer_lr=1e-4,
         adam_epsilon=1e-8,
         warmup_steps=0.1,
         weight_decay=0,
         crf=False,
         n_mlp_rel=300,
         mlp_dropout=0.2,
         batch_size=32,
         gradient_accumulation=1,
         grad_norm=1,
         loss_reduction='mean',
         epochs=30,
         delimiter=None,
         doc_level_offset=True,
         eval_trn=False,
         logger=None,
         devices: Union[float, int, List[int]] = None,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #7
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         epochs=5,
         append_after_sentence=None,
         eos_chars=None,
         eos_char_min_freq=200,
         eos_char_is_punct=True,
         char_min_freq=None,
         window_size=5,
         batch_size=32,
         lr=0.001,
         grad_norm=None,
         loss_reduction='sum',
         embedding_size=128,
         rnn_type: str = 'LSTM',
         rnn_size=256,
         rnn_layers=1,
         rnn_bidirectional=False,
         dropout=0.2,
         devices=None,
         logger=None,
         seed=None,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #8
 def fit(
         self,
         encoder: Embedding,
         tasks: Dict[str, Task],
         save_dir,
         epochs,
         patience=0.5,
         lr=1e-3,
         encoder_lr=5e-5,
         adam_epsilon=1e-8,
         weight_decay=0.0,
         warmup_steps=0.1,
         gradient_accumulation=1,
         grad_norm=5.0,
         encoder_grad_norm=None,
         decoder_grad_norm=None,
         tau: float = 0.8,
         transform=None,
         # prune: Callable = None,
         eval_trn=True,
         prefetch=None,
         tasks_need_custom_eval=None,
         _device_placeholder=False,
         cache=False,
         devices=None,
         logger=None,
         seed=None,
         **kwargs):
     trn_data, dev_data, batch_size = 'trn', 'dev', None
     task_names = list(tasks.keys())
     return super().fit(
         **merge_locals_kwargs(locals(),
                               kwargs,
                               excludes=('self', 'kwargs', '__class__',
                                         'tasks')), **tasks)
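Example #8 excludes tasks from the merged locals and splats it back in as **tasks, so every task in the dict reaches the parent fit as its own keyword argument. A small self-contained illustration of that call shape (the values below are stand-ins, not real Task objects, and merged is a simplified placeholder for what merge_locals_kwargs would return):

 def parent_fit(**kwargs):
     # Stand-in for the parent fit(); it just reports which keywords arrived.
     return sorted(kwargs)

 tasks = {'tok': object(), 'pos': object()}   # placeholders for Task instances
 merged = {'epochs': 30, 'lr': 1e-3}          # simplified merged hyper-parameters
 print(parent_fit(**merged, **tasks))         # ['epochs', 'lr', 'pos', 'tok']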
Example #9
 def fit(self, trn_data, dev_data, save_dir,
         transformer=None,
         mask_prob=0.15,
         projection=None,
         average_subwords=False,
         transformer_hidden_dropout=None,
         layer_dropout=0,
         mix_embedding: int = None,
         embed_dropout=.33,
         n_mlp_arc=500,
         n_mlp_rel=100,
         mlp_dropout=.33,
         lr=2e-3,
         transformer_lr=5e-5,
         mu=.9,
         nu=.9,
         epsilon=1e-12,
         clip=5.0,
         decay=.75,
         decay_steps=5000,
         patience=100,
         sampler='kmeans',
         n_buckets=32,
         batch_max_tokens=5000,
         batch_size=None,
         epochs=50000,
         tree=False,
         punct=False,
         logger=None,
         verbose=True,
         max_sequence_length=512,
         devices: Union[float, int, List[int]] = None,
         transform=None,
         **kwargs):
     return TorchComponent.fit(self, **merge_locals_kwargs(locals(), kwargs))
Example #10
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         transformer,
         average_subwords=False,
         word_dropout: float = 0.2,
         hidden_dropout=None,
         layer_dropout=0,
         scalar_mix=None,
         mix_embedding: int = 0,
         grad_norm=5.0,
         transformer_grad_norm=None,
         lr=5e-5,
         transformer_lr=None,
         transformer_layers=None,
         gradient_accumulation=1,
         adam_epsilon=1e-6,
         weight_decay=0,
         warmup_steps=0.1,
         secondary_encoder=None,
         crf=False,
         reduction='sum',
         batch_size=32,
         sampler_builder: SamplerBuilder = None,
         epochs=3,
         patience=5,
         token_key=None,
         max_seq_len=None, sent_delimiter=None, char_level=False, hard_constraint=False,
         transform=None,
         logger=None,
         devices: Union[float, int, List[int]] = None,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #11
 def fit(self, trn_data, dev_data, save_dir,
         n_embed=100,
         pretrained_embed=None,
         embed_dropout=.33,
         n_lstm_hidden=400,
         n_lstm_layers=3,
         lstm_dropout=.33,
         n_mlp_arc=500,
         n_mlp_rel=100,
         mlp_dropout=.33,
         optimizer='adam',
         lr=2e-3,
         mu=.9,
         nu=.9,
         epsilon=1e-12,
         clip=5.0,
         decay=.75,
         decay_steps=5000,
         patience=100,
         arc_loss='sparse_categorical_crossentropy',
         rel_loss='sparse_categorical_crossentropy',
         metrics=('UAS', 'LAS'),
         n_buckets=32,
         batch_size=5000,
         epochs=50000,
         early_stopping_patience=100,
         tree=False,
         punct=False,
         min_freq=2,
         run_eagerly=False, logger=None, verbose=True,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #12
 def distill(self,
             teacher: str,
             trn_data,
             dev_data,
             save_dir,
             batch_size=None,
             epochs=None,
             kd_criterion='kd_ce_loss',
             temperature_scheduler='flsw',
             devices=None,
             logger=None,
             seed=None,
             **kwargs):
     devices = devices or cuda_devices()
     if isinstance(kd_criterion, str):
         kd_criterion = KnowledgeDistillationLoss(kd_criterion)
     if isinstance(temperature_scheduler, str):
         temperature_scheduler = TemperatureScheduler.from_name(temperature_scheduler)
     teacher = self.build_teacher(teacher, devices=devices)
     self.vocabs = teacher.vocabs
     config = copy(teacher.config)
     batch_size = batch_size or config.get('batch_size', None)
     epochs = epochs or config.get('epochs', None)
     config.update(kwargs)
     return super().fit(**merge_locals_kwargs(locals(),
                                              config,
                                              excludes=('self', 'kwargs', '__class__', 'config')))
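Before delegating, Example #12 inherits unset hyper-parameters from the teacher: an explicit argument wins, otherwise the value stored in the teacher's config is used, and the remaining **kwargs are folded into that config. A minimal stand-alone illustration of the fallback logic (the config values are made up):

 from copy import copy

 teacher_config = {'batch_size': 32, 'epochs': 30}   # hypothetical teacher settings
 config = copy(teacher_config)

 batch_size = None                                   # caller left it unset
 epochs = 50                                         # caller overrides the teacher
 batch_size = batch_size or config.get('batch_size', None)
 epochs = epochs or config.get('epochs', None)
 config.update({'weight_decay': 0.01})               # extra **kwargs merged into config
 print(batch_size, epochs)                           # 32 50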
Example #13
File: dep_2nd.py Project: lei1993/HanLP
 def __init__(self,
              trn: str = None,
              dev: str = None,
              tst: str = None,
              sampler_builder: SamplerBuilder = None,
              dependencies: str = None,
              scalar_mix: ScalarMixWithDropoutBuilder = None,
              use_raw_hidden_states=False,
              lr=2e-3,
              separate_optimizer=False,
              punct=False,
              tree=False,
              apply_constraint=True,
              n_mlp_arc=500,
              n_mlp_rel=100,
              mlp_dropout=.33,
              pad_rel=None,
              joint=True,
              mu=.9,
              nu=.9,
              epsilon=1e-12,
              cls_is_bos=True,
              **kwargs) -> None:
     super().__init__(**merge_locals_kwargs(locals(), kwargs))
     self.vocabs = VocabDict()
Example #14
 def fit(self,
         trn_data: Any,
         dev_data: Any,
         save_dir: str,
         word_embed: Union[str, int, dict] = 200,
         ngram_embed: Union[str, int, dict] = 50,
         embedding_trainable=True,
         window_size=4,
         kernel_size=3,
         filters=(200, 200, 200, 200, 200),
         dropout_embed=0.2,
         dropout_hidden=0.2,
         weight_norm=True,
         loss: Union[tf.keras.losses.Loss, str] = None,
         optimizer: Union[str, tf.keras.optimizers.Optimizer] = 'adam',
         metrics='accuracy',
         batch_size=100,
         epochs=100,
         logger=None,
         verbose=True,
         **kwargs):
     assert kwargs.get('run_eagerly',
                       True), 'NgramConvTaggingModel can only run eagerly'
     kwargs['run_eagerly'] = True
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #15
 def fit(
         self,
         trn_data,
         dev_data,
         save_dir,
         text_a_key=None,
         text_b_key=None,
         label_key=None,
         transformer=None,
         max_seq_length=512,
         truncate_long_sequences=True,
         # hidden_dropout_prob=0.0,
         lr=5e-5,
         transformer_lr=None,
         adam_epsilon=1e-6,
         weight_decay=0,
         warmup_steps=0.1,
         batch_size=32,
         batch_max_tokens=None,
         epochs=3,
         logger=None,
         # transform=None,
         devices: Union[float, int, List[int]] = None,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #16
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         transformer: ContextualWordEmbedding,
         sampler_builder=None,
         mix_embedding: int = 13,
         layer_dropout: float = 0.1,
         n_mlp_arc=768,
         n_mlp_rel=256,
         mlp_dropout=.33,
         lr=1e-3,
         transformer_lr=2.5e-5,
         patience=0.1,
         batch_size=32,
         epochs=30,
         gradient_accumulation=1,
         adam_epsilon=1e-8,
         weight_decay=0,
         warmup_steps=0.1,
         grad_norm=1.0,
         tree=False,
         proj=False,
         punct=False,
         logger=None,
         verbose=True,
         devices: Union[float, int, List[int]] = None,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #17
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         transformer,
         sent_a_col,
         sent_b_col,
         similarity_col,
         delimiter='auto',
         batch_size=32,
         max_seq_len=128,
         epochs=3,
         lr=1e-3,
         transformer_lr=5e-5,
         adam_epsilon=1e-8,
         weight_decay=0.0,
         warmup_steps=0.1,
         gradient_accumulation=1,
         grad_norm=1.0,
         sampler_builder=None,
         devices=None,
         logger=None,
         seed=None,
         finetune: Union[bool, str] = False,
         eval_trn=True,
         _device_placeholder=False,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #18
File: rnn_ner.py Project: lei1993/HanLP
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         batch_size=50,
         epochs=100,
         embed=100,
         rnn_input=None,
         rnn_hidden=256,
         drop=0.5,
         lr=0.001,
         patience=10,
         crf=True,
         optimizer='adam',
         token_key='token',
         tagging_scheme=None,
         anneal_factor: float = 0.5,
         delimiter=None,
         anneal_patience=2,
         devices=None,
         token_delimiter=None,
         logger=None,
         verbose=True,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #19
    def __init__(self,
                 trn: str = None,
                 dev: str = None,
                 tst: str = None,
                 sampler_builder: SamplerBuilder = None,
                 dependencies: str = None,
                 scalar_mix: ScalarMixWithDropoutBuilder = None,
                 use_raw_hidden_states=False,
                 lr=2e-3,
                 separate_optimizer=False,
                 punct=False,
                 tree=True,
                 pad_rel=None,
                 apply_constraint=False,
                 single_root=True,
                 no_zero_head=None,
                 n_mlp_arc=500,
                 n_mlp_rel=100,
                 mlp_dropout=.33,
                 mu=.9,
                 nu=.9,
                 epsilon=1e-12,
                 decay=.75,
                 decay_steps=5000,
                 cls_is_bos=True,
                 use_pos=False,
                 **kwargs) -> None:
        r"""Implementation of "Stanford's graph-based neural dependency parser at
        the CoNLL 2017 shared task" (:cite:`dozat2017stanford`).

        Args:
            trn: Path to training set.
            dev: Path to dev set.
            tst: Path to test set.
            sampler_builder: A builder which builds a sampler.
            dependencies: Its dependencies on other tasks.
            scalar_mix: A builder which builds a `ScalarMixWithDropout` object.
            use_raw_hidden_states: Whether to use raw hidden states from transformer without any pooling.
            lr: Learning rate for this task.
            separate_optimizer: Use customized separate optimizer for this task.
            punct: ``True`` to include punctuations in evaluation.
            pad_rel: Padding token for relations.
            apply_constraint: Enforce constraints (see following parameters).
            single_root: Force single root.
            no_zero_head: Every token has at least one head.
            n_mlp_arc: Number of features for arc representation.
            n_mlp_rel: Number of features for rel representation.
            mlp_dropout: Dropout applied to MLPs.
            mu: First coefficient used for computing running averages of gradient and its square in Adam.
            nu: Second coefficient used for computing running averages of gradient and its square in Adam.
            epsilon: Term added to the denominator to improve numerical stability.
            decay: Decay rate of the exponential lr scheduler.
            decay_steps: Decay every ``decay_steps`` steps.
            cls_is_bos: ``True`` to treat the first token as ``BOS``.
            use_pos: Use pos feature.
            **kwargs: Not used.
        """
        super().__init__(**merge_locals_kwargs(locals(), kwargs))
        self.vocabs = VocabDict()
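The mu, nu and epsilon documented above read like Adam's two beta coefficients and its eps term. Under that assumption (it is an interpretation of the docstring, not a quote from HanLP's optimizer code), the equivalent PyTorch optimizer would be configured roughly like this:

 import torch

 params = [torch.nn.Parameter(torch.zeros(1))]  # placeholder parameter list
 optimizer = torch.optim.Adam(params, lr=2e-3, betas=(0.9, 0.9), eps=1e-12)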
Example #20
File: conll_tf.py Project: lei1993/HanLP
 def __init__(self, config: SerializableDict = None, map_x=True, map_y=True, lower=True, n_buckets=32, min_freq=2,
              use_pos=True, **kwargs) -> None:
     super().__init__(**merge_locals_kwargs(locals(), kwargs))
     self.form_vocab: VocabTF = None
     if use_pos:
         self.cpos_vocab: VocabTF = None
     self.rel_vocab: VocabTF = None
     self.puncts: tf.Tensor = None
Example #21
File: pos.py Project: lei1993/HanLP
    def __init__(self,
                 trn: str = None,
                 dev: str = None,
                 tst: str = None,
                 sampler_builder: SamplerBuilder = None,
                 dependencies: str = None,
                 scalar_mix: ScalarMixWithDropoutBuilder = None,
                 use_raw_hidden_states=False,
                 lr=1e-3,
                 separate_optimizer=False,
                 cls_is_bos=False,
                 sep_is_eos=False,
                 max_seq_len=None,
                 sent_delimiter=None,
                 char_level=False,
                 hard_constraint=False,
                 crf=False,
                 token_key='token',
                 dict_tags: Union[DictInterface,
                                  Union[Dict[Union[str, Sequence[str]],
                                             Union[str,
                                                   Sequence[str]]]]] = None,
                 **kwargs) -> None:
        """A simple tagger using a linear layer with an optional CRF (:cite:`lafferty2001conditional`) layer for
        any tagging tasks including PoS tagging and many others. It also features a custom dictionary ``dict_tags``
        that performs ``longest-prefix-matching`` to replace matched tokens with given tags.


        .. Note:: For algorithm beginners, longest-prefix-matching is the prerequisite to understanding what a dictionary
            can and cannot do. The tutorial in `this book <http://nlp.hankcs.com/book.php>`_ can be very helpful.

        Args:
            trn: Path to training set.
            dev: Path to dev set.
            tst: Path to test set.
            sampler_builder: A builder which builds a sampler.
            dependencies: Its dependencies on other tasks.
            scalar_mix: A builder which builds a `ScalarMixWithDropout` object.
            use_raw_hidden_states: Whether to use raw hidden states from transformer without any pooling.
            lr: Learning rate for this task.
            separate_optimizer: Use customized separate optimizer for this task.
            cls_is_bos: ``True`` to treat the first token as ``BOS``.
            sep_is_eos: ``True`` to treat the last token as ``EOS``.
            max_seq_len: Sentences longer than ``max_seq_len`` will be split into shorter ones if possible.
            sent_delimiter: Delimiter between sentences, like period or comma, which indicates a long sentence can
                be split here.
            char_level: Whether the sequence length is measured at char level, which is never the case for
                lemmatization.
            hard_constraint: Whether to enforce hard length constraint on sentences. If there is no ``sent_delimiter``
                in a sentence, it will be split at a token anyway.
            crf: ``True`` to enable CRF (:cite:`lafferty2001conditional`).
            token_key: The key to tokens in dataset. This should always be set to ``token`` in MTL.
            dict_tags: A custom dictionary to override predicted tags by performing longest-prefix-matching.
            **kwargs: Not used.
        """
        super().__init__(**merge_locals_kwargs(locals(), kwargs))
        self.vocabs = VocabDict()
        self.dict_tags = dict_tags
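dict_tags performs longest-prefix-matching over the token sequence to override predicted tags. The sketch below is a conceptual illustration of that matching strategy, not HanLP's actual implementation, and the dictionary entries are made up:

 def longest_prefix_tags(tokens, dict_tags):
     # At each position try the longest span first; on a hit, override its tags.
     tags = [None] * len(tokens)
     max_len = max(len(key) for key in dict_tags)
     i = 0
     while i < len(tokens):
         for n in range(min(max_len, len(tokens) - i), 0, -1):
             span = tuple(tokens[i:i + n])
             if span in dict_tags:
                 tags[i:i + n] = dict_tags[span]
                 i += n
                 break
         else:
             i += 1
     return tags

 dict_tags = {('New', 'York'): ['NNP', 'NNP'], ('York',): ['NN']}
 print(longest_prefix_tags(['I', 'love', 'New', 'York'], dict_tags))
 # [None, None, 'NNP', 'NNP'] -- the longer match wins over ('York',)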
Example #22
    def __init__(self,
                 trn: str = None,
                 dev: str = None,
                 tst: str = None,
                 sampler_builder: SamplerBuilder = None,
                 dependencies: str = None,
                 scalar_mix: ScalarMixWithDropoutBuilder = None,
                 use_raw_hidden_states=False,
                 lr=1e-3,
                 separate_optimizer=False,
                 lexical_dropout=0.5,
                 dropout=0.2,
                 span_width_feature_size=20,
                 ffnn_size=150,
                 ffnn_depth=2,
                 argument_ratio=0.8,
                 predicate_ratio=0.4,
                 max_arg_width=30,
                 mlp_label_size=100,
                 enforce_srl_constraint=False,
                 use_gold_predicates=False,
                 doc_level_offset=True,
                 use_biaffine=False,
                 loss_reduction='mean',
                 with_argument=' ',
                 **kwargs) -> None:
        r""" An implementation of "Jointly Predicting Predicates and Arguments in Neural Semantic Role Labeling"
        (:cite:`he-etal-2018-jointly`). It generates candidate triples of (predicate, arg_start, arg_end) and ranks them.

        Args:
            trn: Path to training set.
            dev: Path to dev set.
            tst: Path to test set.
            sampler_builder: A builder which builds a sampler.
            dependencies: Its dependencies on other tasks.
            scalar_mix: A builder which builds a `ScalarMixWithDropout` object.
            use_raw_hidden_states: Whether to use raw hidden states from transformer without any pooling.
            lr: Learning rate for this task.
            separate_optimizer: Use customized separate optimizer for this task.
            lexical_dropout: Dropout applied to hidden states of encoder.
            dropout: Dropout used for other layers except the encoder.
            span_width_feature_size: Span width feature size.
            ffnn_size: Feedforward size.
            ffnn_depth: Number of layers of feedforward MLPs.
            argument_ratio: Ratio of candidate arguments over number of tokens.
            predicate_ratio: Ratio of candidate predicates over number of tokens.
            max_arg_width: Maximum argument width.
            mlp_label_size: Feature size for label representation.
            enforce_srl_constraint: Enforce SRL constraints (number of core ARGs etc.).
            use_gold_predicates: Use gold predicates instead of predicting them.
            doc_level_offset: ``True`` to indicate the offsets in ``jsonlines`` are of document level.
            use_biaffine: ``True`` to use biaffine (:cite:`dozat:17a`) instead of a linear layer for label prediction.
            loss_reduction: The loss reduction used in aggregating losses.
            with_argument: The delimiter used to join the tokens of an argument in the output.
            **kwargs: Not used.
        """
        super().__init__(**merge_locals_kwargs(locals(), kwargs))
        self.vocabs = VocabDict()
Example #23
File: dep.py Project: zouyanjian/HanLP
    def __init__(self,
                 trn: str = None,
                 dev: str = None,
                 tst: str = None,
                 sampler_builder: SamplerBuilder = None,
                 dependencies: str = None,
                 scalar_mix: ScalarMixWithDropoutBuilder = None,
                 use_raw_hidden_states=False,
                 lr=2e-3,
                 separate_optimizer=False,
                 cls_is_bos=True,
                 sep_is_eos=False,
                 punct=False,
                 tree=False,
                 proj=False,
                 n_mlp_arc=500,
                 n_mlp_rel=100,
                 mlp_dropout=.33,
                 mu=.9,
                 nu=.9,
                 epsilon=1e-12,
                 decay=.75,
                 decay_steps=5000,
                 use_pos=False,
                 max_seq_len=None,
                 **kwargs) -> None:
        """Biaffine dependency parsing (:cite:`dozat:17a`).

        Args:
            trn: Path to training set.
            dev: Path to dev set.
            tst: Path to test set.
            sampler_builder: A builder which builds a sampler.
            dependencies: Its dependencies on other tasks.
            scalar_mix: A builder which builds a `ScalarMixWithDropout` object.
            use_raw_hidden_states: Whether to use raw hidden states from transformer without any pooling.
            lr: Learning rate for this task.
            separate_optimizer: Use customized separate optimizer for this task.
            cls_is_bos: ``True`` to treat the first token as ``BOS``.
            sep_is_eos: ``True`` to treat the last token as ``EOS``.
            punct: ``True`` to include punctuations in evaluation.
            tree: ``True`` to enforce tree constraint.
            proj: ``True`` for projective parsing.
            n_mlp_arc: Number of features for arc representation.
            n_mlp_rel: Number of features for rel representation.
            mlp_dropout: Dropout applied to MLPs.
            mu: First coefficient used for computing running averages of gradient and its square in Adam.
            nu: Second coefficient used for computing running averages of gradient and its square in Adam.
            epsilon: Term added to the denominator to improve numerical stability.
            decay: Decay rate of the exponential lr scheduler.
            decay_steps: Decay every ``decay_steps`` steps.
            use_pos: Use pos feature.
            max_seq_len: Prune samples longer than this length.
            **kwargs: Not used.
        """
        super().__init__(**merge_locals_kwargs(locals(), kwargs))
        self.vocabs = VocabDict()
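decay and decay_steps describe an exponential learning-rate schedule. One common reading (an interpretation, not necessarily HanLP's exact formula) is lr * decay ** (step / decay_steps), which with the defaults above gives:

 lr, decay, decay_steps = 2e-3, 0.75, 5000
 step = 10000
 print(lr * decay ** (step / decay_steps))  # 0.001125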
Example #24
File: conll_tf.py Project: lei1993/HanLP
 def __init__(self, config: SerializableDict = None, map_x=True, map_y=True,
              lower=True, n_buckets=32, min_freq=0, max_seq_length=256, use_pos=False,
              mask_p=None, graph=False, topk=None,
              **kwargs) -> None:
     super().__init__(**merge_locals_kwargs(locals(), kwargs))
     self.tokenizer: PreTrainedTokenizer = None
     self.transformer_config: PretrainedConfig = None
     if graph:
         self.orphan_relation = ROOT
Example #25
File: tag_tok.py Project: lei1993/HanLP
    def __init__(self,
                 trn: str = None,
                 dev: str = None,
                 tst: str = None,
                 sampler_builder: SamplerBuilder = None,
                 dependencies: str = None,
                 scalar_mix: ScalarMixWithDropoutBuilder = None,
                 use_raw_hidden_states=False,
                 lr=1e-3, separate_optimizer=False,
                 cls_is_bos=True,
                 sep_is_eos=True,
                 delimiter=None,
                 max_seq_len=None, sent_delimiter=None, char_level=False, hard_constraint=False,
                 transform=None,
                 tagging_scheme='BMES',
                 crf=False,
                 token_key='token',
                 dict_force: Union[DictInterface, Union[Dict[str, Any], Set[str]]] = None,
                 dict_combine: Union[DictInterface, Union[Dict[str, Any], Set[str]]] = None,
                 **kwargs) -> None:
        """Tokenization which casts a chunking problem into a tagging problem.
        This task has to create batches of tokens containing both [CLS] and [SEP] since it's usually the first task
        and later tasks might need them.

        Args:
            trn: Path to training set.
            dev: Path to dev set.
            tst: Path to test set.
            sampler_builder: A builder which builds a sampler.
            dependencies: Its dependencies on other tasks.
            scalar_mix: A builder which builds a `ScalarMixWithDropout` object.
            use_raw_hidden_states: Whether to use raw hidden states from transformer without any pooling.
            lr: Learning rate for this task.
            separate_optimizer: Use customized separate optimizer for this task.
            cls_is_bos: ``True`` to treat the first token as ``BOS``.
            sep_is_eos: ``True`` to treat the last token as ``EOS``.
            delimiter: Delimiter used to split a line in the corpus.
            max_seq_len: Sentences longer than ``max_seq_len`` will be split into shorter ones if possible.
            sent_delimiter: Delimiter between sentences, like period or comma, which indicates a long sentence can
                be split here.
            char_level: Whether the sequence length is measured at char level.
            hard_constraint: Whether to enforce hard length constraint on sentences. If there is no ``sent_delimiter``
                in a sentence, it will be split at a token anyway.
            transform: An optional transform to be applied to samples. Usually a character normalization transform is
                passed in.
            tagging_scheme: Either ``BMES`` or ``BI``.
            crf: ``True`` to enable CRF (:cite:`lafferty2001conditional`).
            token_key: The key to tokens in dataset. This should always be set to ``token`` in MTL.
            **kwargs: Not used.
        """
        super().__init__(**merge_locals_kwargs(locals(), kwargs, excludes=(
            'self', 'kwargs', '__class__', 'dict_force', 'dict_combine')))  # keep the dictionaries out of the saved config
        self.transform = transform
        self.vocabs = VocabDict()
        self.dict_force = dict_force
        self.dict_combine = dict_combine
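Example #25 casts tokenization into sequence tagging: with the default BMES scheme every character of a word is tagged as Begin, Middle, End or Single. A tiny sketch of that encoding (the conversion function here is illustrative, not part of HanLP):

 def to_bmes(words):
     # Encode a word segmentation as per-character BMES tags.
     tags = []
     for w in words:
         if len(w) == 1:
             tags.append('S')
         else:
             tags.extend(['B'] + ['M'] * (len(w) - 2) + ['E'])
     return tags

 print(to_bmes(['商品', '和', '服务']))  # ['B', 'E', 'S', 'B', 'E']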
Example #26
    def fit(self, trn_data, dev_data, save_dir, transformer, average_subwords=False, word_dropout: float = 0.2,
            hidden_dropout=None, layer_dropout=0, scalar_mix=None, grad_norm=5.0,
            transformer_grad_norm=None, lr=5e-5,
            transformer_lr=None, transformer_layers=None, gradient_accumulation=1,
            adam_epsilon=1e-8, weight_decay=0, warmup_steps=0.1, crf=False, reduction='sum',
            batch_size=32, sampler_builder: SamplerBuilder = None, epochs=30, patience=5, token_key=None,
            tagging_scheme='BMES', delimiter=None,
            max_seq_len=None, sent_delimiter=None, char_level=False, hard_constraint=False, transform=None, logger=None,
            devices: Union[float, int, List[int]] = None, **kwargs):
        """

        Args:
            trn_data: Training set.
            dev_data: Development set.
            save_dir: The directory to save trained component.
            transformer: An identifier of a pre-trained transformer.
            average_subwords: ``True`` to average subword representations.
            word_dropout: Dropout rate to randomly replace a subword with MASK.
            hidden_dropout: Dropout rate applied to hidden states.
            layer_dropout: Randomly zero out hidden states of a transformer layer.
            scalar_mix: Layer attention.
            grad_norm: Gradient norm for clipping.
            transformer_grad_norm: Gradient norm for clipping transformer gradient.
            lr: Learning rate for decoder.
            transformer_lr: Learning rate for encoder.
            transformer_layers: The number of bottom layers to use.
            gradient_accumulation: Number of batches per update.
            adam_epsilon: The epsilon to use in Adam.
            weight_decay: The weight decay to use.
            warmup_steps: The number of warmup steps.
            crf: ``True`` to enable CRF (:cite:`lafferty2001conditional`).
            reduction: The loss reduction used in aggregating losses.
            batch_size: The number of samples in a batch.
            sampler_builder: The builder to build a sampler, which will override batch_size.
            epochs: The number of epochs to train.
            patience: The number of patience epochs before early stopping.
            token_key: The key to tokens in dataset.
            tagging_scheme: Either ``BMES`` or ``BI``.
            delimiter: Delimiter between tokens used to split a line in the corpus.
            max_seq_len: Sentences longer than ``max_seq_len`` will be split into shorter ones if possible.
            sent_delimiter: Delimiter between sentences, like period or comma, which indicates a long sentence can
                be split here.
            char_level: Whether the sequence length is measured at char level.
            hard_constraint: Whether to enforce hard length constraint on sentences. If there is no ``sent_delimiter``
                in a sentence, it will be split at a token anyway.
            transform: An optional transform to be applied to samples. Usually a character normalization transform is
                passed in.
            devices: Devices this component will live on.
            logger: Any :class:`logging.Logger` instance.
            seed: Random seed to reproduce this training.
            **kwargs: Not used.

        Returns:
            Best metrics on dev set.
        """
        return super().fit(**merge_locals_kwargs(locals(), kwargs))
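gradient_accumulation in the docstring above counts batches per optimizer update, so the number of samples contributing to a single update scales with it:

 batch_size = 32
 gradient_accumulation = 4
 samples_per_update = batch_size * gradient_accumulation  # gradients of 4 batches accumulated per step
 print(samples_per_update)  # 128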
Example #27
 def __init__(self,
              config: SerializableDict = None,
              map_x=True,
              map_y=True,
              use_char=False,
              **kwargs) -> None:
     super().__init__(**merge_locals_kwargs(locals(), kwargs))
     self.word_vocab: Optional[VocabTF] = None
     self.tag_vocab: Optional[VocabTF] = None
     self.char_vocab: Optional[VocabTF] = None
Example #28
 def fit(self,
         trn_data,
         dev_data,
         save_dir,
         feat=None,
         n_embed=100,
         pretrained_embed=None,
         transformer=None,
         average_subwords=False,
         word_dropout: float = 0.2,
         transformer_hidden_dropout=None,
         layer_dropout=0,
         mix_embedding: int = None,
         embed_dropout=.33,
         n_lstm_hidden=400,
         n_lstm_layers=3,
         hidden_dropout=.33,
         n_mlp_arc=500,
         n_mlp_rel=100,
         mlp_dropout=.33,
         arc_dropout=None,
         rel_dropout=None,
         arc_loss_interpolation=0.4,
         lr=2e-3,
         transformer_lr=5e-5,
         mu=.9,
         nu=.9,
         epsilon=1e-12,
         clip=5.0,
         decay=.75,
         decay_steps=5000,
         weight_decay=0,
         warmup_steps=0.1,
         separate_optimizer=True,
         patience=100,
         batch_size=None,
         sampler_builder=None,
         lowercase=False,
         epochs=50000,
         apply_constraint=False,
         single_root=None,
         no_zero_head=None,
         punct=False,
         min_freq=2,
         logger=None,
         verbose=True,
         unk=UNK,
         pad_rel=None,
         max_sequence_length=512,
         gradient_accumulation=1,
         devices: Union[float, int, List[int]] = None,
         transform=None,
         **kwargs):
     return super().fit(**merge_locals_kwargs(locals(), kwargs))
Example #29
File: tacred.py Project: lei1993/HanLP
 def __init__(self,
              config: SerializableDict = None,
              map_x=True,
              map_y=True,
              lower=False,
              **kwargs) -> None:
     super().__init__(**merge_locals_kwargs(locals(), kwargs))
     self.token_vocab = VocabTF()
     self.pos_vocab = VocabTF(pad_token=None, unk_token=None)
     self.ner_vocab = VocabTF(pad_token=None)
     self.deprel_vocab = VocabTF(pad_token=None, unk_token=None)
     self.rel_vocab = VocabTF(pad_token=None, unk_token=None)
Example #30
 def distill(self,
             teacher: str,
             trn_data,
             dev_data,
             save_dir,
             transformer: str,
             batch_size=None,
             temperature_scheduler='flsw',
             epochs=None,
             devices=None,
             logger=None,
             seed=None,
             **kwargs):
     return super().distill(**merge_locals_kwargs(locals(), kwargs))