def __init__(
        self,
        embedding_dim: int,
        pooling_type: str,
        mlp_layer_dims: List[int],
        feature_buckets: Dict[int, int],
    ) -> None:
        super().__init__(embedding_dim)
        self.pooling_type = pooling_type
        self.mlp_layer_dims = mlp_layer_dims
        self.num_input_features = len(feature_buckets)
        input_dim = (self.num_input_features * embedding_dim
                     if self.pooling_type == "none" else embedding_dim)
        self.mlp = nn.Sequential(*(nn.Sequential(nn.Linear(m, n), nn.ReLU())
                                   for m, n in zip(
                                       [input_dim] + list(mlp_layer_dims),
                                       mlp_layer_dims,
                                   )))

        self.feature_buckets = {int(k): v for k, v in feature_buckets.items()}
        self.feature_embeddings = nn.ModuleDict({
            str(k): nn.Embedding(v, embedding_dim)
            for k, v in feature_buckets.items()
        })
        log_class_usage(__class__)
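
A minimal, self-contained sketch of the MLP-construction pattern used above: pairing `[input_dim] + mlp_layer_dims` with `mlp_layer_dims` via `zip` yields one `Linear + ReLU` block per hidden dimension. The dimensions below are illustrative toy values, not the snippet's config.

import torch
from torch import nn

input_dim = 16
mlp_layer_dims = [32, 8]

# zip([16, 32, 8], [32, 8]) -> (16, 32), (32, 8): one Linear+ReLU per hidden dim.
mlp = nn.Sequential(*(
    nn.Sequential(nn.Linear(m, n), nn.ReLU())
    for m, n in zip([input_dim] + list(mlp_layer_dims), mlp_layer_dims)
))

x = torch.randn(4, input_dim)   # batch of 4 pooled feature vectors
print(mlp(x).shape)             # torch.Size([4, 8]) -- last entry of mlp_layer_dims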
Example #2
 def __init__(
     self,
     loss_fn: Loss,
     ignore_impossible: bool = Config.ignore_impossible,
     pos_loss_weight: float = Config.pos_loss_weight,
     has_answer_loss_weight: float = Config.has_answer_loss_weight,
     has_answer_labels: Iterable[str] = ("False", "True"),
     false_label: str = Config.false_label,
     max_answer_len: int = Config.max_answer_len,
     hard_weight: float = Config.hard_weight,
     use_zero_answer: bool = Config.use_zero_answer,
     is_kd: bool = False,
 ) -> None:
     super().__init__(loss_fn=loss_fn)
     self.pos_loss_weight = pos_loss_weight
     self.has_answer_loss_weight = has_answer_loss_weight
     self.has_answer_labels = has_answer_labels
     self.ignore_impossible = ignore_impossible
     self.max_answer_len = max_answer_len
     if not ignore_impossible:
         self.false_idx = 1 if has_answer_labels[1] == false_label else 0
         self.true_idx = 1 - self.false_idx
     self.is_kd = is_kd
     self.hard_weight = hard_weight
     self.use_zero_answer = use_zero_answer
     log_class_usage(__class__)
Example #3
    def __init__(
        self,
        embedding_dim: int,
        weight_scale: float,
        embedding_bag_mode: str,
        ignore_weight: bool,
        pooling_type: str,
        mlp_layer_dims: List[int],
        feature_buckets: Dict[int, int],
    ) -> None:
        super().__init__(embedding_dim)

        self.weight_scale = weight_scale
        self.ignore_weight = ignore_weight
        if not ignore_weight:
            # Per-feature weights are only supported by EmbeddingBag in "sum" mode.
            assert embedding_bag_mode == "sum"
        self.pooling_type = pooling_type
        self.mlp_layer_dims = mlp_layer_dims

        self.feature_buckets = {int(k): v for k, v in feature_buckets.items()}
        self.feature_embeddings = nn.ModuleDict({
            str(k): nn.EmbeddingBag(v, embedding_dim, mode=embedding_bag_mode)
            for k, v in feature_buckets.items()
        })

        self.num_input_features = len(feature_buckets)
        input_dim = (self.num_input_features * embedding_dim
                     if self.pooling_type == "none" else embedding_dim)
        self.mlp = nn.Sequential(*(nn.Sequential(nn.Linear(m, n), nn.ReLU())
                                   for m, n in zip(
                                       [input_dim] + list(mlp_layer_dims),
                                       mlp_layer_dims,
                                   )))
        log_class_usage(__class__)
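
The `embedding_bag_mode == "sum"` assertion above reflects a PyTorch constraint: `nn.EmbeddingBag` only accepts `per_sample_weights` when `mode="sum"`. A small standalone sketch with toy bucket sizes:

import torch
from torch import nn

bag = nn.EmbeddingBag(num_embeddings=10, embedding_dim=4, mode="sum")

# Two "bags" of feature ids, flattened, with offsets marking where each bag starts.
ids = torch.tensor([1, 2, 3, 4, 5])
offsets = torch.tensor([0, 3])            # bag 0 = ids[0:3], bag 1 = ids[3:]
weights = torch.tensor([0.5, 1.0, 1.0, 2.0, 0.1])

# Weighted sum per bag; passing per_sample_weights with mode="mean" or "max" raises.
out = bag(ids, offsets, per_sample_weights=weights)
print(out.shape)                          # torch.Size([2, 4])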
Example #4
 def __init__(
     self,
     num_embeddings: int,
     embed_dim: int,
     out_channels: int,
     kernel_sizes: List[int],
     highway_layers: int,
     projection_dim: Optional[int],
     *args,
     **kwargs,
 ) -> None:
     output_dim = CNNCharacterEmbedding.output_dim(
         num_kernels=len(kernel_sizes),
         out_channels=out_channels,
         projection_dim=projection_dim,
     )
     super().__init__(output_dim)
     self.embedding = CNNCharacterEmbedding(
         num_embeddings=num_embeddings,
         embed_dim=embed_dim,
         out_channels=out_channels,
         kernel_sizes=kernel_sizes,
         highway_layers=highway_layers,
         projection_dim=projection_dim,
     )
     log_class_usage(__class__)
Example #5
    def __init__(self, config: Config, input_size: int, padding_value: float = 0.0):
        super().__init__()
        self.num_layers = config.num_layers
        self.dropout = nn.Dropout(config.dropout)
        self.concat_layers = config.concat_layers
        self.padding_value = padding_value
        self.rnns = nn.ModuleList()

        rnn_module = RNN_TYPE_DICT.get(config.rnn_type)
        assert rnn_module is not None, f"Unsupported rnn_type: {config.rnn_type}"
        for i in range(config.num_layers):
            input_size = input_size if i == 0 else 2 * config.hidden_size
            self.rnns.append(
                rnn_module(
                    input_size,
                    config.hidden_size,
                    num_layers=1,
                    bidirectional=config.bidirectional,
                )
            )
        self.representation_dim = (
            (config.num_layers if config.concat_layers else 1)
            * config.hidden_size
            * (2 if config.bidirectional else 1)
        )
        log_class_usage(__class__)
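
A standalone sketch of the layer-stacking logic above: layer 0 consumes the raw input size, every later layer consumes `2 * hidden_size` because the previous bidirectional layer concatenates both directions, and `concat_layers` multiplies the final representation width by the number of layers. Sizes here are toy values, not the snippet's config.

import torch
from torch import nn

input_size, hidden_size, num_layers, concat_layers = 8, 16, 3, True

rnns = nn.ModuleList()
for i in range(num_layers):
    layer_in = input_size if i == 0 else 2 * hidden_size  # bidirectional doubles width
    rnns.append(nn.LSTM(layer_in, hidden_size, num_layers=1, bidirectional=True))

x = torch.randn(5, 2, input_size)            # (seq_len, batch, input_size)
outputs = []
for rnn in rnns:
    x, _ = rnn(x)                            # (seq_len, batch, 2 * hidden_size)
    outputs.append(x)

rep = torch.cat(outputs, dim=-1) if concat_layers else outputs[-1]
print(rep.shape[-1])                         # 3 * 16 * 2 = 96, matching representation_dim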
Example #6
    def __init__(self, config: Config, embed_dim: int) -> None:
        super().__init__(config)

        self.dropout = nn.Dropout(config.dropout)

        # BiLSTM representation.
        self.lstm = create_module(config.lstm, embed_dim=embed_dim)

        # Slot attention.
        self.attention = None
        word_representation_dim = self.lstm.representation_dim
        if config.slot_attention:
            self.attention = SlotAttention(config.slot_attention,
                                           self.lstm.representation_dim,
                                           batch_first=True)
            word_representation_dim += self.lstm.representation_dim

        # Projection over attended representation.
        self.dense = None
        self.representation_dim: int = self.lstm.representation_dim
        if config.mlp_decoder:
            self.dense = MLPDecoder(config.mlp_decoder,
                                    in_dim=self.lstm.representation_dim)
            self.representation_dim = self.dense.out_dim
        log_class_usage(__class__)
Example #7
    def __init__(self, config: Config, embed_dim: int) -> None:
        """embed_dim is the dimension of embedded_tokens
        """
        super().__init__(config)

        self.dropout = nn.Dropout(config.dropout)

        # Document attention.
        self.attention = (
            create_module(config.pooling, n_input=embed_dim)
            if config.pooling is not None
            else None
        )

        # Non-linear projection over attended representation.
        self.dense = None
        if (
            isinstance(config.pooling, BoundaryPool.Config)
            and config.pooling.boundary_type == "firstlast"
        ):
            # the dimension doubles because bos and eos representations are concatenated
            self.representation_dim = embed_dim * 2
        else:
            self.representation_dim = embed_dim

        if config.mlp_decoder:
            self.dense = MLPDecoder(config.mlp_decoder, in_dim=embed_dim)
            self.representation_dim = self.dense.out_dim
        log_class_usage(__class__)
Example #8
 def __init__(self, config: Config, embed_dim: int) -> None:
     super().__init__(config)
     self.word_rep = create_module(config.word_representation, embed_dim)
     self.word_representation_dim = self.word_rep.representation_dim
     self.doc_representation_dim = self.word_rep.representation_dim
     self.pooling_type = config.pooling_type
     log_class_usage(__class__)
Example #9
 def __init__(
     self, doc_output: ClassificationOutputLayer, word_output: WordTaggingOutputLayer
 ) -> None:
     super().__init__()
     self.doc_output = doc_output
     self.word_output = word_output
     log_class_usage(__class__)
Example #10
    def __init__(self,
                 config: Config,
                 output_encoded_layers=False,
                 *args,
                 **kwargs) -> None:
        super().__init__(config)
        self.pooling = config.pooling
        self.output_dropout = nn.Dropout(config.output_dropout)
        self.output_encoded_layers = output_encoded_layers
        self.export = config.export

        assert (
            self.pooling != PoolingMethod.NO_POOL or self.output_encoded_layers
        ), "If PoolingMethod is no_pool then output_encoded_layers should be True"

        if self.pooling == PoolingMethod.AVG_CONCAT_LAST_4_LAYERS:
            representation_dim = config.embedding_dim * 4
        else:
            representation_dim = config.embedding_dim

        self.projection = (torch.nn.Linear(representation_dim,
                                           config.projection_dim)
                           if config.projection_dim > 0 else None)

        self.representation_dim = config.projection_dim or representation_dim
        self.normalize_output_rep = config.normalize_output_rep

        log_class_usage(__class__)
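
A sketch of what a pooling mode like `AVG_CONCAT_LAST_4_LAYERS` implies dimensionally: averaging each of the last four encoder layers over the token axis and concatenating the results gives `4 * embedding_dim`, which is why `representation_dim` is widened above. The exact pooling PyText applies may differ; this only illustrates the dimension math.

import torch

batch, seq_len, embedding_dim, num_layers = 2, 7, 32, 12

# Stand-in for the per-layer outputs an encoder would return.
encoded_layers = [torch.randn(batch, seq_len, embedding_dim) for _ in range(num_layers)]

# Mean-pool each of the last 4 layers over tokens, then concatenate along features.
pooled = torch.cat([layer.mean(dim=1) for layer in encoded_layers[-4:]], dim=-1)
print(pooled.shape)   # torch.Size([2, 128]) == (batch, 4 * embedding_dim)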
Example #11
 def __init__(
     self, sp_model_path: str = "", max_input_text_length: Optional[int] = None
 ):
     self.sp_model_path = sp_model_path
     self.max_input_text_length = max_input_text_length
     self._load_processor()
     log_class_usage(__class__)
Example #12
 def __init__(self, model: torch.jit.ScriptModule,
              tensorizer: ScriptTensorizer):
     super().__init__()
     self.model = model
     self.tensorizer = tensorizer
     self.argno = -1
     log_class_usage(self.__class__)
Example #13
    def __init__(
        self,
        model: RNNModel,
        output_layer: Seq2SeqOutputLayer,
        sequence_generator: ScriptedSequenceGenerator,
        src_vocab: Vocabulary,
        trg_vocab: Vocabulary,
        dictfeat_vocab: Vocabulary,
    ):
        BaseModel.__init__(self)
        self.model = model
        self.encoder = self.model.encoder
        self.decoder = self.model.decoder
        self.output_layer = output_layer
        self.sequence_generator = sequence_generator

        # Target vocab EOS index is useful for recognizing when to stop generating
        self.trg_eos_index = trg_vocab.get_eos_index()

        # Target vocab PAD index is useful for shifting source/target prior to decoding
        self.trg_pad_index = trg_vocab.get_pad_index()

        # Source, target and dictfeat vocab are needed for export so that we can handle
        # string input
        self.src_dict = src_vocab
        self.trg_dict = trg_vocab
        self.dictfeat_dict = dictfeat_vocab

        self.force_eval_predictions = False
        log_class_usage(__class__)
Example #14
    def __init__(self, config: Config, output_encoded_layers: bool,
                 **kwarg) -> None:
        super().__init__(config, output_encoded_layers=output_encoded_layers)
        # assert config.pretrained_encoder.load_path, "Load path cannot be empty."
        self.encoder = SentenceEncoder(transformer=Transformer(
            vocab_size=config.vocab_size,
            embedding_dim=config.embedding_dim,
            layers=[
                TransformerLayer(
                    embedding_dim=config.embedding_dim,
                    attention=MultiheadSelfAttention(
                        config.embedding_dim, config.num_attention_heads),
                ) for _ in range(config.num_encoder_layers)
            ],
        ))
        self.apply(init_params)
        if config.model_path:
            with PathManager.open(config.model_path, "rb") as f:
                roberta_state = torch.load(f,
                                           map_location=lambda s, l:
                                           default_restore_location(s, "cpu"))
            # If the model was previously loaded in PyText and fine-tuned, we
            # don't need the special state-dict translation; load it directly.
            if not config.is_finetuned:
                self.encoder.load_roberta_state_dict(roberta_state["model"])
            else:
                self.load_state_dict(roberta_state)

        self.representation_dim = self._embedding().weight.size(-1)
        log_class_usage(__class__)
Example #15
    def __init__(
        self,
        right_encoder,
        left_encoder,
        decoder,
        output_layer,
        use_shared_encoder,
        use_shared_embedding,
        vocab_size,
        hidden_dim,
        padding_idx,
        use_dense_in_decoder=False,
        stage=Stage.TRAIN,
    ) -> None:
        super().__init__(stage=stage)
        self.right_encoder = right_encoder
        self.use_shared_encoder = use_shared_encoder
        self.use_shared_embedding = use_shared_embedding
        self.use_dense_in_decoder = use_dense_in_decoder

        self.decoder = decoder
        if self.use_shared_encoder:
            self.module_list = [right_encoder, decoder]
        else:
            self.left_encoder = left_encoder
            self.module_list = [right_encoder, left_encoder, decoder]
        self.output_layer = output_layer
        self.vocab_size = vocab_size
        self.hidden_dim = hidden_dim
        self.padding_idx = padding_idx
        self.stage = stage
        log_class_usage(__class__)
Example #16
 def __init__(
     self,
     data_source: DataSource,
     tensorizers: Dict[str, Tensorizer],
     batcher: Batcher = None,
     sort_key: Optional[str] = None,
     in_memory: Optional[bool] = True,
     init_tensorizers: Optional[bool] = True,
     init_tensorizers_from_scratch: Optional[bool] = True,
 ):
     """This function should also initialize the passed in tensorizers with
     metadata they need for model construction."""
     self.data_source = data_source
     self.tensorizers = tensorizers
     self.batcher = batcher or Batcher()
     self.sort_key = sort_key
     self.in_memory = in_memory
     self.numberized_cache: MutableMapping[str, Any] = {}
     self.cache_mutex: Dict[str, bool] = {}
     full_train_data = (data_source.train_unsharded if isinstance(
         data_source, ShardedDataSource) else data_source.train)
     if init_tensorizers:
         initialize_tensorizers(self.tensorizers, full_train_data,
                                init_tensorizers_from_scratch)
     else:
         print(
             "Skipped initializing tensorizers since they are loaded from a "
             "previously saved state.")
     log_class_usage(__class__)
Example #17
    def __init__(
        self,
        eager_encoder: TransformerSentenceEncoderModule,
        tokens: torch.Tensor,
        segment_labels: torch.Tensor = None,
        positions: torch.Tensor = None,
    ) -> None:
        super().__init__()
        traceable_encoder = TraceableTransformerWrapper(eager_encoder)
        traced_encoder_inputs = self._prepare_inputs(tokens, segment_labels, positions)
        self.has_segment_labels = segment_labels is not None
        self.has_positions = positions is not None

        self.iter_ = 0

        # do not check trace because of non-deterministic ops (e.g. dropout)
        self.traced_encoder = torch.jit.trace(
            traceable_encoder, tuple(traced_encoder_inputs), check_trace=False
        )
        if torch.cuda.is_available():
            try:
                import torch_tvm

                torch_tvm.enable(
                    device_type="gpu",
                    device="cuda",
                    device_id=torch.cuda.current_device(),
                    is_training=True,
                )
                print("Using TVM in traced transformer")
            except ImportError:
                print("Not using TVM in traced transformer")
        log_class_usage(__class__)
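
A minimal sketch of the tracing call above: `torch.jit.trace` re-runs the traced module to verify outputs match unless `check_trace=False`, which is why the check is disabled for modules with non-deterministic ops such as dropout. The wrapper module below is a toy stand-in, not `TraceableTransformerWrapper`.

import torch
from torch import nn

class ToyEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 8)
        self.dropout = nn.Dropout(0.5)   # non-deterministic in training mode

    def forward(self, tokens):
        return self.dropout(self.linear(tokens))

encoder = ToyEncoder().train()
example_inputs = (torch.randn(2, 8),)

# check_trace=False skips the output-equality check, which dropout would fail.
traced = torch.jit.trace(encoder, example_inputs, check_trace=False)
print(traced(torch.randn(2, 8)).shape)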
Example #18
    def __init__(
        self,
        num_embeddings: int,
        embed_dim: int,
        out_channels: int,
        kernel_sizes: List[int],
        highway_layers: int,
        projection_dim: Optional[int],
        *args,
        **kwargs,
    ) -> None:
        conv_out_dim = len(kernel_sizes) * out_channels
        output_dim = projection_dim or conv_out_dim
        super().__init__(output_dim)

        self.char_embed = nn.Embedding(num_embeddings, embed_dim)
        self.convs = nn.ModuleList([
            # in_channels = embed_dim because input is treated as sequence
            # of dim [max_word_length] with embed_dim channels
            # Adding padding to provide robustness in cases where input
            # length is less than conv filter width
            nn.Conv1d(embed_dim, out_channels, K, padding=K // 2)
            for K in kernel_sizes
        ])
        self.highway = None
        if highway_layers > 0:
            self.highway = Highway(conv_out_dim, highway_layers)
        self.projection = None
        if projection_dim:
            self.projection = nn.Linear(conv_out_dim, projection_dim)
        log_class_usage(__class__)
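
A standalone sketch of how one of the convolutions above is applied: character embeddings are transposed so `embed_dim` becomes the channel axis, `padding=K // 2` keeps short words from shrinking below the filter width, and max-over-time pooling then yields a fixed-size vector per word. Shapes are toy values.

import torch
from torch import nn

num_chars, embed_dim, out_channels, K = 50, 16, 20, 5
max_word_length, batch = 9, 3

char_embed = nn.Embedding(num_chars, embed_dim)
conv = nn.Conv1d(embed_dim, out_channels, K, padding=K // 2)

chars = torch.randint(0, num_chars, (batch, max_word_length))
x = char_embed(chars)              # (batch, max_word_length, embed_dim)
x = x.transpose(1, 2)              # Conv1d wants (batch, channels=embed_dim, length)
x = torch.relu(conv(x))            # (batch, out_channels, max_word_length)
x, _ = x.max(dim=2)                # max-over-time pooling -> (batch, out_channels)
print(x.shape)                     # torch.Size([3, 20])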
Example #19
 def __init__(self,
              loss_fn: MSELoss,
              squash_to_unit_range: bool = False) -> None:
     super().__init__()
     self.loss_fn = loss_fn
     self.squash_to_unit_range = squash_to_unit_range
     log_class_usage(__class__)
Example #20
 def __init__(self, embedding_dim: int):
     super().__init__()
      # By default there is 1 embedding module (the module itself); for
      # EmbeddingList this count can be greater than 1.
     self.num_emb_modules = 1
     self.embedding_dim = embedding_dim
     log_class_usage(__class__)
Example #21
 def __init__(self,
              eager_encoder: TransformerSentenceEncoderModule) -> None:
     super().__init__()
     assert hasattr(eager_encoder, "traceable")
     assert eager_encoder.traceable
     self.eager_encoder = eager_encoder
     log_class_usage(__class__)
Example #22
    def __init__(self, config: Config, embed_dim: int) -> None:
        super().__init__(config)

        self.dropout = nn.Dropout(config.dropout)

        # BiLSTM representation.
        padding_value = (float("-inf") if isinstance(config.pooling,
                                                     MaxPool.Config) else 0.0)
        self.lstm = create_module(config.lstm,
                                  embed_dim=embed_dim,
                                  padding_value=padding_value)

        # Document attention.
        self.attention = (create_module(config.pooling,
                                        n_input=self.lstm.representation_dim)
                          if config.pooling is not None else None)

        # Non-linear projection over attended representation.
        self.dense = None
        self.representation_dim: int = self.lstm.representation_dim
        if config.mlp_decoder:
            self.dense = MLPDecoder(config.mlp_decoder,
                                    in_dim=self.lstm.representation_dim)
            self.representation_dim = self.dense.out_dim
        log_class_usage(__class__)
Example #23
    def __init__(
        self,
        embed_dim: int,
        num_heads: int,
        scaling: Optional[float] = None,
        dropout: float = 0.1,
    ):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads

        expected_scaling = float(1 / math.sqrt(self.head_dim))

        # for backward compatibility with previous default
        if not scaling and self.head_dim == 64:
            scaling = 0.125

        if not scaling:
            raise Exception(f"""
                Scaling not set. Please manually set scaling for transformers with
                head_dim != 64. The suggested value in this case is {expected_scaling},
                or float(1 / math.sqrt(head_dim))
                where head_dim = embed_dim // num_heads = {self.head_dim}
                and embed_dim = {embed_dim} and num_heads = {num_heads}.
                """)

        self.scaling = scaling
        self.dropout = nn.Dropout(dropout)
        self.input_projection = nn.Linear(embed_dim, 3 * embed_dim)
        self.output_projection = nn.Linear(embed_dim, embed_dim)
        log_class_usage(__class__)
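
A small check of the scaling logic above: the suggested value is `1 / sqrt(head_dim)`, and the hard-coded backward-compatible `0.125` is exactly that value when `head_dim == 64` (e.g. `embed_dim=1024`, `num_heads=16`). This is purely illustrative arithmetic.

import math

embed_dim, num_heads = 1024, 16
head_dim = embed_dim // num_heads             # 64

# 1 / sqrt(64) == 0.125, the hard-coded backward-compatible default above.
assert float(1 / math.sqrt(head_dim)) == 0.125

# For head_dim != 64 the constructor requires an explicit value, e.g.:
print(float(1 / math.sqrt(512 // 16)))        # head_dim = 32 -> ~0.1768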
Example #24
    def __init__(self, config: Config, embed_dim: Tuple[int, ...]) -> None:
        super().__init__(config)

        assert len(embed_dim) == 2

        if config.subrepresentation_right is not None:
            self.subrepresentations = nn.ModuleList([
                create_module(config.subrepresentation,
                              embed_dim=embed_dim[0]),
                create_module(config.subrepresentation_right,
                              embed_dim=embed_dim[1]),
            ])
            if config.encode_relations:
                assert (
                    self.subrepresentations[0].representation_dim ==
                    self.subrepresentations[1].representation_dim
                ), ("Representations must have the same dimension"
                    ", because `encode_relations` involves elementwise operations."
                    )
        else:
            assert embed_dim[0] == embed_dim[1], (
                "Embeddings must have the same dimension"
                ", because subrepresentation weights are tied.")
            subrep = create_module(config.subrepresentation,
                                   embed_dim=embed_dim[0])
            self.subrepresentations = nn.ModuleList([subrep, subrep])

        self.encode_relations = config.encode_relations
        self.representation_dim = self.subrepresentations[0].representation_dim
        if self.encode_relations:
            self.representation_dim *= 4
        else:
            self.representation_dim += self.subrepresentations[
                1].representation_dim
        log_class_usage(__class__)
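
The `representation_dim *= 4` above is consistent with a common way relation features are formed for a pair of encodings: concatenating the two representations with their element-wise difference and product. Whether PyText's `encode_relations` uses exactly this combination is an assumption here; the sketch only shows why the width quadruples.

import torch

rep_dim, batch = 32, 4
left = torch.randn(batch, rep_dim)
right = torch.randn(batch, rep_dim)

# [left, right, |left - right|, left * right] -> 4 * rep_dim features per pair.
pair_features = torch.cat([left, right, (left - right).abs(), left * right], dim=-1)
print(pair_features.shape)   # torch.Size([4, 128])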
Example #25
    def __init__(self, config: Config, in_dim: int, out_dim: int = 0) -> None:
        super().__init__(config)

        layers = []
        for dim in config.hidden_dims or []:
            layers.append(nn.Linear(in_dim, dim, config.bias))
            layers.append(get_activation(config.activation))
            if config.layer_norm:
                layers.append(nn.LayerNorm(dim))
            if config.dropout > 0:
                layers.append(nn.Dropout(config.dropout))
            in_dim = dim
        if config.out_dim is not None:
            out_dim = config.out_dim
        if out_dim > 0:
            layers.append(nn.Linear(in_dim, out_dim, config.bias))

        assert len(layers) > 0
        if config.spectral_normalization:
            layers[-1] = torch.nn.utils.spectral_norm(layers[-1])
        self.mlp = nn.Sequential(*layers)
        self.out_dim = out_dim if out_dim > 0 else config.hidden_dims[-1]
        self.temperature = config.temperature

        if config.load_model_path:
            with PathManager.open(config.load_model_path, "rb") as f:
                mlp_state = torch.load(f,
                                       map_location=lambda s, l:
                                       default_restore_location(s, "cpu"))
            print("loaded mlp state")
            self.load_state_dict(mlp_state, strict=config.load_strict)

        log_class_usage(__class__)
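
A minimal sketch of the `spectral_normalization` branch above: `torch.nn.utils.spectral_norm` re-parametrizes a layer so its weight is divided by an estimate of its largest singular value at each forward pass, a standard way to bound the Lipschitz constant of the final projection. Toy dimensions only.

import torch
from torch import nn

layers = [nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4)]

# Wrap the last Linear so its weight is normalized by its spectral norm.
layers[-1] = nn.utils.spectral_norm(layers[-1])
mlp = nn.Sequential(*layers)

print(mlp(torch.randn(2, 16)).shape)   # torch.Size([2, 4])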
Example #26
 def __init__(self, encoder1, encoder2, decoder, output_layer,
              encode_relations) -> None:
     super().__init__(decoder, output_layer, encode_relations)
     self.encoder1 = encoder1
     self.encoder2 = encoder2
     self.encoders = [encoder1, encoder2]
     log_class_usage(__class__)
Example #27
 def __init__(self, config: Config, output_encoded_layers: bool,
              **kwarg) -> None:
     super().__init__(config, output_encoded_layers=output_encoded_layers)
     assert config.pretrained_encoder.load_path, "Load path cannot be empty."
     self.encoder = create_module(config.pretrained_encoder)
     self.representation_dim = self.encoder.encoder.token_embedding.weight.size(
         -1)
     log_class_usage(__class__)
Example #28
 def __init__(self, models, loss_weights) -> None:
     models = nn.ModuleDict(models)
     super().__init__(None, None, None, None)
     self.models = models
     # make this a list to prevent registering in state_dict
     self._current_model = [next(iter(models.values()))]
     self.loss_weights = loss_weights
     log_class_usage(__class__)
Example #29
 def __init__(self, num_tags, labels: Vocabulary, *args) -> None:
     super().__init__(list(labels), *args)
     self.crf = CRF(
         num_tags=num_tags,
         ignore_index=labels.get_pad_index(Padding.DEFAULT_LABEL_PAD_IDX),
         default_label_pad_index=Padding.DEFAULT_LABEL_PAD_IDX,
     )
     log_class_usage(__class__)
Example #30
 def __init__(self, encoder, decoder, output_layer, stage=Stage.TRAIN) -> None:
     super().__init__(stage=stage)
     self.encoder = encoder
     self.decoder = decoder
     self.module_list = [encoder, decoder]
     self.output_layer = output_layer
     self.stage = stage
     log_class_usage(__class__)