def __init__(
    self,
    embedding_dim: int,
    pooling_type: str,
    mlp_layer_dims: List[int],
    feature_buckets: Dict[int, int],
) -> None:
    super().__init__(embedding_dim)
    self.pooling_type = pooling_type
    self.mlp_layer_dims = mlp_layer_dims
    self.num_input_features = len(feature_buckets)
    input_dim = (
        self.num_input_features * embedding_dim
        if self.pooling_type == "none"
        else embedding_dim
    )
    self.mlp = nn.Sequential(
        *(
            nn.Sequential(nn.Linear(m, n), nn.ReLU())
            for m, n in zip([input_dim] + list(mlp_layer_dims), mlp_layer_dims)
        )
    )
    self.feature_buckets = {int(k): v for k, v in feature_buckets.items()}
    self.feature_embeddings = nn.ModuleDict(
        {str(k): nn.Embedding(v, embedding_dim) for k, v in feature_buckets.items()}
    )
    log_class_usage(__class__)

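# A minimal sketch (not part of the source above) of the layer-size pairing the
# constructor uses: zipping [input_dim] + mlp_layer_dims against mlp_layer_dims
# yields the (in, out) shape of each nn.Linear. All values here are hypothetical.
import torch.nn as nn

input_dim = 32             # hypothetical pooled-embedding width
mlp_layer_dims = [64, 16]  # hypothetical hidden sizes

pairs = list(zip([input_dim] + mlp_layer_dims, mlp_layer_dims))
print(pairs)  # [(32, 64), (64, 16)]
mlp = nn.Sequential(
    *(nn.Sequential(nn.Linear(m, n), nn.ReLU()) for m, n in pairs)
)
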
def __init__(
    self,
    loss_fn: Loss,
    ignore_impossible: bool = Config.ignore_impossible,
    pos_loss_weight: float = Config.pos_loss_weight,
    has_answer_loss_weight: float = Config.has_answer_loss_weight,
    has_answer_labels: Iterable[str] = ("False", "True"),
    false_label: str = Config.false_label,
    max_answer_len: int = Config.max_answer_len,
    hard_weight: float = Config.hard_weight,
    use_zero_answer: bool = Config.use_zero_answer,
    is_kd: bool = False,
) -> None:
    super().__init__(loss_fn=loss_fn)
    self.pos_loss_weight = pos_loss_weight
    self.has_answer_loss_weight = has_answer_loss_weight
    self.has_answer_labels = has_answer_labels
    self.ignore_impossible = ignore_impossible
    self.max_answer_len = max_answer_len
    if not ignore_impossible:
        self.false_idx = 1 if has_answer_labels[1] == false_label else 0
        self.true_idx = 1 - self.false_idx
    self.is_kd = is_kd
    self.hard_weight = hard_weight
    self.use_zero_answer = use_zero_answer
    log_class_usage(__class__)

def __init__(
    self,
    embedding_dim: int,
    weight_scale: float,
    embedding_bag_mode: str,
    ignore_weight: bool,
    pooling_type: str,
    mlp_layer_dims: List[int],
    feature_buckets: Dict[int, int],
) -> None:
    super().__init__(embedding_dim)
    self.weight_scale = weight_scale
    self.ignore_weight = ignore_weight
    if not ignore_weight:
        # Per-sample weights are only supported by EmbeddingBag in "sum" mode.
        assert embedding_bag_mode == "sum"
    self.pooling_type = pooling_type
    self.mlp_layer_dims = mlp_layer_dims
    self.feature_buckets = {int(k): v for k, v in feature_buckets.items()}
    self.feature_embeddings = nn.ModuleDict(
        {
            str(k): nn.EmbeddingBag(v, embedding_dim, mode=embedding_bag_mode)
            for k, v in feature_buckets.items()
        }
    )
    self.num_input_features = len(feature_buckets)
    input_dim = (
        self.num_input_features * embedding_dim
        if self.pooling_type == "none"
        else embedding_dim
    )
    self.mlp = nn.Sequential(
        *(
            nn.Sequential(nn.Linear(m, n), nn.ReLU())
            for m, n in zip([input_dim] + list(mlp_layer_dims), mlp_layer_dims)
        )
    )
    log_class_usage(__class__)

def __init__(
    self,
    num_embeddings: int,
    embed_dim: int,
    out_channels: int,
    kernel_sizes: List[int],
    highway_layers: int,
    projection_dim: Optional[int],
    *args,
    **kwargs,
) -> None:
    output_dim = CNNCharacterEmbedding.output_dim(
        num_kernels=len(kernel_sizes),
        out_channels=out_channels,
        projection_dim=projection_dim,
    )
    super().__init__(output_dim)
    self.embedding = CNNCharacterEmbedding(
        num_embeddings=num_embeddings,
        embed_dim=embed_dim,
        out_channels=out_channels,
        kernel_sizes=kernel_sizes,
        highway_layers=highway_layers,
        projection_dim=projection_dim,
    )
    log_class_usage(__class__)

def __init__(self, config: Config, input_size: int, padding_value: float = 0.0):
    super().__init__()
    self.num_layers = config.num_layers
    self.dropout = nn.Dropout(config.dropout)
    self.concat_layers = config.concat_layers
    self.padding_value = padding_value
    self.rnns = nn.ModuleList()
    rnn_module = RNN_TYPE_DICT.get(config.rnn_type)
    assert rnn_module is not None, "rnn_cell cannot be None"
    for i in range(config.num_layers):
        input_size = input_size if i == 0 else 2 * config.hidden_size
        self.rnns.append(
            rnn_module(
                input_size,
                config.hidden_size,
                num_layers=1,
                bidirectional=config.bidirectional,
            )
        )
    self.representation_dim = (
        (config.num_layers if config.concat_layers else 1)
        * config.hidden_size
        * (2 if config.bidirectional else 1)
    )
    log_class_usage(__class__)

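# A quick check (not part of the source) of the representation_dim arithmetic
# above: with concat_layers the per-layer outputs are concatenated, so the width
# scales with num_layers, and bidirectionality doubles it. Values are made up.
def stacked_rnn_representation_dim(num_layers, hidden_size, concat_layers, bidirectional):
    # Mirrors the expression in the constructor above.
    return (
        (num_layers if concat_layers else 1)
        * hidden_size
        * (2 if bidirectional else 1)
    )

print(stacked_rnn_representation_dim(3, 128, concat_layers=True, bidirectional=True))   # 768
print(stacked_rnn_representation_dim(3, 128, concat_layers=False, bidirectional=True))  # 256
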
def __init__(self, config: Config, embed_dim: int) -> None:
    super().__init__(config)
    self.dropout = nn.Dropout(config.dropout)

    # BiLSTM representation.
    self.lstm = create_module(config.lstm, embed_dim=embed_dim)

    # Slot attention.
    self.attention = None
    word_representation_dim = self.lstm.representation_dim
    if config.slot_attention:
        self.attention = SlotAttention(
            config.slot_attention, self.lstm.representation_dim, batch_first=True
        )
        word_representation_dim += self.lstm.representation_dim

    # Projection over attended representation.
    self.dense = None
    self.representation_dim: int = self.lstm.representation_dim
    if config.mlp_decoder:
        self.dense = MLPDecoder(
            config.mlp_decoder, in_dim=self.lstm.representation_dim
        )
        self.representation_dim = self.dense.out_dim
    log_class_usage(__class__)

def __init__(self, config: Config, embed_dim: int) -> None:
    """embed_dim is the dimension of embedded_tokens."""
    super().__init__(config)
    self.dropout = nn.Dropout(config.dropout)

    # Document attention.
    self.attention = (
        create_module(config.pooling, n_input=embed_dim)
        if config.pooling is not None
        else None
    )

    # Non-linear projection over attended representation.
    self.dense = None
    if (
        isinstance(config.pooling, BoundaryPool.Config)
        and config.pooling.boundary_type == "firstlast"
    ):
        # The dimension doubles because the bos and eos representations
        # are concatenated.
        self.representation_dim = embed_dim * 2
    else:
        self.representation_dim = embed_dim
    if config.mlp_decoder:
        self.dense = MLPDecoder(config.mlp_decoder, in_dim=embed_dim)
        self.representation_dim = self.dense.out_dim
    log_class_usage(__class__)

def __init__(self, config: Config, embed_dim: int) -> None:
    super().__init__(config)
    self.word_rep = create_module(config.word_representation, embed_dim)
    self.word_representation_dim = self.word_rep.representation_dim
    self.doc_representation_dim = self.word_rep.representation_dim
    self.pooling_type = config.pooling_type
    log_class_usage(__class__)

def __init__(
    self, doc_output: ClassificationOutputLayer, word_output: WordTaggingOutputLayer
) -> None:
    super().__init__()
    self.doc_output = doc_output
    self.word_output = word_output
    log_class_usage(__class__)

def __init__(self, config: Config, output_encoded_layers=False, *args, **kwargs) -> None:
    super().__init__(config)
    self.pooling = config.pooling
    self.output_dropout = nn.Dropout(config.output_dropout)
    self.output_encoded_layers = output_encoded_layers
    self.export = config.export
    assert (
        self.pooling != PoolingMethod.NO_POOL or self.output_encoded_layers
    ), "If PoolingMethod is no_pool then output_encoded_layers should be True"
    if self.pooling == PoolingMethod.AVG_CONCAT_LAST_4_LAYERS:
        representation_dim = config.embedding_dim * 4
    else:
        representation_dim = config.embedding_dim
    self.projection = (
        torch.nn.Linear(representation_dim, config.projection_dim)
        if config.projection_dim > 0
        else None
    )
    self.representation_dim = config.projection_dim or representation_dim
    self.normalize_output_rep = config.normalize_output_rep
    log_class_usage(__class__)

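# A minimal sketch, assuming (from the name and the "* 4" above) that
# AVG_CONCAT_LAST_4_LAYERS mean-pools each of the last four encoder layers over
# the sequence and concatenates the results. The forward pass is not shown in
# the source, so this is an illustration, not the library's implementation.
import torch

embedding_dim = 8
# Hypothetical encoder output: 6 layers of [batch, seq_len, embedding_dim].
encoded_layers = [torch.randn(2, 5, embedding_dim) for _ in range(6)]

pooled = torch.cat([layer.mean(dim=1) for layer in encoded_layers[-4:]], dim=-1)
print(pooled.shape)  # torch.Size([2, 32]) == embedding_dim * 4
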
def __init__(
    self, sp_model_path: str = "", max_input_text_length: Optional[int] = None
):
    self.sp_model_path = sp_model_path
    self.max_input_text_length = max_input_text_length
    self._load_processor()
    log_class_usage(__class__)

def __init__(self, model: torch.jit.ScriptModule, tensorizer: ScriptTensorizer):
    super().__init__()
    self.model = model
    self.tensorizer = tensorizer
    self.argno = -1
    log_class_usage(self.__class__)

def __init__(
    self,
    model: RNNModel,
    output_layer: Seq2SeqOutputLayer,
    sequence_generator: ScriptedSequenceGenerator,
    src_vocab: Vocabulary,
    trg_vocab: Vocabulary,
    dictfeat_vocab: Vocabulary,
):
    BaseModel.__init__(self)
    self.model = model
    self.encoder = self.model.encoder
    self.decoder = self.model.decoder
    self.output_layer = output_layer
    self.sequence_generator = sequence_generator

    # Target vocab EOS index is useful for recognizing when to stop generating.
    self.trg_eos_index = trg_vocab.get_eos_index()
    # Target vocab PAD index is useful for shifting source/target prior to decoding.
    self.trg_pad_index = trg_vocab.get_pad_index()

    # Source, target, and dictfeat vocabs are needed for export so that we can
    # handle string input.
    self.src_dict = src_vocab
    self.trg_dict = trg_vocab
    self.dictfeat_dict = dictfeat_vocab

    self.force_eval_predictions = False
    log_class_usage(__class__)

def __init__(self, config: Config, output_encoded_layers: bool, **kwarg) -> None:
    super().__init__(config, output_encoded_layers=output_encoded_layers)
    # assert config.pretrained_encoder.load_path, "Load path cannot be empty."
    self.encoder = SentenceEncoder(
        transformer=Transformer(
            vocab_size=config.vocab_size,
            embedding_dim=config.embedding_dim,
            layers=[
                TransformerLayer(
                    embedding_dim=config.embedding_dim,
                    attention=MultiheadSelfAttention(
                        config.embedding_dim, config.num_attention_heads
                    ),
                )
                for _ in range(config.num_encoder_layers)
            ],
        )
    )
    self.apply(init_params)
    if config.model_path:
        with PathManager.open(config.model_path, "rb") as f:
            roberta_state = torch.load(
                f, map_location=lambda s, l: default_restore_location(s, "cpu")
            )
        # If the model has previously been loaded in PyText and fine-tuned, we
        # don't need the special state-dict translation; load it directly.
        if not config.is_finetuned:
            self.encoder.load_roberta_state_dict(roberta_state["model"])
        else:
            self.load_state_dict(roberta_state)
    self.representation_dim = self._embedding().weight.size(-1)
    log_class_usage(__class__)

def __init__(
    self,
    right_encoder,
    left_encoder,
    decoder,
    output_layer,
    use_shared_encoder,
    use_shared_embedding,
    vocab_size,
    hidden_dim,
    padding_idx,
    use_dense_in_decoder=False,
    stage=Stage.TRAIN,
) -> None:
    super().__init__(stage=stage)
    self.right_encoder = right_encoder
    self.use_shared_encoder = use_shared_encoder
    self.use_shared_embedding = use_shared_embedding
    self.use_dense_in_decoder = use_dense_in_decoder
    self.decoder = decoder
    if self.use_shared_encoder:
        self.module_list = [right_encoder, decoder]
    else:
        self.left_encoder = left_encoder
        self.module_list = [right_encoder, left_encoder, decoder]
    self.output_layer = output_layer
    self.vocab_size = vocab_size
    self.hidden_dim = hidden_dim
    self.padding_idx = padding_idx
    self.stage = stage
    log_class_usage(__class__)

def __init__(
    self,
    data_source: DataSource,
    tensorizers: Dict[str, Tensorizer],
    batcher: Batcher = None,
    sort_key: Optional[str] = None,
    in_memory: Optional[bool] = True,
    init_tensorizers: Optional[bool] = True,
    init_tensorizers_from_scratch: Optional[bool] = True,
):
    """This function should also initialize the passed-in tensorizers with
    the metadata they need for model construction."""
    self.data_source = data_source
    self.tensorizers = tensorizers
    self.batcher = batcher or Batcher()
    self.sort_key = sort_key
    self.in_memory = in_memory
    self.numberized_cache: MutableMapping[str, Any] = {}
    self.cache_mutex: Dict[str, bool] = {}
    full_train_data = (
        data_source.train_unsharded
        if isinstance(data_source, ShardedDataSource)
        else data_source.train
    )
    if init_tensorizers:
        initialize_tensorizers(
            self.tensorizers, full_train_data, init_tensorizers_from_scratch
        )
    else:
        print(
            "Skipped initializing tensorizers since they are loaded from a "
            "previously saved state."
        )
    log_class_usage(__class__)

def __init__(
    self,
    eager_encoder: TransformerSentenceEncoderModule,
    tokens: torch.Tensor,
    segment_labels: torch.Tensor = None,
    positions: torch.Tensor = None,
) -> None:
    super().__init__()
    traceable_encoder = TraceableTransformerWrapper(eager_encoder)
    traced_encoder_inputs = self._prepare_inputs(tokens, segment_labels, positions)
    self.has_segment_labels = segment_labels is not None
    self.has_positions = positions is not None
    self.iter_ = 0
    # Do not check the trace because of non-deterministic ops (e.g. dropout).
    self.traced_encoder = torch.jit.trace(
        traceable_encoder, tuple(traced_encoder_inputs), check_trace=False
    )
    if torch.cuda.is_available():
        try:
            import torch_tvm

            torch_tvm.enable(
                device_type="gpu",
                device="cuda",
                device_id=torch.cuda.current_device(),
                is_training=True,
            )
            print("Using TVM in traced transformer")
        except ImportError:
            print("Not using TVM in traced transformer")
    log_class_usage(__class__)

def __init__(
    self,
    num_embeddings: int,
    embed_dim: int,
    out_channels: int,
    kernel_sizes: List[int],
    highway_layers: int,
    projection_dim: Optional[int],
    *args,
    **kwargs,
) -> None:
    conv_out_dim = len(kernel_sizes) * out_channels
    output_dim = projection_dim or conv_out_dim
    super().__init__(output_dim)

    self.char_embed = nn.Embedding(num_embeddings, embed_dim)
    self.convs = nn.ModuleList(
        [
            # in_channels == embed_dim because the input is treated as a
            # sequence of length [max_word_length] with embed_dim channels.
            # Padding provides robustness when the input is shorter than
            # the conv filter width.
            nn.Conv1d(embed_dim, out_channels, K, padding=K // 2)
            for K in kernel_sizes
        ]
    )
    self.highway = None
    if highway_layers > 0:
        self.highway = Highway(conv_out_dim, highway_layers)
    self.projection = None
    if projection_dim:
        self.projection = nn.Linear(conv_out_dim, projection_dim)
    log_class_usage(__class__)

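# A small sketch (hypothetical sizes, not the source's forward pass) of the
# width arithmetic above: each conv emits out_channels features per position,
# and max-over-time pooling plus concatenation -- the standard char-CNN recipe --
# yields len(kernel_sizes) * out_channels, i.e. conv_out_dim.
import torch
import torch.nn as nn

embed_dim, out_channels, kernel_sizes = 16, 32, [1, 3, 5]
convs = nn.ModuleList(
    [nn.Conv1d(embed_dim, out_channels, K, padding=K // 2) for K in kernel_sizes]
)

chars = torch.randn(4, embed_dim, 10)  # hypothetical [batch, embed_dim, max_word_length]
# Max-over-time pool each conv's output, then concatenate across kernels.
features = torch.cat([conv(chars).max(dim=2).values for conv in convs], dim=1)
print(features.shape)  # torch.Size([4, 96]) == len(kernel_sizes) * out_channels
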
def __init__(self, loss_fn: MSELoss, squash_to_unit_range: bool = False) -> None:
    super().__init__()
    self.loss_fn = loss_fn
    self.squash_to_unit_range = squash_to_unit_range
    log_class_usage(__class__)

def __init__(self, embedding_dim: int):
    super().__init__()
    # By default this holds a single embedding (itself); for EmbeddingList
    # the count can be greater than 1.
    self.num_emb_modules = 1
    self.embedding_dim = embedding_dim
    log_class_usage(__class__)

def __init__(self, eager_encoder: TransformerSentenceEncoderModule) -> None:
    super().__init__()
    assert hasattr(eager_encoder, "traceable")
    assert eager_encoder.traceable
    self.eager_encoder = eager_encoder
    log_class_usage(__class__)

def __init__(self, config: Config, embed_dim: int) -> None:
    super().__init__(config)
    self.dropout = nn.Dropout(config.dropout)

    # BiLSTM representation.
    # Pad with -inf when max-pooling so that padded steps never win the max.
    padding_value = (
        float("-inf") if isinstance(config.pooling, MaxPool.Config) else 0.0
    )
    self.lstm = create_module(
        config.lstm, embed_dim=embed_dim, padding_value=padding_value
    )

    # Document attention.
    self.attention = (
        create_module(config.pooling, n_input=self.lstm.representation_dim)
        if config.pooling is not None
        else None
    )

    # Non-linear projection over attended representation.
    self.dense = None
    self.representation_dim: int = self.lstm.representation_dim
    if config.mlp_decoder:
        self.dense = MLPDecoder(
            config.mlp_decoder, in_dim=self.lstm.representation_dim
        )
        self.representation_dim = self.dense.out_dim
    log_class_usage(__class__)

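# A small illustration (made-up values, not from the source) of why
# padding_value switches to -inf for max pooling: zero padding can outscore
# genuinely negative activations, while -inf padding never wins the max.
import torch

outputs = torch.tensor([[-0.5, -1.2], [0.0, 0.0]])  # one real step, one padded step
print(outputs.max(dim=0).values)  # tensor([0., 0.]) -- padding wins

outputs[1] = float("-inf")
print(outputs.max(dim=0).values)  # tensor([-0.5000, -1.2000]) -- real values win
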
def __init__(
    self,
    embed_dim: int,
    num_heads: int,
    scaling: Optional[float] = None,
    dropout: float = 0.1,
):
    super().__init__()
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    self.head_dim = embed_dim // num_heads
    expected_scaling = float(1 / math.sqrt(self.head_dim))

    # For backward compatibility with the previous default.
    if not scaling and self.head_dim == 64:
        scaling = 0.125

    if not scaling:
        raise Exception(
            f"""
            Scaling not set. Please manually set scaling for transformers with
            head_dim != 64. The suggested value in this case is {expected_scaling},
            or float(1 / math.sqrt(head_dim))
            where head_dim = embed_dim // num_heads = {self.head_dim}
            and embed_dim = {embed_dim} and num_heads = {num_heads}.
            """
        )

    self.scaling = scaling
    self.dropout = nn.Dropout(dropout)
    self.input_projection = nn.Linear(embed_dim, 3 * embed_dim)
    self.output_projection = nn.Linear(embed_dim, embed_dim)
    log_class_usage(__class__)

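# A quick sanity check (not part of the source) of the backward-compatibility
# constant above: for head_dim == 64 the hard-coded 0.125 equals the general
# formula exactly, so old checkpoints behave identically; any other head_dim
# must pass scaling explicitly.
import math

assert float(1 / math.sqrt(64)) == 0.125  # 1/8 is exactly representable

embed_dim, num_heads = 512, 16  # hypothetical sizes -> head_dim = 32
scaling = float(1 / math.sqrt(embed_dim // num_heads))
print(scaling)  # 0.17677669529663687
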
def __init__(self, config: Config, embed_dim: Tuple[int, ...]) -> None:
    super().__init__(config)
    assert len(embed_dim) == 2

    if config.subrepresentation_right is not None:
        self.subrepresentations = nn.ModuleList(
            [
                create_module(config.subrepresentation, embed_dim=embed_dim[0]),
                create_module(config.subrepresentation_right, embed_dim=embed_dim[1]),
            ]
        )
        if config.encode_relations:
            assert (
                self.subrepresentations[0].representation_dim
                == self.subrepresentations[1].representation_dim
            ), (
                "Representations must have the same dimension, because "
                "`encode_relations` involves elementwise operations."
            )
    else:
        assert embed_dim[0] == embed_dim[1], (
            "Embeddings must have the same dimension, because "
            "subrepresentation weights are tied."
        )
        subrep = create_module(config.subrepresentation, embed_dim=embed_dim[0])
        self.subrepresentations = nn.ModuleList([subrep, subrep])

    self.encode_relations = config.encode_relations
    self.representation_dim = self.subrepresentations[0].representation_dim
    if self.encode_relations:
        self.representation_dim *= 4
    else:
        self.representation_dim += self.subrepresentations[1].representation_dim
    log_class_usage(__class__)

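# The "* 4" above is consistent with the common relation-encoding recipe of
# concatenating the two representations with their elementwise difference and
# product (as in InferSent-style pair models). The forward pass is not shown
# in the source, so this sketch is an assumption, with hypothetical shapes.
import torch

a = torch.randn(2, 128)  # hypothetical left subrepresentation
b = torch.randn(2, 128)  # hypothetical right subrepresentation

rel = torch.cat([a, b, a - b, a * b], dim=-1)
print(rel.shape)  # torch.Size([2, 512]) == 4 * representation_dim
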
def __init__(self, config: Config, in_dim: int, out_dim: int = 0) -> None:
    super().__init__(config)
    layers = []
    for dim in config.hidden_dims or []:
        layers.append(nn.Linear(in_dim, dim, config.bias))
        layers.append(get_activation(config.activation))
        if config.layer_norm:
            layers.append(nn.LayerNorm(dim))
        if config.dropout > 0:
            layers.append(nn.Dropout(config.dropout))
        in_dim = dim
    if config.out_dim is not None:
        out_dim = config.out_dim
    if out_dim > 0:
        layers.append(nn.Linear(in_dim, out_dim, config.bias))
    assert len(layers) > 0
    if config.spectral_normalization:
        layers[-1] = torch.nn.utils.spectral_norm(layers[-1])
    self.mlp = nn.Sequential(*layers)
    self.out_dim = out_dim if out_dim > 0 else config.hidden_dims[-1]
    self.temperature = config.temperature
    if config.load_model_path:
        with PathManager.open(config.load_model_path, "rb") as f:
            mlp_state = torch.load(
                f, map_location=lambda s, l: default_restore_location(s, "cpu")
            )
        print("loaded mlp state")
        self.load_state_dict(mlp_state, strict=config.load_strict)
    log_class_usage(__class__)

def __init__(self, encoder1, encoder2, decoder, output_layer, encode_relations) -> None:
    super().__init__(decoder, output_layer, encode_relations)
    self.encoder1 = encoder1
    self.encoder2 = encoder2
    self.encoders = [encoder1, encoder2]
    log_class_usage(__class__)

def __init__(self, config: Config, output_encoded_layers: bool, **kwarg) -> None:
    super().__init__(config, output_encoded_layers=output_encoded_layers)
    assert config.pretrained_encoder.load_path, "Load path cannot be empty."
    self.encoder = create_module(config.pretrained_encoder)
    self.representation_dim = self.encoder.encoder.token_embedding.weight.size(-1)
    log_class_usage(__class__)

def __init__(self, models, loss_weights) -> None:
    models = nn.ModuleDict(models)
    super().__init__(None, None, None, None)
    self.models = models
    # Keep this in a list to prevent it from being registered in state_dict.
    self._current_model = [next(iter(models.values()))]
    self.loss_weights = loss_weights
    log_class_usage(__class__)

def __init__(self, num_tags, labels: Vocabulary, *args) -> None:
    super().__init__(list(labels), *args)
    self.crf = CRF(
        num_tags=num_tags,
        ignore_index=labels.get_pad_index(Padding.DEFAULT_LABEL_PAD_IDX),
        default_label_pad_index=Padding.DEFAULT_LABEL_PAD_IDX,
    )
    log_class_usage(__class__)

def __init__(self, encoder, decoder, output_layer, stage=Stage.TRAIN) -> None:
    super().__init__(stage=stage)
    self.encoder = encoder
    self.decoder = decoder
    self.module_list = [encoder, decoder]
    self.output_layer = output_layer
    self.stage = stage
    log_class_usage(__class__)