def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Build an encoder/decoder classification model from its config.

    Args:
        config: model config carrying encoder, decoder, and output-layer
            sub-configs.
        tensorizers: must contain a "tokens" tensorizer exposing both
            ``vocab`` and ``labels_vocab``.

    Returns:
        A ``cls`` instance wired as encoder -> decoder -> output layer.
    """
    label_vocab = tensorizers["tokens"].labels_vocab
    vocab = tensorizers["tokens"].vocab
    encoder = create_module(
        config.encoder,
        output_encoded_layers=True,
        padding_idx=vocab.get_pad_index(),
        # Idiomatic len() instead of calling vocab.__len__() directly.
        vocab_size=len(vocab),
    )
    decoder = create_module(
        config.decoder, in_dim=encoder.representation_dim, out_dim=len(label_vocab)
    )
    output_layer = create_module(config.output_layer, labels=label_vocab)
    return cls(encoder, decoder, output_layer)
def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Construct a DrQA-style reading-comprehension model from config.

    Builds the shared embedding, question/document RNNs, the attention
    modules for span start/end scoring, and a binary "has answer" head.
    """
    # Although the RNN params are configurable, for DrQA we want to set
    # the following parameters for all cases.
    config.ques_rnn.dropout = config.dropout
    config.doc_rnn.dropout = config.dropout
    embedding = cls.create_embedding(config, tensorizers)
    ques_aligned_doc_attn = SequenceAlignedAttention(embedding.embedding_dim)
    ques_rnn = create_module(config.ques_rnn, input_size=embedding.embedding_dim)
    # Doc RNN input is twice the embedding dim — presumably word embeddings
    # concatenated with question-aligned features; TODO confirm in forward().
    doc_rnn = create_module(config.doc_rnn, input_size=embedding.embedding_dim * 2)
    ques_self_attn = DotProductSelfAttention(ques_rnn.representation_dim)
    # Separate (unnormalized) bilinear attentions score span starts and ends.
    start_attn = MultiplicativeAttention(
        doc_rnn.representation_dim, ques_rnn.representation_dim, normalize=False
    )
    end_attn = MultiplicativeAttention(
        doc_rnn.representation_dim, ques_rnn.representation_dim, normalize=False
    )
    doc_rep_pool = SelfAttention(
        SelfAttention.Config(dropout=config.dropout),
        n_input=doc_rnn.representation_dim,
    )
    # The has-answer head uses a fixed binary vocab rather than one learned
    # from data; the tensorizer's vocab is overwritten to match.
    has_answer_labels = ["False", "True"]
    tensorizers["has_answer"].vocab = Vocabulary(has_answer_labels)
    has_ans_decoder = MLPDecoder(
        config=MLPDecoder.Config(),
        in_dim=doc_rnn.representation_dim,
        out_dim=len(has_answer_labels),
    )
    output_layer = create_module(
        config.output_layer, labels=has_answer_labels, is_kd=config.is_kd
    )
    return cls(
        dropout=nn.Dropout(config.dropout),
        embedding=embedding,
        ques_rnn=ques_rnn,
        doc_rnn=doc_rnn,
        ques_self_attn=ques_self_attn,
        ques_aligned_doc_attn=ques_aligned_doc_attn,
        start_attn=start_attn,
        end_attn=end_attn,
        doc_rep_pool=doc_rep_pool,
        has_ans_decoder=has_ans_decoder,
        output_layer=output_layer,
        is_kd=config.is_kd,
    )
def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Assemble a basic word-embedding classifier from config."""
    token_vocab = tensorizers["tokens"].vocab
    label_set = tensorizers["labels"].labels
    # Plain word embedding: no pretrained weights, default initialization,
    # UNK index taken from the merged token vocab.
    emb = WordEmbedding(
        len(token_vocab),
        config.embedding.embed_dim,
        None,
        None,
        token_vocab.idx[UNK],
        [],
    )
    rep = create_module(config.representation, embed_dim=emb.embedding_dim)
    dec = create_module(
        config.decoder,
        in_dim=rep.representation_dim,
        out_dim=len(label_set),
    )
    out = ClassificationOutputLayer(label_set, CrossEntropyLoss(None))
    return cls(emb, rep, dec, out)
def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Standard assembly: embedding -> representation -> decoder -> output."""
    label_set = tensorizers["labels"].labels
    emb = cls.create_embedding(config, tensorizers)
    rep = create_module(config.representation, embed_dim=emb.embedding_dim)
    dec = create_module(
        config.decoder,
        in_dim=rep.representation_dim,
        out_dim=len(label_set),
    )
    # TODO: change from_config of ClassificationOutputLayer after migrating
    # to the new design.
    out = ClassificationOutputLayer(
        list(label_set), create_loss(config.output_layer.loss)
    )
    return cls(emb, rep, dec, out)
def __init__(self, config: Config, embed_dim: int) -> None:
    """Wrap a word representation; doc dim mirrors the word dim."""
    super().__init__(config)
    self.word_rep = create_module(config.word_representation, embed_dim)
    rep_dim = self.word_rep.representation_dim
    # Word- and doc-level outputs share the same dimensionality here.
    self.word_representation_dim = rep_dim
    self.doc_representation_dim = rep_dim
    self.pooling_type = config.pooling_type
    log_class_usage(__class__)
def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Build a classifier; the output-layer class follows the loss type."""
    label_vocab = tensorizers["labels"].vocab
    emb = cls.create_embedding(config, tensorizers)
    rep = create_module(config.representation, embed_dim=emb.embedding_dim)
    dec = cls.create_decoder(config, rep.representation_dim, len(label_vocab))
    # Optional per-label loss weighting.
    weights = None
    if config.output_layer.label_weights:
        weights = get_label_weights(
            label_vocab.idx, config.output_layer.label_weights
        )
    loss = create_loss(config.output_layer.loss, weight=weights)
    # The loss instance determines which output layer wraps it.
    if isinstance(loss, BinaryCrossEntropyLoss):
        out = BinaryClassificationOutputLayer(list(label_vocab), loss)
    elif isinstance(loss, MultiLabelSoftMarginLoss):
        out = MultiLabelOutputLayer(list(label_vocab), loss)
    else:
        out = MulticlassOutputLayer(list(label_vocab), loss)
    return cls(emb, rep, dec, out)
def _create_encoder(
    cls, config: Config, tensorizers: Dict[str, Tensorizer]
) -> Tuple[nn.Module, nn.Module]:
    """Create the two token encoders for a pairwise model.

    Returns:
        An ``(encoder1, encoder2)`` tuple. When ``config.shared_encoder``
        is set, both elements are the *same* module instance.

    Note: the previous annotation claimed ``nn.ModuleList`` but the function
    has always returned a plain tuple — the annotation is corrected here.
    """
    encoder1 = create_module(
        config.encoder,
        padding_idx=tensorizers["tokens1"].vocab.get_pad_index(),
        vocab_size=len(tensorizers["tokens1"].vocab),
    )
    if config.shared_encoder:
        encoder2 = encoder1
    else:
        encoder2 = create_module(
            config.encoder,
            padding_idx=tensorizers["tokens2"].vocab.get_pad_index(),
            vocab_size=len(tensorizers["tokens2"].vocab),
        )
    return encoder1, encoder2
def __init__(self, config: Config, embed_dim: int) -> None:
    """embed_dim is the dimension of embedded_tokens """
    super().__init__(config)
    self.dropout = nn.Dropout(config.dropout)
    # Document attention (optional pooling over token embeddings).
    self.attention = (
        create_module(config.pooling, n_input=embed_dim)
        if config.pooling is not None
        else None
    )
    # Non-linear projection over attended representation.
    self.dense = None
    if (
        isinstance(config.pooling, BoundaryPool.Config)
        and config.pooling.boundary_type == "firstlast"
    ):
        # the dimension double because of concatenating bos and eos
        self.representation_dim = embed_dim * 2
    else:
        self.representation_dim = embed_dim
    if config.mlp_decoder:
        # NOTE(review): in_dim here is always embed_dim, even when the
        # "firstlast" branch above doubled representation_dim — confirm this
        # combination is intentional (or unreachable) in practice.
        self.dense = MLPDecoder(config.mlp_decoder, in_dim=embed_dim)
        self.representation_dim = self.dense.out_dim
    log_class_usage(__class__)
def from_config(cls, config, tensorizers: Dict[str, Tensorizer]):
    """Build the base model, then extend it with a user-id embedding and a
    decoder widened to accept the concatenated user features."""
    model = super().from_config(config, tensorizers)
    user_embedding = create_module(
        config.user_embedding,
        tensorizer=tensorizers["uid"],
        init_from_saved_state=config.init_from_saved_state,
    )
    # Init user embeddings to be a same vector because we assume user features
    # are not too different from each other.
    emb_shape = user_embedding.word_embedding.weight.data.shape
    with torch.no_grad():
        # One random row broadcast to every vocab entry.
        user_embedding.word_embedding.weight.copy_(
            torch.rand(emb_shape[1]).repeat(emb_shape[0], 1)
        )
    labels = tensorizers["labels"].vocab
    # Decoder input grows by the user embedding dim (concatenation assumed
    # downstream — TODO confirm in forward()).
    decoder = cls.create_decoder(
        config,
        model.representation.representation_dim + user_embedding.embedding_dim,
        len(labels),
    )
    return cls(
        model.embedding,
        model.representation,
        decoder,
        model.output_layer,
        user_embedding,
    )
def create_embedding(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Instantiate the token embedding module from config."""
    token_tensorizer = tensorizers["tokens"]
    return create_module(
        config.embedding,
        tensorizer=token_tensorizer,
        init_from_saved_state=config.init_from_saved_state,
    )
def __init__(self, config: Config, embed_dim: int) -> None:
    """BiLSTM word representation with optional slot attention and an
    optional MLP projection on top."""
    super().__init__(config)
    self.dropout = nn.Dropout(config.dropout)
    # BiLSTM representation.
    self.lstm = create_module(config.lstm, embed_dim=embed_dim)
    # Slot attention.
    self.attention = None
    word_representation_dim = self.lstm.representation_dim
    if config.slot_attention:
        self.attention = SlotAttention(
            config.slot_attention, self.lstm.representation_dim, batch_first=True
        )
        # Attended output is concatenated with the LSTM output.
        word_representation_dim += self.lstm.representation_dim
    # Projection over attended representation.
    self.dense = None
    # NOTE(review): representation_dim and the MLP in_dim below use the raw
    # LSTM dim even when slot attention doubles word_representation_dim —
    # confirm downstream consumers expect this.
    self.representation_dim: int = self.lstm.representation_dim
    if config.mlp_decoder:
        self.dense = MLPDecoder(
            config.mlp_decoder, in_dim=self.lstm.representation_dim
        )
        self.representation_dim = self.dense.out_dim
def __init__(self, config: Config, embed_dim: Tuple[int, ...]) -> None:
    """Hierarchical representation: sentence -> sequence -> joint doc/word."""
    super().__init__(config)
    assert len(embed_dim) == 2
    word_dim, sen_input_dim = embed_dim[0], embed_dim[1]
    self.sen_rep = create_module(config.sen_representation, embed_dim=sen_input_dim)
    self.sen_representation_dim = self.sen_rep.representation_dim
    self.seq_rep = create_module(
        config.seq_representation, embed_dim=self.sen_representation_dim
    )
    self.seq_representation_dim = self.seq_rep.representation_dim
    # Joint representation consumes the word embedding concatenated with
    # the sequence summary.
    self.joint_rep = create_module(
        config.joint_representation,
        embed_dim=word_dim + self.seq_representation_dim,
    )
    self.doc_representation_dim = self.joint_rep.doc_representation_dim
    self.word_representation_dim = self.joint_rep.word_representation_dim
def from_config(cls, config, kernel_size):
    """Build one encoder layer: a non-causal conv plus optional self-attention."""
    conv_module = create_module(
        config.encoder_conv_type,
        input_size=config.encoder_conv_dim,
        kernel_size=kernel_size,
        convolution_type="non-causal",
    )
    attn_module = None
    if config.self_attention_type is not None:
        attn_module = create_module(
            config.self_attention_type,
            config.encoder_embed_dim,
            config.self_attention_heads,
        )
    # Forward every config field to the constructor alongside the modules.
    return cls(**config._asdict(), conv=conv_module, self_attention=attn_module)
def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Assemble a two-tower pairwise model from its config."""
    enc_a, enc_b = cls._create_encoder(config, tensorizers)
    decoder = cls._create_decoder(config, [enc_a, enc_b], tensorizers)
    out_layer = create_module(
        config.output_layer, labels=tensorizers["labels"].vocab
    )
    return cls(enc_a, enc_b, decoder, out_layer, config.encode_relations)
def __init__(self, config: Config, output_encoded_layers: bool, **kwarg) -> None:
    """Wrap a pretrained encoder; representation dim comes from its
    token-embedding width."""
    super().__init__(config, output_encoded_layers=output_encoded_layers)
    assert config.pretrained_encoder.load_path, "Load path cannot be empty."
    self.encoder = create_module(config.pretrained_encoder)
    token_emb_weight = self.encoder.encoder.token_embedding.weight
    self.representation_dim = token_emb_weight.size(-1)
def __init__(
    self,
    model: CNNModel,
    length_prediction_model: MaskedLengthPredictionModule,
    output_layer: NARSeq2SeqOutputLayer,
    src_vocab: Vocabulary,
    trg_vocab: Vocabulary,
    dictfeat_vocab: Vocabulary,
    tensorizer=None,
    generator_config=None,
    config: Config = None,
):
    """Non-autoregressive seq2seq model with a length-prediction head and a
    lazily built sequence generator."""
    # NOTE(review): generator_config=None is passed to super() even though a
    # real generator_config was received — confirm the base class is meant to
    # be constructed without it.
    super().__init__(
        model,
        output_layer,
        src_vocab,
        trg_vocab,
        dictfeat_vocab,
        generator_config=None,
    )
    # NOTE(review): the attribute reads below require generator_config to be
    # non-None despite its None default — callers must always supply it.
    self.quantize = generator_config.quantize
    self.length_prediction_model = length_prediction_model
    # Deferred builder: the generator is created later from the (possibly
    # quantized) model and length-prediction module.
    self.sequence_generator_builder = (
        lambda model, length_prediction_model, quantize: create_module(
            generator_config, model, length_prediction_model, trg_vocab, quantize
        )
    )
    self.force_eval_predictions = generator_config.force_eval_predictions
    self.generate_predictions_every = generator_config.generate_predictions_every
    self.tensorizer = tensorizer
def construct_length_prediction_module(
    cls,
    config: Config,
):
    """Build the length-prediction head sized to the encoder embedding dim."""
    encoder_embed_dim = (
        config.encoder_decoder.encoder.encoder_config.encoder_embed_dim
    )
    return create_module(config.length_prediction_model, encoder_embed_dim)
def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Assemble the multi-input model via the class's factory helpers."""
    embeddings = cls._create_embeddings(config, tensorizers)
    reps = cls._create_representations(config, embeddings)
    decoder = cls._create_decoder(config, reps, tensorizers)
    label_vocab = tensorizers["labels"].vocab
    out_layer = create_module(config.output_layer, labels=label_vocab)
    return cls(embeddings, reps, decoder, out_layer, config.encode_relations)
def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Two-tower regression model; the towers may share one token embedding."""
    if config.use_shared_embedding:
        shared_embedding = torch.nn.Embedding(
            config.vocab_size, config.hidden_dim, padding_idx=config.padding_idx
        )
    else:
        shared_embedding = None
    r_vocab = tensorizers["right_tokens"].vocab
    r_encoder = create_module(
        config.right_encoder,
        token_embedding=shared_embedding,
        padding_idx=r_vocab.get_pad_index(),
        vocab_size=len(r_vocab),
    )
    l_vocab = tensorizers["left_tokens"].vocab
    l_encoder = create_module(
        config.left_encoder,
        token_embedding=shared_embedding,
        padding_idx=l_vocab.get_pad_index(),
        vocab_size=len(l_vocab),
    )
    # Dense features are appended to each tower's representation, so the
    # decoder input widths include their dims.
    decoder = create_module(
        config.decoder,
        right_dim=r_encoder.representation_dim + tensorizers["right_dense"].dim,
        left_dim=l_encoder.representation_dim + tensorizers["left_dense"].dim,
        to_dim=1,
    )
    output_layer = RegressionOutputLayer.from_config(config.output_layer)
    return cls(
        r_encoder,
        l_encoder,
        decoder,
        output_layer,
        config.use_shared_encoder,
        config.use_shared_embedding,
        config.vocab_size,
        config.hidden_dim,
        config.padding_idx,
    )
def from_config(cls, config, tgt_dict, tgt_embedding):
    """Build the conv decoder stack, one layer per configured kernel size."""
    conv_layers = []
    for kernel_size in config.decoder_kernel_size_list:
        # The per-layer embed dim must match the decoder-wide embed dim.
        assert (
            config.decoder_config.decoder_embed_dim
            == config.layer_config.decoder_embed_dim
        )
        conv_layers.append(
            create_module(config.layer_config, kernel_size=kernel_size)
        )
    return cls(tgt_dict, tgt_embedding, conv_layers, config.decoder_config)
def from_config(
    cls,
    config: Config,
    doc_meta: Optional[FieldMeta] = None,
    word_meta: Optional[FieldMeta] = None,
    doc_labels: Optional[Vocabulary] = None,
    word_labels: Optional[Vocabulary] = None,
):
    """Build doc and word output layers, preferring explicit label vocabs."""
    if word_labels and doc_labels:
        doc_out = create_module(config.doc_output, labels=doc_labels)
        word_out = create_module(config.word_output, labels=word_labels)
    else:
        # Fall back to legacy field metadata when vocabs are absent.
        doc_out = create_module(config.doc_output, metadata=doc_meta)
        word_out = create_module(config.word_output, metadata=word_meta)
    return cls(doc_out, word_out)
def _create_decoder(
    cls,
    config: Config,
    representations: nn.ModuleList,
    tensorizers: Dict[str, Tensorizer],
):
    """Size the decoder from the concatenated representations (plus pairwise
    relation features when enabled) and build the matching output layer."""
    label_vocab = tensorizers["labels"].vocab
    rep_count = len(representations)
    rep_dim = representations[0].representation_dim
    in_dim = rep_count * rep_dim
    if config.encode_relations:
        # Each unordered pair of representations contributes two relation
        # vectors of rep_dim each.
        in_dim += 2 * comb(rep_count, 2, exact=True) * rep_dim
    decoder = create_module(
        config.decoder, in_dim=in_dim, out_dim=len(label_vocab)
    )
    output_layer = create_module(config.output_layer, labels=label_vocab)
    return decoder, output_layer
def _create_representations(cls, config: Config, embeddings: nn.ModuleList):
    """One representation per embedding; a single shared instance when
    config.shared_representations is set."""
    if config.shared_representations:
        shared = create_module(
            config.representation, embed_dim=embeddings[0].embedding_dim
        )
        # The same module object is reused for every embedding.
        return nn.ModuleList([shared] * len(embeddings))
    return nn.ModuleList(
        [
            create_module(config.representation, embed_dim=emb.embedding_dim)
            for emb in embeddings
        ]
    )
def create_decoder(cls, config: Config, representation_dim: int, num_labels: int):
    """Build the decoder, widening its input when dense features are present."""
    dense_cfg = getattr(config.inputs, "dense", None)
    extra_modules = 0
    in_dim = representation_dim
    if dense_cfg:
        extra_modules = 1
        in_dim = representation_dim + dense_cfg.dim
    decoder = create_module(config.decoder, in_dim=in_dim, out_dim=num_labels)
    # Record how many extra input modules feed the decoder.
    decoder.num_decoder_modules = extra_modules
    return decoder
def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Triplet model (positive / negative / query) sharing one vocabulary,
    one embedding, and (optionally) one representation."""
    # Merge every tensorizer's vocab into a single shared vocabulary.
    builder = VocabBuilder()
    for tensorizer in tensorizers.values():
        builder.add_all(tensorizer.vocab.counts)
    shared_vocab = builder.make_vocab()
    for tensorizer in tensorizers.values():
        tensorizer.vocab = shared_vocab
    # A single embedding instance serves all three inputs.
    shared_emb = create_module(config.embedding, None, tensorizers["pos_response"])
    embeddings = nn.ModuleList([shared_emb, shared_emb, shared_emb])
    emb_dim = embeddings[0].embedding_dim
    # Positive and negative responses always share a representation; the
    # query gets its own unless sharing is configured.
    response_repr = create_module(config.representation, embed_dim=emb_dim)
    if config.shared_representations:
        query_repr = response_repr
    else:
        query_repr = create_module(config.representation, embed_dim=emb_dim)
    representations = nn.ModuleList([response_repr, response_repr, query_repr])
    # representation.representation_dim: tuple(2, actual repr dim)
    decoder = create_module(
        config.decoder,
        from_dim=representations[0].representation_dim,
        to_dim=config.decoder_output_dim,
    )
    output_layer = create_module(config.output_layer)
    return cls(
        embeddings,
        representations,
        decoder,
        output_layer,
        encode_relations=False,
        shared_representations=config.shared_representations,
    )
def from_config(cls, config: Config, tensorizers: Dict[str, Tensorizer]):
    """Two-tower classifier over token encoders plus dense features."""
    label_vocab = tensorizers["labels"].vocab
    if not label_vocab:
        raise ValueError("Labels were not created, see preceding errors")
    vocab_r = tensorizers["right_tokens"].vocab
    enc_r = create_module(
        config.right_encoder,
        padding_idx=vocab_r.get_pad_index(),
        vocab_size=len(vocab_r),
    )
    vocab_l = tensorizers["left_tokens"].vocab
    enc_l = create_module(
        config.left_encoder,
        padding_idx=vocab_l.get_pad_index(),
        vocab_size=len(vocab_l),
    )
    # Dense features widen each tower's decoder input.
    decoder = create_module(
        config.decoder,
        right_dim=enc_r.representation_dim + tensorizers["right_dense"].dim,
        left_dim=enc_l.representation_dim + tensorizers["left_dense"].dim,
        to_dim=len(label_vocab),
    )
    # Optional per-label loss weighting.
    weights = None
    if config.output_layer.label_weights:
        weights = get_label_weights(
            label_vocab.idx, config.output_layer.label_weights
        )
    loss = create_loss(config.output_layer.loss, weight=weights)
    # The loss instance determines which output layer wraps it.
    if isinstance(loss, BinaryCrossEntropyLoss):
        output_layer = BinaryClassificationOutputLayer(list(label_vocab), loss)
    elif isinstance(loss, MultiLabelSoftMarginLoss):
        output_layer = MultiLabelOutputLayer(list(label_vocab), loss)
    else:
        output_layer = MulticlassOutputLayer(list(label_vocab), loss)
    return cls(enc_r, enc_l, decoder, output_layer)
def from_config(cls, config, src_dict, src_embedding):
    """Build the conv encoder stack, one layer per configured kernel size."""
    # Keep the per-layer embed dim in lockstep with the encoder-wide one so
    # the assertion below always holds.
    config.layer_config.encoder_embed_dim = config.encoder_config.encoder_embed_dim
    conv_layers = []
    for kernel_size in config.encoder_kernel_size_list:
        assert (
            config.encoder_config.encoder_embed_dim
            == config.layer_config.encoder_embed_dim
        )
        conv_layers.append(
            create_module(config.layer_config, kernel_size=kernel_size)
        )
    return cls(src_dict, src_embedding, conv_layers, config.encoder_config)
def from_config(cls, config: Config, tensorizers):
    """Build a SQuAD-style QA model: a shared encoder feeding a span decoder
    and a binary has-answer head.

    Args:
        config: model config with encoder, decoder, and output-layer configs.
        tensorizers: must contain "squad_input" (token vocab) and
            "has_answer" (vocab is overwritten with the fixed binary labels).
    """
    has_answer_labels = ["False", "True"]
    tensorizers["has_answer"].vocab = Vocabulary(has_answer_labels)
    vocab = tensorizers["squad_input"].vocab
    encoder = create_module(
        config.encoder,
        output_encoded_layers=True,
        padding_idx=vocab.get_pad_index(),
        # Idiomatic len() instead of calling vocab.__len__() directly.
        vocab_size=len(vocab),
    )
    # Span decoder emits start/end logits, hence out_dim=2.
    decoder = create_module(
        config.decoder, in_dim=encoder.representation_dim, out_dim=2
    )
    has_ans_decoder = create_module(
        config.decoder,
        in_dim=encoder.representation_dim,
        out_dim=len(has_answer_labels),
    )
    output_layer = create_module(config.output_layer, labels=has_answer_labels)
    return cls(encoder, decoder, has_ans_decoder, output_layer)
def __init__(self, config: Config, output_encoded_layers: bool, **kwarg) -> None:
    """RoBERTa wrapper: resolve symbolic resource names, then build the
    pretrained encoder."""
    load_path = config.pretrained_encoder.load_path
    # Map well-known resource names to their real paths when applicable.
    if load_path in resources.roberta.RESOURCE_MAP:
        config.pretrained_encoder.load_path = resources.roberta.RESOURCE_MAP[
            load_path
        ]
    super().__init__(config, output_encoded_layers=output_encoded_layers)
    assert config.pretrained_encoder.load_path, "Load path cannot be empty."
    self.encoder = create_module(config.pretrained_encoder)
    token_emb_weight = self.encoder.encoder.token_embedding.weight
    self.representation_dim = token_emb_weight.size(-1)
    log_class_usage(__class__)
def create_embedding(cls, model_config: Config, tensorizers: Dict[str, Tensorizer]):
    """Create the shared SQuAD embedding and sync the sub-tensorizer vocabs."""
    squad = tensorizers["squad_input"]
    embedding = create_module(model_config.embedding, None, squad)
    # Question and document tensorizers must index with the same vocab as
    # the combined squad tensorizer.
    squad.ques_tensorizer.vocab = squad.vocab
    squad.doc_tensorizer.vocab = squad.vocab
    return embedding