Example #1
    def __init__(self,
                 dropout: float,
                 lr: float,
                 n_classes: int,
                 vocab_size: int,
                 char_vocab_size: int,
                 max_word_len: int,
                 pos_len: int,
                 chunk_len: int,
                 word_emb_dimensions=300,
                 char_emb_dimensions=100,
                 word_hidden_dim=150,
                 char_hidden_dim=100,
                 word_emb_weights=None,
                 char_emb_weights=None,
                 pos_emb_weights=None,
                 chunk_emb_weights=None,
                 word_emb_trainable=False,
                 char_emb_trainable=True,
                 word_mask_zero=True,
                 char_mask_zero=True):
        embedding = EmbeddingLayer(vocab_size, char_vocab_size, max_word_len,
                                   pos_len, chunk_len, word_emb_dimensions,
                                   char_emb_dimensions, char_hidden_dim,
                                   word_emb_weights, char_emb_weights,
                                   pos_emb_weights, chunk_emb_weights,
                                   word_emb_trainable, char_emb_trainable,
                                   word_mask_zero, char_mask_zero)
        self.inputs, self.emb_layer = embedding.embedding()
        self.word_hidden_dim = word_hidden_dim
        self.dropout = dropout
        self.n_classes = n_classes
        self.crf = CRF(self.n_classes, sparse_target=False)
        self.lr = lr
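This constructor only stores the building blocks. Below is a minimal sketch of how they could be assembled into a BiLSTM-CRF tagger; it is an assumption for illustration (the original build method is not shown here) and presumes a Keras-style EmbeddingLayer and the keras_contrib CRF used above.

from keras.layers import Bidirectional, Dense, Dropout, LSTM, TimeDistributed
from keras.models import Model

def build_model(self):
    # self.emb_layer: concatenated word/char embeddings returned by EmbeddingLayer
    x = Bidirectional(LSTM(self.word_hidden_dim, return_sequences=True))(self.emb_layer)
    x = Dropout(self.dropout)(x)
    x = TimeDistributed(Dense(self.n_classes))(x)
    outputs = self.crf(x)  # CRF output layer constructed in __init__
    return Model(inputs=self.inputs, outputs=outputs)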
Example #2
    def __init__(self, config, vocab, model_type, model_mode='TRAIN'):
        """
        Hierarchy-Aware Global Model class
        :param config: helper.configure, Configure Object
        :param vocab: data_modules.vocab, Vocab Object
        :param model_type: Str, ('HiAGM-TP' for the serial variant of text propagation,
                                 'HiAGM-LA' for the parallel variant of multi-label soft attention,
                                 'Origin' without hierarchy-aware module)
        :param model_mode: Str, ('TRAIN', 'EVAL'), initialize with the pretrained word embedding if value is 'TRAIN'
        """
        super(HiAGM, self).__init__()
        self.config = config
        self.vocab = vocab
        self.device = config.train.device_setting.device

        self.token_map, self.label_map = vocab.v2i['token'], vocab.v2i['label']

        self.token_embedding = EmbeddingLayer(
            vocab_map=self.token_map,
            embedding_dim=config.embedding.token.dimension,
            vocab_name='token',
            config=config,
            padding_index=vocab.padding_index,
            pretrained_dir=config.embedding.token.pretrained_file,
            model_mode=model_mode,
            initial_type=config.embedding.token.init_type)

        self.dataflow_type = DATAFLOW_TYPE[model_type]

        self.text_encoder = TextEncoder(config)
        self.structure_encoder = StructureEncoder(
            config=config,
            label_map=vocab.v2i['label'],
            device=self.device,
            graph_model_type=config.structure_encoder.type)

        if self.dataflow_type == 'serial':
            self.hiagm = HiAGMTP(config=config,
                                 device=self.device,
                                 graph_model=self.structure_encoder,
                                 label_map=self.label_map)
        elif self.dataflow_type == 'parallel':
            self.hiagm = HiAGMLA(config=config,
                                 device=self.device,
                                 graph_model=self.structure_encoder,
                                 label_map=self.label_map,
                                 model_mode=model_mode)
        else:
            self.hiagm = Classifier(config=config,
                                    vocab=vocab,
                                    device=self.device)
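The branching above routes model_type through DATAFLOW_TYPE, which is defined elsewhere in the code base. A plausible mapping, reconstructed from the docstring and the serial/parallel/origin branches and shown here only to make the routing explicit:

DATAFLOW_TYPE = {
    'HiAGM-TP': 'serial',    # text propagation: text features flow through the graph encoder
    'HiAGM-LA': 'parallel',  # label attention: the graph encoder runs on label embeddings
    'Origin': 'origin',      # no hierarchy-aware module, plain classifier
}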
Example #3
    def __init__(self, config, label_map, model_mode, graph_model, device):
        """
        Hierarchy-Aware Global Model : (Parallel) Multi-label attention Variant
        :param config: helper.configure, Configure Object
        :param label_map: helper.vocab.Vocab.v2i['label'] -> Dict{str:int}
        :param model_mode: 'TRAIN', 'EVAL'
        :param graph_model: computational graph for graph model
        :param device: torch.device, config.train.device_setting.device
        """
        super(HiAGMLA, self).__init__()

        self.config = config
        self.device = device
        self.label_map = label_map

        self.label_embedding = EmbeddingLayer(
            vocab_map=self.label_map,
            embedding_dim=config.embedding.label.dimension,
            vocab_name='label',
            config=config,
            padding_index=None,
            pretrained_dir=None,
            model_mode=model_mode,
            initial_type=config.embedding.label.init_type)
        self.graph_model = graph_model

        # classifier
        self.linear = nn.Linear(
            len(self.label_map) * config.embedding.label.dimension,
            len(self.label_map))

        # dropout
        self.dropout = nn.Dropout(p=config.model.classifier.dropout)

        self.model_mode = model_mode
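For context, the flattened input size of self.linear (len(label_map) * label dimension) comes from a multi-label soft-attention step between token features and label features. The standalone sketch below, with illustrative names and shapes that are not taken from the repository, shows the shape bookkeeping:

import torch

def label_attention(text_feature, label_feature):
    # text_feature: (batch, seq_len, dim); label_feature: (num_labels, dim)
    scores = torch.matmul(text_feature, label_feature.t())          # (batch, seq_len, num_labels)
    weights = torch.softmax(scores, dim=1)                          # attention over tokens, per label
    attended = torch.matmul(weights.transpose(1, 2), text_feature)  # (batch, num_labels, dim)
    return attended.flatten(start_dim=1)                            # (batch, num_labels * dim)

# e.g. logits = self.linear(self.dropout(label_attention(token_output, label_repr)))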
Example #4
    def __init__(self, args):
        """
        Basic initialization of Transformer.

        Arguments
        ---------
        args: <argparse.Namespace> Arguments used for overall process.
        """
        super().__init__()

        self.args = args
        self.num_stacks = self.args.num_stacks
        self.d_model = self.args.d_model
        self.vocab_size = self.args.vocab_size

        self.emb = EmbeddingLayer(self.args)

        encoders = [Encoder(self.args) for _ in range(self.num_stacks)]
        self.encoder_stack = nn.Sequential(*encoders)

        decoders = [Decoder(self.args) for _ in range(self.num_stacks)]
        self.decoder_stack = nn.ModuleList(decoders)

        self.output_linear = nn.Linear(in_features=self.d_model,
                                       out_features=self.vocab_size,
                                       bias=False)
        self.output_linear.weight = self.emb.embedding_layer.weight

        self.softmax = nn.LogSoftmax(dim=-1)
        self.dropout = nn.Dropout(p=0.1)
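One detail worth noting above is the weight tying on self.output_linear: the output projection reuses the embedding matrix instead of learning a separate one. A minimal, self-contained illustration with generic stand-in modules (not the original classes):

import torch.nn as nn

vocab_size, d_model = 1000, 64
emb = nn.Embedding(vocab_size, d_model)
out = nn.Linear(d_model, vocab_size, bias=False)
out.weight = emb.weight              # both modules now share one Parameter
assert out.weight is emb.weight      # gradients from either path update the same tensor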
Example #5
    def __init__(self, config, vocab, model_mode='TRAIN'):
        """
        HTCInfoMax Model class
        :param config: helper.configure, Configure Object
        :param vocab: data_modules.vocab, Vocab Object
        :param model_mode: Str, ('TRAIN', 'EVAL'), initialize with the pretrained word embedding if value is 'TRAIN'
        """
        super(HTCInfoMax, self).__init__()
        self.config = config
        self.vocab = vocab
        self.device = config.train.device_setting.device

        self.token_map, self.label_map = vocab.v2i['token'], vocab.v2i['label']
        self.index2label = vocab.i2v['label']

        self.token_embedding = EmbeddingLayer(
            vocab_map=self.token_map,
            embedding_dim=config.embedding.token.dimension,
            vocab_name='token',
            config=config,
            padding_index=vocab.padding_index,
            pretrained_dir=config.embedding.token.pretrained_file,
            model_mode=model_mode,
            initial_type=config.embedding.token.init_type)

        # linear layer used for learning the weights for text_label_mi loss and label_prior_matching loss
        self.labelpriorweight_linear = nn.Linear(
            len(self.label_map) * config.embedding.label.dimension, 1)
        self.text_label_MI_weight_linear = nn.Linear(
            config.embedding.label.dimension, 1)

        self.text_encoder = TextEncoder(config)
        self.structure_encoder = StructureEncoder(
            config=config,
            label_map=vocab.v2i['label'],
            device=self.device,
            graph_model_type=config.structure_encoder.type)

        self.label_prior_d = LabelPriorDiscriminator()
        self.text_label_mi_d = TextLabelMIDiscriminator()

        self.htcinfomax = HiAGMLA(config=config,
                                  device=self.device,
                                  graph_model=self.structure_encoder,
                                  label_map=self.index2label,
                                  model_mode=model_mode)
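The two extra linear layers above (labelpriorweight_linear and text_label_MI_weight_linear) form the loss-weight estimator; the full class in Example #7 shows how the forward pass uses them. A standalone sketch of that fusion gate, with illustrative dimensions:

import torch
import torch.nn as nn

label_dim, num_labels = 300, 10                      # illustrative sizes only
text_label_mi_weight = nn.Linear(label_dim, 1)
label_prior_weight = nn.Linear(num_labels * label_dim, 1)

text_mean = torch.randn(label_dim)                   # mean-pooled text feature
labels_flat = torch.randn(num_labels * label_dim)    # flattened label features
fusion_gate = torch.sigmoid(text_label_mi_weight(text_mean) +
                            label_prior_weight(labels_flat))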
Example #6
    def __init__(self, config, label_map, model_mode, graph_model, device):
        """
        Hierarchy-Aware Global Model : (Parallel) Multi-label attention Variant
        paper: Jie Zhou, Chunping Ma, Dingkun Long, Guangwei Xu, Ning Ding, Haoyu Zhang, Pengjun Xie, and Gongshen Liu. Hierarchy-aware global model for hierarchical text classification. ACL 2020.
        :param config: helper.configure, Configure Object
        :param label_map: helper.vocab.Vocab.v2i['label'] -> Dict{str:int}
        :param model_mode: 'TRAIN', 'EVAL'
        :param graph_model: computational graph for graph model
        :param device: torch.device, config.train.device_setting.device
        """
        super(HiAGMLA, self).__init__()

        self.config = config
        self.device = device
        self.label_map = label_map

        self.label_embedding = EmbeddingLayer(
            vocab_map=self.label_map,
            embedding_dim=config.embedding.label.dimension,
            vocab_name='label',
            config=config,
            padding_index=None,
            pretrained_dir=None,
            model_mode=model_mode,
            initial_type=config.embedding.label.init_type)
        self.graph_model = graph_model

        # classifier
        self.linear = nn.Linear(
            len(self.label_map) * config.embedding.label.dimension,
            len(self.label_map))

        # dropout
        self.dropout = nn.Dropout(p=config.model.classifier.dropout)

        self.model_mode = model_mode
        self.label_feature_transform_weight = Parameter(
            torch.Tensor(config.embedding.label.dimension,
                         config.embedding.label.dimension))
        nn.init.xavier_uniform_(self.label_feature_transform_weight)
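The only addition relative to Example #3 is label_feature_transform_weight, a square (dim, dim) parameter with Xavier initialization. A minimal sketch of how such a parameter is typically applied (an assumption, since the corresponding forward pass is not shown here):

import torch
import torch.nn as nn

dim, num_labels = 300, 10                 # illustrative sizes only
W = nn.Parameter(torch.Tensor(dim, dim))
nn.init.xavier_uniform_(W)

label_features = torch.randn(num_labels, dim)
transformed = label_features @ W          # (num_labels, dim): a learned linear map on label features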
Example #7
class HTCInfoMax(nn.Module):
    def __init__(self, config, vocab, model_mode='TRAIN'):
        """
        HTCInfoMax Model class
        :param config: helper.configure, Configure Object
        :param vocab: data_modules.vocab, Vocab Object
        :param model_mode: Str, ('TRAIN', 'EVAL'), initialize with the pretrained word embedding if value is 'TRAIN'
        """
        super(HTCInfoMax, self).__init__()
        self.config = config
        self.vocab = vocab
        self.device = config.train.device_setting.device

        self.token_map, self.label_map = vocab.v2i['token'], vocab.v2i['label']
        self.index2label = vocab.i2v['label']

        self.token_embedding = EmbeddingLayer(
            vocab_map=self.token_map,
            embedding_dim=config.embedding.token.dimension,
            vocab_name='token',
            config=config,
            padding_index=vocab.padding_index,
            pretrained_dir=config.embedding.token.pretrained_file,
            model_mode=model_mode,
            initial_type=config.embedding.token.init_type)

        # linear layer used for learning the weights for text_label_mi loss and label_prior_matching loss
        self.labelpriorweight_linear = nn.Linear(
            len(self.label_map) * config.embedding.label.dimension, 1)
        self.text_label_MI_weight_linear = nn.Linear(
            config.embedding.label.dimension, 1)

        self.text_encoder = TextEncoder(config)
        self.structure_encoder = StructureEncoder(
            config=config,
            label_map=vocab.v2i['label'],
            device=self.device,
            graph_model_type=config.structure_encoder.type)

        self.label_prior_d = LabelPriorDiscriminator()
        self.text_label_mi_d = TextLabelMIDiscriminator()

        self.htcinfomax = HiAGMLA(config=config,
                                  device=self.device,
                                  graph_model=self.structure_encoder,
                                  label_map=self.index2label,
                                  model_mode=model_mode)

    def optimize_params_dict(self):
        """
        get parameters of the overall model
        :return: List[Dict{'params': Iteration[torch.Tensor],
                           'lr': Float (predefined learning rate for specified module,
                                        which is different from the others)
                          }]
        """
        params = list()
        params.append({'params': self.text_encoder.parameters()})
        params.append({'params': self.token_embedding.parameters()})
        params.append({'params': self.htcinfomax.parameters()})
        return params

    def forward(self, batch):
        """
        forward pass of the overall architecture
        :param batch: DataLoader._DataLoaderIter[Dict{'token_len': List}], each batch sampled from the current epoch
        :return: text_label_mi_disc_loss, label_prior_loss, logits, fusiongate
        """

        # get distributed representation of tokens, (batch_size, max_length, embedding_dimension)
        embedding = self.token_embedding(batch['token'].to(
            self.config.train.device_setting.device))

        # get the length of sequences for dynamic rnn, (batch_size, 1)
        seq_len = batch['token_len']
        token_output = self.text_encoder(embedding, seq_len)

        all_labels_feature, logits = self.htcinfomax(token_output)

        text_feature = token_output
        idx = np.random.permutation(text_feature.shape[0])
        negative_text = text_feature[idx, :, :]

        for i, label_index in enumerate(batch['label_list']):
            # Label Selector: select the corresponding labels for each text sample
            label_feature = all_labels_feature[label_index, :]
            label_feature_mean = torch.mean(label_feature, dim=0, keepdim=True)
            if i == 0:
                label_feature_y = label_feature_mean
            else:
                label_feature_y = torch.cat(
                    (label_feature_y, label_feature_mean), dim=0)

        # compute the text-label mutual information maximization loss
        t = text_feature.permute(0, 2, 1)
        t_prime = negative_text.permute(0, 2, 1)
        E_joint = -F.softplus(-self.text_label_mi_d(label_feature_y, t)).mean()
        E_marginal = F.softplus(self.text_label_mi_d(label_feature_y,
                                                     t_prime)).mean()
        text_label_mi_disc_loss = (E_marginal - E_joint)

        # compute the label prior matching loss
        label_totalnum = all_labels_feature.shape[0]
        label_prior_loss = 0.0
        for i in range(label_totalnum):
            label_y = all_labels_feature[i]
            label_prior = torch.rand_like(label_y)
            term_a = torch.log(self.label_prior_d(label_prior)).mean()
            term_b = torch.log(1.0 - self.label_prior_d(label_y)).mean()
            label_prior_loss += -(term_a + term_b)
        label_prior_loss /= label_totalnum

        # loss weight estimator: compute the weights for above two losses
        text_feature_temp = torch.mean(text_feature, dim=1)
        text_feature_mean = torch.mean(text_feature_temp, dim=0)
        text_label_MI_weightlogit = self.text_label_MI_weight_linear(
            text_feature_mean)
        labelprior_weightlogit = self.labelpriorweight_linear(
            all_labels_feature.view(-1))
        fusiongate = F.sigmoid(text_label_MI_weightlogit +
                               labelprior_weightlogit)

        return text_label_mi_disc_loss, label_prior_loss, logits, fusiongate
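forward() returns two auxiliary losses, the classification logits, and a learned fusion gate. A hedged sketch of how they might be combined into a single training objective; the BCE criterion and the exact weighting scheme are assumptions for illustration, not taken from the repository:

import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()           # assumed multi-label classification loss

def training_objective(model, batch, targets):
    mi_loss, prior_loss, logits, gate = model(batch)
    cls_loss = criterion(logits, targets)
    # the fusion gate trades off the two auxiliary regularizers (assumed scheme)
    return cls_loss + gate * mi_loss + (1.0 - gate) * prior_loss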
Example #8
class HiAGM(nn.Module):
    def __init__(self, config, vocab, model_type, model_mode='TRAIN'):
        """
        Hierarchy-Aware Global Model class
        :param config: helper.configure, Configure Object
        :param vocab: data_modules.vocab, Vocab Object
        :param model_type: Str, ('HiAGM-TP' for the serial variant of text propagation,
                                 'HiAGM-LA' for the parallel variant of multi-label soft attention,
                                 'Origin' without hierarchy-aware module)
        :param model_mode: Str, ('TRAIN', 'EVAL'), initialize with the pretrained word embedding if value is 'TRAIN'
        """
        super(HiAGM, self).__init__()
        self.config = config
        self.vocab = vocab
        self.device = config.train.device_setting.device

        self.token_map, self.label_map = vocab.v2i['token'], vocab.v2i['label']

        self.token_embedding = EmbeddingLayer(
            vocab_map=self.token_map,
            embedding_dim=config.embedding.token.dimension,
            vocab_name='token',
            config=config,
            padding_index=vocab.padding_index,
            pretrained_dir=config.embedding.token.pretrained_file,
            model_mode=model_mode,
            initial_type=config.embedding.token.init_type)

        self.dataflow_type = DATAFLOW_TYPE[model_type]

        self.text_encoder = TextEncoder(config)
        self.structure_encoder = StructureEncoder(
            config=config,
            label_map=vocab.v2i['label'],
            device=self.device,
            graph_model_type=config.structure_encoder.type)

        if self.dataflow_type == 'serial':
            self.hiagm = HiAGMTP(config=config,
                                 device=self.device,
                                 graph_model=self.structure_encoder,
                                 label_map=self.label_map)
        elif self.dataflow_type == 'parallel':
            self.hiagm = HiAGMLA(config=config,
                                 device=self.device,
                                 graph_model=self.structure_encoder,
                                 label_map=self.label_map,
                                 model_mode=model_mode)
        else:
            self.hiagm = Classifier(config=config,
                                    vocab=vocab,
                                    device=self.device)

    def optimize_params_dict(self):
        """
        get parameters of the overall model
        :return: List[Dict{'params': Iteration[torch.Tensor],
                           'lr': Float (predefined learning rate for specified module,
                                        which is different from the others)
                          }]
        """
        params = list()
        params.append({'params': self.text_encoder.parameters()})
        params.append({'params': self.token_embedding.parameters()})
        params.append({'params': self.hiagm.parameters()})
        return params

    def forward(self, batch):
        """
        forward pass of the overall architecture
        :param batch: DataLoader._DataLoaderIter[Dict{'token_len': List}], each batch sampled from the current epoch
        :return: logits, torch.FloatTensor, (batch_size, number of labels)
        """

        # get distributed representation of tokens, (batch_size, max_length, embedding_dimension)
        embedding = self.token_embedding(batch['token'].to(
            self.config.train.device_setting.device))

        # get the length of sequences for dynamic rnn, (batch_size, 1)
        seq_len = batch['token_len']

        token_output = self.text_encoder(embedding, seq_len)

        logits = self.hiagm(token_output)

        return logits
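A hedged usage sketch tying optimize_params_dict() and forward() together; config, vocab, and the data loader are assumed to come from the repository's helper.configure and data_modules, and the loss function and learning rate are illustrative only:

import torch
import torch.nn as nn

model = HiAGM(config, vocab, model_type='HiAGM-TP', model_mode='TRAIN')
optimizer = torch.optim.Adam(model.optimize_params_dict(), lr=1e-3)  # lr is illustrative
criterion = nn.BCEWithLogitsLoss()                                   # assumed multi-label loss

for batch, targets in train_loader:      # batches are dicts with 'token' and 'token_len'
    optimizer.zero_grad()
    logits = model(batch)
    loss = criterion(logits, targets)
    loss.backward()
    optimizer.step()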