def __init__(self, __C, pretrained_emb, token_size, answer_size):
    """Build the network: word embedding, LSTM text encoder, feature
    adapter, unified transformer backbone, and a linear answer head.

    Args:
        __C: config object (reads WORD_EMBED_SIZE, HIDDEN_SIZE, USE_GLOVE).
        pretrained_emb: numpy array of pretrained word vectors; assumed
            shape (token_size, WORD_EMBED_SIZE) — confirm against loader.
        token_size: vocabulary size.
        answer_size: number of answer classes.
    """
    super(Net, self).__init__()
    self.__C = __C

    self.embedding = nn.Embedding(
        num_embeddings=token_size,
        embedding_dim=__C.WORD_EMBED_SIZE
    )

    # Initialize the embedding table with pretrained (GloVe) weights.
    if __C.USE_GLOVE:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.lstm = nn.LSTM(
        input_size=__C.WORD_EMBED_SIZE,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True
    )

    self.adapter = Adapter(__C)
    self.norm1 = LayerNorm(__C.HIDDEN_SIZE)
    self.norm2 = LayerNorm(__C.HIDDEN_SIZE)
    self.backbone = UnifiedLayers(__C)

    # Flatten the token sequence to a single vector using the token at
    # position 0 (CLS-style pooling — TODO confirm in TokenFlat).
    self.flat = TokenFlat(token_pos=0)

    # Classification head.
    self.proj = nn.Linear(__C.HIDDEN_SIZE, answer_size)
def __init__(self, in_channels, mid_channels, out_channels, split_num,
             dropout_rate=0, ops_order='full'):
    """Co-graph layer: multi-head SE attention followed by an FFN, each
    registered with its own dropout and LayerNorm (post-norm pattern).

    Args:
        in_channels: input feature dimension.
        mid_channels: hidden dimension of the FFN.
        out_channels: output feature dimension.
        split_num: number of attention splits/heads passed to MHSEAtt.
        dropout_rate: dropout probability (default 0 = no dropout).
        ops_order: operation-order flag; stored but not used here —
            presumably consumed by forward(); TODO confirm.
    """
    super(CoGraphLayer, self).__init__()
    self.in_channels = in_channels
    self.split_num = split_num
    self.dropout_rate = dropout_rate
    self.ops_order = ops_order

    # Submodules are registered via add_module, matching the file's style.
    self.add_module(
        'mhseatt',
        MHSEAtt(in_channels, out_channels, split_num, dropout_rate))
    self.add_module(
        'ffn',
        FFN(in_channels, mid_channels, out_channels, dropout_rate))
    self.add_module('dropout1', nn.Dropout(dropout_rate))
    self.add_module('norm1', LayerNorm(out_channels))
    self.add_module('dropout2', nn.Dropout(dropout_rate))
    self.add_module('norm2', LayerNorm(out_channels))
def __init__(self, __C):
    """Self-attention block: multi-head attention then a feed-forward
    network, each followed by dropout and LayerNorm (post-norm).

    Args:
        __C: config object (reads DROPOUT_R, HIDDEN_SIZE).
    """
    super(SA, self).__init__()

    hidden_size = __C.HIDDEN_SIZE
    drop_rate = __C.DROPOUT_R

    self.mhatt = MHAtt(__C)
    self.ffn = FFN(__C)

    self.dropout1 = nn.Dropout(p=drop_rate)
    self.norm1 = LayerNorm(hidden_size)
    self.dropout2 = nn.Dropout(p=drop_rate)
    self.norm2 = LayerNorm(hidden_size)
def __init__(self, __C, pretrained_emb, token_size, answer_size):
    """Build the network: word embedding, multimodal LSTM encoder,
    MCA encoder-decoder backbone, attention flattening, and answer head.

    Args:
        __C: config object (reads WORD_EMBED_SIZE, HIDDEN_SIZE,
            USE_GLOVE, FLAT_OUT_SIZE).
        pretrained_emb: numpy array of pretrained word vectors; assumed
            shape (token_size, WORD_EMBED_SIZE) — confirm against loader.
        token_size: vocabulary size.
        answer_size: number of answer classes.
    """
    super(Net, self).__init__()
    self.__C = __C

    self.embedding = nn.Embedding(
        num_embeddings=token_size,
        embedding_dim=__C.WORD_EMBED_SIZE
    )

    # Initialize the embedding table with pretrained (GloVe) weights.
    if __C.USE_GLOVE:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    # Multimodal LSTM: second_input_size=2048 matches the usual
    # Faster R-CNN region-feature width — TODO confirm against Adapter.
    self.lstm = LSTM_MultiModal(
        input_size=__C.WORD_EMBED_SIZE,
        hidden_size=__C.HIDDEN_SIZE,
        second_input_size=2048
    )

    self.adapter = Adapter(__C)
    self.backbone = MCA_ED(__C)

    # Flatten attended sequences to single vectors.
    self.attflat_img = AttFlat(__C)
    self.attflat_lang = AttFlat(__C)

    # Classification head.
    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
def __init__(self, __C, pretrained_emb, token_size, answer_size):
    """Build the network: word embedding, LSTM encoder, NAS
    encoder-decoder backbone, relation-embedding projection,
    attention flattening, and a linear answer head.

    Args:
        __C: config object (reads WORD_EMBED_SIZE, HIDDEN_SIZE,
            USE_GLOVE, REL_SIZE, FLAT_OUT_SIZE).
        pretrained_emb: numpy array of pretrained word vectors.
        token_size: vocabulary size.
        answer_size: number of answer classes.
    """
    super(Net, self).__init__()
    self.__C = __C

    embed_dim = __C.WORD_EMBED_SIZE
    hidden_size = __C.HIDDEN_SIZE
    flat_size = __C.FLAT_OUT_SIZE

    self.embedding = nn.Embedding(
        num_embeddings=token_size,
        embedding_dim=embed_dim
    )
    # Copy pretrained (GloVe) vectors into the embedding table.
    if __C.USE_GLOVE:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    self.lstm = nn.LSTM(
        input_size=embed_dim,
        hidden_size=hidden_size,
        num_layers=1,
        batch_first=True
    )

    self.adapter = Adapter(__C)
    self.backbone = NAS_ED(__C)

    # Project 4-dim geometric relation features to REL_SIZE.
    self.linear_rel = nn.Linear(in_features=4, out_features=__C.REL_SIZE)
    self.relu = nn.ReLU()

    # Flatten attended sequences to single vectors.
    self.attflat_img = AttFlat(__C)
    self.attflat_lang = AttFlat(__C)

    # Classification head.
    self.proj_norm = LayerNorm(flat_size)
    self.proj = nn.Linear(flat_size, answer_size)
def __init__(self, __C, pretrained_emb, token_size, answer_size,
             token_to_ix):
    """Build the BERT-style VQA network: word embedding, bidirectional
    LSTM with projection, adapter, VQA_BERT backbone, per-modality
    poolers, and a classification head.

    Args:
        __C: config object (reads WORD_EMBED_SIZE, HIDDEN_SIZE,
            USE_GLOVE, DROPOUT_R).
        pretrained_emb: numpy array of pretrained word vectors.
        token_size: vocabulary size.
        answer_size: number of answer classes.
        token_to_ix: token -> index mapping, kept for later lookup.
    """
    super(Net, self).__init__()
    self.__C = __C
    self.token_to_ix = token_to_ix

    embed_dim = __C.WORD_EMBED_SIZE
    hidden_size = __C.HIDDEN_SIZE

    self.embedding = nn.Embedding(
        num_embeddings=token_size,
        embedding_dim=embed_dim
    )
    # Copy pretrained (GloVe) vectors into the embedding table.
    if __C.USE_GLOVE:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    # Bidirectional LSTM doubles the feature width; project back down.
    self.lstm = nn.LSTM(
        input_size=embed_dim,
        hidden_size=hidden_size,
        num_layers=1,
        batch_first=True,
        bidirectional=True
    )
    self.lstm_proj = nn.Linear(hidden_size * 2, hidden_size)
    self.token_proj = nn.Linear(embed_dim, hidden_size)

    self.adapter = Adapter(__C)
    self.backbone = VQA_BERT(__C)
    self.text_pooler = BERTPooler(__C)
    self.img_pooler = BERTPooler(__C)

    # Classification head: dense -> tanh -> norm -> dropout -> logits.
    self.dense = nn.Linear(hidden_size, hidden_size)
    self.activation = nn.Tanh()
    self.layer_norm = LayerNorm(hidden_size)
    self.dropout = nn.Dropout(p=__C.DROPOUT_R)
    self.cls = nn.Linear(hidden_size, answer_size)
def __init__(self, __C):
    """Guided-attention block: one multi-head attention with residual
    dropout and LayerNorm (presumably cross-modal — see forward()).

    Args:
        __C: config object (reads DROPOUT_R, HIDDEN_SIZE).
    """
    super(GA, self).__init__()

    self.mhatt = MHAtt(__C)
    self.dropout = nn.Dropout(p=__C.DROPOUT_R)
    self.norm = LayerNorm(__C.HIDDEN_SIZE)
def __init__(self, __C, size=1024):
    """Relation self-attention block: relation-aware multi-head
    attention with residual dropout and LayerNorm.

    Args:
        __C: config object (reads DROPOUT_R, HIDDEN_SIZE).
        size: NOTE(review) — unused in this constructor; kept for
            signature compatibility with callers. TODO: confirm it is
            not needed (e.g. for LayerNorm width) and remove upstream.
    """
    super(RSA, self).__init__()
    self.mhatt = RelMHAtt(__C)
    self.dropout = nn.Dropout(__C.DROPOUT_R)
    self.norm = LayerNorm(__C.HIDDEN_SIZE)
def __init__(self, __C, pretrained_emb, token_size, answer_size,
             token_to_ix):
    """Build the single-stream transformer VQA network: word/segment
    embeddings, bidirectional LSTM with projection, image encoder with
    positional embedding, transformer backbone, and a classifier head.

    Args:
        __C: config object (reads WORD_EMBED_SIZE, HIDDEN_SIZE, DROPOUT_R).
        pretrained_emb: numpy array of pretrained word vectors; copied
            unconditionally (no USE_GLOVE guard in this variant).
        token_size: vocabulary size.
        answer_size: number of answer classes.
        token_to_ix: token -> index mapping, kept for later lookup.
    """
    super(Net, self).__init__()
    self.__C = __C
    self.token_to_ix = token_to_ix

    self.word_embedding = nn.Embedding(
        num_embeddings=token_size,
        embedding_dim=__C.WORD_EMBED_SIZE
    )
    # Initialize with pretrained (GloVe) vectors.
    self.word_embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    # Segment embedding distinguishing the two modalities (text / image).
    self.segment_embedding = nn.Embedding(2, __C.HIDDEN_SIZE)

    # Bidirectional LSTM doubles the feature width; project back down.
    self.lstm = nn.LSTM(
        input_size=__C.WORD_EMBED_SIZE,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True,
        bidirectional=True
    )
    self.lstm_proj = nn.Linear(__C.HIDDEN_SIZE * 2, __C.HIDDEN_SIZE)
    self.cls_project = nn.Linear(__C.WORD_EMBED_SIZE, __C.HIDDEN_SIZE)

    self.img_encoder = Adapter(__C)
    # 2-dim image position features projected to hidden width.
    self.img_pos_emb = nn.Linear(2, __C.HIDDEN_SIZE)

    self.transformer = Transformer(__C)

    self.layer_norm1 = LayerNorm(__C.HIDDEN_SIZE)
    # NOTE(review): attribute name keeps the historical misspelling
    # ("embbeding") — renaming would break forward() and checkpoints.
    self.embbeding_dropout = nn.Dropout(__C.DROPOUT_R)

    # Classification head.
    self.pooler = TransformerPooler(__C)
    self.dense = nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE)
    self.activation = nn.Tanh()
    self.layer_norm2 = LayerNorm(__C.HIDDEN_SIZE)
    self.cls_dropout = nn.Dropout(__C.DROPOUT_R)
    self.classifier = nn.Linear(__C.HIDDEN_SIZE, answer_size)
def __init__(self, __C):
    """Position-wise feed-forward block: MLP with 4x hidden expansion,
    followed by dropout and LayerNorm.

    Args:
        __C: config object (reads HIDDEN_SIZE, DROPOUT_R).
    """
    super(FFN, self).__init__()

    hidden = __C.HIDDEN_SIZE
    drop_rate = __C.DROPOUT_R

    self.mlp = MLP(
        in_size=hidden,
        mid_size=hidden * 4,
        out_size=hidden,
        dropout_r=drop_rate,
        use_relu=True,
    )
    self.dropout = nn.Dropout(p=drop_rate)
    self.norm = LayerNorm(hidden)
def __init__(self, __C, pretrained_emb, token_size, answer_size):
    """Build the network with a configurable text encoder (full BERT,
    frozen BERT features, or GloVe embedding), an LSTM, MCA
    encoder-decoder backbone, attention flattening, and answer head.

    Args:
        __C: config object (reads BERT_ENCODER, USE_BERT, BERT_VER,
            USE_GLOVE, WORD_EMBED_SIZE, HIDDEN_SIZE, FLAT_OUT_SIZE).
        pretrained_emb: numpy array of pretrained word vectors (used
            only in the GloVe branch).
        token_size: vocabulary size (used only in the GloVe branch).
        answer_size: number of answer classes.
    """
    super(Net, self).__init__()
    self.__C = __C

    if self.__C.BERT_ENCODER:
        # Trainable BERT as the full text encoder.
        self.encoder = BertModel.from_pretrained(self.__C.BERT_VER)
    elif self.__C.USE_BERT:
        # BERT as a frozen feature extractor (all hidden states exposed).
        # (`not BERT_ENCODER` is guaranteed here by the elif.)
        self.bert_layer = BertModel.from_pretrained(
            self.__C.BERT_VER, output_hidden_states=True)
        # Freeze BERT layers.
        for param in self.bert_layer.parameters():
            param.requires_grad = False
    elif __C.USE_GLOVE:
        # GloVe-initialized trainable embedding table.
        self.embedding = nn.Embedding(
            num_embeddings=token_size,
            embedding_dim=__C.WORD_EMBED_SIZE
        )
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    # NOTE(review): the LSTM is built unconditionally, even in the BERT
    # branches — presumably only used on the GloVe path; confirm in
    # forward().
    self.lstm = nn.LSTM(
        input_size=__C.WORD_EMBED_SIZE,
        hidden_size=__C.HIDDEN_SIZE,
        num_layers=1,
        batch_first=True
    )

    self.adapter = Adapter(__C)
    self.backbone = MCA_ED(__C)

    # Flatten attended sequences to single vectors.
    self.attflat_img = AttFlat(__C)
    self.attflat_lang = AttFlat(__C)

    # Classification head.
    self.proj_norm = LayerNorm(__C.FLAT_OUT_SIZE)
    self.proj = nn.Linear(__C.FLAT_OUT_SIZE, answer_size)
def __init__(self, size, ans_size):
    """Answer classifier head: LayerNorm followed by a linear projection.

    Args:
        size: input feature dimension.
        ans_size: number of answer classes (output logits).
    """
    super(Classifier, self).__init__()

    self.proj_norm = LayerNorm(size)
    self.proj = nn.Linear(in_features=size, out_features=ans_size)