def __init__(self, dataset, feature_size, vocab_size):
    super().__init__()
    self.dataset = dataset
    self.feature_size = feature_size
    hidden_size = args.hid_dim
    self.hidden_size = hidden_size
    self.emb_dim = args.emb_dim

    # ResNet feature extractor (conv features only, frozen)
    resnet_extractor = models.resnet101(pretrained=True)
    modules = list(resnet_extractor.children())[:-2]
    self.resnet_extractor = nn.Sequential(*modules)
    for p in self.resnet_extractor.parameters():
        p.requires_grad = False

    # ResNet feat mean and std
    self.feat_mean = torch.from_numpy(dataset.feat_mean).cuda()
    self.feat_std = torch.from_numpy(dataset.feat_std).cuda()

    # Image encoder
    self.pre_fc = LinearAct(self.feature_size, self.hidden_size, 'relu')

    # Lang encoder
    self.w_emb = nn.Embedding(vocab_size, self.emb_dim)
    self.drop = nn.Dropout(args.dropout)
    self.lstm = nn.LSTM(self.emb_dim, hidden_size, batch_first=True)

    # Decision
    self.classify = nn.Sequential(
        LinearAct(hidden_size * 3, hidden_size, 'tanh'),
        self.drop,
        LinearAct(hidden_size, 1, bias=False),
    )
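
# LinearAct is used throughout this excerpt but is not defined here. The class
# below is a minimal sketch of what such a helper could look like, assuming it
# wraps nn.Linear with an optional activation chosen by name ('relu' / 'tanh')
# and an optional bias; the repository's actual implementation may differ.
import torch.nn as nn

class LinearActSketch(nn.Module):
    """Hypothetical stand-in for LinearAct: a linear layer plus optional activation."""

    def __init__(self, in_dim, out_dim, act=None, bias=True):
        super().__init__()
        self.fc = nn.Linear(in_dim, out_dim, bias=bias)
        # Map the activation name to a module; None means identity.
        self.act = {'relu': nn.ReLU(), 'tanh': nn.Tanh(), None: nn.Identity()}[act]

    def forward(self, x):
        return self.act(self.fc(x))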
def __init__(self, dim, hid_dim=None):
    super().__init__()
    if hid_dim is None:
        hid_dim = dim
    self.dim = dim
    self.hid_dim = hid_dim
    self.q_fc = LinearAct(dim, hid_dim, bias=False)
    self.k_fc = LinearAct(dim, hid_dim, bias=False)
    self.mix = LinearAct(3 * dim, dim, 'relu', bias=False)
def __init__(self, query_dim, feat_dim, hid_dim, out_dim=None):
    super().__init__()
    self.hid_dim = hid_dim
    if out_dim is None:
        out_dim = hid_dim
    self.query_linear = LinearAct(query_dim, hid_dim)
    self.src_linear = LinearAct(feat_dim, hid_dim)
    self.trg_linear = self.src_linear
    self.out_linear = LinearAct(hid_dim + query_dim, out_dim)
def __init__(self, query_dim, feat_dim, hid_dim, out_dim=None):
    super().__init__()
    self.hid_dim = hid_dim
    if out_dim is None:
        out_dim = hid_dim
    self.query_linear = LinearAct(query_dim, hid_dim, bias=False)
    self.src_linear = LinearAct(feat_dim, hid_dim, bias=False)
    self.trg_linear = self.src_linear
    self.out_linear = LinearAct(hid_dim * 4 + query_dim, out_dim, act='relu', bias=False)
def __init__(self, query_dim, feat_dim, hid_dim, out_dim=None):
    super().__init__()
    self.hid_dim = hid_dim
    if out_dim is None:
        out_dim = hid_dim
    self.query_linear = LinearAct(query_dim, hid_dim)
    self.src_linear = LinearAct(feat_dim, hid_dim)
    self.trg_linear = self.src_linear
    # self.alpha = 1.
    self.alpha = nn.Parameter(torch.ones(1))
    self.out_linear = LinearAct(hid_dim * 2 + query_dim, out_dim, act='relu')
def __init__(self, ntoken, ctx_size):
    super().__init__()
    hidden_size = args.hid_dim
    self.hidden_size = hidden_size
    self.ctx_size = ctx_size
    self.emb_dim = args.emb_dim
    self.img_fc = LinearAct(ctx_size, hidden_size, 'tanh')
    self.w_emb = nn.Embedding(ntoken, self.emb_dim)
    self.drop = nn.Dropout(args.dropout)
    self.lstm = nn.LSTM(self.emb_dim, hidden_size, batch_first=True)
    self.att = DynamicAttention2(
        query_dim=hidden_size,
        feat_dim=hidden_size,
        hid_dim=hidden_size,
        out_dim=hidden_size,
    )
    self.projection = LinearAct(hidden_size, ntoken)
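
# The decoder above wires an image projection (img_fc), a word embedding, an
# LSTM and an attention module into a speaker/captioning-style model. Its
# forward pass is not part of this excerpt; the function below is a generic,
# self-contained sketch of how such modules are commonly composed, using plain
# dot-product attention as a stand-in for DynamicAttention2. This is an
# illustrative assumption, not the repository's implementation.
import torch
import torch.nn.functional as F

def decode_step_sketch(ctx, words, img_fc, w_emb, drop, lstm, projection, h0=None):
    """ctx: [B, N, ctx_size] image features; words: [B, T] token ids."""
    ctx = img_fc(ctx)                              # [B, N, H] project image context
    emb = drop(w_emb(words))                       # [B, T, E] embed and drop words
    out, h = lstm(emb, h0)                         # [B, T, H] run the LSTM
    scores = torch.bmm(out, ctx.transpose(1, 2))   # [B, T, N] similarity scores
    weights = F.softmax(scores, dim=-1)
    attended = torch.bmm(weights, ctx)             # [B, T, H] attended image context
    logits = projection(out + attended)            # [B, T, ntoken] vocabulary logits
    return logits, h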
def __init__(self, ntoken, ctx_size, heads=1):
    super().__init__()
    hidden_size = args.hid_dim
    self.hidden_size = hidden_size
    self.ctx_size = ctx_size
    self.emb_dim = args.emb_dim
    self.heads = heads
    self.img_fc = LinearAct(ctx_size, hidden_size, 'tanh')
    self.w_emb = nn.Embedding(ntoken, self.emb_dim)
    self.drop = nn.Dropout(args.dropout)
    self.lstm = nn.LSTM(self.emb_dim, hidden_size, batch_first=True)
    # self.attention_layers = []
    # for _ in range(self.heads):
    #     self.attention_layers.append(
    #         TransformerAttention(hidden_size, ctx_size, hidden_size, merge_info=True).cuda()
    #     )
    self.attention_layers = nn.ModuleList([
        OneAttention(hidden_size, hidden_size) for _ in range(self.heads)
    ])
    self.projection = LinearAct(hidden_size, ntoken)
def __init__(self, ntoken, ctx_size, heads=2):
    super().__init__()
    hidden_size = args.hid_dim
    self.hidden_size = hidden_size
    self.ctx_size = ctx_size
    self.emb_dim = args.emb_dim
    self.heads = heads
    self.img_fc = LinearAct(ctx_size, hidden_size, 'tanh')
    self.w_emb = nn.Embedding(ntoken, self.emb_dim)
    self.drop = nn.Dropout(args.dropout)
    self.lstm = nn.LSTM(self.emb_dim, hidden_size, batch_first=True)
    self.att = DynamicAttention5(
        query_dim=hidden_size,
        feat_dim=hidden_size,
        hid_dim=hidden_size,
        out_dim=hidden_size,
    )
    self.attention_layers = nn.ModuleList([
        OneAttention(hidden_size, hidden_size) for _ in range(self.heads)
    ])
    self.projection = LinearAct(hidden_size, ntoken)
def __init__(self, query_dim, feat_dim, hid_dim, out_dim=None):
    super().__init__()
    self.hid_dim = hid_dim
    if out_dim is None:
        out_dim = hid_dim
    self.query_linear = LinearAct(query_dim, hid_dim)
    self.src_linear = LinearAct(feat_dim, hid_dim)
    self.trg_linear = LinearAct(feat_dim, hid_dim)
    self.src_value_linear = LinearAct(feat_dim, hid_dim)
    self.trg_value_linear = LinearAct(feat_dim, hid_dim)
    self.out_linear = LinearAct(hid_dim * 2 + query_dim, out_dim, act='relu')
def __init__(self, dim):
    super().__init__()
    self.fc = nn.Sequential(
        LinearAct(dim, dim // 2, 'relu', bias=False),
        LinearAct(dim // 2, 1, bias=False),
    )
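
# The module above is a small scoring MLP: it maps each feature vector to a
# single scalar (dim -> dim // 2 -> 1). A typical use of such a scorer is
# attention pooling over a set of features; the helper below is an
# illustrative sketch of that pattern, not code taken from the repository.
import torch
import torch.nn.functional as F

def attention_pool_sketch(score_mlp, feats):
    """feats: [B, N, dim] -> pooled [B, dim] via softmax-normalized scores."""
    scores = score_mlp(feats).squeeze(-1)      # [B, N] one scalar score per feature
    weights = F.softmax(scores, dim=-1)        # [B, N] attention weights
    return torch.bmm(weights.unsqueeze(1), feats).squeeze(1)  # [B, dim] weighted sum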