Ejemplo n.º 1
0
 def __init__(self, in_features, mid_features, out_features, drop):
     """Two-input fusion head.

     ``in_features`` must be an indexable pair: ``in_features[0]`` sizes the
     first linear branch and ``in_features[1]`` the second.  Both branches
     are reduced to ``mid_features`` before the shared FCNet layers.
     """
     super().__init__()
     first_size, second_size = in_features[0], in_features[1]
     self.drop = nn.Dropout(drop)
     self.lin11 = nn.Linear(first_size, mid_features)
     self.lin12 = nn.Linear(second_size, mid_features)
     self.lin2 = FCNet(mid_features, mid_features, activate='relu')
     self.lin3 = FCNet(mid_features, out_features)
Ejemplo n.º 2
0
    def __init__(self,
                 num_block,
                 v_size,
                 q_size,
                 output_size,
                 num_inter_head,
                 num_intra_head,
                 drop=0.0):
        """Stack of ``num_block`` update triples.

        Each triple consists of two one-sided inter-modality updates followed
        by one dynamic intra-modality update, all operating in the
        ``output_size`` feature space.
        """
        super(MultiBlock, self).__init__()
        self.v_size = v_size
        self.q_size = q_size
        self.output_size = output_size
        self.num_inter_head = num_inter_head
        self.num_intra_head = num_intra_head
        self.num_block = num_block

        # Input projections into the common output space.
        self.v_lin = FCNet(v_size, output_size, drop=drop)
        self.q_lin = FCNet(q_size, output_size, drop=drop)
        self.trans_lin = FCNet(q_size, output_size, drop=drop)

        blocks = []
        for _ in range(num_block):
            blocks.extend([
                OneSideInterModalityUpdate(output_size, output_size,
                                           output_size, num_inter_head, drop),
                OneSideInterModalityUpdate(output_size, output_size,
                                           output_size, num_inter_head, drop),
                DyIntraModalityUpdate(output_size, output_size, output_size,
                                      num_intra_head, drop),
            ])
        self.multi_blocks = nn.ModuleList(blocks)
Ejemplo n.º 3
0
    def __init__(self,
                 num_block,
                 v_size,
                 q_size,
                 output_size,
                 num_inter_head,
                 num_intra_head,
                 drop=0.0):
        """One shared block of inter/intra-modality updates.

        Unlike MultiBlock, a single v->q update, a single q->v update and a
        single intra-modality update are built; ``num_block`` is stored but
        not used to build modules here (presumably the block is iterated in
        ``forward`` — confirm there).
        """
        super(SingleBlock, self).__init__()
        self.v_size = v_size
        self.q_size = q_size
        self.output_size = output_size
        self.num_inter_head = num_inter_head
        self.num_intra_head = num_intra_head
        self.num_block = num_block

        # Input projections into the common output space.
        self.v_lin = FCNet(v_size, output_size, drop=drop)
        self.q_lin = FCNet(q_size, output_size, drop=drop)

        # Both one-sided updates share the same constructor arguments.
        inter_args = (output_size, output_size, output_size,
                      num_inter_head, drop)
        self.v2q_interBlock = OneSideInterModalityUpdate(*inter_args)
        self.q2v_interBlock = OneSideInterModalityUpdate(*inter_args)
        self.intraBlock = DyIntraModalityUpdate(output_size, output_size,
                                                output_size, num_intra_head,
                                                drop)
Ejemplo n.º 4
0
 def __init__(self, in_features, mid_features, out_features, drop=0.0):
     """Two-layer MLP classifier head.

     The hidden layer uses ReLU and a reduced dropout rate (drop / 2.5);
     the output layer uses the full dropout rate.
     """
     super(Classifier, self).__init__()
     reduced_drop = drop / 2.5  # lighter regularisation on the hidden layer
     self.lin1 = FCNet(in_features,
                       mid_features,
                       activate='relu',
                       drop=reduced_drop)
     self.lin2 = FCNet(mid_features, out_features, drop=drop)
Ejemplo n.º 5
0
    def __init__(self, src_size, tgt_size, output_size, num_head, drop=0.0):
        """One-directional update from a source modality to a target modality."""
        super(OneSideInterModalityUpdate, self).__init__()
        self.src_size = src_size
        self.tgt_size = tgt_size
        self.output_size = output_size
        self.num_head = num_head

        # The source projection is twice the output width — presumably split
        # into two chunks downstream; confirm in forward.
        double_width = output_size * 2
        self.src_lin = FCNet(src_size, double_width, drop=drop)
        self.tgt_lin = FCNet(tgt_size, output_size, drop=drop)

        # Final layer consumes updated features concatenated with the
        # original target features.
        self.tgt_output = FCNet(output_size + tgt_size, output_size, drop=drop)
Ejemplo n.º 6
0
    def __init__(self, hidden_size, mid_size, num_token):
        """Reasoning cell with hidden/question projections and three encoders.

        ``num_token`` is accepted for interface compatibility but is not used
        in this constructor.
        """
        super(TimeReasoningCell, self).__init__()
        self.linear_h = FCNet(hidden_size, mid_size, activate='relu')
        self.linear_q = FCNet(mid_size, mid_size, activate='relu')
        self.hidden_size = hidden_size

        # Three encoders with num_layers=0; the second positional argument's
        # meaning is defined by Encoder — confirm there.
        self.encoder_prior = Encoder(hidden_size, 1, num_layers=0)
        self.encoder_posterior = Encoder(mid_size, 1, num_layers=0)
        self.encoder_back = Encoder(mid_size, 1, num_layers=0)
Ejemplo n.º 7
0
    def __init__(self, v_features, q_features, mid_features, glimpses, drop=0.0):
        """Bilinear attention over visual and question features.

        Both modalities are projected into an augmented hidden space of
        ``mid_features * hidden_aug`` dimensions; a per-glimpse weight and
        bias parameter produce the attention logits.
        """
        super(BiAttention, self).__init__()
        self.hidden_aug = 3
        self.glimpses = glimpses
        aug_size = int(mid_features * self.hidden_aug)
        self.lin_v = FCNet(v_features, aug_size, activate='relu', drop=drop / 2.5)  # let self.lin take care of bias
        self.lin_q = FCNet(q_features, aug_size, activate='relu', drop=drop / 2.5)

        # Randomly initialised per-glimpse weight and bias.
        self.h_weight = nn.Parameter(
            torch.Tensor(1, glimpses, 1, aug_size).normal_())
        self.h_bias = nn.Parameter(torch.Tensor(1, glimpses, 1, 1).normal_())

        self.drop = nn.Dropout(drop)
Ejemplo n.º 8
0
    def __init__(self, v_size, q_size, output_size, num_head, drop=0.0):
        """Bidirectional inter-modality update between v and q features."""
        super(InterModalityUpdate, self).__init__()
        self.v_size = v_size
        self.q_size = q_size
        self.output_size = output_size
        self.num_head = num_head

        # Each modality is projected to three times the output width —
        # presumably split into three chunks downstream; confirm in forward.
        triple_width = output_size * 3
        self.v_lin = FCNet(v_size, triple_width, drop=drop)
        self.q_lin = FCNet(q_size, triple_width, drop=drop)

        # Output layers consume updated features concatenated with originals.
        self.v_output = FCNet(output_size + v_size, output_size, drop=drop)
        self.q_output = FCNet(output_size + q_size, output_size, drop=drop)
Ejemplo n.º 9
0
    def __init__(self, **kwargs):
        """Build the network; every kwarg becomes an attribute.

        Expected kwargs (read in this constructor):
             vocab (dict with 'answer_token_to_idx' / 'question_token_to_idx'),
             dim_word,
             dim_hidden,
             dim_vision,
             dim_edge,
             mid_size,
             state_size,
             glimpses,
             dropout_prob,
             device,
        """
        super(Net, self).__init__()
        for k, v in kwargs.items():
            setattr(self, k, v)

        self.num_classes = len(self.vocab['answer_token_to_idx'])

        self.num_token = len(self.vocab['question_token_to_idx'])

        self.classifier = Classifier(
            in_features=(self.glimpses * self.dim_vision, self.mid_size),
            mid_features=self.dim_hidden,
            out_features=self.num_classes,
            drop=self.dropout_prob)

        self.token_embedding = nn.Embedding(self.num_token, self.dim_word)

        # Freeze the word embeddings.  BUG FIX: the original wrote
        # `p.require_grad` (missing the trailing "s"), which merely created a
        # new attribute and left the embeddings trainable.
        for p in self.token_embedding.parameters():
            p.requires_grad = False

        self.dropout = nn.Dropout(self.dropout_prob)

        self.text = modulars.BiGRUEncoder_2(
            dim_word=self.dim_word,
            dim_hidden=self.dim_hidden,
        )

        self.vision_to_v = FCNet(self.dim_vision,
                                 self.dim_hidden,
                                 drop=0.3,
                                 bias=False)
        self.map_two_v_to_edge = FCNet(self.dim_hidden * 2,
                                       self.dim_edge,
                                       bias=False)

        self.timereasoning = TimeReasoning(hidden_size=self.dim_vision,
                                           mid_size=self.mid_size,
                                           state_size=self.state_size,
                                           num_token=self.num_token,
                                           edge_size=self.dim_edge)
Ejemplo n.º 10
0
 def __init__(self,
              v_features,
              q_features,
              mid_features,
              num_obj,
              drop=0.0):
     """Project visual and question features into a shared mid space for a
     single attention application.

     ``num_obj`` is accepted for interface compatibility but is not used in
     this constructor.
     """
     super(ApplySingleAttention, self).__init__()
     # Both input projections share the same activation and dropout.
     shared_kwargs = dict(activate='relu', drop=drop)
     self.lin_v = FCNet(v_features, mid_features, **shared_kwargs)  # let self.lin take care of bias
     self.lin_q = FCNet(q_features, mid_features, **shared_kwargs)
     self.lin_atten = FCNet(mid_features, mid_features, drop=drop)
Ejemplo n.º 11
0
    def __init__(self, v_size, q_size, output_size, num_head, drop=0.0):
        """Dynamic intra-modality update with cross-modality gating layers."""
        super(DyIntraModalityUpdate, self).__init__()
        self.v_size = v_size
        self.q_size = q_size
        self.output_size = output_size
        self.num_head = num_head

        # Naming suggests gates for one modality computed from the other
        # (v4q: v features gating q, q4v: the reverse) — confirm in forward.
        self.v4q_gate_lin = FCNet(v_size, output_size, drop=drop)
        self.q4v_gate_lin = FCNet(q_size, output_size, drop=drop)

        # Each modality is projected to three times the output width —
        # presumably split into three chunks downstream; confirm in forward.
        triple_width = output_size * 3
        self.v_lin = FCNet(v_size, triple_width, drop=drop)
        self.q_lin = FCNet(q_size, triple_width, drop=drop)

        self.v_output = FCNet(output_size, output_size, drop=drop)
        self.q_output = FCNet(output_size, output_size, drop=drop)

        # Shared element-wise nonlinearities.
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
Ejemplo n.º 12
0
 def __init__(self, edge_size, hidden_size):
     """Map the hidden state into edge space and reduce each edge to a
     single dropout-regularised output unit."""
     super(TransCell, self).__init__()
     self.linear_q = FCNet(hidden_size, edge_size)
     self.linear_out = FCNet(edge_size, 1, drop=0.1)
Ejemplo n.º 13
0
    def __init__(self, **kwargs):
        """Build the network; every kwarg becomes an attribute.

        Expected kwargs (read in this constructor):
             vocab (dict with 'answer_token_to_idx' / 'question_token_to_idx'),
             dim_word,
             dim_hidden,
             dim_vision,
             dim_edge,
             mid_size,
             state_size,
             dropout_prob,
             device,
        """
        super(Net, self).__init__()
        for k, v in kwargs.items():
            setattr(self, k, v)

        # Fixed attention configuration for this variant.
        glimpses = 5
        objects = 10

        self.num_classes = len(self.vocab['answer_token_to_idx'])

        self.num_token = len(self.vocab['question_token_to_idx'])

        self.classifier = Classifier(in_features=self.mid_size,
                                     mid_features=self.dim_hidden * 2,
                                     out_features=self.num_classes,
                                     drop=self.dropout_prob)

        self.token_embedding = nn.Embedding(self.num_token, self.dim_word)

        # Freeze the word embeddings.  BUG FIX: the original wrote
        # `p.require_grad` (missing the trailing "s"), which merely created a
        # new attribute and left the embeddings trainable.
        for p in self.token_embedding.parameters():
            p.requires_grad = False

        self.dropout = nn.Dropout(self.dropout_prob)

        self.text = modulars.BiGRUEncoder_2(
            dim_word=self.dim_word,
            dim_hidden=self.dim_hidden,
        )

        self.vision_to_v = FCNet(self.dim_vision,
                                 self.dim_hidden,
                                 drop=0.3,
                                 bias=False)
        self.map_two_v_to_edge = FCNet(self.dim_hidden * 2,
                                       self.dim_edge,
                                       bias=False)

        self.timereasoning = TimeReasoning(hidden_size=self.dim_vision,
                                           mid_size=self.mid_size,
                                           state_size=self.state_size,
                                           num_token=self.num_token,
                                           edge_size=self.dim_edge)

        self.count = Counter(objects)

        # Weight-normalise the attention's per-glimpse weight parameter.
        self.attention = weight_norm(BiAttention(
            v_features=self.dim_vision,
            q_features=self.dim_hidden,
            mid_features=self.dim_hidden,
            glimpses=glimpses,
            drop=0.5,
        ),
                                     name='h_weight',
                                     dim=None)

        self.apply_attention = ApplyAttention(
            v_features=self.dim_vision,
            q_features=self.dim_hidden,
            mid_features=self.dim_hidden,
            glimpses=glimpses,
            num_obj=objects,
            drop=0.2,
        )
Ejemplo n.º 14
0
 def __init__(self, edge_size, hidden_size, mid_size=256):
     """Project edge and hidden features into a shared space and reduce to a
     single output unit.

     Args:
         edge_size: dimensionality of the edge features.
         hidden_size: dimensionality of the hidden/question features.
         mid_size: width of the shared projection space.  Defaults to 256,
             the previously hard-coded value, so existing callers are
             unaffected.
     """
     super(TransCell, self).__init__()
     self.linear_r = FCNet(edge_size, mid_size)
     self.linear_q = FCNet(hidden_size, mid_size)
     self.linear_out = FCNet(mid_size, 1)