Example #1
    def __init__(self, dims, act="ReLU", dropout=0):
        super(FCNet, self).__init__()

        layers = []
        for i in range(len(dims) - 2):
            in_dim = dims[i]
            out_dim = dims[i + 1]

            if dropout > 0:
                layers.append(nn.Dropout(dropout))

            layers.append(weight_norm(nn.Linear(in_dim, out_dim), dim=None))

            if act is not None:
                layers.append(getattr(nn, act)())

        if dropout > 0:
            layers.append(nn.Dropout(dropout))

        layers.append(weight_norm(nn.Linear(dims[-2], dims[-1]), dim=None))

        if act is not None:
            layers.append(getattr(nn, act)())

        self.main = nn.Sequential(*layers)
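All of these snippets wrap nn.Linear in torch.nn.utils.weight_norm with dim=None, which reparameterizes the weight as a direction tensor weight_v and a single scalar magnitude weight_g. A minimal, self-contained sketch of that behavior (the layer sizes and input below are illustrative, not taken from any example on this page):

import torch
import torch.nn as nn
from torch.nn.utils import weight_norm

# weight_norm replaces the module's `weight` with two learnable parameters:
# `weight_v` (direction, same shape as the original weight) and `weight_g`
# (magnitude). With dim=None the magnitude is a single scalar.
lin = weight_norm(nn.Linear(16, 8), dim=None)
print(lin.weight_v.shape)    # torch.Size([8, 16])
print(lin.weight_g.shape)    # torch.Size([]) -- scalar magnitude when dim=None
y = lin(torch.randn(4, 16))  # the forward pass is unchanged; y has shape (4, 8)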
Example #2
    def __init__(self, in_dim, hid_dim, out_dim, dropout):
        super(QAmodel, self).__init__()
        self.grad_reverse = GradReverse.apply

        self.l1 = weight_norm(nn.Linear(in_dim, hid_dim), dim=None)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout(dropout, inplace=True)
        self.l2 = weight_norm(nn.Linear(hid_dim, out_dim), dim=None)
Example #3
    def __init__(self, x_dim, y_dim, z_dim, glimpse, dropout=[.2, .5]):
        super(BiAttention_both, self).__init__()

        self.glimpse = glimpse
        self.logits_v = weight_norm(BCNet(x_dim, y_dim, z_dim, glimpse, dropout=dropout, k=3), \
            name='h_mat', dim=None)
        self.logits_q = weight_norm(BCNet_q(v_num, x_dim, y_dim, z_dim, glimpse, dropout=dropout, k=3), \
            name='h_mat', dim=None)
Example #4
 def __init__(self, v_dim, q_dim, num_hid, dropout=0.2, max_len=35):
     super(CoAttention, self).__init__()
     self.v_proj = FCNet([v_dim, num_hid])
     self.q_proj = FCNet([q_dim, num_hid])
     self.tran_linear = weight_norm(nn.Linear(num_hid, num_hid))
     self.dropout = nn.Dropout(dropout)
     self.linear_q = weight_norm(nn.Linear(max_len, 1), dim=None)
     self.linear_v = weight_norm(nn.Linear(max_len, 1), dim=None)
Example #5
 def __init__(self, in_dim, hid_dim, out_dim, dropout):
     super(SimpleClassifier, self).__init__()
     layers = [weight_norm(nn.Linear(in_dim, hid_dim), dim=None),
               nn.ReLU(),
               nn.Dropout(dropout, inplace=True),
               weight_norm(nn.Linear(hid_dim, out_dim), dim=None)
               ]
     self.main = nn.Sequential(*layers)
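The two-layer classifier pattern above recurs in several later examples; the snippets only show __init__, so here is a hedged sketch of how such a module is typically completed and called (the forward method, dimensions, and batch below are assumptions):

import torch
import torch.nn as nn
from torch.nn.utils import weight_norm

class SimpleClassifier(nn.Module):
    """Two weight-normalized linear layers with ReLU and dropout in between."""
    def __init__(self, in_dim, hid_dim, out_dim, dropout):
        super().__init__()
        self.main = nn.Sequential(
            weight_norm(nn.Linear(in_dim, hid_dim), dim=None),
            nn.ReLU(),
            nn.Dropout(dropout, inplace=True),
            weight_norm(nn.Linear(hid_dim, out_dim), dim=None),
        )

    def forward(self, x):  # assumed; not part of the original snippet
        return self.main(x)

clf = SimpleClassifier(in_dim=1024, hid_dim=512, out_dim=10, dropout=0.5)
clf.eval()                               # disable dropout for this demo forward pass
print(clf(torch.randn(2, 1024)).shape)   # torch.Size([2, 10])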
Example #6
 def __init__(self, in_dim, out_dim, **kwargs):
     super(WeightNormClassifier, self).__init__()
     layers = [
         weight_norm(nn.Linear(in_dim, kwargs['hidden_dim']), dim=None),
         nn.ReLU(),
         nn.Dropout(kwargs['dropout'], inplace=True),
         weight_norm(nn.Linear(kwargs['hidden_dim'], out_dim), dim=None)
     ]
     self.main = nn.Sequential(*layers)
Example #7
 def __init__(self, input_size, name, monitor=None):
     super(HighwayNetwork, self).__init__()
     self.name = name
     self.monitor = monitor
     self.fc_gate = weight_norm(nn.Linear(input_size, input_size,
                                          bias=True),
                                dim=None)
     self.fc = weight_norm(nn.Linear(input_size, input_size, bias=True),
                           dim=None)
Example #8
 def __init__(self, in_dim, hid_dim, out_dim, dropout):
     super(SimpleClassifier, self).__init__()
     layers = [
         weight_norm(nn.Linear(in_dim, hid_dim), dim=None),
         nn.ReLU(),
         nn.Dropout(dropout, inplace=True),
         weight_norm(nn.Linear(hid_dim, out_dim), dim=None)
     ]
     self.main = nn.Sequential(*layers)
Example #9
 def __init__(self, img_feat_size, hidden_size, dropout_r):
     super(AttnMap, self).__init__()
     self.linear_q = weight_norm(nn.Linear(hidden_size, hidden_size),
                                 dim=None)
     self.linear_v = weight_norm(nn.Linear(img_feat_size, img_feat_size),
                                 dim=None)
     self.nonlinear = MLP([img_feat_size + hidden_size, hidden_size],
                          dropout_r=dropout_r)
     self.linear = weight_norm(nn.Linear(hidden_size, 1), dim=None)
Example #10
 def __init__(self, hidden_dim, dropout, *args, **kwargs):
     super().__init__(*args, **kwargs)
     layers = [
         weight_norm(nn.Linear(self.in_dim, hidden_dim), dim=None),
         nn.ReLU(),
         nn.Dropout(dropout, inplace=True),
         weight_norm(nn.Linear(hidden_dim, self.out_dim), dim=None),
     ]
     self.main = nn.Sequential(*layers)
Example #11
    def __init__(self, dims):
        super(FCNet, self).__init__()

        in_dim = dims[0]
        out_dim = dims[1]
        self.first_lin = weight_norm(nn.Linear(in_dim, out_dim), dim=None)
        self.tanh = nn.Tanh()
        self.second_lin = weight_norm(nn.Linear(in_dim, out_dim), dim=None)
        self.sigmoid = nn.Sigmoid()
Example #12
 def __init__(self, __C):
     super(AttnMap, self).__init__()
     self.__C = __C
     self.linear_q = weight_norm(
         nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE), dim=None)
     self.linear_v = weight_norm(
         nn.Linear(__C.IMG_FEAT_SIZE, __C.IMG_FEAT_SIZE), dim=None)
     self.nonlinear = MLP(
         [__C.IMG_FEAT_SIZE + __C.HIDDEN_SIZE, __C.HIDDEN_SIZE], dropout_r=__C.DROPOUT_R)
     self.linear = weight_norm(nn.Linear(__C.HIDDEN_SIZE, 1), dim=None)
Example #13
    def __init__(self, dims):
        super(fully_connected_network, self).__init__()
        layers = []
        for i in range(len(dims) - 2):
            layers.append(weight_norm(nn.Linear(dims[i], dims[i+1]), dim=None))
            layers.append(nn.ReLU())
        layers.append(weight_norm(nn.Linear(dims[-2], dims[-1]), dim=None))
        layers.append(nn.ReLU())

        self.model = nn.Sequential(*layers)
Example #14
 def __init__(self,
              in_dim: int,
              hid_dim: int,
              out_dim: int,
              dropout: float = 0.):
     super().__init__()
     self.main = nn.Sequential(
         weight_norm(nn.Linear(in_dim, hid_dim), dim=None), nn.ReLU(),
         nn.Dropout(dropout, inplace=True),
         weight_norm(nn.Linear(hid_dim, out_dim), dim=None))
Example #15
    def __init__(self, features_dim, decoder_dim, attention_dim, dropout=0.5):

        super(Attention, self).__init__()
        self.features_att = weight_norm(
            nn.Linear(features_dim, attention_dim))  # linear layer to transform encoded image
        self.decoder_att = weight_norm(
            nn.Linear(decoder_dim, attention_dim))  # linear layer to transform decoder's output
        self.full_att = weight_norm(nn.Linear(attention_dim, 1))  # linear layer to calculate values to be softmax-ed
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout)
        self.softmax = nn.Softmax(dim=1)  # softmax layer to calculate weights
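Example #15 only defines the attention layers; the forward pass is not part of the snippet. A hedged sketch of the additive attention those layers usually implement in bottom-up/top-down captioning models (the forward method, tensor shapes, and sample sizes are assumptions):

import torch
import torch.nn as nn
from torch.nn.utils import weight_norm

class AdditiveAttention(nn.Module):
    def __init__(self, features_dim, decoder_dim, attention_dim, dropout=0.5):
        super().__init__()
        self.features_att = weight_norm(nn.Linear(features_dim, attention_dim))
        self.decoder_att = weight_norm(nn.Linear(decoder_dim, attention_dim))
        self.full_att = weight_norm(nn.Linear(attention_dim, 1))
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, features, decoder_hidden):
        # features: (batch, num_regions, features_dim); decoder_hidden: (batch, decoder_dim)
        att1 = self.features_att(features)                    # (batch, num_regions, attention_dim)
        att2 = self.decoder_att(decoder_hidden).unsqueeze(1)  # (batch, 1, attention_dim)
        scores = self.full_att(self.dropout(self.relu(att1 + att2))).squeeze(2)
        alpha = self.softmax(scores)                          # weights over regions
        return (features * alpha.unsqueeze(2)).sum(dim=1), alpha

attn = AdditiveAttention(features_dim=2048, decoder_dim=512, attention_dim=512)
context, alpha = attn(torch.randn(2, 36, 2048), torch.randn(2, 512))
print(context.shape, alpha.shape)  # torch.Size([2, 2048]) torch.Size([2, 36])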
Example #16
    def __init__(self, decoder_dims, encoder_dims, output_dims, vocab_size):
        super(Output, self).__init__()

        self.lang_linear = weight_norm(nn.Linear(decoder_dims, output_dims))
        self.att_linear = weight_norm(nn.Linear(decoder_dims, output_dims))
        #self.fc_linear = weight_norm(nn.Linear(encoder_dims,output_dims))
        self.relu = nn.ReLU()
        self.fc = weight_norm(nn.Linear(
            output_dims * 2,
            vocab_size))  # linear layer to find scores over vocabulary
        self.dropout = nn.Dropout(p=0.5)
Example #17
 def __init__(self, dims):
     super(FCNet, self).__init__()
     layers = []
     for i in range(len(dims) - 2):
         in_dim = dims[i]
         out_dim = dims[i + 1]
         layers.append(weight_norm(nn.Linear(in_dim, out_dim), dim=None))
         layers.append(nn.ReLU())
     layers.append(weight_norm(nn.Linear(dims[-2], dims[-1]), dim=None))
     layers.append(nn.ReLU())
     self.main = nn.Sequential(*layers)
Example #18
 def __init__(self, x_dim, q_dim, hidden_size, drop=0.0):
     super(Attention, self).__init__()
     self.x_dim = x_dim
     self.dropout = nn.Dropout(drop)
     self.x_with_q_att = weight_norm(nn.Linear((x_dim + q_dim),
                                               hidden_size),
                                     dim=None)
     self.fc = FC([x_dim + q_dim, hidden_size])
     self.x_proj = FC([x_dim, hidden_size])  # x_dim -> H
     self.q_proj = FC([q_dim, hidden_size])  # q_dim(H*2) -> H
     self.linear = weight_norm(nn.Linear(hidden_size, 1), dim=None)
Example #19
def get_model(num_features):
    model = nn.Sequential(weight_norm(nn.Linear(num_features, 1000)), nn.ELU(),
                          nn.Dropout(0.5), weight_norm(nn.Linear(1000, 1000)),
                          nn.ELU(), nn.Dropout(0.5),
                          weight_norm(nn.Linear(1000, 1000)), nn.ELU(),
                          nn.Dropout(0.5), weight_norm(nn.Linear(1000, 1)))
    for m in model:
        if isinstance(m, nn.Linear):
            nn.init.kaiming_normal_(m.weight_v)
            nn.init.kaiming_normal_(m.weight_g)
            nn.init.constant_(m.bias, 0)
    return model.to(DEVICE)
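Example #19 initializes the decomposed parameters weight_v and weight_g directly, which works because weight_norm leaves the wrapped nn.Linear in place and simply adds those two parameters (here with the default dim=0, so weight_g has one magnitude per output unit). If the reparameterization is no longer wanted, e.g. before export, it can be folded back into a plain weight; a small sketch using torch.nn.utils.remove_weight_norm with illustrative sizes:

import torch.nn as nn
from torch.nn.utils import weight_norm, remove_weight_norm

lin = weight_norm(nn.Linear(1000, 1))   # default dim=0: weight_g has shape (1, 1)
nn.init.kaiming_normal_(lin.weight_v)   # initialize the direction, as in Example #19
nn.init.constant_(lin.bias, 0)

remove_weight_norm(lin)                 # recomputes weight = g * v / ||v|| and drops the hook
print(hasattr(lin, "weight_g"), lin.weight.shape)  # False torch.Size([1, 1000])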
Example #20
    def __init__(self, cfgs, pretrained_emb, token_size):
        """
        :param cfgs: configurations of XTQA.
        :param pretrained_emb:
        :param token_size: the size of the vocabulary table.
        """
        super(Net, self).__init__()
        self.cfgs = cfgs
        self.embedding = nn.Embedding(num_embeddings=token_size,
                                      embedding_dim=cfgs.word_emb_size)
        if cfgs.pretrained_emb['name']:
            self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

        # use rnn to encode the closest paragraph.
        self.rnn_cp = nn.GRU(input_size=cfgs.word_emb_size,
                             hidden_size=cfgs.hidden_size,
                             num_layers=1,
                             batch_first=True,
                             bidirectional=True)

        # use rnn to encode the question and option
        self.rnn_qo = nn.GRU(input_size=cfgs.word_emb_size,
                             hidden_size=cfgs.hidden_size_qo,
                             num_layers=1,
                             batch_first=True,
                             bidirectional=True)

        self.flatten_opt = FlattenAtt(cfgs)

        # use simclr to encode the diagram
        self.simclr = SimCLR(cfgs)

        # hierarchical bilinear attention networks
        self.backbone = HMFN_V1(cfgs)

        # Note: if you use this strategy, you must set the batch size to 1
        if cfgs.divide_and_rule in 'True':
            # predict whether this question needs multimodal context
            self.classifer0 = weight_norm(nn.Linear(cfgs.hidden_size, 1),
                                          dim=None)
            self.sigmoid = nn.Sigmoid()
            self.ban = BAN(cfgs)

        # Classification layers
        layers = [
            weight_norm(nn.Linear(cfgs.hidden_size, cfgs.flat_out_size),
                        dim=None),
            nn.ReLU(),
            nn.Dropout(cfgs.classifer_dropout_r, inplace=True),
            # weight_norm(nn.Linear(cfgs.flat_out_size, 1), dim=None)
        ]
        self.flatten = nn.Sequential(*layers)
        self.classifer1 = nn.CosineSimilarity(dim=2)
Example #21
    def __init__(self, v_dim, q_dim, num_hid, dropout=0.2):
        super(DualAttention, self).__init__()

        self.v_proj1 = FCNet([v_dim, num_hid])
        self.v_proj2 = FCNet([v_dim, num_hid])
        self.q_proj1 = FCNet([q_dim, num_hid])
        self.q_proj2 = FCNet([q_dim, num_hid])
        self.dropout = nn.Dropout(dropout)
        self.on_repr1 = FCNet([num_hid, num_hid])
        self.on_repr2 = FCNet([num_hid, num_hid])
        self.linear1 = weight_norm(nn.Linear(num_hid, 1), dim=None)
        self.linear2 = weight_norm(nn.Linear(num_hid, 1), dim=None)
Example #22
def update_grouped_resnet18(model, config, tasks, model_dir=None):

    groups = config['model']['grouping']['groups']
    grouping_method = config['model']['grouping']['method']
    parameters = config['model']['parameters']
    gpu_info = activate_gpus(config)

    # Group the 1x1 convolutions and redefine them
    if grouping_method == 'half':
        model.conv1.Wt.semseg = weight_norm(
            STConv2d(model.conv1.Wt.semseg.in_channels, model.conv1.Wt.semseg.out_channels, 1, stride=1, groups=groups, bias=model.conv1.Wt.semseg.bias))

        for modules in [model.layer1, model.layer2, model.layer3, model.layer4]:
            for m in modules:
                m.conv1.Wt.semseg = weight_norm(
                    STConv2d(m.conv1.Wt.semseg.in_channels, m.conv1.Wt.semseg.out_channels, 1, stride=1, groups=groups, bias=m.conv1.Wt.semseg.bias))
                m.conv2.Wt.semseg = weight_norm(
                    STConv2d(m.conv2.Wt.semseg.in_channels, m.conv2.Wt.semseg.out_channels, 1, stride=1, groups=groups, bias=m.conv2.Wt.semseg.bias))

    elif grouping_method == 'clustering':
        model.conv1.Wt.semseg = Grouped1x1Conv(original_module=model.conv1.Wt.semseg, groups=groups, gpu_info=gpu_info)

        for modules in [model.layer1, model.layer2, model.layer3, model.layer4]:
            for m in modules:
                m.conv1.Wt.semseg = Grouped1x1Conv(original_module=m.conv1.Wt.semseg, groups=groups, gpu_info=gpu_info)
                m.conv2.Wt.semseg = Grouped1x1Conv(original_module=m.conv2.Wt.semseg, groups=groups, gpu_info=gpu_info)
    else:
        raise ValueError("There is no method called {}.".format(grouping_method))

    # for name, m in model.named_parameters(): 
    #     if 'Wt' in name and any(task in name for task in tasks):
    #         print(name)

    # Load the pretrained parameters back in
    model_name = 'resnet18'
    print('Loading Imagenet {} for {} initialization'.format(model_name, parameters['pretrained_architecture']))
    state_dict = get_state_dict(model_dir, model_name, parameters['pretrained_architecture'])
    state_dict = adjust_downsample_dic(state_dict)

    if parameters['pretrained_architecture'] == 'RC_RI':
        state_dict = create_grouped_decompositions(state_dict, parameters['decomp_path'], parameters['pretrained_architecture'], groups, grouping_method)
    elif parameters['pretrained_architecture'] == 'Conv':
        state_dict = rename_weights(state_dict, parameters['pretrained_architecture'])

    if parameters['conv_layer'] != 'RCM' and parameters['NFF']:
        raise ValueError("Only RCM architecture supports NFF. You are using {}".format(parameters['conv_layer']))
    elif parameters['conv_layer'] == 'RCM' and parameters['NFF']:
        state_dict = create_nff_layer(state_dict, parameters['pretrained_architecture'])

    new_dict = adjust_old_to_new_dict(model, state_dict, tasks, parameters['pretrained_architecture'])
    model.load_state_dict(new_dict)

    return model
Example #23
 def __init__(self,
              in_dim: int,
              hid_dim: int,
              out_dim: int,
              dropout: float = 0.):
     super().__init__()
     self.main = nn.Sequential(
         nn.BatchNorm1d(in_dim),  # Added this
         weight_norm(nn.Conv1d(in_dim, hid_dim, 5, padding=2), dim=None),
         nn.ReLU(),
         nn.Dropout(dropout, inplace=True),
         weight_norm(nn.Conv1d(hid_dim, out_dim, 3, padding=1), dim=None))
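The snippet above applies weight_norm to Conv1d layers rather than linear layers; the wrapper works the same way for convolutions. A short sketch of running such a layer on a batch (batch size, channel counts, and sequence length are made up for illustration):

import torch
import torch.nn as nn
from torch.nn.utils import weight_norm

conv = weight_norm(nn.Conv1d(in_channels=64, out_channels=128, kernel_size=5, padding=2), dim=None)
x = torch.randn(8, 64, 35)   # (batch, channels, sequence length)
print(conv(x).shape)         # torch.Size([8, 128, 35]) -- padding=2 preserves the length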
Example #24
    def __init__(self,
                 attention_dim,
                 embed_dim,
                 decoder_dim,
                 vocab_size,
                 features_dim=2048,
                 dropout=0.5,
                 encoded_image_size=7):
        """
        :param attention_dim: size of attention network
        :param embed_dim: embedding size
        :param decoder_dim: size of decoder's RNN
        :param vocab_size: size of vocabulary
        :param features_dim: feature size of encoded images
        :param dropout: dropout
        """
        super(BUTD, self).__init__()
        self.enc_image_size = encoded_image_size
        resnet = torchvision.models.resnet101(
            pretrained=True)  # pretrained ImageNet ResNet-101
        # Remove linear and pool layers (since we're not doing classification)
        modules = list(resnet.children())[:-2]
        self.resnet = nn.Sequential(*modules)
        # Resize image to fixed size to allow input images of variable size
        self.adaptive_pool = nn.AdaptiveAvgPool2d(
            (encoded_image_size, encoded_image_size))
        for p in self.resnet.parameters():
            p.requires_grad = False
        self.fine_tune()  # finetune encoder

        self.features_dim = features_dim
        self.attention_dim = attention_dim
        self.embed_dim = embed_dim
        self.decoder_dim = decoder_dim
        self.vocab_size = vocab_size
        self.dropout = dropout
        self.attention = Attention(features_dim, decoder_dim,
                                   attention_dim)  # attention network
        self.embedding = nn.Embedding(vocab_size, embed_dim)  # embedding layer
        self.dropout = nn.Dropout(p=self.dropout)
        self.top_down_attention = nn.LSTMCell(
            embed_dim + features_dim + decoder_dim, decoder_dim,
            bias=True)  # top down attention LSTMCell
        self.language_model = nn.LSTMCell(features_dim + decoder_dim,
                                          decoder_dim,
                                          bias=True)  # language model LSTMCell
        self.fc1 = weight_norm(nn.Linear(decoder_dim, vocab_size))
        self.fc = weight_norm(nn.Linear(
            decoder_dim,
            vocab_size))  # linear layer to find scores over vocabulary
        self.init_weights()  # initialize some layers with the uniform distribution
Example #25
    def __init__(self,
                 attention_dim,
                 embed_dim,
                 decoder_dim,
                 vocab_size,
                 features_dim=4096,
                 dropout=0.5,
                 num_regions=15,
                 topic_dim=500):
        """
        :param attention_dim: size of attention network
        :param embed_dim: embedding size
        :param decoder_dim: size of decoder's RNN
        :param vocab_size: size of vocabulary
        :param features_dim: feature size of encoded images
        :param num_regions: number of regions used to encode images
        :param dropout: dropout
        """
        super(DecoderWithAttention, self).__init__()

        self.features_dim = features_dim
        self.attention_dim = attention_dim
        self.embed_dim = embed_dim
        self.decoder_dim = decoder_dim
        self.vocab_size = vocab_size
        self.dropout = dropout
        self.num_regions = num_regions
        self.topic_dim = topic_dim

        self.autoencoder = AutoEncoder(num_regions, features_dim)
        self.attention = Attention(features_dim, decoder_dim, attention_dim,
                                   topic_dim)  # attention network

        self.embedding = nn.Embedding(vocab_size, embed_dim)  # embedding layer
        self.dropout = nn.Dropout(p=self.dropout)

        #print('SIZE', embed_dim + features_dim + decoder_dim)

        self.top_down_attention = nn.LSTMCell(
            embed_dim + features_dim + decoder_dim, decoder_dim,
            bias=True)  # top down attention LSTMCell

        self.language_model = nn.LSTMCell(features_dim + decoder_dim +
                                          topic_dim,
                                          decoder_dim,
                                          bias=True)  # language model LSTMCell
        self.fc1 = weight_norm(nn.Linear(decoder_dim, vocab_size))
        self.fc = weight_norm(nn.Linear(
            decoder_dim,
            vocab_size))  # linear layer to find scores over vocabulary
        self.init_weights()  # initialize some layers with the uniform distribution
Example #26
 def __init__(self, in_dim, hid_dim, out_dim, args):
     super(SimpleClassifier, self).__init__()
     activation_dict = {'relu': nn.ReLU()}
     try:
         activation_func = activation_dict[args.activation]
     except:
         raise AssertionError(args.activation + " is not supported yet!")
     layers = [
         weight_norm(nn.Linear(in_dim, hid_dim), dim=None), activation_func,
         nn.Dropout(args.dropout, inplace=True),
         weight_norm(nn.Linear(hid_dim, out_dim), dim=None)
     ]
     self.main = nn.Sequential(*layers)
Example #27
    def __init__(self, ms, in_ch, out_ch, norm, dropout):
        # Note: `downsample` and `secondKernel` used below are presumably
        # defined elsewhere in the original class and are not shown in this snippet.
        super().__init__()
        layers = []
        skipFirstReLU = False
        if in_ch != out_ch:
            assert (out_ch == 2 * in_ch)
            layers.append(ncReLU())
            skipFirstReLU = True
        if downsample:
            layers.append(nn.AvgPool2d(
                2))  #could be learned, but this allows a better identity?
        if len(layers) > 0:
            self.transform = nn.Sequential(*layers)
        else:
            self.transform = lambda x: x

        layers = []
        if not skipFirstReLU:
            #I'm not sure if this is the best thing
            #there should be a way to normalize (mask after normalization?)
            if 'batch' in norm:
                layers.append(nn.BatchNorm2d(out_ch))
            if 'instance' in norm:
                layers.append(nn.InstanceNorm2d(out_ch))
            if 'group' in norm:
                layers.append(nn.GroupNorm(getGroupSize(out_ch), out_ch))
            layers.append(nn.ReLU(inplace=True))
        if 'weight' in norm and not skipFirstReLU:  #or just use this normalization?
            layers.append(weight_norm(ms[0]))
        else:
            layers.append(ms[0])

        for m in ms[1:]:
            if 'batch' in norm:
                layers.append(nn.BatchNorm2d(out_ch))
            if 'instance' in norm:
                layers.append(nn.InstanceNorm2d(out_ch))
            if 'group' in norm:
                layers.append(nn.GroupNorm(getGroupSize(out_ch), out_ch))
            if dropout is not None:
                if dropout == True or dropout == '2d':
                    layers.append(nn.Dropout2d(p=0.1, inplace=True))
                elif dropout == 'normal':
                    layers.append(nn.Dropout2d(p=0.1, inplace=True))
            layers.append(nn.ReLU(inplace=True))
            assert (secondKernel % 2 == 1)
            if 'weight' in norm:
                layers.append(weight_norm(m))
            else:
                layers.append(m)

        self.side = nn.Sequential(*layers)
Example #28
    def __init__(self, size, n_lockups, p=0.1):
        """
        :param size: feature size used to estimate the scaling factor
        :param p: dropout probability
        """
        super(EmbeddingAttention, self).__init__()

        self.n_lockups = n_lockups

        self.attention = nn.Sequential(
            weight_norm(nn.Linear(size, size, bias=False)), nn.Tanh(),
            weight_norm(nn.Linear(size, n_lockups, bias=False)))

        self.dropout = nn.Dropout(p)
Example #29
 def __init__(self, image_feat_dim, embed_dim, **kwargs):
     super().__init__()
     self.fa_image = weight_norm(nn.Linear(image_feat_dim, kwargs["attention_dim"]))
     self.fa_hidden = weight_norm(
         nn.Linear(kwargs["hidden_dim"], kwargs["attention_dim"])
     )
     self.top_down_lstm = nn.LSTMCell(
         embed_dim + image_feat_dim + kwargs["hidden_dim"],
         kwargs["hidden_dim"],
         bias=True,
     )
     self.relu = nn.ReLU()
     self.dropout = nn.Dropout(kwargs["dropout"])
     self.out_dim = kwargs["attention_dim"]
Example #30
    def __init__(self, attention_dim, reduction=4, temp=10.0):
        super(PAM, self).__init__()

        self.temp = temp
        self.linear1 = weight_norm(nn.Linear(attention_dim,
                                             attention_dim // reduction,
                                             bias=False),
                                   dim=None)
        self.ln1 = nn.LayerNorm(attention_dim // reduction)
        self.relu1 = nn.ReLU(inplace=True)
        self.linear2 = weight_norm(nn.Linear(attention_dim // reduction,
                                             attention_dim,
                                             bias=False),
                                   dim=None)
Example #31
    def __init__(self, dims):
        super(FC, self).__init__()

        layers = []
        for i in range(len(dims) - 2):
            layers.append(
                weight_norm(nn.Linear(dims[i], dims[i + 1]), dim=None))
            layers.append(nn.ReLU())
        len_dims = len(dims)
        layers.append(
            weight_norm(nn.Linear(dims[len_dims - 2], dims[len_dims - 1]),
                        dim=None))
        layers.append(nn.ReLU())

        self.fc = nn.Sequential(*layers)
Example #32
    def __init__(self, v_dim, q_dim, num_hid, dropout=0.2):
        super(NewAttention, self).__init__()

        self.v_proj = FCNet([v_dim, num_hid])
        self.q_proj = FCNet([q_dim, num_hid])
        self.dropout = nn.Dropout(dropout)
        self.linear = weight_norm(nn.Linear(q_dim, 1), dim=None)
Example #33
 def __init__(self, v_dim, q_dim, num_hid):
     super(Attention, self).__init__()
     self.nonlinear = FCNet([v_dim + q_dim, num_hid])
     self.linear = weight_norm(nn.Linear(num_hid, 1), dim=None)
Example #34
 def __init__(self, in_dim, **kwargs):
     super(LinearTransform, self).__init__()
     self.lc = weight_norm(
         nn.Linear(in_features=in_dim,
                   out_features=kwargs['out_dim']), dim=None)
     self.out_dim = kwargs['out_dim']
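One closing note, stated as general background rather than something taken from these repositories: torch.nn.utils.weight_norm is deprecated in recent PyTorch releases (2.1 and later) in favor of the parametrization-based torch.nn.utils.parametrizations.weight_norm, which computes the normalized weight through a registered parametrization instead of a forward pre-hook. A minimal sketch of the newer call, assuming a current PyTorch version:

import torch
import torch.nn as nn
from torch.nn.utils.parametrizations import weight_norm  # parametrization-based API
from torch.nn.utils import parametrize

lin = weight_norm(nn.Linear(16, 8), dim=None)
# The decomposed parameters live under `parametrizations.weight` (named
# `original0` for the magnitude and `original1` for the direction) rather
# than `weight_g` / `weight_v`.
print([name for name, _ in lin.named_parameters()])
y = lin(torch.randn(4, 16))

# Fold the parametrization back into a plain `weight` tensor.
parametrize.remove_parametrizations(lin, "weight")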