def __init__(self, dims, act="ReLU", dropout=0): super(FCNet, self).__init__() layers = [] for i in range(len(dims) - 2): in_dim = dims[i] out_dim = dims[i + 1] if dropout > 0: layers.append(nn.Dropout(dropout)) layers.append(weight_norm(nn.Linear(in_dim, out_dim), dim=None)) if act is not None: layers.append(getattr(nn, act)()) if dropout > 0: layers.append(nn.Dropout(dropout)) layers.append(weight_norm(nn.Linear(dims[-2], dims[-1]), dim=None)) if act is not None: layers.append(getattr(nn, act)()) self.main = nn.Sequential(*layers)
def __init__(self, in_dim, hid_dim, out_dim, dropout):
    super(QAmodel, self).__init__()
    self.grad_reverse = GradReverse.apply
    self.l1 = weight_norm(nn.Linear(in_dim, hid_dim), dim=None)
    self.relu = nn.ReLU()
    self.drop = nn.Dropout(dropout, inplace=True)
    self.l2 = weight_norm(nn.Linear(hid_dim, out_dim), dim=None)
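# GradReverse is referenced above but not defined in this snippet. A common
# gradient-reversal-layer implementation, shown here as an assumption about
# the intended behaviour (identity forward, negated gradient backward):
import torch

class GradReverse(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        return x.view_as(x)        # identity in the forward pass

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.neg()   # flip the gradient sign for the layers below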
def __init__(self, v_num, x_dim, y_dim, z_dim, glimpse, dropout=[.2, .5]):
    super(BiAttention_both, self).__init__()
    self.glimpse = glimpse
    self.logits_v = weight_norm(
        BCNet(x_dim, y_dim, z_dim, glimpse, dropout=dropout, k=3),
        name='h_mat', dim=None)
    self.logits_q = weight_norm(
        BCNet_q(v_num, x_dim, y_dim, z_dim, glimpse, dropout=dropout, k=3),
        name='h_mat', dim=None)
def __init__(self, v_dim, q_dim, num_hid, dropout=0.2, max_len=35):
    super(CoAttention, self).__init__()
    self.v_proj = FCNet([v_dim, num_hid])
    self.q_proj = FCNet([q_dim, num_hid])
    self.tran_linear = weight_norm(nn.Linear(num_hid, num_hid))
    self.dropout = nn.Dropout(dropout)
    self.linear_q = weight_norm(nn.Linear(max_len, 1), dim=None)
    self.linear_v = weight_norm(nn.Linear(max_len, 1), dim=None)
def __init__(self, in_dim, hid_dim, out_dim, dropout):
    super(SimpleClassifier, self).__init__()
    layers = [
        weight_norm(nn.Linear(in_dim, hid_dim), dim=None),
        nn.ReLU(),
        nn.Dropout(dropout, inplace=True),
        weight_norm(nn.Linear(hid_dim, out_dim), dim=None)
    ]
    self.main = nn.Sequential(*layers)
def __init__(self, in_dim, out_dim, **kwargs):
    super(WeightNormClassifier, self).__init__()
    layers = [
        weight_norm(nn.Linear(in_dim, kwargs['hidden_dim']), dim=None),
        nn.ReLU(),
        nn.Dropout(kwargs['dropout'], inplace=True),
        weight_norm(nn.Linear(kwargs['hidden_dim'], out_dim), dim=None)
    ]
    self.main = nn.Sequential(*layers)
def __init__(self, input_size, name, monitor=None):
    super(HighwayNetwork, self).__init__()
    self.name = name
    self.monitor = monitor
    self.fc_gate = weight_norm(nn.Linear(input_size, input_size, bias=True), dim=None)
    self.fc = weight_norm(nn.Linear(input_size, input_size, bias=True), dim=None)
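# The forward pass is not part of this snippet. A typical highway-network
# forward that combines fc_gate and fc, given as a sketch under that assumption:
def forward(self, x):
    gate = torch.sigmoid(self.fc_gate(x))          # transform gate g in (0, 1)
    transformed = torch.relu(self.fc(x))           # candidate transformation
    return gate * transformed + (1.0 - gate) * x   # carry the remainder of x through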
def __init__(self, in_dim, hid_dim, out_dim, dropout):
    super(SimpleClassifier, self).__init__()
    layers = [
        weight_norm(nn.Linear(in_dim, hid_dim), dim=None),
        nn.ReLU(),
        nn.Dropout(dropout, inplace=True),
        weight_norm(nn.Linear(hid_dim, out_dim), dim=None)
    ]
    self.main = nn.Sequential(*layers)
def __init__(self, img_feat_size, hidden_size, dropout_r):
    super(AttnMap, self).__init__()
    self.linear_q = weight_norm(nn.Linear(hidden_size, hidden_size), dim=None)
    self.linear_v = weight_norm(nn.Linear(img_feat_size, img_feat_size), dim=None)
    self.nonlinear = MLP([img_feat_size + hidden_size, hidden_size], dropout_r=dropout_r)
    self.linear = weight_norm(nn.Linear(hidden_size, 1), dim=None)
def __init__(self, hidden_dim, dropout, *args, **kwargs):
    super().__init__(*args, **kwargs)
    layers = [
        weight_norm(nn.Linear(self.in_dim, hidden_dim), dim=None),
        nn.ReLU(),
        nn.Dropout(dropout, inplace=True),
        weight_norm(nn.Linear(hidden_dim, self.out_dim), dim=None),
    ]
    self.main = nn.Sequential(*layers)
def __init__(self, dims):
    super(FCNet, self).__init__()
    in_dim = dims[0]
    out_dim = dims[1]
    self.first_lin = weight_norm(nn.Linear(in_dim, out_dim), dim=None)
    self.tanh = nn.Tanh()
    self.second_lin = weight_norm(nn.Linear(in_dim, out_dim), dim=None)
    self.sigmoid = nn.Sigmoid()
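# The paired Linear/Tanh and Linear/Sigmoid layers suggest a gated tanh unit,
# y = tanh(W1 x) * sigmoid(W2 x). A forward sketch under that assumption
# (the original forward is not shown):
def forward(self, x):
    y_hat = self.tanh(self.first_lin(x))      # candidate activation
    gate = self.sigmoid(self.second_lin(x))   # element-wise gate
    return y_hat * gate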
def __init__(self, __C):
    super(AttnMap, self).__init__()
    self.__C = __C
    self.linear_q = weight_norm(
        nn.Linear(__C.HIDDEN_SIZE, __C.HIDDEN_SIZE), dim=None)
    self.linear_v = weight_norm(
        nn.Linear(__C.IMG_FEAT_SIZE, __C.IMG_FEAT_SIZE), dim=None)
    self.nonlinear = MLP(
        [__C.IMG_FEAT_SIZE + __C.HIDDEN_SIZE, __C.HIDDEN_SIZE],
        dropout_r=__C.DROPOUT_R)
    self.linear = weight_norm(nn.Linear(__C.HIDDEN_SIZE, 1), dim=None)
def __init__(self, dims):
    super(fully_connected_network, self).__init__()
    layers = []
    for i in range(len(dims) - 2):
        layers.append(weight_norm(nn.Linear(dims[i], dims[i + 1]), dim=None))
        layers.append(nn.ReLU())
    layers.append(weight_norm(nn.Linear(dims[-2], dims[-1]), dim=None))
    layers.append(nn.ReLU())
    self.model = nn.Sequential(*layers)
def __init__(self, in_dim: int, hid_dim: int, out_dim: int, dropout: float = 0.):
    super().__init__()
    self.main = nn.Sequential(
        weight_norm(nn.Linear(in_dim, hid_dim), dim=None),
        nn.ReLU(),
        nn.Dropout(dropout, inplace=True),
        weight_norm(nn.Linear(hid_dim, out_dim), dim=None))
def __init__(self, features_dim, decoder_dim, attention_dim, dropout=0.5):
    super(Attention, self).__init__()
    self.features_att = weight_norm(
        nn.Linear(features_dim, attention_dim))   # linear layer to transform encoded image
    self.decoder_att = weight_norm(
        nn.Linear(decoder_dim, attention_dim))    # linear layer to transform decoder's output
    self.full_att = weight_norm(nn.Linear(attention_dim, 1))  # linear layer to calculate values to be softmax-ed
    self.relu = nn.ReLU()
    self.dropout = nn.Dropout(p=dropout)
    self.softmax = nn.Softmax(dim=1)              # softmax layer to calculate weights
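# A forward sketch for the attention module above, following the usual
# bottom-up/top-down additive-attention recipe; the original forward is not
# shown, so the exact wiring is an assumption:
def forward(self, image_features, decoder_hidden):
    att1 = self.features_att(image_features)               # (batch, regions, attention_dim)
    att2 = self.decoder_att(decoder_hidden)                # (batch, attention_dim)
    att = self.full_att(self.dropout(
        self.relu(att1 + att2.unsqueeze(1)))).squeeze(2)   # (batch, regions)
    alpha = self.softmax(att)                              # attention weights over regions
    return (image_features * alpha.unsqueeze(2)).sum(dim=1)  # attention-weighted feature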
def __init__(self, decoder_dims, encoder_dims, output_dims, vocab_size):
    super(Output, self).__init__()
    self.lang_linear = weight_norm(nn.Linear(decoder_dims, output_dims))
    self.att_linear = weight_norm(nn.Linear(decoder_dims, output_dims))
    # self.fc_linear = weight_norm(nn.Linear(encoder_dims, output_dims))
    self.relu = nn.ReLU()
    self.fc = weight_norm(nn.Linear(
        output_dims * 2, vocab_size))  # linear layer to find scores over vocabulary
    self.dropout = nn.Dropout(p=0.5)
def __init__(self, dims):
    super(FCNet, self).__init__()
    layers = []
    for i in range(len(dims) - 2):
        in_dim = dims[i]
        out_dim = dims[i + 1]
        layers.append(weight_norm(nn.Linear(in_dim, out_dim), dim=None))
        layers.append(nn.ReLU())
    layers.append(weight_norm(nn.Linear(dims[-2], dims[-1]), dim=None))
    layers.append(nn.ReLU())
    self.main = nn.Sequential(*layers)
def __init__(self, x_dim, q_dim, hidden_size, drop=0.0):
    super(Attention, self).__init__()
    self.x_dim = x_dim
    self.dropout = nn.Dropout(drop)
    self.x_with_q_att = weight_norm(nn.Linear((x_dim + q_dim), hidden_size), dim=None)
    self.fc = FC([x_dim + q_dim, hidden_size])
    self.x_proj = FC([x_dim, hidden_size])  # x_dim -> H
    self.q_proj = FC([q_dim, hidden_size])  # q_dim (H*2) -> H
    self.linear = weight_norm(nn.Linear(hidden_size, 1), dim=None)
def get_model(num_features):
    model = nn.Sequential(weight_norm(nn.Linear(num_features, 1000)),
                          nn.ELU(),
                          nn.Dropout(0.5),
                          weight_norm(nn.Linear(1000, 1000)),
                          nn.ELU(),
                          nn.Dropout(0.5),
                          weight_norm(nn.Linear(1000, 1000)),
                          nn.ELU(),
                          nn.Dropout(0.5),
                          weight_norm(nn.Linear(1000, 1)))
    for m in model:
        if isinstance(m, nn.Linear):
            nn.init.kaiming_normal_(m.weight_v)
            nn.init.kaiming_normal_(m.weight_g)
            nn.init.constant_(m.bias, 0)
    return model.to(DEVICE)
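# Usage sketch for get_model; the feature count is illustrative. DEVICE must
# already exist in the original module; it is defined here only so the sketch
# is self-contained.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = get_model(num_features=32)                  # 32 input features -> scalar output
pred = model(torch.randn(16, 32, device=DEVICE))    # -> shape (16, 1)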
def __init__(self, cfgs, pretrained_emb, token_size):
    """
    :param cfgs: configurations of XTQA.
    :param pretrained_emb: pretrained word embeddings (numpy array) copied into the embedding layer.
    :param token_size: the size of the vocabulary table.
    """
    super(Net, self).__init__()
    self.cfgs = cfgs

    self.embedding = nn.Embedding(num_embeddings=token_size,
                                  embedding_dim=cfgs.word_emb_size)
    if cfgs.pretrained_emb['name']:
        self.embedding.weight.data.copy_(torch.from_numpy(pretrained_emb))

    # use an rnn to encode the closest paragraph
    self.rnn_cp = nn.GRU(input_size=cfgs.word_emb_size,
                         hidden_size=cfgs.hidden_size,
                         num_layers=1,
                         batch_first=True,
                         bidirectional=True)

    # use an rnn to encode the question and option
    self.rnn_qo = nn.GRU(input_size=cfgs.word_emb_size,
                         hidden_size=cfgs.hidden_size_qo,
                         num_layers=1,
                         batch_first=True,
                         bidirectional=True)

    self.flatten_opt = FlattenAtt(cfgs)

    # use SimCLR to encode the diagram
    self.simclr = SimCLR(cfgs)

    # hierarchical bilinear attention networks
    self.backbone = HMFN_V1(cfgs)

    # Note: if this strategy is used, the batch size must be set to 1
    if cfgs.divide_and_rule == 'True':
        # predict whether this question needs multimodal context
        self.classifer0 = weight_norm(nn.Linear(cfgs.hidden_size, 1), dim=None)
        self.sigmoid = nn.Sigmoid()
        self.ban = BAN(cfgs)

    # classification layers
    layers = [
        weight_norm(nn.Linear(cfgs.hidden_size, cfgs.flat_out_size), dim=None),
        nn.ReLU(),
        nn.Dropout(cfgs.classifer_dropout_r, inplace=True),
        # weight_norm(nn.Linear(cfgs.flat_out_size, 1), dim=None)
    ]
    self.flatten = nn.Sequential(*layers)
    self.classifer1 = nn.CosineSimilarity(dim=2)
def __init__(self, v_dim, q_dim, num_hid, dropout=0.2):
    super(DualAttention, self).__init__()
    self.v_proj1 = FCNet([v_dim, num_hid])
    self.v_proj2 = FCNet([v_dim, num_hid])
    self.q_proj1 = FCNet([q_dim, num_hid])
    self.q_proj2 = FCNet([q_dim, num_hid])
    self.dropout = nn.Dropout(dropout)
    self.on_repr1 = FCNet([num_hid, num_hid])
    self.on_repr2 = FCNet([num_hid, num_hid])
    self.linear1 = weight_norm(nn.Linear(num_hid, 1), dim=None)
    self.linear2 = weight_norm(nn.Linear(num_hid, 1), dim=None)
def update_grouped_resnet18(model, config, tasks, model_dir=None):
    groups = config['model']['grouping']['groups']
    grouping_method = config['model']['grouping']['method']
    parameters = config['model']['parameters']
    gpu_info = activate_gpus(config)

    # Regroup and redefine the 1x1 convolutions
    if grouping_method == 'half':
        model.conv1.Wt.semseg = weight_norm(
            STConv2d(model.conv1.Wt.semseg.in_channels,
                     model.conv1.Wt.semseg.out_channels,
                     1, stride=1, groups=groups,
                     bias=model.conv1.Wt.semseg.bias))
        for modules in [model.layer1, model.layer2, model.layer3, model.layer4]:
            for m in modules:
                m.conv1.Wt.semseg = weight_norm(
                    STConv2d(m.conv1.Wt.semseg.in_channels,
                             m.conv1.Wt.semseg.out_channels,
                             1, stride=1, groups=groups,
                             bias=m.conv1.Wt.semseg.bias))
                m.conv2.Wt.semseg = weight_norm(
                    STConv2d(m.conv2.Wt.semseg.in_channels,
                             m.conv2.Wt.semseg.out_channels,
                             1, stride=1, groups=groups,
                             bias=m.conv2.Wt.semseg.bias))
    elif grouping_method == 'clustering':
        model.conv1.Wt.semseg = Grouped1x1Conv(original_module=model.conv1.Wt.semseg,
                                               groups=groups, gpu_info=gpu_info)
        for modules in [model.layer1, model.layer2, model.layer3, model.layer4]:
            for m in modules:
                m.conv1.Wt.semseg = Grouped1x1Conv(original_module=m.conv1.Wt.semseg,
                                                   groups=groups, gpu_info=gpu_info)
                m.conv2.Wt.semseg = Grouped1x1Conv(original_module=m.conv2.Wt.semseg,
                                                   groups=groups, gpu_info=gpu_info)
    else:
        raise ValueError("There is no method called {}.".format(grouping_method))

    # for name, m in model.named_parameters():
    #     if 'Wt' in name and any(task in name for task in tasks):
    #         print(name)

    # Load the pretrained parameters again
    model_name = 'resnet18'
    print('Loading Imagenet {} for {} initialization'.format(model_name, parameters['pretrained_architecture']))
    state_dict = get_state_dict(model_dir, model_name, parameters['pretrained_architecture'])
    state_dict = adjust_downsample_dic(state_dict)
    if parameters['pretrained_architecture'] == 'RC_RI':
        state_dict = create_grouped_decompositions(state_dict, parameters['decomp_path'],
                                                   parameters['pretrained_architecture'],
                                                   groups, grouping_method)
    elif parameters['pretrained_architecture'] == 'Conv':
        state_dict = rename_weights(state_dict, parameters['pretrained_architecture'])
    if parameters['conv_layer'] != 'RCM' and parameters['NFF']:
        raise ValueError("Only RCM architecture supports NFF. You are using {}".format(parameters['conv_layer']))
    elif parameters['conv_layer'] == 'RCM' and parameters['NFF']:
        state_dict = create_nff_layer(state_dict, parameters['pretrained_architecture'])
    new_dict = adjust_old_to_new_dict(model, state_dict, tasks, parameters['pretrained_architecture'])
    model.load_state_dict(new_dict)
    return model
def __init__(self, in_dim: int, hid_dim: int, out_dim: int, dropout: float = 0.):
    super().__init__()
    self.main = nn.Sequential(
        nn.BatchNorm1d(in_dim),  # added this
        weight_norm(nn.Conv1d(in_dim, hid_dim, 5, padding=2), dim=None),
        nn.ReLU(),
        nn.Dropout(dropout, inplace=True),
        weight_norm(nn.Conv1d(hid_dim, out_dim, 3, padding=1), dim=None))
def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size,
             features_dim=2048, dropout=0.5, encoded_image_size=7):
    """
    :param attention_dim: size of attention network
    :param embed_dim: embedding size
    :param decoder_dim: size of decoder's RNN
    :param vocab_size: size of vocabulary
    :param features_dim: feature size of encoded images
    :param dropout: dropout
    :param encoded_image_size: spatial size of the pooled image encoding
    """
    super(BUTD, self).__init__()

    self.enc_image_size = encoded_image_size
    resnet = torchvision.models.resnet101(pretrained=True)  # pretrained ImageNet ResNet-101
    # Remove linear and pool layers (since we're not doing classification)
    modules = list(resnet.children())[:-2]
    self.resnet = nn.Sequential(*modules)
    # Resize image to fixed size to allow input images of variable size
    self.adaptive_pool = nn.AdaptiveAvgPool2d((encoded_image_size, encoded_image_size))
    for p in self.resnet.parameters():
        p.requires_grad = False
    self.fine_tune()  # finetune encoder

    self.features_dim = features_dim
    self.attention_dim = attention_dim
    self.embed_dim = embed_dim
    self.decoder_dim = decoder_dim
    self.vocab_size = vocab_size
    self.dropout = dropout

    self.attention = Attention(features_dim, decoder_dim, attention_dim)  # attention network
    self.embedding = nn.Embedding(vocab_size, embed_dim)  # embedding layer
    self.dropout = nn.Dropout(p=self.dropout)
    self.top_down_attention = nn.LSTMCell(
        embed_dim + features_dim + decoder_dim, decoder_dim,
        bias=True)  # top down attention LSTMCell
    self.language_model = nn.LSTMCell(features_dim + decoder_dim, decoder_dim,
                                      bias=True)  # language model LSTMCell
    self.fc1 = weight_norm(nn.Linear(decoder_dim, vocab_size))
    self.fc = weight_norm(nn.Linear(
        decoder_dim, vocab_size))  # linear layer to find scores over vocabulary
    self.init_weights()  # initialize some layers with the uniform distribution
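# One decoding timestep, sketched from the standard bottom-up/top-down
# captioning formulation. The original forward is not shown, so the wiring
# below is an assumption consistent with the LSTMCell input sizes above;
# the method name `step` is hypothetical.
def step(self, embedded_word, mean_features, image_features, h1, c1, h2, c2):
    # attention LSTM: previous language-LSTM state + mean-pooled image feature + word embedding
    h1, c1 = self.top_down_attention(
        torch.cat([h2, mean_features, embedded_word], dim=1), (h1, c1))
    attended = self.attention(image_features, h1)          # attention-weighted image feature
    # language LSTM: attended feature + attention-LSTM state
    h2, c2 = self.language_model(torch.cat([attended, h1], dim=1), (h2, c2))
    scores = self.fc(self.dropout(h2))                     # vocabulary logits
    return scores, h1, c1, h2, c2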
def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size,
             features_dim=4096, dropout=0.5, num_regions=15, topic_dim=500):
    """
    :param attention_dim: size of attention network
    :param embed_dim: embedding size
    :param decoder_dim: size of decoder's RNN
    :param vocab_size: size of vocabulary
    :param features_dim: feature size of encoded images
    :param num_regions: number of regions used to encode images
    :param dropout: dropout
    :param topic_dim: size of the topic representation fed to the attention and language LSTM
    """
    super(DecoderWithAttention, self).__init__()

    self.features_dim = features_dim
    self.attention_dim = attention_dim
    self.embed_dim = embed_dim
    self.decoder_dim = decoder_dim
    self.vocab_size = vocab_size
    self.dropout = dropout
    self.num_regions = num_regions
    self.topic_dim = topic_dim

    self.autoencoder = AutoEncoder(num_regions, features_dim)
    self.attention = Attention(features_dim, decoder_dim, attention_dim, topic_dim)  # attention network
    self.embedding = nn.Embedding(vocab_size, embed_dim)  # embedding layer
    self.dropout = nn.Dropout(p=self.dropout)
    # print('SIZE', embed_dim + features_dim + decoder_dim)
    self.top_down_attention = nn.LSTMCell(
        embed_dim + features_dim + decoder_dim, decoder_dim,
        bias=True)  # top down attention LSTMCell
    self.language_model = nn.LSTMCell(features_dim + decoder_dim + topic_dim, decoder_dim,
                                      bias=True)  # language model LSTMCell
    self.fc1 = weight_norm(nn.Linear(decoder_dim, vocab_size))
    self.fc = weight_norm(nn.Linear(
        decoder_dim, vocab_size))  # linear layer to find scores over vocabulary
    self.init_weights()  # initialize some layers with the uniform distribution
def __init__(self, in_dim, hid_dim, out_dim, args):
    super(SimpleClassifier, self).__init__()
    activation_dict = {'relu': nn.ReLU()}
    try:
        activation_func = activation_dict[args.activation]
    except KeyError:
        raise AssertionError(args.activation + " is not supported yet!")
    layers = [
        weight_norm(nn.Linear(in_dim, hid_dim), dim=None),
        activation_func,
        nn.Dropout(args.dropout, inplace=True),
        weight_norm(nn.Linear(hid_dim, out_dim), dim=None)
    ]
    self.main = nn.Sequential(*layers)
def __init__(self, ms, in_ch, out_ch, norm, dropout, downsample=False, secondKernel=3):
    super().__init__()
    layers = []
    skipFirstReLU = False
    if in_ch != out_ch:
        assert (out_ch == 2 * in_ch)
        layers.append(ncReLU())
        skipFirstReLU = True
    if downsample:
        layers.append(nn.AvgPool2d(2))  # could be learned, but this allows a better identity?
    if len(layers) > 0:
        self.transform = nn.Sequential(*layers)
    else:
        self.transform = lambda x: x

    layers = []
    if not skipFirstReLU:
        # I'm not sure if this is the best thing
        # there should be a way to normalize (mask after normalization?)
        if 'batch' in norm:
            layers.append(nn.BatchNorm2d(out_ch))
        if 'instance' in norm:
            layers.append(nn.InstanceNorm2d(out_ch))
        if 'group' in norm:
            layers.append(nn.GroupNorm(getGroupSize(out_ch), out_ch))
        layers.append(nn.ReLU(inplace=True))
    if 'weight' in norm and not skipFirstReLU:  # or just use this normalization?
        layers.append(weight_norm(ms[0]))
    else:
        layers.append(ms[0])
    for m in ms[1:]:
        if 'batch' in norm:
            layers.append(nn.BatchNorm2d(out_ch))
        if 'instance' in norm:
            layers.append(nn.InstanceNorm2d(out_ch))
        if 'group' in norm:
            layers.append(nn.GroupNorm(getGroupSize(out_ch), out_ch))
        if dropout is not None:
            if dropout == True or dropout == '2d':
                layers.append(nn.Dropout2d(p=0.1, inplace=True))
            elif dropout == 'normal':
                layers.append(nn.Dropout2d(p=0.1, inplace=True))
        layers.append(nn.ReLU(inplace=True))
        assert (secondKernel % 2 == 1)
        if 'weight' in norm:
            layers.append(weight_norm(m))
        else:
            layers.append(m)
    self.side = nn.Sequential(*layers)
def __init__(self, size, n_lockups, p=0.1):
    """
    :param size: width of the attention linear layers (the embedding dimension)
    :param n_lockups: number of attention lookups produced
    :param p: dropout probability
    """
    super(EmbeddingAttention, self).__init__()
    self.n_lockups = n_lockups
    self.attention = nn.Sequential(
        weight_norm(nn.Linear(size, size, bias=False)),
        nn.Tanh(),
        weight_norm(nn.Linear(size, n_lockups, bias=False)))
    self.dropout = nn.Dropout(p)
def __init__(self, image_feat_dim, embed_dim, **kwargs):
    super().__init__()
    self.fa_image = weight_norm(nn.Linear(image_feat_dim, kwargs["attention_dim"]))
    self.fa_hidden = weight_norm(
        nn.Linear(kwargs["hidden_dim"], kwargs["attention_dim"])
    )
    self.top_down_lstm = nn.LSTMCell(
        embed_dim + image_feat_dim + kwargs["hidden_dim"],
        kwargs["hidden_dim"],
        bias=True,
    )
    self.relu = nn.ReLU()
    self.dropout = nn.Dropout(kwargs["dropout"])
    self.out_dim = kwargs["attention_dim"]
def __init__(self, attention_dim, reduction=4, temp=10.0):
    super(PAM, self).__init__()
    self.temp = temp
    self.linear1 = weight_norm(nn.Linear(attention_dim, attention_dim // reduction, bias=False), dim=None)
    self.ln1 = nn.LayerNorm(attention_dim // reduction)
    self.relu1 = nn.ReLU(inplace=True)
    self.linear2 = weight_norm(nn.Linear(attention_dim // reduction, attention_dim, bias=False), dim=None)
def __init__(self, dims):
    super(FC, self).__init__()
    layers = []
    for i in range(len(dims) - 2):
        layers.append(weight_norm(nn.Linear(dims[i], dims[i + 1]), dim=None))
        layers.append(nn.ReLU())
    len_dims = len(dims)
    layers.append(weight_norm(nn.Linear(dims[len_dims - 2], dims[len_dims - 1]), dim=None))
    layers.append(nn.ReLU())
    self.fc = nn.Sequential(*layers)
def __init__(self, v_dim, q_dim, num_hid, dropout=0.2):
    super(NewAttention, self).__init__()
    self.v_proj = FCNet([v_dim, num_hid])
    self.q_proj = FCNet([q_dim, num_hid])
    self.dropout = nn.Dropout(dropout)
    self.linear = weight_norm(nn.Linear(q_dim, 1), dim=None)
def __init__(self, v_dim, q_dim, num_hid):
    super(Attention, self).__init__()
    self.nonlinear = FCNet([v_dim + q_dim, num_hid])
    self.linear = weight_norm(nn.Linear(num_hid, 1), dim=None)
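# A forward sketch for the concat-style attention above, following the common
# bottom-up-attention VQA pattern (an assumption; the original forward is not shown):
def forward(self, v, q):
    # v: (batch, num_objs, v_dim) region features; q: (batch, q_dim) question encoding
    num_objs = v.size(1)
    q_tiled = q.unsqueeze(1).repeat(1, num_objs, 1)          # broadcast q over regions
    joint = self.nonlinear(torch.cat([v, q_tiled], dim=2))   # (batch, num_objs, num_hid)
    logits = self.linear(joint)                              # (batch, num_objs, 1)
    return nn.functional.softmax(logits, dim=1)              # attention weights per region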
def __init__(self, in_dim, **kwargs):
    super(LinearTransform, self).__init__()
    self.lc = weight_norm(
        nn.Linear(in_features=in_dim, out_features=kwargs['out_dim']),
        dim=None)
    self.out_dim = kwargs['out_dim']