def __init__(self, configs):
    """Assemble the moving-MNIST model: a ConvLSTM encoder plus two
    Generator heads, one reconstructing the input clip and one
    predicting future frames.

    Args:
        configs: mapping holding the sub-module configs under the keys
            'encoder_configs', 'reconstruct_configs', 'predict_configs'.
    """
    super(MMNIST_ConvLSTM, self).__init__()
    config_keys = ['encoder_configs', 'reconstruct_configs', 'predict_configs']
    encoder_cfg, reconstruct_cfg, predict_cfg = unpack(configs, config_keys)
    self.encoder = ConvLSTM(encoder_cfg)
    self.reconstructor = Generator(reconstruct_cfg)
    self.predictor = Generator(predict_cfg)
def __init__(self, num_classes=61, mem_size=512, no_cam=False):
    """Bi-modal (RGB + flow) attention model: a ResNet-34 backbone feeds a
    ConvLSTM cell whose pooled state is classified by dropout + linear.

    Args:
        num_classes: number of output classes for the final linear layer.
        mem_size: hidden-state channel count of the ConvLSTM memory.
        no_cam: if True, callers presumably skip the CAM-based attention
            path — TODO confirm against forward().
    """
    super(NewAttentionModelBi, self).__init__()
    self.num_classes = num_classes
    self.noCam = no_cam
    self.mem_size = mem_size
    # Project-local resnet34 variant; noBN is not a torchvision argument.
    self.resnet = resnet34(pretrained=True, noBN=True)
    # Fix: the original wrapped these in the long-deprecated
    # torch.autograd.Variable, a no-op since PyTorch 0.4.  They remain plain
    # (unregistered) tensors, as before, so state_dict/.to(device) behavior
    # is unchanged.
    # NOTE(review): hardcoded .cuda() makes the model GPU-only and ignores
    # any later .to(device) call; consider register_buffer — kept as-is to
    # preserve behavior.
    self.attention_rgb = torch.FloatTensor(512).normal_(0, .05).unsqueeze(0).cuda()
    self.attention_flow = torch.FloatTensor(512).normal_(0, .05).unsqueeze(0).cuda()
    # 1024 input channels: RGB and flow features concatenated — TODO confirm.
    self.lstm_cell = ConvLSTM(1024, mem_size)
    self.avgpool = nn.AvgPool2d(7)
    self.dropout = nn.Dropout(0.7)
    self.fc = nn.Linear(mem_size, self.num_classes)
    self.classifier = nn.Sequential(self.dropout, self.fc)
    self._custom_train_mode = True
def __init__(self, num_classes=61, mem_size=512, no_cam=False,
             enable_motion_segmentation=False):
    """Single-stream attention model: ResNet-34 features drive a ConvLSTM
    memory, with an optional motion-segmentation auxiliary head.

    Args:
        num_classes: output dimension of the final classifier.
        mem_size: ConvLSTM hidden-state channel count.
        no_cam: stored flag controlling CAM-based attention elsewhere.
        enable_motion_segmentation: stored flag enabling the auxiliary
            MotionSegmentationBlock path.
    """
    super(AttentionModel, self).__init__()

    # Plain configuration flags.
    self.num_classes = num_classes
    self.mem_size = mem_size
    self.noCam = no_cam
    self.enable_motion_segmentation = enable_motion_segmentation

    # Backbone (project-local resnet34 variant) and the CAM weight matrix
    # taken from its final fc layer.
    self.resnet = resnet34(pretrained=True, noBN=True)
    self.weight_softmax = self.resnet.fc.weight

    # Recurrent memory over spatial feature maps.
    self.lstm_cell = ConvLSTM(512, mem_size)

    # Pool -> dropout -> linear classifier.
    self.avgpool = nn.AvgPool2d(7)
    self.dropout = nn.Dropout(0.7)
    self.fc = nn.Linear(mem_size, self.num_classes)
    self.classifier = nn.Sequential(self.dropout, self.fc)

    # Auxiliary segmentation head and custom train-mode marker.
    self.motion_segmentation = MotionSegmentationBlock()
    self._custom_train_mode = True
def __init__(self, in_dim, ae_en_h_dims, ae_de_h_dims, conv_lstm_in_size,
             conv_lstm_in_dim, conv_lstm_h_dim, conv_lstm_kernel_sizes,
             conv_lstm_n_layers, fc_in_dim, fc_h_dims, fc_out_dim, **kwargs):
    """DeepAP pipeline: mask layer -> pretrained auto-encoder -> a bank of
    ConvLSTMs (one per kernel size) -> fully-connected head.

    Args:
        in_dim: input feature dimension (also the mask layer's in/out size).
        ae_en_h_dims / ae_de_h_dims: encoder / decoder hidden sizes of the
            AutoEncoder.
        conv_lstm_*: size/dim/hidden/kernel/layer configuration shared by
            every ConvLSTM in the bank.
        fc_*: input, hidden and output sizes of the FC head.
        **kwargs: optional settings — 'device', 'mask_thre',
            'ae_pretrain_weight' (required state_dict), 'if_trainable',
            'conv_lstm_batch_first', 'conv_lstm_bias', 'only_last_state',
            'fc_p_dropout'.

    Raises:
        ValueError: if no pretrained AutoEncoder weights are supplied.
    """
    super(DeepAP, self).__init__()
    self.device = kwargs.get('device', 'cpu')

    ################
    # masked layer #
    ################
    # Identity mask covering every input dimension.
    mask = [list(range(in_dim)), list(range(in_dim))]
    self.mask_layer = MaskNet(in_dim, in_dim, mask, device=self.device)
    self.mask_thre = kwargs.get('mask_thre', 0.0001)

    ######################
    # auto_encoder layer #
    ######################
    self.ae = AutoEncoder(in_dim=in_dim,
                          en_h_dims=ae_en_h_dims,
                          de_h_dims=ae_de_h_dims)
    if kwargs.get('ae_pretrain_weight') is None:
        raise ValueError('AutoEncoder not pretrained.')
    self.ae.load_state_dict(kwargs['ae_pretrain_weight'])
    # Fix: the original froze only `self.ae.weight` in the non-trainable
    # branch — a composite nn.Module has no `.weight` attribute, and even if
    # it did, every other AE parameter would have stayed trainable.  Set the
    # flag uniformly on all parameters (cast to bool so a truthy non-bool
    # kwargs value cannot leak into requires_grad).
    trainable = bool(kwargs.get('if_trainable', False))
    for p in self.ae.parameters():
        p.requires_grad = trainable

    ####################
    # conv_lstm layers #
    ####################
    # One ConvLSTM per requested (square) kernel size.
    self.conv_lstm_list = nn.ModuleList()
    for k in conv_lstm_kernel_sizes:
        self.conv_lstm_list.append(ConvLSTM(
            in_size=conv_lstm_in_size,
            in_dim=conv_lstm_in_dim,
            h_dim=conv_lstm_h_dim,
            kernel_size=(k, k),
            num_layers=conv_lstm_n_layers,
            batch_first=kwargs.get('conv_lstm_batch_first', True),
            bias=kwargs.get('conv_lstm_bias', True),
            only_last_state=kwargs.get('only_last_state', True),
            device=self.device))

    #########################
    # fully-connected layer #
    #########################
    self.fc = FC(
        in_dim=fc_in_dim,  # assert in_size == n_conv_lstm * conv_lstm_h_dim
        h_dims=fc_h_dims,
        out_dim=fc_out_dim,
        p_dropout=kwargs.get('fc_p_dropout', 0.1))