def __init__(self, filter_size=1, feature_dim=256, feature_stride=16, pool_square=False,
             filter_norm=True, num_filter_pre_convs=1, num_filter_post_convs=0):
    super().__init__()

    self.filter_pool = FilterPool(filter_size=filter_size, feature_stride=feature_stride,
                                  pool_square=pool_square)
    self.filter_norm = filter_norm

    # Pre-pool conv blocks (3x3), applied to the backbone features
    pre_conv_layers = []
    for _ in range(num_filter_pre_convs):
        pre_conv_layers.append(conv_block(feature_dim, feature_dim, kernel_size=3, padding=1))
    self.filter_pre_layers = nn.Sequential(*pre_conv_layers) if pre_conv_layers else None

    # Post-pool conv blocks (1x1), followed by a final 1x1 conv that predicts the filter
    post_conv_layers = []
    for _ in range(num_filter_post_convs):
        post_conv_layers.append(conv_block(feature_dim, feature_dim, kernel_size=1, padding=0))
    post_conv_layers.append(nn.Conv2d(feature_dim, feature_dim, kernel_size=1, padding=0))
    self.filter_post_layers = nn.Sequential(*post_conv_layers)

    # He (fan-out) init for convs, identity init for batch norm
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
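# Illustrative aside (not part of the original module): the manual weight init
# above is the He/Kaiming fan-out scheme. A minimal standalone sketch of the
# equivalence, assuming only torch:
import math
import torch.nn as nn

conv = nn.Conv2d(256, 256, kernel_size=3, padding=1)
n = conv.kernel_size[0] * conv.kernel_size[1] * conv.out_channels  # fan-out
conv.weight.data.normal_(0, math.sqrt(2. / n))                     # manual version
# Equivalent built-in call:
# nn.init.kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')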
def __init__(self, layer_dims, use_bn=True):
    super().__init__()

    # Initial stride-2 conv and stride-2 max pool: 4x downsampling of the input mask
    self.conv_block = conv_block(1, layer_dims[0], kernel_size=3, stride=2, padding=1,
                                 batch_norm=use_bn)
    self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Two stride-2 residual blocks, each with a conv downsample on the skip connection
    ds1 = nn.Conv2d(layer_dims[0], layer_dims[1], kernel_size=3, padding=1, stride=2)
    self.res1 = BasicBlock(layer_dims[0], layer_dims[1], stride=2, downsample=ds1, use_bn=use_bn)

    ds2 = nn.Conv2d(layer_dims[1], layer_dims[2], kernel_size=3, padding=1, stride=2)
    self.res2 = BasicBlock(layer_dims[1], layer_dims[2], stride=2, downsample=ds2, use_bn=use_bn)

    # Parallel heads: label encoding and per-pixel sample weights
    self.label_pred = conv_block(layer_dims[2], layer_dims[3], kernel_size=3, stride=1, padding=1,
                                 relu=True, batch_norm=use_bn)
    self.samp_w_pred = nn.Conv2d(layer_dims[2], layer_dims[3], kernel_size=3, padding=1, stride=1)

    # He (fan-out) init for convs, identity init for batch norm
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Start from uniform sample weights: zero conv weights, unit bias
    self.samp_w_pred.weight.data.fill_(0)
    self.samp_w_pred.bias.data.fill_(1)
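# Shape sketch (illustrative, standalone; plain convs stand in for the stride-2
# residual blocks): the encoder stacks four stride-2 stages, so a (B, 1, H, W)
# label mask comes out at 1/16 resolution before the two prediction heads.
import torch
import torch.nn as nn

stages = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),   # self.conv_block
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),       # self.pool
    nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),  # res1 (stride-2 block)
    nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),  # res2 (stride-2 block)
)
print(stages(torch.zeros(1, 1, 256, 256)).shape)  # torch.Size([1, 64, 16, 16])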
def __init__(self, input_dim, hidden_dim, kernel_size, padding_mode='zeros'):
    """Referenced from https://github.com/happyjin/ConvGRU-pytorch"""
    super(ConvGRUCell, self).__init__()
    self.hidden_dim = hidden_dim

    if padding_mode == 'zeros':
        if not isinstance(kernel_size, (list, tuple)):
            kernel_size = (kernel_size, kernel_size)
        padding = kernel_size[0] // 2, kernel_size[1] // 2

        # Reset gate, update gate and candidate state all act on the
        # channel-wise concatenation of input and hidden state
        self.conv_reset = nn.Conv2d(input_dim + hidden_dim, self.hidden_dim, kernel_size,
                                    padding=padding)
        self.conv_update = nn.Conv2d(input_dim + hidden_dim, self.hidden_dim, kernel_size,
                                     padding=padding)
        self.conv_state_new = nn.Conv2d(input_dim + hidden_dim, self.hidden_dim, kernel_size,
                                        padding=padding)
    else:
        # Non-zero padding modes go through conv_block; this branch assumes an int kernel_size
        self.conv_reset = conv_block(input_dim + hidden_dim, hidden_dim, kernel_size=kernel_size,
                                     stride=1, padding=int(kernel_size // 2), batch_norm=False,
                                     relu=False, padding_mode=padding_mode)
        self.conv_update = conv_block(input_dim + hidden_dim, hidden_dim, kernel_size=kernel_size,
                                      stride=1, padding=int(kernel_size // 2), batch_norm=False,
                                      relu=False, padding_mode=padding_mode)
        self.conv_state_new = conv_block(input_dim + hidden_dim, hidden_dim, kernel_size=kernel_size,
                                         stride=1, padding=int(kernel_size // 2), batch_norm=False,
                                         relu=False, padding_mode=padding_mode)
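# Gate-computation sketch (illustrative; the cell's actual forward pass lives
# elsewhere). Following the referenced ConvGRU-pytorch convention, the gates are
# sigmoid convs over the concatenated (input, hidden) tensor and the hidden
# state is blended by the update gate:
import torch

cell = ConvGRUCell(input_dim=4, hidden_dim=8, kernel_size=3)
x = torch.randn(1, 4, 24, 24)   # per-pixel input features
h = torch.zeros(1, 8, 24, 24)   # previous hidden state
xh = torch.cat([x, h], dim=1)
r = torch.sigmoid(cell.conv_reset(xh))                                 # reset gate
z = torch.sigmoid(cell.conv_update(xh))                                # update gate
h_new = torch.tanh(cell.conv_state_new(torch.cat([x, r * h], dim=1)))  # candidate state
h_next = (1 - z) * h + z * h_new                                       # GRU state update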
def __init__(self, layer_dims, feat_dim, use_final_relu=True, use_gauss=True, use_bn=True,
             non_default_init=True, init_bn=1, gauss_scale=0.25, final_bn=True):
    super().__init__()

    # Channel dims for the residual tower: the input is backbone features plus one score channel
    in_layer_dim = (feat_dim + 1,) + tuple(list(layer_dims)[:-2])
    out_layer_dim = tuple(list(layer_dims)[:-1])

    self.use_gauss = use_gauss

    # Stride-1 residual blocks; the conv downsample only matches channel dims on the skip
    res = []
    for in_d, out_d in zip(in_layer_dim, out_layer_dim):
        ds = nn.Conv2d(in_d, out_d, kernel_size=3, padding=1, stride=1)
        res.append(BasicBlock(in_d, out_d, stride=1, downsample=ds, use_bn=use_bn))
    self.res = nn.Sequential(*res)

    self.label_pred = conv_block(layer_dims[-2], layer_dims[-1], kernel_size=3, stride=1, padding=1,
                                 relu=use_final_relu, batch_norm=final_bn)

    self.gauss_scale = gauss_scale

    if non_default_init:
        # He (fan-out) init for convs; batch norm scale set to init_bn
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(init_bn)
                m.bias.data.zero_()
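# Channel-bookkeeping sketch (illustrative values): with feat_dim=256 and
# layer_dims=(128, 64, 32, 16, 4), the residual tower maps
# 257 -> 128 -> 64 -> 32 -> 16, and label_pred maps 16 -> 4.
feat_dim, layer_dims = 256, (128, 64, 32, 16, 4)
in_layer_dim = (feat_dim + 1,) + tuple(layer_dims[:-2])
out_layer_dim = tuple(layer_dims[:-1])
print(list(zip(in_layer_dim, out_layer_dim)))
# [(257, 128), (128, 64), (64, 32), (32, 16)]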
def __init__(self, state_dim=8, representation_predictor_dims=(64, 32), gru_ksz=3,
             prev_max_pool_ksz=1, conf_measure='max', dimp_thresh=None):
    super().__init__()
    self.prev_max_pool_ksz = prev_max_pool_ksz
    self.conf_measure = conf_measure
    self.dimp_thresh = dimp_thresh

    cvproc_ksz = [3, 3]
    use_bn = True
    padding_val = [int((s - 1) / 2) for s in cvproc_ksz]

    # Two small conv stacks for processing the raw cost volume
    self.cost_volume_proc1 = nn.Sequential(
        conv_block(1, 8, kernel_size=cvproc_ksz[0], stride=1, padding=padding_val[0],
                   batch_norm=use_bn, relu=True),
        conv_block(8, 1, kernel_size=cvproc_ksz[1], stride=1, padding=padding_val[1],
                   batch_norm=use_bn, relu=False))

    self.cost_volume_proc2 = nn.Sequential(
        conv_block(1, 8, kernel_size=cvproc_ksz[0], stride=1, padding=padding_val[0],
                   batch_norm=use_bn, relu=True),
        conv_block(8, 1, kernel_size=cvproc_ksz[1], stride=1, padding=padding_val[1],
                   batch_norm=use_bn, relu=False))

    # Input channels: propagated state + model score (+ one confidence channel unless 'none')
    in_dim = state_dim + 1 + (conf_measure != 'none')

    representation_predictor_list = []
    for out_dim in representation_predictor_dims:
        representation_predictor_list.append(
            conv_block(in_dim, out_dim, kernel_size=3, stride=1, padding=1,
                       batch_norm=False, relu=True))
        in_dim = out_dim
    self.representation_predictor = nn.Sequential(*representation_predictor_list)
    self.representation_dim = in_dim

    # Fused response map, squashed to [0, 1]
    self.response_predictor = nn.Sequential(
        conv_block(in_dim, 1, kernel_size=3, stride=1, padding=1, batch_norm=False, relu=False),
        nn.Sigmoid())

    # Recurrent propagation of the scene state
    self.state_predictor = ConvGRUCell(4, state_dim, gru_ksz)

    self.init_hidden_state_predictor = nn.Sequential(
        conv_block(1, state_dim, kernel_size=3, stride=1, padding=1, batch_norm=False,
                   relu=False, bias=False),
        nn.Tanh())

    self.is_target_predictor = nn.Sequential(
        conv_block(state_dim, 4, kernel_size=gru_ksz, stride=1, padding=int(gru_ksz // 2),
                   batch_norm=False, relu=True),
        conv_block(4, 1, kernel_size=gru_ksz, stride=1, padding=int(gru_ksz // 2),
                   batch_norm=False, relu=False))

    # Kaiming (fan-in) init for convs and linears, identity init for batch norm
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
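# Input-channel sketch (illustrative): the representation predictor sees the
# propagated state (state_dim channels), the target model score (1 channel)
# and, unless conf_measure == 'none', one confidence channel.
state_dim, conf_measure = 8, 'max'
in_dim = state_dim + 1 + (conf_measure != 'none')  # the bool coerces to 0/1
print(in_dim)  # 10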