def __init__(self, R_channels, A_channels, output_mode='error', gpu_id=-1):
    super(PredNet, self).__init__()
    # Representation neurons: R_channels = (3, 48, 96, 192); a trailing 0 is appended
    # for convenience so that self.r_channels[i + 1] is valid at the top layer,
    # giving (3, 48, 96, 192, 0).
    self.r_channels = R_channels + (0, )
    # Layer-specific prediction targets: A_channels = (3, 48, 96, 192).
    self.a_channels = A_channels
    self.n_layers = len(R_channels)  # 4 = len((3, 48, 96, 192))
    self.output_mode = output_mode
    self.gpu_id = gpu_id
    self.extrap_start_time = None

    default_output_modes = ['prediction', 'error']
    assert output_mode in default_output_modes, 'Invalid output_mode: ' + str(
        output_mode)

    for i in range(self.n_layers):
        # ConvLSTMCell(in_channels, out_channels, kernel_size=(3, 3)).
        # Input is the higher-level representation R[i + 1] plus the error term
        # (layer-specific prediction +/- target), hence 2 * a_channels[i] + r_channels[i + 1].
        cell = ConvLSTMCell(2 * self.a_channels[i] + self.r_channels[i + 1],
                            self.r_channels[i], (3, 3))
        setattr(self, 'cell{}'.format(i), cell)

    for i in range(self.n_layers):
        # hat_A[i]: takes the representation neurons as input, outputs a_channels[i];
        # defined as an nn.Sequential.
        conv = nn.Sequential(
            nn.Conv2d(self.r_channels[i], self.a_channels[i], 3, padding=1),
            nn.ReLU())
        # Changed to use clamp instead; originally only the first layer added SatLU():
        # if i == 0:
        #     conv.add_module('satlu', SatLU())
        setattr(self, 'conv{}'.format(i), conv)

    self.upsample = nn.Upsample(scale_factor=2)
    # Max pooling for A[l + 1]: stride 2 halves the spatial size.
    self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

    for l in range(self.n_layers - 1):
        # A[l + 1]: takes the error term E[l] as input, outputs a_channels[l + 1].
        update_A = nn.Sequential(
            nn.Conv2d(2 * self.a_channels[l], self.a_channels[l + 1], (3, 3),
                      padding=1),
            self.maxpool)
        setattr(self, 'update_A{}'.format(l), update_A)

    self.reset_parameters()
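# --- Usage sketch (added; illustrative, not part of the source) -----------------
# A minimal construction example for the PredNet variant above, assuming the full
# class (including reset_parameters() and forward()) is importable from its module.
# The channel layout follows the (3, 48, 96, 192) comments; the variable names are
# hypothetical.
R_channels = (3, 48, 96, 192)   # representation-unit channels, lowest to highest layer
A_channels = (3, 48, 96, 192)   # target / prediction channels per layer
model = PredNet(R_channels, A_channels, output_mode='error', gpu_id=-1)
# The two tuples must have equal length: n_layers = len(R_channels), and every
# cell{i} / conv{i} / update_A{l} module indexes both tuples in lockstep.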
def __init__(self, R_channels, A_channels, device='cpu', t_extrap=float('inf'), scale=4):
    super(PredNet, self).__init__()
    self.r_channels = R_channels + (0, )  # for convenience
    self.a_channels = A_channels
    self.n_layers = len(R_channels)
    self.device = device
    self.t_extrap = t_extrap

    for i in range(self.n_layers):
        cell = ConvLSTMCell(2 * self.a_channels[i] + self.r_channels[i + 1],
                            self.r_channels[i], (3, 3))
        setattr(self, 'cell{}'.format(i), cell)

    for i in range(self.n_layers):
        conv = nn.Sequential(
            nn.Conv2d(self.r_channels[i], self.a_channels[i], 3, padding=1),
            nn.ReLU())
        if i == 0:
            conv.add_module('satlu', SatLU())
        setattr(self, 'conv{}'.format(i), conv)

    self.scale = scale
    self.upsample = nn.Upsample(scale_factor=scale)
    self.maxpool = nn.MaxPool2d(kernel_size=scale, stride=scale)

    for l in range(self.n_layers - 1):
        update_A = nn.Sequential(
            nn.Conv2d(2 * self.a_channels[l], self.a_channels[l + 1], (3, 3),
                      padding=1),
            self.maxpool)
        setattr(self, 'update_A{}'.format(l), update_A)

    self.reset_parameters()
def __init__(self, R_channels, A_channels, output_mode='error'):
    super(PredNet, self).__init__()
    self.r_channels = R_channels + (0, )  # for convenience
    self.a_channels = A_channels
    self.n_layers = len(R_channels)
    self.output_mode = output_mode
    self.prediction_all = []
    self.error_all = []

    default_output_modes = [
        'prediction', 'error', 'prediction_all', 'error_all'
    ]
    assert output_mode in default_output_modes, 'Invalid output_mode: ' + str(
        output_mode)

    for i in range(self.n_layers):
        cell = ConvLSTMCell(2 * self.a_channels[i] + self.r_channels[i + 1],
                            self.r_channels[i], (3, 3))
        setattr(self, 'cell{}'.format(i), cell)

    for i in range(self.n_layers):
        conv = nn.Sequential(
            nn.Conv2d(self.r_channels[i], self.a_channels[i], 3, padding=1),
            nn.ReLU())
        if i == 0:
            conv.add_module('satlu', SatLU())
        setattr(self, 'conv{}'.format(i), conv)

    self.upsample = nn.Upsample(scale_factor=2)
    self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

    for l in range(self.n_layers - 1):
        update_A = nn.Sequential(
            nn.Conv2d(2 * self.a_channels[l], self.a_channels[l + 1], (3, 3),
                      padding=1),
            self.maxpool)
        setattr(self, 'update_A{}'.format(l), update_A)

    self.reset_parameters()
def __init__(self,
             # input to the model
             input_channels,
             # architecture of the model
             layers_per_block, hidden_channels, skip_stride=None,
             # scope of convolutional tensor-train layers
             scope="all", scope_params={},
             # parameters of convolutional tensor-train layers
             cell="convlstm", cell_params={},
             # parameters of convolutional operation
             kernel_size=3, bias=True,
             # output function and output format
             output_sigmoid=False):
    """
    Initialization of a Conv-LSTM network.

    Arguments:
    ----------
    (Hyper-parameters of the input interface)
    input_channels: int
        The number of channels of the input video.
        Note: 3 for colored video, 1 for gray video.

    (Hyper-parameters of the model architecture)
    layers_per_block: list of ints
        Number of Conv-LSTM layers in each block.
    hidden_channels: list of ints
        Number of output channels per block.
        Note: the length of hidden_channels (or layers_per_block) is equal to the number of blocks.
    skip_stride: int
        The stride (in terms of blocks) of the skip connections.
        default: None, i.e. no skip connection
    cell_params: dictionary
        order: int
            The recurrent order of convolutional tensor-train cells. default: 3
        steps: int
            The number of previous steps used in the recurrent cells. default: 5
        rank: int
            The tensor-train rank of convolutional tensor-train cells. default: 16

    (Parameters of the convolutional operations)
    kernel_size: int or (int, int)
        Size of the (square) convolutional kernel. default: 3
    bias: bool
        Whether to add a bias in the convolutional operation. default: True

    (Parameters of the output function)
    output_sigmoid: bool
        Whether to apply a sigmoid function after the output layer. default: False
    """
    super(ConvLSTMNet, self).__init__()

    ## Hyperparameters
    self.layers_per_block = layers_per_block
    self.hidden_channels = hidden_channels

    self.num_blocks = len(layers_per_block)
    assert self.num_blocks == len(hidden_channels), "Invalid number of blocks."

    self.skip_stride = (self.num_blocks + 1) if skip_stride is None else skip_stride

    self.output_sigmoid = output_sigmoid

    ## Module type of convolutional LSTM layers
    if cell == "convlstm":  # standard convolutional LSTM
        Cell = lambda in_channels, out_channels: ConvLSTMCell(
            input_channels=in_channels, hidden_channels=out_channels,
            kernel_size=kernel_size, bias=bias)
    elif cell == "convttlstm":  # convolutional tensor-train LSTM
        Cell = lambda in_channels, out_channels: ConvTTLSTMCell(
            input_channels=in_channels, hidden_channels=out_channels,
            order=cell_params["order"], steps=cell_params["steps"],
            ranks=cell_params["rank"], kernel_size=kernel_size, bias=bias)
    else:
        raise NotImplementedError

    ## Construction of the convolutional tensor-train LSTM network:
    # stack the convolutional-LSTM layers with skip connections.
    self.layers = nn.ModuleDict()
    for b in range(self.num_blocks):
        for l in range(layers_per_block[b]):
            # number of input channels to the current layer
            if l > 0:
                channels = hidden_channels[b]
            elif b == 0:  # if l == 0 and b == 0:
                channels = input_channels
            else:  # if l == 0 and b > 0:
                channels = hidden_channels[b - 1]
                # use self.skip_stride here so the comparison also works when
                # skip_stride was passed as None (normalized above)
                if b > self.skip_stride:
                    channels += hidden_channels[b - 1 - self.skip_stride]

            lid = "b{}l{}".format(b, l)  # layer ID
            self.layers[lid] = Cell(channels, hidden_channels[b])

    # number of input channels to the last layer (output layer)
    channels = hidden_channels[-1]
    if self.num_blocks >= self.skip_stride:
        channels += hidden_channels[-1 - self.skip_stride]

    self.layers["output"] = nn.Conv2d(channels, input_channels,
                                      kernel_size=1, padding=0, bias=True)
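# --- Usage sketch (added; illustrative, not part of the source) -----------------
# Constructing the ConvLSTMNet above with convolutional tensor-train cells. The
# block layout, channel widths, and cell_params values below are assumptions; the
# cell_params defaults are taken from the docstring, not from any training config.
model = ConvLSTMNet(
    input_channels=3,                     # RGB video frames
    layers_per_block=(3, 3, 3, 3),        # four blocks of three Conv-LSTM layers each
    hidden_channels=(32, 48, 48, 32),     # output channels per block
    skip_stride=2,                        # skip connection every 2 blocks
    cell="convttlstm",
    cell_params={"order": 3, "steps": 5, "rank": 16},
    kernel_size=3,
    bias=True,
    output_sigmoid=False)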
class LSTMInCell(nn.Module):
    def __init__(self, in_channels, out_channels, batch_size=1):
        super(LSTMInCell, self).__init__()
        # Each of the parallel branches below emits out_channels // 6 (or twice that)
        # channels, so the concatenated output has 6 * (out_channels // 6) channels.
        out_channels = out_channels // 6
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.batch_size = batch_size

        self.relu = nn.ReLU()

        self._1x1x1_conv_1 = nn.Conv3d(in_channels=in_channels,
                                       out_channels=out_channels,
                                       kernel_size=(1, 1, 1))
        self._1x1x1_conv_2 = nn.Conv3d(in_channels=in_channels,
                                       out_channels=out_channels,
                                       kernel_size=(1, 1, 1))
        self._1x1x1_conv_3 = nn.Conv3d(in_channels=in_channels,
                                       out_channels=out_channels,
                                       kernel_size=(1, 1, 1))

        self._3x3x3_max_pool = nn.MaxPool3d(kernel_size=(3, 3, 3),
                                            stride=(1, 1, 1),
                                            ceil_mode=True)
        self._pad = nn.ConstantPad3d(
            self.get_padding_shape(filter_shape=(3, 3, 3), stride=(1, 1, 1)), 0)

        self._1x1x1_lstm_conv = ConvLSTMCell(input_channels=in_channels,
                                             hidden_channels=out_channels,
                                             kernel_size=1)
        self._3x3x3_lstm_conv = ConvLSTMCell(input_channels=out_channels,
                                             hidden_channels=out_channels * 2,
                                             kernel_size=3)
        self._5x5x5_lstm_conv = ConvLSTMCell(input_channels=out_channels,
                                             hidden_channels=out_channels * 2,
                                             kernel_size=5)

        # Recurrent states, lazily initialized at the first step of a sequence.
        self._1x1x1_lstm_h = None
        self._1x1x1_lstm_c = None
        self._3x3x3_lstm_h = None
        self._3x3x3_lstm_c = None
        self._5x5x5_lstm_h = None
        self._5x5x5_lstm_c = None
        self._1x1x1_conv_h = None

    def get_padding_shape(self, filter_shape, stride):
        # "SAME"-style padding for the 3x3x3 max pool, reordered for nn.ConstantPad3d.
        def _pad_top_bottom(filter_dim, stride_val):
            pad_along = max(filter_dim - stride_val, 0)
            pad_top = pad_along // 2
            pad_bottom = pad_along - pad_top
            return pad_top, pad_bottom

        padding_shape = []
        for filter_dim, stride_val in zip(filter_shape, stride):
            pad_top, pad_bottom = _pad_top_bottom(filter_dim, stride_val)
            padding_shape.append(pad_top)
            padding_shape.append(pad_bottom)

        depth_top = padding_shape.pop(0)
        depth_bottom = padding_shape.pop(0)
        padding_shape.append(depth_top)
        padding_shape.append(depth_bottom)
        return tuple(padding_shape)

    def forward(self, x, step=0):
        if step == 0:
            # Reset the recurrent states at the start of a sequence, sized to the input.
            self._1x1x1_lstm_h, self._1x1x1_lstm_c = self._1x1x1_lstm_conv.init_hidden(
                self.batch_size, self.out_channels, (*x.shape[2:], ))
            self._3x3x3_lstm_h, self._3x3x3_lstm_c = self._3x3x3_lstm_conv.init_hidden(
                self.batch_size, self.out_channels * 2, (*x.shape[2:], ))
            self._5x5x5_lstm_h, self._5x5x5_lstm_c = self._5x5x5_lstm_conv.init_hidden(
                self.batch_size, self.out_channels * 2, (*x.shape[2:], ))

        self._1x1x1_lstm_h, self._1x1x1_lstm_c = self._1x1x1_lstm_conv(
            x, self._1x1x1_lstm_h, self._1x1x1_lstm_c)
        self._3x3x3_lstm_h, self._3x3x3_lstm_c = self._3x3x3_lstm_conv(
            self.relu(self._1x1x1_conv_1(x)), self._3x3x3_lstm_h, self._3x3x3_lstm_c)
        self._5x5x5_lstm_h, self._5x5x5_lstm_c = self._5x5x5_lstm_conv(
            self.relu(self._1x1x1_conv_2(x)), self._5x5x5_lstm_h, self._5x5x5_lstm_c)
        self._1x1x1_conv_h = self._1x1x1_conv_3(
            self._3x3x3_max_pool(self._pad(x)))

        return torch.cat((self._1x1x1_lstm_h, self._3x3x3_lstm_h,
                          self._5x5x5_lstm_h, self._1x1x1_conv_h), 1)
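# --- Usage sketch (added; illustrative, not part of the source) -----------------
# Stepping the LSTMInCell above over a short sequence. This assumes the ConvLSTMCell
# it wraps accepts 5-D tensors (batch, channels, depth, height, width), as the
# nn.Conv3d branches suggest; all sizes and names here are assumptions.
import torch

cell = LSTMInCell(in_channels=64, out_channels=192, batch_size=2)
for step in range(4):
    x = torch.randn(2, 64, 8, 28, 28)   # (B, C, D, H, W) dummy input
    # step=0 resets the three ConvLSTM hidden/cell states to match x's spatial size.
    y = cell(x, step=step)
    # Output channels: (192 // 6) * (1 + 2 + 2 + 1) = 192, concatenated along dim 1.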