Example #1
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs
    """
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def["type"] == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
            modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module("batch_norm_%d" % i,
                                   nn.BatchNorm2d(filters))
            if module_def["activation"] == "leaky":
                modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                padding = nn.ZeroPad2d((0, 1, 0, 1))
                modules.add_module("_debug_padding_%d" % i, padding)
            maxpool = nn.MaxPool2d(
                kernel_size=kernel_size,
                stride=stride,
                padding=(kernel_size - 1) // 2,
            )
            modules.add_module("maxpool_%d" % i, maxpool)

        elif module_def["type"] == "upsample":
            # upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')  # WARNING: deprecated
            upsample = Upsample(scale_factor=int(module_def['stride']))
            modules.add_module('upsample_%d' % i, upsample)

        elif module_def["type"] == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            #filters = sum([output_filters[layer_i] for layer_i in layers])
            filters = 0
            for layer_i in layers:
                if layer_i > 0:
                    filters += output_filters[layer_i + 1]
                else:
                    filters += output_filters[layer_i]
            modules.add_module("route_%d" % i, EmptyLayer())

        elif module_def["type"] == "shortcut":
            filters = output_filters[int(module_def["from"])]
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1])
                       for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)

        elif module_def["type"] == "feed_conv2d":
            filters = int(module_def["anchors_num"]) * 5
            if "out_channel" in module_def:
                filters = int(module_def["out_channel"])
            modules.add_module(
                "feed_conv_%d" % i,
                FeedConv2d(in_channels=output_filters[-1],
                           out_channel_unit=filters,
                           kernel_size=int(module_def["size"]),
                           stride=int(module_def["stride"])),
            )

        elif module_def["type"] == "fyolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1])
                       for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = FYOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)

        elif module_def["type"] == "myolo":
            ratios = [float(x) for x in module_def["ratios"].split(",")]
            scales = [float(x) for x in module_def["scales"].split(",")]
            #ratios=[0.33, 1, 3]
            #scales=[1]
            num_anchors_should = output_filters[-1] / 5
            num_anchors = len(ratios) * len(scales)
            assert num_anchors_should == num_anchors
            anchor_generator = Anchor(ratios, scales)
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = MYOLOLayer(anchor_generator, num_anchors, num_classes,
                                    img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)
        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
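The helper above follows the usual Darknet-cfg pattern: pop the [net] hyperparameters, then turn each block dict into an nn.Sequential appended to an nn.ModuleList while tracking output channels. A runnable, self-contained sketch of the same pattern, using hypothetical block dicts and only stock torch.nn layers (the custom EmptyLayer/Upsample/YOLOLayer helpers are omitted):

import torch
import torch.nn as nn

module_defs = [
    {"type": "net", "channels": "3", "height": "416"},
    {"type": "convolutional", "batch_normalize": "1", "filters": "16",
     "size": "3", "stride": "1", "pad": "1", "activation": "leaky"},
    {"type": "maxpool", "size": "2", "stride": "2"},
]

hyperparams = module_defs.pop(0)
output_filters = [int(hyperparams["channels"])]
module_list = nn.ModuleList()
for i, module_def in enumerate(module_defs):
    modules = nn.Sequential()
    if module_def["type"] == "convolutional":
        filters = int(module_def["filters"])
        kernel_size = int(module_def["size"])
        pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
        modules.add_module("conv_%d" % i, nn.Conv2d(
            output_filters[-1], filters, kernel_size,
            stride=int(module_def["stride"]), padding=pad, bias=False))
        modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
        modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))
    elif module_def["type"] == "maxpool":
        filters = output_filters[-1]  # pooling keeps the channel count
        modules.add_module("maxpool_%d" % i, nn.MaxPool2d(
            int(module_def["size"]), int(module_def["stride"])))
    module_list.append(modules)
    output_filters.append(filters)

x = torch.randn(1, 3, 416, 416)
for block in module_list:
    x = block(x)
print(x.shape)  # torch.Size([1, 16, 208, 208])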
Example #2
def clones(module, N):
    "Produce N identical layers from a given module."
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
Example #3
 def __init__(self, dictionary, encoders):
     super().__init__(dictionary)
     self.encoders = nn.ModuleList(encoders)
Example #4
    def __init__(
        self,
        num_classes,
        width=1.0,
        strides=[8, 16, 32],
        in_channels=[256, 512, 1024],
        act="silu",
        depthwise=False,
    ):
        """
        Args:
            act (str): activation type of conv. Defalut value: "silu".
            depthwise (bool): wheather apply depthwise conv in conv branch. Defalut value: False.
        """
        super().__init__()

        self.n_anchors = 1
        self.num_classes = num_classes
        self.decode_in_inference = True  # for deploy, set to False

        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
        self.cls_preds = nn.ModuleList()
        self.reg_preds = nn.ModuleList()
        self.obj_preds = nn.ModuleList()
        self.stems = nn.ModuleList()
        Conv = DWConv if depthwise else BaseConv

        for i in range(len(in_channels)):
            self.stems.append(
                BaseConv(
                    in_channels=int(in_channels[i] * width),
                    out_channels=int(256 * width),
                    ksize=1,
                    stride=1,
                    act=act,
                )
            )
            self.cls_convs.append(
                nn.Sequential(
                    *[
                        Conv(
                            in_channels=int(256 * width),
                            out_channels=int(256 * width),
                            ksize=3,
                            stride=1,
                            act=act,
                        ),
                        Conv(
                            in_channels=int(256 * width),
                            out_channels=int(256 * width),
                            ksize=3,
                            stride=1,
                            act=act,
                        ),
                    ]
                )
            )
            self.reg_convs.append(
                nn.Sequential(
                    *[
                        Conv(
                            in_channels=int(256 * width),
                            out_channels=int(256 * width),
                            ksize=3,
                            stride=1,
                            act=act,
                        ),
                        Conv(
                            in_channels=int(256 * width),
                            out_channels=int(256 * width),
                            ksize=3,
                            stride=1,
                            act=act,
                        ),
                    ]
                )
            )
            self.cls_preds.append(
                nn.Conv2d(
                    in_channels=int(256 * width),
                    out_channels=self.n_anchors * self.num_classes,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                )
            )
            self.reg_preds.append(
                nn.Conv2d(
                    in_channels=int(256 * width),
                    out_channels=4,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                )
            )
            self.obj_preds.append(
                nn.Conv2d(
                    in_channels=int(256 * width),
                    out_channels=self.n_anchors * 1,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                )
            )

        self.use_l1 = False
        self.l1_loss = nn.L1Loss(reduction="none")
        self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none")
        self.iou_loss = IOUloss(reduction="none")
        self.strides = strides
        self.grids = [torch.zeros(1)] * len(in_channels)
        self.expanded_strides = [None] * len(in_channels)
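The head above keeps one branch per FPN level in index-aligned nn.ModuleLists and iterates them together in the forward pass. A stripped-down sketch of that layout (a hypothetical TinyHead with plain nn.Conv2d in place of BaseConv/DWConv):

import torch
import torch.nn as nn

class TinyHead(nn.Module):
    def __init__(self, num_classes=80, in_channels=(256, 512, 1024)):
        super().__init__()
        # one stem + one prediction conv per pyramid level, kept index-aligned
        self.stems = nn.ModuleList(
            nn.Conv2d(c, 256, kernel_size=1) for c in in_channels)
        self.cls_preds = nn.ModuleList(
            nn.Conv2d(256, num_classes, kernel_size=1) for _ in in_channels)

    def forward(self, feats):
        # feats: one tensor per FPN level, in the same order as in_channels
        return [self.cls_preds[k](self.stems[k](f)) for k, f in enumerate(feats)]

head = TinyHead()
feats = [torch.randn(1, c, s, s) for c, s in zip((256, 512, 1024), (52, 26, 13))]
print([o.shape for o in head(feats)])
# [torch.Size([1, 80, 52, 52]), torch.Size([1, 80, 26, 26]), torch.Size([1, 80, 13, 13])]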
Example #5
    def __init__(
        self,
        stages,
        in_channels,
        last_channels,
        out_channels,
        conv_layer=NormConv2d,
        subpixel_upsampling=False,
        n_latent_stages=2,
    ):
        super().__init__()
        self.n_rnb = 2
        self.n_stages = stages
        self.n_latent_stages = n_latent_stages
        self.nin = conv_layer(in_channels, in_channels, kernel_size=1)
        self.blocks = nn.ModuleList()
        self.ups = nn.ModuleList()
        # autoregressive stuff
        self.latent_nins = nn.ModuleDict()
        self.auto_lp = nn.ModuleDict()
        self.auto_blocks = nn.ModuleDict()
        # last conv
        self.out_conv = conv_layer(last_channels,
                                   out_channels,
                                   kernel_size=3,
                                   padding=1)
        # for reordering
        self.depth_to_space = DepthToSpace(block_size=2)
        self.space_to_depth = SpaceToDepth(block_size=2)

        n_latent_channels_in = in_channels

        in_channels = in_channels
        for i in range(self.n_stages):

            for n in range(self.n_rnb // 2):
                self.blocks.append(
                    VunetRNB(
                        channels=in_channels,
                        a_channels=in_channels,
                        residual=True,
                        conv_layer=conv_layer,
                    ))

            if i < self.n_latent_stages:
                scale = f"l_{i}"
                self.latent_nins.update({
                    scale:
                    conv_layer(
                        n_latent_channels_in * 2,
                        n_latent_channels_in,
                        kernel_size=1,
                    )
                })

                # autoregressive_stuff
                clp = ModuleList()
                cb = ModuleList()
                for l in range(4):

                    clp.append(
                        conv_layer(
                            4 * n_latent_channels_in,
                            n_latent_channels_in,
                            kernel_size=3,
                            padding=1,
                        ))
                    if l == 0:
                        cb.append(VunetRNB(channels=n_latent_channels_in))
                    else:
                        cb.append(
                            VunetRNB(
                                channels=4 * n_latent_channels_in,
                                a_channels=n_latent_channels_in,
                                residual=True,
                            ))

                self.auto_lp.update({scale: clp})
                self.auto_blocks.update({scale: cb})

            for n in range(self.n_rnb // 2):
                self.blocks.append(
                    VunetRNB(
                        channels=in_channels,
                        a_channels=in_channels,
                        residual=True,
                        conv_layer=conv_layer,
                    ))

            if i + 1 < self.n_stages:
                out_c = min(in_channels, last_channels * 2**(stages - (i + 2)))
                self.ups.append(
                    Upsample(
                        in_channels,
                        out_c,
                        subpixel=subpixel_upsampling
                        if i < self.n_latent_stages else False,
                    ))
                in_channels = out_c
Example #6
def copy_layers(src_layers: nn.ModuleList, dest_layers: nn.ModuleList,
                layers_to_copy: List[int]) -> None:
    layers_to_copy = nn.ModuleList([src_layers[i] for i in layers_to_copy])
    assert len(dest_layers) == len(
        layers_to_copy), f"{len(dest_layers)} != {len(layers_to_copy)}"
    dest_layers.load_state_dict(layers_to_copy.state_dict())
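A usage sketch for copy_layers (assuming the function above is in scope), with made-up 8-unit Linear stacks:

import torch.nn as nn

src = nn.ModuleList(nn.Linear(8, 8) for _ in range(6))
dest = nn.ModuleList(nn.Linear(8, 8) for _ in range(3))

# copy the first, middle, and last source layers into dest
copy_layers(src, dest, [0, 3, 5])
assert all(
    (d.weight == src[i].weight).all()
    for d, i in zip(dest, [0, 3, 5])
)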
Example #7
    def __init__(
        self, n, nstack, dims, modules, heads, pre=None, cnv_dim=256, 
        make_tl_layer=None, make_br_layer=None,
        make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer,
        make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer,
        make_up_layer=make_layer, make_low_layer=make_layer, 
        make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
        make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
        make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer, 
        kp_layer=residual
    ):
        super(exkp, self).__init__()

        self.nstack    = nstack
        self.heads     = heads

        curr_dim = dims[0]

        self.pre = nn.Sequential(
            convolution(7, 3, 128, stride=2),
            residual(3, 128, 256, stride=2)
        ) if pre is None else pre

        self.kps  = nn.ModuleList([
            kp_module(
                n, dims, modules, layer=kp_layer,
                make_up_layer=make_up_layer,
                make_low_layer=make_low_layer,
                make_hg_layer=make_hg_layer,
                make_hg_layer_revr=make_hg_layer_revr,
                make_pool_layer=make_pool_layer,
                make_unpool_layer=make_unpool_layer,
                make_merge_layer=make_merge_layer
            ) for _ in range(nstack)
        ])
        self.cnvs = nn.ModuleList([
            make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack)
        ])

        self.inters = nn.ModuleList([
            make_inter_layer(curr_dim) for _ in range(nstack - 1)
        ])

        self.inters_ = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False),
                nn.BatchNorm2d(curr_dim)
            ) for _ in range(nstack - 1)
        ])
        self.cnvs_   = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False),
                nn.BatchNorm2d(curr_dim)
            ) for _ in range(nstack - 1)
        ])

        ## keypoint heatmaps
        for head in heads.keys():
            if 'hm' in head:
                module =  nn.ModuleList([
                    make_heat_layer(
                        cnv_dim, curr_dim, heads[head]) for _ in range(nstack)
                ])
                self.__setattr__(head, module)
                for heat in self.__getattr__(head):
                    heat[-1].bias.data.fill_(-2.19)
            else:
                module = nn.ModuleList([
                    make_regr_layer(
                        cnv_dim, curr_dim, heads[head]) for _ in range(nstack)
                ])
                self.__setattr__(head, module)


        self.relu = nn.ReLU(inplace=True)
Example #8
 def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
     super().__init__()
     self.num_layers = num_layers
     h = [hidden_dim] * (num_layers - 1)
     self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
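The constructor above zips [input_dim] + h against h + [output_dim] to chain the Linear dimensions, but the snippet omits forward. A self-contained version, with the usual (assumed) ReLU between all but the last layer:

import torch
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(
            nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            # ReLU on every layer except the last
            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
        return x

print(MLP(256, 512, 4, 3)(torch.randn(2, 256)).shape)  # torch.Size([2, 4])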
Example #9
def clones(module, n_layers):
    """
    Produce n layers for module
    """
    return nn.ModuleList([copy.deepcopy(module) for _ in range(n_layers)])
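The copy.deepcopy is what makes the layers independent; multiplying a one-element list would register the same module object n times, sharing one set of weights. A quick check:

import copy
import torch.nn as nn

def clones(module, n_layers):
    return nn.ModuleList([copy.deepcopy(module) for _ in range(n_layers)])

layer = nn.Linear(4, 4)
independent = clones(layer, 3)
shared = nn.ModuleList([layer] * 3)

print(independent[0] is independent[1])  # False: separate parameters
print(shared[0] is shared[1])            # True: one set of weights, three entries
print(len(list(independent.parameters())), len(list(shared.parameters())))  # 6 2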
Example #10
    def __init__(self,
                 ntokens,
                 input_dims,
                 hidden_size,
                 num_heads,
                 attn_dropout,
                 relu_dropout,
                 res_dropout,
                 layers,
                 horizons,
                 attn_mask=False,
                 src_mask=False,
                 tgt_mask=False,
                 crossmodal=False):
        """
        Construct a basic Transfomer model for multimodal tasks.
        
        :param ntokens: The number of unique tokens in text modality.
        :param input_dims: The input dimensions of the various (in this case, 3) modalities.
        :param num_heads: The number of heads to use in the multi-headed attention. 
        :param attn_dropout: The dropout following self-attention sm((QK)^T/d)V.
        :param relu_droput: The dropout for ReLU in residual block.
        :param res_dropout: The dropout of each residual block.
        :param layers: The number of transformer blocks.
        :param attn_mask: A boolean indicating whether to use attention mask (for transformer decoder).
        :param crossmodal: Use Crossmodal Transformer or Not

        l = a, a = b 
        """
        super(TransformerGenerationModel, self).__init__()
        [self.orig_d_l, self.orig_d_a] = input_dims
        assert self.orig_d_l == self.orig_d_a
        self.d_l, self.d_a = self.orig_d_l, self.orig_d_a
        # [self.d_l, self.d_a] = proj_dims

        self.ntokens = ntokens
        # final_out = self.d_l + self.d_a
        # final_out = (self.d_l + self.d_a) * time_step
        # final_out = (self.d_l + self.d_a) *  horizons
        final_out = self.d_l
        h_out = hidden_size
        #         output_dim = 1
        self.num_heads = num_heads
        self.layers = layers
        self.horizons = horizons
        self.attn_dropout = attn_dropout
        self.relu_dropout = relu_dropout
        self.res_dropout = res_dropout
        self.attn_mask = attn_mask  # for encoder
        # self.src_mask = src_mask  # for decoder
        # self.tgt_mask = tgt_mask  # for decoder

        self.crossmodal = crossmodal

        # Transformer networks
        self.trans_encoder = nn.ModuleList(
            [self.get_encoder_network() for i in range(self.horizons)])
        self.trans_decoder = nn.ModuleList(
            [self.get_decoder_network() for i in range(self.horizons)])

        print("Encoder Model size: {0}".format(
            count_parameters(self.trans_encoder)))
        print("Decoder Model size: {0}".format(
            count_parameters(self.trans_decoder)))

        # Projection layers
        self.proj_l = nn.ModuleList(
            [nn.Linear(self.orig_d_l, self.d_l) for i in range(self.horizons)])

        self.proj_a = nn.ModuleList(
            [nn.Linear(self.orig_d_a, self.d_a) for i in range(self.horizons)])

        # self.proj = nn.Linear(final_out, final_out) # Not in the diagram
        self.out_fc1_A = nn.Linear(final_out, h_out)
        self.out_fc1_B = nn.Linear(final_out, h_out)

        self.out_fc2_A = nn.Linear(h_out, final_out)
        self.out_fc2_B = nn.Linear(h_out, final_out)

        self.out_dropout = nn.Dropout(0.5)
Example #11
    def __init__(self,
                 ntokens,
                 time_step,
                 input_dims,
                 hidden_size,
                 embed_dim,
                 output_dim,
                 num_heads,
                 attn_dropout,
                 relu_dropout,
                 res_dropout,
                 layers,
                 horizons,
                 attn_mask=False,
                 crossmodal=False):
        """
        Construct a basic Transfomer model for multimodal tasks.
        
        :param ntokens: The number of unique tokens in text modality.
        :param input_dims: The input dimensions of the various (in this case, 3) modalities.
        :param num_heads: The number of heads to use in the multi-headed attention. 
        :param attn_dropout: The dropout following self-attention sm((QK)^T/d)V.
        :param relu_droput: The dropout for ReLU in residual block.
        :param res_dropout: The dropout of each residual block.
        :param layers: The number of transformer blocks.
        :param attn_mask: A boolean indicating whether to use attention mask (for transformer decoder).
        :param crossmodal: Use Crossmodal Transformer or Not
        """
        super(TransformerModel, self).__init__()
        self.cnn = nn.Sequential(
            Conv1d(in_channels=2, out_channels=16, kernel_size=6, stride=2),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2),
            Conv1d(in_channels=16, out_channels=32, kernel_size=3, stride=2),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2),
            Conv1d(in_channels=32, out_channels=64, kernel_size=3, stride=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2),
            Conv1d(in_channels=64, out_channels=64, kernel_size=3, stride=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2),
            Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1),
            nn.ReLU(),
            Conv1d(in_channels=128, out_channels=128, kernel_size=3, stride=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2),
            Flatten(),

            # nn.Linear(256*32, 2048),
            # nn.ReLU(),
            # nn.Linear(2048, output_size),
            # nn.Sigmoid()
        )
        [self.orig_d_l, self.orig_d_a] = input_dims
        assert self.orig_d_l == self.orig_d_a
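        # channels: the temporal length left after the Conv1d/MaxPool1d stack above,
        # obtained by folding L_out = (L_in - kernel_size) // stride + 1 through each layer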
        channels = (((((((
            ((((self.orig_d_l - 6) // 2 + 1 - 2) // 2 + 1 - 3) // 2 + 1 - 2) //
            2 + 1 - 3) // 1 + 1 - 2) // 2 + 1 - 3) // 1 + 1 - 2) // 2 + 1 - 3)
                     // 1 + 1 - 3) // 1 + 1 - 2) // 2 + 1
        self.d_l, self.d_a = 128 * channels // 2, 128 * channels // 2
        self.ntokens = ntokens
        #final_out = (self.orig_d_l + self.orig_d_a) *  horizons
        final_out = embed_dim * 2
        h_out = hidden_size
        self.num_heads = num_heads
        self.layers = layers
        self.horizons = horizons
        self.attn_dropout = attn_dropout
        self.relu_dropout = relu_dropout
        self.res_dropout = res_dropout
        self.attn_mask = attn_mask
        self.embed_dim = embed_dim
        self.crossmodal = crossmodal

        # Transformer networks
        self.trans = nn.ModuleList(
            [self.get_network() for i in range(self.horizons)])
        print("Encoder Model size: {0}".format(count_parameters(self.trans)))
        # Projection layers
        self.proj_l = nn.ModuleList([
            nn.Linear(self.d_l, self.embed_dim) for i in range(self.horizons)
        ])
        self.proj_a = nn.ModuleList([
            nn.Linear(self.d_a, self.embed_dim) for i in range(self.horizons)
        ])

        # self.proj = nn.Linear(final_out, final_out) # Not in the diagram
        self.out_fc1 = nn.Linear(final_out, h_out)

        self.out_fc2 = nn.Linear(h_out, output_dim)

        self.out_dropout = nn.Dropout(0.5)
Example #12
 def __init__(self, blocks):
     super(ProgressiveGenerator, self).__init__()
     self.blocks = nn.ModuleList(blocks)
     self.cur_block = 0
     self.alpha = 1.
Example #13
 def __init__(self, blocks):
     super(ProgressiveDiscriminator, self).__init__()
     self.blocks = nn.ModuleList(blocks)
     self.cur_block = len(self.blocks) - 1
     self.alpha = 1.
Example #14
    def __init__(self,
                 n_layers=12,
                 channels_interval=24,
                 kernel_size_in_encoder=15,
                 kernel_size_in_decoder=5,
                 dilation_in_encoder=None,
                 dilation_in_decoder=None):
        super(UNet, self).__init__()
        # TODO: why does swapping kernel_size_in_encoder and kernel_size_in_decoder
        # increase the parameter count by roughly 4 million?

        if dilation_in_encoder:
            print(f"The model will use dilated convolutions in the **down-sampling** layers: {dilation_in_encoder}")

        if dilation_in_decoder:
            print(f"The model will use dilated convolutions in the **up-sampling** layers: {dilation_in_decoder}")

        self.n_layers = n_layers
        self.channels_interval = channels_interval
        encoder_in_channels_list = [1] + [i * self.channels_interval for i in range(1, self.n_layers)]
        encoder_out_channels_list = [i * self.channels_interval for i in range(1, self.n_layers + 1)]

        #          1    => 2    => 3    => 4    => 5    => 6   => 7   => 8   => 9  => 10 => 11 =>12
        # 16384 => 8192 => 4096 => 2048 => 1024 => 512 => 256 => 128 => 64 => 32 => 16 =>  8 => 4
        self.encoder = nn.ModuleList()
        for i in range(self.n_layers):
            dilated_rate = None
            if (i + 1) in dilation_in_encoder["layers"]:
                index_in_dilated_rates = dilation_in_encoder["layers"].index(i + 1)
                dilated_rate = dilation_in_encoder["dilated_rates"][index_in_dilated_rates]

            self.encoder.append(
                DownSamplingLayer(
                    channel_in=encoder_in_channels_list[i],
                    channel_out=encoder_out_channels_list[i],
                    kernel_size=kernel_size_in_encoder,
                    dilation=dilated_rate if dilated_rate else 1,
                    padding=calculate_same_padding(
                        l_in=encoder_in_channels_list[i],
                        kernel_size=kernel_size_in_encoder,
                        stride=1,
                        dilation=dilated_rate if dilated_rate else 1
                    ),
                )
            )

        self.middle = nn.Sequential(
            nn.Conv1d(self.n_layers * self.channels_interval, self.n_layers * self.channels_interval, 15, stride=1,
                      padding=7),
            nn.BatchNorm1d(self.n_layers * self.channels_interval),
            nn.LeakyReLU(negative_slope=0.1, inplace=True)
        )

        decoder_in_channels_list = [(2 * i + 1) * self.channels_interval for i in range(1, self.n_layers)] + [
            2 * self.n_layers * self.channels_interval]
        decoder_in_channels_list = decoder_in_channels_list[::-1]
        decoder_out_channels_list = encoder_out_channels_list[::-1]
        self.decoder = nn.ModuleList()


        for i in range(self.n_layers):
            dilated_rate = None

            if (i + 1) in dilation_in_decoder["layers"]:
                index_in_dilated_rates = dilation_in_decoder["layers"].index(i + 1)
                dilated_rate = dilation_in_decoder["dilated_rates"][index_in_dilated_rates]

            self.decoder.append(
                UpSamplingLayer(
                    channel_in=decoder_in_channels_list[i],
                    channel_out=decoder_out_channels_list[i],
                    kernel_size=kernel_size_in_decoder,
                    dilation=dilated_rate if dilated_rate else 1,
                    padding=calculate_same_padding(
                        l_in=encoder_in_channels_list[i],
                        kernel_size=kernel_size_in_decoder,
                        stride=1,
                        dilation=dilated_rate if dilated_rate else 1
                    ),
                )
            )

        self.out = nn.Sequential(
            nn.Conv1d(1 + self.channels_interval, 1, kernel_size=1, stride=1),
            nn.Tanh()
        )
Example #15
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs
    """
    hyperparams = module_defs.pop(0)
    # The first dict block in module_defs holds the [net] section: network input size, preprocessing, and other hyperparameters
    output_filters = [int(hyperparams["channels"])]
    # The initial value corresponds to the 3 input channels; this list tracks the number of convolution kernels
    # (i.e. the feature-map depth) produced by each layer applied so far. We need the kernel count not only of the
    # previous layer but of every earlier layer, so each module's output kernel count is appended to output_filters as we iterate.

    module_list = nn.ModuleList()
    # module_list stores every block; each entry corresponds to one block in the cfg file,
    # e.g. a [convolutional] section maps to one convolutional block
    for module_i, module_def in enumerate(module_defs):
        # enumerate() pairs each element of an iterable (list, tuple, string, ...) with its index; it is typically used in a for loop
        modules = nn.Sequential()
        # each cfg block is built into one module with nn.Sequential(); a module may contain several layers

        if module_def["type"] == "convolutional":
            # read the parameters of the conv layer, batch-norm layer, and activation layer
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])#output_channel
            kernel_size = int(module_def["size"])#卷积核大小
            pad = (kernel_size - 1) // 2 #边界填充数量
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
            if module_def["activation"] == "leaky":
                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))# 给定参数负轴系数0.1

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif module_def["type"] == "upsample":
            upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            # Bilinear2dUpsampling is not used; nearest-neighbor interpolation is used instead
            modules.add_module(f"upsample_{module_i}", upsample)

        elif module_def["type"] == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            filters = sum([output_filters[1:][i] for i in layers])
            modules.add_module(f"route_{module_i}", EmptyLayer())

        # elif (x["type"] == "route"):
        #     x["layers"] = x["layers"].split(',')
        #     # Start  of a route
        #     start = int(x["layers"][0])
        #     # end, if there exists one.
        #     try:
        #         end = int(x["layers"][1])
        #     except:
        #         end = 0
        #     # Positive anotation: 正值
        #     if start > 0:
        #         start = start - index
        #     if end > 0:  # 若end>0,由于end= end - index,再执行index + end输出的还是第end层的特征
        #         end = end - index
        #     route = EmptyLayer()
        #     module.add_module("route_{0}".format(index), route)
        #     if end < 0:  # 若end<0,则end还是end,输出index+end(而end<0)故index向后退end层的特征。
        #         filters = output_filters[index + start] + output_filters[index + end]
        #     else:  # 如果没有第二个参数,end=0,则对应下面的公式,此时若start>0,由于start = start - index,
        #     #再执行index + start输出的还是第start层的特征;若start<0,则start还是start,输出index+start(而start<0)故index向后退start层的特征。
        #         filters = output_filters[index + start]

        elif module_def["type"] == "shortcut":
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module(f"shortcut_{module_i}", EmptyLayer())
            # Use an empty layer: the shortcut only performs a simple addition. There is no need to update the
            # filters variable, since it merely adds an earlier layer's feature map onto a later layer.

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_size = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            # anchors, detection, box regression, classification; see predict_transform for this class
            modules.add_module(f"yolo_{module_i}", yolo_layer)
        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
Example #16
    def __init__(self, in_dim, out_dim, args, mean_std=None):
        super(Model, self).__init__()

        ##### required part, no need to change #####

        # mean std of input and output
        in_m, in_s, out_m, out_s = self.prepare_mean_std(in_dim,out_dim,\
                                                         args, mean_std)
        self.input_mean = torch_nn.Parameter(in_m, requires_grad=False)
        self.input_std = torch_nn.Parameter(in_s, requires_grad=False)
        self.output_mean = torch_nn.Parameter(out_m, requires_grad=False)
        self.output_std = torch_nn.Parameter(out_s, requires_grad=False)
        
        # a flag for debugging (by default False)
        # self.model_debug = False
        # self.flag_validation = False
        #####
        
        ####
        # on input waveform and output target
        ####
        # Load protocol and prepare the target data for network training
        protocol_file = prj_conf.optional_argument[0]
        self.protocol_parser = protocol_parse(protocol_file)
        
        # Working sampling rate
        #  torchaudio may be used to change sampling rate
        self.m_target_sr = 16000

        ####
        # optional configs (not used)
        ####                
        # re-sampling (optional)
        #self.m_resampler = torchaudio.transforms.Resample(
        #    prj_conf.wav_samp_rate, self.m_target_sr)

        # vad (optional)
        #self.m_vad = torchaudio.transforms.Vad(sample_rate = self.m_target_sr)
        
        # flag for balanced class (temporary use)
        #self.v_flag = 1

        ####
        # front-end configuration
        #  multiple front-end configurations may be used
        #  by default, use a single front-end
        ####    
        # frame shift (number of waveform points)
        self.frame_hops = [160]
        # frame length
        self.frame_lens = [320]
        # FFT length
        self.fft_n = [512]

        # spectrogram dim (base component)
        self.spec_with_delta = False
        self.spec_fb_dim = 60

        # window type
        self.win = torch.hann_window
        # floor in log-spectrum-amplitude calculating (not used)
        self.amp_floor = 0.00001
        
        # number of frames to be kept for each trial
        # no truncation
        self.v_truncate_lens = [None for x in self.frame_hops]


        # number of sub-models (by default, a single model)
        self.v_submodels = len(self.frame_lens)        

        # dimension of embedding vectors
        self.v_emd_dim = 64

        # output classes
        self.v_out_class = 1

        ####
        # create network
        ####
        # 1st part of the classifier
        self.m_transform = []
        # pooling layer
        self.m_pooling = []
        # 2nd part of the classifier
        self.m_output_act = []
        # front-end
        self.m_frontend = []
        # final part for output layer
        self.m_angle = []
        

        # it can handle models with multiple front-end configuration
        # by default, only a single front-end
        for idx, (trunc_len, fft_n) in enumerate(zip(
                self.v_truncate_lens, self.fft_n)):
            
            fft_n_bins = fft_n // 2 + 1
            
            self.m_transform.append(
                torch_nn.Sequential(
                    TrainableLinearFb(fft_n,self.m_target_sr,self.spec_fb_dim),

                    torch_nn.Conv2d(1, 64, [5, 5], 1, padding=[2, 2]),
                    nii_nn.MaxFeatureMap2D(),
                    torch.nn.MaxPool2d([2, 2], [2, 2]),

                    torch_nn.Conv2d(32, 64, [1, 1], 1, padding=[0, 0]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.BatchNorm2d(32, affine=False),
                    torch_nn.Conv2d(32, 96, [3, 3], 1, padding=[1, 1]),
                    nii_nn.MaxFeatureMap2D(),

                    torch.nn.MaxPool2d([2, 2], [2, 2]),
                    torch_nn.BatchNorm2d(48, affine=False),

                    torch_nn.Conv2d(48, 96, [1, 1], 1, padding=[0, 0]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.BatchNorm2d(48, affine=False),
                    torch_nn.Conv2d(48, 128, [3, 3], 1, padding=[1, 1]),
                    nii_nn.MaxFeatureMap2D(),

                    torch.nn.MaxPool2d([2, 2], [2, 2]),

                    torch_nn.Conv2d(64, 128, [1, 1], 1, padding=[0, 0]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.BatchNorm2d(64, affine=False),
                    torch_nn.Conv2d(64, 64, [3, 3], 1, padding=[1, 1]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.BatchNorm2d(32, affine=False),

                    torch_nn.Conv2d(32, 64, [1, 1], 1, padding=[0, 0]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.BatchNorm2d(32, affine=False),
                    torch_nn.Conv2d(32, 64, [3, 3], 1, padding=[1, 1]),
                    nii_nn.MaxFeatureMap2D(),
                    torch_nn.MaxPool2d([2, 2], [2, 2]),
                    
                    torch_nn.Dropout(0.7)
                )
            )

            self.m_pooling.append(
                nii_nn.SelfWeightedPooling((self.spec_fb_dim // 16) * 32)
            )

            self.m_output_act.append(
                torch_nn.Linear((self.spec_fb_dim//16) * 32 * 2, self.v_emd_dim)
            )

            self.m_angle.append(
                nii_ocsoftmax.OCAngleLayer(self.v_emd_dim)
            )
            
            self.m_frontend.append(
                nii_front_end.Spectrogram(self.frame_lens[idx],
                                          self.frame_hops[idx],
                                          self.fft_n[idx],
                                          self.m_target_sr)
            )

        self.m_frontend = torch_nn.ModuleList(self.m_frontend)
        self.m_transform = torch_nn.ModuleList(self.m_transform)
        self.m_output_act = torch_nn.ModuleList(self.m_output_act)
        self.m_pooling = torch_nn.ModuleList(self.m_pooling)
        self.m_angle = torch_nn.ModuleList(self.m_angle)

        # done
        return
Example #17
    def __init__(self, block_name, depth, num_classes):
        super(SearchShapeCifarResNet, self).__init__()

        # Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
        if block_name == "ResNetBasicblock":
            block = ResNetBasicblock
            assert (depth - 2) % 6 == 0, "depth should be one of 20, 32, 44, 56, 110"
            layer_blocks = (depth - 2) // 6
        elif block_name == "ResNetBottleneck":
            block = ResNetBottleneck
            assert (depth - 2) % 9 == 0, "depth should be one of 164"
            layer_blocks = (depth - 2) // 9
        else:
            raise ValueError("invalid block : {:}".format(block_name))

        self.message = (
            "SearchShapeCifarResNet : Depth : {:} , Layers for each block : {:}".format(
                depth, layer_blocks
            )
        )
        self.num_classes = num_classes
        self.channels = [16]
        self.layers = nn.ModuleList(
            [
                ConvBNReLU(
                    3, 16, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True
                )
            ]
        )
        self.InShape = None
        self.depth_info = OrderedDict()
        self.depth_at_i = OrderedDict()
        for stage in range(3):
            cur_block_choices = get_depth_choices(layer_blocks, False)
            assert (
                cur_block_choices[-1] == layer_blocks
            ), "stage={:}, {:} vs {:}".format(stage, cur_block_choices, layer_blocks)
            self.message += (
                "\nstage={:} ::: depth-block-choices={:} for {:} blocks.".format(
                    stage, cur_block_choices, layer_blocks
                )
            )
            block_choices, xstart = [], len(self.layers)
            for iL in range(layer_blocks):
                iC = self.channels[-1]
                planes = 16 * (2 ** stage)
                stride = 2 if stage > 0 and iL == 0 else 1
                module = block(iC, planes, stride)
                self.channels.append(module.out_dim)
                self.layers.append(module)
                self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iC={:3d}, oC={:3d}, stride={:}".format(
                    stage,
                    iL,
                    layer_blocks,
                    len(self.layers) - 1,
                    iC,
                    module.out_dim,
                    stride,
                )
                # added for depth
                layer_index = len(self.layers) - 1
                if iL + 1 in cur_block_choices:
                    block_choices.append(layer_index)
                if iL + 1 == layer_blocks:
                    self.depth_info[layer_index] = {
                        "choices": block_choices,
                        "stage": stage,
                        "xstart": xstart,
                    }
        self.depth_info_list = []
        for xend, info in self.depth_info.items():
            self.depth_info_list.append((xend, info))
            xstart, xstage = info["xstart"], info["stage"]
            for ilayer in range(xstart, xend + 1):
                idx = bisect_right(info["choices"], ilayer - 1)
                self.depth_at_i[ilayer] = (xstage, idx)

        self.avgpool = nn.AvgPool2d(8)
        self.classifier = nn.Linear(module.out_dim, num_classes)
        self.InShape = None
        self.tau = -1
        self.search_mode = "basic"
        # assert sum(x.num_conv for x in self.layers) + 1 == depth, 'invalid depth check {:} vs {:}'.format(sum(x.num_conv for x in self.layers)+1, depth)

        # parameters for width
        self.Ranges = []
        self.layer2indexRange = []
        for i, layer in enumerate(self.layers):
            start_index = len(self.Ranges)
            self.Ranges += layer.get_range()
            self.layer2indexRange.append((start_index, len(self.Ranges)))
        assert len(self.Ranges) + 1 == depth, "invalid depth check {:} vs {:}".format(
            len(self.Ranges) + 1, depth
        )

        self.register_parameter(
            "width_attentions",
            nn.Parameter(torch.Tensor(len(self.Ranges), get_width_choices(None))),
        )
        self.register_parameter(
            "depth_attentions",
            nn.Parameter(torch.Tensor(3, get_depth_choices(layer_blocks, True))),
        )
        nn.init.normal_(self.width_attentions, 0, 0.01)
        nn.init.normal_(self.depth_attentions, 0, 0.01)
        self.apply(initialize_resnet)
Example #18
 def __init__(self, layers_size):
     super(Net, self).__init__()
     self.linear_layer_list = nn.ModuleList([
         nn.Linear(layers_size[i], layers_size[i + 1])
         for i in range(len(layers_size) - 1)
     ])
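nn.ModuleList (rather than a plain Python list) is what registers these Linear layers with the parent module; layers kept in a plain list are invisible to .parameters() and optimizers. A quick comparison:

import torch.nn as nn

class WithModuleList(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(4, 4), nn.Linear(4, 2)])

class WithPlainList(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = [nn.Linear(4, 4), nn.Linear(4, 2)]  # not registered!

print(len(list(WithModuleList().parameters())))  # 4 (two weights, two biases)
print(len(list(WithPlainList().parameters())))   # 0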
Example #19
    def _prepare_module(self):

        d = OrderedDict()

        #conv1 - batch_norm1 - leaky_relu1 - pool1
        d['conv1'] = ConvBnAct(3, 32, 3, stride=1, padding=1)
        d['pool1'] = max_pool(2, 2)

        #conv2 - batch_norm2 - leaky_relu2 - pool2
        d['conv2'] = ConvBnAct(32, 64, 3, stride=1, padding=1)
        d['pool2'] = max_pool(2, 2)

        #conv3 - batch_norm3 - leaky_relu3
        d['conv3'] = ConvBnAct(64, 128, 3, stride=1, padding=1)

        #conv4 - batch_norm4 - leaky_relu4
        d['conv4'] = ConvBnAct(128, 64, 1, stride=1, padding=0)

        #conv5 - batch_norm5 - leaky_relu5 - pool5
        d['conv5'] = ConvBnAct(64, 128, 3, stride=1, padding=1)
        d['pool5'] = max_pool(2, 2)

        #conv6 - batch_norm6 - leaky_relu6
        d['conv6'] = ConvBnAct(128, 256, 3, stride=1, padding=1)

        #conv7 - batch_norm7 - leaky_relu7
        d['conv7'] = ConvBnAct(256, 128, 1, stride=1, padding=0)

        #conv8 - batch_norm8 - leaky_relu8 - pool8
        d['conv8'] = ConvBnAct(128, 256, 3, stride=1, padding=1)
        d['pool8'] = max_pool(2, 2)

        #conv9 - batch_norm9 - leaky_relu9
        d['conv9'] = ConvBnAct(256, 512, 3, stride=1, padding=1)

        #conv10 - batch_norm10 - leaky_relu10
        d['conv10'] = ConvBnAct(512, 256, 1, stride=1, padding=0)

        #conv11 - batch_norm11 - leaky_relu11
        d['conv11'] = ConvBnAct(256, 512, 3, stride=1, padding=1)

        #conv12 - batch_norm12 - leaky_relu12
        d['conv12'] = ConvBnAct(512, 256, 1, stride=1, padding=0)

        #conv13 - batch_norm13 - leaky_relu13 - pool13
        d['conv13'] = ConvBnAct(256, 512, 3, stride=1, padding=1)
        d['pool13'] = max_pool(2, 2)

        #conv14 - batch_norm14 - leaky_relu14
        d['conv14'] = ConvBnAct(512, 1024, 3, stride=1, padding=1)

        #conv15 - batch_norm15 - leaky_relu15
        d['conv15'] = ConvBnAct(1024, 512, 1, stride=1, padding=0)

        #conv16 - batch_norm16 - leaky_relu16
        d['conv16'] = ConvBnAct(512, 1024, 3, stride=1, padding=1)

        #conv17 - batch_norm17 - leaky_relu17
        d['conv17'] = ConvBnAct(1024, 512, 1, stride=1, padding=0)

        #conv18 - batch_norm18 - leaky_relu18
        d['conv18'] = ConvBnAct(512, 1024, 3, stride=1, padding=1)

        #conv19 - batch_norm19 - leaky_relu19
        d['conv19'] = ConvBnAct(1024, 1024, 3, stride=1, padding=1)

        # Detection Layer
        #conv20 - batch_norm20 - leaky_relu20
        d['conv20'] = ConvBnAct(1024, 1024, 3, stride=1, padding=1)

        # concatenate layer20 and layer 13 using space to depth
        d['skip_connection'] = nn.Sequential(
            ConvBnAct(512, 64, 1, stride=1, padding=0), SpaceToDepth(2))
        d['conv21'] = ConvBnAct(1024, 1024, 3, stride=1, padding=1)

        #conv22 - batch_norm22 - leaky_relu22
        d['conv22'] = ConvBnAct(1280, 1024, 3, stride=1, padding=1)

        output_channel = self.num_anchors * (5 + self.num_classes)
        d['logits'] = conv2d(1024,
                             output_channel,
                             1,
                             stride=1,
                             padding=0,
                             bias=True)

        self.module = nn.ModuleList(d.values())
        return d
Example #20
    def __init__(self, in_features_num, num_anchors=9, num_classes=80,
                 features_num=256, layers_num=3, num_pyramid_levels=5, head_type='simple', act_type='relu', share_weights=True, 
                 conv_kernel_size=3, conv_stride=1, conv_padding=1, 
                 onnx_export=ONNX_EXPORT, **kwargs):
        assert head_type in ['simple', 'efficient']
        assert act_type in ['relu', 'swish']
        super(Classifier, self).__init__()
        self.convert_onnx = False
        self.pyramid_sizes = None

        if isinstance(conv_kernel_size, list):
            conv_kernel_size = tuple(conv_kernel_size)
        if isinstance(conv_padding, list):
            conv_padding = tuple(conv_padding)
        self.num_anchors = num_anchors
        self.num_classes = num_classes
        self.layers_num = layers_num
        self.num_pyramid_levels = num_pyramid_levels
        self.share_weights = share_weights

        logger = kwargs.get('logger', None)
        if logger:
            logger.info(f'==== Build Head Layer ====================')
            logger.info(f'Head Type     : Classification ({head_type} + {act_type})')
            logger.info(f'Features Num  : {features_num}')
            logger.info(f'Anchors Num   : {num_anchors}')
            logger.info(f'Layers Num    : {layers_num}')
            logger.info(f'Share Weights : {share_weights}')
            logger.info(f'Conv Kernel Size : {conv_kernel_size}')
            logger.info(f'Conv Padding     : {conv_padding}')
            logger.info(f'Conv Stride      : {conv_stride}')

        _conv_block = SeparableConvBlock if head_type == 'efficient' else nn.Conv2d
        _conv_kwargs = {'kernel_size': conv_kernel_size, 'stride': conv_stride, 'padding': conv_padding}
        if head_type == 'efficient':
            _conv_kwargs.update({'norm': False, 'activation': False})
        #self.conv_list = nn.ModuleList(
        #    [_conv_block(in_features_num if i == 0 else features_num, 
        #                 features_num, **_conv_kwargs) for i in range(layers_num)])
        #self.bn_list = nn.ModuleList(
        #    [nn.ModuleList([nn.BatchNorm2d(features_num, momentum=0.01, eps=1e-3) 
        #                    for i in range(layers_num)]) for j in range(num_pyramid_levels)])

        #self.header = _conv_block(features_num, num_anchors * num_classes, **_conv_kwargs)
        if share_weights:
            self.conv_tower = nn.ModuleList([nn.ModuleList([_conv_block(in_features_num if i == 0 else features_num, 
                                                           features_num, **_conv_kwargs) for i in range(layers_num)])])
            self.header = nn.ModuleList([_conv_block(features_num, num_anchors * num_classes, **_conv_kwargs)])
        else:
            sub_conv_towers = list()
            for p in range(num_pyramid_levels):
                sub_conv_towers.append(nn.ModuleList([_conv_block(in_features_num if i == 0 else features_num, 
                                       features_num, **_conv_kwargs) for i in range(layers_num)]))
            self.conv_tower = nn.ModuleList(sub_conv_towers)
            self.header = nn.ModuleList([_conv_block(features_num, num_anchors * num_classes, **_conv_kwargs)
                                         for p in range(num_pyramid_levels)])
        self.bn_modules = nn.ModuleList(
            [nn.ModuleList([nn.BatchNorm2d(features_num, momentum=0.01, eps=1e-3) 
                            for i in range(layers_num)]) for j in range(num_pyramid_levels)])

        if act_type == 'swish':
            self.act_fn = MemoryEfficientSwish() if not onnx_export else Swish()
        else:
            self.act_fn = nn.ReLU()
        
        self.header_act = nn.Sigmoid()

        self._initialize_weights(logger=logger)
Example #21
 def create_network(self, blocks):
     models = nn.ModuleList()
 
     prev_filters = 3
     out_filters =[]
     conv_id = 0
     for block in blocks:
         if block['type'] == 'net':
             prev_filters = int(block['channels'])
             continue
         elif block['type'] == 'convolutional':
             conv_id = conv_id + 1
             batch_normalize = int(block['batch_normalize'])
             filters = int(block['filters'])
             kernel_size = int(block['size'])
             stride = int(block['stride'])
             is_pad = int(block['pad'])
             pad = (kernel_size-1)//2 if is_pad else 0
             activation = block['activation']
             model = nn.Sequential()
             if batch_normalize:
                 model.add_module('conv{0}'.format(conv_id), nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False))
                 model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters, eps=1e-4))
                 #model.add_module('bn{0}'.format(conv_id), BN2d(filters))
             else:
                 model.add_module('conv{0}'.format(conv_id), nn.Conv2d(prev_filters, filters, kernel_size, stride, pad))
             if activation == 'leaky':
                 model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True))
             elif activation == 'relu':
                 model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True))
             prev_filters = filters
             out_filters.append(prev_filters)
             models.append(model)
         elif block['type'] == 'maxpool':
             pool_size = int(block['size'])
             stride = int(block['stride'])
             if stride > 1:
                 model = nn.MaxPool2d(pool_size, stride)
             else:
                 model = MaxPoolStride1()
             out_filters.append(prev_filters)
             models.append(model)
         elif block['type'] == 'avgpool':
             model = GlobalAvgPool2d()
             out_filters.append(prev_filters)
             models.append(model)
         elif block['type'] == 'softmax':
             model = nn.Softmax(dim=1)  # explicit dim; the implicit default is deprecated
             out_filters.append(prev_filters)
             models.append(model)
         elif block['type'] == 'cost':
             if block['_type'] == 'sse':
                 model = nn.MSELoss(reduction='mean')
             elif block['_type'] == 'L1':
                 model = nn.L1Loss(reduction='mean')
             elif block['_type'] == 'smooth':
                 model = nn.SmoothL1Loss(reduction='mean')
             out_filters.append(1)
             models.append(model)
         elif block['type'] == 'reorg':
             stride = int(block['stride'])
             prev_filters = stride * stride * prev_filters
             out_filters.append(prev_filters)
             models.append(Reorg(stride))
         elif block['type'] == 'route':
             layers = block['layers'].split(',')
             ind = len(models)
             layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers]
             if len(layers) == 1:
                 prev_filters = out_filters[layers[0]]
             elif len(layers) == 2:
                 assert(layers[0] == ind - 1)
                 prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
             out_filters.append(prev_filters)
             models.append(EmptyModule())
         elif block['type'] == 'shortcut':
             ind = len(models)
             prev_filters = out_filters[ind-1]
             out_filters.append(prev_filters)
             models.append(EmptyModule())
         elif block['type'] == 'connected':
             filters = int(block['output'])
             if block['activation'] == 'linear':
                 model = nn.Linear(prev_filters, filters)
             elif block['activation'] == 'leaky':
                 model = nn.Sequential(
                            nn.Linear(prev_filters, filters),
                            nn.LeakyReLU(0.1, inplace=True))
             elif block['activation'] == 'relu':
                 model = nn.Sequential(
                            nn.Linear(prev_filters, filters),
                            nn.ReLU(inplace=True))
             prev_filters = filters
             out_filters.append(prev_filters)
             models.append(model)
         elif block['type'] == 'region':
             loss = RegionLoss()
             anchors = block['anchors'].split(',')
             if anchors == ['']:
                 loss.anchors = []
             else:
                 loss.anchors = [float(i) for i in anchors]
             loss.num_classes = int(block['classes'])
             loss.num_anchors = int(block['num'])
             loss.anchor_step = len(loss.anchors)//loss.num_anchors
             loss.object_scale = float(block['object_scale'])
             loss.noobject_scale = float(block['noobject_scale'])
             loss.class_scale = float(block['class_scale'])
             loss.coord_scale = float(block['coord_scale'])
             out_filters.append(prev_filters)
             models.append(loss)
         else:
             print('unknown type %s' % (block['type']))
 
     return models
Example #22
    def __init__(self, mem_slots, head_size, input_size, num_heads=1, num_blocks=1, forget_bias=1., input_bias=0.,
                 gate_style='unit', attention_mlp_layers=2, key_size=None, return_all_outputs=False):
        super(RelationalMemory, self).__init__()

        ########## generic parameters for RMC ##########
        self.mem_slots = mem_slots
        self.head_size = head_size
        self.num_heads = num_heads
        self.mem_size = self.head_size * self.num_heads

        # a new fixed parameter needed for the PyTorch port of RMC:
        # the +1 accounts for the input concatenated to memory at each time step,
        # since self-attention runs over the concatenated memory & input
        # (so if mem_slots = 1, this value is 2)
        self.mem_slots_plus_input = self.mem_slots + 1

        if num_blocks < 1:
            raise ValueError(
                'num_blocks must be >=1. Got: {}.'.format(num_blocks))
        self.num_blocks = num_blocks

        if gate_style not in ['unit', 'memory', None]:
            raise ValueError(
                'gate_style must be one of [\'unit\', \'memory\', None]. got: '
                '{}.'.format(gate_style))
        self.gate_style = gate_style

        if attention_mlp_layers < 1:
            raise ValueError('attention_mlp_layers must be >= 1. Got: {}.'.format(
                attention_mlp_layers))
        self.attention_mlp_layers = attention_mlp_layers

        self.key_size = key_size if key_size else self.head_size

        ########## parameters for multihead attention ##########
        # value_size is same as head_size
        self.value_size = self.head_size
        # total size for query-key-value
        self.qkv_size = 2 * self.key_size + self.value_size
        self.total_qkv_size = self.qkv_size * self.num_heads  # denoted as F

        # each head has a qkv-sized linear projector; one big parameter is more
        # efficient than the per-head projectors of the commented line below
        # self.qkv_projector = [nn.Parameter(torch.randn((self.qkv_size, self.qkv_size))) for _ in range(self.num_heads)]
        self.qkv_projector = nn.Linear(self.mem_size, self.total_qkv_size)
        self.qkv_layernorm = nn.LayerNorm(
            [self.mem_slots_plus_input, self.total_qkv_size])

        # used for the attend_over_memory function; note the comprehension:
        # [module] * n would register the *same* Linear n times (shared weights)
        self.attention_mlp = nn.ModuleList(
            [nn.Linear(self.mem_size, self.mem_size)
             for _ in range(self.attention_mlp_layers)])
        self.attended_memory_layernorm = nn.LayerNorm(
            [self.mem_slots_plus_input, self.mem_size])
        self.attended_memory_layernorm2 = nn.LayerNorm(
            [self.mem_slots_plus_input, self.mem_size])

        ########## parameters for initial embedded input projection ##########
        self.input_size = input_size
        self.input_projector = nn.Linear(self.input_size, self.mem_size)

        ########## parameters for gating ##########
        self.num_gates = 2 * self.calculate_gate_size()
        self.input_gate_projector = nn.Linear(self.mem_size, self.num_gates)
        self.memory_gate_projector = nn.Linear(self.mem_size, self.num_gates)
        # trainable scalar gate bias tensors
        self.forget_bias = nn.Parameter(
            torch.tensor(forget_bias, dtype=torch.float32))
        self.input_bias = nn.Parameter(
            torch.tensor(input_bias, dtype=torch.float32))

        ########## number of outputs returned ##########
        self.return_all_outputs = return_all_outputs
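
A quick dimension check for the sizes computed above (a sketch; it assumes the full RelationalMemory class, including calculate_gate_size, is importable):

rmc = RelationalMemory(mem_slots=4, head_size=16, input_size=32, num_heads=4)
assert rmc.mem_size == 64            # head_size * num_heads
assert rmc.key_size == 16            # defaults to head_size
assert rmc.qkv_size == 48            # 2 * key_size + value_size = 2*16 + 16
assert rmc.total_qkv_size == 192     # qkv_size * num_heads, denoted F
assert rmc.mem_slots_plus_input == 5 # memory slots + concatenated input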
Example #23
0
    def __init__(self, params, dico, with_output):
        """
        Transformer model (encoder or decoder).
        """
        super().__init__()

        # encoder / decoder, output layer
        self.with_output = with_output

        # dictionary / languages
        self.n_words = params.tgt_n_words

        self.eos_index = params.eos_index
        self.pad_index = params.pad_index
        self.dico = dico
        assert len(self.dico) == self.n_words

        # model parameters
        self.dim = params.emb_dim  # 512 by default
        self.hidden_dim = self.dim * 4  # 2048 by default
        self.n_heads = params.n_heads  # 8 by default
        self.n_layers = params.dec_n_layers
        self.dropout = params.dropout
        self.attention_dropout = params.attention_dropout
        assert self.dim % self.n_heads == 0, 'transformer dim must be a multiple of n_heads'

        # embeddings
        self.position_embeddings = Embedding(N_MAX_POSITIONS, self.dim)
        if params.sinusoidal_embeddings:
            create_sinusoidal_embeddings(N_MAX_POSITIONS,
                                         self.dim,
                                         out=self.position_embeddings.weight)
        self.embeddings = Embedding(self.n_words,
                                    self.dim,
                                    padding_idx=self.pad_index)
        self.layer_norm_emb = nn.LayerNorm(self.dim, eps=1e-12)

        # transformer layers
        self.attentions = nn.ModuleList()
        self.layer_norm1 = nn.ModuleList()
        self.ffns = nn.ModuleList()
        self.layer_norm2 = nn.ModuleList()
        self.layer_norm15 = nn.ModuleList()  # layer norm before encoder attention (decoder only)
        self.encoder_attn = nn.ModuleList()

        for _ in range(self.n_layers):
            self.attentions.append(
                MultiHeadAttention(self.n_heads,
                                   self.dim,
                                   dropout=self.attention_dropout))
            self.layer_norm1.append(nn.LayerNorm(self.dim, eps=1e-12))
            self.layer_norm15.append(nn.LayerNorm(self.dim, eps=1e-12))
            self.encoder_attn.append(
                MultiHeadAttention(self.n_heads,
                                   self.dim,
                                   dropout=self.attention_dropout))
            self.ffns.append(
                TransformerFFN(self.dim,
                               self.hidden_dim,
                               self.dim,
                               dropout=self.dropout,
                               gelu_activation=params.gelu_activation))
            self.layer_norm2.append(nn.LayerNorm(self.dim, eps=1e-12))

        if self.with_output:
            self.pred_layer = PredLayer(params)
            if params.share_inout_emb:
                self.pred_layer.proj.weight = self.embeddings.weight
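For reference, a minimal sketch of what the create_sinusoidal_embeddings helper called above typically does in XLM-style code (this function is not part of the snippet; treat it as an assumed implementation):

import numpy as np
import torch

def create_sinusoidal_embeddings(n_pos, dim, out):
    # fixed sin/cos position encodings written into the embedding weight
    position_enc = np.array([
        [pos / np.power(10000, 2 * (j // 2) / dim) for j in range(dim)]
        for pos in range(n_pos)])
    with torch.no_grad():  # write into the parameter in place
        out[:, 0::2] = torch.FloatTensor(np.sin(position_enc[:, 0::2]))
        out[:, 1::2] = torch.FloatTensor(np.cos(position_enc[:, 1::2]))
    out.requires_grad = False  # positions stay fixed during training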
Example #24
0
    def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False):
        super().__init__(dictionary)
        self.register_buffer('version', torch.Tensor([3]))

        self.dropout = args.dropout
        self.decoder_layerdrop = args.decoder_layerdrop
        self.share_input_output_embed = args.share_decoder_input_output_embed

        input_embed_dim = embed_tokens.embedding_dim
        embed_dim = args.decoder_embed_dim
        self.output_embed_dim = args.decoder_output_dim

        self.padding_idx = embed_tokens.padding_idx
        self.max_target_positions = args.max_target_positions

        self.embed_tokens = embed_tokens

        self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim)

        self.project_in_dim = Linear(input_embed_dim, embed_dim, bias=False) if embed_dim != input_embed_dim else None

        self.embed_positions = PositionalEmbedding(
            args.max_target_positions, embed_dim, self.padding_idx,
            learned=args.decoder_learned_pos,
        ) if not args.no_token_positional_embeddings else None

        self.cross_self_attention = getattr(args, 'cross_self_attention', False)
        self.layer_wise_attention = getattr(args, 'layer_wise_attention', False)

        self.layers = nn.ModuleList([])
        self.layers.extend([
            TransformerDecoderLayer(args, no_encoder_attn)
            for _ in range(args.decoder_layers)
        ])

        self.adaptive_softmax = None

        self.project_out_dim = Linear(embed_dim, self.output_embed_dim, bias=False) \
            if embed_dim != self.output_embed_dim and not args.tie_adaptive_weights else None

        if args.adaptive_softmax_cutoff is not None:
            self.adaptive_softmax = AdaptiveSoftmax(
                len(dictionary),
                self.output_embed_dim,
                options.eval_str_list(args.adaptive_softmax_cutoff, type=int),
                dropout=args.adaptive_softmax_dropout,
                adaptive_inputs=embed_tokens if args.tie_adaptive_weights else None,
                factor=args.adaptive_softmax_factor,
                tie_proj=args.tie_adaptive_proj,
            )
        elif not self.share_input_output_embed:
            self.embed_out = nn.Parameter(torch.Tensor(len(dictionary), self.output_embed_dim))
            nn.init.normal_(self.embed_out, mean=0, std=self.output_embed_dim ** -0.5)

        if args.decoder_normalize_before and not getattr(args, 'no_decoder_final_norm', False):
            self.layer_norm = LayerNorm(embed_dim)
        else:
            self.layer_norm = None
        if getattr(args, 'layernorm_embedding', False):
            self.layernorm_embedding = LayerNorm(embed_dim)
        else:
            self.layernorm_embedding = None
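
The three output paths set up above (adaptive softmax, shared input/output embedding, or a separate embed_out parameter) are typically resolved at the end of the forward pass roughly like this (a sketch, not the verbatim fairseq method; F is torch.nn.functional):

    def output_layer(self, features):
        # project decoder features to vocabulary logits
        if self.adaptive_softmax is not None:
            return features  # adaptive softmax is applied in the criterion instead
        if self.share_input_output_embed:
            return F.linear(features, self.embed_tokens.weight)
        return F.linear(features, self.embed_out)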
Example #25
0
 def __init__(self, num_layers, input_size):
     super(test_net, self).__init__()
     self.num_layers = num_layers
     self.linear_1 = nn.Linear(input_size, 5)
     self.middle = nn.ModuleList([nn.Linear(5, 5) for _ in range(num_layers)])
     self.output = nn.Linear(5, 2)
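A matching forward pass for this toy network would iterate the ModuleList explicitly (a sketch; the original snippet defines only the constructor):

 def forward(self, x):
     x = self.linear_1(x)
     for layer in self.middle:
         x = layer(x)
     return self.output(x)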
Example #26
0
    def __init__(self, layer_config, num_classes=1):
        super(CSNet, self).__init__()

        self.stages = layer_config[-1]
        self.layer_config = layer_config
        fuse_in = np.zeros(3)
        index = 0
        print(self.layer_config)
        self.stage0 = nn.ModuleList()
        self.stage0.append(
            ILBlock(np.array([3]),
                    self.layer_config[index][1],
                    nextoutlist=self.layer_config[index + 1][1],
                    stride=1,
                    first=True))

        index = index + 1
        self.stage1 = nn.ModuleList()
        self.stage1.append(
            ILBlock(self.layer_config[index][0],
                    self.layer_config[index][1],
                    nextoutlist=self.layer_config[index + 1][1]))
        index = index + 1
        for i in range(1, self.stages[0]):
            if i == self.stages[0] - 1:
                nextstride = 2
            else:
                nextstride = 1
            self.stage1.append(
                ILBlock(self.layer_config[index][0],
                        self.layer_config[index][1],
                        nextoutlist=self.layer_config[index + 1][1],
                        nextstride=nextstride))
            index = index + 1

        self.stage2 = nn.ModuleList()
        self.stage2.append(
            ILBlock(self.layer_config[index][0],
                    self.layer_config[index][1],
                    nextoutlist=self.layer_config[index + 1][1],
                    stride=2))
        index = index + 1
        for i in range(1, self.stages[1]):
            if i == self.stages[1] - 1:
                nextstride = 2
            else:
                nextstride = 1
            self.stage2.append(
                ILBlock(self.layer_config[index][0],
                        self.layer_config[index][1],
                        nextoutlist=self.layer_config[index + 1][1],
                        nextstride=nextstride))
            index = index + 1
        fuse_in[0] = int(round(sum(self.layer_config[index - 1][1])))

        self.stage3 = nn.ModuleList()
        self.stage3.append(
            ILBlock(self.layer_config[index][0],
                    self.layer_config[index][1],
                    nextoutlist=self.layer_config[index + 1][1],
                    stride=2))
        index = index + 1
        for i in range(1, self.stages[2]):
            if i == self.stages[2] - 1:
                nextstride = 2
            else:
                nextstride = 1
            self.stage3.append(
                ILBlock(self.layer_config[index][0],
                        self.layer_config[index][1],
                        nextoutlist=self.layer_config[index + 1][1],
                        nextstride=nextstride))
            index = index + 1
        fuse_in[1] = int(round(sum(self.layer_config[index - 1][1])))

        self.stage4 = nn.ModuleList()
        self.stage4.append(
            ILBlock(self.layer_config[index][0],
                    self.layer_config[index][1],
                    nextoutlist=self.layer_config[index + 1][1],
                    stride=2))
        index = index + 1
        for i in range(1, self.stages[3]):
            # note: nextstride is computed here but unused; the final stage's
            # blocks are built with nextoutlist=None (no successor stage)
            if i == self.stages[3] - 1:
                nextstride = 0
            else:
                nextstride = 1
            self.stage4.append(
                ILBlock(self.layer_config[index][0],
                        self.layer_config[index][1],
                        nextoutlist=None))
            index = index + 1
        fuse_in[2] = int(round(sum(self.layer_config[index - 1][1])))

        self.oct_fuse = CSFHead(self.layer_config[index:index + 3])
        fuse_out_channel = int(round(sum(self.layer_config[-2][1])))
        self.cls_layer = nn.Conv2d(fuse_out_channel,
                                   num_classes,
                                   kernel_size=1)

        self.all_flops = 0
        self.batchsize = 0
Example #27
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 start_level=0,
                 end_level=-1,
                 add_extra_convs=False,
                 extra_convs_on_inputs=True,
                 relu_before_extra_convs=False,
                 no_norm_on_lateral=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 upsample_cfg=dict(mode='nearest')):
        super(SiameseFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.num_outs = num_outs
        self.relu_before_extra_convs = relu_before_extra_convs
        self.no_norm_on_lateral = no_norm_on_lateral
        self.fp16_enabled = False
        self.upsample_cfg = upsample_cfg.copy()

        if end_level == -1:
            self.backbone_end_level = self.num_ins
            assert num_outs >= self.num_ins - start_level
        else:
            # if end_level < inputs, no extra level is allowed
            self.backbone_end_level = end_level
            assert end_level <= len(in_channels)
            assert num_outs == end_level - start_level
        self.start_level = start_level
        self.end_level = end_level
        self.add_extra_convs = add_extra_convs
        assert isinstance(add_extra_convs, (str, bool))
        if isinstance(add_extra_convs, str):
            # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
            assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
        elif add_extra_convs:  # True
            if extra_convs_on_inputs:
                # For compatibility with previous release
                # TODO: deprecate `extra_convs_on_inputs`
                self.add_extra_convs = 'on_input'
            else:
                self.add_extra_convs = 'on_output'

        self.lateral_convs = nn.ModuleList()
        self.fpn_convs = nn.ModuleList()

        for i in range(self.start_level, self.backbone_end_level):
            l_conv = ConvModule(
                in_channels[i],
                out_channels,
                1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            fpn_conv = ConvModule(
                out_channels,
                out_channels,
                3,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                inplace=False)

            self.lateral_convs.append(l_conv)
            self.fpn_convs.append(fpn_conv)

        # add extra conv layers (e.g., RetinaNet)
        extra_levels = num_outs - self.backbone_end_level + self.start_level
        if self.add_extra_convs and extra_levels >= 1:
            for i in range(extra_levels):
                if i == 0 and self.add_extra_convs == 'on_input':
                    in_channels = self.in_channels[self.backbone_end_level - 1]
                else:
                    in_channels = out_channels
                extra_fpn_conv = ConvModule(
                    in_channels,
                    out_channels,
                    3,
                    stride=2,
                    padding=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg,
                    inplace=False)
                self.fpn_convs.append(extra_fpn_conv)
        self.sigmoid = nn.Sigmoid()
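A hypothetical instantiation (channel numbers are illustrative, and ConvModule is assumed to come from mmcv as in standard FPN code):

fpn = SiameseFPN(in_channels=[256, 512, 1024, 2048],  # e.g. ResNet C2-C5 widths
                 out_channels=256,
                 num_outs=5,                           # one extra pyramid level
                 add_extra_convs='on_input')           # extra conv reads the last backbone map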
Example #28
0
    def __init__(
            self,
            *,
            mlp: List[int],
            npoint: int = None,
            split: int = 18,
            radius: float = None,
            nsample: int = None,
            bn: bool = True,
            use_xyz: bool = True,
            pooling: str = 'max',
            sigma: float = None,  # for RBF pooling
            normalize_xyz: bool = False,  # normalize local XYZ with radius
            sample_uniformly: bool = False,
            ret_unique_cnt: bool = False,
            same_idx: bool = False,
            use_feature: bool = True):
        super().__init__()

        self.npoint = npoint
        self.radius = radius
        self.split = split
        self.nsample = nsample
        self.pooling = pooling
        self.mlp_module = None
        self.use_xyz = use_xyz
        self.sigma = sigma
        if self.sigma is None:
            self.sigma = self.radius / 2
        self.normalize_xyz = normalize_xyz
        self.ret_unique_cnt = ret_unique_cnt
        self.same_idx = same_idx

        if npoint is not None:
            '''
            self.grouper = pointnet2_utils.PairwiseGroup(radius, nsample,
                use_xyz=use_xyz, ret_grouped_xyz=True, normalize_xyz=normalize_xyz,
                sample_uniformly=sample_uniformly, ret_unique_cnt=ret_unique_cnt, use_feature=use_feature)
            '''
            self.grouper = pointnet2_utils.QueryAndGroup(
                radius,
                nsample,
                use_xyz=use_xyz,
                ret_grouped_xyz=True,
                normalize_xyz=normalize_xyz,
                sample_uniformly=sample_uniformly,
                ret_unique_cnt=ret_unique_cnt,
                use_feature=use_feature,
                ret_idx=True)
        else:
            self.grouper = pointnet2_utils.GroupAll(use_xyz,
                                                    ret_grouped_xyz=True)

        mlp_spec = mlp
        if use_feature and len(mlp_spec) > 0:
            mlp_spec[0] += mlp_spec[0]  # pairwise features double the input width
        if use_xyz and len(mlp_spec) > 0:
            mlp_spec[0] += 3  # grouped XYZ coordinates are appended
        self.mlp_module = nn.ModuleList()
        for i in range(split):
            self.mlp_module.append(pt_utils.SharedMLP(mlp_spec, bn=bn))
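A worked instance of the channel arithmetic above (the numbers are illustrative only):

mlp = [64, 128]
mlp_spec = list(mlp)
mlp_spec[0] += mlp_spec[0]   # use_feature: pairwise features double the width, 64 -> 128
mlp_spec[0] += 3             # use_xyz: grouped XYZ coordinates appended, 128 -> 131
assert mlp_spec == [131, 128]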
Example #29
0
    def __init__(self,
                 output_blocks=[DEFAULT_BLOCK_INDEX],
                 resize_input=True,
                 normalize_input=True,
                 requires_grad=False,
                 use_fid_inception=True):
        """Build pretrained InceptionV3

        Parameters
        ----------
        output_blocks : list of int
            Indices of blocks to return features of. Possible values are:
                - 0: corresponds to output of first max pooling
                - 1: corresponds to output of second max pooling
                - 2: corresponds to output which is fed to aux classifier
                - 3: corresponds to output of final average pooling
        resize_input : bool
            If true, bilinearly resizes input to width and height 299 before
            feeding input to model. As the network without fully connected
            layers is fully convolutional, it should be able to handle inputs
            of arbitrary size, so resizing might not be strictly needed
        normalize_input : bool
            If true, scales the input from range (0, 1) to the range the
            pretrained Inception network expects, namely (-1, 1)
        requires_grad : bool
            If true, parameters of the model require gradients. Possibly useful
            for finetuning the network
        use_fid_inception : bool
            If true, uses the pretrained Inception model used in Tensorflow's
            FID implementation. If false, uses the pretrained Inception model
            available in torchvision. The FID Inception model has different
            weights and a slightly different structure from torchvision's
            Inception model. If you want to compute FID scores, you are
            strongly advised to set this parameter to true to get comparable
            results.
        """
        super(InceptionV3, self).__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input
        self.output_blocks = sorted(output_blocks)
        self.last_needed_block = max(output_blocks)

        assert self.last_needed_block <= 3, \
            'Last possible output block index is 3'

        self.blocks = nn.ModuleList()

        if use_fid_inception:
            inception = fid_inception_v3()
        else:
            inception = models.inception_v3(pretrained=True)

        # Block 0: input to maxpool1
        block0 = [
            inception.Conv2d_1a_3x3, inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2)
        ]
        self.blocks.append(nn.Sequential(*block0))

        # Block 1: maxpool1 to maxpool2
        if self.last_needed_block >= 1:
            block1 = [
                inception.Conv2d_3b_1x1, inception.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2)
            ]
            self.blocks.append(nn.Sequential(*block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [
                inception.Mixed_5b,
                inception.Mixed_5c,
                inception.Mixed_5d,
                inception.Mixed_6a,
                inception.Mixed_6b,
                inception.Mixed_6c,
                inception.Mixed_6d,
                inception.Mixed_6e,
            ]
            self.blocks.append(nn.Sequential(*block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                inception.Mixed_7a, inception.Mixed_7b, inception.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1))
            ]
            self.blocks.append(nn.Sequential(*block3))

        for param in self.parameters():
            param.requires_grad = requires_grad
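A typical FID-style usage of this class (a sketch: it assumes the usual forward that returns one feature map per requested block, which is not shown in this snippet):

model = InceptionV3(output_blocks=[3]).eval()   # final average pooling only
images = torch.rand(8, 3, 299, 299)             # batch scaled to [0, 1]
with torch.no_grad():
    features = model(images)[0]                 # (8, 2048, 1, 1)
features = features.squeeze(-1).squeeze(-1)     # (8, 2048) activations for FID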
Example #30
0
    def __init__(self, length, in_channels, out_channels, residual_channels, block_channels, kernel_size, num_blocks,
                 feedforward_channels):
        """
        Arguments:
            length: int. The length of input sequences.
            in_channels: int. The number of input channels to this network.
            out_channels: int. The number of output channels to make a linear transformation, right at the end.
            residual_channels: int. The number of channels to make a Convolutional transformation to, at the start.
            block_channels: The number of channels in each residual block.
            kernel_size: The size of the kernel in each convolutional layer.
            num_blocks: How many residual blocks. Each block has two affine transformations of width block_channels.
            feedforward_channels: Size of hidden layer in final feedforward network.

        Thus the architecture is:

        [Convolutional transform in_channels -> residual_channels]
                                |
                                |
                                +---------------------\
                                |                     |
                                |                [Batch norm]
                                |                     |
                                |                  [ReLU]
                                |                     |
                                |     [Convolutional transform residual_channels -> block_channels]
                                |                     |
                                |                [Batch norm]
                                |                     |
                                |                  [ReLU]
                                |                     |
                                |     [Convolutional transform block_channels -> residual_channels]
                                |                     |
                           [Addition]-----------------/
                                |
                                |

                                .
                                .  repeat for num_blocks blocks
                                .

                                |
        [Linear transform residual_channels -> out_channels]
        """

        super(CNNResNet, self).__init__()

        self.length = length
        self.in_channels = in_channels
        self.residual_channels = residual_channels
        self.block_channels = block_channels
        self.num_blocks = num_blocks
        self.feedforward_channels = feedforward_channels
        self.out_channels = out_channels

        self.first_padding = nn.ConstantPad1d((kernel_size - 1, 0), 0)
        self.first_conv = nn.Conv1d(in_channels=in_channels,
                                    out_channels=residual_channels,
                                    kernel_size=kernel_size)

        self.blocks = nn.ModuleList()
        for _ in range(num_blocks):
            block = nn.Sequential(nn.BatchNorm1d(residual_channels),
                                  nn.ReLU(),
                                  nn.ConstantPad1d((kernel_size - 1, 0), 0),
                                  nn.Conv1d(in_channels=residual_channels,
                                            out_channels=block_channels,
                                            kernel_size=kernel_size),
                                  nn.BatchNorm1d(block_channels),
                                  nn.ReLU(),
                                  nn.ConstantPad1d((kernel_size - 1, 0), 0),
                                  nn.Conv1d(in_channels=block_channels,
                                            out_channels=residual_channels,
                                            kernel_size=kernel_size))
            self.blocks.append(block)

        self.final_affine_one = nn.Linear(length * residual_channels, feedforward_channels)
        self.final_affine_two = nn.Linear(feedforward_channels, out_channels)
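
The constructor above does not include the forward pass; following the architecture diagram in the docstring, it would look roughly like this (a sketch; the ReLU between the two final affine layers is an assumption):

    def forward(self, x):                        # x: (batch, in_channels, length)
        x = self.first_conv(self.first_padding(x))
        for block in self.blocks:
            x = x + block(x)                     # residual addition
        x = x.flatten(1)                         # (batch, length * residual_channels)
        x = torch.relu(self.final_affine_one(x))
        return self.final_affine_two(x)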