def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    The first entry of ``module_defs`` holds the network hyperparameters. It is
    popped off the list (so the caller's list is mutated and afterwards only
    contains layer definitions) and returned unchanged.

    Args:
        module_defs: list of dicts, one per config section. Every layer dict
            has a ``"type"`` key; the remaining keys depend on the type.

    Returns:
        Tuple ``(hyperparams, module_list)`` where ``module_list`` is an
        ``nn.ModuleList`` holding one ``nn.Sequential`` per layer definition.
    """
    hyperparams = module_defs.pop(0)
    # output_filters[0] is the input channel count; entry k + 1 is the
    # channel count produced by layer k.
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()

    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        layer_type = module_def["type"]
        # Layers that do not change the channel count (maxpool, upsample,
        # yolo, ...) inherit the previous layer's value. Seeding it here
        # avoids an UnboundLocalError when such a layer comes first.
        filters = output_filters[-1]

        if layer_type == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
            modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # Batch norm supplies its own shift, so the conv bias is
                    # only enabled when batch norm is off.
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
            if module_def["activation"] == "leaky":
                modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))

        elif layer_type == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad right/bottom by one before the 2x2 stride-1 pooling.
                modules.add_module("_debug_padding_%d" % i, nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(
                kernel_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
            )
            modules.add_module("maxpool_%d" % i, maxpool)

        elif layer_type == "upsample":
            # The project-local Upsample is used here; the original author
            # marked nn.Upsample(mode='nearest') as deprecated.
            upsample = Upsample(scale_factor=int(module_def["stride"]))
            modules.add_module("upsample_%d" % i, upsample)

        elif layer_type == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            filters = 0
            for layer_i in layers:
                if layer_i > 0:
                    # Absolute layer index: shift by one to skip the input
                    # channel entry at output_filters[0].
                    filters += output_filters[layer_i + 1]
                else:
                    # Relative (non-positive) index counts back from the end.
                    filters += output_filters[layer_i]
            modules.add_module("route_%d" % i, EmptyLayer())

        elif layer_type == "shortcut":
            filters = output_filters[int(module_def["from"])]
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif layer_type == "yolo":
            anchors = _masked_anchors(module_def)
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)

        elif layer_type == "feed_conv2d":
            filters = int(module_def["anchors_num"]) * 5
            if "out_channel" in module_def:
                filters = int(module_def["out_channel"])
            modules.add_module(
                "feed_conv_%d" % i,
                FeedConv2d(
                    in_channels=output_filters[-1],
                    out_channel_unit=filters,
                    kernel_size=int(module_def["size"]),
                    stride=int(module_def["stride"]),
                ),
            )

        elif layer_type == "fyolo":
            anchors = _masked_anchors(module_def)
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = FYOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)

        elif layer_type == "myolo":
            ratios = [float(x) for x in module_def["ratios"].split(",")]
            scales = [float(x) for x in module_def["scales"].split(",")]
            # The preceding layer must emit exactly 5 channels per anchor.
            num_anchors_should = output_filters[-1] / 5
            num_anchors = len(ratios) * len(scales)
            assert num_anchors_should == num_anchors
            anchor_generator = Anchor(ratios, scales)
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = MYOLOLayer(anchor_generator, num_anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list


def _masked_anchors(module_def):
    """Return the (w, h) anchor pairs of ``module_def`` selected by its ``mask``."""
    anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
    flat = [int(x) for x in module_def["anchors"].split(",")]
    pairs = [(flat[j], flat[j + 1]) for j in range(0, len(flat), 2)]
    return [pairs[j] for j in anchor_idxs]
def clones(module, N):
    """Produce N identical layers from a given module.

    Each entry is an independent deep copy of ``module``; the result is an
    ``nn.ModuleList`` so the copies are registered as sub-modules.
    """
    replicas = nn.ModuleList()
    for _ in range(N):
        replicas.append(copy.deepcopy(module))
    return replicas
def __init__(self, dictionary, encoders):
    """Initialise the parent with ``dictionary`` and register ``encoders``.

    Args:
        dictionary: forwarded unchanged to the parent constructor.
        encoders: iterable of modules, stored as an ``nn.ModuleList`` in
            ``self.encoders``.
    """
    super().__init__(dictionary)
    registered = nn.ModuleList(encoders)
    self.encoders = registered
def __init__(
    self,
    num_classes,
    width=1.0,
    strides=[8, 16, 32],
    in_channels=[256, 512, 1024],
    act="silu",
    depthwise=False,
):
    """
    Build the per-level stems, conv towers and prediction layers of the head.

    Args:
        num_classes (int): number of classes; sets the width of each
            classification prediction layer.
        width (float): multiplier applied to every channel count.
        strides (list): stored as ``self.strides``.
        in_channels (list): un-scaled input channel count of each level; one
            set of layers is created per entry.
        act (str): activation type of conv. Default value: "silu".
        depthwise (bool): whether to use ``DWConv`` instead of ``BaseConv``
            in the conv towers. Default value: False.
    """
    super().__init__()

    self.n_anchors = 1
    self.num_classes = num_classes
    self.decode_in_inference = True  # for deploy, set to False

    self.cls_convs = nn.ModuleList()
    self.reg_convs = nn.ModuleList()
    self.cls_preds = nn.ModuleList()
    self.reg_preds = nn.ModuleList()
    self.obj_preds = nn.ModuleList()
    self.stems = nn.ModuleList()

    tower_conv = DWConv if depthwise else BaseConv
    hidden = int(256 * width)

    def make_tower():
        # Two stacked 3x3 convs that keep the hidden width.
        first = tower_conv(
            in_channels=hidden, out_channels=hidden, ksize=3, stride=1, act=act
        )
        second = tower_conv(
            in_channels=hidden, out_channels=hidden, ksize=3, stride=1, act=act
        )
        return nn.Sequential(first, second)

    def make_pred(n_out):
        # 1x1 conv projecting the hidden width to the prediction width.
        return nn.Conv2d(
            in_channels=hidden,
            out_channels=n_out,
            kernel_size=1,
            stride=1,
            padding=0,
        )

    # Layers are created level by level in a fixed order so that parameter
    # initialisation consumes the RNG in the same sequence as before.
    for level_channels in in_channels:
        self.stems.append(
            BaseConv(
                in_channels=int(level_channels * width),
                out_channels=hidden,
                ksize=1,
                stride=1,
                act=act,
            )
        )
        self.cls_convs.append(make_tower())
        self.reg_convs.append(make_tower())
        self.cls_preds.append(make_pred(self.n_anchors * self.num_classes))
        self.reg_preds.append(make_pred(4))
        self.obj_preds.append(make_pred(self.n_anchors * 1))

    self.use_l1 = False
    self.l1_loss = nn.L1Loss(reduction="none")
    self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none")
    self.iou_loss = IOUloss(reduction="none")
    self.strides = strides
    n_levels = len(in_channels)
    # Note: every slot initially refers to the same placeholder tensor.
    self.grids = [torch.zeros(1)] * n_levels
    self.expanded_strides = [None] * n_levels
def __init__(
    self,
    stages,
    in_channels,
    last_channels,
    out_channels,
    conv_layer=NormConv2d,
    subpixel_upsampling=False,
    n_latent_stages=2,
):
    """
    Build the residual blocks, upsamplers and autoregressive latent modules.

    Args:
        stages: number of stages; one group of residual blocks per stage.
        in_channels: channel count at the first stage, also used as the
            latent channel count throughout.
        last_channels: input channel count of the final output convolution;
            also caps the per-stage widths.
        out_channels: output channel count of the final convolution.
        conv_layer: convolution class used for all plain convolutions.
        subpixel_upsampling: passed to ``Upsample`` for latent stages only.
        n_latent_stages: number of leading stages that get latent modules.
    """
    super().__init__()
    self.n_rnb = 2
    self.n_stages = stages
    self.n_latent_stages = n_latent_stages

    self.nin = conv_layer(in_channels, in_channels, kernel_size=1)
    self.blocks = nn.ModuleList()
    self.ups = nn.ModuleList()

    # autoregressive stuff
    self.latent_nins = nn.ModuleDict()
    self.auto_lp = nn.ModuleDict()
    self.auto_blocks = nn.ModuleDict()

    # last conv
    self.out_conv = conv_layer(last_channels, out_channels, kernel_size=3, padding=1)

    # for reordering
    self.depth_to_space = DepthToSpace(block_size=2)
    self.space_to_depth = SpaceToDepth(block_size=2)

    latent_width = in_channels
    width = in_channels

    def append_residual_blocks(channels):
        # Half of the per-stage residual blocks go before the latent
        # modules and half after.
        for _ in range(self.n_rnb // 2):
            self.blocks.append(
                VunetRNB(
                    channels=channels,
                    a_channels=channels,
                    residual=True,
                    conv_layer=conv_layer,
                )
            )

    for stage in range(self.n_stages):
        append_residual_blocks(width)

        has_latent = stage < self.n_latent_stages
        if has_latent:
            scale = f"l_{stage}"
            self.latent_nins[scale] = conv_layer(
                latent_width * 2,
                latent_width,
                kernel_size=1,
            )

            # autoregressive_stuff: four projection / block pairs per scale
            projections = ModuleList()
            ar_blocks = ModuleList()
            for step in range(4):
                projections.append(
                    conv_layer(
                        4 * latent_width,
                        latent_width,
                        kernel_size=3,
                        padding=1,
                    )
                )
                if step == 0:
                    ar_blocks.append(VunetRNB(channels=latent_width))
                else:
                    ar_blocks.append(
                        VunetRNB(
                            channels=4 * latent_width,
                            a_channels=latent_width,
                            residual=True,
                        )
                    )
            self.auto_lp[scale] = projections
            self.auto_blocks[scale] = ar_blocks

        append_residual_blocks(width)

        if stage + 1 < self.n_stages:
            out_c = min(width, last_channels * 2 ** (stages - (stage + 2)))
            self.ups.append(
                Upsample(
                    width,
                    out_c,
                    subpixel=subpixel_upsampling if has_latent else False,
                )
            )
            width = out_c
def copy_layers(src_layers: nn.ModuleList, dest_layers: nn.ModuleList, layers_to_copy: List[int]) -> None:
    """Load the weights of selected source layers into ``dest_layers``.

    Args:
        src_layers: layers to copy from.
        dest_layers: layers to copy into; updated in place through
            ``load_state_dict``.
        layers_to_copy: indices into ``src_layers``. The i-th index supplies
            the state for ``dest_layers[i]``.

    Raises:
        AssertionError: if the number of selected layers differs from
            ``len(dest_layers)``.
    """
    selected = nn.ModuleList()
    for idx in layers_to_copy:
        selected.append(src_layers[idx])
    n_dest = len(dest_layers)
    n_selected = len(selected)
    assert n_dest == n_selected, f"{n_dest} != {n_selected}"
    dest_layers.load_state_dict(selected.state_dict())
def __init__(
    self, n, nstack, dims, modules, heads, pre=None, cnv_dim=256,
    make_tl_layer=None, make_br_layer=None,
    make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer,
    make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer,
    make_up_layer=make_layer, make_low_layer=make_layer,
    make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
    make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
    make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer,
    kp_layer=residual
):
    """
    Build a stack of ``nstack`` keypoint modules with per-head output branches.

    Args:
        n, dims, modules: forwarded to every ``kp_module``.
        nstack: number of stacked modules; also the length of each head's
            branch list.
        heads: mapping from head name to the value passed as the last
            argument of the head factory. Each head name becomes an
            attribute holding an ``nn.ModuleList``.
        pre: optional stem module; a default conv + residual stem is built
            when it is ``None``.
        cnv_dim: channel width produced by the ``cnvs`` layers.
        make_*_layer, kp_layer: factories forwarded to ``kp_module`` or used
            here to build the layers. ``make_tl_layer``, ``make_br_layer``
            and ``make_tag_layer`` are accepted but not used in this
            constructor.
    """
    super(exkp, self).__init__()

    self.nstack = nstack
    self.heads = heads

    curr_dim = dims[0]

    if pre is None:
        self.pre = nn.Sequential(
            convolution(7, 3, 128, stride=2),
            residual(3, 128, 256, stride=2)
        )
    else:
        self.pre = pre

    def conv1x1_bn(in_dim):
        # Bias-free 1x1 projection to curr_dim followed by batch norm.
        return nn.Sequential(
            nn.Conv2d(in_dim, curr_dim, (1, 1), bias=False),
            nn.BatchNorm2d(curr_dim)
        )

    kps = []
    for _ in range(nstack):
        kps.append(
            kp_module(
                n, dims, modules, layer=kp_layer,
                make_up_layer=make_up_layer,
                make_low_layer=make_low_layer,
                make_hg_layer=make_hg_layer,
                make_hg_layer_revr=make_hg_layer_revr,
                make_pool_layer=make_pool_layer,
                make_unpool_layer=make_unpool_layer,
                make_merge_layer=make_merge_layer
            )
        )
    self.kps = nn.ModuleList(kps)

    cnvs = []
    for _ in range(nstack):
        cnvs.append(make_cnv_layer(curr_dim, cnv_dim))
    self.cnvs = nn.ModuleList(cnvs)

    # The last stack has no successor, so these have one entry fewer.
    inters = []
    for _ in range(nstack - 1):
        inters.append(make_inter_layer(curr_dim))
    self.inters = nn.ModuleList(inters)

    inters_ = []
    for _ in range(nstack - 1):
        inters_.append(conv1x1_bn(curr_dim))
    self.inters_ = nn.ModuleList(inters_)

    cnvs_ = []
    for _ in range(nstack - 1):
        cnvs_.append(conv1x1_bn(cnv_dim))
    self.cnvs_ = nn.ModuleList(cnvs_)

    ## keypoint heatmaps
    for head in heads:
        is_heatmap = 'hm' in head
        factory = make_heat_layer if is_heatmap else make_regr_layer
        branches = nn.ModuleList()
        for _ in range(nstack):
            branches.append(factory(cnv_dim, curr_dim, heads[head]))
        setattr(self, head, branches)
        if is_heatmap:
            # Heatmap heads get the bias of their last layer set to -2.19.
            for heat in branches:
                heat[-1].bias.data.fill_(-2.19)

    self.relu = nn.ReLU(inplace=True)
def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
    """Create a stack of ``nn.Linear`` layers.

    The layer widths run ``input_dim -> hidden_dim -> ... -> output_dim``
    with ``num_layers - 1`` hidden widths in between.
    """
    super().__init__()
    self.num_layers = num_layers
    widths = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]
    linears = []
    # Pair every width with its successor to get (in, out) for each layer.
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        linears.append(nn.Linear(fan_in, fan_out))
    self.layers = nn.ModuleList(linears)
def clones(module, n_layers):
    """
    Return an ``nn.ModuleList`` of ``n_layers`` independent deep copies of ``module``.
    """
    copies = (copy.deepcopy(module) for _ in range(n_layers))
    return nn.ModuleList(copies)
def __init__(self, ntokens, input_dims, hidden_size, num_heads, attn_dropout,
             relu_dropout, res_dropout, layers, horizons, attn_mask=False,
             src_mask=False, tgt_mask=False, crossmodal=False):
    """
    Construct a basic Transformer model for multimodal generation tasks.

    :param ntokens: The number of unique tokens in text modality.
    :param input_dims: Pair of input dimensions of the two modalities; both
        must be equal.
    :param hidden_size: Width of the hidden layer in the output heads.
    :param num_heads: The number of heads to use in the multi-headed attention.
    :param attn_dropout: The dropout following self-attention sm((QK)^T/d)V.
    :param relu_dropout: The dropout for ReLU in residual block.
    :param res_dropout: The dropout of each residual block.
    :param layers: The number of transformer blocks.
    :param horizons: Number of encoder/decoder/projection copies to create.
    :param attn_mask: A boolean indicating whether to use attention mask
        (stored for the encoder).
    :param src_mask: Accepted but not stored by this constructor.
    :param tgt_mask: Accepted but not stored by this constructor.
    :param crossmodal: Use Crossmodal Transformer or not.
    """
    super(TransformerGenerationModel, self).__init__()

    dim_l, dim_a = input_dims
    self.orig_d_l = dim_l
    self.orig_d_a = dim_a
    assert self.orig_d_l == self.orig_d_a
    # No separate projection size: the model width equals the input width.
    self.d_l = self.orig_d_l
    self.d_a = self.orig_d_a
    self.ntokens = ntokens

    final_out = self.d_l
    h_out = hidden_size

    self.num_heads = num_heads
    self.layers = layers
    self.horizons = horizons
    self.attn_dropout = attn_dropout
    self.relu_dropout = relu_dropout
    self.res_dropout = res_dropout
    self.attn_mask = attn_mask  # for encoder
    self.crossmodal = crossmodal

    # Transformer networks: one encoder and one decoder per horizon
    encoders = []
    for _ in range(self.horizons):
        encoders.append(self.get_encoder_network())
    self.trans_encoder = nn.ModuleList(encoders)

    decoders = []
    for _ in range(self.horizons):
        decoders.append(self.get_decoder_network())
    self.trans_decoder = nn.ModuleList(decoders)

    encoder_size = count_parameters(self.trans_encoder)
    print("Encoder Model size: {0}".format(encoder_size))
    decoder_size = count_parameters(self.trans_decoder)
    print("Decoder Model size: {0}".format(decoder_size))

    # Projection layers: one per horizon and modality
    proj_l = []
    for _ in range(self.horizons):
        proj_l.append(nn.Linear(self.orig_d_l, self.d_l))
    self.proj_l = nn.ModuleList(proj_l)

    proj_a = []
    for _ in range(self.horizons):
        proj_a.append(nn.Linear(self.orig_d_a, self.d_a))
    self.proj_a = nn.ModuleList(proj_a)

    # Two-layer output heads A and B
    self.out_fc1_A = nn.Linear(final_out, h_out)
    self.out_fc1_B = nn.Linear(final_out, h_out)
    self.out_fc2_A = nn.Linear(h_out, final_out)
    self.out_fc2_B = nn.Linear(h_out, final_out)

    self.out_dropout = nn.Dropout(0.5)
def __init__(self, ntokens, time_step, input_dims, hidden_size, embed_dim,
             output_dim, num_heads, attn_dropout, relu_dropout, res_dropout,
             layers, horizons, attn_mask=False, crossmodal=False):
    """
    Construct a basic Transformer model for multimodal tasks.

    :param ntokens: The number of unique tokens in text modality.
    :param time_step: Accepted but not used by this constructor.
    :param input_dims: Pair of input lengths of the two modalities; both
        must be equal.
    :param hidden_size: Width of the hidden output layer.
    :param embed_dim: Output width of the per-horizon projection layers.
    :param output_dim: Width of the final output layer.
    :param num_heads: The number of heads to use in the multi-headed attention.
    :param attn_dropout: The dropout following self-attention sm((QK)^T/d)V.
    :param relu_dropout: The dropout for ReLU in residual block.
    :param res_dropout: The dropout of each residual block.
    :param layers: The number of transformer blocks.
    :param horizons: Number of transformer/projection copies to create.
    :param attn_mask: A boolean indicating whether to use attention mask
        (for transformer decoder).
    :param crossmodal: Use Crossmodal Transformer or not.
    """
    super(TransformerModel, self).__init__()

    cnn_layers = [
        Conv1d(in_channels=2, out_channels=16, kernel_size=6, stride=2),
        nn.BatchNorm1d(16),
        nn.ReLU(),
        nn.MaxPool1d(2, stride=2),
        Conv1d(in_channels=16, out_channels=32, kernel_size=3, stride=2),
        nn.BatchNorm1d(32),
        nn.ReLU(),
        nn.MaxPool1d(2, stride=2),
        Conv1d(in_channels=32, out_channels=64, kernel_size=3, stride=1),
        nn.BatchNorm1d(64),
        nn.ReLU(),
        nn.MaxPool1d(2, stride=2),
        Conv1d(in_channels=64, out_channels=64, kernel_size=3, stride=1),
        nn.BatchNorm1d(64),
        nn.ReLU(),
        nn.MaxPool1d(2, stride=2),
        Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1),
        nn.ReLU(),
        Conv1d(in_channels=128, out_channels=128, kernel_size=3, stride=1),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.MaxPool1d(2, stride=2),
        Flatten(),
    ]
    self.cnn = nn.Sequential(*cnn_layers)

    dim_l, dim_a = input_dims
    self.orig_d_l = dim_l
    self.orig_d_a = dim_a
    assert self.orig_d_l == self.orig_d_a

    # Length left after the conv / pool chain above. Each (kernel, stride)
    # pair mirrors one Conv1d or MaxPool1d in self.cnn, in order, and applies
    # the unpadded output-length formula (L - kernel) // stride + 1.
    kernel_stride_chain = (
        (6, 2), (2, 2),
        (3, 2), (2, 2),
        (3, 1), (2, 2),
        (3, 1), (2, 2),
        (3, 1),
        (3, 1), (2, 2),
    )
    channels = self.orig_d_l
    for kernel, stride in kernel_stride_chain:
        channels = (channels - kernel) // stride + 1

    flat_half = 128 * channels // 2
    self.d_l = flat_half
    self.d_a = flat_half
    self.ntokens = ntokens

    final_out = embed_dim * 2
    h_out = hidden_size

    self.num_heads = num_heads
    self.layers = layers
    self.horizons = horizons
    self.attn_dropout = attn_dropout
    self.relu_dropout = relu_dropout
    self.res_dropout = res_dropout
    self.attn_mask = attn_mask
    self.embed_dim = embed_dim
    self.crossmodal = crossmodal

    # Transformer networks: one per horizon
    networks = []
    for _ in range(self.horizons):
        networks.append(self.get_network())
    self.trans = nn.ModuleList(networks)

    print("Encoder Model size: {0}".format(count_parameters(self.trans)))

    # Projection layers: one per horizon and modality
    proj_l = []
    for _ in range(self.horizons):
        proj_l.append(nn.Linear(self.d_l, self.embed_dim))
    self.proj_l = nn.ModuleList(proj_l)

    proj_a = []
    for _ in range(self.horizons):
        proj_a.append(nn.Linear(self.d_a, self.embed_dim))
    self.proj_a = nn.ModuleList(proj_a)

    self.out_fc1 = nn.Linear(final_out, h_out)
    self.out_fc2 = nn.Linear(h_out, output_dim)

    self.out_dropout = nn.Dropout(0.5)
def __init__(self, blocks):
    """Register ``blocks`` and start at the first block with ``alpha`` of 1."""
    super(ProgressiveGenerator, self).__init__()
    block_list = nn.ModuleList(blocks)
    self.blocks = block_list
    # Index of the currently active block.
    self.cur_block = 0
    self.alpha = 1.
def __init__(self, blocks):
    """Register ``blocks`` and start at the last block with ``alpha`` of 1."""
    super(ProgressiveDiscriminator, self).__init__()
    block_list = nn.ModuleList(blocks)
    self.blocks = block_list
    # Index of the currently active block: the last one registered.
    self.cur_block = len(block_list) - 1
    self.alpha = 1.
def __init__(self, n_layers=12, channels_interval=24, kernel_size_in_encoder=15, kernel_size_in_decoder=5,
             dilation_in_encoder=None, dilation_in_decoder=None):
    """
    Build the encoder, bottleneck, decoder and output layers of the 1-D U-Net.

    Args:
        n_layers: number of down-sampling layers and of up-sampling layers.
        channels_interval: channel growth per encoder layer; encoder layer
            ``k`` (1-based) outputs ``k * channels_interval`` channels.
        kernel_size_in_encoder: kernel size of every down-sampling layer.
        kernel_size_in_decoder: kernel size of every up-sampling layer.
        dilation_in_encoder: optional dict with parallel lists ``"layers"``
            (1-based layer numbers) and ``"dilated_rates"``. Layers that are
            not listed, or all layers when this is ``None``, use dilation 1.
        dilation_in_decoder: same format, applied to the up-sampling layers.
    """
    super(UNet, self).__init__()

    # TODO: why does swapping kernel_size_in_encoder and kernel_size_in_decoder
    # increase the parameter count by roughly 4 million?
    if dilation_in_encoder:
        print(f"当前模型将在 **降采样层** 中使用膨胀卷积:{dilation_in_encoder}")

    if dilation_in_decoder:
        print(f"当前模型将在 **升采样层** 中使用膨胀卷积:{dilation_in_decoder}")

    def dilation_for(config, layer_number):
        """Return the dilation configured for a 1-based layer number, else 1."""
        # The config arguments default to None; without this guard the
        # default construction crashed when subscripting None.
        if not config or layer_number not in config["layers"]:
            return 1
        rate = config["dilated_rates"][config["layers"].index(layer_number)]
        # A falsy rate (None or 0) falls back to no dilation.
        return rate if rate else 1

    self.n_layers = n_layers
    self.channels_interval = channels_interval
    encoder_in_channels_list = [1] + [i * self.channels_interval for i in range(1, self.n_layers)]
    encoder_out_channels_list = [i * self.channels_interval for i in range(1, self.n_layers + 1)]

    # Author's note on the default 12 layers:
    #     1 => 2 => 3 => 4 => 5 => 6 => 7 => 8 => 9 => 10 => 11 => 12
    # 16384 => 8192 => 4096 => 2048 => 1024 => 512 => 256 => 128 => 64 => 32 => 16 => 8 => 4
    self.encoder = nn.ModuleList()
    for i in range(self.n_layers):
        dilation = dilation_for(dilation_in_encoder, i + 1)
        self.encoder.append(
            DownSamplingLayer(
                channel_in=encoder_in_channels_list[i],
                channel_out=encoder_out_channels_list[i],
                kernel_size=kernel_size_in_encoder,
                dilation=dilation,
                padding=calculate_same_padding(
                    l_in=encoder_in_channels_list[i],
                    kernel_size=kernel_size_in_encoder,
                    stride=1,
                    dilation=dilation
                ),
            )
        )

    self.middle = nn.Sequential(
        nn.Conv1d(self.n_layers * self.channels_interval, self.n_layers * self.channels_interval, 15, stride=1,
                  padding=7),
        nn.BatchNorm1d(self.n_layers * self.channels_interval),
        nn.LeakyReLU(negative_slope=0.1, inplace=True)
    )

    # The decoder mirrors the encoder, so both channel lists are reversed.
    decoder_in_channels_list = [(2 * i + 1) * self.channels_interval for i in range(1, self.n_layers)] + [
        2 * self.n_layers * self.channels_interval]
    decoder_in_channels_list = decoder_in_channels_list[::-1]
    decoder_out_channels_list = encoder_out_channels_list[::-1]
    self.decoder = nn.ModuleList()
    for i in range(self.n_layers):
        dilation = dilation_for(dilation_in_decoder, i + 1)
        self.decoder.append(
            UpSamplingLayer(
                channel_in=decoder_in_channels_list[i],
                channel_out=decoder_out_channels_list[i],
                kernel_size=kernel_size_in_decoder,
                dilation=dilation,
                padding=calculate_same_padding(
                    # NOTE(review): l_in is taken from the *encoder* channel
                    # list, as in the original code - confirm this is intended.
                    l_in=encoder_in_channels_list[i],
                    kernel_size=kernel_size_in_decoder,
                    stride=1,
                    dilation=dilation
                ),
            )
        )

    self.out = nn.Sequential(
        nn.Conv1d(1 + self.channels_interval, 1, kernel_size=1, stride=1),
        nn.Tanh()
    )
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    The first dict in ``module_defs`` stores the network-level hyperparameters.
    It is popped off the list (so the caller's list is mutated and afterwards
    only contains layer definitions) and returned unchanged.

    Args:
        module_defs: list of dicts, one per config section. Every layer dict
            has a ``"type"`` key; the remaining keys depend on the type.

    Returns:
        Tuple ``(hyperparams, module_list)`` where ``module_list`` is an
        ``nn.ModuleList`` holding one ``nn.Sequential`` per config block.
    """
    hyperparams = module_defs.pop(0)
    # Channel bookkeeping: entry 0 is the input channel count and each block
    # appends the number of channels it outputs. Every earlier layer is
    # tracked, not just the previous one, because route/shortcut look back.
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()

    for module_i, module_def in enumerate(module_defs):
        # One nn.Sequential per config block; a block may hold several layers.
        modules = nn.Sequential()
        layer_type = module_def["type"]
        # Layers that do not change the channel count (maxpool, upsample,
        # yolo) inherit the previous layer's value. Seeding it here avoids
        # an UnboundLocalError when such a layer comes first.
        filters = output_filters[-1]

        if layer_type == "convolutional":
            # Convolution, optional batch norm, optional leaky ReLU.
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])  # output channels
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2  # border padding
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # Batch norm supplies its own shift, so the conv bias is
                    # only enabled when batch norm is off.
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module(
                    f"batch_norm_{module_i}",
                    nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5),
                )
            if module_def["activation"] == "leaky":
                # Negative-slope coefficient of 0.1.
                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))

        elif layer_type == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad right/bottom by one before the 2x2 stride-1 pooling.
                modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(
                kernel_size=kernel_size,
                stride=stride,
                padding=int((kernel_size - 1) // 2),
            )
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif layer_type == "upsample":
            # Nearest-neighbour interpolation is requested, not bilinear.
            upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module(f"upsample_{module_i}", upsample)

        elif layer_type == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            # Skip the input-channel entry so that index k refers to layer k;
            # negative indices count back from the current layer.
            layer_outputs = output_filters[1:]
            filters = sum(layer_outputs[idx] for idx in layers)
            modules.add_module(f"route_{module_i}", EmptyLayer())

        elif layer_type == "shortcut":
            filters = output_filters[1:][int(module_def["from"])]
            # A placeholder layer is registered here; no learnable layer is
            # needed for this block.
            modules.add_module(f"shortcut_{module_i}", EmptyLayer())

        elif layer_type == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors: flat "w,h,w,h,..." list -> (w, h) pairs,
            # then keep only the pairs selected by the mask.
            flat = [int(x) for x in module_def["anchors"].split(",")]
            pairs = [(flat[j], flat[j + 1]) for j in range(0, len(flat), 2)]
            anchors = [pairs[j] for j in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_size = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module(f"yolo_{module_i}", yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
def __init__(self, in_dim, out_dim, args, mean_std=None):
    """
    Set up normalisation statistics, front-end settings and the network.

    One front-end / transform / pooling / output / angle layer set is built
    per front-end configuration; by default a single configuration is used.

    Args:
        in_dim, out_dim, args, mean_std: forwarded to
            ``self.prepare_mean_std`` to obtain the input/output mean and std.
    """
    super(Model, self).__init__()

    ##### required part, no need to change #####

    # mean std of input and output, stored as non-trainable parameters
    stats = self.prepare_mean_std(in_dim, out_dim, args, mean_std)
    in_m, in_s, out_m, out_s = stats
    self.input_mean = torch_nn.Parameter(in_m, requires_grad=False)
    self.input_std = torch_nn.Parameter(in_s, requires_grad=False)
    self.output_mean = torch_nn.Parameter(out_m, requires_grad=False)
    self.output_std = torch_nn.Parameter(out_s, requires_grad=False)

    ####
    # on input waveform and output target
    ####
    # Load protocol and prepare the target data for network training
    protocol_file = prj_conf.optional_argument[0]
    self.protocol_parser = protocol_parse(protocol_file)

    # Working sampling rate
    self.m_target_sr = 16000

    ####
    # front-end configuration
    #  multiple front-end configurations may be used
    #  by default, use a single front-end
    ####
    # frame shift (number of waveform points)
    self.frame_hops = [160]
    # frame length
    self.frame_lens = [320]
    # FFT length
    self.fft_n = [512]

    # spectrogram dim (base component)
    self.spec_with_delta = False
    self.spec_fb_dim = 60

    # window type
    self.win = torch.hann_window
    # floor in log-spectrum-amplitude calculating (not used)
    self.amp_floor = 0.00001

    # number of frames to be kept for each trial; None means no truncation
    self.v_truncate_lens = [None for x in self.frame_hops]

    # number of sub-models (by default, a single model)
    self.v_submodels = len(self.frame_lens)

    # dimension of embedding vectors
    self.v_emd_dim = 64

    # output classes
    self.v_out_class = 1

    ####
    # create network
    ####
    # Per-configuration layers are gathered in plain lists first and
    # wrapped in ModuleLists at the end.
    frontends = []
    transforms = []
    output_acts = []
    poolings = []
    angles = []

    # Width fed to the pooling layer; the linear layer takes twice this.
    pooled_dim = (self.spec_fb_dim // 16) * 32

    # it can handle models with multiple front-end configuration
    # by default, only a single front-end
    for idx, (_trunc_len, fft_n) in enumerate(zip(
            self.v_truncate_lens, self.fft_n)):

        # 1st part of the classifier
        lcnn_layers = [
            TrainableLinearFb(fft_n, self.m_target_sr, self.spec_fb_dim),
            torch_nn.Conv2d(1, 64, [5, 5], 1, padding=[2, 2]),
            nii_nn.MaxFeatureMap2D(),
            torch.nn.MaxPool2d([2, 2], [2, 2]),

            torch_nn.Conv2d(32, 64, [1, 1], 1, padding=[0, 0]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(32, affine=False),
            torch_nn.Conv2d(32, 96, [3, 3], 1, padding=[1, 1]),
            nii_nn.MaxFeatureMap2D(),
            torch.nn.MaxPool2d([2, 2], [2, 2]),
            torch_nn.BatchNorm2d(48, affine=False),

            torch_nn.Conv2d(48, 96, [1, 1], 1, padding=[0, 0]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(48, affine=False),
            torch_nn.Conv2d(48, 128, [3, 3], 1, padding=[1, 1]),
            nii_nn.MaxFeatureMap2D(),
            torch.nn.MaxPool2d([2, 2], [2, 2]),

            torch_nn.Conv2d(64, 128, [1, 1], 1, padding=[0, 0]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(64, affine=False),
            torch_nn.Conv2d(64, 64, [3, 3], 1, padding=[1, 1]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(32, affine=False),

            torch_nn.Conv2d(32, 64, [1, 1], 1, padding=[0, 0]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(32, affine=False),
            torch_nn.Conv2d(32, 64, [3, 3], 1, padding=[1, 1]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.MaxPool2d([2, 2], [2, 2]),

            torch_nn.Dropout(0.7),
        ]
        transforms.append(torch_nn.Sequential(*lcnn_layers))

        # pooling layer
        poolings.append(nii_nn.SelfWeightedPooling(pooled_dim))

        # 2nd part of the classifier
        output_acts.append(torch_nn.Linear(pooled_dim * 2, self.v_emd_dim))

        # final part for output layer
        angles.append(nii_ocsoftmax.OCAngleLayer(self.v_emd_dim))

        # front-end
        frontends.append(
            nii_front_end.Spectrogram(self.frame_lens[idx],
                                      self.frame_hops[idx],
                                      self.fft_n[idx],
                                      self.m_target_sr)
        )

    # Registration order is kept: front-end, transform, output, pooling, angle.
    self.m_frontend = torch_nn.ModuleList(frontends)
    self.m_transform = torch_nn.ModuleList(transforms)
    self.m_output_act = torch_nn.ModuleList(output_acts)
    self.m_pooling = torch_nn.ModuleList(poolings)
    self.m_angle = torch_nn.ModuleList(angles)
def __init__(self, block_name, depth, num_classes):
    """Build a three-stage CIFAR ResNet together with its search bookkeeping.

    Args:
        block_name: ``"ResNetBasicblock"`` or ``"ResNetBottleneck"``.
        depth: total network depth; ``depth - 2`` must be divisible by 6 for
            the basic block and by 9 for the bottleneck block.
        num_classes: output width of the final linear classifier.

    Raises:
        ValueError: if ``block_name`` is not one of the two supported names.
        AssertionError: if ``depth`` does not match the chosen block type, or
            if the layers' ranges do not add up to ``depth - 1``.
    """
    super(SearchShapeCifarResNet, self).__init__()

    # Model type specifies number of layers for CIFAR-10 and CIFAR-100 model
    if block_name == "ResNetBasicblock":
        block = ResNetBasicblock
        assert (depth - 2) % 6 == 0, "depth should be one of 20, 32, 44, 56, 110"
        layer_blocks = (depth - 2) // 6
    elif block_name == "ResNetBottleneck":
        block = ResNetBottleneck
        assert (depth - 2) % 9 == 0, "depth should be one of 164"
        layer_blocks = (depth - 2) // 9
    else:
        raise ValueError("invalid block : {:}".format(block_name))

    # Human-readable construction log, extended as layers are added.
    self.message = (
        "SearchShapeCifarResNet : Depth : {:} , Layers for each block : {:}".format(
            depth, layer_blocks
        )
    )
    self.num_classes = num_classes
    # self.channels[k] records the output width of self.layers[k].
    self.channels = [16]
    self.layers = nn.ModuleList(
        [
            ConvBNReLU(
                3, 16, 3, 1, 1, False, has_avg=False, has_bn=True, has_relu=True
            )
        ]
    )
    self.InShape = None
    # depth_info: index of the last layer of each stage -> stage description.
    self.depth_info = OrderedDict()
    # depth_at_i: layer index -> (stage, position among the depth choices).
    self.depth_at_i = OrderedDict()
    for stage in range(3):
        cur_block_choices = get_depth_choices(layer_blocks, False)
        # The largest choice must correspond to keeping every block.
        assert (
            cur_block_choices[-1] == layer_blocks
        ), "stage={:}, {:} vs {:}".format(stage, cur_block_choices, layer_blocks)
        self.message += (
            "\nstage={:} ::: depth-block-choices={:} for {:} blocks.".format(
                stage, cur_block_choices, layer_blocks
            )
        )
        # xstart is the index in self.layers of this stage's first block.
        block_choices, xstart = [], len(self.layers)
        for iL in range(layer_blocks):
            iC = self.channels[-1]
            planes = 16 * (2 ** stage)
            # Only the first block of stages 1 and 2 uses stride 2.
            stride = 2 if stage > 0 and iL == 0 else 1
            module = block(iC, planes, stride)
            self.channels.append(module.out_dim)
            self.layers.append(module)
            self.message += "\nstage={:}, ilayer={:02d}/{:02d}, block={:03d}, iC={:3d}, oC={:3d}, stride={:}".format(
                stage,
                iL,
                layer_blocks,
                len(self.layers) - 1,
                iC,
                module.out_dim,
                stride,
            )
            # added for depth
            layer_index = len(self.layers) - 1
            # Record the layer index at which each candidate depth ends.
            if iL + 1 in cur_block_choices:
                block_choices.append(layer_index)
            # At the stage's last block, store the stage summary under
            # that block's layer index.
            if iL + 1 == layer_blocks:
                self.depth_info[layer_index] = {
                    "choices": block_choices,
                    "stage": stage,
                    "xstart": xstart,
                }
    self.depth_info_list = []
    for xend, info in self.depth_info.items():
        self.depth_info_list.append((xend, info))
        xstart, xstage = info["xstart"], info["stage"]
        for ilayer in range(xstart, xend + 1):
            # idx counts how many choice end-points lie strictly before
            # this layer.
            idx = bisect_right(info["choices"], ilayer - 1)
            self.depth_at_i[ilayer] = (xstage, idx)

    self.avgpool = nn.AvgPool2d(8)
    # `module` is the last block created by the stage loop above.
    self.classifier = nn.Linear(module.out_dim, num_classes)
    self.InShape = None
    self.tau = -1
    self.search_mode = "basic"
    # assert sum(x.num_conv for x in self.layers) + 1 == depth, 'invalid depth check {:} vs {:}'.format(sum(x.num_conv for x in self.layers)+1, depth)

    # parameters for width
    # self.Ranges concatenates every layer's get_range() result;
    # layer2indexRange[k] is the [start, end) slice belonging to layer k.
    self.Ranges = []
    self.layer2indexRange = []
    for i, layer in enumerate(self.layers):
        start_index = len(self.Ranges)
        self.Ranges += layer.get_range()
        self.layer2indexRange.append((start_index, len(self.Ranges)))
    assert len(self.Ranges) + 1 == depth, "invalid depth check {:} vs {:}".format(
        len(self.Ranges) + 1, depth
    )

    # Attention parameters are allocated uninitialised here and filled by
    # the normal_ calls below.
    # presumably get_width_choices(None) and get_depth_choices(..., True)
    # return the number of choices as an int - TODO confirm
    self.register_parameter(
        "width_attentions",
        nn.Parameter(torch.Tensor(len(self.Ranges), get_width_choices(None))),
    )
    self.register_parameter(
        "depth_attentions",
        nn.Parameter(torch.Tensor(3, get_depth_choices(layer_blocks, True))),
    )
    nn.init.normal_(self.width_attentions, 0, 0.01)
    nn.init.normal_(self.depth_attentions, 0, 0.01)
    # Apply initialize_resnet to this module and every sub-module.
    self.apply(initialize_resnet)
def __init__(self, layers_size):
    """Create one ``nn.Linear`` per consecutive pair of sizes in ``layers_size``."""
    super(Net, self).__init__()
    stack = nn.ModuleList()
    # Pair each size with its successor to get (in_features, out_features).
    for fan_in, fan_out in zip(layers_size, layers_size[1:]):
        stack.append(nn.Linear(fan_in, fan_out))
    self.linear_layer_list = stack
def _prepare_module(self):
    """Create the named layers of the network and register them on ``self``.

    Twenty conv blocks (some followed by a 2x2 max-pool) are created first,
    then a skip branch (1x1 conv + space-to-depth), two further conv blocks
    and a final 1x1 ``logits`` convolution with
    ``num_anchors * (5 + num_classes)`` output channels.

    Side effects:
        Sets ``self.module`` to an ``nn.ModuleList`` holding every layer in
        creation order.

    Returns:
        OrderedDict mapping layer name to module, in creation order.
    """
    # (layer number, in channels, out channels, kernel size, pooled afterwards)
    backbone = (
        (1, 3, 32, 3, True),
        (2, 32, 64, 3, True),
        (3, 64, 128, 3, False),
        (4, 128, 64, 1, False),
        (5, 64, 128, 3, True),
        (6, 128, 256, 3, False),
        (7, 256, 128, 1, False),
        (8, 128, 256, 3, True),
        (9, 256, 512, 3, False),
        (10, 512, 256, 1, False),
        (11, 256, 512, 3, False),
        (12, 512, 256, 1, False),
        (13, 256, 512, 3, True),
        (14, 512, 1024, 3, False),
        (15, 1024, 512, 1, False),
        (16, 512, 1024, 3, False),
        (17, 1024, 512, 1, False),
        (18, 512, 1024, 3, False),
        (19, 1024, 1024, 3, False),
        # Detection layers start here.
        (20, 1024, 1024, 3, False),
    )

    layers = OrderedDict()
    for number, c_in, c_out, kernel, pooled in backbone:
        # 3x3 kernels use padding 1 and 1x1 kernels padding 0.
        layers['conv%d' % number] = ConvBnAct(
            c_in, c_out, kernel, stride=1, padding=(kernel - 1) // 2)
        if pooled:
            layers['pool%d' % number] = max_pool(2, 2)

    # Skip branch: 512 -> 64 channels, then space-to-depth with block size 2.
    layers['skip_connection'] = nn.Sequential(
        ConvBnAct(512, 64, 1, stride=1, padding=0),
        SpaceToDepth(2))
    layers['conv21'] = ConvBnAct(1024, 1024, 3, stride=1, padding=1)
    # conv22 takes 1280 input channels -- presumably conv21's output
    # concatenated with the skip branch; the concatenation is not done here.
    layers['conv22'] = ConvBnAct(1280, 1024, 3, stride=1, padding=1)

    output_channel = self.num_anchors * (5 + self.num_classes)
    layers['logits'] = conv2d(1024, output_channel, 1,
                              stride=1, padding=0, bias=True)

    self.module = nn.ModuleList(layers.values())
    return layers
def __init__(self, in_features_num, num_anchors=9, num_classes=80,
             features_num=256, layers_num=3, num_pyramid_levels=5,
             head_type='simple', act_type='relu', share_weights=True,
             conv_kernel_size=3, conv_stride=1, conv_padding=1,
             onnx_export=ONNX_EXPORT, **kwargs):
    """Build the classification head applied to each pyramid level.

    Args:
        in_features_num: Input channels of the first conv of each tower.
        num_anchors: Anchors per location; the header emits
            ``num_anchors * num_classes`` channels.
        num_classes: Number of classes.
        features_num: Channel width inside the tower.
        layers_num: Number of conv layers per tower.
        num_pyramid_levels: Number of pyramid levels; one set of batch-norm
            layers is always created per level.
        head_type: ``'simple'`` (``nn.Conv2d``) or ``'efficient'``
            (``SeparableConvBlock`` without its own norm/activation).
        act_type: ``'relu'`` or ``'swish'``.
        share_weights: If true, a single tower and header are created;
            otherwise one of each per pyramid level.
        conv_kernel_size: Kernel size; a list is converted to a tuple.
        conv_stride: Convolution stride.
        conv_padding: Padding; a list is converted to a tuple.
        onnx_export: With ``act_type='swish'``, selects ``Swish`` instead of
            ``MemoryEfficientSwish``.
        **kwargs: Only ``logger`` is read; when given, the configuration is
            logged and the logger is passed to ``_initialize_weights``.
    """
    assert head_type in ['simple', 'efficient']
    assert act_type in ['relu', 'swish']
    super(Classifier, self).__init__()

    self.convert_onnx = False
    self.pyramid_sizes = None

    if isinstance(conv_kernel_size, list):
        conv_kernel_size = tuple(conv_kernel_size)
    if isinstance(conv_padding, list):
        conv_padding = tuple(conv_padding)

    self.num_anchors = num_anchors
    self.num_classes = num_classes
    self.layers_num = layers_num
    self.num_pyramid_levels = num_pyramid_levels
    self.share_weights = share_weights

    logger = kwargs.get('logger', None)
    if logger:
        summary = (
            f'==== Build Head Layer ====================',
            f'Head Type : Classification ({head_type} + {act_type})',
            f'Features Num : {features_num}',
            f'Anchors Num : {num_anchors}',
            f'Layers Num : {layers_num}',
            f'Share Weights : {share_weights}',
            f'Conv Kernel Size : {conv_kernel_size}',
            f'Conv Padding : {conv_padding}',
            f'Conv Stride : {conv_stride}',
        )
        for line in summary:
            logger.info(line)

    efficient = head_type == 'efficient'
    conv_cls = SeparableConvBlock if efficient else nn.Conv2d
    conv_kwargs = dict(kernel_size=conv_kernel_size,
                       stride=conv_stride,
                       padding=conv_padding)
    if efficient:
        # Norm and activation are applied by this head, not inside the block.
        conv_kwargs['norm'] = False
        conv_kwargs['activation'] = False

    def make_tower():
        # Only the first conv of a tower sees the raw input width.
        return nn.ModuleList([
            conv_cls(in_features_num if depth == 0 else features_num,
                     features_num, **conv_kwargs)
            for depth in range(layers_num)
        ])

    def make_header():
        return conv_cls(features_num, num_anchors * num_classes,
                        **conv_kwargs)

    # One tower/header in total when shared, otherwise one per level.
    replicas = 1 if share_weights else num_pyramid_levels
    self.conv_tower = nn.ModuleList([make_tower() for _ in range(replicas)])
    self.header = nn.ModuleList([make_header() for _ in range(replicas)])

    # Batch-norm layers are per level and per tower layer in both modes.
    self.bn_modules = nn.ModuleList([
        nn.ModuleList([
            nn.BatchNorm2d(features_num, momentum=0.01, eps=1e-3)
            for _ in range(layers_num)
        ])
        for _ in range(num_pyramid_levels)
    ])

    if act_type == 'swish':
        if onnx_export:
            self.act_fn = Swish()
        else:
            self.act_fn = MemoryEfficientSwish()
    else:
        self.act_fn = nn.ReLU()
    self.header_act = nn.Sigmoid()

    self._initialize_weights(logger=logger)
def create_network(self, blocks):
    """Build the list of layers described by parsed configuration blocks.

    Each entry of ``blocks`` is a mapping whose ``'type'`` selects the layer
    to create; option values are converted with ``int``/``float`` here.  A
    ``'net'`` block only sets the input channel count and yields no module.
    Every other recognised block appends exactly one module to the result
    and one entry to the per-layer channel table that ``route`` and
    ``shortcut`` blocks index into.  Blocks of unknown type are reported
    with ``print`` and skipped.

    Args:
        blocks: Iterable of dict-like block descriptions.

    Returns:
        nn.ModuleList with one module per recognised non-``net`` block.

    Raises:
        ValueError: If a ``cost`` block has an unsupported ``_type`` or a
            ``connected`` block has an unsupported ``activation``.  These
            cases used to fall through without assigning a module, so the
            module of the previous block was silently appended a second
            time (or an ``UnboundLocalError`` was raised).
    """
    # Loss classes for 'cost' blocks, keyed by the block's '_type'.
    cost_losses = {
        'sse': nn.MSELoss,
        'L1': nn.L1Loss,
        'smooth': nn.SmoothL1Loss,
    }
    connected_activations = ('linear', 'leaky', 'relu')

    models = nn.ModuleList()
    prev_filters = 3   # channels produced by the most recent layer
    out_filters = []   # channels produced by each appended layer
    conv_id = 0

    for block in blocks:
        kind = block['type']

        if kind == 'net':
            prev_filters = int(block['channels'])
            continue

        elif kind == 'convolutional':
            conv_id += 1
            batch_normalize = int(block['batch_normalize'])
            filters = int(block['filters'])
            kernel_size = int(block['size'])
            stride = int(block['stride'])
            pad = (kernel_size - 1) // 2 if int(block['pad']) else 0
            activation = block['activation']

            model = nn.Sequential()
            if batch_normalize:
                # The conv bias is dropped when batch norm follows.
                model.add_module(
                    'conv{0}'.format(conv_id),
                    nn.Conv2d(prev_filters, filters, kernel_size, stride,
                              pad, bias=False))
                model.add_module(
                    'bn{0}'.format(conv_id),
                    nn.BatchNorm2d(filters, eps=1e-4))
            else:
                model.add_module(
                    'conv{0}'.format(conv_id),
                    nn.Conv2d(prev_filters, filters, kernel_size, stride,
                              pad))
            # Any other activation name adds no activation layer.
            if activation == 'leaky':
                model.add_module('leaky{0}'.format(conv_id),
                                 nn.LeakyReLU(0.1, inplace=True))
            elif activation == 'relu':
                model.add_module('relu{0}'.format(conv_id),
                                 nn.ReLU(inplace=True))
            prev_filters = filters
            out_filters.append(prev_filters)
            models.append(model)

        elif kind == 'maxpool':
            pool_size = int(block['size'])
            stride = int(block['stride'])
            if stride > 1:
                model = nn.MaxPool2d(pool_size, stride)
            else:
                model = MaxPoolStride1()
            out_filters.append(prev_filters)
            models.append(model)

        elif kind == 'avgpool':
            out_filters.append(prev_filters)
            models.append(GlobalAvgPool2d())

        elif kind == 'softmax':
            out_filters.append(prev_filters)
            models.append(nn.Softmax())

        elif kind == 'cost':
            cost_type = block['_type']
            if cost_type not in cost_losses:
                raise ValueError(
                    'unsupported cost type %s' % (cost_type,))
            model = cost_losses[cost_type](size_average=True)
            out_filters.append(1)
            models.append(model)

        elif kind == 'reorg':
            stride = int(block['stride'])
            prev_filters = stride * stride * prev_filters
            out_filters.append(prev_filters)
            models.append(Reorg(stride))

        elif kind == 'route':
            ind = len(models)
            # Non-positive indices are relative to the current position.
            layers = [int(i) if int(i) > 0 else int(i) + ind
                      for i in block['layers'].split(',')]
            if len(layers) == 1:
                prev_filters = out_filters[layers[0]]
            elif len(layers) == 2:
                assert(layers[0] == ind - 1)
                prev_filters = (out_filters[layers[0]]
                                + out_filters[layers[1]])
            out_filters.append(prev_filters)
            models.append(EmptyModule())

        elif kind == 'shortcut':
            ind = len(models)
            prev_filters = out_filters[ind - 1]
            out_filters.append(prev_filters)
            models.append(EmptyModule())

        elif kind == 'connected':
            filters = int(block['output'])
            activation = block['activation']
            if activation not in connected_activations:
                raise ValueError(
                    'unsupported connected activation %s' % (activation,))
            if activation == 'linear':
                model = nn.Linear(prev_filters, filters)
            elif activation == 'leaky':
                model = nn.Sequential(
                    nn.Linear(prev_filters, filters),
                    nn.LeakyReLU(0.1, inplace=True))
            else:
                model = nn.Sequential(
                    nn.Linear(prev_filters, filters),
                    nn.ReLU(inplace=True))
            prev_filters = filters
            out_filters.append(prev_filters)
            models.append(model)

        elif kind == 'region':
            loss = RegionLoss()
            anchors = block['anchors'].split(',')
            if anchors == ['']:
                loss.anchors = []
            else:
                loss.anchors = [float(i) for i in anchors]
            loss.num_classes = int(block['classes'])
            loss.num_anchors = int(block['num'])
            loss.anchor_step = len(loss.anchors) // loss.num_anchors
            loss.object_scale = float(block['object_scale'])
            loss.noobject_scale = float(block['noobject_scale'])
            loss.class_scale = float(block['class_scale'])
            loss.coord_scale = float(block['coord_scale'])
            out_filters.append(prev_filters)
            models.append(loss)

        else:
            print('unknown type %s' % (block['type']))

    return models
def __init__(self, mem_slots, head_size, input_size, num_heads=1,
             num_blocks=1, forget_bias=1., input_bias=0., gate_style='unit',
             attention_mlp_layers=2, key_size=None,
             return_all_outputs=False):
    """Set up the parameters of a relational memory core.

    Args:
        mem_slots: Number of memory slots.
        head_size: Size of each attention head; also used as the value size.
        input_size: Size of the input that is projected to ``mem_size``.
        num_heads: Number of attention heads.
        num_blocks: Stored as ``self.num_blocks``; must be >= 1.
        forget_bias: Initial value of the trainable scalar forget-gate bias.
        input_bias: Initial value of the trainable scalar input-gate bias.
        gate_style: ``'unit'``, ``'memory'`` or ``None``.
        attention_mlp_layers: Number of linear layers in the attention MLP;
            must be >= 1.
        key_size: Attention key size; falls back to ``head_size`` when
            falsy.
        return_all_outputs: Stored as ``self.return_all_outputs``.

    Raises:
        ValueError: If ``num_blocks`` < 1, ``gate_style`` is not one of the
            allowed values, or ``attention_mlp_layers`` < 1.
    """
    super(RelationalMemory, self).__init__()

    ########## generic parameters for RMC ##########
    self.mem_slots = mem_slots
    self.head_size = head_size
    self.num_heads = num_heads
    self.mem_size = self.head_size * self.num_heads

    # The input of each time step is concatenated to the memory before
    # self-attention, so attention runs over mem_slots + 1 rows
    # (e.g. 2 when mem_slots == 1).
    self.mem_slots_plus_input = self.mem_slots + 1

    if num_blocks < 1:
        raise ValueError(
            'num_blocks must be >=1. Got: {}.'.format(num_blocks))
    self.num_blocks = num_blocks

    if gate_style not in ['unit', 'memory', None]:
        raise ValueError(
            'gate_style must be one of [\'unit\', \'memory\', None]. got: '
            '{}.'.format(gate_style))
    self.gate_style = gate_style

    if attention_mlp_layers < 1:
        raise ValueError('attention_mlp_layers must be >= 1. Got: {}.'.format(
            attention_mlp_layers))
    self.attention_mlp_layers = attention_mlp_layers

    self.key_size = key_size if key_size else self.head_size

    ########## parameters for multihead attention ##########
    # value_size is the same as head_size
    self.value_size = self.head_size
    # total size of query + key + value for one head
    self.qkv_size = 2 * self.key_size + self.value_size
    self.total_qkv_size = self.qkv_size * self.num_heads  # denoted as F

    # One linear layer projects to the queries, keys and values of all
    # heads at once instead of keeping a separate projector per head.
    self.qkv_projector = nn.Linear(self.mem_size, self.total_qkv_size)
    self.qkv_layernorm = nn.LayerNorm(
        [self.mem_slots_plus_input, self.total_qkv_size])

    # Attention MLP.  Each layer must be its own nn.Linear: building the
    # list with ``[layer] * n`` repeats one module object, which makes all
    # layers share a single weight matrix and bias.
    self.attention_mlp = nn.ModuleList(
        [nn.Linear(self.mem_size, self.mem_size)
         for _ in range(self.attention_mlp_layers)])
    self.attended_memory_layernorm = nn.LayerNorm(
        [self.mem_slots_plus_input, self.mem_size])
    self.attended_memory_layernorm2 = nn.LayerNorm(
        [self.mem_slots_plus_input, self.mem_size])

    ########## parameters for initial embedded input projection ##########
    self.input_size = input_size
    self.input_projector = nn.Linear(self.input_size, self.mem_size)

    ########## parameters for gating ##########
    # Twice the gate size: sized for two gates (see forget_bias/input_bias).
    self.num_gates = 2 * self.calculate_gate_size()
    self.input_gate_projector = nn.Linear(self.mem_size, self.num_gates)
    self.memory_gate_projector = nn.Linear(self.mem_size, self.num_gates)
    # trainable scalar gate bias tensors
    self.forget_bias = nn.Parameter(
        torch.tensor(forget_bias, dtype=torch.float32))
    self.input_bias = nn.Parameter(
        torch.tensor(input_bias, dtype=torch.float32))

    ########## number of outputs returned #####
    self.return_all_outputs = return_all_outputs
def __init__(self, params, dico, with_output):
    """Build the embeddings and layer stacks of the transformer.

    Args:
        params: Hyper-parameter namespace. Reads ``tgt_n_words``,
            ``eos_index``, ``pad_index``, ``emb_dim``, ``n_heads``,
            ``dec_n_layers``, ``dropout``, ``attention_dropout``,
            ``sinusoidal_embeddings``, ``gelu_activation`` and, when
            ``with_output`` is true, ``share_inout_emb``.
        dico: Dictionary; its length must equal ``params.tgt_n_words``.
        with_output: If true, a prediction layer is created, optionally
            sharing its projection weight with the input embeddings.
    """
    super().__init__()

    # output layer switch
    self.with_output = with_output

    # dictionary
    self.n_words = params.tgt_n_words
    self.eos_index = params.eos_index
    self.pad_index = params.pad_index
    self.dico = dico
    assert len(self.dico) == self.n_words

    # model dimensions
    dim = params.emb_dim                # 512 by default
    self.dim = dim
    self.hidden_dim = dim * 4           # 2048 by default
    self.n_heads = params.n_heads       # 8 by default
    self.n_layers = params.dec_n_layers
    self.dropout = params.dropout
    self.attention_dropout = params.attention_dropout
    assert self.dim % self.n_heads == 0, 'transformer dim must be a multiple of n_heads'

    def new_norm():
        return nn.LayerNorm(self.dim, eps=1e-12)

    def new_attention():
        return MultiHeadAttention(self.n_heads, self.dim,
                                  dropout=self.attention_dropout)

    # embeddings
    self.position_embeddings = Embedding(N_MAX_POSITIONS, self.dim)
    if params.sinusoidal_embeddings:
        create_sinusoidal_embeddings(
            N_MAX_POSITIONS, self.dim, out=self.position_embeddings.weight)
    self.embeddings = Embedding(self.n_words, self.dim,
                                padding_idx=self.pad_index)
    self.layer_norm_emb = new_norm()

    # per-layer module lists (one entry per transformer layer)
    self.attentions = nn.ModuleList()
    self.layer_norm1 = nn.ModuleList()
    self.ffns = nn.ModuleList()
    self.layer_norm2 = nn.ModuleList()
    self.layer_norm15 = nn.ModuleList()
    self.encoder_attn = nn.ModuleList()

    for _ in range(self.n_layers):
        self.attentions.append(new_attention())
        self.layer_norm1.append(new_norm())
        self.layer_norm15.append(new_norm())
        self.encoder_attn.append(new_attention())
        self.ffns.append(
            TransformerFFN(self.dim, self.hidden_dim, self.dim,
                           dropout=self.dropout,
                           gelu_activation=params.gelu_activation))
        self.layer_norm2.append(new_norm())

    # output layer
    if self.with_output:
        self.pred_layer = PredLayer(params)
        if params.share_inout_emb:
            # Tie the output projection to the input embedding matrix.
            self.pred_layer.proj.weight = self.embeddings.weight
def __init__(self, args, dictionary, embed_tokens, no_encoder_attn=False):
    """Build the decoder: embeddings, layer stack and output projection.

    Args:
        args: Parsed options namespace; the ``decoder_*``, softmax and
            normalisation options are read from it.
        dictionary: Target dictionary, passed to the base class; its length
            sizes the output layer.
        embed_tokens: Token embedding module; provides ``embedding_dim``
            and ``padding_idx``.
        no_encoder_attn: Forwarded to every ``TransformerDecoderLayer``.
    """
    super().__init__(dictionary)
    self.register_buffer('version', torch.Tensor([3]))

    self.dropout = args.dropout
    self.decoder_layerdrop = args.decoder_layerdrop
    self.share_input_output_embed = args.share_decoder_input_output_embed

    input_embed_dim = embed_tokens.embedding_dim
    embed_dim = args.decoder_embed_dim
    self.output_embed_dim = args.decoder_output_dim

    self.padding_idx = embed_tokens.padding_idx
    self.max_target_positions = args.max_target_positions

    self.embed_tokens = embed_tokens
    if args.no_scale_embedding:
        self.embed_scale = 1.0
    else:
        self.embed_scale = math.sqrt(embed_dim)

    # Project the token embeddings only when their width differs from the
    # decoder width.
    if embed_dim != input_embed_dim:
        self.project_in_dim = Linear(input_embed_dim, embed_dim, bias=False)
    else:
        self.project_in_dim = None

    if not args.no_token_positional_embeddings:
        self.embed_positions = PositionalEmbedding(
            args.max_target_positions,
            embed_dim,
            self.padding_idx,
            learned=args.decoder_learned_pos,
        )
    else:
        self.embed_positions = None

    self.cross_self_attention = getattr(args, 'cross_self_attention', False)
    self.layer_wise_attention = getattr(args, 'layer_wise_attention', False)

    self.layers = nn.ModuleList([
        TransformerDecoderLayer(args, no_encoder_attn)
        for _ in range(args.decoder_layers)
    ])

    self.adaptive_softmax = None

    # Output projection is skipped when the widths already match or when
    # the adaptive weights are tied.
    if embed_dim != self.output_embed_dim and not args.tie_adaptive_weights:
        self.project_out_dim = Linear(
            embed_dim, self.output_embed_dim, bias=False)
    else:
        self.project_out_dim = None

    if args.adaptive_softmax_cutoff is not None:
        if args.tie_adaptive_weights:
            adaptive_inputs = embed_tokens
        else:
            adaptive_inputs = None
        self.adaptive_softmax = AdaptiveSoftmax(
            len(dictionary),
            self.output_embed_dim,
            options.eval_str_list(args.adaptive_softmax_cutoff, type=int),
            dropout=args.adaptive_softmax_dropout,
            adaptive_inputs=adaptive_inputs,
            factor=args.adaptive_softmax_factor,
            tie_proj=args.tie_adaptive_proj,
        )
    elif not self.share_input_output_embed:
        # Separate output embedding matrix, created only when it is not
        # shared with the input embeddings.
        self.embed_out = nn.Parameter(
            torch.Tensor(len(dictionary), self.output_embed_dim))
        nn.init.normal_(self.embed_out, mean=0,
                        std=self.output_embed_dim ** -0.5)

    skip_final_norm = getattr(args, 'no_decoder_final_norm', False)
    if args.decoder_normalize_before and not skip_final_norm:
        self.layer_norm = LayerNorm(embed_dim)
    else:
        self.layer_norm = None

    if getattr(args, 'layernorm_embedding', False):
        self.layernorm_embedding = LayerNorm(embed_dim)
    else:
        self.layernorm_embedding = None
def __init__(self, num_layers, input_size):
    """Create a small MLP: input layer, ``num_layers`` 5x5 layers, output layer.

    Args:
        num_layers: Number of 5 -> 5 linear layers stored in ``self.middle``.
        input_size: Number of input features of the first linear layer.
    """
    super(test_net, self).__init__()
    self.num_layers = num_layers
    self.linear_1 = nn.Linear(input_size, 5)
    hidden = nn.ModuleList()
    for _ in range(num_layers):
        hidden.append(nn.Linear(5, 5))
    self.middle = hidden
    self.output = nn.Linear(5, 2)
def __init__(self, layer_config, num_classes=1):
    """Build the five block stages, the fusion head and the classifier conv.

    Args:
        layer_config: Indexable configuration. Entry ``i`` provides the
            input spec (``[i][0]``) and output spec (``[i][1]``) of block
            ``i``; the last entry lists the number of blocks in stages 1-4;
            the entries after the last block are handed to ``CSFHead``.
        num_classes: Output channels of the final 1x1 convolution.
    """
    super(CSNet, self).__init__()
    self.stages = layer_config[-1]
    self.layer_config = layer_config
    cfg = self.layer_config
    # NOTE: fuse_in is filled below but not used again in this constructor.
    fuse_in = np.zeros(3)
    print(self.layer_config)

    def total_width(pos):
        """Rounded sum of the output spec of block ``pos``."""
        return int(round(sum(cfg[pos][1])))

    def append_tail(stage, pos, count):
        """Append blocks 1..count-1 of a stage and return the next index.

        The last block of the stage is built with ``nextstride=2``, all
        others with ``nextstride=1``.
        """
        for i in range(1, count):
            stage.append(
                ILBlock(cfg[pos][0], cfg[pos][1],
                        nextoutlist=cfg[pos + 1][1],
                        nextstride=2 if i == count - 1 else 1))
            pos += 1
        return pos

    index = 0

    # Stage 0: a single stem block fed by a 3-channel input.
    self.stage0 = nn.ModuleList()
    self.stage0.append(
        ILBlock(np.array([3]), cfg[index][1],
                nextoutlist=cfg[index + 1][1], stride=1, first=True))
    index += 1

    # Stage 1: the first block keeps the default stride.
    self.stage1 = nn.ModuleList()
    self.stage1.append(
        ILBlock(cfg[index][0], cfg[index][1],
                nextoutlist=cfg[index + 1][1]))
    index += 1
    index = append_tail(self.stage1, index, self.stages[0])

    # Stage 2: the first block uses stride 2.
    self.stage2 = nn.ModuleList()
    self.stage2.append(
        ILBlock(cfg[index][0], cfg[index][1],
                nextoutlist=cfg[index + 1][1], stride=2))
    index += 1
    index = append_tail(self.stage2, index, self.stages[1])
    fuse_in[0] = total_width(index - 1)

    # Stage 3: same layout as stage 2.
    self.stage3 = nn.ModuleList()
    self.stage3.append(
        ILBlock(cfg[index][0], cfg[index][1],
                nextoutlist=cfg[index + 1][1], stride=2))
    index += 1
    index = append_tail(self.stage3, index, self.stages[2])
    fuse_in[1] = total_width(index - 1)

    # Stage 4: the blocks after the first have no successor
    # (nextoutlist=None) and take no nextstride.
    self.stage4 = nn.ModuleList()
    self.stage4.append(
        ILBlock(cfg[index][0], cfg[index][1],
                nextoutlist=cfg[index + 1][1], stride=2))
    index += 1
    for _ in range(1, self.stages[3]):
        self.stage4.append(
            ILBlock(cfg[index][0], cfg[index][1], nextoutlist=None))
        index += 1
    fuse_in[2] = total_width(index - 1)

    # The three configuration entries after the last block go to the head.
    self.oct_fuse = CSFHead(cfg[index:index + 3])
    fuse_out_channel = total_width(-2)
    self.cls_layer = nn.Conv2d(fuse_out_channel, num_classes, kernel_size=1)

    self.all_flops = 0
    self.batchsize = 0
def __init__(self,
             in_channels,
             out_channels,
             num_outs,
             start_level=0,
             end_level=-1,
             add_extra_convs=False,
             extra_convs_on_inputs=True,
             relu_before_extra_convs=False,
             no_norm_on_lateral=False,
             conv_cfg=None,
             norm_cfg=None,
             act_cfg=None,
             upsample_cfg=dict(mode='nearest')):
    """Create the lateral, output and optional extra convs of the pyramid.

    Args:
        in_channels: List with the channel count of every input level.
        out_channels: Channel count of every pyramid output.
        num_outs: Number of output levels.
        start_level: Index of the first input level that is used.
        end_level: Index one past the last input level used; ``-1`` means
            all remaining levels. Extra levels are only allowed with ``-1``.
        add_extra_convs: ``False``, ``True`` or one of ``'on_input'``,
            ``'on_lateral'``, ``'on_output'``.
        extra_convs_on_inputs: Only consulted when ``add_extra_convs`` is
            ``True``: selects ``'on_input'`` (true) or ``'on_output'``.
        relu_before_extra_convs: Stored as an attribute.
        no_norm_on_lateral: If true, lateral convs get no ``norm_cfg``.
        conv_cfg: Forwarded to every ``ConvModule``.
        norm_cfg: Forwarded to every ``ConvModule`` (see above for laterals).
        act_cfg: Forwarded to every ``ConvModule``.
        upsample_cfg: Stored as a copy in ``self.upsample_cfg``.
    """
    super(SiameseFPN, self).__init__()
    assert isinstance(in_channels, list)
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.num_ins = len(in_channels)
    self.num_outs = num_outs
    self.relu_before_extra_convs = relu_before_extra_convs
    self.no_norm_on_lateral = no_norm_on_lateral
    self.fp16_enabled = False
    self.upsample_cfg = upsample_cfg.copy()

    if end_level == -1:
        self.backbone_end_level = self.num_ins
        assert num_outs >= self.num_ins - start_level
    else:
        # With an explicit end level no extra level is allowed.
        self.backbone_end_level = end_level
        assert end_level <= len(in_channels)
        assert num_outs == end_level - start_level
    self.start_level = start_level
    self.end_level = end_level

    # Normalise add_extra_convs to False or one of the three source names.
    self.add_extra_convs = add_extra_convs
    assert isinstance(add_extra_convs, (str, bool))
    if isinstance(add_extra_convs, str):
        assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
    elif add_extra_convs:
        # Plain True keeps the older flag-based behaviour.
        # TODO: deprecate `extra_convs_on_inputs`
        self.add_extra_convs = (
            'on_input' if extra_convs_on_inputs else 'on_output')

    lateral_norm = None if self.no_norm_on_lateral else norm_cfg
    shared = dict(conv_cfg=conv_cfg, act_cfg=act_cfg, inplace=False)

    self.lateral_convs = nn.ModuleList()
    self.fpn_convs = nn.ModuleList()
    for level in range(self.start_level, self.backbone_end_level):
        lateral = ConvModule(
            in_channels[level], out_channels, 1,
            norm_cfg=lateral_norm, **shared)
        output = ConvModule(
            out_channels, out_channels, 3, padding=1,
            norm_cfg=norm_cfg, **shared)
        self.lateral_convs.append(lateral)
        self.fpn_convs.append(output)

    # Extra stride-2 convs for output levels beyond the inputs
    # (e.g., RetinaNet).
    extra_levels = num_outs - self.backbone_end_level + self.start_level
    if self.add_extra_convs and extra_levels >= 1:
        for extra in range(extra_levels):
            # Only the first extra conv can read a raw input level.
            if extra == 0 and self.add_extra_convs == 'on_input':
                extra_in = self.in_channels[self.backbone_end_level - 1]
            else:
                extra_in = out_channels
            self.fpn_convs.append(
                ConvModule(
                    extra_in, out_channels, 3, stride=2, padding=1,
                    norm_cfg=norm_cfg, **shared))

    self.sigmoid = nn.Sigmoid()
def __init__(
        self,
        *,
        mlp: List[int],
        npoint: int = None,
        split: int = 18,
        radius: float = None,
        nsample: int = None,
        bn: bool = True,
        use_xyz: bool = True,
        pooling: str = 'max',
        sigma: float = None,  # for RBF pooling
        normalize_xyz: bool = False,  # normalize local XYZ with radius
        sample_uniformly: bool = False,
        ret_unique_cnt: bool = False,
        same_idx: bool = False,
        use_feature: bool = True):
    """Create the grouper and ``split`` shared MLPs of this module.

    Args:
        mlp: Channel sizes of the shared MLP. The list is copied and is
            no longer modified in place.
        npoint: If not ``None`` a ``QueryAndGroup`` grouper is used,
            otherwise ``GroupAll``.
        split: Number of ``SharedMLP`` instances created.
        radius: Grouping radius passed to ``QueryAndGroup``.
        nsample: Sample count passed to ``QueryAndGroup``.
        bn: Forwarded to ``SharedMLP``.
        use_xyz: Forwarded to the grouper; also adds 3 to the first MLP
            channel size.
        pooling: Stored as ``self.pooling``.
        sigma: Defaults to ``radius / 2`` when ``None`` and a radius is
            given; stays ``None`` when both are ``None``.
        normalize_xyz: Forwarded to ``QueryAndGroup``.
        sample_uniformly: Forwarded to ``QueryAndGroup``.
        ret_unique_cnt: Forwarded to ``QueryAndGroup``.
        same_idx: Stored as ``self.same_idx``.
        use_feature: Forwarded to ``QueryAndGroup``; also doubles the first
            MLP channel size.
    """
    super().__init__()

    self.npoint = npoint
    self.radius = radius
    self.split = split
    self.nsample = nsample
    self.pooling = pooling
    self.mlp_module = None
    self.use_xyz = use_xyz
    self.sigma = sigma
    # Dividing a missing radius used to raise TypeError when neither
    # sigma nor radius was supplied.
    if self.sigma is None and self.radius is not None:
        self.sigma = self.radius / 2
    self.normalize_xyz = normalize_xyz
    self.ret_unique_cnt = ret_unique_cnt
    self.same_idx = same_idx

    if npoint is not None:
        # An earlier variant used pointnet2_utils.PairwiseGroup here with
        # the same arguments except ret_idx.
        self.grouper = pointnet2_utils.QueryAndGroup(
            radius,
            nsample,
            use_xyz=use_xyz,
            ret_grouped_xyz=True,
            normalize_xyz=normalize_xyz,
            sample_uniformly=sample_uniformly,
            ret_unique_cnt=ret_unique_cnt,
            use_feature=use_feature,
            ret_idx=True)
    else:
        self.grouper = pointnet2_utils.GroupAll(use_xyz, ret_grouped_xyz=True)

    # Work on a copy: the first channel size is adjusted below, and
    # aliasing the argument would change the caller's list, so reusing one
    # spec for several modules would inflate it on every construction.
    mlp_spec = list(mlp)
    if use_feature and len(mlp_spec) > 0:
        mlp_spec[0] += mlp_spec[0]
    if use_xyz and len(mlp_spec) > 0:
        mlp_spec[0] += 3

    self.mlp_module = nn.ModuleList()
    for _ in range(split):
        self.mlp_module.append(pt_utils.SharedMLP(mlp_spec, bn=bn))
def __init__(self,
             output_blocks=[DEFAULT_BLOCK_INDEX],
             resize_input=True,
             normalize_input=True,
             requires_grad=False,
             use_fid_inception=True):
    """Build pretrained InceptionV3, truncated after the last needed block.

    Parameters
    ----------
    output_blocks : list of int
        Indices of the blocks whose features are returned:
            - 0: output of the first max pooling
            - 1: output of the second max pooling
            - 2: output which is fed to the aux classifier
            - 3: output of the final average pooling
    resize_input : bool
        If true, the input is bilinearly resized to 299x299 before being
        fed to the model. Without its fully connected layers the network
        is fully convolutional, so it should handle inputs of arbitrary
        size and resizing might not be strictly needed.
    normalize_input : bool
        If true, the input is scaled from the range (0, 1) to the range
        the pretrained Inception network expects, namely (-1, 1).
    requires_grad : bool
        Value assigned to ``requires_grad`` of every model parameter;
        setting it to true is possibly useful for finetuning.
    use_fid_inception : bool
        If true, uses the pretrained Inception model of TensorFlow's FID
        implementation; if false, the one available in torchvision. The
        FID model has different weights and a slightly different
        structure, so set this to true when computing FID scores to get
        comparable results.
    """
    super(InceptionV3, self).__init__()

    self.resize_input = resize_input
    self.normalize_input = normalize_input
    self.output_blocks = sorted(output_blocks)
    self.last_needed_block = max(output_blocks)
    last = self.last_needed_block

    assert last <= 3, \
        'Last possible output block index is 3'

    self.blocks = nn.ModuleList()

    if use_fid_inception:
        inception = fid_inception_v3()
    else:
        inception = models.inception_v3(pretrained=True)

    # Block 0 (input to maxpool1) is always created.
    self.blocks.append(nn.Sequential(
        inception.Conv2d_1a_3x3,
        inception.Conv2d_2a_3x3,
        inception.Conv2d_2b_3x3,
        nn.MaxPool2d(kernel_size=3, stride=2),
    ))

    # Block 1: maxpool1 to maxpool2
    if last >= 1:
        self.blocks.append(nn.Sequential(
            inception.Conv2d_3b_1x1,
            inception.Conv2d_4a_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2),
        ))

    # Block 2: maxpool2 to aux classifier
    if last >= 2:
        self.blocks.append(nn.Sequential(
            inception.Mixed_5b,
            inception.Mixed_5c,
            inception.Mixed_5d,
            inception.Mixed_6a,
            inception.Mixed_6b,
            inception.Mixed_6c,
            inception.Mixed_6d,
            inception.Mixed_6e,
        ))

    # Block 3: aux classifier to final avgpool
    if last >= 3:
        self.blocks.append(nn.Sequential(
            inception.Mixed_7a,
            inception.Mixed_7b,
            inception.Mixed_7c,
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
        ))

    for weight in self.parameters():
        weight.requires_grad = requires_grad
def __init__(self, length, in_channels, out_channels, residual_channels,
             block_channels, kernel_size, num_blocks, feedforward_channels):
    """Create the layers of a 1-D convolutional residual network.

    Arguments:
        length: int. The length of input sequences.
        in_channels: int. The number of input channels to this network.
        out_channels: int. The number of output channels of the final
            linear layer.
        residual_channels: int. The number of channels produced by the
            first convolution and by each residual block.
        block_channels: The number of channels inside each residual block.
        kernel_size: The kernel size of every convolutional layer.
        num_blocks: How many residual blocks. Each block has two
            convolutions of width block_channels.
        feedforward_channels: Size of the hidden layer of the final
            feedforward network.

    Intended architecture (the wiring, including the residual addition,
    happens outside this constructor):

        [Conv in_channels -> residual_channels]
            |
            +----------------------------+
            |                            |
        [Batch norm]                     |
            |                            |
          [ReLU]                         |
            |                            |
        [Conv residual_channels -> block_channels]
            |                            |
        [Batch norm]                     |
            |                            |
          [ReLU]                         |
            |                            |
        [Conv block_channels -> residual_channels]
            |                            |
        [Addition]-----------------------+
            |
            .   repeat for num_blocks blocks
            |
        [Linear length * residual_channels -> feedforward_channels]
            |
        [Linear feedforward_channels -> out_channels]
    """
    super(CNNResNet, self).__init__()

    self.length = length
    self.in_channels = in_channels
    self.residual_channels = residual_channels
    self.block_channels = block_channels
    self.num_blocks = num_blocks
    self.feedforward_channels = feedforward_channels
    self.out_channels = out_channels

    def left_pad():
        # kernel_size - 1 zeros on the left only, so the unpadded
        # convolution that follows keeps the sequence length.
        return nn.ConstantPad1d((kernel_size - 1, 0), 0)

    def conv(c_in, c_out):
        return nn.Conv1d(in_channels=c_in, out_channels=c_out,
                         kernel_size=kernel_size)

    self.first_padding = left_pad()
    self.first_conv = conv(in_channels, residual_channels)

    self.blocks = nn.ModuleList([
        nn.Sequential(
            nn.BatchNorm1d(residual_channels),
            nn.ReLU(),
            left_pad(),
            conv(residual_channels, block_channels),
            nn.BatchNorm1d(block_channels),
            nn.ReLU(),
            left_pad(),
            conv(block_channels, residual_channels),
        )
        for _ in range(num_blocks)
    ])

    self.final_affine_one = nn.Linear(length * residual_channels,
                                      feedforward_channels)
    self.final_affine_two = nn.Linear(feedforward_channels, out_channels)