def forward(self, x, inp_dim, num_classes, confidence):
    """Convert a raw detection feature map into predictions.

    The feature map is detached via ``.data``, moved to ``args.device``
    (``args`` is a module-level global) and handed to ``predict_transform``
    together with this layer's anchors.

    Args:
        x: Feature map produced by the preceding layer.
        inp_dim: Forwarded unchanged to ``predict_transform``.
        num_classes: Forwarded unchanged to ``predict_transform``.
        confidence: Forwarded unchanged to ``predict_transform``.

    Returns:
        Whatever ``predict_transform`` returns for this feature map.
    """
    # ``args`` is only read, never rebound, so no ``global`` statement is
    # needed to reach the module-level object.
    detached = x.data.to(args.device)
    return predict_transform(
        detached, inp_dim, self.anchors, num_classes, confidence, args
    )
def forward(self, x, inp_dim, num_classes, confidence):
    """Convert a raw detection feature map into predictions.

    The feature map is detached via ``.data`` and handed to
    ``predict_transform`` together with this layer's anchors and the
    module-level ``CUDA`` flag.

    Args:
        x: Feature map produced by the preceding layer.
        inp_dim: Forwarded unchanged to ``predict_transform``.
        num_classes: Forwarded unchanged to ``predict_transform``.
        confidence: Forwarded unchanged to ``predict_transform``.

    Returns:
        Whatever ``predict_transform`` returns for this feature map.
    """
    # ``CUDA`` is only read, never rebound, so no ``global`` statement is
    # needed to reach the module-level flag.
    return predict_transform(
        x.data, inp_dim, self.anchors, num_classes, confidence, CUDA
    )
def forward(self, x):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Every block in ``self.blocks[1:]`` is handled according to its ``"type"``:

    * ``convolutional`` / ``upsample``: apply ``self.module_list[i]``.
    * ``route``: reuse one cached output, or concatenate two along dim 1.
    * ``shortcut``: add the previous output to the one ``from`` layers away.
    * ``yolo``: run ``predict_transform`` on the detached feature map and
      append the result to the running detections along dim 1.

    Args:
        x: Input tensor fed to the first layer.

    Returns:
        The outputs of all ``yolo`` blocks concatenated along dim 1.

    Raises:
        UnboundLocalError: If the configuration contains no ``yolo`` block.
    """
    layer_cfgs = self.blocks[1:]
    cache = {}  # layer index -> output, consulted by route/shortcut blocks
    collecting = False  # True once the first yolo output has been stored

    for idx, cfg in enumerate(layer_cfgs):
        kind = cfg["type"]

        if kind in ("convolutional", "upsample"):
            x = self.module_list[idx](x)

        elif kind == "route":
            refs = [int(ref) for ref in cfg["layers"]]
            # Positive references are absolute layer indices; anything else
            # is an offset relative to the current layer.
            first = refs[0] if refs[0] > 0 else idx + refs[0]
            if len(refs) == 1:
                x = cache[first]
            else:
                second = refs[1] if refs[1] > 0 else idx + refs[1]
                # Concatenate the two feature maps along dim 1.
                x = torch.cat((cache[first], cache[second]), 1)

        elif kind == "shortcut":
            offset = int(cfg["from"])
            x = cache[idx - 1] + cache[idx + offset]

        elif kind == "yolo":
            anchors = self.module_list[idx][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(cfg["classes"])
            x = predict_transform(x.data, inp_dim, anchors, num_classes)
            if collecting:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                collecting = True

        cache[idx] = x

    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Config blocks (``self.blocks[1:]``) and ``self.module_list`` are walked
    in lockstep; every layer's output is cached so later ``route`` and
    ``shortcut`` blocks can refer back to it.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The outputs of all ``yolo`` blocks concatenated along dim 1.

    Raises:
        UnboundLocalError: If the configuration contains no ``yolo`` block.
    """
    layer_cfgs = self.blocks[1:]
    cache = {}  # layer index -> output, consulted by route/shortcut blocks
    collecting = False

    for idx, (cfg, layer) in enumerate(zip(layer_cfgs, self.module_list)):
        kind = cfg["type"]

        if kind == "convolutional" or kind == "upsample":
            x = layer(x)

        elif kind == "route":
            refs = [int(ref) for ref in cfg["layers"]]
            # Positive references are absolute layer indices; anything else
            # is an offset relative to the current layer.
            first = refs[0] if refs[0] > 0 else idx + refs[0]
            if len(refs) == 1:
                x = cache[first]
            else:
                second = refs[1] if refs[1] > 0 else idx + refs[1]
                x = torch.cat((cache[first], cache[second]), 1)

        elif kind == "shortcut":
            offset = int(cfg["from"])
            x = cache[idx - 1] + cache[idx + offset]

        elif kind == "yolo":
            anchors = layer[0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(cfg["classes"])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if collecting:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                collecting = True

        cache[idx] = x

    return detections
def forward(self, x):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Unlike a detaching variant, the feature map is passed to
    ``predict_transform`` as-is, and ``x`` itself is left untouched by a
    ``yolo`` block, so the cached output for that layer is the incoming
    feature map.

    Args:
        x: Input tensor fed to the first layer.

    Returns:
        All ``predict_transform`` results concatenated along dim 1.
        ``torch.cat`` raises if no ``yolo`` block produced a result.
    """
    layer_cfgs = self.blocks[1:]
    cache = {}  # layer index -> output, consulted by route/shortcut blocks
    per_scale = []  # one predict_transform result per yolo block

    for idx, cfg in enumerate(layer_cfgs):
        kind = cfg['type']

        if kind in ('convolutional', 'upsample'):
            x = self.module_list[idx](x)

        elif kind == 'route':
            refs = [int(ref) for ref in cfg['layers']]
            # Positive references are absolute layer indices; anything else
            # is an offset relative to the current layer.
            first = refs[0] if refs[0] > 0 else idx + refs[0]
            if len(refs) == 1:
                x = cache[first]
            else:
                second = refs[1] if refs[1] > 0 else idx + refs[1]
                x = torch.cat((cache[first], cache[second]), 1)

        elif kind == 'shortcut':
            offset = int(cfg['from'])
            x = cache[idx - 1] + cache[idx + offset]

        elif kind == 'yolo':
            anchors = self.module_list[idx][0].anchors
            inp_dim = int(self.net_info['height'])
            num_classes = int(cfg['classes'])
            per_scale.append(
                predict_transform(x, inp_dim, anchors, num_classes)
            )

        cache[idx] = x

    return torch.cat(per_scale, dim=1)
def forward(self, x, CUDA=False):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    ``self.blocks[0]`` is the ``net`` block and is not part of the forward
    pass, so iteration starts at the second block. Every layer's output is
    cached for later ``route`` / ``shortcut`` blocks.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The outputs of all ``yolo`` blocks concatenated along dim 1.

    Raises:
        UnboundLocalError: If the configuration contains no ``yolo`` block.
    """
    layer_cfgs = self.blocks[1:]
    cache = {}  # layer index -> output, consulted by route/shortcut blocks
    collecting = False  # True once the detection collector is initialised

    for idx, cfg in enumerate(layer_cfgs):
        kind = cfg['type']

        if kind in ('convolutional', 'upsample'):
            x = self.module_list[idx](x)

        elif kind == 'route':
            refs = [int(ref) for ref in cfg['layers']]
            # Positive references are absolute layer indices; anything else
            # is an offset relative to the current layer.
            first = refs[0] if refs[0] > 0 else idx + refs[0]
            if len(refs) == 1:
                x = cache[first]
            else:
                second = refs[1] if refs[1] > 0 else idx + refs[1]
                x = torch.cat((cache[first], cache[second]), 1)

        elif kind == 'shortcut':
            offset = int(cfg['from'])
            x = cache[idx - 1] + cache[idx + offset]

        elif kind == 'yolo':
            anchors = self.module_list[idx][0].anchors
            inp_dim = int(self.net_info['height'])
            num_classes = int(cfg['classes'])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if collecting:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                collecting = True

        cache[idx] = x

    return detections
def forward(self, x, CUDA=False):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    ``self.blocks[0]`` is the ``net`` block and is not part of the forward
    pass, so iteration starts at the second block. Every layer's output is
    cached for later ``route`` / ``shortcut`` blocks.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The outputs of all ``yolo`` blocks concatenated along dim 1.

    Raises:
        UnboundLocalError: If the configuration contains no ``yolo`` block.
    """
    layer_cfgs = self.blocks[1:]
    feature_cache = {}  # layer index -> output of that layer
    has_detections = False

    for layer_no, layer_cfg in enumerate(layer_cfgs):
        layer_type = layer_cfg['type']

        if layer_type in ('convolutional', 'upsample'):
            x = self.module_list[layer_no](x)

        elif layer_type == 'route':
            targets = [int(t) for t in layer_cfg['layers']]
            # Positive targets are absolute layer indices; anything else is
            # an offset relative to the current layer.
            src_a = targets[0] if targets[0] > 0 else layer_no + targets[0]
            if len(targets) == 1:
                x = feature_cache[src_a]
            else:
                src_b = targets[1] if targets[1] > 0 else layer_no + targets[1]
                x = torch.cat((feature_cache[src_a], feature_cache[src_b]), 1)

        elif layer_type == 'shortcut':
            back = int(layer_cfg['from'])
            x = feature_cache[layer_no - 1] + feature_cache[layer_no + back]

        elif layer_type == 'yolo':
            anchors = self.module_list[layer_no][0].anchors
            inp_dim = int(self.net_info['height'])
            num_classes = int(layer_cfg['classes'])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if has_detections:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                has_detections = True

        feature_cache[layer_no] = x

    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Iterates over ``self.blocks[1:]`` (the first block is the ``net`` block
    and is not part of the forward pass) and caches each layer's output for
    later ``route`` / ``shortcut`` blocks.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The ``yolo`` outputs concatenated along dim 1, or an empty list when
        no ``yolo`` block contributed a result.
    """
    modules = self.blocks[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    detections = []
    write = False  # True once the first yolo output has been stored

    for i, module in enumerate(modules):
        module_type = module["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in module["layers"]]
            # Positive entries are absolute layer indices; anything else is
            # an offset relative to the current layer.
            first = layers[0] if layers[0] > 0 else i + layers[0]
            if len(layers) == 1:
                x = outputs[first]
            else:
                second = layers[1] if layers[1] > 0 else i + layers[1]
                x = torch.cat((outputs[first], outputs[second]), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(module["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == "yolo":
            anchors = self.module_list[i][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(module["classes"])
            # The feature map reaching the yolo block is detached first.
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if isinstance(x, int):
                # predict_transform may hand back a plain int instead of a
                # tensor; such results are skipped entirely.
                continue
            detections = torch.cat((detections, x), 1) if write else x
            write = True
            outputs[i] = outputs[i - 1]

    # ``detections`` is always bound, so the former bare ``except`` around
    # this return was unreachable.
    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    The layers are visited in the order of ``self.blocks[1:]``; every
    layer's output is cached (key: layer index, value: output features) for
    later ``route`` / ``shortcut`` blocks.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The outputs of all ``yolo`` blocks concatenated along dim 1.

    Raises:
        UnboundLocalError: If the configuration contains no ``yolo`` block.
    """
    layer_cfgs = self.blocks[1:]
    cache = {}
    # Tracks whether a first-scale result exists; later scales are simply
    # concatenated onto it.
    collecting = False

    for idx, cfg in enumerate(layer_cfgs):
        kind = cfg['type']

        if kind == 'convolutional' or kind == 'upsample':
            # Plain layers: just run them.
            x = self.module_list[idx](x)

        elif kind == 'route':
            refs = [int(ref) for ref in cfg['layers']]
            # Positive references are absolute layer indices; anything else
            # is an offset relative to the current layer.
            first = refs[0] if refs[0] > 0 else idx + refs[0]
            if len(refs) == 1:
                x = cache[first]
            else:
                second = refs[1] if refs[1] > 0 else idx + refs[1]
                # Concatenate along dim 1 (the channel dimension, per the
                # original author's note).
                x = torch.cat((cache[first], cache[second]), dim=1)

        elif kind == 'shortcut':
            offset = int(cfg['from'])
            x = cache[idx - 1] + cache[idx + offset]

        elif kind == 'yolo':
            # Emit the predictions for this scale.
            anchors = self.module_list[idx][0].anchors
            inp_dim = int(self.net_info['height'])
            num_classes = int(cfg["classes"])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if collecting:
                detections = torch.cat((detections, x), dim=1)
            else:
                detections = x
                collecting = True

        # Remember this layer's result for later blocks.
        cache[idx] = x

    return detections
def forward(self, x, CUDA=True):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Iterates over every block except the leading ``net`` block and caches
    each layer's output for later ``route`` / ``shortcut`` blocks.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The ``yolo`` outputs concatenated along dim 1, or an empty list when
        no ``yolo`` block contributed a result.
    """
    modules = self.blocks[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    detections = []
    write = False  # True once the first detection result has been stored

    for i, module in enumerate(modules):
        module_type = module["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in module["layers"]]
            # Positive entries are absolute layer indices; anything else is
            # an offset relative to the current layer.
            first = layers[0] if layers[0] > 0 else i + layers[0]
            if len(layers) == 1:
                x = outputs[first]
            else:
                second = layers[1] if layers[1] > 0 else i + layers[1]
                x = torch.cat((outputs[first], outputs[second]), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(module["from"])
            # Element-wise sum of the previous output and the referenced one.
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == "yolo":
            anchors = self.module_list[i][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(module["classes"])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if isinstance(x, int):
                # predict_transform may hand back a plain int instead of a
                # tensor; such results are skipped entirely.
                continue
            # Results from the individual yolo blocks are joined along dim 1.
            detections = torch.cat((detections, x), 1) if write else x
            write = True
            outputs[i] = outputs[i - 1]

    # ``detections`` is always bound, so the former bare ``except`` around
    # this return was unreachable.
    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Iterates over ``self.blocks[1:]`` and caches each layer's output for
    later ``route`` / ``shortcut`` blocks.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The ``yolo`` outputs concatenated along dim 1, or an empty list when
        no ``yolo`` block contributed a result.
    """
    modules = self.blocks[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    detections = []
    write = False  # True once the first yolo output has been stored

    for i, module in enumerate(modules):
        module_type = module["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in module["layers"]]
            # Positive entries are absolute layer indices; anything else is
            # an offset relative to the current layer.
            first = layers[0] if layers[0] > 0 else i + layers[0]
            if len(layers) == 1:
                x = outputs[first]
            else:
                second = layers[1] if layers[1] > 0 else i + layers[1]
                x = torch.cat((outputs[first], outputs[second]), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(module["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == "yolo":
            anchors = self.module_list[i][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(module["classes"])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if isinstance(x, int):
                # predict_transform may hand back a plain int instead of a
                # tensor; such results are skipped entirely.
                continue
            detections = torch.cat((detections, x), 1) if write else x
            write = True
            outputs[i] = outputs[i - 1]

    # ``detections`` is always bound, so the former bare ``except`` around
    # this return was unreachable.
    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Walks ``self.blocks[1:]`` and caches each layer's output so that later
    ``route`` / ``shortcut`` blocks can look it up by index.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The outputs of all ``yolo`` blocks concatenated along dim 1.

    Raises:
        UnboundLocalError: If the configuration contains no ``yolo`` block.
    """
    layer_cfgs = self.blocks[1:]
    cache = {}  # layer index -> output, consulted by route/shortcut blocks
    collecting = False

    for idx, cfg in enumerate(layer_cfgs):
        kind = cfg["type"]

        if kind in ("convolutional", "upsample"):
            x = self.module_list[idx](x)

        elif kind == "route":
            refs = [int(ref) for ref in cfg["layers"]]
            # Positive references are absolute layer indices; anything else
            # is an offset relative to the current layer.
            first = refs[0] if refs[0] > 0 else idx + refs[0]
            if len(refs) == 1:
                x = cache[first]
            else:
                second = refs[1] if refs[1] > 0 else idx + refs[1]
                x = torch.cat((cache[first], cache[second]), 1)

        elif kind == "shortcut":
            # Element-wise sum of the previous output and the referenced one.
            offset = int(cfg["from"])
            x = cache[idx - 1] + cache[idx + offset]

        elif kind == "yolo":
            anchors = self.module_list[idx][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(cfg["classes"])
            # Detach the feature map that reaches the yolo block.
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if collecting:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                collecting = True

        cache[idx] = x

    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Walks ``self.blocks[1:]`` and caches each layer's output so that later
    ``route`` / ``shortcut`` blocks can look it up by index.

    Fixes relative to the previous revision:

    * ``upsample`` blocks are now executed (the type was misspelled
      ``'unsample'``, so they were silently skipped; the old spelling is
      still accepted).
    * A second route index of ``1`` is treated as an absolute index like
      every other positive value (the test was ``> 1``).
    * The shortcut ``from`` value is converted to ``int`` and, when
      negative, resolved relative to the current layer.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The outputs of all ``yolo`` blocks concatenated along dim 1.

    Raises:
        UnboundLocalError: If the configuration contains no ``yolo`` block.
    """
    modules = self.blocks[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    write = False  # True once the first yolo output has been stored

    for i, module in enumerate(modules):
        kind = module['type']

        if kind in ('convolutional', 'upsample', 'unsample'):
            x = self.module_lists[i](x)

        elif kind == 'route':
            layers = [int(ref.strip()) for ref in module['layers']]
            # Positive entries are absolute layer indices; anything else is
            # an offset relative to the current layer.
            first = layers[0] if layers[0] > 0 else i + layers[0]
            if len(layers) == 1:
                x = outputs[first]
            else:
                second = layers[1] if layers[1] > 0 else i + layers[1]
                x = torch.cat((outputs[first], outputs[second]), 1)

        elif kind == 'shortcut':
            from_ = int(module['from'])
            # Negative values count back from this layer; non-negative
            # values are kept as absolute indices, as before.
            source = from_ if from_ >= 0 else i + from_
            x = outputs[i - 1] + outputs[source]

        elif kind == 'yolo':
            x = x.data
            anchors = self.module_lists[i][0].anchors
            inp_size = int(self.netinfo['height'])
            class_num = int(module['classes'])
            x = predict_transform(x, inp_size, anchors, class_num, CUDA)
            if write:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                write = True

        outputs[i] = x

    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Walks ``self.blocks[1:]`` and caches each layer's output so that later
    ``route`` / ``shortcut`` blocks can look it up by index.

    Route references are resolved by sign: negative values are offsets from
    the current layer, non-negative values are absolute layer indices. The
    previous revision assumed the first entry was always relative and the
    second always absolute; every configuration that worked before resolves
    to the same layers.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Currently unused; see the note at the ``predict_transform``
            call.

    Returns:
        The outputs of all ``yolo`` blocks concatenated along dim 1.

    Raises:
        UnboundLocalError: If the configuration contains no ``yolo`` block.
    """
    modules = self.blocks[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    write = False  # True once the first yolo output has been stored

    for i, module in enumerate(modules):
        module_type = module['type']

        if module_type in ('convolutional', 'upsample'):
            x = self.module_list[i](x)

        elif module_type == 'route':
            # ``layers`` is a comma-separated string in this variant.
            refs = [int(ref) for ref in module['layers'].split(',')]
            sources = [ref if ref >= 0 else i + ref for ref in refs]
            if len(sources) == 1:
                x = outputs[sources[0]]
            else:
                # The trailing 1 is the dimension along which the two
                # feature maps are joined.
                x = torch.cat((outputs[sources[0]], outputs[sources[1]]), 1)

        elif module_type == 'shortcut':
            from_ = int(module['from'])
            x = outputs[i - 1] + outputs[i + from_]

        elif module_type == 'yolo':
            anchors = self.module_list[i][0].anchors
            inp_dim = int(self.net_info['height'])
            num_classes = int(module['classes'])
            # NOTE(review): the last argument is hard-coded to False and the
            # CUDA parameter is ignored; kept as-is, confirm whether CUDA
            # should be forwarded instead.
            x = predict_transform(x.data, inp_dim, anchors, num_classes, False)
            if write:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                write = True

        outputs[i] = x

    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Walks ``self.blocks[1:]`` and caches each layer's output so that later
    ``route`` / ``shortcut`` blocks can look it up by index.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Not read by this method; see the note at the
            ``util.predict_transform`` call.

    Returns:
        The outputs of all ``yolo`` blocks concatenated along dim 1.

    Raises:
        UnboundLocalError: If the configuration contains no ``yolo`` block.
    """
    layer_cfgs = self.blocks[1:]
    cache = {}  # layer index -> output, consulted by route/shortcut blocks
    collecting = False  # True once a collector has been initialised

    for idx, cfg in enumerate(layer_cfgs):
        kind = cfg["type"]

        if kind in ("convolutional", "upsample"):
            x = self.module_list[idx](x)

        elif kind == "route":
            refs = [int(ref) for ref in cfg["layers"]]
            # Positive references are absolute layer indices; anything else
            # is an offset relative to the current layer.
            first = refs[0] if refs[0] > 0 else idx + refs[0]
            if len(refs) == 1:
                x = cache[first]
            else:
                second = refs[1] if refs[1] > 0 else idx + refs[1]
                x = torch.cat((cache[first], cache[second]), 1)

        elif kind == "shortcut":
            offset = int(cfg["from"])
            x = cache[idx - 1] + cache[idx + offset]

        elif kind == "yolo":
            anchors = self.module_list[idx][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(cfg["classes"])
            # NOTE(review): the last argument is hard-coded to False and the
            # CUDA parameter is ignored; confirm this is intentional.
            x = util.predict_transform(
                x.data, inp_dim, anchors, num_classes, False
            )
            if collecting:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                collecting = True

        cache[idx] = x

    return detections
def forward(self, input, CUDA):
    """Run the network on ``input`` and return the stacked YOLO predictions.

    Walks ``self.block_list[1:]`` (the leading ``net`` block is skipped) and
    caches each layer's output so that later ``route`` / ``shortcut`` blocks
    can look it up by index.

    Fixes relative to the previous revision:

    * Each layer now receives the output of the previous layer; before,
      every layer (and every ``yolo`` transform) was fed the original
      network input.
    * ``torch.cat((detections. output), 1)`` was an attribute access rather
      than a two-element tuple and failed on the second ``yolo`` block.
    * Route references are resolved by sign (negative = relative offset,
      non-negative = absolute index) instead of by position.

    Args:
        input: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The ``yolo`` outputs concatenated along dim 1, or ``0`` when the
        configuration contains no ``yolo`` block.
    """
    block_without_net = self.block_list[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    write = False  # True once the first yolo output has been stored
    detections = 0
    x = input  # running activation, advanced after every block

    for i, module in enumerate(block_without_net):
        module_type = module['type']

        if module_type in ('convolutional', 'upsample'):
            x = self.layer_list[i](x)

        elif module_type == 'route':
            # ``layers`` is a comma-separated string in this variant.
            refs = [int(ref.strip()) for ref in module['layers'].split(',')]
            sources = [ref if ref >= 0 else i + ref for ref in refs]
            if len(sources) == 1:
                x = outputs[sources[0]]
            else:
                x = torch.cat((outputs[sources[0]], outputs[sources[1]]), 1)

        elif module_type == 'shortcut':
            from_ = int(module['from'])
            x = outputs[i - 1] + outputs[i + from_]

        elif module_type == 'yolo':
            # TODO: the anchors are read from the config block; the original
            # author considered ``self.layer_list[i][0]`` instead.
            anchors = block_without_net[i]['anchors']
            inp_dim = int(self.net_info['height'])
            num_classes = int(module['classes'])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if write:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                write = True

        outputs[i] = x

    return detections
def forward(self, x, cuda=True):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Route blocks in this variant carry ``start`` / ``end`` entries that are
    used directly as keys into the output cache; an ``end`` of ``0`` means
    the block forwards a single cached output.

    Args:
        x: Input tensor fed to the first layer.
        cuda: Forwarded unchanged to ``predict_transform``.

    Returns:
        The outputs of all ``yolo`` blocks concatenated along dim 1.

    Raises:
        UnboundLocalError: If the configuration contains no ``yolo`` block.
    """
    layer_cfgs = self.blocks[1:]
    cache = {}  # layer index -> output, consulted by route/shortcut blocks
    collecting = False

    for layer_no, cfg in enumerate(layer_cfgs):
        kind = cfg['type']

        if kind == 'convolutional' or kind == 'upsample':
            x = self.module_list[layer_no](x)

        elif kind == 'route':
            if cfg['end'] != 0:
                x = torch.cat((cache[cfg['start']], cache[cfg['end']]), 1)
            else:
                x = cache[cfg['start']]

        elif kind == 'shortcut':
            offset = int(cfg['from'])
            x = cache[layer_no - 1] + cache[layer_no + offset]

        elif kind == 'yolo':
            anchors = self.module_list[layer_no][0].anchors
            in_dim = int(self.net_info['height'])
            num_classes = int(cfg['classes'])
            x = predict_transform(x.data, in_dim, anchors, num_classes, cuda)
            if collecting:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                collecting = True

        cache[layer_no] = x

    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Iterates over ``self.blocks[1:]`` and caches each layer's output for
    later ``route`` / ``shortcut`` blocks. The input and output of layer 0
    are printed (permuted to ``(0, 2, 3, 1)``) as a debugging aid.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The ``yolo`` outputs concatenated along dim 1, or an empty list when
        no ``yolo`` block contributed a result.
    """
    modules = self.blocks[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    detections = []
    write = False  # True once the first yolo output has been stored

    for i, module in enumerate(modules):
        module_type = module["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            trace = i == 0  # only the very first layer is dumped
            if trace:
                print("Input to YOLO ", i)
                print(x.permute(0, 2, 3, 1).size())
                print(x.permute(0, 2, 3, 1))
            x = self.module_list[i](x)
            if trace:
                print("Output of YOLO ", i)
                print(x.permute(0, 2, 3, 1).size())
                print(x.permute(0, 2, 3, 1))
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in module["layers"]]
            # Positive entries are absolute layer indices; anything else is
            # an offset relative to the current layer.
            first = layers[0] if layers[0] > 0 else i + layers[0]
            if len(layers) == 1:
                x = outputs[first]
            else:
                second = layers[1] if layers[1] > 0 else i + layers[1]
                x = torch.cat((outputs[first], outputs[second]), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(module["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == "yolo":
            anchors = self.module_list[i][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(module["classes"])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if isinstance(x, int):
                # predict_transform may hand back a plain int instead of a
                # tensor; such results are skipped entirely.
                continue
            detections = torch.cat((detections, x), 1) if write else x
            write = True
            outputs[i] = outputs[i - 1]

    # ``detections`` is always bound, so the former bare ``except`` around
    # this return was unreachable.
    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Iterates over every entry of ``self.blocks`` except the leading ``net``
    block; the forward pass works on these config dictionaries. Each layer's
    output is cached for later ``route`` / ``shortcut`` blocks.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The ``yolo`` outputs concatenated along dim 1, or an empty list when
        no ``yolo`` block contributed a result.
    """
    modules = self.blocks[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    detections = []
    # False until the first detection arrives. An empty tensor cannot be
    # concatenated with a populated one, so the collector is only created
    # once there is a real result; later results are concatenated onto it.
    write = False

    for i, module in enumerate(modules):
        module_type = module["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in module["layers"]]
            # A positive entry names a layer directly; a non-positive entry
            # counts back from this route layer.
            first = layers[0] if layers[0] > 0 else i + layers[0]
            if len(layers) == 1:
                x = outputs[first]
            else:
                # The second entry is resolved the same way.
                second = layers[1] if layers[1] > 0 else i + layers[1]
                # The two feature maps are joined along dim 1.
                x = torch.cat((outputs[first], outputs[second]), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(module["from"])
            # Element-wise sum of the previous output and the referenced one.
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == "yolo":
            anchors = self.module_list[i][0].anchors
            # The input dimension comes from net_info.
            inp_dim = int(self.net_info["height"])
            num_classes = int(module["classes"])
            # The feature map reaching the yolo block is detached and handed
            # to predict_transform. Per the original author's notes the
            # result has shape (batch_size, grid * grid * num_anchors,
            # 5 + num_classes) -- TODO confirm against predict_transform.
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if isinstance(x, int):
                # predict_transform may hand back a plain int instead of a
                # tensor; such results are skipped entirely.
                continue
            # Results of the individual yolo blocks are joined along dim 1 so
            # that later steps can handle all predictions in one pass.
            detections = torch.cat((detections, x), 1) if write else x
            write = True
            outputs[i] = outputs[i - 1]

    # ``detections`` is always bound, so the former bare ``except`` around
    # this return was unreachable.
    return detections
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Because ``route`` and ``shortcut`` blocks need the feature maps of
    earlier layers, every layer's output is cached in a dict keyed by layer
    index.

    Args:
        x: Input sample data.
        CUDA: Flag forwarded unchanged to ``predict_transform``.

    Returns:
        The ``yolo`` outputs concatenated along dim 1 (per the original
        docstring, a tensor of size ``[batch_size, all_anchors,
        5 + classes_num]``), or an empty list when no ``yolo`` block
        contributed a result.
    """
    modules = self.blocks[1:]
    outputs = {}
    detections = []
    write = False  # True once the first detection has been stored

    for i, module in enumerate(modules):
        module_type = module["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in module["layers"]]
            # Positive entries are absolute layer indices; anything else is
            # an offset relative to the current layer.
            first = layers[0] if layers[0] > 0 else i + layers[0]
            if len(layers) == 1:
                # A single entry simply forwards that layer's feature map.
                x = outputs[first]
            else:
                # Two entries: join both feature maps along dim 1.
                second = layers[1] if layers[1] > 0 else i + layers[1]
                x = torch.cat((outputs[first], outputs[second]), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(module["from"])
            # Add the previous layer's map to the one ``from_`` points at.
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == "yolo":
            anchors = self.module_list[i][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(module["classes"])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if isinstance(x, int):
                # predict_transform may hand back a plain int instead of a
                # tensor; such results are skipped entirely.
                continue
            # A tensor cannot be concatenated onto an empty collector, so the
            # first result is stored as-is and later ones are appended.
            detections = torch.cat((detections, x), 1) if write else x
            write = True
            outputs[i] = outputs[i - 1]

    # ``detections`` is always bound, so the former bare ``except`` around
    # this return was unreachable.
    return detections
def forward(self, x, CUDA=False, Train=False):
    """Run the network on ``x`` in inference or training mode.

    Each layer's output is cached for later ``route`` / ``shortcut`` blocks.
    The feature map is passed to ``predict_transform`` without detaching.
    In training mode ``predict_transform`` is expected to return a
    3-tuple whose extra two items are collected alongside the detections.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.
        Train: Selects the training-mode return value and is forwarded to
            ``predict_transform``.

    Returns:
        ``detections`` when ``Train`` is false (an empty list if no ``yolo``
        block contributed); otherwise the tuple ``(detections, bbox,
        self.input_size, self.anchors, y_pred)``.
    """
    layer_cfgs = self.blocks[1:]
    cache = {}  # layer index -> output, consulted by route/shortcut blocks
    detections = []
    collecting = False

    for idx, cfg in enumerate(layer_cfgs):
        kind = cfg["type"]

        if kind in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[idx](x)
            cache[idx] = x

        elif kind == "route":
            refs = [int(ref) for ref in cfg["layers"]]
            # Positive references are absolute layer indices; anything else
            # is an offset relative to the current layer.
            first = refs[0] if refs[0] > 0 else idx + refs[0]
            if len(refs) == 1:
                x = cache[first]
            else:
                second = refs[1] if refs[1] > 0 else idx + refs[1]
                x = torch.cat((cache[first], cache[second]), 1)
            cache[idx] = x

        elif kind == "shortcut":
            offset = int(cfg["from"])
            x = cache[idx - 1] + cache[idx + offset]
            cache[idx] = x

        elif kind == 'yolo':
            anchors = self.module_list[idx][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(cfg["classes"])
            transformed = predict_transform(
                x, inp_dim, anchors, num_classes, CUDA, Train
            )
            if Train:
                x, box_b, pred = transformed
            else:
                x = transformed

            if type(x) is int:
                continue

            if collecting:
                detections = torch.cat((detections, x), 1)
                if Train:
                    bbox = torch.cat((bbox, box_b), 1)
                    y_pred = torch.cat((y_pred, pred), 1)
            else:
                detections = x
                collecting = True
                if Train:
                    bbox = box_b
                    y_pred = pred

            cache[idx] = cache[idx - 1]

    if not Train:
        return detections
    return detections, bbox, self.input_size, self.anchors, y_pred
def forward(self, x, CUDA = True):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Each layer's output is cached for later ``route`` / ``shortcut`` blocks.
    The index and type of every layer, every ``yolo`` result and the shapes
    of the cached outputs 81 and 82 are printed as a debugging aid.

    Fixes relative to the previous revision:

    * The shape of a ``yolo`` result is printed only after the int sentinel
      check, so an int result no longer raises ``AttributeError``.
    * The ``outputs[81]`` / ``outputs[82]`` dumps are skipped when those
      layers do not exist instead of raising ``KeyError``.
    * The unreachable bare ``except`` around the final return is gone.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The ``yolo`` outputs concatenated along dim 1, or an empty list when
        no ``yolo`` block contributed a result.
    """
    modules = self.blocks[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    detections = []
    write = False  # True once the first yolo output has been stored

    for i, module in enumerate(modules):
        module_type = module["type"]
        print('index -> ', i)
        print('type -> ', module_type)

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in module["layers"]]
            # Positive entries are absolute layer indices; anything else is
            # an offset relative to the current layer.
            first = layers[0] if layers[0] > 0 else i + layers[0]
            if len(layers) == 1:
                x = outputs[first]
            else:
                second = layers[1] if layers[1] > 0 else i + layers[1]
                x = torch.cat((outputs[first], outputs[second]), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(module["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == 'yolo':
            anchors = self.module_list[i][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(module["classes"])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if isinstance(x, int):
                # predict_transform may hand back a plain int instead of a
                # tensor; such results have no shape and are skipped.
                continue
            print('yolo shape: ', x.shape)
            print('yolo : ', x)
            detections = torch.cat((detections, x), 1) if write else x
            write = True
            outputs[i] = outputs[i - 1]

    # Debug dump of two fixed layers; only meaningful for networks that are
    # deep enough to have them.
    if 81 in outputs:
        print('outputs[81]: ', outputs[81].shape)
    if 82 in outputs:
        print('outputs[82]: ', outputs[82].shape)

    return detections
def forward(self, x, targets=None, CUDA=False):
    """Run the network for inference or compute the training loss.

    Without ``targets`` the ``yolo`` blocks are turned into detections via
    ``predict_transform``. With ``targets`` each ``yolo`` block contributes
    coordinate, confidence and class losses built from ``build_targets``.

    Fixes relative to the previous revision:

    * A stray ``return loss_x`` ended the pass at the first ``yolo`` block
      and made the loss accumulation unreachable; the summed loss of all
      ``yolo`` blocks is returned now.
    * The per-cell predictions use their own names instead of overwriting
      the running feature map ``x``.
    * ``targets`` is compared with ``is None`` and the unreachable bare
      ``except`` around the final return is gone.

    Args:
        x: Input tensor fed to the first layer.
        targets: Ground truth forwarded to ``build_targets``; ``None``
            selects inference mode.
        CUDA: When true the loss modules are moved to CUDA and every plain
            layer is moved to ``"cuda"`` before it is applied. Also
            forwarded to ``predict_transform``.

    Returns:
        In inference mode the ``yolo`` outputs concatenated along dim 1 (an
        empty list if no ``yolo`` block contributed); in training mode the
        accumulated loss (``0`` if there is no ``yolo`` block).
    """
    detections = []
    modules = self.blocks[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    loss = 0
    write = False  # True once the first yolo output has been stored
    device = "cpu"
    if CUDA:
        self.mse_loss = self.mse_loss.cuda()
        self.bce_loss = self.bce_loss.cuda()
        self.ce_loss = self.ce_loss.cuda()
        device = "cuda"

    for i, module in enumerate(modules):
        module_type = module["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i].to(device)(x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in module["layers"]]
            # Positive entries are absolute layer indices; anything else is
            # an offset relative to the current layer.
            first = layers[0] if layers[0] > 0 else i + layers[0]
            if len(layers) == 1:
                x = outputs[first]
            else:
                second = layers[1] if layers[1] > 0 else i + layers[1]
                x = torch.cat((outputs[first], outputs[second]), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(module["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == 'yolo':
            anchors = self.module_list[i][0].anchors
            inp_dim = int(self.net_info["height"])
            batch_size = x.size(0)
            num_classes = int(module["classes"])
            bbox_attrs = 5 + num_classes
            num_anchors = len(anchors)

            if targets is None:
                x = predict_transform(
                    x.data, inp_dim, anchors, num_classes, CUDA
                )
                if isinstance(x, int):
                    # predict_transform may hand back a plain int instead of
                    # a tensor; such results are skipped entirely.
                    continue
                detections = torch.cat((detections, x), 1) if write else x
                write = True
            else:
                FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
                LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
                ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

                stride = inp_dim // x.size(2)
                grid_size = inp_dim // stride

                # Rearrange to (batch, anchor, grid, grid, attribute).
                prediction = x.view(
                    batch_size, num_anchors, bbox_attrs, grid_size * grid_size
                )
                prediction = prediction.transpose(2, 3).contiguous()
                prediction = prediction.view(
                    batch_size, num_anchors, grid_size, grid_size, bbox_attrs
                )

                pred_x = torch.sigmoid(prediction[..., 0])  # center x
                pred_y = torch.sigmoid(prediction[..., 1])  # center y
                pred_w = prediction[..., 2]  # width
                pred_h = prediction[..., 3]  # height
                pred_conf = torch.sigmoid(prediction[..., 4])  # confidence
                pred_cls = torch.sigmoid(prediction[..., 5:])  # class scores

                grid_x = torch.arange(grid_size).repeat(grid_size, 1).view(
                    [1, 1, grid_size, grid_size]).type(FloatTensor)
                grid_y = torch.arange(grid_size).repeat(
                    grid_size, 1).t().view(
                        [1, 1, grid_size, grid_size]).type(FloatTensor)
                scaled_anchors = FloatTensor(
                    [(a_w / stride, a_h / stride) for a_w, a_h in anchors]
                )
                anchor_w = scaled_anchors[:, 0:1].view((1, num_anchors, 1, 1))
                anchor_h = scaled_anchors[:, 1:2].view((1, num_anchors, 1, 1))

                pred_boxes = FloatTensor(prediction[..., :4].shape)
                pred_boxes[..., 0] = pred_x.data + grid_x
                pred_boxes[..., 1] = pred_y.data + grid_y
                pred_boxes[..., 2] = torch.exp(pred_w.data) * anchor_w
                pred_boxes[..., 3] = torch.exp(pred_h.data) * anchor_h
                pred_boxes = pred_boxes.to(device)

                nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
                    pred_boxes=pred_boxes.cpu().data,
                    pred_conf=pred_conf.cpu().data,
                    pred_cls=pred_cls.cpu().data,
                    target=targets,
                    anchors=scaled_anchors.cpu().data,
                    num_anchors=num_anchors,
                    num_classes=num_classes,
                    grid_size=grid_size,
                    ignore_thres=self.ignore_thres,
                    device=device)

                # Masks select the cells that take part in each loss term.
                mask = Variable(mask.type(ByteTensor))
                conf_mask = Variable(conf_mask.type(ByteTensor))

                tx = Variable(tx.type(FloatTensor), requires_grad=False)
                ty = Variable(ty.type(FloatTensor), requires_grad=False)
                tw = Variable(tw.type(FloatTensor), requires_grad=False)
                th = Variable(th.type(FloatTensor), requires_grad=False)
                tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
                tcls = Variable(tcls.type(LongTensor), requires_grad=False)

                # Split the confidence mask by ``mask`` membership.
                conf_mask_true = mask
                conf_mask_false = conf_mask - mask

                loss_x = self.mse_loss(pred_x[mask], tx[mask])
                loss_y = self.mse_loss(pred_y[mask], ty[mask])
                loss_w = self.mse_loss(pred_w[mask], tw[mask])
                loss_h = self.mse_loss(pred_h[mask], th[mask])
                loss_conf = self.bce_loss(
                    pred_conf[conf_mask_false], tconf[conf_mask_false]
                ) + self.bce_loss(
                    pred_conf[conf_mask_true], tconf[conf_mask_true]
                )
                loss_cls = (1 / batch_size) * self.ce_loss(
                    pred_cls[mask], torch.argmax(tcls[mask], 1)
                )
                loss += loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            outputs[i] = outputs[i - 1]

    if targets is None:
        return detections
    return loss
def forward(self, x, CUDA):
    """Run the network on ``x`` and return the stacked YOLO predictions.

    Iterates over ``self.blocks[1:]`` (the blocks read from the cfg file)
    and caches each layer's output for later ``route`` / ``shortcut``
    blocks.

    Args:
        x: Input tensor fed to the first layer.
        CUDA: Forwarded unchanged to ``predict_transform``.

    Returns:
        The ``yolo`` outputs concatenated along dim 1, or an empty list when
        no ``yolo`` block contributed a result.
    """
    modules = self.blocks[1:]
    outputs = {}  # layer index -> output, consulted by route/shortcut blocks
    detections = []  # the value that is eventually returned
    write = False  # True once the first yolo output has been stored

    for i, module in enumerate(modules):
        module_type = module["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in module["layers"]]
            # Positive entries are absolute layer indices; anything else is
            # an offset relative to the current layer.
            first = layers[0] if layers[0] > 0 else i + layers[0]
            if len(layers) == 1:
                x = outputs[first]
            else:
                second = layers[1] if layers[1] > 0 else i + layers[1]
                x = torch.cat((outputs[first], outputs[second]), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(module["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == 'yolo':
            # This is where the network emits its output.
            anchors = self.module_list[i][0].anchors
            inp_dim = int(self.net_info["height"])
            num_classes = int(module["classes"])
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            if isinstance(x, int):
                # predict_transform may hand back a plain int instead of a
                # tensor; such results are skipped entirely.
                continue
            detections = torch.cat((detections, x), 1) if write else x
            write = True
            outputs[i] = outputs[i - 1]

    # ``detections`` is always bound, so the former bare ``except`` around
    # this return was unreachable.
    return detections
def forward(self, x, device, flag_eval):
    """Run ``x`` through the network in detection or feature-extraction mode.

    Args:
        x: Input handed to the first module.
        device: Forwarded unchanged to ``predict_transform``.
        flag_eval: When truthy, ``yolo`` blocks are evaluated and their
            results collected. When falsy, ``yolo`` blocks are ignored and
            the cached output of layer 61 is returned instead.

    Returns:
        With ``flag_eval`` truthy: the ``predict_transform`` results of all
        ``yolo`` blocks concatenated along dimension 1, or an empty list if
        none contributed. Otherwise: ``outputs[61]``.

    Raises:
        KeyError: If ``flag_eval`` is falsy and no output was cached for
            layer 61.
    """
    # NOTE(review): 61 is hard-coded in the original; presumably it matches a
    # specific cfg layout -- confirm before using a different network.
    feature_layer = 61

    detections = []
    modules = self.blocks[1:]
    outputs = {}  # We cache the outputs for the route layer
    collected = False

    for i, block in enumerate(modules):
        module_type = block["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in block["layers"]]
            # Positive entries are absolute indices; make them relative to i.
            if layers[0] > 0:
                layers[0] -= i
            if len(layers) == 1:
                x = outputs[i + layers[0]]
            else:
                if layers[1] > 0:
                    layers[1] -= i
                map1 = outputs[i + layers[0]]
                map2 = outputs[i + layers[1]]
                x = torch.cat((map1, map2), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(block["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x
            # Only a shortcut block at this index is exposed on the instance.
            if i == feature_layer:
                self.feature_map = x

        elif module_type == "yolo" and flag_eval:
            anchors = self.module_list[i][0].anchors
            # Get the input dimensions
            inp_dim = int(self.net_info["height"])
            # Get the number of classes
            num_classes = int(block["classes"])

            x = predict_transform(x.data, inp_dim, anchors, num_classes, device)
            # An int result is treated as "nothing to collect" and skipped.
            if isinstance(x, int):
                continue

            if collected:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                collected = True

            outputs[i] = outputs[i - 1]

    if flag_eval:
        # `detections` is always bound; the former try/except was dead code.
        return detections
    return outputs[feature_layer]
def forward(self, x, CUDA, vis=None, featureLayers=None, show=3, view='flat', conf=0.5):
    """Run ``x`` through the network, optionally visualising feature maps.

    Args:
        x: Input handed to the first module.
        CUDA: Forwarded unchanged to ``predict_transform``.
        vis: Handle passed to ``self.visualizeFeatureMap``; visualisation is
            skipped entirely when this is falsy.
        featureLayers: Whitespace-separated 1-based layer numbers to
            visualise. ``None`` or an empty string selects every eligible
            layer.
        show: Forwarded to ``self.visualizeFeatureMap``.
        view: Forwarded to ``self.visualizeFeatureMap``.
        conf: Forwarded to ``self.visualizeFeatureMap`` for ``yolo`` layers.

    Returns:
        The ``predict_transform`` results of all ``yolo`` blocks concatenated
        along dimension 1, or an empty list if none contributed. If a
        ``softmax`` block is reached, ``F.softmax(x)`` is returned at once.
    """
    module_backed = (
        "convolutional",
        "dilated_convolutional",
        "upsample",
        "maxpool",
        "bilinear",
        "avgpool",
    )

    def should_visualize(index):
        # Layer numbers in featureLayers are 1-based, hence index + 1.
        if not vis:
            return False
        return not featureLayers or str(index + 1) in featureLayers.split()

    detections = []
    modules = self.blocks[1:]
    outputs = {}  # We cache the outputs for the route layer
    collected = False

    for i, block in enumerate(modules):
        module_type = block["type"]

        if module_type in module_backed:
            x = self.module_list[i](x)
            outputs[i] = x
            # visualize feature map (convolutional layers only)
            if module_type == "convolutional" and should_visualize(i):
                self.visualizeFeatureMap(vis, i, x, show, view)

        elif module_type == "softmax":
            # NOTE(review): softmax is called without an explicit `dim`,
            # which PyTorch documents as deprecated; left unchanged so the
            # result stays the same.
            return F.softmax(x)

        elif module_type == "route":
            layers = [int(a) for a in block["layers"]]
            # Positive entries are absolute indices; make them relative to i.
            if layers[0] > 0:
                layers[0] -= i
            if len(layers) == 1:
                x = outputs[i + layers[0]]
            else:
                if layers[1] > 0:
                    layers[1] -= i
                map1 = outputs[i + layers[0]]
                map2 = outputs[i + layers[1]]
                x = torch.cat((map1, map2), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(block["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == "yolo":
            anchors = self.module_list[i][0].anchors
            # Get the input dimensions
            inp_dim = int(self.net_info["height"])
            # Get the number of classes
            num_classes = int(block["classes"])

            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            # An int result is treated as "nothing to collect" and skipped.
            if isinstance(x, int):
                continue

            # visualize the transformed predictions
            if should_visualize(i):
                self.visualizeFeatureMap(vis, i, x, show, view, conf)

            if collected:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                collected = True

            outputs[i] = outputs[i - 1]

    # Keep the most recently cached layer output on the instance, for
    # debugging the net structure. Keys are inserted in increasing order, so
    # the largest key is the latest one.
    if outputs:
        self.result = outputs[max(outputs)]

    # `detections` is always bound; the former try/except was dead code.
    return detections
def create_yolo_model(model_list: list, input_shape=(416, 416, 3)): """ Create YOLOv3 model with list of layers which based on cfg_file. :param input_shape: shape of input image. :param model_list: A list of layers which needed. :return: A functional Keras model """ # Define the input as a tensor with shape input_shape X = Input(input_shape) # Catch the output of each layer used for route layer and shortcut layer. layer_output_list = [] write = False # flag of getting the first output layer # Traverse the model_list for layer_index, layer in enumerate(model_list): # In model_list, both convNet layer and upsample layer are functional, # to let layer work, just need a input. # But route and shortcut layer, they need the output from other layers, # so, when create model_list, this two layers set as dict to hold info, # when building the model, get info from dict of layer, and get output # from layer_output_list to implement route or shortcut layer. layer_type = type(layer) # get layer type if layer_type != dict: # This layer is convolution or upsampling layer. X = layer(X) # just get layer a input and get the output else: # This layer is route or shortcut or yolo layer. if layer["layerType"] == "route": # This layer is route layer. route_start = layer[ "start"] # get start layer index. Negative value map_start = layer_output_list[ layer_index + route_start] # get output tensor from start layer. try: # try to get end layer index. If exist, get the index and output tensor of that layer. route_end = layer["end"] #Negative value map_end = layer_output_list[layer_index + route_end] # out put of route layer is concatenate two layers' output tensor at channel axis. X = tf.concat( [map_start, map_end], axis=-1 ) # as tensor is 'channel last', concat at last axis except Exception: # If there is only start layer, the output tensor is start layer's output. X = map_start elif layer[ "layerType"] == "shortcut": # This layer is shortcut layer. 
shortcut_from = layer["from"] # get add layer's index # the sum of from_layer and previous layer as shortcut layer's output. X = layer_output_list[layer_index - 1] + layer_output_list[layer_index + shortcut_from] elif layer["layerType"] == "yolo": # yolo layer anchors: list = layer["anchors"] # get anchors' size inp_dim = net_info["height"] # Get input image dimension num_classes = layer["classes"] # Get the number of classes # Transfer convNet output format to readable format X = predict_transform(X, inp_dim=inp_dim, anchors=anchors, num_classes=num_classes) # check write flag which marks the model get first detection tensor or not. if not write: # if model hasn't get first detection, create detection tensor and # raise the write flag. detections = X write = True else: # if model has got detection, concatenate detection tensor and # new X tensor at axis=1 detections = tf.concat([detections, X], axis=1) else: # error continue layer_output_list.append(X) # append output in list
def forward(self, x, CUDA):
    """Run ``x`` through the network, returning raw and transformed YOLO outputs.

    Args:
        x: Input handed to the first module; moved with ``x.cuda()`` when
            ``CUDA`` is truthy.
        CUDA: Selects the ``x.cuda()`` move and is forwarded unchanged to
            ``util.predict_transform``.

    Returns:
        A ``(detections_prev, detections)`` tuple of two lists with one entry
        per ``yolo`` block: the input each block received, and the matching
        ``util.predict_transform`` result.

    Side effects:
        Sets ``stride`` and ``grid_size`` on the first sub-module of every
        ``yolo`` entry in ``self.module_list``.
    """
    if CUDA:
        x = x.cuda()

    modules = self.blocks[1:]
    outputs = {}  # We cache the outputs for the route layer
    detections = []
    detections_prev = []

    for i, module in enumerate(modules):
        module_type = module["type"]

        if module_type in ("convolutional", "upsample"):
            x = self.module_list[i](x)

        elif module_type == "route":
            layers = [int(a) for a in module["layers"]]
            # Positive entries are absolute indices; make them relative to i.
            if layers[0] > 0:
                layers[0] -= i
            if len(layers) == 1:
                x = outputs[i + layers[0]]
            else:
                if layers[1] > 0:
                    layers[1] -= i
                map1 = outputs[i + layers[0]]
                map2 = outputs[i + layers[1]]
                x = torch.cat((map1, map2), 1)

        elif module_type == "shortcut":
            from_ = int(module["from"])
            x = outputs[i - 1] + outputs[i + from_]

        elif module_type == "yolo":
            yolo_layer = self.module_list[i][0]
            anchors = yolo_layer.anchors
            # Get the input dimensions
            inp_dim = int(self.net_info["height"])
            # Get the number of classes
            num_classes = int(module["classes"])

            # Keep the untransformed input before it is replaced below.
            detections_prev.append(x)
            x = x.data
            # Derive the layer's stride from the size of dimension 2, then
            # the grid size back from that stride.
            yolo_layer.stride = inp_dim // x.size(2)
            yolo_layer.grid_size = inp_dim // yolo_layer.stride
            x = util.predict_transform(x, inp_dim, anchors, num_classes, CUDA)
            detections.append(x)

        # Every block caches its result, whatever its type.
        outputs[i] = x

    return detections_prev, detections
def forward(self, x, CUDA):
    """Run ``x`` through the network and gather the output of every YOLO layer.

    Args:
        x: Input handed to the first module. Per the original author's note,
            a ``torch.Tensor`` of size (batch, channel, height, width).
        CUDA: Per the original author's note, a bool. Forwarded unchanged to
            ``predict_transform``.

    Returns:
        The ``predict_transform`` results of all ``yolo`` blocks concatenated
        along dimension 1, or an empty list when no ``yolo`` block
        contributed a result.
    """
    detections = []
    modules = self.blocks[1:]
    outputs = {}  # We cache the outputs for the route layer
    collected = False  # flag recording whether a detection was stored yet

    for i, block in enumerate(modules):
        module_type = block["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in block["layers"]]
            # Positive entries are absolute indices; make them relative to i.
            if layers[0] > 0:
                layers[0] -= i
            if len(layers) == 1:
                x = outputs[i + layers[0]]
            else:
                if layers[1] > 0:
                    layers[1] -= i
                map1 = outputs[i + layers[0]]
                map2 = outputs[i + layers[1]]
                x = torch.cat((map1, map2), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(block["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == "yolo":
            # Anchors as listed in the original author's note (presumably for
            # the stock YOLOv3 cfg -- confirm against the cfg in use):
            #   yolo_82  | [(116,90), (156,198), (373,326)]
            #   yolo_94  | [( 30,61), ( 62, 45), ( 59,119)]
            #   yolo_106 | [( 10,13), ( 16, 30), ( 33, 23)]
            anchors = self.module_list[i][0].anchors
            # Get the input dimensions
            inp_dim = int(self.net_info["height"])
            # Get the number of classes (original note: classes=80, MS COCO)
            num_classes = int(block["classes"])

            x = predict_transform(
                prediction=x.data,
                inp_dim=inp_dim,
                anchors=anchors,
                num_classes=num_classes,
                CUDA=CUDA,
            )
            # An int result is treated as "nothing to collect" and skipped;
            # outputs[i] is left unset for this layer in that case.
            if isinstance(x, int):
                continue

            if collected:
                # concatenate along dimension 1
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                collected = True

            outputs[i] = outputs[i - 1]

    # `detections` is always bound, so the former try/except around this
    # return could never trigger and has been dropped.
    return detections
def forward(self, x):
    """Transform ``x.data`` with ``predict_transform`` using this layer's settings.

    The input dimension, anchors and class count are read from
    ``self.inp_dim``, ``self.anchors`` and ``self.num_classes``.

    Returns:
        Whatever ``predict_transform`` returns for the given input.
    """
    return predict_transform(x.data, self.inp_dim, self.anchors, self.num_classes)
def forward(self, x, CUDA):
    """Compute the network output and transform the detection feature maps.

    This serves two purposes: it computes the output, and it transforms the
    detection feature maps so that they are easier to process.

    Args:
        x: The input.
        CUDA: Whether to use GPU acceleration (per the original docstring);
            forwarded unchanged to ``predict_transform``.

    Returns:
        The ``predict_transform`` results of all ``yolo`` blocks concatenated
        along dimension 1, or an empty list when no ``yolo`` block
        contributed a result.
    """
    detections = []  # final list of predictions
    modules = self.blocks[1:]  # the first block is "net" and takes no part in the computation
    # route and shortcut layers need the outputs of earlier layers, so each
    # layer's output is cached here: key = layer index, value = feature map.
    outputs = {}
    collected = False

    for i, block in enumerate(modules):
        module_type = block["type"]

        if module_type in ("convolutional", "upsample", "maxpool"):
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = [int(a) for a in block["layers"]]
            # Positive entries are absolute indices; make them relative to i.
            if layers[0] > 0:
                layers[0] -= i
            if len(layers) == 1:
                x = outputs[i + layers[0]]
            else:
                if layers[1] > 0:
                    layers[1] -= i
                map1 = outputs[i + layers[0]]
                map2 = outputs[i + layers[1]]
                x = torch.cat((map1, map2), 1)
            # Cache the route result as well. The original skipped this, so
            # any later layer referring to a route layer hit a KeyError.
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(block["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == "yolo":
            anchors = self.module_list[i][0].anchors
            # Get the input dimensions
            inp_dim = int(self.net_info["height"])
            # Get the number of classes
            num_classes = int(block["classes"])

            # Produce the result
            x = predict_transform(x.data, inp_dim, anchors, num_classes, CUDA)
            # An int result is treated as "nothing to collect" and skipped;
            # outputs[i] is left unset for this layer in that case.
            if isinstance(x, int):
                continue

            if collected:
                detections = torch.cat((detections, x), 1)
            else:
                detections = x
                collected = True

            outputs[i] = outputs[i - 1]

    # `detections` is always bound, so the former try/except around this
    # return could never trigger and has been dropped.
    return detections