def __init__(self, config):
    """Build the YOLOv3 head: Darknet-53 backbone plus three detection branches.

    Args:
        config: dict providing config['yolo']['anchors'] (three anchor
            groups, one per output scale) and config['yolo']['classes']
            (number of object classes).
    """
    super(YoloBody, self).__init__()
    self.config = config

    # Backbone returning three feature maps (out3, out4, out5).
    self.backbone = darknet53(None)
    out_filters = self.backbone.layers_out_filters

    # Channels per branch: anchors * (4 box offsets + 1 objectness + classes),
    # e.g. 3 * (5 + 80) = 255 for COCO.
    final_out_filter0 = len(config['yolo']['anchors'][0]) * (5 + config['yolo']['classes'])
    self.last_layer0 = make_last_layers([512, 1024], out_filters[-1], final_out_filter0)

    final_out_filter1 = len(config['yolo']['anchors'][1]) * (5 + config['yolo']['classes'])
    self.last_layer1_conv = conv2d(512, 256, 1)
    self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer1 = make_last_layers([256, 512], out_filters[-2] + 256, final_out_filter1)

    final_out_filter2 = len(config['yolo']['anchors'][2]) * (5 + config['yolo']['classes'])
    # BUG FIX: these three modules were previously assigned to
    # self.last_layer1_conv / last_layer1_upsample / last_layer1, which
    # overwrote the second branch and left last_layer2* undefined.
    self.last_layer2_conv = conv2d(256, 128, 1)
    self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer2 = make_last_layers([128, 256], out_filters[-3] + 128, final_out_filter2)
def __init__(self, config):
    """Assemble the YOLOv3 detection head on top of a Darknet-53 backbone."""
    super(YoloBody, self).__init__()
    self.config = config

    # backbone producing three feature maps at decreasing stride
    self.backbone = darknet53(None)
    out_filters = self.backbone.layers_out_filters

    # channels each branch must emit: anchors * (x, y, w, h, obj + classes)
    def branch_channels(i):
        return len(config["yolo"]["anchors"][i]) * (5 + config["yolo"]["classes"])

    # branch 0: operates directly on the deepest feature map
    self.last_layer0 = make_last_layers([512, 1024], out_filters[-1], branch_channels(0))

    # branch 1: 1x1 channel reduction, 2x upsample, concat with out_filters[-2]
    self.last_layer1_conv = conv2d(512, 256, 1)
    self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer1 = make_last_layers([256, 512], out_filters[-2] + 256, branch_channels(1))

    # branch 2: same pattern one scale up
    self.last_layer2_conv = conv2d(256, 128, 1)
    self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer2 = make_last_layers([128, 256], out_filters[-3] + 128, branch_channels(2))
def __init__(self, num_classes=80, model_mode='train'):
    """Construct the YOLOv3 network (Paddle dygraph).

    Args:
        num_classes: number of object classes predicted per anchor.
        model_mode: 'train', 'eval' or 'test' (case-insensitive).
    """
    super(YOLOv3, self).__init__()
    self.num_classes = num_classes

    mode = str.lower(model_mode)
    assert mode in ['train', 'eval', 'test'], \
        "model_mode should be 'train' 'eval' or 'test', but got " \
        "{}".format(model_mode)
    self.model_mode = mode

    # anchor (w, h) pairs and the anchor indices used by each output scale
    self.anchors = [
        10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156,
        198, 373, 326
    ]
    self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    # post-processing thresholds
    self.valid_thresh = 0.005
    self.nms_thresh = 0.45
    self.nms_topk = 400
    self.nms_posk = 100
    self.draw_thresh = 0.5

    # backbone; pretrained weights are only loaded when training
    self.backbone = darknet53(pretrained=(model_mode == 'train'),
                              with_pool=False,
                              num_classes=-1)

    self.block_outputs = []
    self.yolo_blocks = []
    self.route_blocks = []

    for idx, num_chan in enumerate([1024, 768, 384]):
        scale = 2 ** idx

        # detection block for this output scale
        yolo_block = self.add_sublayer(
            "yolo_detecton_block_{}".format(idx),
            YoloDetectionBlock(num_chan, 512 // scale))
        self.yolo_blocks.append(yolo_block)

        # 1x1 conv emitting raw per-anchor predictions
        num_filters = len(self.anchor_masks[idx]) * (self.num_classes + 5)
        block_out = self.add_sublayer(
            "block_out_{}".format(idx),
            Conv2D(num_channels=1024 // scale,
                   num_filters=num_filters,
                   filter_size=1,
                   act=None,
                   param_attr=ParamAttr(
                       initializer=fluid.initializer.Normal(0., 0.02)),
                   bias_attr=ParamAttr(
                       initializer=fluid.initializer.Constant(0.0),
                       regularizer=L2Decay(0.))))
        self.block_outputs.append(block_out)

        # route convs feed upsampled features into the next (finer) scale
        if idx < 2:
            route = self.add_sublayer(
                "route2_{}".format(idx),
                ConvBNLayer(ch_in=512 // scale,
                            ch_out=256 // scale,
                            filter_size=1,
                            act='leaky_relu'))
            self.route_blocks.append(route)
def main():
    """Single-image YOLOv3 demo: detect objects, draw boxes/labels, show result."""
    args = parse_args()
    model = darknet53(args.modelpath).eval()

    img_ori = cv2.imread(args.image)
    inp_dim = args.resolution, args.resolution
    img = img_prepare(img_ori, inp_dim)

    if args.cuda:
        model = model.cuda()
        img = img.cuda()

    with torch.no_grad():
        out0, out1, out2 = model(img)
    output = {'out0': out0.cpu().numpy(),
              'out1': out1.cpu().numpy(),
              'out2': out2.cpu().numpy()}

    rects = rects_prepare(output)
    mapping = get_classname_mapping(args.classfile)

    # NOTE(review): the letterbox scale is computed from the width only; if
    # img_prepare letterboxes with min(res/w, res/h) this should be the same
    # min over both dimensions — confirm against img_prepare.
    scaling_factor = min(1, args.resolution / img_ori.shape[1])
    for pt1, pt2, cls, prob in rects:
        # undo letterbox padding, then rescale into original-image coordinates
        pt1[0] -= (args.resolution - scaling_factor * img_ori.shape[1]) / 2
        pt2[0] -= (args.resolution - scaling_factor * img_ori.shape[1]) / 2
        pt1[1] -= (args.resolution - scaling_factor * img_ori.shape[0]) / 2
        pt2[1] -= (args.resolution - scaling_factor * img_ori.shape[0]) / 2

        # clip x against image width, y against image height
        # (BUG FIX: y was previously clipped against the width, shape[1])
        pt1[0] = np.clip(int(pt1[0] / scaling_factor), a_min=0, a_max=img_ori.shape[1])
        pt2[0] = np.clip(int(pt2[0] / scaling_factor), a_min=0, a_max=img_ori.shape[1])
        pt1[1] = np.clip(int(pt1[1] / scaling_factor), a_min=0, a_max=img_ori.shape[0])
        pt2[1] = np.clip(int(pt2[1] / scaling_factor), a_min=0, a_max=img_ori.shape[0])

        label = "{}:{:.2f}".format(mapping[cls], prob)
        color = tuple(map(int, np.uint8(np.random.uniform(0, 255, 3))))

        # detection box, then a filled rectangle behind the label text
        cv2.rectangle(img_ori, tuple(pt1), tuple(pt2), color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        pt2 = pt1[0] + t_size[0] + 3, pt1[1] + t_size[1] + 4
        cv2.rectangle(img_ori, tuple(pt1), tuple(pt2), color, -1)
        # BUG FIX: putText was called with the font constant twice, which
        # shifted fontScale/color/thickness into the wrong argument slots.
        # Signature: putText(img, text, org, fontFace, fontScale, color, thickness).
        cv2.putText(img_ori, label, (pt1[0], t_size[1] + 4 + pt1[1]),
                    cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    cv2.imshow(args.image, img_ori)
    cv2.waitKey()
def __init__(self, config):
    """Wire up the YOLOv3 head: backbone plus three prediction branches."""
    super(YoloBody, self).__init__()
    self.config = config
    self.backbone = darknet53()
    filters = self.backbone.layers_out_filters

    anchors = config['yolo']['anchors']
    n_classes = config['yolo']['classes']
    # per anchor: 4 box offsets + 1 objectness score + class scores
    out0 = len(anchors[0]) * (5 + n_classes)
    out1 = len(anchors[1]) * (5 + n_classes)
    out2 = len(anchors[2]) * (5 + n_classes)

    # deepest scale
    self.last_layer0 = make_last_layer([512, 1024], filters[-1], out0)

    # middle scale: DBL reduce + nearest upsample, concat with filters[-2]
    self.last_layer1_conv = conv2d(512, 256, 1)
    self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer1 = make_last_layer([256, 512], filters[-2] + 256, out1)

    # finest scale: DBL reduce + nearest upsample, concat with filters[-3]
    self.last_layer2_conv = conv2d(256, 128, 1)
    self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer2 = make_last_layer([128, 256], filters[-3] + 128, out2)
def __init__(self, classes=1000):
    """Classifier: Darknet-53 features, a 1x1 conv reduction, two FC stages, head."""
    super(Network, self).__init__()

    # feature extractor (empty string = no weight path given here)
    self.backbone = darknet53('')

    # 1x1 conv squeezing 1024 backbone channels down to 256, with BN + LeakyReLU
    self.conv1_extra = nn.Sequential()
    for name, layer in (('conv1', nn.Conv2d(1024, 256, 1, 1)),
                        ('bn1', nn.BatchNorm2d(256)),
                        ('leakyrelu1', nn.LeakyReLU(0.1))):
        self.conv1_extra.add_module(name, layer)

    # first FC stage: flattened 256*3*3 features -> 1024
    self.fc1_extra = nn.Sequential()
    for name, layer in (('fc1', nn.Linear(256 * 3 * 3, 1024)),
                        ('relu1', nn.ReLU(inplace=True)),
                        ('drop1', nn.Dropout(p=0.5))):
        self.fc1_extra.add_module(name, layer)

    # second FC stage: 9*1024 inputs -> 4096
    # (presumably nine 1024-d vectors concatenated in forward() — confirm there)
    self.fc2_extra = nn.Sequential()
    for name, layer in (('fc2', nn.Linear(9 * 1024, 4096)),
                        ('relu2', nn.ReLU(inplace=True)),
                        ('drop2', nn.Dropout(p=0.5))):
        self.fc2_extra.add_module(name, layer)

    # final classification head
    self.classifier = nn.Sequential()
    self.classifier.add_module('fc3', nn.Linear(4096, classes))
def get_default_net(num_anchors=1, cfg=None):
    """ Constructs the network based on the config """
    # Choose the visual encoder/backbone according to cfg['mdl_to_use'].
    # NOTE(review): if cfg['mdl_to_use'] matches none of the branches,
    # `backbone` is unbound and the code below raises NameError — confirm
    # callers only pass the three supported values.
    if cfg['mdl_to_use'] == 'retina':
        encoder = tvm.resnet50(True)  # torchvision ResNet-50, pretrained
        backbone = RetinaBackBone(encoder, cfg)
    elif cfg['mdl_to_use'] == 'ssd_vgg':
        encoder = ssd_vgg.build_ssd('train', cfg=cfg)
        encoder.vgg.load_state_dict(
            torch.load('./weights/vgg16_reducedfc.pth'))
        print('loaded pretrained vgg backbone')
        backbone = SSDBackBone(encoder, cfg)
        # backbone = encoder
    elif cfg['mdl_to_use'] == 'realgin':
        encoder = darknet53(True)
        backbone = YoloBackBone(encoder, cfg)

    # Freeze visual backbone params
    # NOTE(review): indentation reconstructed from a collapsed source; the
    # freeze loop is assumed to apply to every backbone choice — confirm.
    for param in backbone.parameters():
        param.requires_grad = False

    zsg_net = ZSGNet(backbone, num_anchors, cfg=cfg)
    return zsg_net
def main():
    """Train or evaluate an image-classification model selected by vars.model_name.

    All configuration (paths, model name, hyper-parameters, run mode) is read
    from the global `vars` module. In 'test' mode the model is timed on both
    CUDA and CPU; otherwise it is trained with SGD + StepLR.
    """
    print(torch.__version__)

    # To split the data into train/validation sets automatically:
    # this helper moves a percentage of the training images into the
    # validation folder while preserving the per-class folder names.
    # (original comment was in Persian)
    if(not os.path.exists(vars.val_dir)):
        utils.create_validation_data(vars.train_dir, vars.val_dir, vars.val_split_ratio, 'jpg')

    def handler(signum, frame):
        # Ctrl+C requests a graceful stop at the end of the current epoch
        # instead of aborting immediately.
        print('Signal handler called with signal', signum)
        print('Training will finish after this epoch')
        vars.stop_training = True
        #raise OSError("Couldn't open vars.device!")

    signal.signal(signal.SIGINT, handler)  # only in python version >= 3.2

    print("Start Time: ", strftime("%Y-%m-%d %H:%M:%S", localtime()))
    print("Active Mode: " + vars.mode)
    plt.ion()  # interactive mode

    ######################################################################
    # Load Data
    # Data augmentation and normalization for training
    # Just normalization for validation
    data_transforms = {
        'train': transforms.Compose([
            transforms.Resize([vars.input_size, vars.input_size]),
            #transforms.ColorJitter(0.1, 0.1, 0.1, 0.01),
            #transforms.RandomRotation(5),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize([vars.input_size, vars.input_size]),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'test': transforms.Compose([
            transforms.Resize([vars.input_size, vars.input_size]),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    # image_dataset_train = {'train': datasets.ImageFolder(os.path.join(vars.data_dir, 'train'), data_transforms['train'])}
    # image_dataset_test = {'val': datasets.ImageFolder(os.path.join(vars.data_dir, 'val'), data_transforms['val'])}
    # image_dataset_train.update(image_dataset_test)
    # The line below is equivalent to the three lines above! (was in Persian)
    image_datasets = {x: datasets.ImageFolder(os.path.join(vars.data_dir, x), data_transforms[x]) for x in ['train', 'val']}
    vars.dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=vars.batch_size, shuffle=True, num_workers=0) for x in ['train', 'val']}
    vars.dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
    vars.class_names = image_datasets['train'].classes

    # Get a batch of training data
    inputs, classes = next(iter(vars.dataloaders['train']))
    # Make a grid from batch
    out = torchvision.utils.make_grid(inputs)
    #utils.imshow(out, title=[vars.class_names[x] for x in classes])

    ######################################################################
    # Finetuning the convnet
    # Load a pretrained model and reset final fully connected layer.
    ##\\//\\//model, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
    if(vars.model_name.find('vgg') != -1):
        model = models.vgg11_bn(pretrained=vars.pre_trained)
        num_ftrs = model.classifier[6].in_features
        model.classifier[6] = nn.Linear(num_ftrs, len(vars.class_names))
    elif(vars.model_name == 'resnet152'):
        model = models.resnet152(pretrained=vars.pre_trained)
    elif(vars.model_name == 'resnet50'):
        model = models.resnet50(pretrained=vars.pre_trained)
    elif(vars.model_name == 'resnet18'):
        model = models.resnet18(pretrained=vars.pre_trained)
    elif(vars.model_name == 'googlenet'):
        model = models.googlenet(pretrained=vars.pre_trained)
    elif(vars.model_name == 'darknet53'):
        model = darknet.darknet53(1000)
        optimizer = optim.SGD(model.parameters(), lr = vars.learning_rate, momentum=0.9)
        # For darknet53, pre_trained means "resume from a local checkpoint"
        # rather than torchvision's ImageNet weights.
        if(vars.pre_trained):
            #model.load_state_dict( torch.load('D:\\Projects\\_Python\\car Detection\\model_best.pth.tar') )
            checkpoint = torch.load('D:\\Projects\\_Python\\car Detection\\model_best.pth.tar')
            start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if vars.device.type == 'cuda':
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(vars.device)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
    elif(vars.model_name == 'car3conv'):
        model = mymodels.car3conv()
    elif(vars.model_name == 'car5conv'):
        model = mymodels.car5conv()
    elif(vars.model_name == 'car2conv'):
        model = mymodels.car2conv()
    elif(vars.model_name == 'mymodel'):
        model = mymodels.MyModel()
    elif(vars.model_name == 'mymodel2'):
        model = mymodels.MyModel2()
    else:
        # unknown model name: fall back to the HOG-based pipeline
        return hogmain.main(None)

    # Replace the final FC layer to match the dataset's class count.
    # NOTE(review): assumes every non-VGG model exposes a `.fc` head — confirm
    # this holds for the custom `mymodels` classes.
    if(vars.model_name != 'vgg11'):
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, len(vars.class_names))

    model.to(vars.device)

    if(vars.mode == 'test'):#test
        #model.load_state_dict(torch.load("D:\\Projects\\Python\\Zeitoon Detection\"))
        model.load_state_dict(torch.load(vars.test_model))
        # log_dir = '{}-{}-{}-batch-{}'.format(vars.model_name, 'SGD', 'cuda', vars.batch_size)
        # if(vars.pre_trained):
        #     log_dir = log_dir + '-pretrained'
        # if not os.path.exists(log_dir):
        #     os.mkdir(log_dir)
        log_file = open(".\\Time-{}-{}.log".format(vars.model_name, vars.batch_size),"w")
        # Benchmark inference on GPU first, then CPU.
        for dev in ['cuda', 'cpu']:
            vars.device = torch.device(dev)
            model = model.to(vars.device)
            # run model on one batch to allocate required memory on device (and have more exact results)
            inputs = inputs.to(vars.device)
            outputs = model(inputs)
            s = test_model(model, vars.criterion, 'val', 100)
            log_file.write(s)
            #log_file.write('\n' + '-'*80)
            log_file.write(summary(model, input_size=(3, vars.input_size, vars.input_size), batch_size=-1, device=vars.device.type))
        log_file.close()
        print(summary(model, input_size=(3, vars.input_size, vars.input_size), batch_size=-1, device=vars.device.type))
    else:
        print(summary(model, input_size=(3, vars.input_size, vars.input_size), batch_size=-1, device=vars.device.type))
        #model.load_state_dict(torch.load("C:\\Projects\\Python\\Car Detection\\darknet53-SGD-cuda-batch-32\\ep7-acc97.83-loss0.0667.pth"))
        optimizer = optim.SGD(model.parameters(), lr = vars.learning_rate, momentum=0.9)
        #optimizer = optim.Adam(model.parameters(), lr=0.05)
        # Decay LR by a factor of 0.6 every 6 epochs
        exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size = vars.scheduler_step_size, gamma = vars.scheduler_gamma)
        model = model.to(vars.device)
        model = train_model(model, vars.criterion, optimizer, exp_lr_scheduler, vars.num_epochs)
        visualize_model(model)

    plt.ioff()
    plt.show()
import torch
from darknet import darknet53
from hardnet import hardnet

# Smoke-test HarDNet on a single random 512x512 RGB batch and print the
# output shape.
net = hardnet(19).cuda()
batch = torch.randn((1, 3, 512, 512)).cuda()
print(net(batch).shape)

# Repeat the same check for Darknet-53.
net = darknet53(0, 1, 2).cuda()
batch = torch.randn((1, 3, 512, 512)).cuda()
print(net(batch).shape)
def pre_img(img, device):
    """Normalize an HWC image array and return it as a 1xCxHxW float tensor on *device*."""
    img = img / 255.0                      # scale pixel values to [0, 1]
    img = img.astype(np.float32)
    img = (img - opt.mean) / opt.std       # normalization constants from global opt
    img = torch.FloatTensor(img)
    img = img.to(device)
    img = img.permute(2, 0, 1).unsqueeze(0)  # HWC -> CHW, add batch dimension
    return img


# Pad every input image up to the next multiple of 16 in both dimensions
# (presumably a network-stride requirement — confirm against the model),
# keeping the original image for display and the padded tensor for inference.
# NOTE(review): `files`, `imgs`, `img1s` and `opt` are defined earlier in the
# file, outside this chunk.
for file in files:
    img = cv2.imread(file)
    h, w, _ = img.shape
    # ((n - 1) | 15) + 1 rounds n up to the nearest multiple of 16
    h1 = ((h - 1) | 15) + 1
    w1 = ((w - 1) | 15) + 1
    img1 = np.zeros((h1, w1, 3))
    img1[:h, :w, :] = img
    imgs.append(img)
    img1 = pre_img(img1, opt.device)
    img1s.append(img1)

print(len(img1s))
print('Creating model...')
net = darknet53()
net, _ = load_model(net, './res51/m1_best1.pth', opt.device)
net = net.to('cuda')
net.eval()
print('Parameters have been loaded')
detector = Demoshower(img1s, imgs, net)
detector.show_demo()
##\\//\\//model, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True) if (vars.model_name.find('vgg') != -1): model = models.vgg11_bn(pretrained=vars.pre_trained) num_ftrs = model.classifier[6].in_features model.classifier[6] = nn.Linear(num_ftrs, len(vars.class_names)) elif (vars.model_name == 'resnet152'): model = models.resnet152(pretrained=vars.pre_trained) elif (vars.model_name == 'resnet50'): model = models.resnet50(pretrained=vars.pre_trained) elif (vars.model_name == 'resnet18'): model = models.resnet18(pretrained=vars.pre_trained) elif (vars.model_name == 'googlenet'): model = models.googlenet(pretrained=vars.pre_trained) elif (vars.model_name == 'darknet53'): model = darknet.darknet53(1000) if (vars.pre_trained): model.load_state_dict( torch.load( 'D:\\Projects\\_Python\\Fruit Detection2\\darknet53.weights')) elif (vars.model_name == 'fruit3conv'): model = mymodels.fruit3conv() elif (vars.model_name == 'fruit5conv'): model = mymodels.fruit5conv() elif (vars.model_name == 'fruit2conv'): model = mymodels.fruit2conv() elif (vars.model_name == 'mymodel'): model = mymodels.MyModel() elif (vars.model_name == 'mymodel2'): model = mymodels.MyModel2()