def initialize_model():
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    if len(model_lst) == 0:
        exit()
    else:
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2)
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1], map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    yolo = cv_Yolo(yolo_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    return (yolo, model, averages, angle_bins)
def __init__(self):
    super(SpineModelPAF, self).__init__()
    self.pcm_n = 2
    self.paf_n = 1

    import torchvision.models.vgg as vgg
    vgg19 = vgg.vgg19_bn(pretrained=False)
    top_layers = list(list(vgg19.children())[0].children())
    top_layers[0] = nn.Conv2d(1, 64, kernel_size=3, padding=1)
    tops = top_layers[:33]  # Top 10 (conv batch relu)*10 + maxpool * 3
    tops.pop(26)  # delete third max pool
    tops.extend(self.make_conv_layers(512, 256))
    tops.extend(self.make_conv_layers(256, 128))
    self.model_0 = nn.Sequential(*tops)  # out: 32, 94

    s1_pcm = lambda: self.stage1(self.pcm_n)
    s1_paf = lambda: self.stage1(self.paf_n)
    sn_pcm = lambda: self.stageN(self.pcm_n)
    sn_paf = lambda: self.stageN(self.paf_n)
    self.model1_1 = s1_pcm()
    self.model1_2 = s1_paf()
    self.model2_1 = sn_pcm()
    self.model2_2 = sn_paf()
    self.model3_1 = sn_pcm()
    self.model3_2 = sn_paf()
    self.model4_1 = sn_pcm()
    self.model4_2 = sn_paf()
    self.model5_1 = sn_pcm()
    self.model5_2 = sn_paf()
def __init__(self, feature_layer=34, use_bn=False, use_input_norm=True,
             device=torch.device('cpu'), z_norm=False):
    # Note: PPON uses cuda instead of CPU
    super(VGGFeatureExtractor, self).__init__()
    if use_bn:
        model = vgg.vgg19_bn(pretrained=True)
    else:
        model = vgg.vgg19(pretrained=True)
    self.use_input_norm = use_input_norm
    if self.use_input_norm:
        if z_norm:  # if input is in range [-1, 1]
            mean = torch.Tensor([0.485 - 1, 0.456 - 1, 0.406 - 1]).view(1, 3, 1, 1).to(device)
            std = torch.Tensor([0.229 * 2, 0.224 * 2, 0.225 * 2]).view(1, 3, 1, 1).to(device)
        else:  # input is in range [0, 1]
            mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
            std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
        self.register_buffer('mean', mean)
        self.register_buffer('std', std)
    self.features = nn.Sequential(*list(model.features.children())[:(feature_layer + 1)])
    # No need to BP to variable
    for k, v in self.features.named_parameters():
        v.requires_grad = False
def vgg(**config):
    dataset = config.pop('dataset', 'imagenet')
    depth = config.pop('depth', 16)
    bn = config.pop('bn', True)

    if dataset == 'imagenet':
        config.setdefault('num_classes', 1000)
        if depth == 11:
            if bn is False:
                return vgg11(pretrained=False, **config)
            else:
                return vgg11_bn(pretrained=False, **config)
        if depth == 13:
            if bn is False:
                return vgg13(pretrained=False, **config)
            else:
                return vgg13_bn(pretrained=False, **config)
        if depth == 16:
            if bn is False:
                return vgg16(pretrained=False, **config)
            else:
                return vgg16_bn(pretrained=False, **config)
        if depth == 19:
            if bn is False:
                return vgg19(pretrained=False, **config)
            else:
                return vgg19_bn(pretrained=False, **config)
    elif dataset == 'cifar10':
        config.setdefault('num_classes', 10)
    elif dataset == 'cifar100':
        config.setdefault('num_classes', 100)
    config.setdefault('batch_norm', bn)
    return VGG(model_name[depth], **config)
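# Usage sketch for the vgg() dispatcher above (assumption: vgg11..vgg19_bn are the
# torchvision constructors and VGG / model_name come from the surrounding module):
#
#   imagenet_net = vgg(dataset='imagenet', depth=19, bn=True)  # -> vgg19_bn(pretrained=False, num_classes=1000)
#   cifar_net = vgg(dataset='cifar10', depth=16, bn=True)      # -> VGG(model_name[16], num_classes=10, batch_norm=True)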
def __init__(self):
    super(VGG19_bn, self).__init__()
    self.vgg = vgg19_bn(pretrained=True)
    self.encoder_1 = self.vgg.features[0:6]
    self.encoder_2 = self.vgg.features[6:13]
    self.encoder_3 = self.vgg.features[13:26]
    self.encoder_4 = self.vgg.features[26:39]
    self.encoder_5 = self.vgg.features[39:-1]
def __init__(self):
    rospy.loginfo("pointcloud object detection is running...")

    # frame size
    self.frame_x = 640
    self.frame_y = 480

    self.bridge = CvBridge()

    # cv_image and pcl variables
    self.cv_image = np.zeros([self.frame_x, self.frame_y])
    self.pcl = None

    # transform config
    # self.tf_pub = tf.TransformBroadcaster()

    # load torch
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])
        my_vgg = vgg.vgg19_bn(pretrained=True)
        self.model = Model.Model(features=my_vgg.features, bins=2).cuda()
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()

    # load yolo
    yolo_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    self.yolo = cv_Yolo(yolo_path)

    self.averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    self.angle_bins = generate_bins(2)

    calib_path = os.path.abspath(os.path.dirname(__file__)) + "/" + "camera_cal/"
    self.calib_file = calib_path + "calib_cam_to_cam.txt"

    # subscribers
    self.img_sub = rospy.Subscriber("/kitti/camera_color_right/image_raw", Image, self.rgb_callback)
    # self.pcl_sub = rospy.Subscriber("/camera/depth_registered/points", PointCloud2, self.pcl_callback)

    # publishers
    self.img_detected_pub = rospy.Publisher("ROS_3D_BBox/img_detected_frame", Image, queue_size=100)
    self.location_pub = rospy.Publisher("ROS_3D_BBox/location_array", LocationArray, queue_size=100)

    self.rate = rospy.Rate(1)
def __init__(self, weights_path='../rotation_detector/weights'):
    # load yolo
    self.yolo = cv_Yolo(weights_path)

    # load rotation model
    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    print('Using previous model %s' % model_lst[-1])
    my_vgg = vgg.vgg19_bn(pretrained=True).cpu()
    self.model = Model.Model(features=my_vgg.features, bins=2).cpu()
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1], map_location=torch.device('cpu'))
    self.model.load_state_dict(checkpoint['model_state_dict'])
    self.angle_bins = generate_bins(2)
def vgg_19(batch_norm=True, pretrained=False, fixed_feature=True):
    """ VGG 19-layer model from torchvision's vgg model.

    :param batch_norm: train model with batch normalization
    :param pretrained: if true, return a model pretrained on ImageNet
    :param fixed_feature: if true and pretrained is true, model features are fixed while training.
    """
    if batch_norm:
        from torchvision.models.vgg import vgg19_bn
        model = vgg19_bn(pretrained)
    else:
        from torchvision.models.vgg import vgg19
        model = vgg19(pretrained)

    ff = True if pretrained and fixed_feature else False
    return _VGG(model, model.features, ff)
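# Usage sketch (assumption: vgg_19 and the _VGG wrapper it returns live in the same
# surrounding module; the wrapper itself is not shown here):
#
#   backbone = vgg_19(batch_norm=True, pretrained=True, fixed_feature=True)
#   # features come from torchvision's vgg19_bn and are frozen only when both
#   # pretrained and fixed_feature are True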
def init_vgg19_params(self):
    vgg19 = vgg.vgg19_bn(pretrained=True)

    blocks = [self.down1, self.down2, self.down3, self.down4, self.down5]

    features = list(vgg19.features.children())

    vgg_layers = []
    for _layer in features:
        if isinstance(_layer, nn.Conv2d):
            vgg_layers.append(_layer)
        elif isinstance(_layer, nn.BatchNorm2d):
            vgg_layers.append(_layer)

    merged_layers = []
    for idx, conv_block in enumerate(blocks):
        if idx < 2:
            units = [conv_block.conv1.cbr_unit, conv_block.conv2.cbr_unit]
        else:
            units = [
                conv_block.conv1.cbr_unit,
                conv_block.conv2.cbr_unit,
                conv_block.conv3.cbr_unit,
                conv_block.conv4.cbr_unit,
            ]
        for _unit in units:
            for _layer in _unit:
                if isinstance(_layer, nn.Conv2d):
                    merged_layers.append(_layer)
                elif isinstance(_layer, nn.BatchNorm2d):
                    merged_layers.append(_layer)

    assert len(vgg_layers) == len(merged_layers)

    for l1, l2 in zip(vgg_layers, merged_layers):
        if isinstance(l1, nn.Conv2d) and isinstance(l2, nn.Conv2d):
            assert l1.weight.size() == l2.weight.size()
            assert l1.bias.size() == l2.bias.size()
            l2.weight.data = l1.weight.data
            l2.bias.data = l1.bias.data
        elif isinstance(l1, nn.BatchNorm2d) and isinstance(l2, nn.BatchNorm2d):
            l2.running_mean.data = l1.running_mean.data
            l2.running_var.data = l1.running_var.data
            l2.weight.data = l1.weight.data
            l2.bias.data = l1.bias.data
def __init__(self, subtype='vgg16', out_stages=[2, 3, 4], backbone_path=None, pretrained=False):
    super(VGG, self).__init__()
    self.out_stages = out_stages
    self.backbone_path = backbone_path
    self.pretrained = pretrained

    if subtype == 'vgg11':
        self.pretrained = True
        features = vgg11_bn(pretrained=self.pretrained).features
        self.out_channels = [64, 128, 256, 512, 512]
    elif subtype == 'vgg13':
        self.pretrained = True
        features = vgg13_bn(pretrained=self.pretrained).features
        self.out_channels = [64, 128, 256, 512, 512]
    elif subtype == 'vgg16':
        self.pretrained = True
        features = vgg16_bn(pretrained=self.pretrained).features
        self.out_channels = [64, 128, 256, 512, 512]
    elif subtype == 'vgg19':
        self.pretrained = True
        features = vgg19_bn(pretrained=self.pretrained).features
        self.out_channels = [64, 128, 256, 512, 512]
    else:
        raise NotImplementedError

    self.out_channels = self.out_channels[self.out_stages[0]:self.out_stages[-1] + 1]

    self.conv1 = nn.Sequential(*list(features.children())[:7])
    self.layer1 = nn.Sequential(*list(features.children())[7:14])
    self.layer2 = nn.Sequential(*list(features.children())[14:24])
    self.layer3 = nn.Sequential(*list(features.children())[24:34])
    self.layer4 = nn.Sequential(*list(features.children())[34:43])

    if not self.pretrained:
        if self.backbone_path:
            self.pretrained = True
            self.backbone.load_state_dict(torch.load(self.backbone_path))
        else:
            self.init_weights()
def __init__(self):
    super(ColorizationNetwork_L, self).__init__()
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(3, 3),
                           stride=(1, 1), padding=(1, 1))
    self.VGG_19 = vgg.vgg19_bn(pretrained=True)  # [batch, 256, 56*, 56*]
    self.VGG_19.classifier = nn.Sequential()
    self.l = list(self.VGG_19.features.children())
    del self.l[52]
    del self.l[39]
    del self.l[0]
    self.VGG_modified = nn.Sequential(*self.l)
    self.conv_8 = nn.Sequential(
        nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4,
                           stride=2, padding=1, dilation=1),
        nn.ReLU(inplace=True),  # [batch, 128, 56*, 56*]
        nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1,
                  padding=1, dilation=1),
        nn.ReLU(inplace=True),  # [batch, 128, 56*, 56*]
        nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1,
                  padding=1, dilation=1),
        nn.ReLU(inplace=True),  # [batch, 128, 56*, 56*]
        nn.Conv2d(in_channels=256, out_channels=313, kernel_size=1, stride=1,
                  dilation=1)  # [batch, 313, 56*, 56*]
    )
def make_compatible_VGG(input_shape):
    '''Creates a modified, batch-normalized VGG-19 network usable for binary
    classification; this CNN accepts arbitrarily sized input images (as opposed
    to the 224x224 images originally used by VGG).

    Parameters:
        input_shape: Desired shape of input image

    Returns:
        torch.nn.Module 19-layer VGG model; this model includes batch
        normalization and is derived from the pretrained torch VGG model
    '''
    # Setting num_classes here blows up importing the pretrained model
    base_nn = vgg.vgg19_bn(pretrained=True)

    # Freeze the parameters of the convolutional part of the network;
    # only the fully-connected layers will be modified
    for param in base_nn.features.parameters():
        param.requires_grad = False

    # Modify dimensions of the first linear layer based on the input image.
    # First calculate the final feature map size after the five max pools
    # (VGG's conv2ds don't change the spatial size)
    out_size = []
    for dim_size in input_shape:
        size = dim_size
        for i in range(5):
            size -= 2
            size /= 2
            size = int(size) + 1
        out_size.append(size)

    base_nn.classifier[0] = nn.Linear(512 * out_size[0] * out_size[1], 4096)
    nn.init.normal_(base_nn.classifier[0].weight, 0, 0.01)
    nn.init.constant_(base_nn.classifier[0].bias, 0)  # bias of the new first linear layer

    # Now set the number of classes
    base_nn.classifier[-1] = nn.Linear(4096, 2)
    nn.init.normal_(base_nn.classifier[-1].weight, 0, 0.01)
    nn.init.constant_(base_nn.classifier[-1].bias, 0)

    return base_nn
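# Quick sanity check of the size arithmetic above (worked example, not from the
# original source): for a 224x224 input each of the 5 pools roughly halves the
# spatial size and the loop yields 7, matching VGG's native 7x7 feature map,
# so classifier[0] gets 512 * 7 * 7 = 25088 inputs as in the stock model:
#
#   make_compatible_VGG((224, 224)).classifier[0]
#   # -> Linear(in_features=25088, out_features=4096, bias=True)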
def __init__(self, num_classes, pretrained=True):
    super(SegNet, self).__init__()
    vgg = vgg19_bn(pretrained=pretrained)
    features = list(vgg.features.children())
    self.enc1 = nn.Sequential(*features[0:7])
    self.enc2 = nn.Sequential(*features[7:14])
    self.enc3 = nn.Sequential(*features[14:27])
    self.enc4 = nn.Sequential(*features[27:40])
    self.enc5 = nn.Sequential(*features[40:])

    self.dec5 = nn.Sequential(
        *([nn.Upsample(scale_factor=2)] +
          [nn.Conv2d(512, 512, kernel_size=3, padding=1),
           nn.BatchNorm2d(512),
           nn.ReLU(inplace=True)] * 4))
    self.dec4 = _DecoderBlock(1024, 256, 4, last=False)
    self.dec3 = _DecoderBlock(512, 128, 4, last=False)
    self.dec2 = _DecoderBlock(256, 64, 2, last=False)
    self.dec1 = _DecoderBlock(128, num_classes, 2, last=True)
    initialize_weights(self.dec5, self.dec4, self.dec3, self.dec2, self.dec1)
def test_vgg19_bn(self):
    # VGG 19-layer model (configuration 'E') with batch normalization
    x = Variable(torch.randn(BATCH_SIZE, 3, 224, 224).fill_(1.0))
    self.exportTest(toC(vgg19_bn()), toC(x))
def main():
    FLAGS = parser.parse_args()

    # load torch
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2).cuda()
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    yolo = cv_Yolo(yolo_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = os.path.abspath(os.path.dirname(__file__)) + "/" + image_dir

    # using P_rect from global calibration file
    calib_path = os.path.abspath(os.path.dirname(__file__)) + "/" + cal_dir
    calib_file = calib_path + "calib_cam_to_cam_custom.txt"

    # using P from each frame
    # calib_path = os.path.abspath(os.path.dirname(__file__)) + '/Kitti/testing/calib/'

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except:
        print("\nError: no images in %s" % img_path)
        exit()

    for img_id in ids:
        start_time = time.time()

        img_file = img_path + img_id + ".png"

        # P for each frame
        # calib_file = calib_path + id + ".txt"

        pad_image = False
        if pad_image:
            truth_img = cv2.imread(img_file)
            truth_img = cv2.resize(truth_img, (374, 374))
            height, width, channels = truth_img.shape
            width_pad = 1242
            height_pad = 375
            center_height = height_pad // 2
            center_width = width_pad // 2
            img_pad = np.zeros([height_pad, width_pad, 3], dtype=np.uint8)
            start_height = center_height - height // 2
            stop_height = center_height + height // 2
            start_width = center_width - width // 2
            stop_width = center_width + width // 2
            img_pad[start_height:stop_height, start_width:stop_width, :] = truth_img
            truth_img = img_pad
            img = np.copy(img_pad)
            yolo_img = np.copy(img_pad)
            detections = yolo.detect(yolo_img)
        else:
            truth_img = cv2.imread(img_file)
            img = np.copy(truth_img)
            yolo_img = np.copy(truth_img)
            detections = yolo.detect(yolo_img)

        for detection in detections:
            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class, detection.box_2d, calib_file)
            except:
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]
            dim += averages.get_item(detected_class)

            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            print('cos:', cos)
            print('sin:', sin)
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            my_alpha = alpha - np.pi / 2
            print('new cos', np.cos(my_alpha))
            print('new sin', np.sin(my_alpha))
            print('adding', angle_bins[argmax])
            print('confidence', conf)
            print('ANGLE', (my_alpha % (2 * np.pi)) / (2 * np.pi) * 360)
            print(theta_ray)

            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha, theta_ray, truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha, theta_ray)

            if not FLAGS.hide_debug:
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit', numpy_vertical)
        else:
            cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' % (len(detections), time.time() - start_time))
            print('-------------')

        if FLAGS.video:
            cv2.waitKey(1)
        else:
            if cv2.waitKey(0) != 32:  # space bar
                exit()
def __init__(self):
    super(Encoder, self).__init__()
    self.original_model = vgg.vgg19_bn(pretrained=True)
    self.convs = list(self.original_model.children())[0]
    self.layers = nn.Sequential(*list(self.convs)[:-1])
def main():
    root = os.path.dirname(os.path.abspath(__file__))
    weights_path = root + '/weights'
    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    assert len(model_lst) > 0, 'No previous model found, please train first!'
    print('Using previous model %s' % model_lst[-1])

    my_vgg = vgg.vgg19_bn(pretrained=False)
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # defaults to /eval
    dataset = Dataset(root + '/eval')
    averages = ClassAverages.ClassAverages()

    all_images = dataset.all_objects()
    for key in sorted(all_images.keys()):
        start_time = time.time()
        data = all_images[key]
        truth_img = data['Image']
        img = np.copy(truth_img)
        objects = data['Objects']
        cam_to_img = data['Calib']

        for detectedObject in objects:
            label = detectedObject.label
            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img

            # the tensor is already created on the GPU; the original's extra
            # unassigned input_tensor.cuda() call was a no-op and is dropped
            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]
            dim += averages.get_item(label['Class'])

            argmax = np.argmax(conf)
            cos, sin = orient[argmax, :2]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax] - np.pi

            location = plot_regressed_3d_bbox(img, truth_img, cam_to_img, label['Box_2D'], dim, alpha, theta_ray)
            print('Truth pose: %s\nEstimated location: %s' % (label['Location'], location))  # x,y,z

            # plot car by car
            if single_car:
                numpy_vertical = np.concatenate((truth_img, img), axis=0)
                cv2.imshow('3D-DeepBox', numpy_vertical)
                cv2.waitKey(0)

        print('Got %s poses in %.3f seconds\n' % (len(objects), time.time() - start_time))

        # plot image by image
        if not single_car:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('3D-DeepBox', numpy_vertical)
            if cv2.waitKey(0) == 27:
                return
def main():
    # hyper parameters
    epochs = 100
    batch_size = 8
    alpha = 0.6
    w = 0.4

    train_path = os.path.abspath(os.path.dirname(__file__)) + '/Kitti/training'
    dataset = Dataset(train_path)

    params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 6}
    generator = data.DataLoader(dataset, **params)

    my_vgg = vgg.vgg19_bn(pretrained=True)
    model = Model(features=my_vgg.features).cuda()
    opt_SGD = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    conf_loss_func = nn.CrossEntropyLoss().cuda()
    dim_loss_func = nn.MSELoss().cuda()
    orient_loss_func = OrientationLoss

    # load any previous weights
    model_path = os.path.abspath(os.path.dirname(__file__)) + '/weights/back_up/'
    latest_model = None
    first_epoch = 0
    if not os.path.isdir(model_path):
        os.mkdir(model_path)
    else:
        try:
            latest_model = [x for x in sorted(os.listdir(model_path)) if x.endswith('.pkl')][-1]
        except:
            pass

    if latest_model is not None:
        checkpoint = torch.load(model_path + latest_model)
        model.load_state_dict(checkpoint['model_state_dict'])
        opt_SGD.load_state_dict(checkpoint['optimizer_state_dict'])
        first_epoch = checkpoint['epoch']
        loss = checkpoint['loss']

        print('Found previous checkpoint: %s at epoch %s' % (latest_model, first_epoch))
        print('Resuming training....')

    total_num_batches = int(len(dataset) / batch_size)

    for epoch in range(first_epoch + 1, epochs + 1):
        curr_batch = 0
        passes = 0
        print("Loading all detected objects in dataset...")
        for local_batch, local_labels in generator:
            truth_orient = local_labels['Orientation'].float().cuda()
            truth_conf = local_labels['Confidence'].long().cuda()
            truth_dim = local_labels['Dimensions'].float().cuda()

            local_batch = local_batch.float().cuda()
            [orient, conf, dim] = model(local_batch)

            orient_loss = orient_loss_func(orient, truth_orient, truth_conf)
            dim_loss = dim_loss_func(dim, truth_dim)

            truth_conf = torch.max(truth_conf, dim=1)[1]
            conf_loss = conf_loss_func(conf, truth_conf)

            loss_theta = conf_loss + w * orient_loss
            loss = alpha * dim_loss + loss_theta

            opt_SGD.zero_grad()
            loss.backward()
            opt_SGD.step()

            if passes % 10 == 0:
                print("--- epoch %s | batch %s/%s --- [loss: %s]" %
                      (epoch, curr_batch, total_num_batches, loss.item()))
                passes = 0

            passes += 1
            curr_batch += 1

        # save after every 10 epochs
        if epoch % 10 == 0:
            name = model_path + 'epoch_%s.pkl' % epoch
            print("====================")
            print("Done with epoch %s!" % epoch)
            print("Saving weights as %s ..." % name)
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': opt_SGD.state_dict(),
                'loss': loss
            }, name)
            print("====================")
            print('epoch', epoch)
            print('model_state_dict', model.state_dict())
            print('optimizer_state_dict', opt_SGD.state_dict())
            print('loss', loss)
epochs = config["epochs"]
batches = config["batches"]
bins = config["bins"]
alpha = config["alpha"]
w = config["w"]

print("load train data!")
print("load val data!")

data = Dataset.ImageDataset(path + "/training")
# print("data:")
data = Dataset.BatchDataset(data, batches, bins)

if len(model_list) == 0:
    print("No previous model found, start training!")
    # use a local name to avoid shadowing the imported torchvision vgg module
    base_vgg = vgg.vgg19_bn(pretrained=True)
    model = Model.Model(features=base_vgg.features, bins=bins).cuda()
else:
    print("Find previous model %s" % model_list[-1])
    base_vgg = vgg.vgg19_bn(pretrained=False)
    model = Model.Model(features=base_vgg.features, bins=bins).cuda()
    param = torch.load(model_path + "/%s" % model_list[-1])
    model.load_state_dict(param)

opt_SGD = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
dim_LossFunc = nn.MSELoss().cuda()
conf_LossFunc = nn.CrossEntropyLoss().cuda()
# print("33333", float(data.num_of_patch) / batches)
iter_each_time = round(float(data.num_of_patch) / batches)

for epoch in range(epochs):
    for i in range(int(iter_each_time)):
def test_vgg19_bn(self):
    self.run_model_test(vgg19_bn(), train=False, batch_size=BATCH_SIZE)
def main():
    root = os.path.dirname(os.path.abspath(__file__))
    weights_path = root + '/weights'

    cam = cv2.VideoCapture(0)

    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    assert len(model_lst) > 0, 'No previous model found, please train first!'
    print('Using previous model %s' % model_lst[-1])

    my_vgg = vgg.vgg19_bn(pretrained=False)
    # TODO: load bins from file or something
    model = Model.Model(features=my_vgg.features, bins=2).cuda()
    checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # load yolo
    yolo_path = root + '/weights'
    yolo = cv_Yolo(yolo_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    FLAGS = parser.parse_args()
    cal_dir = FLAGS.cal_dir

    # using P_rect from global calibration file
    calib_path = root + '/' + cal_dir
    calib_file = calib_path + 'calib_cam_to_cam.txt'

    # using P from each frame
    # calib_path = root + '/Kitti/testing/calib/'

    while cv2.waitKey(5) != 27:
        # P for each frame
        # calib_file = calib_path + id + '.txt'

        ret, truth_img = cam.read()
        if not ret:
            continue

        start_time = time.time()

        img = truth_img.copy()
        yolo_img = truth_img.copy()

        detections = yolo.detect(yolo_img)

        for detection in detections:
            if not averages.recognized_class(detection.detected_class):
                continue

            # This is throwing when the 2d bbox is invalid
            # TODO: better check
            try:
                detectedObject = DetectedObject(img, detection.detected_class, detection.box_2d, calib_file)
            except:
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]
            dim += averages.get_item(detected_class)

            argmax = np.argmax(conf)
            cos, sin = orient[argmax, :2]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax] - np.pi

            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha, theta_ray, truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha, theta_ray)

            if not FLAGS.hide_debug:
                print('Estimated location: %s' % location)  # x,y,z

        if not FLAGS.hide_debug:
            print('Got %s poses in %.3f seconds\n' % (len(detections), time.time() - start_time))

        if FLAGS.show_yolo:
            img = np.concatenate((truth_img, img), axis=0)

        cv2.imshow('3D-DeepBox', img)
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision.models import vgg

if __name__ == '__main__':
    bins = 8
    w = 1
    alpha = 1

    data = Dataset.ImageDataset('../../Kitti/training')
    data = Dataset.BatchDataset(data, 8, bins)

    # '''
    # vgg = torch.load('model/vgg16.pkl').cuda()
    # use a local name to avoid shadowing the imported vgg module
    base_vgg = vgg.vgg19_bn(pretrained=True)
    # param = torch.load('model.pkl')
    model = Model.Model(features=base_vgg.features, bins=bins).cuda()
    # model.load_state_dict(param)
    opt_SGD = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    dim_LossFunc = nn.MSELoss().cuda()
    conf_LossFunc = nn.CrossEntropyLoss().cuda()

    for epoch in range(25):
        for i in range(5000):
            batch, confidence, confidence_multi, ntheta, angleDiff, dimGT, LocalAngle, Ry, ThetaRay = data.Next()
            confidence_arg = np.argmax(confidence, axis=1)

            batch = Variable(torch.FloatTensor(batch), requires_grad=False).cuda()
            # np.int is removed in recent NumPy; use np.int64 instead
            confidence = Variable(torch.LongTensor(confidence.astype(np.int64)), requires_grad=False).cuda()
            confidence_multi = Variable(torch.LongTensor(confidence_multi.astype(np.int64)), requires_grad=False).cuda()
            ntheta = Variable(torch.FloatTensor(ntheta), requires_grad=False).cuda()
def convert_to_image(array, size=256):
    img = np.transpose(array, [1, 2, 0])
    img = img * 127.5 + 127.5
    img = np.clip(img, 0, 255).astype(np.uint8)
    return cv2.resize(img, (size, size))


label_placeholder = tf.placeholder(tf.float32, [None, 121])
synthesise = Gs.get_output_for(latent, label_placeholder)

weights_path = '../rotation_detector/weights'
model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
print('Using previous model %s' % model_lst[-1])
my_vgg = vgg.vgg19_bn(pretrained=True).cpu()
model = Model.Model(features=my_vgg.features, bins=2).cpu()
checkpoint = torch.load(weights_path + '/%s' % model_lst[-1], map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# load yolo
yolo_path = '../rotation_detector/weights'
yolo = cv_Yolo(yolo_path)

averages = ClassAverages.ClassAverages()
angle_bins = generate_bins(2)

for angle in range(0, 360, 30):
def main():
    FLAGS = parser.parse_args()

    # load torch
    weights_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2).cuda()
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1])
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo_path = os.path.abspath(os.path.dirname(__file__)) + '/weights'
    yolo = cv_Yolo(yolo_path)

    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    image_dir = FLAGS.image_dir
    cal_dir = FLAGS.cal_dir
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = os.path.abspath(os.path.dirname(__file__)) + "/" + image_dir

    # using P_rect from global calibration file
    calib_path = os.path.abspath(os.path.dirname(__file__)) + "/" + cal_dir
    # calib_file = calib_path + "calib_cam_to_cam.txt"

    # using P from each frame
    # calib_path = os.path.abspath(os.path.dirname(__file__)) + '/Kitti/testing/calib/'

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except:
        print("\nError: no images in %s" % img_path)
        exit()

    for id in ids:
        start_time = time.time()

        img_file = img_path + id + ".png"

        # P for each frame
        calib_file = calib_path + id + ".txt"

        # comp_img = np.array(Image.open(img_file).convert('RGB'))
        truth_img = cv2.imread(img_file)
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        detections = yolo.detect(img_file)

        for detection in detections:
            if not averages.recognized_class(detection.detected_class):
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            # try:
            object = DetectedObject(img, detection.detected_class, detection.box_2d, calib_file)
            # except:
            #     continue

            theta_ray = object.theta_ray
            input_img = object.img
            proj_matrix = object.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224]).cuda()
            input_tensor[0, :, :, :] = input_img

            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]
            dim += averages.get_item(detected_class)

            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi

            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha, theta_ray, truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha, theta_ray)

            if not FLAGS.hide_debug:
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imwrite(os.path.join('output', id + '_yolo.png'), numpy_vertical)
            # cv2.imshow('SPACE for next image, any other key to exit', numpy_vertical)
        else:
            cv2.imwrite(os.path.join('output', id + '_3d.png'), img)
            # cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print("\n")
            print('Got %s poses in %.3f seconds' % (len(detections), time.time() - start_time))
            print('-------------')
def load_model(model_name, classes=1000, pretrained=True, in_channels=3):
    """Load the specified VGG (or AlexNet/LeNet) architecture for ImageNet.

    Args:
        model_name: architecture type
        classes: number of predicted classes
        pretrained: load pretrained network on ImageNet
        in_channels: number of input channels; a new input layer is created if != 3
    """
    if pretrained:
        assert classes == 1000, "Pretrained models are provided only for Imagenet."

    kwargs = {'num_classes': classes}

    # All plain VGG variants share the same construction and input-layer replacement,
    # so they are dispatched through one table (behavior identical to the original
    # per-variant branches).
    vgg_constructors = {
        'vgg11': VGG.vgg11, 'vgg13': VGG.vgg13, 'vgg16': VGG.vgg16, 'vgg19': VGG.vgg19,
        'vgg11bn': VGG.vgg11_bn, 'vgg13bn': VGG.vgg13_bn,
        'vgg16bn': VGG.vgg16_bn, 'vgg19bn': VGG.vgg19_bn,
    }

    def replace_input_layer(net, kaiming_init=True, kernel_size=3, stride=1, padding=2 if False else 1):
        input_layer = nn.Conv2d(in_channels, 64, kernel_size=kernel_size, stride=stride, padding=padding)
        if kaiming_init:
            nn.init.kaiming_normal_(input_layer.weight, mode='fan_out', nonlinearity='relu')
            input_layer.bias.data.zero_()
        net.features[0] = input_layer

    if model_name in vgg_constructors:
        net = vgg_constructors[model_name](pretrained=pretrained, **kwargs)
        if in_channels != 3:
            replace_input_layer(net)
    elif model_name == 'vgg19_orig':
        net = VGG.vgg19(pretrained=False, **kwargs)
        if in_channels != 3:
            replace_input_layer(net, kaiming_init=False)
        init_weights_vgg_orig(net)
    elif model_name == 'alexnet':
        net = AlexNet(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            replace_input_layer(net, kernel_size=11, stride=4, padding=2)
    elif model_name == 'lenet':
        kwargs['in_channels'] = in_channels
        net = lenet(**kwargs)
    else:
        raise ValueError("Unsupported model architecture.")
    return net
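# Usage sketch (assumption: the surrounding module exposes VGG as the torchvision
# vgg namespace, as the calls above suggest):
#
#   net = load_model('vgg19bn', classes=1000, pretrained=True)                    # standard 3-channel input
#   gray = load_model('vgg16bn', classes=1000, pretrained=True, in_channels=1)    # replaces features[0] with a 1-channel conv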
    img[:, :, 2] = batch[0, 0, :, :]
    return img


if __name__ == '__main__':
    bins = 8
    w = 1
    alpha = 1
    path = '../../Kitti/training'

    kittiData = kitti.KITTIObjectsReader(path)
    # print kittiData.getFrameInfo(0)['calibration']
    # sys.exit()

    data = Dataset.ImageDataset(path)
    data = Dataset.BatchDataset(data, 1, bins, mode='eval')
    # print 'a'
    param = torch.load('model.pkl')
    VGG = vgg.vgg19_bn(pretrained=False)
    model = Model(features=VGG.features, bins=bins).cuda()
    model.load_state_dict(param)
    model.eval()

    total = 0
    error_lst = []
    distance_lst = []

    for epoch in range(1):
        for i in range(5000):
            # data.idx = 10
            batch, centerAngle, info = data.EvalBatch()
            P = kittiData.getFrameInfo(info['Index'])['calibration']['projection_left']
            box_2D = info['Box_2D']
            dimGT = info['Dimension']
            angle = info['LocalAngle'] / np.pi * 180
def main():
    # Defaults: cal_dir='camera_cal/', hide_debug=False, image_dir='eval/image_2/', show_yolo=False, video=False
    FLAGS = parser.parse_args()

    # Note: there are two weight files in total: yolov3.weights for the 2D YOLO detector,
    # and the self-trained weights that regress dimensions and alpha, named epoch_10.pkl
    weights_path = os.path.abspath(os.path.dirname(__file__)) + os.path.sep + 'weights' + os.path.sep
    model_lst = [x for x in sorted(os.listdir(weights_path)) if x.endswith('.pkl')]
    if len(model_lst) == 0:
        print('No previous model found, please train first!')
        exit()
    else:
        print('Using previous model %s' % model_lst[-1])
        # vgg19_bn extracts the image features that feed the three branches below
        # TODO: switch to vgg16_bn?
        my_vgg = vgg.vgg19_bn(pretrained=True)
        # TODO: load bins from file or something
        model = Model.Model(features=my_vgg.features, bins=2)
        # run inference on the CPU
        checkpoint = torch.load(weights_path + '/%s' % model_lst[-1], map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        model.eval()

    # load yolo
    yolo_path = os.path.abspath(os.path.dirname(__file__)) + os.path.sep + 'weights' + os.path.sep
    yolo = cv_Yolo(yolo_path)

    # per-class dimension statistics collected from the training set
    averages = ClassAverages.ClassAverages()

    # TODO: clean up how this is done. flag?
    angle_bins = generate_bins(2)

    # directory of images to run detection on
    image_dir = FLAGS.image_dir
    # when all images share the same proj_matrix, it should be placed in this directory
    cal_dir = FLAGS.cal_dir
    # FLAGS.video defaults to False
    if FLAGS.video:
        if FLAGS.image_dir == "eval/image_2/" and FLAGS.cal_dir == "camera_cal/":
            image_dir = "eval/video/2011_09_26/image_2/"
            cal_dir = "eval/video/2011_09_26/"

    img_path = os.path.abspath(os.path.dirname(__file__)) + os.path.sep + image_dir

    # using P_rect from global calibration file
    # calib_path = os.path.abspath(os.path.dirname(__file__)) + os.path.sep + cal_dir
    # calib_file = calib_path + "calib_cam_to_cam.txt"

    # using P from each frame
    calib_path = os.path.abspath(os.path.dirname(__file__)) + os.path.sep + 'eval' + os.path.sep + 'calib' + os.path.sep

    try:
        ids = [x.split('.')[0] for x in sorted(os.listdir(img_path))]
    except:
        print("\nError: no images in %s" % img_path)
        exit()

    for img_id in ids:
        start_time = time.time()

        img_file = img_path + img_id + ".png"

        # P for each frame
        calib_file = calib_path + img_id + ".txt"

        truth_img = cv2.imread(img_file)
        img = np.copy(truth_img)
        yolo_img = np.copy(truth_img)

        # YOLO returns 2D pixel boxes and class labels
        detections = yolo.detect(yolo_img)

        for detection in detections:
            # The detected class must be one of the classes enumerated for the KITTI dataset;
            # otherwise the detection is ignored. YOLO defines more classes than KITTI, so it
            # may detect a class that has no KITTI counterpart.
            if not averages.recognized_class(detection.detected_class):
                print('class ' + detection.detected_class + ' is not in KITTI class, so ignore this class')
                continue

            # this is throwing when the 2d bbox is invalid
            # TODO: better check
            # Pass the image, detected class, 2D box and this frame's proj_matrix to DetectedObject.__init__()
            try:
                detectedObject = DetectedObject(img, detection.detected_class, detection.box_2d, calib_file)
            except:
                print('YOLO detection error, invalid 2D box!')
                continue

            theta_ray = detectedObject.theta_ray
            input_img = detectedObject.img
            proj_matrix = detectedObject.proj_matrix
            box_2d = detection.box_2d
            detected_class = detection.detected_class

            input_tensor = torch.zeros([1, 3, 224, 224])
            input_tensor[0, :, :, :] = input_img

            # get the predicted orient, conf and dim
            [orient, conf, dim] = model(input_tensor)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]
            dim += averages.get_item(detected_class)

            # take the bin with the larger conf and use that bin's orient as the final orient
            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            # the predicted cos and sin values; in the training set these are
            # cos(angle_diff) and sin(angle_diff), where angle_diff is the angle between
            # the ground-truth alpha (mapped to 0-2pi) and the corresponding bin
            cos = orient[0]
            sin = orient[1]
            # np.arctan2 takes sin as the y coordinate and cos as the x coordinate and
            # returns an angle in radians in [-pi, +pi], see
            # https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.arctan2.html
            alpha = np.arctan2(sin, cos)
            alpha += angle_bins[argmax]
            alpha -= np.pi
            # this gives the final alpha

            # also show the 2D detections (off by default)
            if FLAGS.show_yolo:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha, theta_ray, truth_img)
            else:
                location = plot_regressed_3d_bbox(img, proj_matrix, box_2d, dim, alpha, theta_ray)

            if not FLAGS.hide_debug:  # FLAGS.hide_debug defaults to False
                # print the location of every detected class; shift along y to stay
                # consistent with the KITTI convention (location[1] += dim[0])
                location[1] += dim[0] / 2
                print('Estimated pose: %s' % location)

        if FLAGS.show_yolo:  # FLAGS.show_yolo defaults to False
            numpy_vertical = np.concatenate((truth_img, img), axis=0)
            cv2.imshow('SPACE for next image, any other key to exit', numpy_vertical)
        else:
            cv2.imshow('3D detections', img)

        if not FLAGS.hide_debug:
            print('Got %s detect class in %.3f seconds' % (len(detections), time.time() - start_time))
            print('-------------')

        if FLAGS.video:
            cv2.waitKey(1)
        else:
            if cv2.waitKey(0) != 32:  # space bar
                exit()
def main():
    store_path = os.path.abspath(os.path.dirname(__file__)) + '/models'
    model_lst = [x for x in sorted(os.listdir(store_path)) if x.endswith('.pkl')]
    if len(model_lst) == 0:
        print('No previous model found, please check it')
        exit()
    else:
        print('Find previous model %s' % model_lst[-1])
        vgg = V.vgg19_bn(pretrained=False)
        model = Model.Model(features=vgg.features, bins=2).cuda()
        params = torch.load(store_path + '/%s' % model_lst[-1])
        model.load_state_dict(params)
        model.eval()

    dataset = Dataset(os.path.abspath(os.path.dirname(__file__)) + '/eval')

    bins = model.bins
    centerAngle = np.zeros(bins)
    interval = 2 * np.pi / bins
    for i in range(1, bins):
        centerAngle[i] = i * interval

    for data in dataset:
        truth_img = data['Image']
        img = np.copy(truth_img)
        objects = data['Objects']
        cam_to_img = data['Calib']

        for object in objects:
            label = object.label
            theta_ray = object.theta_ray
            batch = object.img

            alpha = label['Alpha']
            dimensions = label['Dimensions']

            batch = Variable(torch.FloatTensor(batch), requires_grad=False).cuda()
            [orient, conf, dim] = model(batch)
            orient = orient.cpu().data.numpy()[0, :, :]
            conf = conf.cpu().data.numpy()[0, :]
            dim = dim.cpu().data.numpy()[0, :]

            argmax = np.argmax(conf)
            orient = orient[argmax, :]
            cos = orient[0]
            sin = orient[1]
            theta = np.arctan2(sin, cos)
            theta = theta + centerAngle[argmax]
            print(theta)
            print(alpha)
            exit()

            plot_regressed_3d_bbox(img, truth_img, label['Box_2D'], dim, alpha, theta_ray, cam_to_img, label)

        numpy_vertical = np.concatenate((truth_img, img), axis=0)
        cv2.imshow('2D detection on top, 3D prediction on bottom', numpy_vertical)
        cv2.waitKey(0)
def recordVGG(info):
    global SKIP
    import torchvision.models.vgg as vggGen

    # Each entry: (name used in info['name_list'] and the JSON file, display name, constructor).
    # The per-variant blocks in the original were identical, so they are driven by one loop.
    variants = [
        ('vgg11', 'VGG11', vggGen.vgg11),
        ('vgg13', 'VGG13', vggGen.vgg13),
        ('vgg16', 'VGG16', vggGen.vgg16),
        ('vgg19', 'VGG19', vggGen.vgg19),
        ('vgg11_bn', 'VGG11_bn', vggGen.vgg11_bn),
        ('vgg13_bn', 'VGG13_bn', vggGen.vgg13_bn),
        ('vgg16_bn', 'VGG16_bn', vggGen.vgg16_bn),
        ('vgg19_bn', 'VGG19_bn', vggGen.vgg19_bn),
    ]
    for name, display, constructor in variants:
        if not (SKIP and name in info['name_list']):
            INFO("proceeding for %s..." % display)
            net = constructor(pretrained=True).cuda()
            sum = __summary(net, [3, 224, 224], verbose=True)
            __writeInfoJSON(sum, name)
        else:
            INFO("Skip %s" % display)
def main():
    # hyper parameters
    epochs = 100
    batch_size = 8  # number of samples per training batch
    alpha = 0.6
    w = 0.4

    print("Loading all detected objects in dataset...")

    # training set path, defaults to ./Kitti/training/
    train_path = os.path.abspath(os.path.dirname(__file__)) + os.path.sep + 'Kitti' + os.path.sep + 'training' + os.path.sep
    # runs Dataset's __init__()
    dataset = Dataset(train_path)

    # shuffle=True shuffles the data; num_workers is the number of loader workers
    params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 6}
    generator = data.DataLoader(dataset, **params)

    my_vgg = vgg.vgg19_bn(pretrained=True)
    model = Model(features=my_vgg.features)
    opt_SGD = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
    conf_loss_func = nn.CrossEntropyLoss()
    dim_loss_func = nn.MSELoss()
    # the orientation loss uses a custom loss function
    orient_loss_func = OrientationLoss

    # load any previous weights
    model_path = os.path.abspath(os.path.dirname(__file__)) + os.path.sep + 'weights' + os.path.sep
    latest_model = None
    first_epoch = 0
    if not os.path.isdir(model_path):
        os.mkdir(model_path)
    else:
        try:
            latest_model = [x for x in sorted(os.listdir(model_path)) if x.endswith('.pkl')][-1]
        except:
            pass

    if latest_model is not None:
        # load the latest checkpoint, e.g. epoch_10.pkl
        checkpoint = torch.load(model_path + latest_model, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        opt_SGD.load_state_dict(checkpoint['optimizer_state_dict'])
        first_epoch = checkpoint['epoch']
        loss = checkpoint['loss']

        print('Found previous checkpoint: %s at epoch %s' % (latest_model, first_epoch))
        print('Resuming training....')

    total_num_batches = int(len(dataset) / batch_size)

    for epoch in range(first_epoch + 1, epochs + 1):
        curr_batch = 0
        passes = 0
        for local_batch, local_labels in generator:
            # Orientation holds the cos and sin of the difference between the angle and
            # the center angle of the bin it falls into; bins the angle does not fall
            # into get an orientation of (0, 0)
            truth_orient = local_labels['Orientation'].float()
            # Confidence encodes which bin the label angle falls into; since 2 bins are
            # used here, each label row's Confidence is a 1x2 matrix
            truth_conf = local_labels['Confidence'].long()
            # ground-truth dimensions, with the per-class mean already subtracted
            truth_dim = local_labels['Dimensions'].float()

            local_batch = local_batch.float()
            # feed the data through the model to get predictions
            [orient, conf, dim] = model(local_batch)

            orient_loss = orient_loss_func(orient, truth_orient, truth_conf)
            dim_loss = dim_loss_func(dim, truth_dim)

            # index of the bin where truth_conf is 1
            truth_conf = torch.max(truth_conf, dim=1)[1]
            conf_loss = conf_loss_func(conf, truth_conf)

            loss_theta = conf_loss + w * orient_loss
            loss = alpha * dim_loss + loss_theta

            opt_SGD.zero_grad()
            loss.backward()
            opt_SGD.step()

            if passes % 10 == 0:
                print("--- epoch %s | batch %s/%s --- [loss: %s]" %
                      (epoch, curr_batch, total_num_batches, loss.item()))
                passes = 0

            passes += 1
            curr_batch += 1

        # save after every 10 epochs
        if epoch % 10 == 0:
            name = model_path + 'epoch_%s.pkl' % epoch
            print("====================")
            print("Done with epoch %s!" % epoch)
            print("Saving weights as %s ..." % name)
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': opt_SGD.state_dict(),
                'loss': loss
            }, name)
            print("====================")