# Run settings taken from the parsed command-line arguments.
images = args.images
batch_size = int(args.bs)
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
start = 0
CUDA = torch.cuda.is_available()

num_classes = 4
classes = load_classes("data/scattered_coins/train/classes.txt")

# Set up the neural network.
print("Loading network...")
cfgfile = os.path.abspath("cfg/yolov3_mod.cfg")
model = Darknet(cfgfile)
print("Network successfully loaded")

# Swap out the 1x1 convolution in front of each YOLO layer and patch the
# class count stored in the matching cfg block.  The replacement head width
# was hard-coded as 27, which equals 3 * (5 + num_classes) for 4 classes
# (presumably three anchors per detection layer -- confirm against the cfg).
# Deriving it keeps the head and `num_classes` from drifting apart.
det_layers = [82, 94, 106]
det_out_channels = 3 * (5 + num_classes)
for i in det_layers:
    in_channels = model.module_list[i - 1][0].in_channels
    model.module_list[i - 1] = nn.Sequential(
        nn.Conv2d(in_channels, det_out_channels, 1))
    model.blocks[i + 1]["classes"] = num_classes
print("Layers have been swapped out")

# Restore the training state.  The checkpoint is mapped onto the CPU so it
# loads on machines without a GPU.
checkpoint = torch.load("checkpoint.pkl", map_location=torch.device("cpu"))
model.load_state_dict(checkpoint["model_state_dict"])
prev_epoch = checkpoint["epoch"] + 1
loss = checkpoint["loss"]
# Threshold constants (names suggest NMS / IoU cut-offs; their consumers are
# outside this fragment).
nms_thresh = 0.4
iou_thresh = 0.5

# Default image size, overridden when the data type is SCANNET.
im_width = 640
im_height = 480
if Data_type == SCANNET:
    im_width = 1296
    im_height = 968

# Seed the RNGs and select which GPUs are visible to CUDA.
torch.manual_seed(seed)
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)

# Specify the model and the loss
model = Darknet(cfgfile)
region_loss = model.loss

# Model settings
# model.load_weights(weightfile)
model.load_weights_until_last(weightfile)
model.print_network()
# The "seen" counter is reset to 0 here, so the counters derived from it
# below (processed_batches, init_epoch) also start from 0.
model.seen = 0
region_loss.iter = model.iter
region_loss.seen = model.seen
processed_batches = model.seen // batch_size
init_width = model.width
init_height = model.height
test_width = 672
test_height = 672
init_epoch = model.seen // nsamples
def main(camera_id, shelf_id):
    """Run the per-camera detection/publish loop until ROS shuts down.

    Initialises a ROS node, connects to the camera at ``192.168.0.<camera_id>``,
    starts a publisher thread for ``shelf_id[0]``, loads the YOLOv3 Darknet
    model and then processes frames in a loop.

    Args:
        camera_id: Last octet of the camera IP; also forwarded to the
            helpers.  Note that it is rebound on every frame to the value
            returned by ``callback``.
        shelf_id: Indexable collection; only ``shelf_id[0]`` is used.

    The camera SDK is released in a ``finally`` clause so that an exception
    inside the loop no longer leaves the device handle open.
    """
    rospy.init_node('MultiProcessingNode', anonymous=True)

    # NOTE(review): camera credentials are hard-coded here; consider moving
    # them to configuration.
    ip = '192.168.0.' + str(camera_id)
    name = str('admin')
    pw = str('a1234567')
    camera = HKCamera(ip, name, pw)

    try:
        threadPubMsg_shelfID_1 = pubmsg.MsgPublishClass(cameraID=camera_id,
                                                        shelfID=shelf_id[0])
        threadPubMsg_shelfID_1.setDaemon(True)
        threadPubMsg_shelfID_1.start()

        shelf1 = 'shelfID_' + str(shelf_id[0])
        threadPubMsg_dict = {shelf1: threadPubMsg_shelfID_1}

        model = loadDataset()
        cfg = Darknet('cfg/yolov3.cfg')
        cfg.load_weights('yolov3.weights')
        cfg.cuda()

        # Single-element lists are passed to `callback`, presumably so it can
        # mutate them in place -- confirm against callback's implementation.
        frame_number2 = [0]
        flag = [0]
        bridge = CvBridge()
        dic_change = {}
        pre_res = {}
        huojia1_id = shelf_id[0]
        print("huojia1_id: {}".format(huojia1_id))

        # Counts consecutive frames for which `callback` returned no result.
        tmp = 0
        while not rospy.is_shutdown():
            frame_origin = camera.getFrame()
            frame_origin = np.array(frame_origin)
            frame_origin = cv2.resize(frame_origin, None, fx=0.75, fy=0.75,
                                      interpolation=cv2.INTER_AREA)
            frame_trans = copy.deepcopy(frame_origin)

            res, camera_id, dict_res = callback(
                (None, cfg, model, frame_number2, bridge, camera_id, flag,
                 frame_origin, huojia1_id, pre_res))

            if res == []:
                # After more than 30 consecutive empty results, publish a
                # trigger with all flags cleared and no frame attached.
                if tmp > 30:
                    threadPubMsg = threadPubMsg_dict['shelfID_' + str(huojia1_id)]
                    threadPubMsg.set_commodity_recognition_trigger_with_image(
                        camera_id=camera_id, person_id=-1, shelf_id=huojia1_id,
                        flag=0, flag1=0, flag2=0, flag_list=[], frame=None)
                    tmp = 0
                else:
                    tmp += 1
                continue

            tmp = 0
            dic = xuanze_original(res, frame_origin, model, cfg, camera_id,
                                  dic_change, huojia1_id)
            # `!= False` (rather than truthiness) keeps the original
            # semantics for non-bool return values of compare_dic.
            if compare_dic(dic, dic_change) != False:  # noqa: E712
                dic = xuanze(res, frame_origin, model, cfg, threadPubMsg_dict,
                             camera_id, dic, dic_change, huojia1_id,
                             frame_trans)
            dic_change = dic
            pre_res = dict_res
    finally:
        HKIPcamera.release()
if __name__ == '__main__':
    # Parse the command line and read the detection thresholds from it.
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0

    # Use the first CUDA device when one is available, otherwise the CPU.
    CUDA = torch.cuda.is_available()
    device = torch.device("cuda:0" if CUDA else "cpu")

    num_classes = 80
    # 5 + num_classes attributes per box (presumably 4 coordinates plus
    # objectness plus class scores -- confirm against the output parser).
    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    # Override the network input height with args.reso; it must be a
    # multiple of 32 and larger than 32.
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    model = model.to(device)
    model.eval()

    # Open the video source given on the command line.
    videofile = args.video
    cap = cv2.VideoCapture(videofile)
# Compute and print the distortion coefficients derived from K.
D = computeDistortionCoefficients(K)
print(D)

# ---------------------Extrinsic Calibration-------------------------------------------------------------------------- #
extrinsics = args.extrinsics

# Model Initialization
if extrinsics:
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()
    num_classes = 80
    # NOTE(review): the file handle opened here is never closed explicitly.
    colors = pkl.load(open("pallete", "rb"))

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    # Override the network input height with args.reso; it must be a
    # multiple of 32 and larger than 32.
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    # Undistortion maps plus a SIFT detector and the state used while
    # iterating over the sample video.
    mapx, mapy = undistortMap(frame, min_factor)
    sift = cv2.xfeatures2d.SIFT_create()
    count = 0
    kp1 = []
    lines = []
    cap = cv2.VideoCapture("sample_video/" + video_file_name)
import sys import time import os from PIL import Image, ImageDraw from utils import * from darknet import Darknet if __name__ == '__main__': if len(sys.argv) == 4: cfgfile = sys.argv[1] weightfile = sys.argv[2] imgdir = sys.argv[3] use_cuda = True darknet_model = Darknet(cfgfile) darknet_model.load_weights(weightfile) if use_cuda: darknet_model = darknet_model.cuda() # read in the label names associated with the darknet model if darknet_model.num_classes == 20: namesfile = 'data/voc.names' elif darknet_model.num_classes == 80: namesfile = 'data/coco.names' else: namesfile = 'data/names' for imgfile in os.listdir(imgdir): if imgfile.endswith('.jpg') or imgfile.endswith('.png'): name = os.path.splitext(imgfile)[0] #image name w/o extension txtname = name + '.txt' txtpath = os.path.abspath(
use_cuda = True datacfg = {'hands': 'cfg/hands.data'} cfgfile = {'hands': 'cfg/yolo-hands.cfg', 'cautery': 'cfg/my_config_realsense.yaml'} weightfile = {'hands': 'backup/hands/000500.weights'} namesfile = {'hands': 'data/hands.names'} ####################################################### # Setting up YOLO-hand ####################################################### model_hand = Darknet(cfgfile['hands']) model_hand.load_weights(weightfile['hands']) print('Loading weights from %s... Done!' % (weightfile['hands'])) if use_cuda: model_hand.cuda() class_names = uyolo.load_class_names(namesfile['hands']) ####################################################### # Setting up DOPE ####################################################### yaml_path = cfgfile['cautery'] with open(yaml_path, 'r') as stream: try: print("Loading DOPE parameters from '{}'...".format(yaml_path))
# Test parameters conf_thresh = 0.25 nms_thresh = 0.4 iou_thresh = 0.5 if not os.path.exists(backupdir): os.mkdir(backupdir) ############### torch.manual_seed(seed) if use_cuda: os.environ['CUDA_VISIBLE_DEVICES'] = gpus torch.cuda.manual_seed(seed) model = Darknet(cfgfile) region_loss = model.loss model.load_weights(weightfile) model.print_network() region_loss.seen = model.seen processed_batches = model.seen / batch_size init_width = model.width init_height = model.height init_epoch = model.seen // nsamples kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {} test_loader = torch.utils.data.DataLoader(dataset.listDataset( testlist,
def main():
    """Train the Darknet model described by ``FLAGS.config``.

    Reads the data and network option files, loads the pretrained weights
    from ``data/yolov3.weights`` and trains with SGD for a fixed 100 epochs,
    saving the weights to ``models_scratch/epoch_<n>.weights`` after every
    epoch.

    Side effects: rebinds the module globals ``loss_layers``,
    ``test_loader`` and ``model``.
    """
    global loss_layers
    global test_loader
    global model

    data_options = read_data_file(FLAGS.data)
    net_options = parse_cfg(FLAGS.config)[0]

    train_dir = data_options['train']
    test_dir = data_options['valid']
    # The following options are parsed but not used below; they are kept so
    # that a malformed option file still fails here, as it did before.
    names = data_options['names']
    batch_size = int(net_options['batch'])
    learning_rate = float(net_options['learning_rate'])
    hue = float(net_options['hue'])
    exposure = float(net_options['exposure'])
    saturation = float(net_options['saturation'])
    momentum = float(net_options['momentum'])
    epochs = 100

    model = Darknet(FLAGS.config)

    torch.manual_seed(0)
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = data_options['gpus']
        torch.cuda.manual_seed(0)

    model = model.to(device)
    model.load_weights(weightfile="data/yolov3.weights")
    loss_layers = model.loss_layers

    optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                          momentum=momentum)

    train_data = dataset.YoloDataset(train_dir, (model.width, model.height),
                                     transform=transforms.ToTensor(),
                                     train=True)
    test_data = dataset.YoloDataset(test_dir, (model.width, model.height),
                                    transform=transforms.ToTensor(),
                                    train=False)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    # Create the checkpoint directory up front so the first save cannot fail
    # after a full epoch of training.
    if not os.path.isdir('models_scratch'):
        os.makedirs('models_scratch')

    for epoch in range(epochs):
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = model(images)

            # One loss term per loss layer; their sum is back-propagated.
            org_loss = []
            for i, layer in enumerate(loss_layers):
                layer.seen += labels.data.size(0)
                org_loss.append(layer(output[i]['output'], labels))
            sum(org_loss).backward()
            optimizer.step()

        model.save_weights('models_scratch/epoch_{}.weights'.format(epoch + 1))
        print('Epoch_{:d} model saved.'.format(epoch + 1))
class Detector(torch.nn.Module):
    """Inference wrapper around a Darknet network with three output scales.

    The wrapped network is put in eval mode on construction.  ``forward``
    filters each output map by objectness, decodes the surviving cells into
    boxes and runs NMS on the concatenated result.
    """

    def __init__(self, save_net):
        """Build the network and load its weights from ``save_net``."""
        super(Detector, self).__init__()
        self.net = Darknet(80)
        self.net.load_weights(save_net)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.net.to(self.device)
        self.net.eval()  # inference only

    def forward(self, input, thresh, anchors):
        """Detect boxes in ``input``.

        Args:
            input: Image batch tensor; moved to ``self.device``.
            thresh: Objectness threshold applied to sigmoid(confidence).
            anchors: Mapping with keys 13, 26 and 52, each giving the anchor
                (w, h) pairs for that output scale.

        Returns:
            Whatever ``nms`` returns for the concatenated CPU box tensor
            (rows are ``[n, cx, cy, w, h, class, confidence]``).
        """
        input_ = input.to(self.device)
        # Pure inference: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            output_13, output_26, output_52 = self.net(input_)

        # (output map, stride back to input pixels, anchors key) per scale.
        scales = ((output_13, 32, 13), (output_26, 16, 26), (output_52, 8, 52))
        boxes = []
        for output, stride, key in scales:
            idxs, vecs = self._filter(output, thresh)
            boxes.append(self._parse(idxs, vecs, stride, anchors[key]))

        box = torch.cat(boxes, dim=0)
        return nms(box.cpu())

    def _filter(self, output, thresh):
        """Select the cells whose sigmoid confidence exceeds ``thresh``.

        Returns:
            ``(idxs, vecs)`` where ``idxs`` has one ``[n, y, x, anchor]`` row
            per selected cell and ``vecs`` holds the matching raw outputs.
        """
        # (N, C, H, W) -> (N, H, W, 3, C // 3): one vector per anchor.
        output = output.permute(0, 2, 3, 1)
        output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)
        mask = torch.sigmoid(output[..., 4]) > thresh
        idxs = mask.nonzero()
        vecs = output[mask]
        return idxs, vecs

    def _parse(self, idxs, vecs, t, anchors):
        """Decode filtered cells into ``[n, cx, cy, w, h, class, conf]`` rows.

        Args:
            idxs: ``[n, y, x, anchor]`` index rows from ``_filter``.
            vecs: Raw output vectors from ``_filter``.
            t: Stride that scales grid coordinates to input pixels.
            anchors: Anchor ``(w, h)`` pairs for this scale.
        """
        # Nothing passed the threshold: return an empty box table directly
        # instead of stacking placeholder columns.
        if vecs.size(0) == 0:
            return vecs.new_zeros((0, 7))

        anchors = torch.as_tensor(anchors, dtype=torch.float32).to(self.device)
        n = idxs[:, 0]  # image index within the batch
        a = idxs[:, 3]  # anchor index
        # Grid index plus sigmoid offset, scaled by the stride.
        cy = (idxs[:, 1].float() + torch.sigmoid(vecs[:, 1])) * t
        cx = (idxs[:, 2].float() + torch.sigmoid(vecs[:, 0])) * t
        # Anchor size scaled by the exponentiated raw output.
        w = anchors[a, 0] * torch.exp(vecs[:, 2])
        h = anchors[a, 1] * torch.exp(vecs[:, 3])
        cls = torch.sigmoid(vecs[:, 4])
        # Index of the highest class score.
        _, pred = torch.max(vecs[:, 5:85], dim=1)
        return torch.stack([n.float(), cx, cy, w, h, pred.float(), cls], dim=1)
reso = 64 # it should be a multiple of 32 and greater than 32 confidence = 0.1 nms_thesh = 0.4 batch_size = 6 weightsfile = '/Users/reo911gt3/Desktop/mspenny/modules/yolo/yolov3.weights' cfgfile = '/Users/reo911gt3/Desktop/mspenny/modules/yolo/cfg/yolov3.cfg' # yolov3 load num_classes = 80 classes = load_classes( '/Users/reo911gt3/Desktop/mspenny/modules/yolo/data/coco.names') #Set up the neural network print("Loading network.....") model = Darknet(cfgfile) model.load_weights(weightsfile) print("Network successfully loaded") model.net_info["height"] = reso inp_dim = int(model.net_info["height"]) assert inp_dim % 32 == 0 assert inp_dim > 32 #If there's a GPU availible, put the model on GPU CUDA = torch.cuda.is_available() if CUDA: model.cuda() #Set the model in evaluation mode model.eval()
def main():
    """Configure training from ``FLAGS`` and run the train/evaluate loop.

    Reads the data and network option files, publishes the derived settings
    as module globals (other functions in this module read them), builds the
    model and optimizer, then either evaluates once (when the global
    ``evaluate`` is true) or trains from ``init_epoch + 1`` up to
    ``max_epochs``, saving periodically.  A ``KeyboardInterrupt`` ends
    training cleanly.

    Fixes relative to the previous version:
      * the per-parameter weight-decay groups (no decay for ``.bn`` / ``.bias``
        parameters) were built but never handed to the optimizer;
      * ``mfscore`` was left unbound when the initial-evaluation branch was
        taken, so ``--localmax`` raised ``NameError``.
    """
    global use_cuda, max_epochs, device, model, loss_layers, optimizer

    datacfg = FLAGS.data
    cfgfile = FLAGS.config
    weightfile = FLAGS.weights
    no_eval = FLAGS.no_eval

    data_options = read_data_cfg(datacfg)
    net_options = parse_cfg(cfgfile)[0]

    use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda)

    # Settings are stored through globals() so that sibling functions can
    # read them; inside this function they resolve as module globals.
    globals()["trainlist"] = data_options['train']
    globals()["testlist"] = data_options['valid']
    globals()["backupdir"] = data_options['backup']
    globals()["gpus"] = data_options['gpus']  # e.g. 0,1,2,3
    globals()["ngpus"] = len(gpus.split(','))
    globals()["num_workers"] = int(data_options['num_workers'])

    globals()["batch_size"] = int(net_options['batch'])
    globals()["max_batches"] = 10*int(net_options['max_batches'])
    globals()["learning_rate"] = float(net_options['learning_rate'])
    globals()["momentum"] = float(net_options['momentum'])
    globals()["decay"] = float(net_options['decay'])
    globals()["steps"] = [float(step) for step in net_options['steps'].split(',')]
    globals()["scales"] = [float(scale) for scale in net_options['scales'].split(',')]

    # Train parameters: fall back to an epoch count derived from max_batches
    # when the cfg does not specify max_epochs.
    try:
        max_epochs = int(net_options['max_epochs'])
    except KeyError:
        nsamples = file_lines(trainlist)
        max_epochs = (max_batches*batch_size)//nsamples+1

    seed = int(time.time())
    torch.manual_seed(seed)
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    model = Darknet(cfgfile, use_cuda=use_cuda)
    model.load_weights(weightfile)

    nsamples = file_lines(trainlist)
    # Initialize the model's progress counters.
    if FLAGS.reset:
        model.seen = 0
        init_epoch = 0
    else:
        init_epoch = model.seen//nsamples

    loss_layers = model.loss_layers
    for l in loss_layers:
        l.seen = model.seen

    globals()["test_loader"] = load_testlist(testlist)
    if use_cuda:
        if ngpus > 1:
            model = torch.nn.DataParallel(model).to(device)
        else:
            model = model.to(device)

    # Parameters whose name contains '.bn' or '.bias' get no weight decay;
    # everything else uses decay*batch_size.
    params = []
    for key, value in model.named_parameters():
        if '.bn' in key or '.bias' in key:
            params.append({'params': [value], 'weight_decay': 0.0})
        else:
            params.append({'params': [value], 'weight_decay': decay*batch_size})

    optimizer = optim.SGD(params, lr=learning_rate/batch_size,
                          momentum=momentum, dampening=0,
                          weight_decay=decay*batch_size)

    if evaluate:
        logging('evaluating ...')
        test(0)
        return

    try:
        print("Training for ({:d},{:d})".format(init_epoch, max_epochs))
        fscore = 0
        # Baseline for --localmax; always bound because the initial
        # evaluation below is currently disabled.
        mfscore = 0.5
        if not no_eval and init_epoch > test_interval:
            print('>> initial evaluating ...')
            # mfscore = test(init_epoch)
            print('>> done evaluation.')

        for epoch in range(init_epoch+1, max_epochs):
            nsamples = train(epoch)
            if not no_eval and epoch > test_interval and (epoch%test_interval) == 0:
                print('>> intermittent evaluating ...')
                # fscore = test(epoch)
                print('>> done evaluation.')
            if epoch % save_interval == 0:
                savemodel(epoch, nsamples)
            if FLAGS.localmax and fscore > mfscore:
                mfscore = fscore
                savemodel(epoch, nsamples, True)
            print('-'*90)
    except KeyboardInterrupt:
        print('='*80)
        print('Exiting from training by interrupt')
args = arg_parse() images = args.images batch_size = int(args.bs) confidence = float(args.confidence) nms_thesh = float(args.nms_thresh) start = 0 CUDA = torch.cuda.is_available() num_classes = 80 classes = load_classes( "/home/ripo/project/python/workspace/cv/yolo/my/data/coco.names") #Set up the neural network print("Loading network.....") model = Darknet(args.cfgfile) model.load_weights(args.weightsfile) print("Network successfully loaded") model.net_info["height"] = args.reso inp_dim = int(model.net_info["height"]) #断言式 assert inp_dim % 32 == 0 assert inp_dim > 32 #If there's a GPU availible, put the model on GPU if CUDA: model.cuda() #Set the model in evaluation mode """
def valid(datacfg, cfgfile, weightfile, save_path, use_cuda = False, size = 416):
    """Run the detector over the validation list and write per-class results.

    For every class name a file ``<save_path>/<name>.txt`` is written with
    one line per kept detection:
    ``<image id> <box[4] * box[5]> <x1> <y1> <x2> <y2>`` in image pixels.

    Args:
        datacfg: Data config file; its ``valid`` and ``names`` entries are used.
        cfgfile: Darknet network config.
        weightfile: Weights to load into the network.
        save_path: Output directory (created if missing).
        use_cuda: Run the network and batches on the GPU.
        size: Square input size passed to the dataset.
    """
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']

    if not os.path.exists(save_path):
        os.mkdir(save_path)
    prefix = save_path
    names = load_class_names(name_list)

    with open(valid_images) as fp:
        valid_files = [item.rstrip() for item in fp.readlines()]

    m = Darknet(cfgfile)
    m.load_weights(weightfile)
    num_classes = len(names)
    if use_cuda:
        m.cuda()
    m.eval()

    valid_dataset = MyDataset(valid_images, shape=(size, size),
                              is_train = False,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                              ]))
    valid_batchsize = 10
    assert(valid_batchsize > 1)

    if use_cuda:
        kwargs = {'num_workers': 4, 'pin_memory': True}
    else:
        kwargs = {}
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs)

    if not os.path.exists('results'):
        os.mkdir('results')

    conf_thresh = 0.01
    nms_thresh = 0.5
    # Index into valid_files; relies on the loader preserving list order
    # (shuffle=False above).
    lineId = -1

    fps = []
    try:
        for i in range(num_classes):
            fps.append(open('%s/%s.txt' % (prefix, names[i]), 'w'))

        for batch_id, (data, target) in enumerate(valid_loader):
            if use_cuda:
                data = data.cuda()
            print('start processing batch{}'.format(batch_id))
            start1 = time.time()
            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                output = m(data)
            batch_boxes = get_all_boxes(output, conf_thresh, num_classes,
                                        only_objectness=0, validation=True,
                                        use_cuda = use_cuda)

            for i in range(data.size(0)):
                lineId = lineId + 1
                fileId = os.path.basename(valid_files[lineId]).split('.')[0]
                width, height = get_image_size(valid_files[lineId])
                boxes = batch_boxes[i]
                if boxes.numel() == 0:
                    continue

                for cls_id in range(num_classes):
                    cls_ind = (boxes[:, 6] == cls_id)
                    cls_boxes = nms(boxes[cls_ind], nms_thresh)
                    # numel must be *called*; comparing the bound method to
                    # 0 was always False.
                    if cls_boxes.numel() == 0:
                        continue
                    for box in cls_boxes:
                        # Centre/size (relative) -> corner coordinates in pixels.
                        x1 = (box[0] - box[2]/2.0) * width
                        y1 = (box[1] - box[3]/2.0) * height
                        x2 = (box[0] + box[2]/2.0) * width
                        y2 = (box[1] + box[3]/2.0) * height
                        fps[cls_id].write('%s %f %f %f %f %f\n' %
                                          (fileId, box[4] * box[5], x1, y1, x2, y2))

            end1 = time.time()
            print('average time {}s'.format((end1 - start1) / len(data)))
            del data,target
    finally:
        # Close every result file even if processing fails part-way.
        for fp in fps:
            fp.close()
class Car_DC():
    """Vehicle detection (Darknet) followed by multi-label classification."""

    def __init__(self,
                 src_dir,
                 dst_dir,
                 car_cfg_path=local_car_cfg_path,
                 car_det_weights_path=local_car_det_weights_path,
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """Initialise the detector, the classifier and the input path list.

        Args:
            src_dir: Directory whose entries are processed.
            dst_dir: Output directory; existing ``.jpg`` files in it are
                deleted, and it is created when missing.
            car_cfg_path: Darknet config for the vehicle detector.
            car_det_weights_path: Weights for the vehicle detector.
            inp_dim: Detector input size.
            prob_th: Detection probability threshold.
            nms_th: NMS threshold.
            num_classes: Number of detector classes.
        """
        # hyper-parameters
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_dir = dst_dir

        # clear dst_dir
        if os.path.exists(self.dst_dir):
            for x in os.listdir(self.dst_dir):
                if x.endswith('.jpg'):
                    os.remove(self.dst_dir + '/' + x)
        else:
            os.makedirs(self.dst_dir)

        # initialize vehicle detection model
        self.detector = Darknet(car_cfg_path)
        self.detector.load_weights(car_det_weights_path)
        self.detector.net_info['height'] = self.inp_dim  # input dimension
        self.detector.to(device)
        self.detector.eval()  # evaluation mode
        print('=> car detection model initiated.')

        # initiate multilabel classifier
        self.classifier = Car_Classifier(num_cls=19,
                                         model_path=local_model_path)

        # Every entry of src_dir is listed (not only images): the
        # image-only filter that used to precede this was immediately
        # overwritten, so it has been dropped.
        self.imgs_path = [os.path.join(src_dir, x) for x in os.listdir(src_dir)]

    def cls_draw_bbox(self, output, orig_img):
        """Classify every detected vehicle and draw its box and label.

        Pass 1 crops each detection from the still-unannotated image and
        predicts its attributes; pass 2 draws.  Box and label are stored
        together so that skipped detections cannot shift the pairing, and
        each text background is sized from that detection's own label.
        """
        annotated = []

        # 1: predict attributes from the clean image
        for det in output:
            if len(det) == 7:
                continue

            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right-down point

            # turn BGR back to RGB for the classifier
            ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1],
                                           pt_1[0]:pt_2[0]][:, :, ::-1])

            car_color, car_direction, car_type = self.classifier.predict(ROI)
            label = str(car_color + ' ' + car_direction + ' ' + car_type)
            print('=> predicted label: ', label)
            annotated.append((pt_1, pt_2, label))

        # 2: draw boxes and labels
        color = (0, 215, 255)
        for pt_1, pt_2, label in annotated:
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # filled background rectangle sized to this label's text
            txt_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            bg_corner = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5
            cv2.rectangle(orig_img, pt_1, bg_corner, color, thickness=-1)

            cv2.putText(orig_img, label, (pt_1[0], pt_1[1]),
                        cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2)

    def process_predict(self, prediction, prob_th, num_cls, nms_th, inp_dim,
                        orig_img_size):
        """Post-process raw predictions and map boxes to image coordinates.

        Returns the output of ``post_process`` (an ``int`` when there is
        nothing to report, as the callers check), with columns 1-4 shifted
        by the padding offset, rescaled and clamped to the image size.
        """
        scaling_factor = min([inp_dim / float(x)
                              for x in orig_img_size])  # W, H scaling factor
        output = post_process(prediction, prob_th, num_cls, nms=True,
                              nms_conf=nms_th, CUDA=True)
        print('\n', output, '\na')

        if not isinstance(output, int):
            # Remove the padding offset, then undo the resize.
            output[:, [1, 3]] -= (inp_dim - scaling_factor * orig_img_size[0]) / 2.0
            output[:, [2, 4]] -= (inp_dim - scaling_factor * orig_img_size[1]) / 2.0
            output[:, 1:5] /= scaling_factor
            # Clamp x to [0, W] and y to [0, H].
            output[:, [1, 3]] = torch.clamp(output[:, [1, 3]], 0.0, orig_img_size[0])
            output[:, [2, 4]] = torch.clamp(output[:, [2, 4]], 0.0, orig_img_size[1])
        print('\n', output, '\n')
        return output

    def _detect_image(self, img_path):
        """Load ``img_path``, pad it and run the vehicle detector.

        Returns ``(img, output, orig_img)``: the padded PIL image, the
        post-processed detections and the array handed to the drawing code.
        """
        img = cv2.imread(img_path)
        img = cv2.copyMakeBorder(img, BORDER, BORDER, BORDER, BORDER,
                                 cv2.BORDER_CONSTANT, value=(100, 100, 100))
        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        img2det = process_img(img, self.inp_dim)
        img2det = img2det.to(device)  # put image data to device

        # vehicle detection
        prediction = self.detector.forward(img2det, CUDA=True)

        orig_img_size = list(img.size)
        output = self.process_predict(prediction, self.prob_th,
                                      self.num_classes, self.nms_th,
                                      self.inp_dim, orig_img_size)
        orig_img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)  # RGB => BGR
        return img, output, orig_img

    def detect_classify(self):
        """Detect and classify every path in ``self.imgs_path``.

        Annotated images are written to ``dst_dir`` unless a file with the
        same name already exists there.
        """
        for x in self.imgs_path:
            _, output, orig_img = self._detect_image(x)
            if not isinstance(output, int):
                print('\n', x)
                self.cls_draw_bbox(output, orig_img)
                dst_path = self.dst_dir + '/' + os.path.split(x)[1]
                if not os.path.exists(dst_path):
                    cv2.imwrite(dst_path, orig_img)

    def detect_classify_modified(self):
        """Detect and classify ``.jpg`` files two directory levels down.

        Each entry of ``self.imgs_path`` is treated as a directory of
        sub-directories containing the images.  On any error the current
        image is shown, the error printed and the process exits with
        status 2.
        """
        for tracklet in self.imgs_path:
            tracklet_camera_path = [
                os.path.join(tracklet, x) for x in os.listdir(tracklet)
            ]
            for tracklet_camera in tracklet_camera_path:
                the_imgs_path = [
                    os.path.join(tracklet_camera, x)
                    for x in os.listdir(tracklet_camera) if x.endswith('.jpg')
                ]
                for the_img in the_imgs_path:
                    img, output, orig_img = self._detect_image(the_img)
                    print(the_img)
                    try:
                        if not isinstance(output, int):
                            self.cls_draw_bbox(output, orig_img)
                            print('\n', os.path.split(the_img)[0])
                            # NOTE(review): the target directory is not
                            # created here; confirm it exists beforehand.
                            dst_path = (self.dst_dir + '/' +
                                        os.path.split(the_img)[0] + '/' +
                                        os.path.split(the_img)[1])
                            print(dst_path)
                            if not os.path.exists(dst_path):
                                cv2.imwrite(dst_path, orig_img)
                    except Exception as inst:
                        img.show()
                        print(inst)
                        exit(2)
def demo():
    """Run YOLOv3 detection on capture device 0 and show annotated frames.

    Loads the network described in ``params``, then reads frames until the
    stream ends or ``q`` is pressed, drawing detections on each frame and
    printing the running FPS.

    Changes relative to the previous version: the frame shape is only
    printed after ``ret`` has been checked (it used to crash with
    ``AttributeError`` at end of stream), the class list and colour palette
    are loaded once instead of on every frame (and the palette file is
    closed), the capture device is always released, and a stray ``g7`` was
    removed from the FPS message.
    """
    params = {
        "video": "video.avi",  # Video to run detection upon
        "dataset": "pasacal",  # Dataset on which the network has been trained
        "confidence": 0.5,  # Object Confidence to filter predictions
        "nms_thresh": 0.4,  # NMS Threshold
        "cfgfile": "cfg/yolov3.cfg",  # Config file
        "weightsfile": "yolov3.weights",  # Weightsfile
        "repo": 416  # Input resolution of the network
    }

    confidence = float(params["confidence"])
    nms_thesh = float(params["nms_thresh"])
    CUDA = torch.cuda.is_available()
    num_classes = 80

    # Accumulated over all frames; currently only printed, not returned.
    bboxes = []
    xywh = []

    print("Loading network.....")
    model = Darknet(params["cfgfile"])
    model.load_weights(params["weightsfile"])
    print("Network successfully loaded")

    # The input size must be a multiple of 32 and larger than 32.
    model.net_info["height"] = params["repo"]
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    # Loop-invariant resources: load them once.
    classes = load_classes('data/coco.names')
    with open("pallete", "rb") as palette_file:
        colors = pkl.load(palette_file)

    # Device 0 is used for debugging instead of params["video"].
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            print("ret: ", ret)
            if not ret:
                break
            print("frame: ", frame.shape)

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            # write_results returns an int when nothing was detected.
            if isinstance(output, int):
                frames += 1
                print(
                    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                )
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            # Map boxes from the padded network input back to the frame.
            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            print("output: ", output)
            print("output: ", output.shape)

            for det in output:
                x0 = det[1].int()
                y0 = det[2].int()
                x1 = det[3].int()
                y1 = det[4].int()
                bbox = (x0, y0, x1, y1)
                bboxes.append(bbox)
                print(bbox)
                w = x1 - x0
                h = y1 - y0
                xywh.append((x0, y0, w, h))
                print(x0, y0, w, h)

            # write bbox
            for det in output:
                write(det, orig_im, classes, colors)

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(
                frames / (time.time() - start)))
    finally:
        cap.release()
        cv2.destroyAllWindows()
def main():
    """Run half-precision YOLO detection on the default webcam.

    Loads the network named on the command line (converted to half
    precision when CUDA is available), then reads frames from capture
    device 0 until the stream ends or ``q`` is pressed, drawing detections
    and printing the running FPS.

    Changes relative to the previous version: inference runs under
    ``torch.no_grad()`` instead of the removed ``volatile=True`` flag, a
    failed frame read ends the loop instead of crashing in ``prep_image``,
    the unused per-frame palette load and the unused ``predict_transform``
    rebinding were dropped, and the capture device is always released.
    """
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()
    num_classes = 80

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    # The input size must be a multiple of 32 and larger than 32.
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda().half()
    model.eval()

    # Use the webcam for detection.
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    print('################| INITILIZATION SEQUENCE COMPLETE |#############')

    try:
        while True:
            ret, rgb = cap.read()
            if not ret:
                break

            img, orig_im, dim = util.prep_image(rgb, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                img = img.cuda().half()
                im_dim = im_dim.half().cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = util.write_results(output, confidence, num_classes,
                                        nms=True, nms_conf=nms_thesh)

            # write_results returns an int when nothing was detected.
            if isinstance(output, int):
                frames += 1
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            # Map boxes from the padded network input back to the frame.
            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            for det in output:
                util.write(det, orig_im)

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                print('################| QUIT |#############')
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(
                frames / (time.time() - start)))
    finally:
        cap.release()
        cv2.destroyAllWindows()
def center_to_corner_2d(boxes):
    """Convert ``[cx, cy, w, h]`` rows to ``[x0, y0, x1, y1]`` in place.

    Columns 0-1 become the centre minus half the size; columns 2-3 become
    the size plus the new first corner.  ``boxes`` is modified and also
    returned.
    """
    # First corner: centre minus half the extent.
    boxes[:, :2] = boxes[:, :2] - boxes[:, 2:4] / 2
    # Second corner: extent added to the freshly written first corner.
    boxes[:, 2:4] = boxes[:, 2:4] + boxes[:, :2]
    return boxes


if __name__ == "__main__":
    args = arg_parse()

    # Build the network in non-training mode.
    model = Darknet(args.cfgfile, train=False)

    # Read the input size and class count from the parsed network info.
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32
    num_classes = int(model.net_info["classes"])
    bbox_attrs = 5 + num_classes

    # Weights are stored as a PyTorch state dict.
    model.load_state_dict(torch.load(args.weightsfile))

    # Switch to evaluation mode, then move the model to the target device.
    model.eval()
    model = model.to(device)
def run_video_demo(input_data, UI):
    """Run detection over a video file and report the results to ``UI``.

    Args:
        input_data: mapping with the keys ``'video'`` (path passed to
            ``cv2.VideoCapture``), ``'object'``, ``'feature'`` and
            ``'color'``.  An empty ``'feature'`` / ``'color'`` string turns
            the corresponding flag off.
        UI: object whose ``write_output_data(dict)`` method receives the
            collected per-image text plus an ``'AVG_FPS'`` entry.

    Every frame is shown in a window named ``"frame"``; pressing ``q`` stops
    early.  After the video, every file found in ``OUTPUT_IMAGES_PATH`` gets
    its text entry completed (optionally with the colour check) and the
    files in ``OUTPUT_IMAGES_PATH`` and ``.\\crop_thumbnails`` are deleted.
    """
    args = {
        'confidence': CONFIDENCE_THRESH,
        'cfgfile': CFG_FILE,
        'nms_thres': NMS_THRESH,
        'reso': RESO,
        'weights': WEIGHTS_FILE,
        'video': input_data['video'],
        'object': input_data['object'],
        'feature': input_data['feature'],
        'color': input_data['color'],
        'feature_flag': None,
        'color_flag': None
    }

    # Setting up parameter flags: an empty string means "not requested".
    args['feature_flag'] = args['feature'] != ''
    args['color_flag'] = args['color'] != ''

    confidence = float(args['confidence'])
    nms_thesh = float(args['nms_thres'])
    CUDA = torch.cuda.is_available()
    num_classes = NUM_CLASSES

    print("Loading network.....")
    model = Darknet(args['cfgfile'])
    model.load_weights(args['weights'])
    print("Network successfully loaded")

    model.net_info["height"] = args['reso']
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    # Enter evaluation mode *before* the warm-up pass and skip autograd, so
    # the dummy input cannot touch training-mode layer state.
    model.eval()
    with torch.no_grad():
        model(get_test_input(inp_dim, CUDA), CUDA)

    output_for_ui = {}
    timestamp = {}

    # Class names and palette do not change between frames: load them once.
    classes = load_classes('data/obj.names')
    print(classes)
    # NOTE(review): pickle must only be used on trusted files.
    with open("pallete", "rb") as palette_file:
        colors = pkl.load(palette_file)

    videofile = args['video']
    cap = cv2.VideoCapture(videofile)
    assert cap.isOpened(), 'Cannot capture source'
    fps = cap.get(cv2.CAP_PROP_FPS)
    print('FPS of original video:', fps)

    FPS_STORE = []
    frames = 0
    start = time.time()
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(img, CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            # An int result is treated as "no detections" for this frame.
            if isinstance(output, int):
                frames += 1
                print("FPS of the video is {:5.2f} Frame no: {}".format(
                    frames / (time.time() - start), frames))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            # Map box corners from network-input coordinates back to the
            # original frame, then clamp them to the frame.
            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            # Routine to find if current frame has object of interest.
            for detection in output:
                write(detection, orig_im, classes, colors, frames, fps,
                      timestamp, output_for_ui, args, cap)

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {} Frame no: {}".format(
                round(frames / (time.time() - start), 2), frames))
            FPS_STORE.append(frames / (time.time() - start))
    finally:
        # Release the capture and close the preview even if a frame failed.
        cap.release()
        cv2.destroyAllWindows()

    # FPS samples are only collected on frames with detections, so the list
    # can be empty; report that instead of dividing by zero.
    if FPS_STORE:
        output_for_ui['AVG_FPS'] = 'Average FPS of the program: {:.2f}'.format(
            sum(FPS_STORE) / len(FPS_STORE))
    else:
        output_for_ui['AVG_FPS'] = 'Average FPS of the program: n/a'
    print(output_for_ui)

    # Color extraction routine
    desired_color = args['color']
    for image_name in os.listdir(OUTPUT_IMAGES_PATH):
        # Files without a recorded entry start from an empty text instead of
        # raising KeyError.
        entry = output_for_ui.get(image_name, '')
        if args['color_flag']:
            kmc = km.KMeansColours(img=image_name,
                                   clusters=5,
                                   desired_color=desired_color,
                                   file_dir='{}\\'.format(OUTPUT_IMAGES_PATH),
                                   file_dest='.\\crop_thumbnails\\')
            entry += 'Desired color {} is present: {}\n\n'.format(
                desired_color, kmc.driver())
        else:
            entry += '\n\n'
        output_for_ui[image_name] = entry

    # Remove the per-run image crops and thumbnails.
    for leftover in os.listdir(OUTPUT_IMAGES_PATH):
        os.unlink(os.path.join(OUTPUT_IMAGES_PATH, leftover))
    for leftover in os.listdir('.\\crop_thumbnails'):
        os.unlink(os.path.join('.\\crop_thumbnails\\', leftover))

    UI.write_output_data(output_for_ui)
def run():
    """Visualise pose predictions for every sample of the ``valid`` list.

    Command-line options: ``--datacfg``, ``--modelcfg``,
    ``--initweightfile`` and ``--pretrain_num_epochs`` (accepted but not
    used here).  Each sample is passed through the network one at a time and
    the result of ``visualize_results`` is shown in a window.  The viewer
    starts paused: ``p`` toggles between waiting for a key and advancing
    automatically, ``q`` quits.
    """
    logger = logging.getLogger()

    # Parse command window input
    parser = argparse.ArgumentParser(description='SingleShotPose')
    parser.add_argument('--datacfg', type=str,
                        default='cfg/ape.data')  # data config
    parser.add_argument('--modelcfg', type=str,
                        default='cfg/yolo-pose.cfg')  # network config
    parser.add_argument('--initweightfile', type=str,
                        default='backup/init.weights')  # initialization weights
    parser.add_argument('--pretrain_num_epochs', type=int,
                        default=0)  # how many epoch to pretrain
    args = parser.parse_args()
    print("ARGS: ", args)

    # Parse data configuration file
    data_options = read_data_cfg(args.datacfg)
    sample_list = data_options['valid']
    print("DATA OPTIONS: ", data_options)

    # Parse the network configuration once; the first section holds the
    # network options and the last one the loss options.
    cfg_sections = parse_cfg(args.modelcfg)
    net_options = cfg_sections[0]
    loss_options = cfg_sections[-1]
    num_keypoints = int(net_options['num_keypoints'])
    num_classes = int(loss_options['classes'])
    num_anchors = int(loss_options['num'])
    print("NET OPTIONS: ", net_options)
    print("LOSS OPTIONS: ", loss_options)

    # Specify the model
    model = Darknet(args.modelcfg)
    model.load_weights(args.initweightfile)
    model.print_network()

    # Fixed evaluation settings: one 416x416 image per step, loaded in the
    # main process.
    init_width = 416
    init_height = 416
    batch_size = 1
    num_workers = 0

    bg_file_names = get_all_files('../VOCdevkit/VOC2012/JPEGImages')

    use_cuda = True
    kwargs = {
        'num_workers': num_workers,
        'pin_memory': True
    } if use_cuda else {}

    logger.info("Loading data")
    dataloader = torch.utils.data.DataLoader(
        dataset.listDataset(sample_list,
                            shape=(init_width, init_height),
                            shuffle=False,
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                            ]),
                            train=False,
                            seen=0,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            bg_file_names=bg_file_names),
        batch_size=batch_size,
        shuffle=False,
        **kwargs)

    model.cuda()
    model.eval()

    # cv2.waitKey delay per state: 0 blocks until a key is pressed (paused),
    # 1 ms lets the loop advance on its own.
    delay = {True: 0, False: 1}
    paused = True

    print("Batches in dataloader: ", len(dataloader))
    tbar = tqdm(dataloader, ascii=True, dynamic_ncols=True)
    try:
        for images, targets in tbar:
            bs = images.shape[0]
            t = targets.cpu().numpy().reshape(bs, 50, -1)

            # Inference only: do not build an autograd graph.
            with torch.no_grad():
                model_out = model(images.cuda())

            all_boxes = np.array(
                get_region_boxes(model_out,
                                 num_classes,
                                 num_keypoints,
                                 anchor_dim=num_anchors)).reshape(
                                     batch_size, 1, -1)

            # Rearrange each prediction row: the last value moves to the
            # front, followed by all values except the last three.
            pred = np.zeros_like(all_boxes)
            pred[:, 0, 0] = all_boxes[:, 0, -1]
            pred[:, 0, 1:-2] = all_boxes[:, 0, :-3]

            viz = visualize_results(images, t, pred, img_size=416, show_3d=True)
            cv2.imshow("Res ", viz)
            k = cv2.waitKey(delay[paused])
            if k & 0xFF == ord('q'):
                break
            if k & 0xFF == ord('p'):
                paused = not paused
    finally:
        cv2.destroyAllWindows()
# Test parameters conf_thresh = 0.25 nms_thresh = 0.4 iou_thresh = 0.5 if not os.path.exists(backupdir): os.mkdir(backupdir) ############### torch.manual_seed(seed) if use_cuda: os.environ['CUDA_VISIBLE_DEVICES'] = gpus torch.cuda.manual_seed(seed) #定义模型 model = Darknet(cfgfile) region_loss = model.loss model.load_weights(weightfile) model.print_network() model.seen=0 region_loss.seen = model.seen processed_batches = model.seen/batch_size init_width = model.width init_height = model.height init_epoch = model.seen/nsamples print('--------------------init_width,h------------',init_width,init_height) kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {} ''' test_loader = torch.utils.data.DataLoader(
if __name__ == '__main__':
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 2
    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    if args.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(args.weights_path)
    else:
        # Load checkpoint weights.  Deserialise onto the CPU so a checkpoint
        # saved on a GPU also loads on a machine without one;
        # load_state_dict copies the values into the model's own parameters.
        model.load_state_dict(
            torch.load(args.weights_path, map_location="cpu"))
    model.eval()  # Set in evaluation mode
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    # The network input size must be a multiple of 32 and larger than 32.
    assert inp_dim % 32 == 0
    assert inp_dim > 32
batch_size = int(net_options['batch']) max_batches = int(net_options['max_batches']) learning_rate = float(net_options['learning_rate']) momentum = float(net_options['momentum']) max_epochs = max_batches*batch_size/nsamples+1 use_cuda = True seed = 22222 eps = 1e-5 ############### torch.manual_seed(seed) if use_cuda: torch.cuda.manual_seed(seed) model = Darknet(cfgfile) region_loss = model.loss model.load_weights(weightfile) model.print_network() init_epoch = model.seen // nsamples kwargs = {'num_workers': 8, 'pin_memory': True} if use_cuda else {} test_loader = torch.utils.data.DataLoader( lmdb_utils.lmdbDataset(testdb, shape=(160, 160), shuffle=False, transform=None, train=False), batch_size=batch_size, shuffle=False, **kwargs) if use_cuda:
def main():
    """Detect objects in the requested images and write annotated copies.

    Options come from ``ArgumentsParser().parse_arguments()``: ``images``,
    ``bs`` (batch size), ``confidence``, ``nms_thresh``, ``cfgfile``,
    ``weightsfile``, ``reso`` and ``det`` (output directory).  A timing
    summary is printed at the end.  Returns early, after a message, when no
    image could be read.
    """
    # Parsing arguments
    arguments_parser = ArgumentsParser()
    args = arguments_parser.parse_arguments()
    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)

    # Set up the neural network
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()

    read_dir = time.time()

    # Detection phase
    load_batch = time.time()
    image_manager = Cv2ImageManager()
    loaded_images, list_of_images = image_manager.read_images(images)
    if not list_of_images:
        # Nothing to detect; the summary below would divide by zero.
        print("No images found in {}".format(images))
        return

    im_batches = [prep_image(image, inp_dim) for image in loaded_images]
    # (width, height) of every image, repeated to (w, h, w, h).
    im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_images]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    if batch_size != 1:
        # One extra, shorter batch when the images do not divide evenly.
        leftover = 1 if len(im_dim_list) % batch_size else 0
        num_batches = len(list_of_images) // batch_size + leftover
        im_batches = [
            torch.cat(im_batches[i * batch_size:
                                 min((i + 1) * batch_size, len(im_batches))])
            for i in range(num_batches)
        ]

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    start_det_loop = time.time()
    detector = Detector(model, im_batches, batch_size, inp_dim, confidence,
                        nms_thresh, CLASSES, NUMBER_OF_CLASSES, CUDA)
    output = detector.detect(list_of_images, im_dim_list)

    output_recast = time.time()
    class_load = time.time()
    draw = time.time()

    det_images = [
        image_manager.draw_bounding_boxes(detection, loaded_images, CLASSES)
        for detection in output
    ]
    det_names = [
        "{det}/{x}".format(det=args.det, x=osp.basename(image_name))
        for image_name in list_of_images
    ]
    image_manager.write_images(det_names, det_images)

    end = time.time()

    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    print("{:25s}: {:2.3f}".format("Reading addresses",
                                   load_batch - read_dir))
    print("{:25s}: {:2.3f}".format("Loading batch",
                                   start_det_loop - load_batch))
    print("{:25s}: {:2.3f}".format(
        "Detection (" + str(len(list_of_images)) + " images)",
        output_recast - start_det_loop))
    print("{:25s}: {:2.3f}".format("Output Processing",
                                   class_load - output_recast))
    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
    print("{:25s}: {:2.3f}".format("Average time_per_img",
                                   (end - load_batch) / len(list_of_images)))
    print("----------------------------------------------------------")

    torch.cuda.empty_cache()
# Values read from the data and network configuration.
u0 = float(data_options['u0'])
v0 = float(data_options['v0'])
test_width = int(net_options['test_width'])
test_height = int(net_options['test_height'])

# Select the GPU.  use_cuda is hard-coded to False here, so the CUDA branch
# below is skipped.
use_cuda = False
# Seeded from the wall clock, so the seed differs between runs.
seed = int(time.time())
torch.manual_seed(seed)
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)

# Specify the model and the loss function
model = Darknet(modelcfg)
region_loss = RegionLoss(num_keypoints=9, num_classes=1, anchors=[], num_anchors=1, pretrain_num_epochs=15, use_cuda=use_cuda)

# Load the weights
model.load_weights_until_last(initweightfile)
#exportToOnnx(model)
model.print_network()
# Reset the sample counter and share the counters with the loss object.
model.seen = 0
region_loss.iter = model.iter
region_loss.seen = model.seen
processed_batches = model.seen//batch_size
init_width = model.width
init_height = model.height
def demo(cfgfile, weightfile):
    """Run live detection on the camera pipeline and show the result.

    Args:
        cfgfile: network configuration file; also used as the window name.
        weightfile: weights file loaded into the network.

    Reads the module-level ``args`` (``args.gpu``, ``args.demo``).  ``Esc``
    quits; key code 74 toggles full screen.  The process exits with status
    -1 when the partition vector does not match the network, the camera
    cannot be opened, or a frame cannot be read.
    """
    # This vector decides on which device each layer is computed:
    # 0 for CPU, 1 for GPU.
    num_layers = 32
    if args.gpu:
        het_part = np.ones(num_layers, dtype=int)
    else:
        het_part = np.zeros(num_layers, dtype=int)
    if args.demo:
        het_part = np.ones(num_layers, dtype=int)

    m = Darknet(cfgfile, het_part)
    m.print_network()

    if len(m.models) != len(het_part):
        print('Number of model layers and partition vector mismatch')
        exit(-1)

    m.load_weights(weightfile, het_part)
    print('Loading weights from %s... Done!' % (weightfile))

    if m.num_classes == 20:
        namesfile = 'data/voc.names'
    elif m.num_classes == 80:
        namesfile = 'data/coco.names'
    else:
        namesfile = 'data/names'
    class_names = load_class_names(namesfile)

    use_cuda = args.gpu

    cap = cv2.VideoCapture(
        "nvarguscamerasrc ! video/x-raw(memory:NVMM), width=(int)1920, height=(int)1080,format=(string)NV12, framerate=(fraction)30/1 ! nvvidconv ! video/x-raw, format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink drop=1"
    )

    if not cap.isOpened():
        print("Unable to open camera")
        exit(-1)

    # Window creation and specifications
    windowName = cfgfile
    cv2.namedWindow(windowName, cv2.WINDOW_NORMAL)
    cv2.moveWindow(windowName, 1920 - 1280, 0)
    cv2.resizeWindow(windowName, 1280, 1080)
    cv2.setWindowTitle(windowName, "YOLOv2 Object Detection")
    font = cv2.FONT_HERSHEY_PLAIN
    helpText = "'Esc' to Quit"
    showFullScreen = False
    showHelp = True
    start = 0.0

    try:
        while True:
            res, img = cap.read()
            if not res:
                print("Unable to read image")
                exit(-1)

            sized = cv2.resize(img, (m.width, m.height))
            bboxes = do_detect(m, sized, 0.5, 0.4, use_cuda, het_part)
            print('------')
            draw_img = plot_boxes_cv2(img, bboxes, None, class_names)

            # Draw the overlays on the image that is actually displayed.
            # Each text is drawn twice: a thick dark pass shifted one pixel,
            # then a thin light pass on top.
            if showHelp:
                cv2.putText(draw_img, helpText, (11, 20), font, 1.0,
                            (32, 32, 32), 4, cv2.LINE_AA)
                cv2.putText(draw_img, helpText, (10, 20), font, 1.0,
                            (240, 240, 240), 1, cv2.LINE_AA)

            end = time.time()
            elapsed = end - start
            # Guard against a zero interval on coarse clocks.
            if elapsed > 0:
                fps_text = "{0:.0f}fps".format(1 / elapsed)
            else:
                fps_text = "--fps"
            cv2.putText(draw_img, fps_text, (531, 50), font, 3.0,
                        (32, 32, 32), 8, cv2.LINE_AA)
            cv2.putText(draw_img, fps_text, (530, 50), font, 3.0,
                        (240, 240, 240), 2, cv2.LINE_AA)

            cv2.imshow(windowName, draw_img)
            start = time.time()

            key = cv2.waitKey(1)
            if key == 27:  # Check for ESC key
                cv2.destroyAllWindows()
                break
            elif key == 74:
                # Toggle fullscreen; this is the F3 key on this particular
                # keyboard.
                if not showFullScreen:
                    cv2.setWindowProperty(windowName, cv2.WND_PROP_FULLSCREEN,
                                          cv2.WINDOW_FULLSCREEN)
                else:
                    cv2.setWindowProperty(windowName, cv2.WND_PROP_FULLSCREEN,
                                          cv2.WINDOW_NORMAL)
                showFullScreen = not showFullScreen
    finally:
        # Runs on normal exit and on exit(-1): always free the camera.
        cap.release()
def test(datacfg, cfgfile, weightfile, imgfile):
    """Predict the pose of one object in one image and display it.

    Args:
        datacfg: data configuration file; its ``'mesh'`` entry names the
            object model.
        cfgfile: network configuration file.
        weightfile: network weights.
        imgfile: image to analyse.

    The image is resized to 544x544, passed through the network on the GPU,
    and the box with the highest confidence (index 18) is kept.  Its nine
    2-D points are scaled to the original image size, handed to ``pnp`` and
    drawn as a cube.  Timings and the resulting rotation/translation are
    printed.  Returns ``None``; if no box passes the confidence threshold a
    message is printed and nothing is displayed.
    """
    # ******************************************#
    #           PARAMETERS PREPARATION          #
    # ******************************************#
    options = read_data_cfg(datacfg)
    meshname = options['mesh']

    seed = int(time.time())
    gpus = '0'  # define gpus to use
    test_width = 544  # define test image size
    test_height = 544
    torch.manual_seed(seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)
    conf_thresh = 0.1
    num_classes = 1

    # Read object 3D model, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)

    # Camera intrinsics
    internal_calibration = get_camera_intrinsic()

    # ******************************************#
    #             NETWORK CREATION              #
    # ******************************************#
    model = Darknet(cfgfile)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # ******************************************#
    #    INPUT IMAGE PREPARATION FOR NN         #
    # ******************************************#
    img = Image.open(imgfile).convert('RGB')
    ori_size = img.size  # store original size
    img = img.resize((test_width, test_height))
    t1 = time.time()
    img = transforms.Compose([
        transforms.ToTensor(),
    ])(img)
    img = img.unsqueeze(0)  # add a fake batch dimension
    img = img.cuda()

    # ******************************************#
    #  PASS IT TO NETWORK AND GET PREDICTION    #
    # ******************************************#
    # Inference only: no gradients are needed for the input or the network.
    with torch.no_grad():
        output = model(img).data
    t2 = time.time()

    # ******************************************#
    #       EXTRACT PREDICTIONS                 #
    # ******************************************#
    all_boxes = get_region_boxes(output, conf_thresh, num_classes)
    boxes = all_boxes[0]
    if len(boxes) == 0:
        print("No prediction above the confidence threshold {}".format(
            conf_thresh))
        return

    # Keep the box with the highest confidence (stored at index 18); the
    # first one wins on ties.
    box_pr = max(boxes, key=lambda box: box[18])

    # Denormalize the corner predictions: these are the predicted 2D points
    # with which a bounding cube can be drawn.
    corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
    corners2D_pr[:, 0] = corners2D_pr[:, 0] * ori_size[0]  # Width
    corners2D_pr[:, 1] = corners2D_pr[:, 1] * ori_size[1]  # Height
    t3 = time.time()

    # ******************************************#
    #        GET OBJECT POSE ESTIMATION         #
    # ******************************************#
    # The 2D points alone are not the pose: the rotation matrix and the
    # translation vector relative to the camera still have to be computed.
    R_pr, t_pr = pnp(
        np.array(np.transpose(
            np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
                 dtype='float32'), corners2D_pr,
        np.array(internal_calibration, dtype='float32'))
    t4 = time.time()

    # ******************************************#
    #   DISPLAY IMAGE WITH BOUNDING CUBE        #
    # ******************************************#
    img = cv2.imread(imgfile)
    wname = "Prediction"
    cv2.namedWindow(wname)

    # OpenCV drawing functions need integer pixel coordinates.
    pixel_pts = [(int(x), int(y)) for x, y in corners2D_pr]

    # Draw and number each predicted 2D point (index 0 is the centroid).
    for i, (px, py) in enumerate(pixel_pts):
        colour = (28 * i, 255 - (28 * i), int(np.random.randint(0, 256)))
        cv2.circle(img, (px, py), 3, colour, -1)
        cv2.putText(img, str(i), (px + 5, py + 5), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, colour, 1)

    # Cube edges as pairs of point indices.
    cube_edges = (
        (1, 2), (2, 4), (4, 3), (3, 1),  # front face
        (5, 6), (7, 8), (6, 8), (5, 7),  # back face
        (2, 6), (1, 5),                  # right face
        (3, 7), (4, 8),                  # left face
    )
    line_point = 3
    for a, b in cube_edges:
        cv2.line(img, pixel_pts[a], pixel_pts[b], (0, 255, 0), line_point)

    cv2.imshow(wname, img)

    print("Rotation: {}".format(R_pr))
    print("Translation: {}".format(t_pr))
    print(" Predict time: {}".format(t2 - t1))
    print(" 2D Points extraction time: {}".format(t3 - t2))
    print(" Pose calculation time: {}:".format(t4 - t3))
    print(" Total time: {}".format(t4 - t1))
    # Tell the user how to close the window *before* blocking on the key.
    print("Press any key to close.")
    cv2.waitKey()
    cv2.destroyAllWindows()
def valid(datacfg, cfgfile, weightfile, outfile):
    """Run the detector over the validation list and write per-class results.

    Args:
        datacfg: data configuration file providing the ``'valid'`` image
            list and the ``'names'`` class-name file.
        cfgfile: network configuration file.
        weightfile: network weights.
        outfile: prefix of the result files.

    For every class one file ``results/<outfile><class name>.txt`` is
    written; each line is ``<image id> <probability> <x1> <y1> <x2> <y2>``
    with the box in pixel coordinates of the original image.
    """
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']
    prefix = 'results'
    names = load_class_names(name_list)

    with open(valid_images) as fp:
        valid_files = [line.rstrip() for line in fp]

    m = Darknet(cfgfile)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 2
    assert (valid_batchsize > 1)

    kwargs = {'num_workers': 4, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False,
                                               **kwargs)

    if not os.path.exists(prefix):
        os.mkdir(prefix)

    conf_thresh = 0.005
    nms_thresh = 0.45

    fps = []  # one open result file per class, indexed by class id
    try:
        for i in range(m.num_classes):
            fps.append(open('%s/%s%s.txt' % (prefix, outfile, names[i]), 'w'))

        # The loader is not shuffled, so a running index maps every output
        # row back to its line in the validation list.
        lineId = -1
        # Inference only: no_grad replaces the removed ``volatile=True``.
        with torch.no_grad():
            for data, _target in valid_loader:
                output = m(data.cuda()).data
                batch_boxes = get_region_boxes(output, conf_thresh,
                                               m.num_classes, m.anchors,
                                               m.num_anchors, 0, 1)
                for i in range(output.size(0)):
                    lineId = lineId + 1
                    fileId = os.path.basename(valid_files[lineId]).split('.')[0]
                    width, height = get_image_size(valid_files[lineId])
                    print(valid_files[lineId])
                    boxes = nms(batch_boxes[i], nms_thresh)
                    for box in boxes:
                        # Centre/size box, relative to the image, converted
                        # to pixel corner coordinates.
                        x1 = (box[0] - box[2] / 2.0) * width
                        y1 = (box[1] - box[3] / 2.0) * height
                        x2 = (box[0] + box[2] / 2.0) * width
                        y2 = (box[1] + box[3] / 2.0) * height

                        det_conf = box[4]
                        # After the first five values a box holds
                        # (class confidence, class id) pairs.  Floor division
                        # is required: range() rejects the float that "/"
                        # yields on Python 3.
                        for j in range((len(box) - 5) // 2):
                            cls_conf = box[5 + 2 * j]
                            cls_id = box[6 + 2 * j]
                            prob = det_conf * cls_conf
                            fps[cls_id].write('%s %f %f %f %f %f\n' %
                                              (fileId, prob, x1, y1, x2, y2))
    finally:
        # Close every result file, also when validation fails midway.
        for handle in fps:
            handle.close()
def valid(datacfg, cfgfile, weightfile, outfile):
    """Evaluate a single-object 6D pose network on the validation list.

    For every ground-truth object the highest-scoring prediction is selected,
    its 9 projected control points are compared with the ground truth, and
    poses are recovered from both sets of 2D points with ``pnp``. Per-sample
    results are saved under ``<backup>/test/{gt,pr}``, demo images under
    ``test/<weightfile>/{demo,result}``, aggregated predictions in a ``.mat``
    file in the backup directory, and a summary is appended to
    ``test/<weightfile>/test_report.txt``.

    Args:
        datacfg: Path of the data config; the entries ``valid``, ``mesh``,
            ``backup``, ``name`` and ``diam`` are read from it.
        cfgfile: Network configuration passed to ``Darknet``.
        weightfile: Weights loaded into the network; also used as a path
            component of the ``test/`` output directories.
        outfile: Not used by this function; kept for interface compatibility.
    """

    def truths_length(truths, max_objects=50):
        """Return how many leading label rows describe an object.

        A row whose entry at index 1 is 0 marks the end of the objects. When
        no such row exists, every inspected row is an object, so the number
        of inspected rows is returned (instead of an implicit ``None``).
        """
        limit = min(max_objects, len(truths))
        for i in range(limit):
            if truths[i][1] == 0:
                return i
        return limit

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    backupdir = options['backup']
    name = options['name']
    if not os.path.exists(backupdir):
        makedirs(backupdir)

    # Parameters
    seed = int(time.time())
    gpus = '0'  # Specify which gpus to use
    test_width = 416  # originally 544
    test_height = 416  # originally 544
    torch.manual_seed(seed)
    use_cuda = True
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)
    save = True
    testtime = True
    num_classes = 1
    testing_samples = 0.0
    eps = 1e-5
    notpredicted = 0
    conf_thresh = 0.1

    # Demo/result images and the text report are written below these
    # directories; create them up front so the writes cannot fail on a
    # missing path.
    demo_dir = 'test/{}/demo'.format(weightfile)
    result_dir = 'test/{}/result'.format(weightfile)
    for out_dir in (demo_dir, result_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    if save:
        makedirs(backupdir + '/test')
        makedirs(backupdir + '/test/gt')
        makedirs(backupdir + '/test/pr')

    # To save
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    errs_2d = []
    errs_3d = []
    errs_trans = []
    errs_angle = []
    errs_corner2D = []
    preds_trans = []
    preds_rot = []
    preds_corners2D = []
    gts_trans = []
    gts_rot = []
    gts_corners2D = []

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(options['diam'])

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Inputs of pnp() that do not depend on the sample: the origin column
    # followed by the 3D corners (one point per row), and the camera matrix.
    object_points = np.array(
        np.transpose(
            np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
        dtype='float32')
    camera_matrix = np.array(internal_calibration, dtype='float32')

    # Get validation file names
    with open(valid_images) as fp:
        valid_files = [line.rstrip() for line in fp]

    # Specify model, load pretrained weights, pass to GPU and set the module
    # in evaluation mode
    model = Darknet(cfgfile)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # Get the parser for the test dataset
    valid_dataset = dataset.listDataset(
        valid_images,
        shape=(test_width, test_height),
        shuffle=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]))
    valid_batchsize = 1

    # Specify the number of workers for multiple processing, get the
    # dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs)

    logging(" Testing {}...".format(name))
    logging(" Number of test samples: %d" % len(test_loader.dataset))

    # Defined before the loop so the timing report below never hits an
    # unbound name when the loader yields nothing.
    t1 = t2 = t3 = t4 = t5 = time.time()

    # Iterate through test batches (Batch size for test data is 1)
    count = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        # NOTE(review): ``volatile`` is the pre-0.4 PyTorch inference flag;
        # newer releases ignore it - confirm the targeted torch version.
        data = Variable(data, volatile=True)
        t2 = time.time()

        # Forward pass
        output = model(data).data
        t3 = time.time()

        # Using confidence threshold, eliminate low-confidence predictions
        all_boxes = get_region_boxes(output, conf_thresh, num_classes)
        t4 = time.time()

        # Iterate through all images in the batch
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]

            # Each label row holds 21 values: entry 0 is appended last to the
            # ground-truth box, entries 1..18 are the 9 (x, y) control points.
            truths = target[i].view(-1, 21)

            # Get how many objects are present in the scene
            num_gts = truths_length(truths)

            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [truths[k][idx] for idx in range(1, 19)]
                box_gt += [1.0, 1.0, truths[k][0]]

                # Pick the prediction with the highest value at index 18
                # (presumably its confidence - confirm against
                # get_region_boxes).
                box_pr = None
                best_conf_est = -1
                for candidate in boxes:
                    if candidate[18] > best_conf_est:
                        box_pr = candidate
                        best_conf_est = candidate[18]

                if box_pr is None:
                    # Nothing survived the confidence threshold: skip the
                    # sample instead of reusing the previous image's
                    # prediction, but keep ``count`` aligned with
                    # ``valid_files``.
                    notpredicted += 1
                    count = count + 1
                    continue

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]),
                                        dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]),
                                        dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * test_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * test_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * test_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * test_height
                preds_corners2D.append(corners2D_pr)
                gts_corners2D.append(corners2D_gt)

                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr,
                                             axis=1)
                corner_dist = np.mean(corner_norm)
                errs_corner2D.append(corner_dist)

                # Compute [R|t] by pnp
                R_gt, t_gt = pnp(object_points, corners2D_gt, camera_matrix)
                R_pr, t_pr = pnp(object_points, corners2D_pr, camera_matrix)

                # File stem taken from the image path: the 5 characters in
                # front of the 3-letter extension (this includes the dot).
                stem = valid_files[count][-8:-3]
                demo_path = demo_dir + '/demo_' + stem + 'png'
                result_path = result_dir + '/result_' + stem + 'png'
                img_path = valid_files[count]
                print(img_path, os.path.exists(img_path), demo_path, 'saved.')
                img = cv2.imread(img_path)
                # The result image shows only the prediction; the demo image
                # additionally overlays the ground truth.
                img = draw_demo_img(img, corners2D_pr, (0, 255, 0))
                cv2.imwrite(result_path, img)
                img = draw_demo_img(img, corners2D_gt, (0, 0, 255))
                cv2.imwrite(demo_path, img)

                if save:
                    preds_trans.append(t_pr)
                    gts_trans.append(t_gt)
                    preds_rot.append(R_pr)
                    gts_rot.append(R_gt)

                    np.savetxt(backupdir + '/test/gt/R_' + stem + 'txt',
                               np.array(R_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/gt/t_' + stem + 'txt',
                               np.array(t_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/R_' + stem + 'txt',
                               np.array(R_pr, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/t_' + stem + 'txt',
                               np.array(t_pr, dtype='float32'))
                    np.savetxt(backupdir + '/test/gt/corners_' + stem + 'txt',
                               np.array(corners2D_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/corners_' + stem + 'txt',
                               np.array(corners2D_pr, dtype='float32'))

                # Compute translation error
                trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                errs_trans.append(trans_dist)

                # Compute angle error
                angle_dist = calcAngularDistance(R_gt, R_pr)
                errs_angle.append(angle_dist)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt,
                                                internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr,
                                                  internal_calibration)
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Compute 3D distances
                transform_3d_gt = compute_transformation(vertices, Rt_gt)
                transform_3d_pred = compute_transformation(vertices, Rt_pr)
                norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred,
                                        axis=0)
                vertex_dist = np.mean(norm3d)
                errs_3d.append(vertex_dist)

                # Sum errors
                testing_error_trans += trans_dist
                testing_error_angle += angle_dist
                testing_error_pixel += pixel_dist
                testing_samples += 1
                count = count + 1

        t5 = time.time()

    # Compute 2D projection error, 6D pose error, 5cm5degree error
    px_threshold = 5
    acc = len(np.where(
        np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
    acc5cm5deg = len(
        np.where((np.array(errs_trans) <= 0.05) &
                 (np.array(errs_angle) <= 5))[0]) * 100. / (
                     len(errs_trans) + eps)
    acc3d10 = len(np.where(
        np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d) + eps)
    corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)
                     [0]) * 100. / (len(errs_corner2D) + eps)
    mean_err_2d = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    # Identical to the sample count whenever at least one sample was
    # evaluated; only prevents a division by zero otherwise.
    nts = max(float(testing_samples), eps)

    if testtime:
        print('-----------------------------------')
        print(' tensor to cuda : %f' % (t2 - t1))
        print(' predict : %f' % (t3 - t2))
        print('get_region_boxes : %f' % (t4 - t3))
        print(' eval : %f' % (t5 - t4))
        print(' total : %f' % (t5 - t1))
        print('-----------------------------------')

    # Print test statistics
    logging('Results of {}'.format(name))
    if notpredicted:
        logging(' Samples skipped (no prediction): %d' % notpredicted)
    logging(' Acc using {} px 2D Projection = {:.2f}%'.format(
        px_threshold, acc))
    logging(' Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.
            format(diam * 0.1, acc3d10))
    logging(' Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging(
        " Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f"
        % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
    logging(
        ' Translation error: %f m, angle error: %f degree, pixel error: % f pix'
        % (testing_error_trans / nts, testing_error_angle / nts,
           testing_error_pixel / nts))

    if save:
        predfile = backupdir + '/predictions_linemod_' + name + '.mat'
        scipy.io.savemat(
            predfile, {
                'R_gts': gts_rot,
                't_gts': gts_trans,
                'corner_gts': gts_corners2D,
                'R_prs': preds_rot,
                't_prs': preds_trans,
                'corner_prs': preds_corners2D
            })

    # Append the same summary to the per-weightfile text report.
    with open('test/{}/test_report.txt'.format(weightfile), 'a') as f:
        f.write('Results of {}\n'.format(name))
        f.write('-----------------------------------\n')
        f.write(' tensor to cuda : %f\n' % (t2 - t1))
        f.write(' predict : %f\n' % (t3 - t2))
        f.write('get_region_boxes : %f\n' % (t4 - t3))
        f.write(' eval : %f\n' % (t5 - t4))
        f.write(' total : %f\n' % (t5 - t1))
        f.write('-----------------------------------\n')
        f.write('[{:.2f}%]Acc using {} px 2D Projection = \n'.format(
            acc, px_threshold))
        f.write('[{:.2f}%]Acc using 10% threshold - {} vx 3D Transformation\n'.
                format(acc3d10, diam * 0.1))
        f.write('[{:.2f}%]Acc using 5 cm 5 degree metric\n'.format(acc5cm5deg))
        f.write(
            "Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f\n"
            % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
        f.write(
            'Translation error: %f m, angle error: %f degree, pixel error: % f pix\n'
            % (testing_error_trans / nts, testing_error_angle / nts,
               testing_error_pixel / nts))
# Inference settings taken from the already-parsed command-line arguments.
batch_size = int(args.bs)
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
start = 0
CUDA = torch.cuda.is_available()

# Class names are read from the COCO names file.
num_classes = 80
classes = load_classes("data/coco.names")

# Build the network from its config file and load the trained weights.
print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

# The requested resolution overrides the configured network height; it has
# to be a multiple of 32 and larger than 32.
model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert not inp_dim % 32
assert 32 < inp_dim

# Run on the GPU whenever one is available.
if CUDA:
    model.cuda()

# Switch the model to evaluation mode.
model.eval()
# Parse the command line and cache the inference settings.
args = arg_parse()
images = args.images
batch_size = int(args.bs)
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
start = 0
CUDA = torch.cuda.is_available()

# Class names are read from the COCO names file.
num_classes = 80
classes = load_classes("data/coco.names")

# Build the network from its config file and load the trained weights.
print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

# The requested resolution overrides the configured network height; it has
# to be a multiple of 32 and larger than 32.
model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert not inp_dim % 32
assert 32 < inp_dim

# Run on the GPU whenever one is available.
if CUDA:
    model.cuda()

# Switch the model to evaluation mode.
model.eval()