# NOTE(review): this chunk begins mid-definition — the four statements below
# are the tail of an annotation-parsing method (likely `Anno_xml.__call__`)
# whose start lies outside this view. Indentation is inferred: the first three
# lines sit inside a per-object loop, the `return` at method level — confirm
# against the full file.
            label_id = self.classes.index(name)  # class name -> integer label index
            bdnbox.append(label_id)              # append label to the box record
            ret += [bdnbox]                      # collect one record per object
        return np.array(ret)                     # all objects as a 2-D ndarray


if __name__ == "__main__":
    # Smoke-test the annotation parser on one Pascal VOC 2012 image.
    # The 20 VOC object classes, in canonical order (index == label id).
    classes = [
        "aeroplane", "bicycle", "bird", "boat", "bottle",
        "bus", "car", "cat", "chair", "cow",
        "diningtable", "dog", "horse", "motorbike", "person",
        "pottedplant", "sheep", "sofa", "train", "tvmonitor"
    ]

    anno_xml = Anno_xml(classes)

    # Build train/val image and annotation path lists from the VOC root.
    root_path = "./data/VOCdevkit/VOC2012/"
    train_img_list, train_annotation_list, val_img_list, val_annotation_list = make_datapath_list(
        root_path)

    # Pick one validation sample to parse.
    idx = 1
    img_file_path = val_img_list[idx]
    # print(img_file_path)
    img = cv2.imread(img_file_path)
    height, width, channels = img.shape  # OpenCV returns (H, W, C)
    # cv2.imshow("img", img)
    # cv2.waitKey()

    # Parse the matching XML annotation; width/height are passed in,
    # presumably so box coordinates can be normalized — confirm in Anno_xml.
    annotation_infor = anno_xml(val_annotation_list[idx], width, height)
    print(annotation_infor)
# Training-script setup: CLI flags, device selection, and dataset construction.
# NOTE(review): this run of statements continues beyond the visible chunk —
# the final MyDataset(...) call is cut mid-arguments and is left open here.
parser = argparse.ArgumentParser()
parser.add_argument('--batch-size', type=int, default=32)  # argparse exposes this as FLAGS.batch_size
parser.add_argument('--epochs', type=int, default=100)
FLAGS = parser.parse_args()
batch_size = FLAGS.batch_size
num_epochs = FLAGS.epochs

# Prefer the first CUDA GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device:", device)
# Let cuDNN auto-tune convolution algorithms; beneficial for fixed-size inputs.
torch.backends.cudnn.benchmark = True

# dataloader
root_path = "../stereo_datasets/training"
train_img_list, train_lp_list, train_anno_list, val_img_list, val_lp_list, val_anno_list \
    = make_datapath_list(root_path)

# KITTI-style object classes (index == label id).
classes = [
    'Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting',
    'Cyclist', 'Tram', 'Misc', 'DontCare'
]
# Presumably per-channel (BGR) means subtracted during preprocessing,
# and the SSD-style square input resolution — confirm in DataTransform.
color_mean = (104, 117, 123)
input_size = 300

# img_list, anno_list, phase, transform, anno_xml
train_dataset = MyDataset(train_img_list, train_lp_list, train_anno_list,
                          phase="train",
                          transform=DataTransform(input_size, color_mean),