def keypoint_detection(img, detector, pose_net, ctx=mx.cpu(), axes=None):
    x, img = gcv.data.transforms.presets.yolo.transform_test(img, short=512, max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs,
                                                       ctx=ctx)
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

        axes = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2, ax=axes)
        plt.draw()
        plt.pause(0.001)
    else:
        axes = plot_image(img, ax=axes)  # no person detected; show the frame as-is
        plt.draw()
        plt.pause(0.001)

    return axes
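# A minimal usage sketch for keypoint_detection above, assuming OpenCV for webcam capture and
# GluonCV's model zoo for the detector and pose network. The model names, the camera index and
# the BGR->RGB conversion are illustrative assumptions, not taken from the snippet itself.
import cv2
import matplotlib.pyplot as plt
import mxnet as mx
import gluoncv as gcv
from gluoncv import model_zoo
from gluoncv.data.transforms.pose import detector_to_simple_pose, heatmap_to_coord
from gluoncv.utils.viz import plot_image, plot_keypoints

ctx = mx.cpu()
detector = model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True, ctx=ctx)
detector.reset_class(classes=['person'], reuse_weights=['person'])  # keep only the person class
pose_net = model_zoo.get_model('simple_pose_resnet18_v1b', pretrained=True, ctx=ctx)

cap = cv2.VideoCapture(0)  # assumed camera index
plt.ion()
axes = None
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # OpenCV returns BGR uint8 frames; transform_test expects an RGB mxnet NDArray.
    frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
    axes = keypoint_detection(frame, detector, pose_net, ctx=ctx, axes=axes)
cap.release()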
def keypoint_detection(i, frame, imagepath, detector, pose_net, ctx=mx.cpu(), axes=None):
    global pause_time
    x, img = gcv.data.transforms.presets.yolo.transform_test(frame, short=512, max_size=1024)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs,
                                                       output_shape=(1024, 768), ctx=ctx)
    # print(pose_input, "\n")
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
        hackathon_action(i, frame, imagepath, pred_coords, confidence, class_IDs, bounding_boxs, scores)
        axes = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2, ax=axes)
        plt.draw()
        plt.pause(pause_time)
        # plt.pause(1.0)
    else:
        axes = plot_image(frame, ax=axes)
        plt.draw()
        plt.pause(pause_time)

    return axes
def work():
    train_trans = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.ToTensor()
    ])

    # https://gluon-cv.mxnet.io/build/examples_datasets/imagenet.html
    # You need to specify ``root`` for ImageNet if you extracted the images into
    # a different folder, or if you use plain images (e.g. *.jpg) instead of rec files.
    train_data = DataLoader(
        ImageNet(train=True,
                 root="/home1/ImageNet_ILSVRC2012/ILSVRC2012_img_train/").transform_first(train_trans),
        batch_size=128, shuffle=True)

    for x, y in train_data:
        print(x.shape, y.shape)
        break

    from gluoncv.utils import viz
    val_dataset = ImageNet(train=False)
    viz.plot_image(val_dataset[1234][0])  # index 0 is image, 1 is label
    viz.plot_image(val_dataset[4567][0])
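# The ``train_trans`` pipeline above only crops and converts to tensor. A common extension
# (an assumption here, not part of the snippet) also adds a horizontal flip and the standard
# ImageNet normalization statistics.
from mxnet.gluon.data.vision import transforms

train_trans_full = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor(),
    # standard ImageNet channel means / stds
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])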
def keypoint_detection(img, detector, pose_net, ctx=mx.cpu(), axes=None):
    x, img = gcv.data.transforms.presets.yolo.transform_test(img, short=512, max_size=350)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)

    plt.cla()
    pose_input, upscale_bbox = detector_to_simple_pose(img, class_IDs, scores, bounding_boxs,
                                                       output_shape=(128, 96), ctx=ctx)
    if len(upscale_bbox) > 0:
        predicted_heatmap = pose_net(pose_input)
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

        axes = plot_keypoints(img, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                              box_thresh=0.5, keypoint_thresh=0.2, ax=axes)
        plt.draw()
        plt.pause(0.001)
    else:
        axes = plot_image(img, ax=axes)  # no person detected; show the frame as-is
        plt.draw()
        plt.pause(0.001)

    return axes
test_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST('dataset/MNIST', train=False, transform=transformer),
    batch_size=batch_size, shuffle=True, last_batch='discard')

# Check the data
for data, label in train_data:
    break
print(data.shape, label.shape)

### graph
from gluoncv.utils import viz
viz.plot_image(data[0][0])  # first image in the batch

### Parameter initialization
net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)

### trainer
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
    'momentum': 0.9,
    'learning_rate': .1
})

# Loss function
loss_function = gluon.loss.SoftmaxCrossEntropyLoss()


def evaluate_accuracy(data_iterator, net):
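    # The original snippet ends at the signature above. The body below is a hedged sketch
    # using the common Gluon accuracy-metric pattern (mx.metric.Accuracy); it is an assumed
    # completion, not the author's original implementation.
    acc = mx.metric.Accuracy()
    for data, label in data_iterator:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        acc.update(preds=mx.nd.argmax(output, axis=1), labels=label)
    return acc.get()[1]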
# You need to specify ``setting`` and ``root`` for UCF101 if you decoded the video frames into a different folder.
train_dataset = ucf101.classification.UCF101(train=True, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
for x, y in train_data:
    print('Video frame size (batch, height, width, RGB):', x.shape)
    print('Video label:', y.shape)
    break

#########################################################################
# Plot several training samples. index 0 is image, 1 is label
from gluoncv.utils import viz
viz.plot_image(train_dataset[7][0])   # Basketball
viz.plot_image(train_dataset[22][0])  # CricketBowling

#########################################################################
# Here is another example that randomly reads 25 videos each time, randomly selects one clip
# per video and performs center cropping. A clip can contain N consecutive frames, e.g., N=5.
train_dataset = ucf101.classification.UCF101(train=True, new_length=5, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
for x, y in train_data:
    print('Video frame size (batch, height, width, RGB):', x.shape)
    print('Video label:', y.shape)
test_data = gluon.data.DataLoader(
    gluon.data.vision.datasets.ImageFolderDataset(test_path, transform=transformer),
    batch_size=batch_size, shuffle=False, last_batch='discard')

for d, l in train_data:
    break
print(d.shape, l.shape)

### graph
from gluoncv.utils import viz
viz.plot_image(d[63][2])  # one channel of the 64th image in the batch

from mxnet.gluon.model_zoo import vision
net = vision.alexnet(classes=10, pretrained=False, ctx=ctx)

# Build an AlexNet-style network by hand instead (replaces the model-zoo net above).
net = gluon.nn.Sequential()
# Hidden layer 1 (channels=96, kernel=11, padding=1, strides=4, activation=relu)
# max pooling (size=3, strides=2)
# input size (224, 224), output size (27, 27)
net.add(gluon.nn.Conv2D(96, kernel_size=11, padding=1, strides=4, activation='relu'))
net.add(gluon.nn.MaxPool2D(pool_size=3, strides=2))
""" from gluoncv.data import ImageNet from mxnet.gluon.data import DataLoader from mxnet.gluon.data.vision import transforms train_trans = transforms.Compose([ transforms.RandomResizedCrop(224), transforms.ToTensor() ]) # You need to specify ``root`` for ImageNet if you extracted the images into # a different folder train_data = DataLoader( ImageNet(train=True).transform_first(train_trans), batch_size=128, shuffle=True) ######################################################################### for x, y in train_data: print(x.shape, y.shape) break ######################################################################### # Plot some validation images from gluoncv.utils import viz val_dataset = ImageNet(train=False) viz.plot_image(val_dataset[1234][0]) # index 0 is image, 1 is label viz.plot_image(val_dataset[4567][0])
# You need to specify ``setting`` and ``root`` for UCF101 if you decoded the video frames into a different folder.
train_dataset = UCF101(train=True, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
# We can see the shape of our loaded data as below. ``extra`` indicates if we select multiple crops or multiple segments
# from a video. Here, we only pick one frame per video, so the ``extra`` dimension is 1.
for x, y in train_data:
    print('Video frame size (batch, extra, channel, height, width):', x.shape)
    print('Video label:', y.shape)
    break

#########################################################################
# Let's plot several training samples. index 0 is image, 1 is label
from gluoncv.utils import viz
viz.plot_image(train_dataset[7][0].squeeze().transpose((1, 2, 0)) * 255.0)   # Basketball
viz.plot_image(train_dataset[22][0].squeeze().transpose((1, 2, 0)) * 255.0)  # CricketBowling

#########################################################################
# Here is the second example that randomly reads 25 videos each time, randomly selects one clip per video and
# performs center cropping. A clip can contain N consecutive frames, e.g., N=5.
train_dataset = UCF101(train=True, new_length=5, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
# We can see the shape of our loaded data as below. Now we have another ``depth`` dimension which
# indicates how many frames are in each clip (a.k.a. the temporal dimension).
for x, y in train_data:
    print('Video frame size (batch, extra, channel, depth, height, width):', x.shape)
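# The comment at the top of this snippet mentions ``setting`` and ``root`` but never shows them.
# A hedged sketch of what passing them looks like; the two paths below are placeholders
# (assumptions), not real locations from the snippet.
train_dataset_custom = UCF101(
    root='path/to/ucf101/rawframes',           # folder holding the decoded frames (assumed path)
    setting='path/to/ucf101/train_split.txt',  # train/val split list file (assumed path)
    train=True,
    transform=transform_train)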
# You need to specify ``setting`` and ``root`` for HMDB51 if you decoded the video frames into a different folder.
train_dataset = HMDB51(train=True, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
# We can see the shape of our loaded data as below. ``extra`` indicates if we select multiple crops or multiple segments
# from a video. Here, we only pick one frame per video, so the ``extra`` dimension is 1.
for x, y in train_data:
    print('Video frame size (batch, extra, channel, height, width):', x.shape)
    print('Video label:', y.shape)
    break

#########################################################################
# Let's plot several training samples. index 0 is image, 1 is label
from gluoncv.utils import viz
viz.plot_image(train_dataset[500][0].squeeze().transpose((1, 2, 0)) * 255.0)   # dive
viz.plot_image(train_dataset[2500][0].squeeze().transpose((1, 2, 0)) * 255.0)  # shoot_bow

#########################################################################
# Here is the second example that randomly reads 25 videos each time, randomly selects one clip per video and
# performs center cropping. A clip can contain N consecutive frames, e.g., N=5.
train_dataset = HMDB51(train=True, new_length=5, transform=transform_train)
train_data = DataLoader(train_dataset, batch_size=25, shuffle=True)

#########################################################################
# We can see the shape of our loaded data as below. Now we have another ``depth`` dimension which
# indicates how many frames are in each clip (a.k.a. the temporal dimension).
for x, y in train_data:
    print('Video frame size (batch, extra, channel, depth, height, width):', x.shape)
# You need to adjust ``rec_path`` if you stored the ImageNet record (``.rec``/``.idx``) files
# in a different folder
train_data = ImageRecordIter(
    path_imgrec=os.path.join(rec_path, 'train.rec'),
    path_imgidx=os.path.join(rec_path, 'train.idx'),
    data_shape=(3, 224, 224),
    batch_size=32,
    shuffle=True
)

#########################################################################
for batch in train_data:
    print(batch.data[0].shape, batch.label[0].shape)
    break

#########################################################################
# Plot some validation images
from gluoncv.utils import viz
val_data = ImageRecordIter(
    path_imgrec=os.path.join(rec_path, 'val.rec'),
    path_imgidx=os.path.join(rec_path, 'val.idx'),
    data_shape=(3, 224, 224),
    batch_size=32,
    shuffle=False
)
for batch in val_data:
    viz.plot_image(nd.transpose(batch.data[0][12], (1, 2, 0)))
    viz.plot_image(nd.transpose(batch.data[0][21], (1, 2, 0)))
    break
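# ImageRecordIter can also apply decoding-time augmentation and normalization. A hedged sketch of
# commonly used options, reusing the same ``rec_path``: the mean/std values are the standard
# ImageNet statistics scaled to 0-255, and the augmentation flags are illustrative assumptions,
# not taken from the snippet above.
train_data_aug = ImageRecordIter(
    path_imgrec=os.path.join(rec_path, 'train.rec'),
    path_imgidx=os.path.join(rec_path, 'train.idx'),
    data_shape=(3, 224, 224),
    batch_size=32,
    shuffle=True,
    rand_crop=True,      # random crop augmentation
    rand_mirror=True,    # random horizontal flip
    mean_r=123.68, mean_g=116.779, mean_b=103.939,
    std_r=58.393, std_g=57.12, std_b=57.375,
    preprocess_threads=4,
)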
    batch_size=batch_size, shuffle=False, last_batch='discard')

for d, l in train_data:
    break
print(d.shape, l.shape)

for da, la in test_data:
    break
print(da.shape, la.shape)

########################################################################################################################
### graph
from gluoncv.utils import viz
viz.plot_image(d[2][1])  # one channel of the 3rd image in the batch
viz.plot_image(d[1][0])

########################################################################################################################
### model
net = nn.HybridSequential()
with net.name_scope():
    net.add(
        #
        nn.Conv2D(channels=96, kernel_size=11, strides=4, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        #
        nn.Conv2D(channels=256, kernel_size=5, padding=2, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        #
        nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'),
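        # The snippet is cut off here. The layers below are a hedged, assumed completion
        # following the classic AlexNet layout (two more 384/256-channel convs, a final pool,
        # and two 4096-unit dense layers with 10 output classes, matching the ``classes=10``
        # used earlier); they are not recovered from the truncated original.
        nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2D(channels=256, kernel_size=3, padding=1, activation='relu'),
        nn.MaxPool2D(pool_size=3, strides=2),
        nn.Flatten(),
        nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
        nn.Dense(4096, activation='relu'), nn.Dropout(0.5),
        nn.Dense(10))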