def create_annotation(form, object_name, labels, frame):
    """Create a Region annotation (and its UI labels) from a validated form."""
    annotation = Region()
    annotation.object_name = object_name
    if form.cleaned_data['high_level']:
        # High-level annotations apply to the whole frame rather than a box.
        annotation.full_frame = True
        annotation.x = 0
        annotation.y = 0
        annotation.h = 0
        annotation.w = 0
    else:
        annotation.full_frame = False
        annotation.x = form.cleaned_data['x']
        annotation.y = form.cleaned_data['y']
        annotation.h = form.cleaned_data['h']
        annotation.w = form.cleaned_data['w']
    annotation.text = form.cleaned_data['text']
    annotation.metadata = form.cleaned_data['metadata']
    annotation.frame = frame
    annotation.video = frame.video
    annotation.region_type = Region.ANNOTATION
    annotation.save()
    for lname in labels:
        if lname.strip():
            # Re-use an existing "UI" label with the same name if one exists.
            dl, _ = Label.objects.get_or_create(name=lname, set="UI")
            rl = RegionLabel()
            rl.video = annotation.video
            rl.frame = annotation.frame
            rl.region = annotation
            rl.label = dl
            rl.save()


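# Hedged usage sketch for create_annotation above. "AnnotationForm" and the
# 'tags' / 'object_name' POST fields below are hypothetical, introduced only to
# illustrate the call; Frame and create_annotation come from the surrounding
# code. This is a minimal sketch, not the project's actual form or view.
from django import forms
from django.shortcuts import redirect


class AnnotationForm(forms.Form):  # hypothetical form, for illustration only
    high_level = forms.BooleanField(required=False)
    x = forms.IntegerField(required=False, initial=0)
    y = forms.IntegerField(required=False, initial=0)
    w = forms.IntegerField(required=False, initial=0)
    h = forms.IntegerField(required=False, initial=0)
    text = forms.CharField(required=False)
    metadata = forms.CharField(required=False)


def annotate_frame(request, frame_pk):
    # Validate the submitted box/metadata and delegate to create_annotation.
    frame = Frame.objects.get(pk=frame_pk)
    if request.method == 'POST':
        form = AnnotationForm(request.POST)
        if form.is_valid():
            labels = request.POST.get('tags', '').split(',')
            create_annotation(form, request.POST.get('object_name', ''), labels, frame)
    return redirect("frame_detail", pk=frame.pk)

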
def detect_text_boxes(video_pk, cpu_mode=False):
    """
    Detect text boxes in the frames of a video using CTPN; must be run inside the dva_ctpn container.
    :param video_pk:
    :param cpu_mode: run Caffe in CPU-only mode when True
    :return:
    """
    setup_django()
    from dvaapp.models import Region, Frame
    from django.conf import settings
    from PIL import Image
    import sys
    video_pk = int(video_pk)
    sys.path.append('/opt/ctpn/CTPN/tools/')
    sys.path.append('/opt/ctpn/CTPN/src/')
    from cfg import Config as cfg
    from other import resize_im, CaffeModel
    import cv2, caffe
    from detectors import TextProposalDetector, TextDetector
    NET_DEF_FILE = "/opt/ctpn/CTPN/models/deploy.prototxt"
    MODEL_FILE = "/opt/ctpn/CTPN/models/ctpn_trained_model.caffemodel"
    if cpu_mode:  # pass cpu_mode=True for CPU-only mode
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
    text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)
    for f in Frame.objects.all().filter(video_id=video_pk):
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT, video_pk, f.frame_index)
        im = cv2.imread(path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        # Scale factors to map detections on the resized image back to the original frame.
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = text_detector.detect(im)
        for k in text_lines:
            left, top, right, bottom, score = k
            left, top, right, bottom = int(left * mul_w), int(top * mul_h), int(right * mul_w), int(bottom * mul_h)
            r = Region()
            r.region_type = r.DETECTION
            r.confidence = int(100.0 * score)
            r.object_name = "CTPN_TEXTBOX"
            r.y = top
            r.x = left
            r.w = right - left
            r.h = bottom - top
            r.frame_id = f.pk
            r.video_id = video_pk
            r.save()
            right = r.w + r.x
            bottom = r.h + r.y
            # Save a crop of the detected text box next to the region record.
            img = Image.open(path)
            img2 = img.crop((left, top, right, bottom))
            img2.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT, video_pk, r.pk))


def perform_face_indexing(video_id):
    """Detect faces with MTCNN and index the aligned crops with Facenet."""
    import os
    import logging
    from dvaapp.models import Region, Frame, Video, IndexEntries
    from dvalib import indexer, detector
    from dvaapp.operations.video_processing import WFrame, WVideo
    from django.conf import settings
    from scipy import misc
    face_indexer = indexer.FacenetIndexer()
    dv = Video.objects.get(id=video_id)
    video = WVideo(dv, settings.MEDIA_ROOT)
    frames = Frame.objects.all().filter(video=dv)
    wframes = [WFrame(video=video, frame_index=df.frame_index, primary_key=df.pk) for df in frames]
    input_paths = {f.local_path(): f.primary_key for f in wframes}
    faces_dir = '{}/{}/detections'.format(settings.MEDIA_ROOT, video_id)
    indexes_dir = '{}/{}/indexes'.format(settings.MEDIA_ROOT, video_id)
    face_detector = detector.FaceDetector()
    aligned_paths = face_detector.detect(wframes)
    logging.info(len(aligned_paths))
    faces = []
    faces_to_pk = {}
    count = 0
    for path, v in aligned_paths.iteritems():
        for scaled_img, bb in v:
            # Create a detection Region for each aligned face bounding box.
            d = Region()
            d.region_type = Region.DETECTION
            d.video = dv
            d.confidence = 100.0
            d.frame_id = input_paths[path]
            d.object_name = "mtcnn_face"
            left, top, right, bottom = bb[0], bb[1], bb[2], bb[3]
            d.y = top
            d.x = left
            d.w = right - left
            d.h = bottom - top
            d.save()
            face_path = '{}/{}.jpg'.format(faces_dir, d.pk)
            output_filename = os.path.join(faces_dir, face_path)
            misc.imsave(output_filename, scaled_img)
            faces.append(face_path)
            faces_to_pk[face_path] = d.pk
            count += 1
    dv.refresh_from_db()
    dv.detections = dv.detections + count
    dv.save()
    path_count, emb_array, entries, feat_fname, entries_fname = face_indexer.index_faces(faces, faces_to_pk, indexes_dir, video_id)
    i = IndexEntries()
    i.video = dv
    i.count = len(entries)
    i.contains_frames = False
    i.contains_detections = True
    i.detection_name = "Face"
    i.algorithm = 'facenet'
    i.entries_file_name = entries_fname.split('/')[-1]
    i.features_file_name = feat_fname.split('/')[-1]
    i.save()


def detect_custom_objects(detector_pk, video_pk):
    """
    Detection using custom-trained YOLO detectors
    :param detector_pk:
    :param video_pk:
    :return:
    """
    setup_django()
    import json
    from dvaapp.models import Region, Frame, CustomDetector
    from django.conf import settings
    from dvalib.yolo import trainer
    from PIL import Image
    args = {'detector_pk': int(detector_pk)}
    video_pk = int(video_pk)
    detector = CustomDetector.objects.get(pk=args['detector_pk'])
    args['root_dir'] = "{}/detectors/{}/".format(settings.MEDIA_ROOT, detector.pk)
    class_names = {k: v for k, v in json.loads(detector.class_names)}
    i_class_names = {i: k for k, i in class_names.items()}
    frames = {}
    for f in Frame.objects.all().filter(video_id=video_pk):
        frames[f.pk] = f
    images = []
    path_to_f = {}
    for k, f in frames.iteritems():
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT, f.video_id, f.frame_index)
        path_to_f[path] = f
        images.append(path)
    # Run the trainer in test mode so it only loads weights and predicts.
    train_task = trainer.YOLOTrainer(boxes=[], images=images, class_names=i_class_names, args=args, test_mode=True)
    results = train_task.predict()
    for path, box_class, score, top, left, bottom, right in results:
        r = Region()
        r.region_type = r.DETECTION
        r.confidence = int(100.0 * score)
        r.object_name = "YOLO_{}_{}".format(detector.pk, box_class)
        r.y = top
        r.x = left
        r.w = right - left
        r.h = bottom - top
        r.frame_id = path_to_f[path].pk
        r.video_id = path_to_f[path].video_id
        r.save()
        right = r.w + r.x
        bottom = r.h + r.y
        img = Image.open(path)
        img2 = img.crop((r.x, r.y, right, bottom))
        img2.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT, video_pk, r.pk))


def ssd_detect(video_id):
    """
    This is a HACK since TensorFlow is absolutely atrocious at allocating and freeing
    up memory. Once a process / session has been allocated memory it cannot be forced
    to clear it up. As a result this code gets called via a subprocess, which frees
    the memory when it exits.
    :param video_id:
    :return:
    """
    import os
    import sys
    import logging
    import django
    from PIL import Image
    sys.path.append(os.path.dirname(__file__))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dva.settings")
    django.setup()
    from django.conf import settings
    from dvaapp.models import Video, Region, Frame
    from dvalib import entity, detector
    dv = Video.objects.get(id=video_id)
    frames = Frame.objects.all().filter(video=dv)
    v = entity.WVideo(dvideo=dv, media_dir=settings.MEDIA_ROOT)
    wframes = {df.pk: entity.WFrame(video=v, frame_index=df.frame_index, primary_key=df.pk)
               for df in frames}
    detection_count = 0
    algorithm = detector.SSDetector()
    logging.info("starting detection {}".format(algorithm.name))
    frame_detections = algorithm.detect(wframes.values())
    for frame_pk, detections in frame_detections.iteritems():
        for d in detections:
            dd = Region()
            dd.region_type = Region.DETECTION
            dd.video = dv
            dd.frame_id = frame_pk
            dd.object_name = d['name']
            dd.confidence = d['confidence']
            dd.x = d['left']
            dd.y = d['top']
            dd.w = d['right'] - d['left']
            dd.h = d['bot'] - d['top']
            dd.save()
            # Save a crop of the detected region alongside the Region record.
            img = Image.open(wframes[frame_pk].local_path())
            img2 = img.crop((d['left'], d['top'], d['right'], d['bot']))
            img2.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT, video_id, dd.pk))
            detection_count += 1
    dv.refresh_from_db()
    dv.detections = dv.detections + detection_count
    dv.save()


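# The docstring of ssd_detect above explains that it is meant to be invoked in a
# separate process so TensorFlow's memory is reclaimed when that process exits.
# The following is a minimal, hedged sketch of such a caller; the module name
# "detection_script" is an assumption for illustration and is not taken from the
# code above.
import subprocess
import sys


def run_ssd_detect(video_id):
    # Spawning a fresh interpreter guarantees that all TensorFlow allocations
    # are released once the child process terminates.
    code = 'from detection_script import ssd_detect; ssd_detect({})'.format(int(video_id))
    ret = subprocess.call([sys.executable, '-c', code])
    if ret != 0:
        raise RuntimeError("ssd_detect subprocess failed for video {}".format(video_id))

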
def recognize_text(video_pk):
    """
    Recognize text in regions with object_name CTPN_TEXTBOX using CRNN
    :param video_pk:
    :return:
    """
    setup_django()
    from dvaapp.models import Region
    from django.conf import settings
    from PIL import Image
    import sys
    video_pk = int(video_pk)
    import dvalib.crnn.utils as utils
    import dvalib.crnn.dataset as dataset
    import torch
    from torch.autograd import Variable
    import dvalib.crnn.models.crnn as crnn
    model_path = '/root/DVA/dvalib/crnn/data/crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    model = crnn.CRNN(32, 1, 37, 256, 1)
    model.load_state_dict(torch.load(model_path))
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    for r in Region.objects.all().filter(video_id=video_pk, object_name='CTPN_TEXTBOX'):
        # Run CRNN over the crop saved for each CTPN text box detection.
        img_path = "{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT, video_pk, r.pk)
        image = Image.open(img_path).convert('L')
        image = transformer(image)
        image = image.view(1, *image.size())
        image = Variable(image)
        model.eval()
        preds = model(image)
        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        # Store the recognized string as an annotation covering the same box.
        dr = Region()
        dr.video_id = r.video_id
        dr.object_name = "CRNN_TEXT"
        dr.x = r.x
        dr.y = r.y
        dr.w = r.w
        dr.h = r.h
        dr.region_type = Region.ANNOTATION
        dr.metadata_text = sim_pred
        dr.frame_id = r.frame_id
        dr.save()


def detect_custom_objects(detector_pk, video_pk):
    """
    Detection using customized trained YOLO detectors
    :param detector_pk:
    :param video_pk:
    :return:
    """
    setup_django()
    from dvaapp.models import Region, Frame, CustomDetector
    from django.conf import settings
    from dvalib.yolo import trainer
    from PIL import Image
    args = {'detector_pk': int(detector_pk)}
    video_pk = int(video_pk)
    detector = CustomDetector.objects.get(pk=args['detector_pk'])
    args['root_dir'] = "{}/detectors/{}/".format(settings.MEDIA_ROOT, detector.pk)
    class_names = {k: v for k, v in json.loads(detector.class_names)}
    i_class_names = {i: k for k, i in class_names.items()}
    frames = {}
    for f in Frame.objects.all().filter(video_id=video_pk):
        frames[f.pk] = f
    images = []
    path_to_f = {}
    for k, f in frames.iteritems():
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT, f.video_id, f.frame_index)
        path_to_f[path] = f
        images.append(path)
    train_task = trainer.YOLOTrainer(boxes=[], images=images, class_names=i_class_names, args=args, test_mode=True)
    results = train_task.predict()
    for path, box_class, score, top, left, bottom, right in results:
        r = Region()
        r.region_type = r.DETECTION
        r.confidence = int(100.0 * score)
        r.object_name = "YOLO_{}_{}".format(detector.pk, box_class)
        r.y = top
        r.x = left
        r.w = right - left
        r.h = bottom - top
        r.frame_id = path_to_f[path].pk
        r.video_id = path_to_f[path].video_id
        r.save()
        right = r.w + r.x
        bottom = r.h + r.y
        img = Image.open(path)
        img2 = img.crop((r.x, r.y, right, bottom))
        img2.save("{}/{}/regions/{}.jpg".format(settings.MEDIA_ROOT, video_pk, r.pk))


def annotate_entire_frame(request, frame_pk):
    frame = Frame.objects.get(pk=frame_pk)
    annotation = None
    if request.method == 'POST':
        if request.POST.get('text').strip() \
                or request.POST.get('metadata').strip() \
                or request.POST.get('object_name', None):
            annotation = Region()
            annotation.region_type = Region.ANNOTATION
            annotation.x = 0
            annotation.y = 0
            annotation.h = 0
            annotation.w = 0
            annotation.full_frame = True
            annotation.text = request.POST.get('text')
            annotation.metadata = request.POST.get('metadata')
            annotation.object_name = request.POST.get('object_name', 'frame_metadata')
            annotation.frame = frame
            annotation.video = frame.video
            annotation.save()
        for label_name in request.POST.get('tags').split(','):
            if label_name.strip():
                if annotation:
                    dl = RegionLabel()
                    dl.video = frame.video
                    dl.frame = frame
                    dl.label = Label.objects.get_or_create(name=label_name, set="UI")[0]
                    dl.region = annotation
                    dl.save()
                else:
                    dl = FrameLabel()
                    dl.video = frame.video
                    dl.frame = frame
                    dl.label = Label.objects.get_or_create(name=label_name, set="UI")[0]
                    dl.save()
    return redirect("frame_detail", pk=frame.pk)


def create_yolo_test_data():
    import json
    import shutil
    import numpy as np
    import os
    from PIL import Image
    setup_django()
    from dvaapp.shared import handle_uploaded_file
    from django.core.files.uploadedfile import SimpleUploadedFile
    from dvaapp.models import Region, TEvent, Frame, AppliedLabel
    from dvaapp.tasks import extract_frames, export_video_by_id
    try:
        shutil.rmtree('tests/yolo_test')
    except:
        pass
    try:
        os.mkdir('tests/yolo_test')
    except:
        pass
    data = np.load('shared/underwater_data.npz')
    json_test = {}
    json_test['anchors'] = [(0.57273, 0.677385), (1.87446, 2.06253), (3.33843, 5.47434),
                            (7.88282, 3.52778), (9.77052, 9.16828)]
    id_2_boxes = {}
    class_names = {
        0: "red_buoy",
        1: "green_buoy",
        2: "yellow_buoy",
        3: "path_marker",
        4: "start_gate",
        5: "channel"
    }
    for i, image in enumerate(data['images'][:500]):
        path = "tests/yolo_test/{}.jpg".format(i)
        Image.fromarray(image).save(path)
        id_2_boxes[path.split('/')[-1]] = data['boxes'][i].tolist()
    local('zip tests/yolo_test.zip -r tests/yolo_test/* ')
    fname = "tests/yolo_test.zip"
    name = "yolo_test"
    f = SimpleUploadedFile(fname, file(fname).read(), content_type="application/zip")
    dv = handle_uploaded_file(f, name)
    extract_frames(TEvent.objects.create(video=dv).pk)
    for df in Frame.objects.filter(video=dv):
        for box in id_2_boxes[df.name]:
            r = Region()
            r.video = dv
            r.frame = df
            c, top_x, top_y, bottom_x, bottom_y = box
            r.object_name = class_names[c]
            r.region_type = Region.ANNOTATION
            r.x = top_x
            r.y = top_y
            r.w = bottom_x - top_x
            r.h = bottom_y - top_y
            r.save()
            l = AppliedLabel()
            l.frame = df
            l.video = dv
            l.label_name = class_names[c]
            l.region = r
            l.save()
    export_video_by_id(TEvent.objects.create(video=dv).pk)
    try:
        shutil.rmtree('tests/yolo_test')
    except:
        pass


def train_yolo(start_pk):
    """
    Train a YOLO model specified in a TEvent. Running this as a separate process is
    necessary to ensure that the TensorFlow process exits and releases the allocated
    GPU memory.
    :param start_pk: TEvent PK with information about launching the training task
    :return:
    """
    setup_django()
    import json
    import subprocess
    from django.conf import settings
    from dvaapp.models import Region, Frame, CustomDetector, TEvent
    from dvaapp.shared import create_detector_folders, create_detector_dataset
    from dvalib.yolo import trainer
    start = TEvent.objects.get(pk=start_pk)
    args = json.loads(start.arguments_json)
    labels = set(args['labels']) if 'labels' in args else set()
    object_names = set(args['object_names']) if 'object_names' in args else set()
    detector = CustomDetector.objects.get(pk=args['detector_pk'])
    create_detector_folders(detector)
    args['root_dir'] = "{}/detectors/{}/".format(settings.MEDIA_ROOT, detector.pk)
    class_distribution, class_names, rboxes, rboxes_set, frames, i_class_names = create_detector_dataset(object_names, labels)
    images, boxes = [], []
    path_to_f = {}
    for k, f in frames.iteritems():
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT, f.video_id, f.frame_index)
        path_to_f[path] = f
        images.append(path)
        boxes.append(rboxes[k])
        # print k, rboxes[k]
    # Persist the training inputs so the run can be reproduced or inspected later.
    with open("{}/input.json".format(args['root_dir']), 'w') as input_data:
        json.dump({'boxes': boxes,
                   'images': images,
                   'args': args,
                   'class_names': class_names.items(),
                   'class_distribution': class_distribution.items()},
                  input_data)
    detector.boxes_count = sum([len(k) for k in boxes])
    detector.frames_count = len(images)
    detector.classes_count = len(class_names)
    detector.save()
    train_task = trainer.YOLOTrainer(boxes=boxes, images=images, class_names=i_class_names, args=args)
    train_task.train()
    detector.phase_1_log = file("{}/phase_1.log".format(args['root_dir'])).read()
    detector.phase_2_log = file("{}/phase_2.log".format(args['root_dir'])).read()
    detector.class_distribution = json.dumps(class_distribution.items())
    detector.class_names = json.dumps(class_names.items())
    detector.trained = True
    detector.save()
    # Run the freshly trained detector over the training frames and bulk-insert the results.
    results = train_task.predict()
    bulk_regions = []
    for path, box_class, score, top, left, bottom, right in results:
        r = Region()
        r.region_type = r.ANNOTATION
        r.confidence = int(100.0 * score)
        r.object_name = "YOLO_{}_{}".format(detector.pk, box_class)
        r.y = top
        r.x = left
        r.w = right - left
        r.h = bottom - top
        r.frame_id = path_to_f[path].pk
        r.video_id = path_to_f[path].video_id
        bulk_regions.append(r)
    Region.objects.bulk_create(bulk_regions, batch_size=1000)
    folder_name = "{}/detectors/{}".format(settings.MEDIA_ROOT, detector.pk)
    file_name = '{}/exports/{}.dva_detector.zip'.format(settings.MEDIA_ROOT, detector.pk)
    zipper = subprocess.Popen(['zip', file_name, '-r', '.'], cwd=folder_name)
    zipper.wait()
    return 0


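# train_yolo above reads its configuration from TEvent.arguments_json and, per
# its docstring, is expected to run in a short-lived process so GPU memory is
# released when training finishes. Below is a hedged sketch of a caller that
# builds such a TEvent and launches the task in a child interpreter. The module
# name "training_script" is an assumption, and the sketch assumes a TEvent can
# be created with only arguments_json set; neither detail is taken from the
# code above.
import json
import subprocess
import sys


def launch_yolo_training(detector_pk, labels=None, object_names=None):
    from dvaapp.models import TEvent
    # Mirror the keys train_yolo reads from arguments_json.
    start = TEvent.objects.create(arguments_json=json.dumps({
        'detector_pk': detector_pk,
        'labels': labels or [],
        'object_names': object_names or [],
    }))
    # Run training in a child interpreter so TensorFlow's GPU memory is freed on exit.
    code = 'from training_script import train_yolo; train_yolo({})'.format(start.pk)
    return subprocess.call([sys.executable, '-c', code])

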
train_task = trainer.YOLOTrainer(boxes=boxes, images=images, args=args)
train_task.train()
detector.phase_1_log = file("{}/phase_1.log".format(args['root_dir'])).read()
detector.phase_2_log = file("{}/phase_2.log".format(args['root_dir'])).read()
detector.class_distribution = json.dumps(class_distribution.items())
detector.class_names = json.dumps(class_names.items())
detector.trained = True
detector.save()
results = train_task.predict()
bulk_regions = []
for path, box_class, score, top, left, bottom, right in results:
    r = Region()
    r.region_type = r.ANNOTATION
    r.confidence = int(100.0 * score)
    r.object_name = "YOLO_{}_{}".format(detector.pk, box_class)
    r.y = top
    r.x = left
    r.w = right - left
    r.h = bottom - top
    r.frame_id = path_to_f[path].pk
    r.video_id = path_to_f[path].video_id
    bulk_regions.append(r)
Region.objects.bulk_create(bulk_regions, batch_size=1000)
folder_name = "{}/detectors/{}".format(settings.MEDIA_ROOT, detector.pk)
file_name = '{}/exports/{}.dva_detector.zip'.format(settings.MEDIA_ROOT, detector.pk)
zipper = subprocess.Popen(['zip', file_name, '-r', '.'], cwd=folder_name)
zipper.wait()


fname = "/Users/aub3/tests/yolo_test.zip" name = "yolo_test" f = SimpleUploadedFile(fname, file(fname).read(), content_type="application/zip") dv = handle_uploaded_file(f, name) perform_dataset_extraction(TEvent.objects.create(video=dv).pk) for df in Frame.objects.filter(video=dv): for box in id_2_boxes[df.name]: r = Region() r.video = dv r.frame = df c, top_x, top_y, bottom_x, bottom_y = box r.object_name = class_names[c] r.region_type = Region.ANNOTATION r.x = top_x r.y = top_y r.w = bottom_x - top_x r.h = bottom_y - top_y r.save() l = RegionLabel() l.frame = df l.video = dv l.label = labels[c] l.region = r l.save() perform_export(TEvent.objects.create(video=dv, arguments={'destination': 'FILE'}).pk) try: shutil.rmtree('/Users/aub3/tests/yolo_test') except: pass