# Shared stdlib dependencies for the functions below.
import json
import logging
import os
import subprocess
import sys
import time
# Django models (Video, Frame, Detection, Region, TEvent, IndexEntries),
# settings, and the dvalib helpers (entity, detector, indexer,
# facerecognition) plus scipy's misc are assumed to be imported at module
# level elsewhere; the subprocess-entry functions below deliberately
# re-import what they need after django.setup() so they can run in a
# fresh interpreter.


def assign_tags(video_id):
    import django
    sys.path.append(os.path.dirname(__file__))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dva.settings")
    django.setup()
    from django.conf import settings
    from dvaapp.models import Video, Frame, Region
    from dvalib import entity, annotator
    dv = Video.objects.get(id=video_id)
    frames = Frame.objects.all().filter(video=dv)
    v = entity.WVideo(dvideo=dv, media_dir=settings.MEDIA_ROOT)
    wframes = {df.pk: entity.WFrame(video=v, frame_index=df.frame_index, primary_key=df.pk)
               for df in frames}
    algorithm = annotator.OpenImagesAnnotator()
    logging.info("starting annotation {}".format(algorithm.name))
    for k, f in wframes.items():
        tags = algorithm.apply(f.local_path())
        a = Region()
        a.region_type = Region.ANNOTATION
        a.frame_id = k
        a.video_id = video_id
        a.object_name = "OpenImagesTag"
        # Keep only tags scoring above 0.1; store them both as searchable
        # text and as a JSON map of tag -> confidence percentage.
        a.metadata_text = " ".join([t for t, score in tags.items() if score > 0.1])
        a.metadata_json = json.dumps({t: 100.0 * score for t, score in tags.items() if score > 0.1})
        a.full_frame = True
        a.save()
        print(a.metadata_text)
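# A minimal usage sketch (this helper is not part of the original code):
# reading back the tags that assign_tags stores on each Region row.
def top_tags_for_video(video_id, limit=5):
    from dvaapp.models import Region
    scores = {}
    for region in Region.objects.filter(video_id=video_id, object_name="OpenImagesTag"):
        scores.update(json.loads(region.metadata_json))
    return sorted(scores, key=scores.get, reverse=True)[:limit]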
def perform_detection(video_id):
    start = TEvent()
    start.video_id = video_id
    start.started = True
    start.operation = "detection"
    start.save()
    start_time = time.time()
    dv = Video.objects.get(id=video_id)
    frames = Frame.objects.all().filter(video=dv)
    v = entity.WVideo(dvideo=dv, media_dir=settings.MEDIA_ROOT)
    wframes = [entity.WFrame(video=v, frame_index=df.frame_index, primary_key=df.pk)
               for df in frames]
    darknet_path = os.path.join(settings.BASE_DIR, 'darknet/')
    list_path = "{}/{}_list.txt".format(darknet_path, os.getpid())
    output_path = "{}/{}_output.txt".format(darknet_path, os.getpid())
    logging.info(darknet_path)
    path_to_pk = {}
    # Write the frame paths darknet should process, remembering which
    # database primary key each path maps back to.
    with open(list_path, 'w') as framelist:
        for frame in wframes:
            framelist.write('{}\n'.format(frame.local_path()))
            path_to_pk[frame.local_path()] = frame.primary_key
    # ./darknet detector test cfg/combine9k.data cfg/yolo9000.cfg yolo9000.weights data/list.txt
    with open(output_path, 'w') as output:
        args = ["./darknet", 'detector', 'test', 'cfg/combine9k.data',
                'cfg/yolo9000.cfg', 'yolo9000.weights', list_path]
        logging.info(args)
        returncode = subprocess.call(args, cwd=darknet_path, stdout=output)
    if returncode == 0:
        detections = 0
        for line in open(output_path):
            if line.strip():
                detections += 1
                frame_path, name, confidence, left, right, top, bot = line.strip().split('\t')
                if frame_path not in path_to_pk:
                    raise ValueError(frame_path)
                top = int(top)
                left = int(left)
                right = int(right)
                bot = int(bot)
                confidence = float(confidence)
                dd = Detection()
                dd.video = dv
                dd.frame_id = path_to_pk[frame_path]
                dd.object_name = "darknet_yolo9k_{}".format(name.replace(' ', '_'))
                dd.confidence = confidence
                dd.x = left
                dd.y = top
                dd.w = right - left
                dd.h = bot - top
                dd.save()
                # Save a crop of the detected region alongside the frame.
                img = Image.open(frame_path)
                img2 = img.crop((left, top, right, bot))
                img2.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT, video_id, dd.pk))
        dv.detections = detections
        dv.save()
    start.completed = True
    start.seconds = time.time() - start_time
    start.save()
    return returncode
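# A minimal parsing sketch (this helper is not present in the original code;
# the tab-separated field order is inferred from the parsing loop in
# perform_detection above, and the dict keys are illustrative only).
def _parse_yolo9k_line(line):
    frame_path, name, confidence, left, right, top, bot = line.strip().split('\t')
    return {'frame_path': frame_path,
            'name': name,
            'confidence': float(confidence),
            'left': int(left), 'right': int(right),
            'top': int(top), 'bot': int(bot)}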
def detect(video_id):
    """
    This is a HACK since TensorFlow is absolutely atrocious at allocating
    and freeing memory. Once a process / session has been allocated memory,
    it cannot be forced to release it. As a result this code is called via
    a subprocess, which frees the memory when it exits.
    :param video_id:
    :return:
    """
    import django
    from PIL import Image
    sys.path.append(os.path.dirname(__file__))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dva.settings")
    django.setup()
    from django.conf import settings
    from dvaapp.models import Video, Detection, Frame
    from dvalib import entity, detector
    dv = Video.objects.get(id=video_id)
    frames = Frame.objects.all().filter(video=dv)
    v = entity.WVideo(dvideo=dv, media_dir=settings.MEDIA_ROOT)
    wframes = {df.pk: entity.WFrame(video=v, frame_index=df.frame_index, primary_key=df.pk)
               for df in frames}
    detection_count = 0
    detector_list = {'ssd': detector.SSDetector()}
    # The YOLO detector is optional and only loaded when explicitly enabled.
    if 'YOLO_ENABLE' in os.environ:
        detector_list['yolo'] = detector.YOLODetector()
    for algorithm in detector_list.values():
        logging.info("starting detection {}".format(algorithm.name))
        frame_detections = algorithm.detect(wframes.values())
        for frame_pk, detections in frame_detections.items():
            for d in detections:
                dd = Detection()
                dd.video = dv
                dd.frame_id = frame_pk
                dd.object_name = d['name']
                dd.confidence = d['confidence']
                dd.x = d['left']
                dd.y = d['top']
                dd.w = d['right'] - d['left']
                dd.h = d['bot'] - d['top']
                dd.save()
                # Save a crop of the detected region alongside the frame.
                img = Image.open(wframes[frame_pk].local_path())
                img2 = img.crop((d['left'], d['top'], d['right'], d['bot']))
                img2.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT, video_id, dd.pk))
                detection_count += 1
    # Re-read the row before incrementing, since another worker may have
    # updated the detection count concurrently.
    dv.refresh_from_db()
    dv.detections = dv.detections + detection_count
    dv.save()
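# A minimal invocation sketch (the script filename 'detect.py' and the
# argument convention are assumptions, not part of the code above): since
# detect() is meant to run in a throwaway interpreter, a parent process
# would launch it along these lines, and TensorFlow's memory is reclaimed
# as soon as the child exits.
def run_detect_in_subprocess(video_id, script_path='detect.py'):
    return subprocess.check_call([sys.executable, script_path, str(video_id)])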
def perform_face_indexing(video_id):
    face_indexer = indexer.FacenetIndexer()
    dv = Video.objects.get(id=video_id)
    video = entity.WVideo(dv, settings.MEDIA_ROOT)
    frames = Frame.objects.all().filter(video=dv)
    wframes = [entity.WFrame(video=video, frame_index=df.frame_index, primary_key=df.pk)
               for df in frames]
    input_paths = {f.local_path(): f.primary_key for f in wframes}
    faces_dir = '{}/{}/detections'.format(settings.MEDIA_ROOT, video_id)
    indexes_dir = '{}/{}/indexes'.format(settings.MEDIA_ROOT, video_id)
    face_detector = detector.FaceDetector()
    aligned_paths = face_detector.detect(wframes)
    logging.info(len(aligned_paths))
    faces = []
    faces_to_pk = {}
    count = 0
    for path, v in aligned_paths.items():
        for scaled_img, bb in v:
            d = Region()
            d.region_type = Region.DETECTION
            d.video = dv
            d.confidence = 100.0
            d.frame_id = input_paths[path]
            d.object_name = "mtcnn_face"
            left, top, right, bottom = bb[0], bb[1], bb[2], bb[3]
            d.y = top
            d.x = left
            d.w = right - left
            d.h = bottom - top
            d.save()
            # faces_dir is already an absolute path, so the crop can be
            # written directly without a further os.path.join.
            face_path = '{}/{}.jpg'.format(faces_dir, d.pk)
            misc.imsave(face_path, scaled_img)
            faces.append(face_path)
            faces_to_pk[face_path] = d.pk
            count += 1
    dv.refresh_from_db()
    dv.detections = dv.detections + count
    dv.save()
    path_count, emb_array, entries, feat_fname, entries_fname = face_indexer.index_faces(
        faces, faces_to_pk, indexes_dir, video_id)
    i = IndexEntries()
    i.video = dv
    i.count = len(entries)
    i.contains_frames = False
    i.contains_detections = True
    i.detection_name = "Face"
    i.algorithm = 'facenet'
    i.entries_file_name = entries_fname.split('/')[-1]
    i.features_file_name = feat_fname.split('/')[-1]
    i.save()
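# A minimal retrieval sketch (assumptions: FacenetIndexer writes the feature
# matrix as a numpy-loadable file under indexes_dir, with one embedding row
# per indexed face; neither is guaranteed by the code above).
def load_face_index(video_id, features_file_name):
    import numpy as np
    feat_path = '{}/{}/indexes/{}'.format(settings.MEDIA_ROOT, video_id, features_file_name)
    return np.load(feat_path)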
def perform_face_indexing_legacy(video_id):
    # Earlier variant of perform_face_indexing, kept under a distinct name so
    # it does not shadow the implementation above; it relies on the older
    # facerecognition module and the Detection model instead of Region.
    dv = Video.objects.get(id=video_id)
    video = entity.WVideo(dv, settings.MEDIA_ROOT)
    frames = Frame.objects.all().filter(video=dv)
    wframes = [entity.WFrame(video=video, frame_index=df.frame_index, primary_key=df.pk)
               for df in frames]
    input_paths = {f.local_path(): f.primary_key for f in wframes}
    faces_dir = '{}/{}/detections'.format(settings.MEDIA_ROOT, video_id)
    indexes_dir = '{}/{}/indexes'.format(settings.MEDIA_ROOT, video_id)
    aligned_paths = facerecognition.align(input_paths.keys(), faces_dir)
    logging.info(len(aligned_paths))
    faces = []
    faces_to_pk = {}
    count = 0
    for path, v in aligned_paths.items():
        for face_path, bb in v:
            d = Detection()
            d.video = dv
            d.confidence = 100.0
            d.frame_id = input_paths[path]
            d.object_name = "mtcnn_face"
            top, left, bottom, right = bb[0], bb[1], bb[2], bb[3]
            d.y = top
            d.x = left
            d.w = right - left
            d.h = bottom - top
            d.save()
            # Rename the aligned crop so it is keyed by the detection's
            # primary key.
            final_path = '{}/{}.jpg'.format(faces_dir, d.pk)
            os.rename(face_path, final_path)
            faces.append(final_path)
            faces_to_pk[final_path] = d.pk
            count += 1
    dv.detections = dv.detections + count
    dv.save()
    path_count, emb_array, entries = facerecognition.represent(faces, faces_to_pk, indexes_dir)
    i = IndexEntries()
    i.video = dv
    i.count = len(entries)
    i.contains_frames = False
    i.contains_detections = True
    i.detection_name = "Face"
    i.algorithm = 'facenet'
    i.save()
def detect_legacy(video_id):
    """
    Earlier variant of detect() above, kept under a distinct name so it does
    not shadow that implementation. Same rationale: this is a HACK since
    TensorFlow is absolutely atrocious at allocating and freeing memory, so
    it runs in a subprocess that releases the memory on exit.
    :param video_id:
    :return:
    """
    import django
    from PIL import Image
    sys.path.append(os.path.dirname(__file__))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dva.settings")
    django.setup()
    from django.conf import settings
    from dvaapp.models import Video, Detection, Frame
    from dvalib import entity, detector
    dv = Video.objects.get(id=video_id)
    frames = Frame.objects.all().filter(video=dv)
    v = entity.WVideo(dvideo=dv, media_dir=settings.MEDIA_ROOT)
    wframes = {df.pk: entity.WFrame(video=v, frame_index=df.frame_index, primary_key=df.pk)
               for df in frames}
    detection_count = 0
    for algorithm in detector.DETECTORS.values():
        logging.info("starting detection {}".format(algorithm.name))
        frame_detections = algorithm.detect(wframes.values())
        for frame_pk, detections in frame_detections.items():
            for d in detections:
                dd = Detection()
                dd.video = dv
                dd.frame_id = frame_pk
                dd.object_name = d['name']
                dd.confidence = d['confidence']
                dd.x = d['left']
                dd.y = d['top']
                dd.w = d['right'] - d['left']
                dd.h = d['bot'] - d['top']
                dd.save()
                # Save a crop of the detected region alongside the frame.
                img = Image.open(wframes[frame_pk].local_path())
                img2 = img.crop((d['left'], d['top'], d['right'], d['bot']))
                img2.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT, video_id, dd.pk))
                detection_count += 1
    dv.detections = detection_count
    dv.save()