예제 #1
0
def assign_tags(video_id):
    """
    Tag every frame of a video with the OpenImages annotator.

    Boots Django, wraps each Frame of the video in an entity.WFrame, runs
    annotator.OpenImagesAnnotator on the frame's local file and saves one
    full-frame ANNOTATION Region per frame. Only tags scoring above 0.1 are
    kept: their names go into metadata_text (space separated) and their
    scores, multiplied by 100, into metadata_json.

    :param video_id: primary key of the Video whose frames are tagged
    :return: None
    """
    import django
    from PIL import Image
    sys.path.append(os.path.dirname(__file__))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dva.settings")
    django.setup()
    from django.conf import settings
    from dvaapp.models import Video, Frame, Region
    from dvalib import entity, annotator
    dv = Video.objects.get(id=video_id)
    wvideo = entity.WVideo(dvideo=dv, media_dir=settings.MEDIA_ROOT)
    wframes = {}
    for df in Frame.objects.all().filter(video=dv):
        wframes[df.pk] = entity.WFrame(video=wvideo,
                                       frame_index=df.frame_index,
                                       primary_key=df.pk)
    algorithm = annotator.OpenImagesAnnotator()
    logging.info("starting annotation {}".format(algorithm.name))
    for frame_pk, wframe in wframes.items():
        scores = algorithm.apply(wframe.local_path())
        # Apply the 0.1 threshold once, keeping iteration order, so the text
        # and the JSON payload are built from the same (tag, score) pairs.
        kept = [(tag, score) for tag, score in scores.iteritems()
                if score > 0.1]
        region = Region()
        region.region_type = Region.ANNOTATION
        region.frame_id = frame_pk
        region.video_id = video_id
        region.object_name = "OpenImagesTag"
        region.metadata_text = " ".join([tag for tag, _ in kept])
        region.metadata_json = json.dumps(
            {tag: 100.0 * score for tag, score in kept})
        region.full_frame = True
        region.save()
        print(region.metadata_text)
예제 #2
0
def detect_text_boxes(video_pk, cpu_mode=False):
    """
    Detect text boxes in the frames of a video using CTPN.

    Must be run in the dva_ctpn container: the CTPN tools/sources and the
    Caffe model are loaded from fixed paths under /opt/ctpn/CTPN. For every
    detected text line a DETECTION Region named "CTPN_TEXTBOX" is saved and
    the corresponding crop of the original frame is written to
    MEDIA_ROOT/<video_pk>/detections/<region pk>.jpg.

    :param video_pk: primary key of the video (converted with int())
    :param cpu_mode: when True run Caffe in CPU mode; otherwise GPU mode on
        device cfg.TEST_GPU_ID
    :return: None
    :raises IOError: if a frame image cannot be read from disk
    """
    setup_django()
    from dvaapp.models import Region, Frame
    from django.conf import settings
    from PIL import Image
    import sys
    video_pk = int(video_pk)
    sys.path.append('/opt/ctpn/CTPN/tools/')
    sys.path.append('/opt/ctpn/CTPN/src/')
    from cfg import Config as cfg
    from other import resize_im, CaffeModel
    import cv2, caffe
    from detectors import TextProposalDetector, TextDetector
    NET_DEF_FILE = "/opt/ctpn/CTPN/models/deploy.prototxt"
    MODEL_FILE = "/opt/ctpn/CTPN/models/ctpn_trained_model.caffemodel"
    if cpu_mode:  # Set this to true for CPU only mode
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
    text_proposals_detector = TextProposalDetector(
        CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)
    for f in Frame.objects.all().filter(video_id=video_pk):
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT, video_pk,
                                            f.frame_index)
        im = cv2.imread(path)
        if im is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail with the offending path rather than an AttributeError.
            raise IOError("could not read frame image {}".format(path))
        old_h, old_w = im.shape[:2]
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w = im.shape[:2]
        # Detection runs on the resized image; these factors map the boxes
        # back to the coordinates of the original frame.
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = text_detector.detect(im)
        # Opened lazily and at most once per frame, instead of once per box.
        frame_image = None
        for k in text_lines:
            left, top, right, bottom, score = k
            left, top, right, bottom = int(left * mul_w), int(
                top * mul_h), int(right * mul_w), int(bottom * mul_h)
            r = Region()
            r.region_type = r.DETECTION
            r.confidence = int(100.0 * score)
            r.object_name = "CTPN_TEXTBOX"
            r.y = top
            r.x = left
            r.w = right - left
            r.h = bottom - top
            r.frame_id = f.pk
            r.video_id = video_pk
            r.save()
            if frame_image is None:
                frame_image = Image.open(path)
            crop = frame_image.crop((left, top, right, bottom))
            crop.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT,
                                                       video_pk, r.pk))
예제 #3
0
def perform_face_indexing(video_id):
    """
    Detect faces in all frames of a video and index them with Facenet.

    Runs detector.FaceDetector over the video's frames, saves one DETECTION
    Region named "mtcnn_face" per detected face, writes the aligned face
    image to MEDIA_ROOT/<video_id>/detections/<region pk>.jpg, adds the
    number of faces to the video's detection counter, and finally records
    the index produced by indexer.FacenetIndexer in an IndexEntries row.

    :param video_id: primary key of the Video to process
    :return: None
    """
    from dvaapp.models import Region,Frame,Video,IndexEntries
    from dvalib import indexer,detector
    from dvaapp.operations.video_processing import WFrame,WVideo
    from django.conf import settings
    from scipy import misc
    face_indexer = indexer.FacenetIndexer()
    dv = Video.objects.get(id=video_id)
    video = WVideo(dv, settings.MEDIA_ROOT)
    frames = Frame.objects.all().filter(video=dv)
    wframes = [WFrame(video=video, frame_index=df.frame_index, primary_key=df.pk) for df in frames]
    # Maps a frame's local file path back to the Frame primary key.
    input_paths = {f.local_path(): f.primary_key for f in wframes}
    faces_dir = '{}/{}/detections'.format(settings.MEDIA_ROOT, video_id)
    indexes_dir = '{}/{}/indexes'.format(settings.MEDIA_ROOT, video_id)
    face_detector = detector.FaceDetector()
    aligned_paths = face_detector.detect(wframes)
    logging.info(len(aligned_paths))
    faces = []
    faces_to_pk = {}
    for path, v in aligned_paths.iteritems():
        for scaled_img, bb in v:
            d = Region()
            d.region_type = Region.DETECTION
            d.video = dv
            d.confidence = 100.0
            d.frame_id = input_paths[path]
            d.object_name = "mtcnn_face"
            left, top, right, bottom = bb[0], bb[1], bb[2], bb[3]
            d.y = top
            d.x = left
            d.w = right - left
            d.h = bottom - top
            d.save()
            # face_path already starts with faces_dir, so it is used as-is.
            # Joining faces_dir onto it again only worked when MEDIA_ROOT was
            # absolute; with a relative MEDIA_ROOT the image was written to a
            # different place than the path handed to the indexer below.
            face_path = '{}/{}.jpg'.format(faces_dir, d.pk)
            misc.imsave(face_path, scaled_img)
            faces.append(face_path)
            faces_to_pk[face_path] = d.pk
    # Reload before incrementing so the counter is based on the stored value.
    dv.refresh_from_db()
    dv.detections = dv.detections + len(faces)
    dv.save()
    path_count, emb_array, entries, feat_fname, entries_fname = face_indexer.index_faces(faces, faces_to_pk,
                                                                                         indexes_dir, video_id)
    i = IndexEntries()
    i.video = dv
    i.count = len(entries)
    i.contains_frames = False
    i.contains_detections = True
    i.detection_name = "Face"
    i.algorithm = 'facenet'
    # Only the file names are stored, not the full paths.
    i.entries_file_name = entries_fname.split('/')[-1]
    i.features_file_name = feat_fname.split('/')[-1]
    i.save()
예제 #4
0
def detect_custom_objects(detector_pk, video_pk):
    """
    Run a custom trained YOLO detector over all frames of a video.

    Each prediction is stored as a DETECTION Region named
    "YOLO_<detector pk>_<class>" and its crop is written to
    MEDIA_ROOT/<video_pk>/detections/<region pk>.jpg.

    :param detector_pk: primary key of the CustomDetector (converted with int())
    :param video_pk: primary key of the video (converted with int())
    :return: None
    """
    setup_django()
    from dvaapp.models import Region, Frame, CustomDetector
    from django.conf import settings
    from dvalib.yolo import trainer
    from PIL import Image
    args = {'detector_pk': int(detector_pk)}
    video_pk = int(video_pk)
    detector = CustomDetector.objects.get(pk=args['detector_pk'])
    args['root_dir'] = "{}/detectors/{}/".format(settings.MEDIA_ROOT,
                                                 detector.pk)
    # detector.class_names holds JSON pairs; build the mapping and its inverse.
    class_names = dict(json.loads(detector.class_names))
    i_class_names = dict(
        (index, name) for name, index in class_names.items())
    frames = dict(
        (frame.pk, frame)
        for frame in Frame.objects.all().filter(video_id=video_pk))
    images = []
    path_to_f = {}
    for frame in frames.values():
        frame_path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT,
                                                  frame.video_id,
                                                  frame.frame_index)
        path_to_f[frame_path] = frame
        images.append(frame_path)
    # test_mode=True with no boxes: the trainer is only used for prediction.
    train_task = trainer.YOLOTrainer(boxes=[],
                                     images=images,
                                     class_names=i_class_names,
                                     args=args,
                                     test_mode=True)
    for path, box_class, score, top, left, bottom, right in train_task.predict():
        source_frame = path_to_f[path]
        region = Region()
        region.region_type = region.DETECTION
        region.confidence = int(100.0 * score)
        region.object_name = "YOLO_{}_{}".format(detector.pk, box_class)
        region.y = top
        region.x = left
        region.w = right - left
        region.h = bottom - top
        region.frame_id = source_frame.pk
        region.video_id = source_frame.video_id
        region.save()
        # The crop box is derived from the stored x/y/w/h values.
        crop_box = (region.x, region.y,
                    region.w + region.x, region.h + region.y)
        target = "{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT,
                                                  video_pk, region.pk)
        Image.open(path).crop(crop_box).save(target)
예제 #5
0
def ssd_detect(video_id):
    """
    Run the SSD object detector over all frames of a video.

    This is a HACK since Tensorflow is absolutely atrocious in allocating and
    freeing up memory. Once a process / session is allocated a memory it
    cannot be forced to clear it up. As a result this code gets called via a
    subprocess which clears memory when it exits.

    For every detection a DETECTION Region is saved and the crop is written
    to MEDIA_ROOT/<video_id>/detections/<region pk>.jpg; the video's
    detection counter is then increased by the number of detections.

    :param video_id: primary key of the Video to process
    :return: None
    """
    import django
    from PIL import Image
    sys.path.append(os.path.dirname(__file__))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dva.settings")
    django.setup()
    from django.conf import settings
    from dvaapp.models import Video, Region, Frame
    from dvalib import entity, detector
    dv = Video.objects.get(id=video_id)
    frames = Frame.objects.all().filter(video=dv)
    v = entity.WVideo(dvideo=dv, media_dir=settings.MEDIA_ROOT)
    wframes = {
        df.pk: entity.WFrame(video=v,
                             frame_index=df.frame_index,
                             primary_key=df.pk)
        for df in frames
    }
    detection_count = 0
    algorithm = detector.SSDetector()
    logging.info("starting detection {}".format(algorithm.name))
    frame_detections = algorithm.detect(wframes.values())
    for frame_pk, detections in frame_detections.iteritems():
        # Opened lazily and at most once per frame, instead of once per
        # detection; frames without detections are never opened.
        frame_image = None
        for d in detections:
            dd = Region()
            dd.region_type = Region.DETECTION
            dd.video = dv
            dd.frame_id = frame_pk
            dd.object_name = d['name']
            dd.confidence = d['confidence']
            dd.x = d['left']
            dd.y = d['top']
            dd.w = d['right'] - d['left']
            dd.h = d['bot'] - d['top']
            dd.save()
            if frame_image is None:
                frame_image = Image.open(wframes[frame_pk].local_path())
            crop = frame_image.crop(
                (d['left'], d['top'], d['right'], d['bot']))
            crop.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT,
                                                       video_id, dd.pk))
            detection_count += 1
    # Reload before incrementing so the counter is based on the stored value.
    dv.refresh_from_db()
    dv.detections = dv.detections + detection_count
    dv.save()
예제 #6
0
def recognize_text(video_pk):
    """
    Recognize text in regions with name CTPN_TEXTBOX using CRNN.

    For each CTPN_TEXTBOX region of the video, the crop stored at
    MEDIA_ROOT/<video_pk>/detections/<region pk>.jpg is converted to
    grayscale, passed through the CRNN model and the decoded string is saved
    as metadata_text of a new ANNOTATION Region named "CRNN_TEXT" with the
    same frame and bounding box.

    :param video_pk: primary key of the video (converted with int())
    :return: None
    """
    setup_django()
    from dvaapp.models import Region
    from django.conf import settings
    from PIL import Image
    video_pk = int(video_pk)
    import dvalib.crnn.utils as utils
    import dvalib.crnn.dataset as dataset
    import torch
    from torch.autograd import Variable
    import dvalib.crnn.models.crnn as crnn
    model_path = '/root/DVA/dvalib/crnn/data/crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    # NOTE(review): 37 presumably equals len(alphabet) + 1 - confirm against
    # the CRNN constructor signature.
    model = crnn.CRNN(32, 1, 37, 256, 1)
    model.load_state_dict(torch.load(model_path))
    # Switching to evaluation mode is loop-invariant, so do it once up front.
    model.eval()
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    for r in Region.objects.all().filter(video_id=video_pk,
                                         object_name='CTPN_TEXTBOX'):
        img_path = "{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT,
                                                    video_pk, r.pk)
        image = Image.open(img_path).convert('L')
        image = transformer(image)
        # Add a leading dimension of size 1 in front of the image tensor.
        image = image.view(1, *image.size())
        image = Variable(image)
        preds = model(image)
        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        dr = Region()
        dr.video_id = r.video_id
        dr.object_name = "CRNN_TEXT"
        dr.x = r.x
        dr.y = r.y
        dr.w = r.w
        dr.h = r.h
        dr.region_type = Region.ANNOTATION
        dr.metadata_text = sim_pred
        dr.frame_id = r.frame_id
        dr.save()
예제 #7
0
def detect_custom_objects(detector_pk,video_pk):
    """
    Apply a custom trained YOLO detector to every frame of a video.

    Predictions are saved as DETECTION Regions named
    "YOLO_<detector pk>_<class>"; the crop of each one is written to
    MEDIA_ROOT/<video_pk>/regions/<region pk>.jpg.

    :param detector_pk: primary key of the CustomDetector (converted with int())
    :param video_pk: primary key of the video (converted with int())
    :return: None
    """
    setup_django()
    from dvaapp.models import Region, Frame, CustomDetector
    from django.conf import settings
    from dvalib.yolo import trainer
    from PIL import Image
    args = {'detector_pk': int(detector_pk)}
    video_pk = int(video_pk)
    detector = CustomDetector.objects.get(pk=args['detector_pk'])
    args['root_dir'] = "{}/detectors/{}/".format(settings.MEDIA_ROOT, detector.pk)
    # detector.class_names holds JSON pairs; build the mapping and its inverse.
    class_names = dict(json.loads(detector.class_names))
    i_class_names = dict((index, name) for name, index in class_names.items())
    frames = dict((frame.pk, frame) for frame in Frame.objects.all().filter(video_id=video_pk))
    images = []
    path_to_f = {}
    for frame in frames.values():
        frame_path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT, frame.video_id, frame.frame_index)
        path_to_f[frame_path] = frame
        images.append(frame_path)
    # test_mode=True with no boxes: the trainer is only used for prediction.
    train_task = trainer.YOLOTrainer(boxes=[], images=images, class_names=i_class_names, args=args, test_mode=True)
    for path, box_class, score, top, left, bottom, right in train_task.predict():
        source_frame = path_to_f[path]
        region = Region()
        region.region_type = region.DETECTION
        region.confidence = int(100.0 * score)
        region.object_name = "YOLO_{}_{}".format(detector.pk, box_class)
        region.y = top
        region.x = left
        region.w = right - left
        region.h = bottom - top
        region.frame_id = source_frame.pk
        region.video_id = source_frame.video_id
        region.save()
        # The crop box is derived from the stored x/y/w/h values.
        crop_box = (region.x, region.y, region.w + region.x, region.h + region.y)
        target = "{}/{}/regions/{}.jpg".format(settings.MEDIA_ROOT, video_pk, region.pk)
        Image.open(path).crop(crop_box).save(target)
예제 #8
0
def train_yolo(start_pk):
    """
    Train a yolo model specified in a TaskEvent.
    This is necessary to ensure that the Tensorflow process exits and releases the allocated GPU memory.

    Steps: build the dataset via create_detector_dataset, dump the training
    input to <root_dir>/input.json, train with YOLOTrainer, store both phase
    logs and the class information on the CustomDetector, save the trainer's
    predictions as ANNOTATION Regions, and zip the detector folder into
    MEDIA_ROOT/exports/<detector pk>.dva_detector.zip.

    :param start_pk: TEvent PK with information about launching the training task
    :return: 0
    """
    setup_django()
    from django.conf import settings
    from dvaapp.models import Region, Frame, CustomDetector, TEvent
    from dvaapp.shared import create_detector_folders, create_detector_dataset
    from dvalib.yolo import trainer
    start = TEvent.objects.get(pk=start_pk)
    args = json.loads(start.arguments_json)
    labels = set(args['labels']) if 'labels' in args else set()
    object_names = set(args['object_names']) if 'object_names' in args else set()
    detector = CustomDetector.objects.get(pk=args['detector_pk'])
    create_detector_folders(detector)
    args['root_dir'] = "{}/detectors/{}/".format(settings.MEDIA_ROOT,detector.pk)
    class_distribution, class_names, rboxes, rboxes_set, frames, i_class_names = create_detector_dataset(object_names,labels)
    images, boxes = [], []
    path_to_f = {}
    for k,f in frames.iteritems():
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT,f.video_id,f.frame_index)
        path_to_f[path] = f
        images.append(path)
        boxes.append(rboxes[k])
    with open("{}/input.json".format(args['root_dir']),'w') as input_data:
        json.dump({'boxes':boxes,
                   'images':images,
                   'args':args,
                   'class_names':class_names.items(),
                   'class_distribution':class_distribution.items()},
                  input_data)
    detector.boxes_count = sum(len(k) for k in boxes)
    detector.frames_count = len(images)
    detector.classes_count = len(class_names)
    detector.save()
    train_task = trainer.YOLOTrainer(boxes=boxes,images=images,class_names=i_class_names,args=args)
    train_task.train()
    # Read the logs through context managers so the handles are closed
    # deterministically instead of being left to the garbage collector.
    with open("{}/phase_1.log".format(args['root_dir'])) as phase_1_log:
        detector.phase_1_log = phase_1_log.read()
    with open("{}/phase_2.log".format(args['root_dir'])) as phase_2_log:
        detector.phase_2_log = phase_2_log.read()
    detector.class_distribution = json.dumps(class_distribution.items())
    detector.class_names = json.dumps(class_names.items())
    detector.trained = True
    detector.save()
    results = train_task.predict()
    bulk_regions = []
    for path, box_class, score, top, left, bottom, right in results:
        r = Region()
        r.region_type = r.ANNOTATION
        r.confidence = int(100.0 * score)
        r.object_name = "YOLO_{}_{}".format(detector.pk,box_class)
        r.y = top
        r.x = left
        r.w = right - left
        r.h = bottom - top
        r.frame_id = path_to_f[path].pk
        r.video_id = path_to_f[path].video_id
        bulk_regions.append(r)
    Region.objects.bulk_create(bulk_regions,batch_size=1000)
    folder_name = "{}/detectors/{}".format(settings.MEDIA_ROOT,detector.pk)
    file_name = '{}/exports/{}.dva_detector.zip'.format(settings.MEDIA_ROOT,detector.pk)
    # zip runs with the detector folder as cwd, so '.' archives its contents.
    zipper = subprocess.Popen(['zip', file_name, '-r', '.'],cwd=folder_name)
    zipper.wait()
    return 0
예제 #9
0
    train_task = trainer.YOLOTrainer(boxes=boxes, images=images, args=args)
    train_task.train()
    detector.phase_1_log = file("{}/phase_1.log".format(
        args['root_dir'])).read()
    detector.phase_2_log = file("{}/phase_2.log".format(
        args['root_dir'])).read()
    detector.class_distribution = json.dumps(class_distribution.items())
    detector.class_names = json.dumps(class_names.items())
    detector.trained = True
    detector.save()
    results = train_task.predict()
    bulk_regions = []
    for path, box_class, score, top, left, bottom, right in results:
        r = Region()
        r.region_type = r.ANNOTATION
        r.confidence = int(100.0 * score)
        r.object_name = "YOLO_{}_{}".format(detector.pk, box_class)
        r.y = top
        r.x = left
        r.w = right - left
        r.h = bottom - top
        r.frame_id = path_to_f[path].pk
        r.video_id = path_to_f[path].video_id
        bulk_regions.append(r)
    Region.objects.bulk_create(bulk_regions, batch_size=1000)
    folder_name = "{}/detectors/{}".format(settings.MEDIA_ROOT, detector.pk)
    file_name = '{}/exports/{}.dva_detector.zip'.format(
        settings.MEDIA_ROOT, detector.pk)
    zipper = subprocess.Popen(['zip', file_name, '-r', '.'], cwd=folder_name)
    zipper.wait()
예제 #10
0
def train_yolo(start_pk):
    """
    Train a yolo model specified in a TaskEvent.
    This is necessary to ensure that the Tensorflow process exits and releases the allocated GPU memory.

    Steps: build the dataset via create_detector_dataset, dump the training
    input to <root_dir>/input.json, train with YOLOTrainer, store both phase
    logs and the class information on the CustomDetector, save the trainer's
    predictions as ANNOTATION Regions, and zip the detector folder into
    MEDIA_ROOT/exports/<detector pk>.dva_detector.zip.

    :param start_pk: TEvent PK with information about launching the training task
    :return: 0
    """
    setup_django()
    from django.conf import settings
    from dvaapp.models import Region, Frame, CustomDetector, TEvent
    from dvaapp.shared import create_detector_folders, create_detector_dataset
    from dvalib.yolo import trainer
    start = TEvent.objects.get(pk=start_pk)
    args = json.loads(start.arguments_json)
    labels = set(args['labels']) if 'labels' in args else set()
    object_names = set(args['object_names']) if 'object_names' in args else set()
    detector = CustomDetector.objects.get(pk=args['detector_pk'])
    create_detector_folders(detector)
    args['root_dir'] = "{}/detectors/{}/".format(settings.MEDIA_ROOT,detector.pk)
    class_distribution, class_names, rboxes, rboxes_set, frames, i_class_names = create_detector_dataset(object_names,labels)
    images, boxes = [], []
    path_to_f = {}
    for k,f in frames.iteritems():
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT,f.video_id,f.frame_index)
        path_to_f[path] = f
        images.append(path)
        boxes.append(rboxes[k])
    with open("{}/input.json".format(args['root_dir']),'w') as input_data:
        json.dump({'boxes':boxes,
                   'images':images,
                   'args':args,
                   'class_names':class_names.items(),
                   'class_distribution':class_distribution.items()},
                  input_data)
    detector.boxes_count = sum(len(k) for k in boxes)
    detector.frames_count = len(images)
    detector.classes_count = len(class_names)
    detector.save()
    train_task = trainer.YOLOTrainer(boxes=boxes,images=images,class_names=i_class_names,args=args)
    train_task.train()
    # Read the logs through context managers so the handles are closed
    # deterministically instead of being left to the garbage collector.
    with open("{}/phase_1.log".format(args['root_dir'])) as phase_1_log:
        detector.phase_1_log = phase_1_log.read()
    with open("{}/phase_2.log".format(args['root_dir'])) as phase_2_log:
        detector.phase_2_log = phase_2_log.read()
    detector.class_distribution = json.dumps(class_distribution.items())
    detector.class_names = json.dumps(class_names.items())
    detector.trained = True
    detector.save()
    results = train_task.predict()
    bulk_regions = []
    for path, box_class, score, top, left, bottom, right in results:
        r = Region()
        r.region_type = r.ANNOTATION
        r.confidence = int(100.0 * score)
        r.object_name = "YOLO_{}_{}".format(detector.pk,box_class)
        r.y = top
        r.x = left
        r.w = right - left
        r.h = bottom - top
        r.frame_id = path_to_f[path].pk
        r.video_id = path_to_f[path].video_id
        bulk_regions.append(r)
    Region.objects.bulk_create(bulk_regions,batch_size=1000)
    folder_name = "{}/detectors/{}".format(settings.MEDIA_ROOT,detector.pk)
    file_name = '{}/exports/{}.dva_detector.zip'.format(settings.MEDIA_ROOT,detector.pk)
    # zip runs with the detector folder as cwd, so '.' archives its contents.
    zipper = subprocess.Popen(['zip', file_name, '-r', '.'],cwd=folder_name)
    zipper.wait()
    return 0