Code Example #1
def create_yolo_test_data():
    import json
    import shutil
    import numpy as np
    import os
    from PIL import Image
    setup_django()
    from dvaapp.shared import handle_uploaded_file
    from django.core.files.uploadedfile import SimpleUploadedFile
    from dvaapp.models import Region,TEvent,Frame, AppliedLabel
    from dvaapp.tasks import extract_frames,export_video_by_id
    # Recreate a clean scratch directory for the generated test images
    shutil.rmtree('tests/yolo_test', ignore_errors=True)
    try:
        os.mkdir('tests/yolo_test')
    except OSError:
        pass
    data = np.load('shared/underwater_data.npz')
    json_test = {}
    json_test['anchors'] = [(0.57273, 0.677385), (1.87446, 2.06253), (3.33843, 5.47434), (7.88282, 3.52778), (9.77052, 9.16828)]
    id_2_boxes = {}
    class_names = {
        0:"red_buoy",
        1:"green_buoy",
        2:"yellow_buoy",
        3:"path_marker",
        4:"start_gate",
        5:"channel"
    }
    for i,image in enumerate(data['images'][:500]):
        path = "tests/yolo_test/{}.jpg".format(i)
        Image.fromarray(image).save(path)
        id_2_boxes[path.split('/')[-1]] = data['boxes'][i].tolist()
    local('zip tests/yolo_test.zip -r tests/yolo_test/* ')
    fname = "tests/yolo_test.zip"
    name = "yolo_test"
    f = SimpleUploadedFile(fname, open(fname, 'rb').read(), content_type="application/zip")
    dv = handle_uploaded_file(f, name)
    extract_frames(TEvent.objects.create(video=dv).pk)
    for df in Frame.objects.filter(video=dv):
        for box in id_2_boxes[df.name]:
            r = Region()
            r.video = dv
            r.frame = df
            c, top_x, top_y, bottom_x, bottom_y = box  # class id plus corner coordinates
            r.object_name = class_names[c]
            r.region_type = Region.ANNOTATION
            r.x = top_x
            r.y = top_y
            r.w = bottom_x - top_x
            r.h = bottom_y - top_y
            r.save()
            l = AppliedLabel()
            l.frame = df
            l.video = dv
            l.label_name = class_names[c]
            l.region = r
            l.save()
    export_video_by_id(TEvent.objects.create(video=dv).pk)
    shutil.rmtree('tests/yolo_test', ignore_errors=True)
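The corner-format boxes loaded from underwater_data.npz are converted above into the (x, y, w, h) layout that Region stores. A minimal standalone sketch of that conversion with a round-trip check (illustrative only, not part of the fabfile):

def corners_to_region(box):
    # (class, top_x, top_y, bottom_x, bottom_y) -> (class, x, y, w, h)
    c, top_x, top_y, bottom_x, bottom_y = box
    return c, top_x, top_y, bottom_x - top_x, bottom_y - top_y

assert corners_to_region([0, 10, 20, 30, 60]) == (0, 10, 20, 20, 40)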
Code Example #2
def perform_face_indexing(video_id):
    from dvaapp.models import Region,Frame,Video,IndexEntries
    from dvalib import indexer,detector
    from dvaapp.operations.video_processing import WFrame,WVideo
    from django.conf import settings
    from scipy import misc
    import os
    import logging
    face_indexer = indexer.FacenetIndexer()
    dv = Video.objects.get(id=video_id)
    video = WVideo(dv, settings.MEDIA_ROOT)
    frames = Frame.objects.all().filter(video=dv)
    wframes = [WFrame(video=video, frame_index=df.frame_index, primary_key=df.pk) for df in frames]
    input_paths = {f.local_path(): f.primary_key for f in wframes}
    faces_dir = '{}/{}/detections'.format(settings.MEDIA_ROOT, video_id)
    indexes_dir = '{}/{}/indexes'.format(settings.MEDIA_ROOT, video_id)
    face_detector = detector.FaceDetector()
    aligned_paths = face_detector.detect(wframes)
    logging.info(len(aligned_paths))
    faces = []
    faces_to_pk = {}
    count = 0
    for path, v in aligned_paths.iteritems():
        for scaled_img, bb in v:
            d = Region()
            d.region_type = Region.DETECTION
            d.video = dv
            d.confidence = 100.0
            d.frame_id = input_paths[path]
            d.object_name = "mtcnn_face"
            left, top, right, bottom = bb[0], bb[1], bb[2], bb[3]
            d.y = top
            d.x = left
            d.w = right - left
            d.h = bottom - top
            d.save()
            face_path = '{}/{}.jpg'.format(faces_dir, d.pk)
            # face_path is already rooted at faces_dir; the original joined it to
            # faces_dir a second time, which only worked because os.path.join
            # discards the first argument when the second is absolute.
            misc.imsave(face_path, scaled_img)
            faces.append(face_path)
            faces_to_pk[face_path] = d.pk
            count += 1
    dv.refresh_from_db()
    dv.detections = dv.detections + count
    dv.save()
    path_count, emb_array, entries, feat_fname, entries_fname = face_indexer.index_faces(faces, faces_to_pk,
                                                                                         indexes_dir, video_id)
    i = IndexEntries()
    i.video = dv
    i.count = len(entries)
    i.contains_frames = False
    i.contains_detections = True
    i.detection_name = "Face"
    i.algorithm = 'facenet'
    i.entries_file_name = entries_fname.split('/')[-1]
    i.features_file_name = feat_fname.split('/')[-1]
    i.save()
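For context, here is a minimal sketch of how the saved FaceNet index could be queried afterwards. It assumes feat_fname holds a NumPy array with one embedding row per entry in entries; that layout is an assumption about dvalib, not something this snippet confirms:

import numpy as np

def nearest_faces(query_vec, feat_fname, entries, top_k=5):
    feats = np.load(feat_fname)                        # assumed shape: (n_faces, dim)
    dists = np.linalg.norm(feats - query_vec, axis=1)  # Euclidean distances
    return [entries[i] for i in dists.argsort()[:top_k]]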
Code Example #3
def assign_tags(video_id):
    import django
    import sys
    import os
    import json
    import logging
    from PIL import Image
    sys.path.append(os.path.dirname(__file__))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dva.settings")
    django.setup()
    from django.conf import settings
    from dvaapp.models import Video,Frame,Region
    from dvalib import annotator
    from dvaapp.operations.video_processing import WVideo, WFrame
    dv = Video.objects.get(id=video_id)
    frames = Frame.objects.all().filter(video=dv)
    v = WVideo(dvideo=dv, media_dir=settings.MEDIA_ROOT)
    wframes = {df.pk: WFrame(video=v, frame_index=df.frame_index, primary_key=df.pk) for df in frames}
    algorithm = annotator.OpenImagesAnnotator()
    logging.info("starting annotation {}".format(algorithm.name))
    for k,f in wframes.items():
        tags = algorithm.apply(f.local_path())
        a = Region()
        a.region_type = Region.ANNOTATION
        a.frame_id = k
        a.video_id = video_id
        a.object_name = "OpenImagesTag"
        a.metadata_text = " ".join([t for t,v in tags.iteritems() if v > 0.1])
        a.metadata_json = json.dumps({t:100.0*v for t,v in tags.iteritems() if v > 0.1})
        a.full_frame = True
        a.save()
        print(a.metadata_text)
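Both metadata fields apply the same 0.1 confidence cutoff, and metadata_json stores the scores scaled to percentages. A small hypothetical helper (not part of the project) for reading those scores back:

import json

def tags_above(region, threshold=50.0):
    # metadata_json maps tag -> confidence in percent (0..100)
    scores = json.loads(region.metadata_json)
    return sorted(t for t, v in scores.items() if v >= threshold)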
Code Example #4
def recognize_text(video_pk):
    """
    Recognize text in regions with name CTPN_TEXTBOX using CRNN
    :param detector_pk
    :param video_pk
    :return:
    """
    setup_django()
    from dvaapp.models import Region
    from django.conf import settings
    from PIL import Image
    import torch
    from torch.autograd import Variable
    import dvalib.crnn.utils as utils
    import dvalib.crnn.dataset as dataset
    import dvalib.crnn.models.crnn as crnn
    video_pk = int(video_pk)
    model_path = '/root/DVA/dvalib/crnn/data/crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    model = crnn.CRNN(32, 1, 37, 256, 1)
    model.load_state_dict(torch.load(model_path))
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    model.eval()  # switch to inference mode once, not per region
    for r in Region.objects.all().filter(video_id=video_pk, object_name='CTPN_TEXTBOX'):
        img_path = "{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT,video_pk,r.pk)
        image = Image.open(img_path).convert('L')
        image = transformer(image)
        image = image.view(1, *image.size())
        image = Variable(image)
        preds = model(image)
        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        dr = Region()
        dr.video_id = r.video_id
        dr.object_name = "CRNN_TEXT"
        dr.x = r.x
        dr.y = r.y
        dr.w = r.w
        dr.h = r.h
        dr.region_type = Region.ANNOTATION
        dr.metadata_text = sim_pred
        dr.frame_id = r.frame_id
        dr.save()
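converter.decode(..., raw=False) performs a greedy CTC collapse. A standalone sketch of that rule, following the usual CRNN convention where index 0 is the blank (illustrative, not the dvalib implementation):

def ctc_collapse(indices, alphabet, blank=0):
    # Greedy CTC decode: drop repeated indices, then drop blanks.
    out = []
    prev = None
    for i in indices:
        if i != prev and i != blank:
            out.append(alphabet[i - 1])  # non-blank labels are offset by one
        prev = i
    return ''.join(out)

# ctc_collapse([0, 8, 8, 0, 5, 12, 12, 12, 15, 0],
#              '0123456789abcdefghijklmnopqrstuvwxyz') -> '74be'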
Code Example #5
def ssd_detect(video_id):
    """
    This is a HACK: Tensorflow is atrocious at allocating and freeing up memory.
    Once memory has been allocated to a process / session, it cannot be forced to release it.
    As a result this code gets called in a subprocess, whose memory is reclaimed when it exits.

    :param video_id:
    :return:
    """
    import django
    import sys
    import os
    import logging
    from PIL import Image
    sys.path.append(os.path.dirname(__file__))
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dva.settings")
    django.setup()
    from django.conf import settings
    from dvaapp.models import Video,Region,Frame
    from dvalib import detector
    from dvaapp.operations.video_processing import WVideo,WFrame
    dv = Video.objects.get(id=video_id)
    frames = Frame.objects.all().filter(video=dv)
    v = WVideo(dvideo=dv, media_dir=settings.MEDIA_ROOT)
    wframes = {df.pk: WFrame(video=v, frame_index=df.frame_index, primary_key=df.pk) for df in frames}
    detection_count = 0
    algorithm = detector.SSDetector()
    logging.info("starting detection {}".format(algorithm.name))
    frame_detections = algorithm.detect(wframes.values())
    for frame_pk,detections in frame_detections.iteritems():
        for d in detections:
            dd = Region()
            dd.region_type = Region.DETECTION
            dd.video = dv
            dd.frame_id = frame_pk
            dd.object_name = d['name']
            dd.confidence = d['confidence']
            dd.x = d['left']
            dd.y = d['top']
            dd.w = d['right'] - d['left']
            dd.h = d['bot'] - d['top']
            dd.save()
            img = Image.open(wframes[frame_pk].local_path())
            img2 = img.crop((d['left'], d['top'], d['right'], d['bot']))
            img2.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT, video_id, dd.pk))
            detection_count += 1
    dv.refresh_from_db()
    dv.detections = dv.detections + detection_count
    dv.save()
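Given the docstring's constraint, the caller has to launch this in a throwaway process so the GPU memory is reclaimed on exit. A sketch of that caller (the fab entry point is an assumption; DVA normally dispatches these through its task machinery):

import subprocess

def run_ssd_detect(video_id):
    # GPU memory held by Tensorflow is freed only when the child process exits.
    p = subprocess.Popen(['fab', 'ssd_detect:{}'.format(video_id)])
    p.wait()
    return p.returncode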
Code Example #6
def train_yolo(start_pk):
    """
    Train a yolo model specified in a TaskEvent.
    This is necessary to ensure that the Tensorflow process exits and releases the allocated GPU memory.
    :param start_pk: TEvent PK with information about lauching the training task
    :return:
    """
    setup_django()
    import json
    import subprocess
    from django.conf import settings
    from dvaapp.models import Region, Frame, CustomDetector, TEvent
    from dvaapp.shared import create_detector_folders, create_detector_dataset
    from dvalib.yolo import trainer
    start = TEvent.objects.get(pk=start_pk)
    args = json.loads(start.arguments_json)
    labels = set(args['labels']) if 'labels' in args else set()
    object_names = set(args['object_names']) if 'object_names' in args else set()
    detector = CustomDetector.objects.get(pk=args['detector_pk'])
    create_detector_folders(detector)
    args['root_dir'] = "{}/models/{}/".format(settings.MEDIA_ROOT,detector.pk)
    class_distribution, class_names, rboxes, rboxes_set, frames, i_class_names = create_detector_dataset(object_names,labels)
    images, boxes = [], []
    path_to_f = {}
    for k,f in frames.iteritems():
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT,f.video_id,f.frame_index)
        path_to_f[path] = f
        images.append(path)
        boxes.append(rboxes[k])
        # print k,rboxes[k]
    with open("{}/input.json".format(args['root_dir']),'w') as input_data:
        json.dump({'boxes':boxes,
                   'images':images,
                   'args':args,
                   'class_names':class_names.items(),
                   'class_distribution':class_distribution.items()},
                  input_data)
    detector.boxes_count = sum([len(k) for k in boxes])
    detector.frames_count = len(images)
    detector.classes_count = len(class_names)
    detector.save()
    train_task = trainer.YOLOTrainer(boxes=boxes,images=images,class_names=i_class_names,args=args)
    train_task.train()
    detector.phase_1_log = open("{}/phase_1.log".format(args['root_dir'])).read()
    detector.phase_2_log = open("{}/phase_2.log".format(args['root_dir'])).read()
    detector.class_distribution = json.dumps(class_distribution.items())
    detector.class_names = json.dumps(class_names.items())
    detector.trained = True
    detector.save()
    results = train_task.predict()
    bulk_regions = []
    for path, box_class, score, top, left, bottom, right in results:
        r = Region()
        r.region_type = r.ANNOTATION
        r.confidence = int(100.0 * score)
        r.object_name = "YOLO_{}_{}".format(detector.pk,box_class)
        r.y = top
        r.x = left
        r.w = right - left
        r.h = bottom - top
        r.frame_id = path_to_f[path].pk
        r.video_id = path_to_f[path].video_id
        bulk_regions.append(r)
    Region.objects.bulk_create(bulk_regions,batch_size=1000)
    folder_name = "{}/models/{}".format(settings.MEDIA_ROOT,detector.pk)
    file_name = '{}/exports/{}.dva_detector.zip'.format(settings.MEDIA_ROOT,detector.pk)
    zipper = subprocess.Popen(['zip', file_name, '-r', '.'],cwd=folder_name)
    zipper.wait()
    return 0
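A sketch of how such a training TEvent could be created (the keys mirror what train_yolo reads back out of arguments_json; any other required TEvent fields, such as the owning video, are omitted here as an assumption):

import json
from dvaapp.models import TEvent

def launch_yolo_training(detector_pk, object_names=(), labels=()):
    args = {'detector_pk': detector_pk,
            'object_names': list(object_names),
            'labels': list(labels)}
    t = TEvent.objects.create(arguments_json=json.dumps(args))
    return t.pk  # pass this PK to train_yolo()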
Code Example #7
def detect_text_boxes(video_pk,cpu_mode=False):
    """
    Detect Text Boxes in frames for a video using CTPN, must be run in dva_ctpn container
    :param detector_pk
    :param video_pk
    :return:
    """
    setup_django()
    from dvaapp.models import Region, Frame
    from django.conf import settings
    from PIL import Image
    import sys
    video_pk = int(video_pk)
    sys.path.append('/opt/ctpn/CTPN/tools/')
    sys.path.append('/opt/ctpn/CTPN/src/')
    from cfg import Config as cfg
    from other import resize_im, CaffeModel
    import cv2, caffe
    from detectors import TextProposalDetector, TextDetector
    NET_DEF_FILE = "/opt/ctpn/CTPN/models/deploy.prototxt"
    MODEL_FILE = "/opt/ctpn/CTPN/models/ctpn_trained_model.caffemodel"
    if cpu_mode:  # Set this to true for CPU only mode
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.TEST_GPU_ID)
    text_proposals_detector = TextProposalDetector(CaffeModel(NET_DEF_FILE, MODEL_FILE))
    text_detector = TextDetector(text_proposals_detector)
    for f in Frame.objects.all().filter(video_id=video_pk):
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT,video_pk,f.frame_index)
        im = cv2.imread(path)
        old_h, old_w, channels = im.shape
        im, _ = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        new_h, new_w, channels = im.shape
        mul_h = float(old_h) / float(new_h)
        mul_w = float(old_w) / float(new_w)
        text_lines = text_detector.detect(im)
        for k in text_lines:
            left, top, right, bottom, score = k
            left, top, right, bottom = int(left*mul_w), int(top*mul_h), int(right*mul_w), int(bottom*mul_h)
            r = Region()
            r.region_type = r.DETECTION
            r.confidence = int(100.0 * score)
            r.object_name = "CTPN_TEXTBOX"
            r.y = top
            r.x = left
            r.w = right - left
            r.h = bottom - top
            r.frame_id = f.pk
            r.video_id = video_pk
            r.save()
            img = Image.open(path)
            img2 = img.crop((left, top, right, bottom))
            img2.save("{}/{}/detections/{}.jpg".format(settings.MEDIA_ROOT, video_pk, r.pk))
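Examples #4 and #7 form a two-stage OCR pipeline: CTPN writes CTPN_TEXTBOX regions plus crops, and CRNN transcribes the crops. Conceptually the chaining looks like this sketch, though in practice each stage runs in its own container rather than in one process:

def ocr_video(video_pk):
    detect_text_boxes(video_pk)  # stage 1: localize text lines, save crops
    recognize_text(video_pk)     # stage 2: transcribe each CTPN_TEXTBOX crop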
Code Example #8
def annotate_entire_frame(request, frame_pk):
    frame = Frame.objects.get(pk=frame_pk)
    annotation = None
    if request.method == 'POST':
        if request.POST.get('text', '').strip() \
                or request.POST.get('metadata', '').strip() \
                or request.POST.get('object_name', None):
            annotation = Region()
            annotation.region_type = Region.ANNOTATION
            annotation.x = 0
            annotation.y = 0
            annotation.h = 0
            annotation.w = 0
            annotation.full_frame = True
            annotation.text = request.POST.get('text')
            annotation.metadata = request.POST.get('metadata')
            annotation.object_name = request.POST.get('object_name',
                                                      'frame_metadata')
            annotation.frame = frame
            annotation.video = frame.video
            annotation.save()
        for label_name in request.POST.get('tags', '').split(','):
            if label_name.strip():
                if annotation:
                    dl = RegionLabel()
                    dl.video = frame.video
                    dl.frame = frame
                    dl.label = Label.objects.get_or_create(name=label_name,
                                                           set="UI")[0]
                    dl.region = annotation
                    dl.save()
                else:
                    dl = FrameLabel()
                    dl.video = frame.video
                    dl.frame = frame
                    dl.label = Label.objects.get_or_create(name=label_name,
                                                           set="UI")[0]
                    dl.save()
    return redirect("frame_detail", pk=frame.pk)
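A quick way to exercise this view is Django's test client; the field names mirror what the view reads from request.POST, while the URL route here is an assumption:

from django.test import Client

def post_frame_annotation(frame_pk, text, tags):
    c = Client()
    return c.post('/annotate_entire_frame/{}/'.format(frame_pk),
                  {'text': text, 'metadata': '', 'object_name': 'frame_metadata',
                   'tags': tags})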
Code Example #9
File: fabfile.py (project: ycaihua/DeepVideoAnalytics)
def train_yolo(start_pk):
    """
    Train a yolo model specified in a TaskEvent.
    This is necessary to ensure that the Tensorflow process exits and releases the allocated GPU memory.
    :param start_pk: TEvent PK with information about lauching the training task
    :return:
    """
    setup_django()
    import json
    import subprocess
    from django.conf import settings
    from dvaapp.models import Region, Frame, CustomDetector, TEvent
    from dvaapp.shared import create_detector_folders, create_detector_dataset
    from dvalib.yolo import trainer
    start = TEvent.objects.get(pk=start_pk)
    args = json.loads(start.arguments_json)
    labels = set(args['labels']) if 'labels' in args else set()
    object_names = set(args['object_names']) if 'object_names' in args else set()
    detector = CustomDetector.objects.get(pk=args['detector_pk'])
    create_detector_folders(detector)
    args['root_dir'] = "{}/detectors/{}/".format(settings.MEDIA_ROOT,detector.pk)
    class_distribution, class_names, rboxes, rboxes_set, frames, i_class_names = create_detector_dataset(object_names,labels)
    images, boxes = [], []
    path_to_f = {}
    for k,f in frames.iteritems():
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT,f.video_id,f.frame_index)
        path_to_f[path] = f
        images.append(path)
        boxes.append(rboxes[k])
        # print k,rboxes[k]
    with open("{}/input.json".format(args['root_dir']),'w') as input_data:
        json.dump({'boxes':boxes,
                   'images':images,
                   'args':args,
                   'class_names':class_names.items(),
                   'class_distribution':class_distribution.items()},
                  input_data)
    detector.boxes_count = sum([len(k) for k in boxes])
    detector.frames_count = len(images)
    detector.classes_count = len(class_names)
    detector.save()
    train_task = trainer.YOLOTrainer(boxes=boxes,images=images,class_names=i_class_names,args=args)
    train_task.train()
    detector.phase_1_log = open("{}/phase_1.log".format(args['root_dir'])).read()
    detector.phase_2_log = open("{}/phase_2.log".format(args['root_dir'])).read()
    detector.class_distribution = json.dumps(class_distribution.items())
    detector.class_names = json.dumps(class_names.items())
    detector.trained = True
    detector.save()
    results = train_task.predict()
    bulk_regions = []
    for path, box_class, score, top, left, bottom, right in results:
        r = Region()
        r.region_type = r.ANNOTATION
        r.confidence = int(100.0 * score)
        r.object_name = "YOLO_{}_{}".format(detector.pk,box_class)
        r.y = top
        r.x = left
        r.w = right - left
        r.h = bottom - top
        r.frame_id = path_to_f[path].pk
        r.video_id = path_to_f[path].video_id
        bulk_regions.append(r)
    Region.objects.bulk_create(bulk_regions,batch_size=1000)
    folder_name = "{}/detectors/{}".format(settings.MEDIA_ROOT,detector.pk)
    file_name = '{}/exports/{}.dva_detector.zip'.format(settings.MEDIA_ROOT,detector.pk)
    zipper = subprocess.Popen(['zip', file_name, '-r', '.'],cwd=folder_name)
    zipper.wait()
    return 0
Code Example #10
def detect_custom_objects(detector_pk,video_pk):
    """
    Detection using customized trained YOLO detectors
    :param detector_pk:
    :param video_pk:
    :return:
    """
    setup_django()
    import json
    from dvaapp.models import Region, Frame, CustomDetector
    from django.conf import settings
    from dvalib.yolo import trainer
    from PIL import Image
    args = {'detector_pk':int(detector_pk)}
    video_pk = int(video_pk)
    detector = CustomDetector.objects.get(pk=args['detector_pk'])
    args['root_dir'] = "{}/detectors/{}/".format(settings.MEDIA_ROOT, detector.pk)
    class_names = {k:v for k,v in json.loads(detector.class_names)}
    i_class_names = {i: k for k, i in class_names.items()}
    frames = {f.pk: f for f in Frame.objects.all().filter(video_id=video_pk)}
    images = []
    path_to_f = {}
    for k,f in frames.iteritems():
        path = "{}/{}/frames/{}.jpg".format(settings.MEDIA_ROOT,f.video_id,f.frame_index)
        path_to_f[path] = f
        images.append(path)
    train_task = trainer.YOLOTrainer(boxes=[], images=images, class_names=i_class_names, args=args,test_mode=True)
    results = train_task.predict()
    for path, box_class, score, top, left, bottom, right in results:
        r = Region()
        r.region_type = r.DETECTION
        r.confidence = int(100.0 * score)
        r.object_name = "YOLO_{}_{}".format(detector.pk, box_class)
        r.y = top
        r.x = left
        r.w = right - left
        r.h = bottom - top
        r.frame_id = path_to_f[path].pk
        r.video_id = path_to_f[path].video_id
        r.save()
        right = r.w + r.x
        bottom = r.h + r.y
        img = Image.open(path)
        img2 = img.crop((r.x,r.y,right, bottom))
        img2.save("{}/{}/regions/{}.jpg".format(settings.MEDIA_ROOT, video_pk, r.pk))
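Once a custom detector is trained (examples #6 and #9), inference is just a matter of calling this task per video. A trivial driver sketch:

def detect_with_trained(detector_pk, video_pks):
    # One call per video; each run loads the detector and writes Region rows plus crops.
    for video_pk in video_pks:
        detect_custom_objects(detector_pk, video_pk)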
Code Example #11
def create_annotation(form, object_name, labels, frame):
    annotation = Region()
    annotation.object_name = object_name
    if form.cleaned_data['high_level']:
        annotation.full_frame = True
        annotation.x = 0
        annotation.y = 0
        annotation.h = 0
        annotation.w = 0
    else:
        annotation.full_frame = False
        annotation.x = form.cleaned_data['x']
        annotation.y = form.cleaned_data['y']
        annotation.h = form.cleaned_data['h']
        annotation.w = form.cleaned_data['w']
    annotation.text = form.cleaned_data['text']
    annotation.metadata = form.cleaned_data['metadata']
    annotation.frame = frame
    annotation.video = frame.video
    annotation.region_type = Region.ANNOTATION
    annotation.save()
    for lname in labels:
        if lname.strip():
            dl, _ = Label.objects.get_or_create(name=lname, set="UI")
            rl = RegionLabel()
            rl.video = annotation.video
            rl.frame = annotation.frame
            rl.region = annotation
            rl.label = dl
            rl.save()
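A sketch of calling create_annotation from a view. The cleaned_data keys follow the code above; AnnotationForm itself is a hypothetical form class, not one defined in this section:

def handle_annotation_post(request, frame_pk):
    frame = Frame.objects.get(pk=frame_pk)
    form = AnnotationForm(request.POST)  # hypothetical: exposes x/y/w/h, text, metadata, high_level
    if form.is_valid():
        labels = request.POST.get('tags', '').split(',')
        create_annotation(form, request.POST.get('object_name', ''), labels, frame)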
Code Example #12
# Tail of a train_yolo variant: here the class names are passed through args
# rather than as a keyword argument to YOLOTrainer.
detector.save()
args['class_names'] = i_class_names
train_task = trainer.YOLOTrainer(boxes=boxes, images=images, args=args)
train_task.train()
detector.phase_1_log = open("{}/phase_1.log".format(args['root_dir'])).read()
detector.phase_2_log = open("{}/phase_2.log".format(args['root_dir'])).read()
detector.class_distribution = json.dumps(class_distribution.items())
detector.class_names = json.dumps(class_names.items())
detector.trained = True
detector.save()
results = train_task.predict()
bulk_regions = []
for path, box_class, score, top, left, bottom, right in results:
    r = Region()
    r.region_type = r.ANNOTATION
    r.confidence = int(100.0 * score)
    r.object_name = "YOLO_{}_{}".format(detector.pk, box_class)
    r.y = top
    r.x = left
    r.w = right - left
    r.h = bottom - top
    r.frame_id = path_to_f[path].pk
    r.video_id = path_to_f[path].video_id
    bulk_regions.append(r)
Region.objects.bulk_create(bulk_regions, batch_size=1000)
folder_name = "{}/detectors/{}".format(settings.MEDIA_ROOT, detector.pk)
file_name = '{}/exports/{}.dva_detector.zip'.format(settings.MEDIA_ROOT, detector.pk)
zipper = subprocess.Popen(['zip', file_name, '-r', '.'], cwd=folder_name)
zipper.wait()