Example #1
def cli(ctx, fp_in, opt_format, opt_disk, opt_metadata_types, opt_verified, opt_num_pieces):
  """Add mappings data to chain"""

  # -------------------------------------------------
  # imports 

  import os
  
  from tqdm import tqdm

  from vframe.settings.paths import Paths  
  from vframe.utils import file_utils, logger_utils
  
  
  # -------------------------------------------------
  # process 

  log = logger_utils.Logger.getLogger()
  log.info('opt_format: {}'.format(opt_format))
  log.info('opt_disk: {}'.format(opt_disk))
  log.info('opt_metadata_type(s): {}'.format(opt_metadata_types))
  
  if not fp_in:
    # build one index path per metadata type (mirrors Example #6)
    fps_in = [Paths.metadata_index(opt_metadata_type, data_store=opt_disk,
              file_format=opt_format, verified=opt_verified)
              for opt_metadata_type in opt_metadata_types]
    fp_in = fps_in[0]  # assumption: the truncated example reads the first index

  log.info('fp_in: {}'.format(fp_in))

  # load the raw file
  data = file_utils.lazyload(fp_in)

  
Example #2
def cli(ctx, sink, opt_dir_media, opt_disk, opt_density, opt_size_type,
        opt_drawframes):
    """Appends images to ChairItem"""

    # -------------------------------------------------
    # imports

    from os.path import join

    import cv2 as cv

    from vframe.settings import types
    from vframe.settings.paths import Paths
    from vframe.utils import file_utils, logger_utils

    # -------------------------------------------------
    # initialize

    log = logger_utils.Logger.getLogger()
    log.debug('append images to pipeline')

    # process keyframes
    if not opt_dir_media:
        dir_media = Paths.media_dir(types.Metadata.KEYFRAME,
                                    data_store=opt_disk,
                                    verified=ctx.opts['verified'])
    else:
        dir_media = opt_dir_media

    # -------------------------------------------------
    # process

    while True:

        chair_item = yield

        if chair_item.chair_type == types.ChairItemType.PHOTO:
            chair_item.load_images(dir_media,
                                   opt_size_type,
                                   opt_drawframes=opt_drawframes)
        elif chair_item.chair_type == types.ChairItemType.VIDEO:
            # videos are currently skipped
            pass
            #chair_item.load_images(opt_size_type, opt_drawframes=opt_drawframes)
        elif chair_item.chair_type == types.ChairItemType.VIDEO_KEYFRAME:
            chair_item.load_images(opt_size_type,
                                   opt_drawframes=opt_drawframes)
        elif chair_item.chair_type == types.ChairItemType.MEDIA_RECORD:
            chair_item.load_images(dir_media,
                                   opt_size_type,
                                   opt_density,
                                   opt_drawframes=opt_drawframes)
        # ------------------------------------------------------------
        # send back to generator

        sink.send(chair_item)
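
Note: each of these `cli` functions is a coroutine stage in a pipeline: items arrive via `yield` and are forwarded downstream with `sink.send(...)`. A minimal self-contained sketch of the pattern (the names here are illustrative, not vframe APIs):

def stage(sink):
  # receive items with `yield`, forward transformed items downstream
  while True:
    item = yield
    sink.send(item * 2)

def collector(results):
  while True:
    results.append((yield))

results = []
tail = collector(results)
next(tail)    # prime the coroutine to its first `yield`
head = stage(tail)
next(head)    # prime
for i in range(3):
  head.send(i)
print(results)  # [0, 2, 4]
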
Example #3
def cli(ctx, sink, fp_out, opt_format, opt_disk, opt_metadata_type, opt_minify, 
  opt_force, opt_interval):
  """Writes items to disk as JSON or Pickle"""

  from os.path import join
  from pathlib import Path

  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils

  log = logger_utils.Logger.getLogger()

  # construct path
  if not fp_out:
    fp_out = Paths.metadata_index(opt_metadata_type, data_store=opt_disk, 
      file_format=opt_format, verified=ctx.opts['verified'])

  # accumulate items
  chair_items = []
  interval_count = 0

  while True:

    chair_item = yield
    chair_items.append(chair_item)

    if len(chair_items) >= opt_interval:
      # save data
      log.debug('chair_items: {}'.format(len(chair_items)))
      mapping_items = file_utils.chair_to_mapping(chair_items)
      log.debug('mapping_items: {}'.format(len(mapping_items)))
      
      # checkpoint filename (derive from the original fp_out; do not overwrite it,
      # or later checkpoints would compound the suffix)
      fpp_out = Path(fp_out)
      ckpt_suffix = '{}_{}'.format(interval_count*opt_interval, (interval_count+1)*opt_interval)
      fp_out_ckpt = join(str(fpp_out.parent), '{}_{}{}'.format(fpp_out.stem, ckpt_suffix, fpp_out.suffix))

      # write to disk
      log.debug('saving checkpoint to: {}'.format(fp_out_ckpt))
      file_utils.write_serialized_items(mapping_items, fp_out_ckpt, ensure_path=True, minify=opt_minify)
      
      # dump data
      interval_count += 1

      for chair_item in chair_items:
        chair_item.media_record.remove_metadata(opt_metadata_type)

      chair_items = []

    sink.send(chair_item)
Example #4
def cli(ctx, sink, fp_in, opt_format, opt_disk):
    """Add mappings data to chain"""

    from tqdm import tqdm

    from vframe.settings.paths import Paths
    from vframe.utils import file_utils, logger_utils
    from vframe.models.chair_item import MediaRecordChairItem

    log = logger_utils.Logger.getLogger()

    log.info('opt_format: {}'.format(opt_format))
    log.info('opt_disk: {}'.format(opt_disk))

    if not fp_in:
        fp_in = Paths.media_record_index(data_store=opt_disk,
                                         file_format=opt_format,
                                         verified=ctx.opts['verified'])

    # load mappings
    # TODO make multithreaded
    log.info('opening: {}'.format(fp_in))
    media_records = file_utils.load_records(fp_in)

    # update ctx variable
    log.debug('set num items: {}'.format(len(media_records)))
    ctx.opts['num_items'] = len(media_records)
    # ctx.opts['chair_type'] = ChairItemType.MEDIA_RECORD

    # begin processing
    if not media_records:
        log.error('no media_record available to process')
        return
    else:
        log.info('dispatching {:,} records...'.format(ctx.opts['num_items']))
        for sha256, media_record in tqdm(media_records.items()):
            sink.send(MediaRecordChairItem(ctx, media_record))
Example #5
def cli(ctx, opt_fp_neg, opt_dir_project, opt_disk, opt_size):
  """Generates negative images"""

  # ------------------------------------------------
  # imports
  import os
  from os.path import join
  from glob import glob
  from pathlib import Path

  import pandas as pd

  from vframe.utils import logger_utils, im_utils, file_utils
  from vframe.settings.paths import Paths
  from vframe.settings import types, vframe_cfg as cfg
  from vframe.settings import vcat_cfg  # assumed location of the vcat config module

  log = logger_utils.Logger.getLogger()
  log.debug('negative mining')

  dir_media_unver = Paths.media_dir(types.Metadata.KEYFRAME, data_store=opt_disk, verified=types.Verified.UNVERIFIED)
  dir_media_ver = Paths.media_dir(types.Metadata.KEYFRAME, data_store=opt_disk, verified=types.Verified.VERIFIED)
  opt_size_label = cfg.IMAGE_SIZE_LABELS[opt_size]

  fp_train_neg = join(opt_dir_project, vcat_cfg.FP_TRAIN_NEGATIVES)
  dir_labels_negative = join(opt_dir_project, vcat_cfg.DIR_LABELS_NEGATIVE)
  dir_negative = join(opt_dir_project, vcat_cfg.DIR_IMAGES_NEGATIVE)

  file_utils.mkdirs(dir_negative)
  file_utils.mkdirs(dir_labels_negative)
  
  negative_list = pd.read_csv(opt_fp_neg)
  negative_list['description'] = negative_list['description'].fillna('')  # ensure not empty
  # negative_list['desc'] = negative_list['desc'].astype('str') 
  neg_training_files = []

  # for sha256 in sha256_list[:35]:
  for i, row in negative_list.iterrows():
    sha256 = row['sha256']
    sha256_tree = file_utils.sha256_tree(sha256)
    ver_list = glob(join(dir_media_ver, sha256_tree, sha256, "*"))
    unver_list = glob(join(dir_media_unver, sha256_tree, sha256, "*"))
    dir_frames = ver_list + unver_list

    log.debug('adding {} frames about "{}"'.format(len(dir_frames), row['description']))

    for dir_frame in dir_frames:
      frame_idx = Path(dir_frame).stem
      fp_keyframe_src = join(dir_frame, opt_size_label, 'index.jpg')
      fpp_keyframe_src = Path(fp_keyframe_src)
      if fpp_keyframe_src.exists():
        # create symlinked image
        fpp_keyframe_dst = Path(join(dir_negative, '{}_{}.jpg'.format(sha256, frame_idx)))
        if fpp_keyframe_dst.is_symlink() or fpp_keyframe_dst.exists():
          # check is_symlink() first: exists() is False for broken symlinks
          fpp_keyframe_dst.unlink()
        fpp_keyframe_dst.symlink_to(fpp_keyframe_src)
        # create empty label
        fp_label_txt = join(dir_labels_negative, '{}_{}.txt'.format(sha256, frame_idx))
        with open(fp_label_txt, 'w') as fp:
          fp.write('')
        # and, add this file to the training list
        neg_training_files.append(str(fpp_keyframe_dst))


  # for each keyframe if it exists
  log.info('writing {} lines to: {}'.format(len(neg_training_files), fp_train_neg))
  file_utils.write_text(neg_training_files, fp_train_neg)
  
  # add prompt
  log.info('mv labels_negative/*.txt labels/')
  log.info('mv images_negative/*.jpg images/')
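
The keyframe lookups above rely on `file_utils.sha256_tree(sha256)` to locate media on disk. Its implementation is not shown; a plausible sketch, assuming the common fan-out scheme of nesting directories by the leading characters of the hash (the real vframe version may differ):

import os

def sha256_tree(sha256, depth=4, width=2):
  # 'abcdef01...' -> 'ab/cd/ef/01' with depth=4, width=2
  return os.sep.join(sha256[i*width:(i+1)*width] for i in range(depth))

sha256 = 'abcdef0123456789' * 4  # 64 hex chars
print(os.path.join('/data/keyframes', sha256_tree(sha256), sha256))
# /data/keyframes/ab/cd/ef/01/abcdef0123456789...
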
Example #6
def cli(ctx, sink, fp_in, opt_format, opt_disk, opt_metadata_types):
    """Appends metadata to media record"""

    # -------------------------------------------------
    # imports

    import os

    from tqdm import tqdm

    from vframe.settings.paths import Paths
    from vframe.utils import file_utils, logger_utils
    from vframe.models.chair_item import ChairItem

    # -------------------------------------------------
    # process

    log = logger_utils.Logger.getLogger()
    log.info('fp_in: {}'.format(fp_in))
    log.info('opt_format: {}'.format(opt_format))
    log.info('opt_disk: {}'.format(opt_disk))
    log.info('opt_metadata_type(s): {}'.format(opt_metadata_types))

    if not fp_in:
        fps_in = [
            Paths.metadata_index(opt_metadata_type,
                                 data_store=opt_disk,
                                 file_format=opt_format,
                                 verified=ctx.opts['verified'])
            for opt_metadata_type in opt_metadata_types
        ]
    else:
        # a user-supplied fp_in maps to a single metadata type
        fps_in = [fp_in]

    # accumulate items
    chair_items = []
    while True:
        try:
            chair_items.append((yield))
        except GeneratorExit as ex:
            break

    # ------------------------------------------------------------------------
    # append items

    for opt_metadata_type, fp_in in zip(opt_metadata_types, fps_in):

        log.debug('opening: {}'.format(fp_in))
        media_records = file_utils.load_records(fp_in)

        if not media_records:
            log.error(
                'no metadata items or file. check "-d" / "--disk" and try again'
            )
            return

        log.debug('appending: {}'.format(opt_metadata_type.name.lower()))

        for chair_item in tqdm(chair_items):
            sha256 = chair_item.sha256
            metadata = media_records[sha256].get_metadata(opt_metadata_type)
            chair_item.media_record.set_metadata(opt_metadata_type, metadata)

    # ------------------------------------------------
    # rebuild the generator
    for chair_item in tqdm(chair_items):
        sink.send(chair_item)
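
Unlike streaming stages, this one must see every item before appending metadata, so it accumulates until the upstream calls `close()`, which raises `GeneratorExit` at the `yield`. A standalone sketch of this fan-in pattern (illustrative names only):

def fan_in(sink):
  items = []
  while True:
    try:
      items.append((yield))
    except GeneratorExit:
      break  # upstream closed us; stop accumulating
  # flush everything downstream, then finish
  for item in items:
    sink.send(item)

def printer():
  while True:
    print((yield))

p = printer()
next(p)  # prime
f = fan_in(p)
next(f)  # prime
f.send('a')
f.send('b')
f.close()  # triggers GeneratorExit; prints 'a' then 'b'
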
Example #7
def cli(ctx, sink, opt_metadata, opt_disk, opt_stroke_width, opt_stroke_color,
        opt_text_color):
    """Displays images"""

    # -------------------------------------------------
    # imports

    import os

    import cv2 as cv
    import numpy as np

    from vframe.settings.paths import Paths
    from vframe.settings import types
    from vframe.utils import file_utils, logger_utils, chair_utils, draw_utils
    # the location of get_color_map is assumed; it is not shown in this example
    from vframe.utils.draw_utils import get_color_map

    # -------------------------------------------------
    # init

    log = logger_utils.Logger.getLogger()

    # load class labels
    classes = []
    if opt_metadata == types.Metadata.COCO:
        opt_net = types.DetectorNet.COCO
    elif opt_metadata == types.Metadata.OPENIMAGES:
        opt_net = types.DetectorNet.OPENIMAGES
    elif opt_metadata == types.Metadata.SUBMUNITION:
        opt_net = types.DetectorNet.SUBMUNITION
    elif opt_metadata == types.Metadata.PLACES365:
        opt_net = types.ClassifyNet.PLACES365
        # TODO add class file
    elif opt_metadata == types.Metadata.TEXT_ROI:
        pass
    elif opt_metadata == types.Metadata.FACE_ROI:
        pass

    if opt_metadata in (types.Metadata.COCO, types.Metadata.OPENIMAGES,
                        types.Metadata.SUBMUNITION):
        fp_classes = Paths.darknet_classes(data_store=opt_disk,
                                           opt_net=opt_net)
        classes = file_utils.load_text(fp_classes)  # returns list in idx order

    # get colors for stroke (only detection nets provide class labels)
    if classes:
        colors = get_color_map(cmap='autumn', reverse=True, ncolors=len(classes))

    # TODO externalize function

    # -------------------------------------------------
    # process

    while True:

        chair_item = yield

        drawframes = {}  # new drawframes

        # ---------------------------------------------------------------
        # draw on images, assume detection results (not classify)

        detection_metadata = chair_item.get_metadata(opt_metadata)

        for frame_idx in chair_item.drawframes.keys():

            drawframe = chair_item.drawframes.get(frame_idx)
            imh, imw = drawframe.shape[:2]

            detection_results = detection_metadata.metadata.get(frame_idx, [])

            for detection_result in detection_results:

                if opt_metadata == types.Metadata.COCO \
                  or opt_metadata == types.Metadata.SUBMUNITION \
                  or opt_metadata == types.Metadata.VOC \
                  or opt_metadata == types.Metadata.OPENIMAGES:
                    # draw object detection boxes and labels
                    log.debug(detection_result)
                    frame = draw_utils.draw_detection_result(
                        drawframe,
                        classes,
                        detection_result,
                        imw,
                        imh,
                        stroke_weight=opt_stroke_width,
                        rect_color=colors[detection_result.idx],
                        text_color=opt_text_color)

                elif opt_metadata == types.Metadata.TEXT_ROI:
                    frame = draw_utils.draw_roi(drawframe,
                                                detection_result,
                                                imw,
                                                imh,
                                                text='TEXT',
                                                stroke_weight=opt_stroke_width,
                                                rect_color=opt_stroke_color,
                                                text_color=opt_text_color)
                elif opt_metadata == types.Metadata.FACE_ROI:
                    frame = draw_utils.draw_roi(drawframe,
                                                detection_result,
                                                imw,
                                                imh,
                                                text='FACE',
                                                stroke_weight=opt_stroke_width,
                                                rect_color=opt_stroke_color,
                                                text_color=opt_text_color)

            # add to current items drawframes dict
            drawframes[frame_idx] = drawframe

        chair_item.set_drawframes(drawframes)

        # ------------------------------------------------
        # rebuild the generator
        sink.send(chair_item)
Example #8
def cli(ctx, fp_in, opt_metadata_type, opt_type, opt_verified, opt_id,
        opt_disk, opt_format):
    """search for info with ID"""

    # -------------------------------------------------
    # imports

    import os
    from os.path import join
    from pathlib import Path

    from vframe.utils import file_utils, logger_utils
    from vframe.settings.paths import Paths
    from vframe.settings import vframe_cfg as cfg
    from vframe.settings import types

    # -------------------------------------------------
    # process

    log = logger_utils.Logger.getLogger()
    log.debug('opt_type: {}, opt_metadata_type: {}'.format(
        opt_type, opt_metadata_type))

    # if not opt_type:
    #   # auto guess
    #   nchars = len(opt_id)
    #   if nchars == 64:
    #     opt_types = [types.SearchParam.SA_ID, types.SearchParam.SHA256]
    #   elif nchars == 32:
    #     opt_type = [types.SearchParam.MD5]
    #   else:
    #     log.error('id not a valid format. use either 32-hex MD5 or 64-hex SHA256')
    #     return

    if not fp_in:
        if opt_metadata_type:
            fp_in = Paths.metadata_index(data_store=opt_disk,
                                         file_format=opt_format,
                                         verified=opt_verified,
                                         metadata_type=opt_metadata_type)
            # use source media_records
        else:
            fp_in = Paths.media_record_index(data_store=opt_disk,
                                             file_format=opt_format,
                                             verified=opt_verified)

    log.info('opening: {}'.format(fp_in))

    media_records = file_utils.load_records(fp_in)

    log.info('searching {:,} media records for {}: {}'.format(
        len(media_records), opt_type, opt_id))

    found_items = []
    for sha256, media_record in media_records.items():

        if opt_type == types.SearchParam.SHA256:
            # quick match sha256
            if opt_id == sha256:
                found_items.append(media_record)
                break
        else:
            # get sc metadata
            sugarcube_metadata = media_record.get_metadata(
                types.Metadata.SUGARCUBE)

            if not sugarcube_metadata:
                log.error('no sugarcube metadata. Try "append -t sugarcube"')
                return

            # match other params
            if opt_type == types.SearchParam.SA_ID:
                if opt_id == sugarcube_metadata.sa_id:
                    found_items.append(media_record)
                    break
            elif opt_type == types.SearchParam.MD5:
                if opt_id == sugarcube_metadata.md5:
                    found_items.append(media_record)
                    break

    if not found_items:
        log.error('No results')
    else:
        log.info('{} item(s) found'.format(len(found_items)))

        media_record = found_items[0]
        metadata_records = media_record.metadata
        log.debug('sha256: {}'.format(media_record.sha256))
        log.debug('\tformat: {}'.format(media_record.media_format))
        log.debug('\tverified: {}'.format(media_record.verified))
        if opt_metadata_type:
            for metadata_type, metadata_obj in metadata_records.items():
                log.debug('\ttype: {}'.format(metadata_type))
                try:
                    log.debug('\tmetadata: {}'.format(
                        metadata_obj.serialize()))
                except Exception as ex:
                    log.debug('\tmetadata: {}'.format(metadata_obj.__dict__))
Example #9
def cli(ctx, fp_in, fp_out, opt_disk, opt_verified, opt_format_in,
        opt_format_out, opt_metadata_type, opt_minify, opt_force):
    """Converts JSON to Pickle"""

    # -------------------------------------------------
    # imports

    import os
    from os.path import join
    from pathlib import Path

    import click_spinner

    from vframe.settings import vframe_cfg as cfg
    from vframe.settings.paths import Paths
    from vframe.settings import types
    from vframe.utils import file_utils, logger_utils

    # -------------------------------------------------
    # process

    log = logger_utils.Logger.getLogger()

    if not opt_metadata_type and not fp_in:
        log.error(
            'Error: missing option for either "-t" / "--type" or "-i" / "--input"'
        )
        return

    if not fp_in:
        fp_in = Paths.metadata_index(opt_metadata_type,
                                     data_store=opt_disk,
                                     file_format=opt_format_in,
                                     verified=opt_verified)

    if not fp_out:
        fpp_in = Path(fp_in)
        ext = opt_format_out.name.lower()
        fp_out = join(str(fpp_in.parent), '{}.{}'.format(fpp_in.stem, ext))

    # check again
    ext_in, ext_out = (file_utils.get_ext(fp_in), file_utils.get_ext(fp_out))
    if ext_in == ext_out or opt_format_in == opt_format_out:
        ctx.fail('Cannot convert from "{}" to "{}" (same format)'.format(
            ext_in, ext_out))

    if Path(fp_out).exists() and not opt_force:
        log.error(
            'Files exists. Use "-f/--force" to overwrite. {}'.format(fp_out))
    else:
        with click_spinner.spinner():
            log.info('Converting {} to {}'.format(fp_in, fp_out))
            if ext_out == types.FileExt.PKL.name.lower():
                file_utils.write_pickle(file_utils.load_json(fp_in), fp_out)
            elif ext_out == types.FileExt.JSON.name.lower():
                file_utils.write_json(file_utils.load_pickle(fp_in),
                                      fp_out,
                                      minify=opt_minify)

        # compare sizes
        size_src = os.path.getsize(fp_in) / 1000000
        size_dst = os.path.getsize(fp_out) / 1000000
        per = size_dst / size_src * 100
        txt_verb = 'increased' if size_dst > size_src else 'decreased'
        log.info('Size {} from {:.2f}MB to {:.2f}MB ({:.2f}%)'.format(
            txt_verb, size_src, size_dst, per))
Example #10
def cli(ctx, sink, opt_disk, opt_density):
    """Generates KeyframeStatus metadata"""
    # Recommended: Use Expanded density to check for all keyframes

    # -------------------------------------------------
    # imports

    import os
    from os.path import join
    from pathlib import Path

    from vframe.settings.paths import Paths
    from vframe.settings import vframe_cfg as cfg
    from vframe.settings import types
    from vframe.utils import file_utils, logger_utils

    from vframe.models.metadata_item import KeyframeStatusMetadataItem

    # -------------------------------------------------
    # process

    log = logger_utils.Logger.getLogger()

    # set paths
    media_type = types.Metadata.KEYFRAME
    metadata_type = types.Metadata.KEYFRAME_STATUS
    dir_keyframes = Paths.media_dir(media_type,
                                    data_store=opt_disk,
                                    verified=ctx.opts['verified'])

    # iterate sink
    while True:
        chair_item = yield
        sha256 = chair_item.sha256
        sha256_tree = file_utils.sha256_tree(sha256)
        dir_parent = join(dir_keyframes, sha256_tree, sha256)

        # check if keyframe metadata exists
        keyframe_metadata_item = chair_item.item.get_metadata(
            types.Metadata.KEYFRAME)
        if not keyframe_metadata_item:
            log.error(
                'no keyframe metadata. try "append -t keyframe", {}'.format(
                    keyframe_metadata_item))
            chair_item.item.set_metadata(metadata_type, {})
        else:
            # check if the keyframes images exist
            status = {k: False for k in cfg.IMAGE_SIZE_LABELS}
            if Path(dir_parent).exists():

                # get keyframe numbers
                idxs = keyframe_metadata_item.get_keyframes(opt_density)

                for idx in idxs:
                    for k, label in cfg.IMAGE_SIZE_LABELS.items():
                        fpp_im = Path(dir_parent, file_utils.zpad(idx), label,
                                      'index.jpg')
                        if fpp_im.exists():
                            status[k] = True

                # append metadata to chair_item's mapping item
                chair_item.item.set_metadata(
                    metadata_type, KeyframeStatusMetadataItem(status))

        # -------------------------------------------------
        # continue processing other items

        sink.send(chair_item)
Example #11
def cli(ctx, fp_in, fp_out, opt_media_record_type, opt_client_record_type,
        opt_disk, opt_media_format_type, opt_format, opt_verified, opt_minify,
        opt_force):
    """Generates dataset records"""

    # -------------------------------------------------
    # imports

    import os
    from os.path import join
    from pathlib import Path

    from tqdm import tqdm

    from vframe.settings.paths import Paths
    from vframe.utils import file_utils, logger_utils
    from vframe.settings import vframe_cfg as cfg
    from vframe.settings import types
    from vframe.models.media_item import MediaRecordItem
    from vframe.models.chair_item import ChairItem

    # -------------------------------------------------
    # process here
    metadata_type = types.Metadata.MEDIA_RECORD
    log = logger_utils.Logger.getLogger()
    if not fp_out:
        fp_out = Paths.metadata_index(metadata_type,
                                      data_store=opt_disk,
                                      file_format=opt_format,
                                      verified=opt_verified)

    log.debug('fp_in: {}'.format(fp_in))
    log.debug('fp_in: {}'.format(fp_out))
    log.debug('opt_disk: {}'.format(opt_disk))
    log.debug('opt_media_format_type: {}'.format(opt_media_format_type))
    log.debug('opt_media_record_type: {}'.format(opt_media_record_type))
    log.debug('opt_verified: {}'.format(opt_verified))

    # input error handling
    if opt_media_format_type == types.MediaFormat.PHOTO:
        log.error('Option not available: {}'.format(types.MediaFormat.PHOTO))
        return
    if opt_media_record_type != types.MediaRecord.SHA256:
        log.error('Option not available: {}'.format(opt_media_record_type))
        return
    if opt_client_record_type != types.ClientRecord.SUGARCUBE:
        log.error('Option not available: {}'.format(opt_client_record_type))
        return

    # handle different types of input records
    if opt_client_record_type == types.ClientRecord.SUGARCUBE:
        # generate records from Sugarcube client export data

        # sa_id,sha256,md5,location,verified
        csv_rows = file_utils.load_csv(fp_in)  # as list

        # remap as sugarcube item
        media_records = {}
        # map sugarcube items

        log.debug('mapping {:,} entries to {}'.format(len(csv_rows),
                                                      opt_media_record_type))
        for row in tqdm(csv_rows):

            sha256 = row.get('sha256', None)
            fp_media = row.get('location', None)
            is_verified = row.get('verified', '').lower() == 'true'
            verified = types.Verified.VERIFIED if is_verified else types.Verified.UNVERIFIED

            if sha256 and fp_media and len(sha256) == 64 and verified == opt_verified:
                ext = file_utils.get_ext(fp_media)
                media_format = file_utils.ext_media_format(ext)  # enums.MediaType
                if media_format == opt_media_format_type:
                    media_records[sha256] = MediaRecordItem(
                        sha256, media_format, verified)

        log.debug('kept: {:,} records'.format(len(media_records)))

        log.debug('fp_out: {}'.format(fp_out))
        file_utils.write_serialized_items(media_records,
                                          fp_out,
                                          ensure_path=True,
                                          minify=opt_minify)

        # -------------------------------------------------
Example #12
def cli(ctx, sink, opt_disk, opt_net, opt_gpu):
  """Generates detection metadata (CV DNN)"""

  # ----------------------------------------------------------------
  # imports

  import os
  from os.path import join
  from pathlib import Path

  import click
  import cv2 as cv
  import numpy as np

  from vframe.utils import click_utils, file_utils, im_utils, logger_utils, dnn_utils
  from vframe.models.metadata_item import DetectMetadataItem, DetectResult
  from vframe.settings.paths import Paths
  from vframe.settings import types

  # ----------------------------------------------------------------
  # init

  log = logger_utils.Logger.getLogger()


  # TODO externalize function

  # initialize dnn
  dnn_clr = (0, 0, 0)  # mean color to subtract
  dnn_scale = 1/255  # scale pixel values from 0-255 to 0-1
  nms_threshold = 0.4   # non-maximum suppression threshold
  dnn_swap_rb = 1  # swap R and B channels (passed as swapRB to blobFromImage)
  dnn_crop = False  # resize without center-cropping

  # Use multiples of 32: 416, 448, 480, 512, 544, 576, 608, 640, 672, 704
  if opt_net == types.DetectorNet.OPENIMAGES:
    metadata_type = types.Metadata.OPENIMAGES
    dnn_size = (608, 608)
    dnn_threshold = 0.875
  elif opt_net == types.DetectorNet.COCO:
    metadata_type = types.Metadata.COCO
    dnn_size = (416, 416)
    dnn_threshold = 0.925
  elif opt_net == types.DetectorNet.COCO_SPP:
    metadata_type = types.Metadata.COCO
    dnn_size = (608, 608)
    dnn_threshold = 0.875
  elif opt_net == types.DetectorNet.VOC:
    metadata_type = types.Metadata.VOC
    dnn_size = (416, 416)
    dnn_threshold = 0.875
  elif opt_net == types.DetectorNet.SUBMUNITION:
    metadata_type = types.Metadata.SUBMUNITION
    dnn_size = (608, 608)
    dnn_threshold = 0.90
  else:
    raise ValueError('{} is not a valid detector network'.format(opt_net))

  # Initialize the parameters
  fp_cfg = Paths.darknet_cfg(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
  fp_weights = Paths.darknet_weights(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
  fp_data = Paths.darknet_data(opt_net=opt_net, data_store=opt_disk, as_bytes=False)
  fp_classes = Paths.darknet_classes(opt_net=opt_net, data_store=opt_disk)
  class_names = file_utils.load_text(fp_classes)
  class_idx_lookup = {label: i for i, label in enumerate(class_names)}

  log.debug('fp_cfg: {}'.format(fp_cfg))
  log.debug('fp_weights: {}'.format(fp_weights))
  log.debug('fp_data: {}'.format(fp_data))
  log.debug('fp_classes: {}'.format(fp_classes))

  net = cv.dnn.readNetFromDarknet(fp_cfg, fp_weights)
  net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
  net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

  # ----------------------------------------------------------------
  # process

  # iterate sink
  while True:
    
    chair_item = yield
    
    metadata = {}
    
    for frame_idx, frame in chair_item.keyframes.items():

      frame = im_utils.resize(frame, width=dnn_size[0], height=dnn_size[1])
      blob = cv.dnn.blobFromImage(frame, dnn_scale, dnn_size, dnn_clr,
        dnn_swap_rb, crop=dnn_crop)
      
      # Sets the input to the network
      net.setInput(blob)

      # Runs the forward pass to get output of the output layers
      net_outputs = net.forward(dnn_utils.getOutputsNames(net))
      det_results = dnn_utils.nms_cvdnn(net_outputs, dnn_threshold, nms_threshold)
      
      metadata[frame_idx] = det_results

    # append metadata to chair_item's mapping item
    chair_item.set_metadata(metadata_type, DetectMetadataItem(metadata))
  
    # ----------------------------------------------------------------
    # yield back to the processor pipeline

    # send back to generator
    sink.send(chair_item)
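
`dnn_utils.getOutputsNames(net)` is not shown above. In OpenCV Darknet/YOLO pipelines this helper conventionally returns the names of the network's unconnected output layers, which `net.forward(...)` needs; a sketch of that conventional implementation (an assumption about vframe's version):

import numpy as np

def get_outputs_names(net):
  # all layer names; OpenCV indexes layers from 1
  layer_names = net.getLayerNames()
  # keep only layers with no outgoing connections, i.e. the output layers
  return [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]
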
Example #13
def cli(ctx, sink, opt_disk, opt_format, opt_metadata_tree_type):
  """Collate depated metadata tree files"""

  # -------------------------------------------------
  # imports
  
  from pathlib import Path
  from tqdm import tqdm

  from vframe.settings import vframe_cfg as cfg
  from vframe.settings import types
  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, logger_utils
  from vframe.models.metadata_item import MediainfoMetadataItem, KeyframeMetadataItem

  
  # -------------------------------------------------
  # process

  log = logger_utils.Logger.getLogger()

  if opt_metadata_tree_type == types.MetadataTree.MEDIAINFO_TREE:
    metadata_type = types.Metadata.MEDIAINFO
  elif opt_metadata_tree_type == types.MetadataTree.KEYFRAME_TREE:
    metadata_type = types.Metadata.KEYFRAME
  
  dir_metadata = Paths.metadata_tree_dir(opt_metadata_tree_type, data_store=opt_disk)

  # accumulate chair items
  chair_items = []
  while True:
    try:
      chair_items.append( (yield) )
    except GeneratorExit as ex:
      break

  skipped = []
  num_skipped = 0
  found = []
  num_found = 0

  # iterate chair items and gather metadata index.json files
  num_items = len(chair_items)
  for chair_item in tqdm(chair_items):
    item = chair_item.item
    sha256 = item.sha256
    sha256_tree = file_utils.sha256_tree(sha256)
    fpp_metadata = Path(dir_metadata, sha256_tree, sha256, 'index.json')
    
    # skip if not existing 
    metadata = {}
    if fpp_metadata.exists():
      try:
        metadata = file_utils.lazyload(fpp_metadata)
      except Exception as ex:
        log.error('could not read json: {}, ex: {}'.format(str(fpp_metadata), ex))
        continue
    
    # count items skipped
    if not metadata:
      skipped.append(fpp_metadata)
      num_skipped = len(skipped)
      per = num_skipped / (num_found + num_skipped) * 100
      # log.debug('{:.2f}% ({:,}/{:,}) not found: {}'.format(per, num_skipped, (num_found + num_skipped), str(fpp_metadata)))
      log.debug('{:.2f}% ({:,}/{:,}) missing'.format(per, num_skipped, (num_found + num_skipped)))
      chair_item.item.set_metadata(metdata_type, metadata_obj)
    else:
      found.append(fpp_metadata)
      num_found = len(found)
      # construct and append metadata
      if metdata_type == types.Metadata.MEDIAINFO:
        metadata_obj = MediainfoMetadataItem.from_index_json(metadata)
        chair_item.item.set_metadata(metdata_type, metadata_obj)
      elif metdata_type == types.Metadata.KEYFRAME:
        metadata_obj = KeyframeMetadataItem.from_index_json(metadata)
        chair_item.item.set_metadata(metdata_type, metadata_obj)
      else:
        raise ValueError('{} is not a valid metadata type'.format(metdata_type))

  log.info('skipped: {:,} items'.format(len(skipped)))


  # -------------------------------------------------
  # rebuild

  for chair_item in chair_items:
      sink.send(chair_item)
Example #14
def cli(ctx, sink, fp_out, opt_format, opt_disk, opt_metadata_type, 
  opt_minify, opt_force, opt_suffix, opt_ckpt_size, opt_purge):
  """Writes items to disk as JSON or Pickle"""

  
  # ------------------------------------------------------
  # imports

  import sys
  from os.path import join
  from pathlib import Path
  from collections import OrderedDict
  import gc
  import copy
  import numpy as np

  from vframe.settings import vframe_cfg as cfg
  from vframe.settings.paths import Paths
  from vframe.utils import file_utils, click_utils, logger_utils
  from vframe.models.chair_item import MediaRecordChairItem
  

  # --------------------------------------------------------
  # init

  log = logger_utils.Logger.getLogger()

  if not fp_out:
    fp_out = Paths.metadata_index(opt_metadata_type, data_store=opt_disk, 
      file_format=opt_format, verified=ctx.opts['verified'])
  
  fpp_out = Path(fp_out)
  
  if opt_suffix:
    fp_out = join(str(fpp_out.parent), '{}_{}{}'.format(fpp_out.stem, opt_suffix, fpp_out.suffix))
  
  def create_ckpt_fpaths(num_items, opt_ckpt_size):
    ckpt_fpaths = []  # build locally; do not rely on the enclosing scope
    ckpts = list(range(0, num_items, opt_ckpt_size))
    if np.max(np.array(ckpts)) < num_items:
      ckpts.append(num_items)

    for i, ckpt in enumerate(ckpts[:-1]):
      n_start = file_utils.zpad(ckpt, num_zeros=cfg.CKPT_ZERO_PADDING)
      n_end = file_utils.zpad(ckpts[i+1], num_zeros=cfg.CKPT_ZERO_PADDING)
      ckpt_suffix = 'ckpt_{}_{}{}'.format(n_start, n_end, fpp_out.suffix)  # e.g. ckpt_0_10.pkl
      fp = join(str(fpp_out.parent), '{}_{}'.format(fpp_out.stem, ckpt_suffix))
      ckpt_fpaths.append(fp)

    return ckpt_fpaths

  # --------------------------------------------------------
  # checkpoint interval saving
  
  if opt_ckpt_size:
   
    # save items every N iterations
    yield_count = 0
    ckpt_iter_num = 0
    # chair_items = OrderedDict({})
    chair_items = []
    ckpt_fpaths = []

    while True:
      
      chair_item = yield
      yield_count += 1

      # ctx variables can only be accessed after processor starts
      # hack: set filepaths after while/yield loop starts
      if not ckpt_fpaths:
        num_items = ctx.opts['num_items']
        ckpt_fpaths = create_ckpt_fpaths(num_items, opt_ckpt_size)
        log.debug('{}'.format(ckpt_fpaths))
        # ensure it does not already exist
        for fp in ckpt_fpaths:
          if Path(fp).exists() and not opt_force:
            log.error('File "{}" exists. Use "-f" to override'.format(fp))
            log.error('This error occurs later because it uses variables from the processor context')
            return

      # accumulate chair items
      chair_items.append(chair_item)

      if (yield_count > 0 and yield_count % opt_ckpt_size == 0) or yield_count >= num_items:
        
        fp_out = ckpt_fpaths[ckpt_iter_num]
        # convert chair items to media records
        log.debug('chair_items: {}'.format(len(chair_items)))
        mapping_items = file_utils.chair_to_mapping(chair_items)
        # write to disk
        log.debug('fp_out: {}'.format(fp_out))
        file_utils.write_serialized_items(mapping_items, fp_out, 
          ensure_path=True, minify=opt_minify)
        
        # TODO improve this: purge metadata to free memory
        for chair_item in chair_items:
          chair_item.purge_metadata()
        
        chair_items = []
        mapping_items = []
        ckpt_iter_num += 1
        # collect/empty garbage
        gc.collect()

      # continue chair processors
      sink.send(chair_item)


  else:

    # --------------------------------------------------------
    # save all 
  

    # save all items
    # exit if file exists
    if Path(fp_out).exists() and not opt_force:
      m = 'File "{}" exists. Use "-f" to override'.format(fp_out)
      log.error(m)
      return

    # accumulate items
    chair_items = []
    while True:
      try:
        chair_items.append( (yield) )
      except GeneratorExit as ex:
        break

    if not chair_items:
      log.error('no items to save')
      return

    # convert chair items to media records
    log.debug('chair_items: {}'.format(len(chair_items)))
    mapping_items = file_utils.chair_to_mapping(chair_items)
    log.debug('mapping_items: {}'.format(len(mapping_items)))
    
    # write to disk
    log.debug('fp_out: {}'.format(fp_out))
    file_utils.write_serialized_items(mapping_items, fp_out, ensure_path=True, minify=opt_minify)

    # rebuild the generator
    for chair_item in chair_items:
      sink.send(chair_item)
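
The checkpoint filenames produced by `create_ckpt_fpaths` partition N items into ranges of `opt_ckpt_size`. A simplified standalone version with an assumed zero-padding width (cfg.CKPT_ZERO_PADDING is not shown):

from os.path import join
from pathlib import Path

def ckpt_paths(fp_out, num_items, ckpt_size, zpad=6):
  # boundaries 0, K, 2K, ..., ending exactly at num_items
  fpp = Path(fp_out)
  bounds = list(range(0, num_items, ckpt_size))
  if bounds[-1] < num_items:
    bounds.append(num_items)
  return [join(str(fpp.parent), '{}_ckpt_{:0{z}d}_{:0{z}d}{}'.format(fpp.stem, a, b, fpp.suffix, z=zpad))
          for a, b in zip(bounds, bounds[1:])]

print(ckpt_paths('/data/index.pkl', 25, 10))
# ['/data/index_ckpt_000000_000010.pkl',
#  '/data/index_ckpt_000010_000020.pkl',
#  '/data/index_ckpt_000020_000025.pkl']
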