def tbl_log(db):
    log.info("-----------------------------")
    tblname = annonutils.get_tblname('LOG')
    logs = db.get_collection(tblname)
    cur = logs.find({})

    rpt = {
        'total_items': 0,
        'unique_rel_filenames': set(),
        'all_rel_filenames': [],
        'total_unique_rel_filenames': 0,
        'total_rel_filenames': 0
    }

    for item in cur:
        rpt['unique_rel_filenames'].add(item['rel_filename'])
        rpt['all_rel_filenames'].append(item['rel_filename'])
        rpt['total_items'] += 1

    rpt['total_unique_rel_filenames'] = len(rpt['unique_rel_filenames'])
    rpt['total_rel_filenames'] = len(rpt['all_rel_filenames'])

    log.debug("=> len(unique_rel_filenames): {}".format(
        len(rpt['unique_rel_filenames'])))
    log.debug("=> total_unique_rel_filenames: {}".format(
        rpt['total_unique_rel_filenames']))
    log.debug("total_rel_filenames: {}".format(rpt['total_rel_filenames']))
    log.debug("total_items: {}".format(rpt['total_items']))
    log.debug("---x---x---x---\n")

    return rpt
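## A minimal usage sketch (not part of the original source): the tbl_* report
## helpers only need a pymongo Database handle. Host, port and dbname here are
## placeholder assumptions; in this codebase they come from cfg['DBCFG'].
def _example_tbl_log_usage():
    from pymongo import MongoClient
    mclient = MongoClient('mongodb://localhost:27017')
    db = mclient['annon']  ## assumed dbname
    rpt = tbl_log(db)
    log.info("unique/total rel_filenames: {}/{}".format(
        rpt['total_unique_rel_filenames'], rpt['total_rel_filenames']))
    mclient.close()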
def create_modelinfo(args, cfg, db):
  log.info("----------------------------->")
  from_path = args.from_path
  if not from_path:
    raise Exception('from_path not defined')
  if not os.path.exists(from_path) or not os.path.isfile(from_path):
    raise Exception('File does not exist: {}'.format(from_path))
  
  ##TODO: for the entire directory
  data = common.loadcfg(from_path)

  if data and len(data) > 0:
    data = {k.lower():v for k,v in data.items()} 

  ## TODO: empty data and other sanity checks
  created_on = common.now()
  timestamp = common.timestamp_from_datestring(created_on)
  uuid = common.createUUID('uuid')
  data['uuid'] = uuid
  data['created_on'] = created_on
  data['timestamp'] = timestamp

  tblname = annonutils.get_tblname('MODELINFO')
  # annonutils.create_unique_index(db, tblname, 'created_on')
  annonutils.create_unique_index(db, tblname, 'weights_path')
  collection = db.get_collection(tblname)
  collection.update_one(
    {'created_on': data['created_on']}
    ,{'$setOnInsert': data}
    ,upsert=True
  )
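## Hedged sketch of the idempotent-write pattern used above: with
## {'$setOnInsert': data} and upsert=True, the first call inserts the document
## and any re-run with the same filter leaves the stored document untouched.
def _example_set_on_insert(db):
  col = db.get_collection('MODELINFO')  ## assumes get_tblname('MODELINFO') == 'MODELINFO'
  doc = {'created_on': '2020-01-01 00:00:00 +00:00', 'weights_path': '/tmp/w.h5'}
  col.update_one({'created_on': doc['created_on']},
                 {'$setOnInsert': doc}, upsert=True)  ## first run: insert
  col.update_one({'created_on': doc['created_on']},
                 {'$setOnInsert': doc}, upsert=True)  ## re-run: no-op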
def tbl_release(db):
    log.info("-----------------------------")
    tblname = annonutils.get_tblname('RELEASE')
    logs = db.get_collection(tblname)

    rpt = {'total_items': 0, 'rel_ids': [], 'total_annon_file_processed': 0}

    ## pymongo Collection objects do not support truth testing; compare with None
    if logs is not None:
        cur = logs.find({}, {'_id': 0})
        for item in cur:
            rel = {}
            rel[item['rel_id']] = item['timestamp']
            rpt['rel_ids'].append(rel)
            rpt['total_annon_file_processed'] += item[
                'total_annon_file_processed']
            rpt['total_items'] += 1

    log.debug("=> len(rel_ids): {}".format(len(rpt['rel_ids'])))
    log.debug("rel_ids: {}".format(rpt['rel_ids']))
    log.debug("total_annon_file_processed: {}".format(
        rpt['total_annon_file_processed']))
    log.debug("total_items: {}".format(rpt['total_items']))
    log.debug("---x---x---x---\n")

    return rpt
def tbl_modelinfo(db):
    log.info("-----------------------------")
    tblname = annonutils.get_tblname('MODELINFO')
    modelinfo = db.get_collection(tblname)

    rpt = {
        'total_items': 0,
        'total_annon_file_processed': 0,
        'model_ids': None,
        'weights_path': None
    }

    items = []
    if modelinfo is not None:  ## pymongo collections do not support truth testing
        items = list(modelinfo.find({}, {'_id': 0}))
        rpt['total_items'] = len(items)
        rpt['model_ids'] = [o['uuid'] for o in items]
        rpt['weights_path'] = [o['weights_path'] for o in items]

    log.debug("items: {}".format(items))
    log.debug("len(items): {}".format(rpt['total_items']))
    log.debug("model_ids: {}".format(rpt['model_ids']))
    log.debug("weights_path: {}".format(rpt['weights_path']))
    log.debug("---x---x---x---\n")

    return rpt
def tbl_classinfo(db):
    log.info("-----------------------------")
    tblname = annonutils.get_tblname('CLASSINFO')

    classinfo = db.get_collection(tblname)
    cur = classinfo.find()

    rpt = {
        'total_items': 0,
        'unique_lbl_ids': set(),
        'lbl_ids': [],
        'total_unique_lbl_ids': 0,
        'total_lbl_ids': 0
    }

    for item in cur:
        rpt['unique_lbl_ids'].add(item['lbl_id'])
        rpt['lbl_ids'].append(item['lbl_id'])
        rpt['total_items'] += 1

    rpt['total_unique_lbl_ids'] = len(rpt['unique_lbl_ids'])
    rpt['total_lbl_ids'] = len(rpt['lbl_ids'])

    log.debug("=> len(unique_lbl_ids): {}".format(len(rpt['unique_lbl_ids'])))
    log.debug("=> total_unique_lbl_ids: {}".format(
        rpt['total_unique_lbl_ids']))
    log.debug("total_lbl_ids: {}".format(rpt['total_lbl_ids']))
    log.debug("total_items: {}".format(rpt['total_items']))
    log.debug("---x---x---x---\n")

    return rpt
def save_Label(cfg, Label, dst_dir=None, db=None):
  if len(Label) > 0:
    ## TODO
    # tblname = 'LABELS'
    # annonutils.write2db(db, tblname, list(Label.values()))
    tblname = annonutils.get_tblname('CLASSINFO')
    colors = common.random_colors(len(Label))
    class_info = annonutils.get_class_info(Label, colors=colors)

    log.info("len(Label): {}".format(len(Label)))
    log.info("Label: {}".format(Label))
    save_to_file = cfg['SAVE_TO_FILE']
    if save_to_file:
      lbl_filename = os.path.join(dst_dir,os.path.basename(dst_dir)+'-'+cfg['FILES']['LABELS'])
      log.info("lbl_filename: {}".format(lbl_filename))
      # db[tblname].insert_many(list(Label.values()))
      with open(lbl_filename,'w') as fw:
        fw.write(json.dumps(Label))

      classinfo_filename = os.path.join(dst_dir,os.path.basename(dst_dir)+'-'+cfg['FILES']['CLASSINFO'])
      log.info("classinfo_filename, tblname: {}, {}".format(classinfo_filename, tblname))

      with open(classinfo_filename,'w') as fw:
        json.dump(class_info,fw)
    else:
      log.info("tblname: {}".format(tblname))
      annonutils.write2db(db, tblname, class_info, idx_col='lbl_id')
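## Illustrative cfg fragment for save_Label above (an assumption reconstructed
## from the keys it reads; filenames are placeholders):
_EXAMPLE_SAVE_LABEL_CFG = {
  'SAVE_TO_FILE': True,  ## False routes the data to the DB via write2db
  'FILES': {'LABELS': 'labels.json', 'CLASSINFO': 'classinfo.json'},
}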
def _create_modelinfo(from_path, dbname, db):
  data = common.loadcfg(from_path)

  if data and len(data) > 0:
    data = {k.lower():v for k,v in data.items()}

  ## TODO: empty data and other sanity checks
  created_on = common.now()
  timestamp = common.timestamp_from_datestring(created_on)
  uuid = common.createUUID('uuid')
  data['uuid'] = uuid
  data['created_on'] = created_on
  data['timestamp'] = timestamp
  data['filename'] = from_path.split(os.path.sep)[-1]
  data['filepath'] = from_path
  data['dbname'] = dbname

  data['rel_num'] = str(data['rel_num'])

  try:
    tblname = annonutils.get_tblname('MODELINFO')
    # annonutils.create_unique_index(db, tblname, 'created_on')
    annonutils.create_unique_index(db, tblname, 'weights_path')
    collection = db.get_collection(tblname)
    collection.update_one(
      {'created_on': data['created_on']}
      ,{'$setOnInsert': data}
      ,upsert=True
    )
  except pymongo.errors.PyMongoError as e:
    ## not all PyMongoError subclasses carry a .details attribute
    log.error("PyMongoError: {}".format(e))

  return uuid
def get_info(args, cfg, db):
  log.info("----------------------------->")

  aids_data = None
  tblname = annonutils.get_tblname('AIDS')
  collection = db.get_collection(tblname)
  if collection is not None:  ## pymongo collections do not support truth testing
    ## fetch full documents; pass a projection such as
    ## {'_id':False,'aids_dbname':True,'aids_id':True,'annon_type':True,'classes':True}
    ## to restrict the returned fields instead
    aids_data = list(collection.find({},{'_id':False}))

  return aids_data
def tbl_aids(db):
    """
  TODO
  - cmd as user input which can be train, evaluate, predict, publish, report
  """
    log.info("-----------------------------")
    tblname = annonutils.get_tblname('AIDS')
    aids = db.get_collection(tblname)
    rpt = {
        'total_items': 0,
        'dbnames': [],
        'dbnames_with_exp_id': {},
        'items': [],
        'total_annon_file_processed': 0,
        'stats': None
    }

    cmd = 'train'

    if cmd not in rpt['dbnames_with_exp_id']:
        rpt['dbnames_with_exp_id'][cmd] = []

    if aids is not None:  ## pymongo collections do not support truth testing
        # cur = aids.find({},{'_id':0, 'anndb_id':1, 'dbname':1, 'classes':1})
        cur = aids.find({}, {'_id': 0})

        ## return only the matched sub-document elements within a nested array
        ## (see the runnable sketch after this function):
        ## https://stackoverflow.com/questions/36229123/return-only-matched-sub-document-elements-within-a-nested-array
        ## query = {'train':{'$elemMatch':{'uuid':'exp-1e329cfa-2156-491f-b41a-171e62284cf6'}}},{'_id':0,'dbname':1,'train.$':1}
        ## aids.find(query)

        if cur:
            for item in cur:
                rpt['items'].append(item)
                rpt['dbnames'].append(item['dbname'])
                exp = item[cmd]
                exp_id = None
                if len(exp) > 0:
                    exp_id = [o['uuid'] for o in exp]

                x = {}
                x[item['dbname']] = exp_id
                rpt['dbnames_with_exp_id'][cmd].append(x)
                rpt['total_items'] += 1

    log.debug("=> len(dbnames): {}".format(len(rpt['dbnames'])))
    # log.debug('items: {}'.format(items))
    log.debug("dbnames: {}".format(rpt['dbnames']))
    log.debug("total_items: {}".format(rpt['total_items']))
    log.debug("cmd, dbnames_with_exp_id: {}, {}".format(
        cmd, rpt['dbnames_with_exp_id'][cmd]))
    log.debug("---x---x---x---\n")

    return rpt
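## Runnable sketch of the $elemMatch lookup referenced in the comments inside
## tbl_aids(): project only the matched sub-document from the 'train' array.
## The uuid value is a placeholder.
def _example_elemmatch(db):
    aids = db.get_collection('AIDS')  ## assumes get_tblname('AIDS') == 'AIDS'
    flt = {'train': {'$elemMatch': {'uuid': 'exp-1e329cfa-2156-491f-b41a-171e62284cf6'}}}
    proj = {'_id': 0, 'dbname': 1, 'train.$': 1}
    for doc in aids.find(flt, proj):
        log.debug("matched: {}".format(doc))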
def save_Image(cfg, Image, dst_dir=None, db=None):
  if len(Image) > 0:
    tblname = annonutils.get_tblname('IMAGES')

    save_to_file = cfg['SAVE_TO_FILE']
    if save_to_file:
      img_filename = os.path.join(dst_dir,os.path.basename(dst_dir)+'-'+cfg['FILES']['IMAGES'])
      log.info("img_filename, tblname: {}, {}".format(img_filename, tblname))
      with open(img_filename,'w') as fw:
        fw.write(json.dumps(Image))
    else:
      log.info("tblname: {}".format(tblname))
      annonutils.write2db(db, tblname, list(Image.values()), idx_col='img_id')
def classinfo_from_modelinfo(mongodb, dbname, filepath):
    modelinfo = common.yaml_load(filepath)
    log.info("modelinfo: {}".format(modelinfo))
    lbl_ids = modelinfo['classes']
    ## lbl_ids[0] is assumed to be the background class, hence the [1:] slice
    classinfo = [{
        'lbl_id': lbl_id,
        'source': 'hmd',
        'name': lbl_id
    } for lbl_id in lbl_ids[1:]]
    tblname = annonutils.get_tblname('CLASSINFO')

    mongodb.connect(dbname)
    mongodb.write(tblname, classinfo, idx_col='lbl_id')
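## Assumed shape of the modelinfo YAML consumed above (a reconstruction, not
## the original file): 'classes' lists label ids with the background class
## first, which is why the [1:] slice skips index 0.
_EXAMPLE_MODELINFO = {
    'classes': ['BG', 'signage', 'pole', 'cable'],  ## placeholder label ids
}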
def save_Annotation_Info(cfg, Annotation_Info, dst_dir=None, db=None):
  if len(Annotation_Info) > 0:
    tblname = annonutils.get_tblname('ANNOTATIONS')
    json_str = common.numpy_to_json(Annotation_Info)
    # log.info("json_str: {}".format(json_str))

    save_to_file = cfg['SAVE_TO_FILE']
    if save_to_file:
      ant_filename = os.path.join(dst_dir,os.path.basename(dst_dir)+'-'+cfg['FILES']['ANNOTATIONS'])
      log.info("ant_filename, tblname: {}, {}".format(ant_filename, tblname))
      with open(ant_filename,'w') as fw:
        # fw.write(json.dumps(Annotation_Info))
        fw.write(json_str)
    else:
      log.info("tblname: {}".format(tblname))
      annonutils.write2db(db, tblname, list(json.loads(json_str).values()), idx_col='ant_id')
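## common.numpy_to_json is project-specific; a minimal equivalent sketch (an
## assumption about its behavior, not the original implementation) converts
## numpy scalars and arrays to native Python types before serializing:
def _numpy_to_json_sketch(obj):
  import json
  import numpy as np
  def _default(o):
    if isinstance(o, np.integer):
      return int(o)
    if isinstance(o, np.floating):
      return float(o)
    if isinstance(o, np.ndarray):
      return o.tolist()
    raise TypeError("not JSON serializable: {}".format(type(o)))
  return json.dumps(obj, default=_default)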
def save_Error(cfg, Error, dst_dir=None, db=None):
  if len(Error) > 0:
    # log.info("Error:\n{}".format(Error))
    tblname = annonutils.get_tblname('ERRORS')

    save_to_file = cfg['SAVE_TO_FILE']
    if save_to_file:
      err_filename = os.path.join(dst_dir,os.path.basename(dst_dir)+'-'+cfg['FILES']['ERRORS'])
      log.info("err_filename, tblname: {}, {}".format(err_filename, tblname))
      # db[tblname].insert_many(list(Error.values()))
      with open(err_filename,'w') as fw:
        fw.write(json.dumps(Error))
    else:
      log.info("tblname: {}".format(tblname))
      # annonutils.write2db(db, tblname, list(Error.values()), idx_col='rel_filename')
      annonutils.write2db(db, tblname, list(Error.values()), idx_col='rel_filepath')
def save_to_annon_db(cfg, aidsdata):
  """Save to Annotation DB
  """
  DBCFG = cfg['DBCFG']
  mclient = MongoClient('mongodb://'+DBCFG['HOST']+':'+str(DBCFG['PORT']))
  db = mclient[DBCFG['DBNAME']]

  tblname = annonutils.get_tblname('AIDS')
  annonutils.create_unique_index(db, tblname, 'created_on')
  collection = db.get_collection(tblname)
  collection.update_one(
    {'created_on': aidsdata['created_on']}
    ,{'$setOnInsert': aidsdata}
    ,upsert=True
  )

  mclient.close()
def save_to_annon_db(cfg, aidsdata):
    """Save to Annotation DB
  """
    DBCFG = cfg['DBCFG']
    ANNONCFG = DBCFG['ANNONCFG']
    mclient = MongoClient('mongodb://' + ANNONCFG['host'] + ':' +
                          str(ANNONCFG['port']))
    dbname = ANNONCFG['dbname']
    log.info("ANNONCFG['dbname']: {}".format(dbname))
    db = mclient[dbname]

    tblname = annonutils.get_tblname('AIDS')
    annonutils.create_unique_index(db, tblname, 'created_on')
    collection = db.get_collection(tblname)
    collection.update_one({'created_on': aidsdata['created_on']},
                          {'$setOnInsert': aidsdata},
                          upsert=True)

    mclient.close()
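## Illustrative cfg fragment for the save_to_annon_db variant above,
## reconstructed from the keys it reads; host/port/dbname are placeholder
## assumptions:
_EXAMPLE_ANNON_DBCFG = {
    'DBCFG': {
        'ANNONCFG': {'host': 'localhost', 'port': 27017, 'dbname': 'annon'},
    },
}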
def tbl_annotations(db):
    log.info("-----------------------------")
    tblname = annonutils.get_tblname('ANNOTATIONS')

    stats = db.get_collection(tblname)
    cur = stats.find({})

    rpt = {
        'total_items': 0,
        'total_ant': 0,
        'unique_labels': set(),
        'unique_images': set(),
        'total_images': set(),
        'unique_rel_filenames': set(),
        'total_unique_rel_filenames': 0
    }

    for item in cur:
        rpt['unique_rel_filenames'].add(item['rel_filename'])
        rpt['unique_labels'].add(item['lbl_id'])
        rpt['unique_images'].add(item['image_name'])
        rpt['total_images'].add(item['img_id'])
        rpt['total_ant'] += 1
        rpt['total_items'] += 1

    rpt['unique_labels'] = len(rpt['unique_labels'])
    rpt['unique_images'] = len(rpt['unique_images'])
    rpt['total_images'] = len(rpt['total_images'])
    rpt['total_unique_rel_filenames'] = len(rpt['unique_rel_filenames'])

    log.debug("=> len(unique_rel_filenames): {}".format(
        len(rpt['unique_rel_filenames'])))
    log.debug("=> total_unique_rel_filenames: {}".format(
        rpt['total_unique_rel_filenames']))
    log.debug("* total_ant: {}".format(rpt['total_ant']))
    log.debug("** len(total_images): {}".format(rpt['total_images']))
    log.debug("** len(unique_images): {}".format(rpt['unique_images']))
    log.debug("** len(unique_labels): {}".format(rpt['unique_labels']))
    log.debug("---x---x---x---\n")

    return rpt
def save_Stats(cfg, Stats, Total_Stats, dataset=None, annon_filepath=None, dst_dir=None, db=None):
  stats_data = json.loads(common.numpy_to_json(Stats))
  total_stats_data = json.loads(common.numpy_to_json(Total_Stats))

  stats_total_stats_data = common.merge_dict([stats_data, total_stats_data])

  tblname = annonutils.get_tblname('STATS')

  save_to_file = cfg['SAVE_TO_FILE']
  if save_to_file:
    ## Stats files
    create_stats_files(cfg, Stats, Total_Stats, dst_dir)
    ## Move processed annotation file to archive folder
    log.info("annon_filepath, tblname: {}, {}".format(annon_filepath, tblname))
    rel_dir = cfg['BASE_PATH']['RELEASE_DIR']
    with open(os.path.join(rel_dir, os.path.basename(annon_filepath)),'w') as fw:
      json.dump(dataset,fw)
  else:
    log.info("tblname: {}".format(tblname))
    # annonutils.write2db(db, tblname, [stats_total_stats_data], idx_col='rel_filename')
    annonutils.write2db(db, tblname, [stats_total_stats_data], idx_col='rel_filepath')
def tbl_images(db):
    log.info("-----------------------------")
    tblname = annonutils.get_tblname('IMAGES')
    images = db.get_collection(tblname)
    cur = images.find({})

    rpt = {
        'total_items': 0,
        'unique_rel_filenames': set(),
        'total_unique_rel_filenames': 0,
        'unique_images': set(),
        'total_images': set(),
        'total_img': 0
    }

    for item in cur:
        rpt['unique_rel_filenames'].add(item['rel_filename'])
        rpt['unique_images'].add(item['filename'])
        rpt['total_images'].add(item['img_id'])
        rpt['total_img'] += 1
        rpt['total_items'] += 1

    rpt['unique_images'] = len(rpt['unique_images'])
    rpt['total_images'] = len(rpt['total_images'])
    rpt['total_unique_rel_filenames'] = len(rpt['unique_rel_filenames'])

    log.debug("=> len(unique_rel_filenames): {}".format(
        len(rpt['unique_rel_filenames'])))
    log.debug("=> total_unique_rel_filenames: {}".format(
        rpt['total_unique_rel_filenames']))
    log.debug('** total_img: {}'.format(rpt['total_img']))

    log.debug("len(unique_images): {}".format(rpt['unique_images']))
    log.debug("len(total_images): {}".format(rpt['total_images']))
    log.debug("---x---x---x---\n")

    return rpt
def create_db(cfg, args, datacfg, aids):
    """release the AIDS database i.e. creates the PXL DB (AI Datasets)
  and create respective entries in AIDS table in annon database
  """
    log.info("-----------------------------")

    by = args.by

    splits = datacfg['splits']

    DBCFG = cfg['DBCFG']
    PXLCFG = DBCFG['PXLCFG']
    mclient = MongoClient('mongodb://' + PXLCFG['host'] + ':' +
                          str(PXLCFG['port']))
    dbname = 'PXL-' + cfg['TIMESTAMP']
    log.info("dbname: {}".format(dbname))
    db = mclient[dbname]

    uuid_aids = None
    if len(aids) > 0:
        uuid_aids = common.createUUID('aids')
        ## Save aids - AI Datasets
        for split in splits:
            for tbl in aids[split]:
                # log.info("aids[{}][{}]".format(split, tbl))
                log.info("split: {}".format(split))

                if aids[split][tbl] is not None:
                    tblname = annonutils.get_tblname(tbl)
                    log.info("tblname: {}".format(tblname))
                    log.info("aids[split][tbl]: {}".format(
                        type(aids[split][tbl])))
                    if isinstance(aids[split][tbl], dict):
                        log.info('dict')
                        data = list(aids[split][tbl].values())
                        # log.info(aids[split][tbl]['img-19a68326-3468-4b1e-9fc6-5a739523c6f6'])
                    elif isinstance(aids[split][tbl], list):
                        log.info('list')
                        data = aids[split][tbl]

                    log.info(
                        "tblname, type(data), len(data): {}, {}, {}".format(
                            tblname, type(data), len(data)))
                    if len(data) > 0:
                        for doc in data:
                            doc['dbid'] = uuid_aids
                            doc['timestamp'] = cfg['TIMESTAMP']
                            doc['subset'] = split

                            if tblname == 'STATS':
                                log.info('doc: {}'.format(doc))
                            # log.debug('doc: {}'.format(doc))
                            annonutils.write2db(db, tblname, doc)

        created_on = common.now()
        uuid_rel = common.createUUID('rel')

        datacfg['dbid'] = uuid_aids
        datacfg['dbname'] = dbname
        datacfg['created_on'] = created_on
        datacfg['modified_on'] = None
        datacfg['anndb_id'] = dbname
        datacfg['timestamp'] = cfg['TIMESTAMP']
        datacfg['anndb_rel_id'] = None
        datacfg['rel_id'] = uuid_rel
        datacfg['log_dir'] = dbname
        datacfg['rel_type'] = 'aids'
        datacfg['creator'] = by.upper()

        log.info("datacfg: {}".format(datacfg))

        tblname = annonutils.get_tblname('AIDS')
        annonutils.create_unique_index(db, tblname, 'created_on')
        collection = db.get_collection(tblname)
        collection.update_one({'created_on': datacfg['created_on']},
                              {'$setOnInsert': datacfg},
                              upsert=True)

        tblname = annonutils.get_tblname('CLASSINFO')
        collection = db.get_collection(tblname)
        annonutils.write2db(db,
                            tblname,
                            datacfg['classinfo'],
                            idx_col='lbl_id')

        save_to_annon_db(cfg, aidsdata=datacfg)

        ## TODO:
        ## generate STATS, STATSLABEL and respective SUMMARY csv files for AIDS

    mclient.close()

    return dbname
def release_db(cfg, args):
  """Entry point to parse VIA based annotations for creating and saving basic data structures - IMAGES, ANNOTATIONS, LABELS and related data
  Implements the DRC - Design Rule Checks and acts as a gatekeeper, also reports any possible errors
  Create data structures to be parsed in 2nd pass to create the AIDS - AI Datasets with the actual splits 

  Test Cases:
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/images-p1-230119_AT1_via205_250119.json
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  """

  ## Check required args
  for d in ['from_path']:
    if d not in args:
      log.info("'{}' is not present.\n".format(d))
      sys.exit(-1)
  if not os.path.exists(args.from_path):
    raise NotADirectoryError("{}".format(args.from_path))

  dbname = None
  if 'to_path' in args and not os.path.exists(args.to_path):
    dbname = args.to_path

  from_path = args.from_path

  tic = time.time()
  log.info("\nrelease_db:-----------------------------")

  base_from_path = common.getBasePath(from_path)
  log.info("base_from_path: {}".format(base_from_path))

  uuid_rel = common.createUUID('rel')

  timestamp = cfg['RELEASE']['COLS']['timestamp'] = cfg['LOG']['COLS']['timestamp'] = cfg['TIMESTAMP']
  cfg['RELEASE']['COLS']['rel_id'] = cfg['LOG']['COLS']['rel_id'] = uuid_rel

  cfg['SAVE_TO_FILE'] = False

  log.info("-------")
  log.info("cfg: {}".format(cfg))

  if os.path.isdir(from_path):
    ## normalizes and takes care of path ending with slash or not as the user input
    files = glob.glob(os.path.join(base_from_path, cfg['ANNON_FILENAME_PREFIX']+'*.json'))
  else:
    files = [from_path]

  total_files = len(files)

  log.info("-------")
  log.debug("\nfiles: {}".format(files))
  log.info("-------")
  log.info("\nTotal files to process =======>: {}".format(total_files))

  total_annon_file_processed = 0
  total_annon_file_existed = 0

  DBCFG = cfg['DBCFG']
  ANNONCFG = DBCFG['ANNONCFG']
  mclient = MongoClient('mongodb://'+ANNONCFG['host']+':'+str(ANNONCFG['port']))
  dbname = ANNONCFG['dbname'] if not dbname else dbname
  log.info("dbname: {}".format(dbname))
  db = mclient[dbname]

  rel_tblname = annonutils.get_tblname('RELEASE')
  annonutils.create_unique_index(db, rel_tblname, 'rel_id')
  rel_collection = db.get_collection(rel_tblname)

  log_tblname = annonutils.get_tblname('LOG')
  annonutils.create_unique_index(db, log_tblname, 'created_on')
  log_collection = db.get_collection(log_tblname)

  for annon_filepath in files:
    log.info("-------")
    tic2 = time.time()
    annon_filename = os.path.basename(annon_filepath)

    ## check if the file is parsed: skip the processing in normal mode of the already parsed file
    # res = log_collection.find_one({'rel_filename': annon_filename})
    res = log_collection.find_one({'rel_filepath': annon_filepath})
    
    ## TODO: in update mode
    ## delete the entries of annotations and images before inserting the values of the same file again 
    if not res:
      log.info(" annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))

      created_on  = common.now()
      cfg['RELEASE']['COLS']['created_on'] = cfg['LOG']['COLS']['created_on'] = created_on
      log.info("created_on: {}".format(created_on))

      cfg['LOG']['COLS']['rel_filename'] = annon_filename
      cfg['LOG']['COLS']['rel_filepath'] = annon_filepath
      annondata = annon_parser.parse_annon_file(cfg, annon_filepath, base_from_path)
      total_annon_file_processed += 1

      save_parsed_data(cfg, annondata, db=db)

      cfg['LOG']['COLS']['modified_on'] = None

      toc2 = time.time()
      cfg['LOG']['COLS']['total_exec_time'] = '{:0.2f}s'.format(toc2 - tic)

      ## if an exception occurs or the run terminates, save what has been processed so far in the log, instead of a one-shot update of the log outside the for loop
      ## this helps to recover from an abrupt termination and restart from the previous successfully processed file
      log_collection.update_one(
        {'created_on': created_on}
        ,{'$setOnInsert': cfg['LOG']['COLS']}
        ,upsert=True
      )

      log.info("=======> Total Execution Time: {:0.2f}s, Processed files: {}, Remaning files: {}".format(toc2 - tic2, total_annon_file_processed, total_files - total_annon_file_processed))

      ## Update the LOG table here itself
    else:
      log.info("Already Exist in Database: annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))
      log.info("Use update / delete command to process this file again")
      total_annon_file_existed += 1


  cfg['RELEASE']['COLS']['total_annon_file_processed'] = total_annon_file_processed
  # cfg['RELEASE']['COLS']['total_exec_time'] = '{:0.2f}s'.format(time.time() - tic)
  cfg['RELEASE']['COLS']['total_exec_time_in_sec'] = '{:0.2f}'.format(time.time() - tic)

  if total_annon_file_processed:
    rel_collection.update_one(
      {'rel_id': uuid_rel}
      ,{'$setOnInsert': cfg['RELEASE']['COLS']}
      ,upsert=True
    )

  log.info("total_files, total_annon_file_processed, total_annon_file_existed: {} = {} + {}".format(total_files, total_annon_file_processed, total_annon_file_existed))

  mclient.close()

  return timestamp
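## Hedged sketch (not from the original source) of how the args namespace
## consumed by release_db() could be built; the flag names are assumptions
## inferred from the attributes accessed above:
def _example_release_db_args():
  import argparse
  p = argparse.ArgumentParser()
  p.add_argument('--from', dest='from_path', required=True,
                 help='annotation json file, or a directory of such files')
  p.add_argument('--to', dest='to_path', default='',
                 help='optional target dbname override')
  return p.parse_args()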
def create_db(cfg, args, datacfg):
  """release the AIDS database i.e. creates the PXL DB (AI Datasets)
  and create respective entries in AIDS table in annon database
  """
  log.info("-----------------------------")

  by = args.by
  db_images, db_annon, latest_release_info, lbl_ids = get_annon_data(cfg)
  aids, datacfg = prepare_aids(cfg, db_images, db_annon, lbl_ids, datacfg)

  DBCFG = cfg['DBCFG']
  mclient = MongoClient('mongodb://'+DBCFG['HOST']+':'+str(DBCFG['PORT']))
  rel_timestamp = latest_release_info['timestamp']
  DBNAME = 'PXL-'+rel_timestamp+'_'+cfg['TIMESTAMP']
  log.info("DBNAME: {}".format(DBNAME))
  db = mclient[DBNAME]

  uuid_aids = None
  if len(aids) > 0:
    uuid_aids = common.createUUID('aids')

    AIDS_SPLITS_CRITERIA = cfg['AIDS_SPLITS_CRITERIA'][cfg['AIDS_SPLITS_CRITERIA']['USE']]
    splits = AIDS_SPLITS_CRITERIA[0] ## directory names

    ## Save aids - AI Datasets
    for split in splits:
      for tbl in aids[split]:
        log.info("aids[{}][{}]".format(split, tbl))
        
        tblname = annonutils.get_tblname(tbl)
        log.info("tblname: {}".format(tblname))
        log.info("aids[split][tbl]: {}".format(type(aids[split][tbl])))
        if isinstance(aids[split][tbl], dict):
          log.info('dict')
          data = list(aids[split][tbl].values())
          # log.info(aids[split][tbl]['img-19a68326-3468-4b1e-9fc6-5a739523c6f6'])
        elif isinstance(aids[split][tbl], list):
          log.info('list')
          data = aids[split][tbl]


        log.info("tblname, type(data): {}, {}".format(tblname, type(data)))
        for doc in data:
          # if tblname == 'STATS':
          #   log.info(doc)

          doc['dbid'] = uuid_aids
          doc['timestamp'] = cfg['TIMESTAMP']
          doc['subset'] = split
          annonutils.write2db(db, tblname, doc)

    created_on = common.now()
    uuid_rel = common.createUUID('rel')

    datacfg['dbid'] = uuid_aids
    datacfg['dbname'] = DBNAME
    datacfg['created_on'] = created_on
    datacfg['modified_on'] = None
    datacfg['anndb_id'] = rel_timestamp
    datacfg['timestamp'] = cfg['TIMESTAMP']
    datacfg['anndb_rel_id'] = latest_release_info['rel_id']
    datacfg['rel_id'] = uuid_rel
    datacfg['log_dir'] = DBNAME
    datacfg['rel_type'] = 'aids'
    datacfg['creator'] = by.upper()

    tblname = annonutils.get_tblname('AIDS')
    annonutils.create_unique_index(db, tblname, 'created_on')
    collection = db.get_collection(tblname)
    collection.update_one(
      {'created_on': datacfg['created_on']}
      ,{'$setOnInsert': datacfg}
      ,upsert=True
    )

    tblname = annonutils.get_tblname('CLASSINFO')
    collection = db.get_collection(tblname)
    annonutils.write2db(db, tblname, datacfg['classinfo'], idx_col='lbl_id')

    save_to_annon_db(cfg, aidsdata=datacfg)

    ## TODO:
    ## generate STATS, STATSLABEL and respective SUMMARY csv files for AIDS
 
  mclient.close()

  return uuid_aids
def release_files(cfg, args):
  """Entry point to parse VIA based annotations for creating and saving basic data structures - IMAGES, ANNOTATIONS, LABELS and related data
  Implements the DRC - Design Rule Checks and acts as a gatekeeper, also reports any possible errors
  Create data structures to be parsed in 2nd pass to create the AIDS - AI Datasets with the actual splits 

  Test Cases:
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/images-p1-230119_AT1_via205_250119.json
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  """

  ## Check required args
  for d in ['from_path', 'to_path']:
    if d not in args:
      log.info("'{}' is not present.\n".format(d))
      sys.exit(-1)
  if not os.path.exists(args.from_path):
    raise NotADirectoryError("{}".format(args.from_path))
  if not os.path.exists(args.to_path):
    raise NotADirectoryError("{}".format(args.to_path))

  from_path, to_path = args.from_path, args.to_path

  tic = time.time()
  log.info("\nrelease_db:-----------------------------")
  cfg['TIMESTAMP'] = ("{:%d%m%y_%H%M%S}").format(datetime.datetime.now())

  base_from_path = common.getBasePath(from_path)
  log.info("base_from_path: {}".format(base_from_path))

  base_to_path = common.getBasePath(to_path)
  log.info("base_to_path: {}".format(base_to_path))

  cfg['LOG']['COLS']['timestamp'] = cfg['TIMESTAMP']

  ## Create Base Directories
  db_dir = os.path.join(base_to_path, cfg['BASEDIR_NAME']['DB'])
  log.info("db_dir: {}".format(db_dir))
  common.mkdir_p(db_dir)

  db_data_dir = os.path.join(db_dir, cfg['TIMESTAMP'])
  log.info("ANNDB db_data_dir: {}".format(db_data_dir))
  common.mkdir_p(db_data_dir)

  rel_dir = os.path.join(base_to_path, cfg['BASEDIR_NAME']['RELEASE'], cfg['TIMESTAMP'])
  log.info("rel_dir: {}".format(rel_dir))
  common.mkdir_p(rel_dir)

  log_dir = os.path.join(base_to_path, cfg['BASEDIR_NAME']['LOG'])
  log.info("log_dir: {}".format(log_dir))
  common.mkdir_p(log_dir)

  ant_data_dir = os.path.join(db_data_dir,cfg["BASEDIR_NAME"]["ANNON"])
  log.info("ant_data_dir: {}".format(ant_data_dir))
  common.mkdir_p(ant_data_dir)

  cfg['BASE_PATH']['DB_DIR'] = db_dir
  cfg['BASE_PATH']['DB_DATA_DIR'] = db_data_dir
  cfg['BASE_PATH']['RELEASE_DIR'] = rel_dir
  cfg['BASE_PATH']['LOG_DIR'] = log_dir
  cfg['BASE_PATH']['ANT_DATA_DIR'] = ant_data_dir

  log.info("-------")
  log.info("cfg: {}".format(cfg))

  if os.path.isdir(from_path):
    ## normalizes and takes care of path ending with slash or not as the user input
    files = glob.glob(os.path.join(base_from_path,cfg['ANNON_FILENAME_PREFIX']+'*.json'))
  else:
    files = [from_path]

  log.info("-------")
  log.info("\nfiles: {}".format(files))
  log.info("-------")
  log.info("\nTotal files to process =======>: {}".format(len(files)))

  total_annon_file_processed = 0

  log_tblname = annonutils.get_tblname('LOG')
  for annon_filepath in files:
    log.info("-------")
    tic2 = time.time()
    annon_filename = os.path.basename(annon_filepath)

    ## TODO: check if the file is parsed: skip the processing in normal mode of the already parsed file
    res = False
    
    ## TODO: in update mode
    ## delete the entries of annotations and images before inserting the values of the same file again 
    if not res:
      log.info(" annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))

      created_on = cfg['LOG']['COLS']['created_on'] = common.now()
      log.info("created_on: {}".format(created_on))

      cfg['LOG']['COLS']['rel_filename'] = annon_filename
      cfg['LOG']['COLS']['rel_filepath'] = annon_filepath
      annondata = annon_parser.parse_annon_file(cfg, annon_filepath, base_from_path)
      total_annon_file_processed += 1

      ## if annon_filepath is an absolute path, the base path gets ignored and
      ## thus the dst_dir would be the file's own directory:
      ## dst_dir = os.path.join(base_from_path, os.path.splitext(annon_filepath)[0])
      ## dst_dir = os.path.join(db_dir, os.path.splitext(annon_filepath)[0])

      dst_dir = os.path.join(rel_dir, os.path.splitext(annon_filename)[0])
      ## log.info("dst_dir: {}".format(dst_dir))
      common.mkdir_p(dst_dir)
      save_parsed_data(cfg, annondata, dst_dir=dst_dir, ant_data_dir=ant_data_dir, annon_filepath=annon_filepath)

      cfg['LOG']['COLS']['modified_on'] = None

      toc2 = time.time()
      total_exec_time = '{:0.2f}s'.format(toc2 - tic)
      cfg['LOG']['COLS']['total_exec_time'] = total_exec_time

      ##TODO:
      ## if an exception occurs or the run terminates, save what has been processed so far in the log, instead of a one-shot update of the log outside the for loop
      ## this helps to recover from an abrupt termination and restart from the previous successfully processed file

      log.info("=======> Total Execution Time: {:0.2f}s, Processed files: {}, Remaning files: {}".format(toc2 - tic2, total_annon_file_processed, len(files) - total_annon_file_processed))

      ## Update the LOG table here itself
    else:
      log.info("Already Exist in Database: annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))
      log.info("Use update / delete command to process this file again")

  ## Every execution of the script is a release
  ## For every release, recreate the entire database either for directory or specific file release
  
  ## create and save db data i.e. consolidated data with index structure
  db_data = create_db_data(cfg, rel_dir)

  save_db_data(cfg, db_dir, db_data)

  log.info("total_annon_file_processed: {}".format(total_annon_file_processed))

  return db_data_dir
def tbl_errors(db):
    log.debug("\nErrors::")
    log.debug("--------")
    tblname = annonutils.get_tblname('ERRORS')

    stats = db.get_collection(tblname)
    cur = stats.find({'has_error': True})

    rpt = {
        'total_items': 0,
        'total_error_ant': 0,
        'total_error_empty_ant': 0,
        'total_error_img_notfound': 0,
        'total_error_img_reading': 0,
        'total_error_unlabeled_ant': 0,
        'total_error_in_rel_filename': 0,
        'errors_for_reporting': None,
        'unique_rel_filenames': set(),
        'all_rel_filenames': [],
        'total_unique_rel_filenames': 0,
        'total_rel_filenames': 0
    }

    errors_for_reporting = {}

    for item in cur:
        rpt['unique_rel_filenames'].add(item['rel_filename'])
        rpt['all_rel_filenames'].append(item['rel_filename'])
        rpt['total_error_ant'] += item['total_error_ant']
        rpt['total_error_empty_ant'] += item['total_error_empty_ant']
        rpt['total_error_img_notfound'] += item['total_error_img_notfound']
        rpt['total_error_img_reading'] += item['total_error_img_reading']
        rpt['total_error_unlabeled_ant'] += item['total_error_unlabeled_ant']
        rpt['total_error_in_rel_filename'] += 1
        rpt['total_items'] += 1

        if item['has_error']:
            error_types = appcfg['ERROR_TYPES']
            rel_filename = os.path.splitext(item['rel_filename'])[0]
            if rel_filename not in errors_for_reporting:
                errors_for_reporting[rel_filename] = {}

            errors_for_reporting[rel_filename]['rel_filename'] = item[
                'rel_filename']
            for et in error_types:
                if et in item and len(item[et]) > 0:
                    errors_for_reporting[rel_filename][et] = item[et]

    rpt['errors_for_reporting'] = errors_for_reporting

    rpt['total_unique_rel_filenames'] = len(rpt['unique_rel_filenames'])
    rpt['total_rel_filenames'] = len(rpt['all_rel_filenames'])

    log.debug("=> len(unique_rel_filenames): {}".format(
        len(rpt['unique_rel_filenames'])))
    log.debug("=> total_unique_rel_filenames: {}".format(
        rpt['total_unique_rel_filenames']))
    log.debug("total_rel_filenames: {}".format(rpt['total_rel_filenames']))
    log.debug('total_error_ant: {}'.format(rpt['total_error_ant']))
    log.debug('total_error_empty_ant: {}'.format(rpt['total_error_empty_ant']))
    log.debug('total_error_img_notfound: {}'.format(
        rpt['total_error_img_notfound']))
    log.debug('total_error_img_reading: {}'.format(
        rpt['total_error_img_reading']))
    log.debug('total_error_unlabeled_ant: {}'.format(
        rpt['total_error_unlabeled_ant']))
    log.debug('total_error_in_rel_filename: {}'.format(
        rpt['total_error_in_rel_filename']))
    log.debug("---x---x---x---\n")

    return rpt
def create_experiment(args, cfg, db):
  log.info("----------------------------->")
  from_path = args.from_path
  if not from_path:
    raise Exception('--from not defined')
  dbid = args.to
  if not dbid:
    raise Exception('--to not defined')
  exp_type = args.exp
  if not exp_type:
    raise Exception('--exp not defined')

  if not os.path.exists(from_path) or not os.path.isfile(from_path):
    raise Exception('File does not exist: {}'.format(from_path))
  
  ##TODO: for the entire directory
  data = common.loadcfg(from_path)

  if data and len(data) > 0:
    data = {k.lower():v for k,v in data.items()} 

  # teppr_items = cfg['TEPPR_ITEMS']
  # for item in teppr_items:
  #   data[item]
  ## TODO: empty data and other sanity checks

  if exp_type in data:
    data = data[exp_type]

  if data and len(data) > 0:
    data = {k.lower():v for k,v in data.items()} 

  ## TODO: empty data and other sanity checks
  created_on = common.now()
  timestamp = common.timestamp_from_datestring(created_on)
  uuid = common.createUUID('uuid')
  data['uuid'] = uuid
  data['created_on'] = created_on
  data['timestamp'] = timestamp
  log_dir = os.path.join(data['dnnarch'], timestamp)
  data['log_dir'] = log_dir
  

  tblname = annonutils.get_tblname('AIDS')
  collection = db.get_collection(tblname)
  # aids_data = list(collection.find({'aids_id':from_path},{'_id':False}))

  expdata = {}
  expdata[exp_type] = data
  ## {'train':data}

  log.info("data:{}".format(expdata))
  
  ## TODO if collection does not exist raise error
  # if collection:
  collection.update_one(
    {'dbid': dbid}
    ,{'$push': expdata}
  )

  res = {
    'dbid': dbid
    ,'exp_id': uuid
  }

  return res
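## Sketch of the $push used above (dbid and uuid values are placeholders): it
## appends the new experiment entry under its exp_type key, so the field grows
## into an array of experiment sub-documents.
def _example_push_experiment(db):
  collection = db.get_collection('AIDS')  ## assumes get_tblname('AIDS') == 'AIDS'
  collection.update_one(
    {'dbid': 'aids-0000'}
    ,{'$push': {'train': {'uuid': 'train-0000'}}}
  )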
def create_experiment(args, cfg):
  log.info("----------------------------->")

  from_path = args.from_path
  dbname = args.to
  exp_type = args.exp

  DBCFG = cfg['DBCFG']
  PXLCFG = DBCFG['PXLCFG']
  mclient = MongoClient('mongodb://'+PXLCFG['host']+':'+str(PXLCFG['port']))

  check_args('experiment', args, cfg)
  expdata = common.loadcfg(from_path)
  if expdata and len(expdata) > 0:
    expdata = {k.lower():v for k,v in expdata.items()} 

  creator = 'AIE3'
  if 'creator' in expdata:
    creator = expdata['creator']

  if exp_type in expdata:
    expdata = expdata[exp_type]

  if expdata and len(expdata) > 0:
    expdata = {k.lower():v for k,v in expdata.items()} 

  modelinfo_abspath = os.path.join(os.getenv('AI_CFG'), 'model')
  modelinfo_filepath = os.path.join(modelinfo_abspath, expdata['model_info'])

  args.from_path = modelinfo_filepath
  check_args('modelinfo', args, cfg)

  created_on = common.now()
  timestamp = common.timestamp_from_datestring(created_on)
  uuid = common.createUUID(exp_type)
  expdata['uuid'] = uuid
  expdata['created_on'] = created_on
  expdata['timestamp'] = timestamp
  expdata['creator'] = creator
  expdata['filename'] = from_path.split(os.path.sep)[-1]
  expdata['filepath'] = from_path
  expdata['dbname'] = dbname

  log_dir = os.path.join(expdata['dnnarch'], timestamp)
  expdata['log_dir'] = log_dir

  modelinfo = common.loadcfg(modelinfo_filepath)
  if modelinfo and len(modelinfo) > 0:
    modelinfo = {k.lower():v for k,v in modelinfo.items()}

  modelinfo['uuid'] = uuid
  modelinfo['created_on'] = created_on
  modelinfo['timestamp'] = timestamp
  modelinfo['filename'] = expdata['model_info']
  modelinfo['filepath'] = modelinfo_filepath

  expdata['modelinfo'] = modelinfo

  log.info("expdata:{}".format(expdata))

  db = mclient[dbname]

  tblname = annonutils.get_tblname(exp_type.upper())
  collection = db.get_collection(tblname)
  collection.update_one(
    {'created_on': expdata['created_on']}
    ,{'$setOnInsert': expdata}
    ,upsert=True
  )

  aidsdata = {}
  aidsdata[exp_type] = uuid

  tblname = annonutils.get_tblname('AIDS')
  collection = db.get_collection(tblname)
  collection.update_one(
    {'dbname': dbname}
    ,{'$push': aidsdata}
  )

  mclient.close()

  return uuid
    def load_data_from_db(self):
        """Load the annotation data from the database

    TODO
    - plit is replaced with subset
    """
        log.info("-------------------------------->")
        import pymongo
        from pymongo import MongoClient
        import arrow

        dbcfg = self.dbcfg
        log.debug("dbcfg: {}".format(dbcfg))
        mclient = MongoClient('mongodb://' + dbcfg['host'] + ':' +
                              str(dbcfg['port']))
        dbname = dbcfg['dbname']
        db = mclient[dbname]

        query_annotations = {}
        query_images = {}
        query_classinfo = {}

        if self.subset:
            query_annotations = {'subset': self.subset}
            query_images = {'subset': self.subset}

        tblname = annonutils.get_tblname('ANNOTATIONS')
        annotations = db.get_collection(tblname)
        annotations_data = list(annotations.find(query_annotations,
                                                 {'_id': 0}))
        self.dataset['annotations'] = annotations_data

        tblname = annonutils.get_tblname('IMAGES')
        images = db.get_collection(tblname)
        images_data = list(images.find(query_images, {'_id': 0}))
        self.dataset['images'] = images_data

        tblname = annonutils.get_tblname('CLASSINFO')
        classinfo = db.get_collection(tblname)

        ## sorting is critical to avoid label mismatch issues
        ## https://stackoverflow.com/questions/8109122/how-to-sort-mongodb-with-pymongo
        classinfo = list(
            classinfo.find(query_classinfo, {
                '_id': 0
            }).sort('lbl_id', pymongo.ASCENDING))

        # lbl_ids = []
        # for item in classinfo:
        #     lbl_ids.append(item['lbl_id'])

        # log.info('lbl_ids: {}'.format(lbl_ids))
        # lbl_ids.sort()
        # log.info('len(lbl_ids): {}'.format(len(lbl_ids)))
        # log.info('lbl_ids: {}'.format(lbl_ids))

        log.info("classinfo: {}".format(classinfo))
        self.dataset['categories'] = classinfo

        ## get RELEASE data
        self.dataset['release'] = None
        tblname = annonutils.get_tblname('RELEASE')
        collection = db.get_collection(tblname)
        if collection is not None:  ## pymongo collections do not support truth testing
            release = list(
                collection.find({'rel_type': 'annon'}, {'_id': False}))
            log.info("len(release): {}".format(len(release)))
            ## 'YYYY-MM-DD HH:mm:ss ZZ'
            release.sort(key=lambda x: arrow.get(x['created_on'], common.
                                                 _date_format_).date(),
                         reverse=True)
            self.dataset['release'] = release

        mclient.close()
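## Sketch of the release-sorting step used above; assumes common._date_format_
## is an arrow-compatible format such as 'YYYY-MM-DD HH:mm:ss ZZ':
def _example_latest_release(release):
    import arrow
    fmt = 'YYYY-MM-DD HH:mm:ss ZZ'  ## assumed value of common._date_format_
    release.sort(key=lambda x: arrow.get(x['created_on'], fmt).date(),
                 reverse=True)
    return release[0] if release else None  ## latest release info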
def tbl_stats(db):
    log.info("-----------------------------")
    tblname = annonutils.get_tblname('STATS')

    stats = db.get_collection(tblname)
    cur = stats.find({})

    rpt = {
        'total_items': 0,
        'total_ant': 0,
        'total_error_ant': 0,
        'total_error_img_notfound': 0,
        'total_error_img_reading': 0,
        'total_error_unlabeled_ant': 0,
        'total_img': 0,
        'total_lbl': 0,
        'unique_rel_filenames': set(),
        'all_rel_filenames': [],
        'total_unique_rel_filenames': 0,
        'total_rel_filenames': 0,
        'error_rel_filenames': set()
    }

    for item in cur:
        rpt['unique_rel_filenames'].add(item['rel_filename'])
        rpt['all_rel_filenames'].append(item['rel_filename'])
        rpt['total_error_ant'] += item['total_error_ant']
        rpt['total_error_img_notfound'] += item['total_error_img_notfound']
        rpt['total_error_img_reading'] += item['total_error_img_reading']
        rpt['total_error_unlabeled_ant'] += item['total_error_unlabeled_ant']
        rpt['total_img'] += item['total_img']
        rpt['total_lbl'] += item['total_lbl']
        rpt['total_ant'] += item['total_ant']
        rpt['total_items'] += 1
        if item['total_ant'] == 0:
            rpt['error_rel_filenames'].add(item['rel_filename'])

        total_ant_type = item['total_ant_type'][0]
        for ant_type in total_ant_type.keys():
            if 'total_ant_' + ant_type not in rpt:
                rpt['total_ant_' + ant_type] = 0
            rpt['total_ant_' + ant_type] += total_ant_type[ant_type]

    ## per the debug log below: total_ant - total_error_unlabeled_ant - total_error_ant
    rpt['total_ant'] -= (rpt['total_error_unlabeled_ant'] +
                         rpt['total_error_ant'])
    rpt['total_unique_rel_filenames'] = len(rpt['unique_rel_filenames'])
    rpt['total_rel_filenames'] = len(rpt['all_rel_filenames'])

    log.debug("=> len(unique_rel_filenames): {}".format(
        len(rpt['unique_rel_filenames'])))
    log.debug("=> total_unique_rel_filenames: {}".format(
        rpt['total_unique_rel_filenames']))
    log.debug("total_rel_filenames: {}".format(rpt['total_rel_filenames']))
    log.debug(
        '* total_ant - total_error_unlabeled_ant - total_error_ant: {}'.format(
            rpt['total_ant']))
    log.debug('** total_img: {}'.format(rpt['total_img']))
    log.debug('total_error_ant: {}'.format(rpt['total_error_ant']))
    log.debug('total_error_img_notfound: {}'.format(
        rpt['total_error_img_notfound']))
    log.debug('total_error_img_reading: {}'.format(
        rpt['total_error_img_reading']))
    log.debug('total_error_unlabeled_ant: {}'.format(
        rpt['total_error_unlabeled_ant']))
    log.debug('total_lbl: {}'.format(rpt['total_lbl']))
    log.debug("---x---x---x---\n")

    return rpt
def get_annon_data_bk(cfg):
  """
  return actual data hash of all images, all allotations (annon), release info and classinfo (categories/labels)
  """
  query_images = {}
  query_annotations = {}
  query_classinfo = {}

  filter_enable = cfg['AIDS_FILTER']['ENABLE']
  if filter_enable:
    # filter_by = cfg['AIDS_FILTER']['LABELS']
    filter_by = cfg['AIDS_FILTER'][ cfg['AIDS_FILTER']['BY'] ]

  DBCFG = cfg['DBCFG']
  mclient = MongoClient('mongodb://'+DBCFG['HOST']+':'+str(DBCFG['PORT']))
  db = mclient[DBCFG['DBNAME']]

  ## get IMAGES data
  tblname = annonutils.get_tblname('IMAGES')
  collection = db.get_collection(tblname)
  images = np.array(list(collection.find(query_images, {'_id':False})))
  log.info("len(images): {}".format(len(images)))
  # images = {item['img_id']:item for item in images}

  ## get ANNOTATIONS data
  tblname = annonutils.get_tblname('ANNOTATIONS')
  collection = db.get_collection(tblname)
  annotations = list(collection.find(query_annotations, {'_id':False}))
  log.info("len(annotations): {}".format(len(annotations)))
  annon = {item['ant_id']:item for item in annotations}

  ## get RELEASE data
  tblname = annonutils.get_tblname('RELEASE')
  collection = db.get_collection(tblname)
  release = list(collection.find({'rel_type':'annon'}, {'_id':False}))
  log.info("len(release): {}".format(len(release)))
  ## 'YYYY-MM-DD HH:mm:ss ZZ'
  release.sort(key = lambda x: arrow.get(x['created_on'], common._date_format_).date(), reverse=True)
  latest_release_info = release[0]
  # wanted_keys = ['rel_id','timestamp','created_on']
  # release_info = {k: latest_release_info[k] for k in set(wanted_keys) & set(latest_release_info.keys())}

  ## get CLASSINFO (labels) data
  tblname = annonutils.get_tblname('CLASSINFO')
  collection = db.get_collection(tblname)
  classinfo = list(collection.find(query_classinfo, {'_id':False}))
  lbl_ids = []
  for item in classinfo:
    lbl_ids.append(item['lbl_id'])

  log.info('lbl_ids: {}'.format(lbl_ids))

  lbl_ids.sort()

  log.info('len(lbl_ids): {}'.format(len(lbl_ids)))
  log.info('lbl_ids: {}'.format(lbl_ids))

  mclient.close()

  if filter_enable and filter_by and len(filter_by)>0:
    images = aids_filter(cfg, images, filter_by)

  return images, annon, latest_release_info, lbl_ids