# Beispiel #1 (example marker from the source collection)
# 0
def _create_modelinfo(from_path, dbname, db):
  """Load a model-info config file and upsert it into the MODELINFO table.

  Args:
    from_path: path to the model-info config file (read via common.loadcfg).
    dbname: name of the database the model belongs to; recorded on the doc.
    db: pymongo database handle holding the MODELINFO collection.

  Returns:
    The generated uuid of the model-info document (generated even when the
    database write fails; the write error is logged, not raised).
  """
  data = common.loadcfg(from_path)

  ## normalize all top-level keys to lowercase
  if data and len(data) > 0:
    data = {k.lower(): v for k, v in data.items()}
  else:
    ## sanity: an empty/missing config still yields a bare metadata record
    data = {}

  created_on = common.now()
  timestamp = common.timestamp_from_datestring(created_on)
  uuid = common.createUUID('uuid')
  data['uuid'] = uuid
  data['created_on'] = created_on
  data['timestamp'] = timestamp
  data['filename'] = from_path.split(os.path.sep)[-1]
  data['filepath'] = from_path
  data['dbname'] = dbname

  ## keep rel_num as a string for consistent querying; skip when absent
  ## (previously a missing 'rel_num' key raised KeyError)
  if 'rel_num' in data:
    data['rel_num'] = str(data['rel_num'])

  try:
    tblname = annonutils.get_tblname('MODELINFO')
    annonutils.create_unique_index(db, tblname, 'weights_path')
    collection = db.get_collection(tblname)
    ## upsert keyed on created_on so a re-run does not duplicate the record
    collection.update_one(
      {'created_on': data['created_on']}
      ,{'$setOnInsert': data}
      ,upsert=True
    )
  except pymongo.errors.PyMongoError as e:
    ## BUGFIX: `e.details` only exists on OperationFailure; accessing it on a
    ## generic PyMongoError raised AttributeError inside the handler. Also use
    ## the module logger instead of print, consistent with the rest of the file.
    log.error("MODELINFO upsert failed: {}".format(getattr(e, 'details', e)))

  return uuid
def create_modelinfo(args, cfg, db):
  """Load a model-info config from args.from_path and upsert it into MODELINFO.

  Args:
    args: parsed CLI args; must carry `from_path` pointing to a config file.
    cfg: global configuration (currently unused; kept for a uniform signature).
    db: pymongo database handle holding the MODELINFO collection.

  Returns:
    The generated uuid of the model-info document.

  Raises:
    Exception: when `from_path` is missing or is not an existing file.
  """
  log.info("----------------------------->")
  from_path = args.from_path
  if not from_path:
    raise Exception('from_path not defined')
  ## os.path.isfile implies existence, so a single check suffices
  if not os.path.isfile(from_path):
    raise Exception('File does not exist: {}'.format(from_path))

  ##TODO: for the entire directory
  data = common.loadcfg(from_path)

  ## normalize all top-level keys to lowercase
  if data and len(data) > 0:
    data = {k.lower(): v for k, v in data.items()}

  ## TODO: empty data and other sanity checks
  created_on = common.now()
  timestamp = common.timestamp_from_datestring(created_on)
  uuid = common.createUUID('uuid')
  data['uuid'] = uuid
  data['created_on'] = created_on
  data['timestamp'] = timestamp

  ## upsert keyed on created_on so a re-run does not duplicate the record
  tblname = annonutils.get_tblname('MODELINFO')
  annonutils.create_unique_index(db, tblname, 'weights_path')
  collection = db.get_collection(tblname)
  collection.update_one(
    {'created_on': data['created_on']}
    ,{'$setOnInsert': data}
    ,upsert=True
  )

  ## return the uuid, consistent with _create_modelinfo
  return uuid
def save_to_annon_db(cfg, aidsdata):
  """Persist an AIDS dataset record into the annotation database.

  Opens a short-lived Mongo connection from cfg['DBCFG'], upserts
  `aidsdata` into the AIDS table keyed on 'created_on' (so re-runs do
  not duplicate the record), then closes the connection.
  """
  dbcfg = cfg['DBCFG']
  uri = 'mongodb://' + dbcfg['HOST'] + ':' + str(dbcfg['PORT'])
  mclient = MongoClient(uri)
  db = mclient[dbcfg['DBNAME']]

  aids_tblname = annonutils.get_tblname('AIDS')
  annonutils.create_unique_index(db, aids_tblname, 'created_on')
  aids_collection = db.get_collection(aids_tblname)
  aids_collection.update_one(
    {'created_on': aidsdata['created_on']},
    {'$setOnInsert': aidsdata},
    upsert=True)

  mclient.close()
def save_to_annon_db(cfg, aidsdata):
    """Upsert an AIDS dataset record into the annotation database.

    Connection parameters come from cfg['DBCFG']['ANNONCFG']; the record
    is keyed on its 'created_on' value so re-runs do not duplicate it.
    """
    anncfg = cfg['DBCFG']['ANNONCFG']
    mclient = MongoClient('mongodb://' + anncfg['host'] + ':' +
                          str(anncfg['port']))
    dbname = anncfg['dbname']
    log.info("ANNONCFG['dbname']: {}".format(dbname))
    db = mclient[dbname]

    tblname = annonutils.get_tblname('AIDS')
    annonutils.create_unique_index(db, tblname, 'created_on')
    db.get_collection(tblname).update_one(
        {'created_on': aidsdata['created_on']},
        {'$setOnInsert': aidsdata},
        upsert=True)

    mclient.close()
def create_db(cfg, args, datacfg):
  """release the AIDS database i.e. creates the PXL DB (AI Datasets)
  and create respective entries in AIDS table in annon database

  Args:
    cfg: global configuration; provides DBCFG, TIMESTAMP, AIDS_SPLITS_CRITERIA.
    args: parsed CLI args; `args.by` is recorded (uppercased) as the creator.
    datacfg: dataset configuration; enriched in-place with release metadata.

  Returns:
    The generated AIDS dbid (uuid), or None when there was nothing to save.
  """
  log.info("-----------------------------")

  by = args.by
  db_images, db_annon, latest_release_info, lbl_ids = get_annon_data(cfg)
  aids, datacfg = prepare_aids(cfg, db_images, db_annon, lbl_ids, datacfg)

  DBCFG = cfg['DBCFG']
  mclient = MongoClient('mongodb://'+DBCFG['HOST']+':'+str(DBCFG['PORT']))
  rel_timestamp = latest_release_info['timestamp']
  DBNAME = 'PXL-'+rel_timestamp+'_'+cfg['TIMESTAMP']
  log.info("DBNAME: {}".format(DBNAME))
  db = mclient[DBNAME]

  uuid_aids = None
  if len(aids) > 0:
    uuid_aids = common.createUUID('aids')

    AIDS_SPLITS_CRITERIA = cfg['AIDS_SPLITS_CRITERIA'][cfg['AIDS_SPLITS_CRITERIA']['USE']]
    splits = AIDS_SPLITS_CRITERIA[0] ## directory names

    ## Save aids - AI Datasets
    for split in splits:
      for tbl in aids[split]:
        log.info("aids[{}][{}]".format(split, tbl))

        tblname = annonutils.get_tblname(tbl)
        log.info("tblname: {}".format(tblname))
        log.info("aids[split][tbl]: {}".format(type(aids[split][tbl])))
        if isinstance(aids[split][tbl], dict):
          log.info('dict')
          data = list(aids[split][tbl].values())
        elif isinstance(aids[split][tbl], list):
          log.info('list')
          data = aids[split][tbl]
        else:
          ## BUGFIX: previously `data` kept the value from a prior iteration
          ## for unexpected types (silently re-writing stale rows), or was
          ## unbound on the first iteration (NameError); skip such tables
          log.info("skipping unsupported type for aids[{}][{}]: {}".format(
            split, tbl, type(aids[split][tbl])))
          continue

        log.info("tblname, type(data): {}, {}".format(tblname, type(data)))
        for doc in data:
          doc['dbid'] = uuid_aids
          doc['timestamp'] = cfg['TIMESTAMP']
          doc['subset'] = split
          annonutils.write2db(db, tblname, doc)

    created_on = common.now()
    uuid_rel = common.createUUID('rel')

    ## release metadata recorded alongside the dataset
    datacfg['dbid'] = uuid_aids
    datacfg['dbname'] = DBNAME
    datacfg['created_on'] = created_on
    datacfg['modified_on'] = None
    datacfg['anndb_id'] = rel_timestamp
    datacfg['timestamp'] = cfg['TIMESTAMP']
    datacfg['anndb_rel_id'] = latest_release_info['rel_id']
    datacfg['rel_id'] = uuid_rel
    datacfg['log_dir'] = DBNAME
    datacfg['rel_type'] = 'aids'
    datacfg['creator'] = by.upper()

    ## upsert keyed on created_on so a re-run does not duplicate the record
    tblname = annonutils.get_tblname('AIDS')
    annonutils.create_unique_index(db, tblname, 'created_on')
    collection = db.get_collection(tblname)
    collection.update_one(
      {'created_on': datacfg['created_on']}
      ,{'$setOnInsert': datacfg}
      ,upsert=True
    )

    tblname = annonutils.get_tblname('CLASSINFO')
    collection = db.get_collection(tblname)
    annonutils.write2db(db, tblname, datacfg['classinfo'], idx_col='lbl_id')

    save_to_annon_db(cfg, aidsdata=datacfg)

    ## TODO:
    ## generate STATS, STATSLABEL and respective SUMMARY csv files for AIDS

  mclient.close()

  return uuid_aids
# Beispiel #6 (example marker from the source collection)
# 0
def release_db(cfg, args):
  """Entry point to parse VIA based annotations for creating and saving basic data structures - IMAGES, ANNOTATIONS, LABELS and related data.

  Implements the DRC - Design Rule Checks and acts as a gatekeeper, also reports any possible errors.
  Creates data structures to be parsed in a 2nd pass to create the AIDS - AI Datasets with the actual splits.

  Already-parsed files (found in the LOG table by `rel_filepath`) are skipped;
  the LOG table is updated after each file so an aborted run can resume.

  Returns cfg['TIMESTAMP'] (the release timestamp stamped on all rows).

  Test Cases:
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/images-p1-230119_AT1_via205_250119.json
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  """

  ## Check required args; bail out early when from_path is absent
  for d in ['from_path']:
    if d not in args:
      log.info("'{}' is not present.\n".format(d))
      sys.exit(-1)
  if not os.path.exists(args.from_path):
    raise NotADirectoryError("{}".format(args.from_path))

  ## a to_path that does not already exist on disk is taken as the target dbname
  dbname = None
  if 'to_path' in args and not os.path.exists(args.to_path):
    dbname = args.to_path

  from_path = args.from_path

  tic = time.time()
  log.info("\nrelease_db:-----------------------------")

  base_from_path = common.getBasePath(from_path)
  log.info("base_from_path: {}".format(base_from_path))

  uuid_rel = common.createUUID('rel')

  ## stamp the RELEASE and LOG column templates with this run's timestamp / rel_id
  timestamp = cfg['RELEASE']['COLS']['timestamp'] = cfg['LOG']['COLS']['timestamp'] = cfg['TIMESTAMP']
  cfg['RELEASE']['COLS']['rel_id'] = cfg['LOG']['COLS']['rel_id'] = uuid_rel

  cfg['SAVE_TO_FILE'] = False

  log.info("-------")
  log.info("cfg: {}".format(cfg))

  if os.path.isdir(from_path):
    ## normalizes and takes care of path ending with slash or not as the user input
    files = glob.glob(os.path.join(base_from_path, cfg['ANNON_FILENAME_PREFIX']+'*.json'))
  else:
    files = [from_path]

  total_files = len(files)

  log.info("-------")
  log.debug("\nfiles: {}".format(files))
  log.info("-------")
  log.info("\nTotal files to process =======>: {}".format(total_files))

  total_annon_file_processed = 0
  total_annon_file_existed = 0

  DBCFG = cfg['DBCFG']
  ANNONCFG = DBCFG['ANNONCFG']
  mclient = MongoClient('mongodb://'+ANNONCFG['host']+':'+str(ANNONCFG['port']))
  ## CLI-provided dbname (via to_path) wins over the configured default
  dbname = ANNONCFG['dbname'] if not dbname else dbname
  log.info("dbname: {}".format(dbname))
  db = mclient[dbname]

  rel_tblname = annonutils.get_tblname('RELEASE')
  annonutils.create_unique_index(db, rel_tblname, 'rel_id')
  rel_collection = db.get_collection(rel_tblname)

  log_tblname = annonutils.get_tblname('LOG')
  annonutils.create_unique_index(db, log_tblname, 'created_on')
  log_collection = db.get_collection(log_tblname)

  for annon_filepath in files:
    log.info("-------")
    tic2 = time.time()
    annon_filename = os.path.basename(annon_filepath)

    ## check if the file is parsed: skip the processing in normal mode of the already parsed file
    # res = log_collection.find_one({'rel_filename': annon_filename})
    res = log_collection.find_one({'rel_filepath': annon_filepath})

    ## TODO: in update mode
    ## delete the entries of annotations and images before inserting the values of the same file again
    if not res:
      log.info(" annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))

      created_on  = common.now()
      cfg['RELEASE']['COLS']['created_on'] = cfg['LOG']['COLS']['created_on'] = created_on
      log.info("created_on: {}".format(created_on))

      cfg['LOG']['COLS']['rel_filename'] = annon_filename
      cfg['LOG']['COLS']['rel_filepath'] = annon_filepath
      annondata = annon_parser.parse_annon_file(cfg, annon_filepath, base_from_path)
      total_annon_file_processed += 1

      save_parsed_data(cfg, annondata, db=db)

      cfg['LOG']['COLS']['modified_on'] = None

      toc2 = time.time()
      ## NOTE(review): measured from `tic` (run start), not `tic2` (file start),
      ## so each LOG row records cumulative elapsed time — confirm intended
      cfg['LOG']['COLS']['total_exec_time'] = '{:0.2f}s'.format(toc2 - tic)

      ## if exception occurs or terminate, save what has been processed so far in the log instead of one-shot update of log out of for loop
      ## this helps to recover from the abrupt termination and start from previous successfully processed file
      log_collection.update_one(
        {'created_on': created_on}
        ,{'$setOnInsert': cfg['LOG']['COLS']}
        ,upsert=True
      )

      log.info("=======> Total Execution Time: {:0.2f}s, Processed files: {}, Remaning files: {}".format(toc2 - tic2, total_annon_file_processed, total_files - total_annon_file_processed))

      ## Update the LOG table here itself
    else:
      log.info("Already Exist in Database: annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))
      log.info("Use update / delete command to process this file again")
      total_annon_file_existed += 1


  cfg['RELEASE']['COLS']['total_annon_file_processed'] = total_annon_file_processed
  # cfg['RELEASE']['COLS']['total_exec_time'] = '{:0.2f}s'.format(time.time() - tic)
  cfg['RELEASE']['COLS']['total_exec_time_in_sec'] = '{:0.2f}'.format(time.time() - tic)

  ## record the RELEASE row only when at least one new file was parsed
  if total_annon_file_processed:
    rel_collection.update_one(
      {'rel_id': uuid_rel}
      ,{'$setOnInsert': cfg['RELEASE']['COLS']}
      ,upsert=True
    )

  log.info("total_files, total_annon_file_processed, total_annon_file_existed: {} = {} + {}".format(total_files, total_annon_file_processed, total_annon_file_existed))

  mclient.close()

  return timestamp
def create_db(cfg, args, datacfg, aids):
    """Release the AIDS database i.e. create the PXL DB (AI Datasets)
    and create respective entries in the AIDS table in the annon database.

    Args:
        cfg: global configuration; provides DBCFG['PXLCFG'] and TIMESTAMP.
        args: parsed CLI args; `args.by` is recorded (uppercased) as creator.
        datacfg: dataset configuration; enriched in-place with release metadata.
        aids: mapping split -> table-name -> (dict | list | None) of documents.

    Returns:
        The name of the newly created PXL database.
    """
    log.info("-----------------------------")

    by = args.by

    splits = datacfg['splits']

    DBCFG = cfg['DBCFG']
    PXLCFG = DBCFG['PXLCFG']
    mclient = MongoClient('mongodb://' + PXLCFG['host'] + ':' +
                          str(PXLCFG['port']))
    dbname = 'PXL-' + cfg['TIMESTAMP']
    log.info("dbname: {}".format(dbname))
    db = mclient[dbname]

    uuid_aids = None
    if len(aids) > 0:
        uuid_aids = common.createUUID('aids')
        ## Save aids - AI Datasets
        for split in splits:
            for tbl in aids[split]:
                log.info("split: {}".format(split))

                if aids[split][tbl] is not None:
                    tblname = annonutils.get_tblname(tbl)
                    log.info("tblname: {}".format(tblname))
                    log.info("aids[split][tbl]: {}".format(
                        type(aids[split][tbl])))
                    if isinstance(aids[split][tbl], dict):
                        log.info('dict')
                        data = list(aids[split][tbl].values())
                    elif isinstance(aids[split][tbl], list):
                        log.info('list')
                        data = aids[split][tbl]
                    else:
                        ## BUGFIX: previously `data` silently kept the value
                        ## from a prior iteration for unexpected types
                        ## (re-writing stale rows), or was unbound on the
                        ## first iteration (NameError); skip such tables
                        log.info("skipping unsupported type: {}".format(
                            type(aids[split][tbl])))
                        continue

                    log.info(
                        "tblname, type(data), len(data): {}, {}, {}".format(
                            tblname, type(data), len(data)))
                    if len(data) > 0:
                        for doc in data:
                            doc['dbid'] = uuid_aids
                            doc['timestamp'] = cfg['TIMESTAMP']
                            doc['subset'] = split

                            if tblname == 'STATS':
                                log.info('doc: {}'.format(doc))
                            annonutils.write2db(db, tblname, doc)

        created_on = common.now()
        uuid_rel = common.createUUID('rel')

        ## release metadata recorded alongside the dataset
        datacfg['dbid'] = uuid_aids
        datacfg['dbname'] = dbname
        datacfg['created_on'] = created_on
        datacfg['modified_on'] = None
        datacfg['anndb_id'] = dbname
        datacfg['timestamp'] = cfg['TIMESTAMP']
        datacfg['anndb_rel_id'] = None
        datacfg['rel_id'] = uuid_rel
        datacfg['log_dir'] = dbname
        datacfg['rel_type'] = 'aids'
        datacfg['creator'] = by.upper()

        log.info("datacfg: {}".format(datacfg))

        ## upsert keyed on created_on so a re-run does not duplicate the record
        tblname = annonutils.get_tblname('AIDS')
        annonutils.create_unique_index(db, tblname, 'created_on')
        collection = db.get_collection(tblname)
        collection.update_one({'created_on': datacfg['created_on']},
                              {'$setOnInsert': datacfg},
                              upsert=True)

        tblname = annonutils.get_tblname('CLASSINFO')
        collection = db.get_collection(tblname)
        annonutils.write2db(db,
                            tblname,
                            datacfg['classinfo'],
                            idx_col='lbl_id')

        save_to_annon_db(cfg, aidsdata=datacfg)

        ## TODO:
        ## generate STATS, STATSLABEL and respective SUMMARY csv files for AIDS

    mclient.close()

    return dbname