def create_modelinfo(args, cfg, db):
  log.info("----------------------------->")
  from_path = args.from_path
  if not from_path:
    raise Exception('from_path not defined')
  if not os.path.exists(from_path) or not os.path.isfile(from_path):
    raise Exception('File does not exist: {}'.format(from_path))
  
  ##TODO: for the entire directory
  data = common.loadcfg(from_path)

  if data and len(data) > 0:
    data = {k.lower():v for k,v in data.items()} 

  ## TODO: empty data and other sanity checks
  created_on = common.now()
  timestamp = common.timestamp_from_datestring(created_on)
  uuid = common.createUUID('uuid')
  data['uuid'] = uuid
  data['created_on'] = created_on
  data['timestamp'] = timestamp

  tblname = annonutils.get_tblname('MODELINFO')
  # annonutils.create_unique_index(db, tblname, 'created_on')
  annonutils.create_unique_index(db, tblname, 'weights_path')
  collection = db.get_collection(tblname)
  collection.update_one(
    {'created_on': data['created_on']}
    ,{'$setOnInsert': data}
    ,upsert=True
  )
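
# --- Hedged sketch: the pymongo "$setOnInsert" + upsert=True pattern used above is an
# idempotent insert: the document is written only when no document matches the filter,
# so re-running the import does not create duplicates. The URI, database name and field
# values below are illustrative only, not part of this codebase.
def _example_setoninsert_upsert():
  import pymongo

  client = pymongo.MongoClient('mongodb://localhost:27017')
  coll = client['example_db']['MODELINFO']
  coll.create_index('weights_path', unique=True)    # reject duplicate weights_path entries
  doc = {'weights_path': '/tmp/weights.h5', 'created_on': '2020-01-01T00:00:00'}
  res = coll.update_one(
    {'created_on': doc['created_on']}
    ,{'$setOnInsert': doc}                          # applied only when the upsert inserts
    ,upsert=True
  )
  return res.upserted_id                            # None when the document already existed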
def _create_modelinfo(from_path, dbname, db):
  data = common.loadcfg(from_path)

  if data and len(data) > 0:
    data = {k.lower():v for k,v in data.items()}

  ## TODO: empty data and other sanity checks
  created_on = common.now()
  timestamp = common.timestamp_from_datestring(created_on)
  uuid = common.createUUID('uuid')
  data['uuid'] = uuid
  data['created_on'] = created_on
  data['timestamp'] = timestamp
  data['filename'] = from_path.split(os.path.sep)[-1]
  data['filepath'] = from_path
  data['dbname'] = dbname

  data['rel_num'] = str(data['rel_num'])

  try:
    tblname = annonutils.get_tblname('MODELINFO')
    # annonutils.create_unique_index(db, tblname, 'created_on')
    annonutils.create_unique_index(db, tblname, 'weights_path')
    collection = db.get_collection(tblname)
    collection.update_one(
      {'created_on': data['created_on']}
      ,{'$setOnInsert': data}
      ,upsert=True
    )
  except pymongo.errors.PyMongoError as e:
    print(getattr(e, 'details', e))  # not every PyMongoError subclass carries .details

  return uuid
def detect_batch(model,
                 verbose=1,
                 modelcfg=None,
                 batch=None,
                 imagenames=None,
                 colors=None,
                 get_mask=False,
                 class_names=None):
    """API
  """
    log.info("len(batch): {}".format(len(batch)))

    # log.info("len(imagenames): {}".format(len(imagenames)))
    # assert len(batch) == len(imagenames)

    total_items = len(batch)
    res = []
    cc = None

    r = model.detect(batch, verbose)

    if class_names:
        if not colors:
            colors = viz.random_colors(len(class_names))
        cc = dict(zip(class_names, colors))

    for i in range(total_items):
        jsonres = viz.get_detections(batch[i],
                                     r[i]['rois'],
                                     r[i]['masks'],
                                     r[i]['class_ids'],
                                     class_names,
                                     r[i]['scores'],
                                     colors=cc,
                                     get_mask=get_mask)

        uid = common.createUUID('pred')
        # image_name = imagenames[i]
        image_name = uid
        jsonres["filename"] = image_name
        jsonres["file_attributes"]["uuid"] = uid
        via_jsonres = {}
        via_jsonres[image_name] = jsonres
        json_str = common.numpy_to_json(via_jsonres)

        res.append(json.loads(json_str))

    return res
def predictq(app, request):
  """Predict images for high performance throughput (least response time) using redis queue

  Credit: https://www.pyimagesearch.com/2018/01/29/scalable-keras-deep-learning-rest-api/

  Redis will act as our temporary data store on the server. Images will come in to the server via a variety of methods such as cURL, a Python script, or even a mobile app.

  Furthermore, images could come in only every once in awhile (a few every hours or days) or at a very high rate (multiple per second). We need to put the images somewhere as they queue up prior to being processed. Our Redis store will act as the temporary storage.

  In order to store our images in Redis, they need to be serialized. Since images are just NumPy arrays, we can utilize base64 encoding to serialize the images. Using base64 encoding also has the added benefit of allowing us to use JSON to store additional attributes with the image. Similarly, we need to deserialize our image prior to passing them through our model.

  Ref:
  https://stackoverflow.com/questions/26998223/what-is-the-difference-between-contiguous-and-non-contiguous-arrays
  """
  try:
    t0 = time.time()
    rdb = app.config['RDB']
    appcfg = app.config['APPCFG']
    DBCFG = appcfg['APP']['DBCFG']
    REDISCFG = DBCFG['REDISCFG']

    image = request.files["image"]

    image_name = secure_filename(image.filename)
    api_model_key = ''
    dnnarch = ''

    if image and allowed_file( appcfg, image_name):
      image_bytes = image.read()
      im_non_numpy = Image.open(io.BytesIO(image_bytes))

      # im = np.array(im_non_numpy)
      im = img_common.prepare_image(im_non_numpy)

      # ensure our NumPy array is C-contiguous as well,
      # otherwise we won't be able to serialize it
      im = im.copy(order="C")
      im_shape = im.shape

      log.info("Before Encoding:...")
      log.info("type(im), im_shape: {}, {}".format(type(im), im_shape))

      im = img_common.base64_encode_numpy_array_to_string(im)

      log.info("After Encoding:...")
      log.info("type(im): {}".format(type(im)))

      # generate an ID for the classification then add the
      # uuid  + image to the queue
      uid = common.createUUID()
      d = {
        "id": uid
        ,'image': im
        ,'shape': im_shape
        ,'image_name': image_name
      }
      rdb.rpush(REDISCFG['image_queue'], json.dumps(d))

      res_code = 200
      apires = {
        "api": None
        ,"type": api_model_key
        ,"dnnarch": dnnarch
        # ,"org_name": modelkeys[0]
        # ,"problem_id": modelkeys[1]
        # ,"rel_num": modelkeys[2]
        # ,"image_name": image
        ,"result": []
        ,'status_code': res_code
        ,'timings': {
          'image_read': -1
          ,'detect_with_json': -1
          ,'res_preparation': -1
          ,'tt_turnaround': -1
        }
      }

      t1 = time.time()
      time_taken_imread = (t1 - t0)
      log.debug('Total time taken in time_taken_imread: %f seconds' %(time_taken_imread))
      # keep looping until our model server returns the output predictions
      while True:
        # Attempt to grab the output predictions
        output = rdb.get(uid)

        # Check to see if our model has classified/detected the input image
        if output is not None:
          # Add the output predictions to our data dictionary so we can return it to the client
          output = output.decode("utf-8")
          apires['result'] = json.loads(output)

          rdb.delete(uid)
          break
        
        # Sleep for a small amount to give the model a chance to classify/detect the input image
        time.sleep(REDISCFG['client_sleep'])


      t5 = time.time()
      tt_turnaround = (t5 - t0)
      log.debug('Total time taken in tt_turnaround: %f seconds' %(tt_turnaround))

      apires['timings']['image_read'] = time_taken_imread
      apires['timings']['tt_turnaround'] = tt_turnaround
    else:
      res_code = 400
      apires = {
        "api": None
        ,"type": api_model_key
        ,"dnnarch": dnnarch
        # ,"org_name": None
        # ,"problem_id": None
        # ,"rel_num": None
        # ,"image_name": None
        ,"result": None
        ,"error": "Invalid Image Type. Allowed Image Types are: {}".format(appcfg['APP']['ALLOWED_IMAGE_TYPE'])
        ,'status_code': res_code
        ,'timings': {
          'image_read': -1
          ,'detect': -1
          ,'res_preparation': -1
          ,'tt_turnaround': -1
        }
      }
  except Exception as e:
    log.error("Exception in detection", exc_info=True)
    res_code = 500
    apires = {
      "api": None
      ,"type": None
      ,"dnnarch": None
      ,"result": None
      ,"error": "Internal Error. Exception in detection."
      ,'status_code': res_code
      ,'timings': {
        'image_read': -1
        ,'detect': -1
        ,'res_preparation': -1
        ,'tt_turnaround': -1
      }
    }

  log.debug("apires: {}".format(apires))
  # res = Response(jsonify(apires), status=res_code, mimetype='application/json')
  # res = jsonify(apires)
  # res.status_code = res_code
  res = Response(json.dumps(apires), status=res_code, mimetype='application/json')

  log.debug("res: {}".format(res))

  return res
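
# --- Hedged sketch of the base64 round trip described in predictq()'s docstring.
# Assumption: img_common.prepare_image / base64_encode_numpy_array_to_string wrap logic
# similar to this; the helper below is illustrative, not the project's actual API.
def _example_numpy_base64_roundtrip():
  import base64
  import json

  import numpy as np

  im = np.zeros((2, 3, 3), dtype=np.uint8)
  im = np.ascontiguousarray(im)                     # C-contiguous, same intent as im.copy(order="C")
  encoded = base64.b64encode(im.tobytes()).decode('utf-8')

  # The encoded string is plain text, so it can ride inside a JSON payload on the Redis queue.
  payload = json.dumps({'id': 'uuid-123', 'image': encoded, 'shape': im.shape})

  # The worker pops the payload and rebuilds the array before running the model.
  d = json.loads(payload)
  raw = base64.b64decode(d['image'])
  restored = np.frombuffer(raw, dtype=np.uint8).reshape(d['shape'])
  assert (restored == im).all()
  return restored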
def predict(appcfg, modelinfo, image, get_mask=False):
  """Main function for AI API for prediction 
  """
  try:
    t0 = time.time()

    image_name = secure_filename(image.filename)
    log.debug("image.filename: {}".format(image_name))

    log.debug("modelinfo: {}".format(modelinfo))
    api_model_key = modelinfo['API_MODEL_KEY']
    dnnarch = modelinfo['DNNARCH']

    if image and allowed_file( appcfg, image_name):
      image_bytes = image.read()
      im_non_numpy = Image.open(io.BytesIO(image_bytes))

      ##TODO: derive from config whether the image needs to be resized, then send the resized image to the API

      model = modelinfo['MODEL']
      modelcfg = modelinfo['MODELCFG']
      detect = modelinfo['DETECT']
      detect_with_json = modelinfo['DETECT_WITH_JSON']

      cc = None
      class_names = modelcfg['classes']

      t1 = time.time()
      time_taken_imread = (t1 - t0)
      log.debug('Total time taken in time_taken_imread: %f seconds' %(time_taken_imread))

      t2 = time.time()

      jsonres = detect_with_json(model, verbose=1, modelcfg=modelcfg, image_name=image_name, im_non_numpy=im_non_numpy, get_mask=get_mask, class_names=class_names)
      t3 = time.time()
      time_taken_in_detect_with_json = (t3 - t2)

      log.debug("jsonres: {}".format(jsonres))
      log.debug('Total time taken in detect with json: %f seconds' %(time_taken_in_detect_with_json))

      t4 = time.time()

      # uid = str(uuid.uuid4())
      uid = common.createUUID('pred')
      jsonres["filename"] = image_name
      jsonres["file_attributes"]["uuid"] = uid
      via_jsonres = {}
      via_jsonres[image_name] = jsonres
      json_str = common.numpy_to_json(via_jsonres)

      t5 = time.time()
      time_taken_res_preparation = (t5 - t4)
      log.debug('Total time taken in time_taken_res_preparation: %f seconds' %(time_taken_res_preparation))

      tt_turnaround = (t5 - t0)
      log.debug('Total time taken in tt_turnaround: %f seconds' %(tt_turnaround))

      res_code = 200
      # modelkeys = api_model_key.split('-')
      apires = {
        "api": None
        ,"type": api_model_key
        ,"dnnarch": dnnarch
        # ,"org_name": modelkeys[0]
        # ,"problem_id": modelkeys[1]
        # ,"rel_num": modelkeys[2]
        # ,"image_name": image
        ,"result": json.loads(json_str)
        ,'status_code': res_code
        ,'timings': {
          'image_read': time_taken_imread
          ,'detect_with_json': time_taken_in_detect_with_json
          ,'res_preparation': time_taken_res_preparation
          ,'tt_turnaround': tt_turnaround
        }
      }
    else:
      res_code = 400
      apires = {
        "api": None
        ,"type": api_model_key
        ,"dnnarch": dnnarch
        # ,"org_name": None
        # ,"problem_id": None
        # ,"rel_num": None
        # ,"image_name": None
        ,"result": None
        ,"error": "Invalid Image Type. Allowed Image Types are: {}".format(appcfg['APP']['ALLOWED_IMAGE_TYPE'])
        ,'status_code': res_code
        ,'timings': {
          'image_read': -1
          ,'detect': -1
          ,'res_preparation': -1
          ,'tt_turnaround': -1
        }
      }
  except Exception as e:
    log.error("Exception in detection", exc_info=True)
    res_code = 500
    apires = {
      "api": None
      ,"type": None
      ,"dnnarch": None
      ,"result": None
      ,"error": "Internal Error. Exception in detection."
      ,'status_code': res_code
      ,'timings': {
        'image_read': -1
        ,'detect': -1
        ,'res_preparation': -1
        ,'tt_turnaround': -1
      }
    }

  log.debug("apires: {}".format(apires))
  # res = Response(jsonify(apires), status=res_code, mimetype='application/json')
  # res = jsonify(apires)
  # res.status_code = res_code
  res = Response(json.dumps(apires), status=res_code, mimetype='application/json')

  log.debug("res: {}".format(res))
  return res
def convert_output_to_json(outputs, image_filename, metadata):
    reverse_id_mapping = {
        v: k
        for k, v in metadata.thing_dataset_id_to_contiguous_id.items()
    }

    uid = common.createUUID('pred')

    boxes = outputs['instances'].pred_boxes.tensor.cpu().numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = outputs['instances'].scores.tolist()
    category_id = outputs['instances'].pred_classes.tolist()

    classes = []
    for cat in category_id:
        cat_name = reverse_id_mapping[cat]
        classes.append(cat_name)

    num_instances = len(scores)

    print(outputs)

    if num_instances == 0:
        return []

    for k in range(num_instances):
        if k == 0:
            jsonres = {
                image_filename: {
                    "filename":
                    image_filename,
                    "size":
                    0,
                    "regions": [
                        {
                            "region_attributes": {
                                "label": classes[k],
                                "score": scores[k],
                            },
                            "shape_attributes": {
                                "name": "rect",
                                "y": boxes[k][0],
                                "x": boxes[k][1],
                                "height": boxes[k][2],
                                "width": boxes[k][3]
                            }
                        },
                    ],
                    "file_attributes": {
                        "width": 1920,
                        "height": 1280,
                        "uuid": uid
                    }
                }
            }
        else:
            jsonres[image_filename]["regions"].append({
                "region_attributes": {
                    "label": classes[k],
                    "score": scores[k],
                },
                "shape_attributes": {
                    "name": "rect",
                    "y": boxes[k][0],
                    "x": boxes[k][1],
                    "height": boxes[k][2],
                    "width": boxes[k][3]
                }
            })

    return jsonres
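
# --- Hedged sketch: what BoxMode.convert returns, to make the index mapping in
# convert_output_to_json() easier to read. detectron2's XYWH_ABS order is
# (x, y, width, height); note that the shape_attributes above assign boxes[k][0]
# to "y" and boxes[k][2] to "height", i.e. the opposite order, which may be
# intentional for the downstream consumer or may be a latent swap.
def _example_boxmode_convert():
  import numpy as np
  from detectron2.structures import BoxMode

  xyxy = np.array([[10.0, 20.0, 110.0, 70.0]])      # x0, y0, x1, y1
  xywh = BoxMode.convert(xyxy, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
  # xywh == [[10.0, 20.0, 100.0, 50.0]]             -> x, y, width, height
  return xywh.tolist()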
def create_experiment(args, cfg, db):
  log.info("----------------------------->")
  from_path = args.from_path
  if not from_path:
    raise Exception('--from not defined')
  dbid = args.to
  if not dbid:
    raise Exception('--to not defined')
  exp_type = args.exp
  if not exp_type:
    raise Exception('--exp not defined')

  if not os.path.exists(from_path) or not os.path.isfile(from_path):
    raise Exception('File does not exist: {}'.format(from_path))
  
  ##TODO: for the entire directory
  data = common.loadcfg(from_path)

  if data and len(data) > 0:
    data = {k.lower():v for k,v in data.items()} 

  # teppr_items = cfg['TEPPR_ITEMS']
  # for item in teppr_items:
  #   data[item]
  ## TODO: empty data and other sanity checks

  if exp_type in data:
    data = data[exp_type]

  if data and len(data) > 0:
    data = {k.lower():v for k,v in data.items()} 

  ## TODO: empty data and other sanity checks
  created_on = common.now()
  timestamp = common.timestamp_from_datestring(created_on)
  uuid = common.createUUID('uuid')
  data['uuid'] = uuid
  data['created_on'] = created_on
  data['timestamp'] = timestamp
  log_dir = os.path.join(data['dnnarch'], timestamp)
  data['log_dir'] = log_dir
  

  tblname = annonutils.get_tblname('AIDS')
  collection = db.get_collection(tblname)
  # aids_data = list(collection.find({'aids_id':from_path},{'_id':False}))

  expdata = {}
  expdata[exp_type] = data
  ## {'train':data}

  log.info("data:{}".format(expdata))
  
  ## TODO if collection does not exist raise error
  # if collection:
  collection.update_one(
    {'dbid': dbid}
    ,{'$push': expdata}
  )

  res = {
    'dbid': dbid
    ,'exp_id': uuid
  }

  return res
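
# --- Hedged sketch of the MongoDB "$push" used above: it appends the experiment data to
# an array field (named after exp_type, e.g. 'train') on an existing AIDS document. The
# URI, database name and values below are illustrative only.
def _example_push_experiment():
  import pymongo

  coll = pymongo.MongoClient('mongodb://localhost:27017')['example_db']['AIDS']
  coll.insert_one({'dbid': 'aids-123', 'train': []})
  coll.update_one({'dbid': 'aids-123'}, {'$push': {'train': {'uuid': 'exp-1'}}})
  # The AIDS document now reads: {'dbid': 'aids-123', 'train': [{'uuid': 'exp-1'}]}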
def create_db(cfg, args, datacfg):
  """release the AIDS database i.e. creates the PXL DB (AI Datasets)
  and create respective entries in AIDS table in annon database
  """
  log.info("-----------------------------")

  by = args.by
  db_images, db_annon, latest_release_info, lbl_ids = get_annon_data(cfg)
  aids, datacfg = prepare_aids(cfg, db_images, db_annon, lbl_ids, datacfg)

  DBCFG = cfg['DBCFG']
  mclient = MongoClient('mongodb://'+DBCFG['HOST']+':'+str(DBCFG['PORT']))
  rel_timestamp = latest_release_info['timestamp']
  DBNAME = 'PXL-'+rel_timestamp+'_'+cfg['TIMESTAMP']
  log.info("DBNAME: {}".format(DBNAME))
  db = mclient[DBNAME]

  uuid_aids = None
  if len(aids) > 0:
    uuid_aids = common.createUUID('aids')

    AIDS_SPLITS_CRITERIA = cfg['AIDS_SPLITS_CRITERIA'][cfg['AIDS_SPLITS_CRITERIA']['USE']]
    splits = AIDS_SPLITS_CRITERIA[0] ## directory names

    ## Save aids - AI Datasets
    for split in splits:
      for tbl in aids[split]:
        log.info("aids[{}][{}]".format(split, tbl))
        
        tblname = annonutils.get_tblname(tbl)
        log.info("tblname: {}".format(tblname))
        log.info("aids[split][tbl]: {}".format(type(aids[split][tbl])))
        if isinstance(aids[split][tbl], dict):
          log.info('dict')
          data = list(aids[split][tbl].values())
          # log.info(aids[split][tbl]['img-19a68326-3468-4b1e-9fc6-5a739523c6f6'])
        elif isinstance(aids[split][tbl], list):
          log.info('list')
          data = aids[split][tbl]


        log.info("tblname, type(data): {}, {}".format(tblname, type(data)))
        for doc in data:
          # if tblname == 'STATS':
          #   log.info(doc)

          doc['dbid'] = uuid_aids
          doc['timestamp'] = cfg['TIMESTAMP']
          doc['subset'] = split
          annonutils.write2db(db, tblname, doc)

    created_on = common.now()
    uuid_rel = common.createUUID('rel')

    datacfg['dbid'] = uuid_aids
    datacfg['dbname'] = DBNAME
    datacfg['created_on'] = created_on
    datacfg['modified_on'] = None
    datacfg['anndb_id'] = rel_timestamp
    datacfg['timestamp'] = cfg['TIMESTAMP']
    datacfg['anndb_rel_id'] = latest_release_info['rel_id']
    datacfg['rel_id'] = uuid_rel
    datacfg['log_dir'] = DBNAME
    datacfg['rel_type'] = 'aids'
    datacfg['creator'] = by.upper()

    tblname = annonutils.get_tblname('AIDS')
    annonutils.create_unique_index(db, tblname, 'created_on')
    collection = db.get_collection(tblname)
    collection.update_one(
      {'created_on': datacfg['created_on']}
      ,{'$setOnInsert': datacfg}
      ,upsert=True
    )

    tblname = annonutils.get_tblname('CLASSINFO')
    collection = db.get_collection(tblname)
    annonutils.write2db(db, tblname, datacfg['classinfo'], idx_col='lbl_id')

    save_to_annon_db(cfg, aidsdata=datacfg)

    ## TODO:
    ## generate STATS, STATSLABEL and respective SUMMARY csv files for AIDS
 
  mclient.close()

  return uuid_aids
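
# --- Hedged sketch: annonutils.write2db is project code whose behaviour is only known from
# its call sites here (db, tblname, doc, idx_col=...). A hypothetical implementation that is
# consistent with that usage might look like this; it is an assumption, not the actual helper.
def _example_write2db(db, tblname, doc, idx_col=None):
  import pymongo

  collection = db.get_collection(tblname)
  if idx_col:
    collection.create_index(idx_col, unique=True)   # e.g. idx_col='lbl_id' for CLASSINFO
  docs = doc if isinstance(doc, list) else [doc]
  for d in docs:
    try:
      collection.insert_one(d)
    except pymongo.errors.PyMongoError as e:        # e.g. duplicate key on the unique index
      print(e)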
def release_db(cfg, args):
  """Entry point to parse VIA based annotations for creating and saving basic data structures - IMAGES, ANNOTATIONS, LABELS and related data
  Implements the DRC - Design Rule Checks and acts as a gatekeeper, also reports any possible errors
  Create data structures to be parsed in 2nd pass to create the AIDS - AI Datasets with the actual splits 

  Test Cases:
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/images-p1-230119_AT1_via205_250119.json
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  """

  ## Check required args
  for d in ['from_path']:
    if d not in args:
      log.info("'{}' is not present.\n".format(d))
      sys.exit(-1)
  if not os.path.exists(args.from_path):
    raise NotADirectoryError("{}".format(args.from_path))

  dbname = None
  if 'to_path' in args and not os.path.exists(args.to_path):
    dbname = args.to_path

  from_path = args.from_path

  tic = time.time()
  log.info("\nrelease_db:-----------------------------")

  base_from_path = common.getBasePath(from_path)
  log.info("base_from_path: {}".format(base_from_path))

  uuid_rel = common.createUUID('rel')

  timestamp = cfg['RELEASE']['COLS']['timestamp'] = cfg['LOG']['COLS']['timestamp'] = cfg['TIMESTAMP']
  cfg['RELEASE']['COLS']['rel_id'] = cfg['LOG']['COLS']['rel_id'] = uuid_rel

  cfg['SAVE_TO_FILE'] = False

  log.info("-------")
  log.info("cfg: {}".format(cfg))

  if os.path.isdir(from_path):
    ## normalize the path; handles user input both with and without a trailing slash
    files = glob.glob(os.path.join(base_from_path, cfg['ANNON_FILENAME_PREFIX']+'*.json'))
  else:
    files = [from_path]

  total_files = len(files)

  log.info("-------")
  log.debug("\nfiles: {}".format(files))
  log.info("-------")
  log.info("\nTotal files to process =======>: {}".format(total_files))

  total_annon_file_processed = 0
  total_annon_file_existed = 0

  DBCFG = cfg['DBCFG']
  ANNONCFG = DBCFG['ANNONCFG']
  mclient = MongoClient('mongodb://'+ANNONCFG['host']+':'+str(ANNONCFG['port']))
  dbname = ANNONCFG['dbname'] if not dbname else dbname
  log.info("dbname: {}".format(dbname))
  db = mclient[dbname]

  rel_tblname = annonutils.get_tblname('RELEASE')
  annonutils.create_unique_index(db, rel_tblname, 'rel_id')
  rel_collection = db.get_collection(rel_tblname)

  log_tblname = annonutils.get_tblname('LOG')
  annonutils.create_unique_index(db, log_tblname, 'created_on')
  log_collection = db.get_collection(log_tblname)

  for annon_filepath in files:
    log.info("-------")
    tic2 = time.time()
    annon_filename = os.path.basename(annon_filepath)

    ## check whether the file has already been parsed: in normal mode, skip files that were processed before
    # res = log_collection.find_one({'rel_filename': annon_filename})
    res = log_collection.find_one({'rel_filepath': annon_filepath})
    
    ## TODO: in update mode
    ## delete the entries of annotations and images before inserting the values of the same file again 
    if not res:
      log.info(" annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))

      created_on  = common.now()
      cfg['RELEASE']['COLS']['created_on'] = cfg['LOG']['COLS']['created_on'] = created_on
      log.info("created_on: {}".format(created_on))

      cfg['LOG']['COLS']['rel_filename'] = annon_filename
      cfg['LOG']['COLS']['rel_filepath'] = annon_filepath
      annondata = annon_parser.parse_annon_file(cfg, annon_filepath, base_from_path)
      total_annon_file_processed += 1

      save_parsed_data(cfg, annondata, db=db)

      cfg['LOG']['COLS']['modified_on'] = None

      toc2 = time.time()
      cfg['LOG']['COLS']['total_exec_time'] = '{:0.2f}s'.format(toc2 - tic)

      ## if an exception occurs or the run is terminated, save whatever has been processed so far in the log instead of a one-shot log update outside the for loop
      ## this makes it possible to recover from an abrupt termination and resume from the last successfully processed file
      log_collection.update_one(
        {'created_on': created_on}
        ,{'$setOnInsert': cfg['LOG']['COLS']}
        ,upsert=True
      )

      log.info("=======> Total Execution Time: {:0.2f}s, Processed files: {}, Remaning files: {}".format(toc2 - tic2, total_annon_file_processed, total_files - total_annon_file_processed))

      ## Update the LOG table here itself
    else:
      log.info("Already Exist in Database: annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))
      log.info("Use update / delete command to process this file again")
      total_annon_file_existed += 1


  cfg['RELEASE']['COLS']['total_annon_file_processed'] = total_annon_file_processed
  # cfg['RELEASE']['COLS']['total_exec_time'] = '{:0.2f}s'.format(time.time() - tic)
  cfg['RELEASE']['COLS']['total_exec_time_in_sec'] = '{:0.2f}'.format(time.time() - tic)

  if total_annon_file_processed:
    rel_collection.update_one(
      {'rel_id': uuid_rel}
      ,{'$setOnInsert': cfg['RELEASE']['COLS']}
      ,upsert=True
    )

  log.info("total_files, total_annon_file_processed, total_annon_file_existed: {} = {} + {}".format(total_files, total_annon_file_processed, total_annon_file_existed))

  mclient.close()

  return timestamp
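
# --- Hedged sketch of the crash-recovery bookkeeping described in release_db(): every
# annotation file gets its own LOG document as soon as it is parsed, so a re-run skips
# files that already have a LOG entry and resumes with the remaining ones. Collection and
# field names mirror the code above; the URI and database name are illustrative only.
def _example_resume_from_log(files):
  import pymongo

  log_coll = pymongo.MongoClient('mongodb://localhost:27017')['example_db']['LOG']
  for filepath in files:
    if log_coll.find_one({'rel_filepath': filepath}):
      continue                                      # already parsed in an earlier run
    # ... parse and persist the annotation file here ...
    log_coll.update_one(
      {'rel_filepath': filepath}
      ,{'$setOnInsert': {'rel_filepath': filepath}}
      ,upsert=True
    )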
def create_db(cfg, args, datacfg, aids):
    """release the AIDS database i.e. creates the PXL DB (AI Datasets)
  and create respective entries in AIDS table in annon database
  """
    log.info("-----------------------------")

    by = args.by

    splits = datacfg['splits']

    DBCFG = cfg['DBCFG']
    PXLCFG = DBCFG['PXLCFG']
    mclient = MongoClient('mongodb://' + PXLCFG['host'] + ':' +
                          str(PXLCFG['port']))
    dbname = 'PXL-' + cfg['TIMESTAMP']
    log.info("dbname: {}".format(dbname))
    db = mclient[dbname]

    uuid_aids = None
    if len(aids) > 0:
        uuid_aids = common.createUUID('aids')
        ## Save aids - AI Datasets
        for split in splits:
            for tbl in aids[split]:
                # log.info("aids[{}][{}]".format(split, tbl))
                log.info("split: {}".format(split))

                if aids[split][tbl] is not None:
                    tblname = annonutils.get_tblname(tbl)
                    log.info("tblname: {}".format(tblname))
                    log.info("aids[split][tbl]: {}".format(
                        type(aids[split][tbl])))
                    if isinstance(aids[split][tbl], dict):
                        log.info('dict')
                        data = list(aids[split][tbl].values())
                        # log.info(aids[split][tbl]['img-19a68326-3468-4b1e-9fc6-5a739523c6f6'])
                    elif isinstance(aids[split][tbl], list):
                        log.info('list')
                        data = aids[split][tbl]

                    log.info(
                        "tblname, type(data), len(data): {}, {}, {}".format(
                            tblname, type(data), len(data)))
                    if len(data) > 0:
                        for doc in data:
                            doc['dbid'] = uuid_aids
                            doc['timestamp'] = cfg['TIMESTAMP']
                            doc['subset'] = split

                            if tblname == 'STATS':
                                log.info('doc: {}'.format(doc))
                            # log.debug('doc: {}'.format(doc))
                            annonutils.write2db(db, tblname, doc)

        created_on = common.now()
        uuid_rel = common.createUUID('rel')

        datacfg['dbid'] = uuid_aids
        datacfg['dbname'] = dbname
        datacfg['created_on'] = created_on
        datacfg['modified_on'] = None
        datacfg['anndb_id'] = dbname
        datacfg['timestamp'] = cfg['TIMESTAMP']
        datacfg['anndb_rel_id'] = None
        datacfg['rel_id'] = uuid_rel
        datacfg['log_dir'] = dbname
        datacfg['rel_type'] = 'aids'
        datacfg['creator'] = by.upper()

        log.info("datacfg: {}".format(datacfg))

        tblname = annonutils.get_tblname('AIDS')
        annonutils.create_unique_index(db, tblname, 'created_on')
        collection = db.get_collection(tblname)
        collection.update_one({'created_on': datacfg['created_on']},
                              {'$setOnInsert': datacfg},
                              upsert=True)

        tblname = annonutils.get_tblname('CLASSINFO')
        collection = db.get_collection(tblname)
        annonutils.write2db(db,
                            tblname,
                            datacfg['classinfo'],
                            idx_col='lbl_id')

        save_to_annon_db(cfg, aidsdata=datacfg)

        ## TODO:
        ## generate STATS, STATSLABEL and respective SUMMARY csv files for AIDS

    mclient.close()

    return dbname
def coco_to_annon(subset, metadata, dataset):
    """
  mutex to transform coco data to annon format
  """
    log.info("-----------------------------")
    image_dir = metadata['image_dir']
    annotation_file = metadata['annotation_file']

    ## CLASSINFO
    categories = {cat['name']: cat for cat in dataset['categories']}
    catid_lblid_map = {
        str(cat['id']): cat['name']
        for cat in dataset['categories']
    }

    log.info("categories: {}".format(categories))
    cats = list(categories.keys())
    cats.sort()
    log.info("cats: {}".format(cats))

    for i, cat in enumerate(cats):
        category = categories[cat]
        category['coco_id'] = category['id']
        category['lbl_id'] = category['name']
        category['source'] = 'coco'

    metadata['catid_lblid_map'] = catid_lblid_map

    ## IMAGES
    ## instances, 2014 => ['license', 'file_name', 'coco_url', 'height', 'width', 'date_captured', 'flickr_url', 'id']
    images = dataset['images']
    for i, image in enumerate(images):
        if i == 0:
            log.info("image.keys(): {}".format(image.keys()))

        uuid_img = common.createUUID('img')
        image['img_id'] = image['id']
        image['filename'] = image['file_name']
        image['subset'] = subset
        image['file_attributes'] = {'id': image['id'], 'uuid': uuid_img}
        image['size'] = 0
        image['modified_on'] = None
        image['base_dir'] = None
        image['dir'] = None
        image['file_id'] = None
        image['filepath'] = None
        image['rel_filename'] = None

    ## ANNOTATIONS
    ## instances, 2014 => ['segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id', 'id']
    annotations = dataset['annotations']
    boxmode = 'XYWH_ABS'
    for i, annotation in enumerate(annotations):
        if i == 0:
            log.info("annotation.keys(): {}".format(annotation.keys()))
            log.info("annotation.keys(): {}".format(annotation.keys()))

        uuid_ant = common.createUUID('ant')
        annotation['ant_id'] = annotation['id']
        annotation['img_id'] = annotation['image_id']
        annotation['lbl_id'] = annotation['category_id']
        ## crucial mapping
        annotation['lbl_id'] = catid_lblid_map[str(annotation['category_id'])]

        ## BoxMode.XYWH_ABS
        _bbox = {
            "ymin": annotation['bbox'][1],
            "xmin": annotation['bbox'][0],
            "ymax": None,
            "xmax": None,
            "width": annotation['bbox'][2],
            "height": annotation['bbox'][3]
        }
        annotation['annon_index'] = -1
        annotation['annotation_rel_date'] = None
        annotation['annotation_tool'] = 'coco'
        annotation['annotator_id'] = 'coco'
        # annotation['ant_type'] = 'bbox'
        # annotation['ant_type'] = 'polygon'
        annotation['filename'] = annotation['id']
        annotation['subset'] = subset
        annotation['modified_on'] = None
        annotation['maskarea'] = -1
        annotation['_bbox'] = _bbox
        annotation['boxmode'] = boxmode
        annotation['bboxarea'] = annotation['area']
        annotation['region_attributes'] = {
            'id': annotation['id'],
            'uuid': uuid_ant,
            'iscrowd': annotation['iscrowd']
        }
        annotation['dir'] = None
        annotation['file_id'] = annotation['image_id']
        annotation['filepath'] = None
        annotation['rel_filename'] = annotation_file
        annotation['image_name'] = None
        annotation['image_dir'] = image_dir
        annotation['file_attributes'] = {}
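
# --- Hedged worked example of the bbox mapping above. A COCO 'bbox' is [x, y, width, height]
# in absolute pixels (the 'XYWH_ABS' boxmode noted above), so for bbox = [40, 10, 100, 50]
# coco_to_annon builds:
#   _bbox = {'xmin': 40, 'ymin': 10, 'width': 100, 'height': 50, 'xmax': None, 'ymax': None}
# If a consumer later needs the corners, they follow as xmax = xmin + width = 140 and
# ymax = ymin + height = 60 (coco_to_annon itself leaves them as None).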
def create_experiment(args, cfg):
  log.info("----------------------------->")

  from_path = args.from_path
  dbname = args.to
  exp_type = args.exp

  DBCFG = cfg['DBCFG']
  PXLCFG = DBCFG['PXLCFG']
  mclient = MongoClient('mongodb://'+PXLCFG['host']+':'+str(PXLCFG['port']))

  check_args('experiment', args, cfg)
  expdata = common.loadcfg(from_path)
  if expdata and len(expdata) > 0:
    expdata = {k.lower():v for k,v in expdata.items()} 

  creator = 'AIE3'
  if 'creator' in expdata:
    creator = expdata['creator']

  if exp_type in expdata:
    expdata = expdata[exp_type]

  if expdata and len(expdata) > 0:
    expdata = {k.lower():v for k,v in expdata.items()} 

  modelinfo_abspath = os.path.join(os.getenv('AI_CFG'), 'model')
  modelinfo_filepath = os.path.join(modelinfo_abspath, expdata['model_info'])

  args.from_path = modelinfo_filepath
  check_args('modelinfo', args, cfg)

  created_on = common.now()
  timestamp = common.timestamp_from_datestring(created_on)
  uuid = common.createUUID(exp_type)
  expdata['uuid'] = uuid
  expdata['created_on'] = created_on
  expdata['timestamp'] = timestamp
  expdata['creator'] = creator
  expdata['filename'] = from_path.split(os.path.sep)[-1]
  expdata['filepath'] = from_path
  expdata['dbname'] = dbname

  log_dir = os.path.join(expdata['dnnarch'], timestamp)
  expdata['log_dir'] = log_dir

  modelinfo = common.loadcfg(modelinfo_filepath)
  if modelinfo and len(modelinfo) > 0:
    modelinfo = {k.lower():v for k,v in modelinfo.items()}

  modelinfo['uuid'] = uuid
  modelinfo['created_on'] = created_on
  modelinfo['timestamp'] = timestamp
  modelinfo['filename'] = expdata['model_info']
  modelinfo['filepath'] = modelinfo_filepath

  expdata['modelinfo'] = modelinfo

  log.info("expdata:{}".format(expdata))

  db = mclient[dbname]

  tblname = annonutils.get_tblname(exp_type.upper())
  collection = db.get_collection(tblname)
  collection.update_one(
    {'created_on': expdata['created_on']}
    ,{'$setOnInsert': expdata}
    ,upsert=True
  )

  aidsdata = {}
  aidsdata[exp_type] = uuid

  tblname = annonutils.get_tblname('AIDS')
  collection = db.get_collection(tblname)
  collection.update_one(
    {'dbname': dbname}
    ,{'$push': aidsdata}
  )

  mclient.close()

  return uuid