def classify_assets():
    content = parse_post_req_content()
    max_requests = cfg.resolve_int(cfg.CLASSIFICATION,
                                   cfg.max_assets_per_request)
    if len(content.assets) > max_requests:
        raise InvalidUsage(
            'Exceeded maximum number of assets ({}) per request!'.format(
                max_requests))
    return handle_request(content)
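
# Hedged usage sketch (not called anywhere): the exact request schema is
# defined by parse_post_req_content(), which is not shown here. Based on the
# fields the handlers read (content.assets[*].url / .asset_id and
# content.insert), a classification request might look roughly like this.
# The endpoint path and field names below are assumptions, not confirmed by
# this file.
def _example_classification_request():
    import requests

    payload = {
        'insert': True,  # assumed flag: index the assets while classifying
        'assets': [
            {'asset_id': 'asset-1', 'url': 'http://example.com/image1.jpg'},
            {'asset_id': 'asset-2', 'url': 'http://example.com/image2.jpg'},
        ],
    }
    # keep the asset count at or below cfg.max_assets_per_request,
    # otherwise classify_assets() raises InvalidUsage
    return requests.post('http://localhost:5000/services/v1/classify',
                         json=payload, timeout=30)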
def backup_persisting_files():
    global round_robin_backup_index
    round_robin_backup_index += 1
    round_robin_backup_index %= cfg.resolve_int(
        cfg.CRON_JOB, cfg.cron_job_round_robin_backups)

    # copy faiss file
    path = cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_path)
    file = os.path.join(path, cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_file))
    copyfile(file, file + '.backup_{}'.format(round_robin_backup_index))

    # copy blacklist file
    file = os.path.join(
        path, cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_blacklist_file))
    copyfile(file, file + '.backup_{}'.format(round_robin_backup_index))
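
# Hedged sketch (hypothetical helper, not part of the original module): the
# round-robin backups above produce files named '<index_file>.backup_N'.
# Restoring slot N is just the reverse copy; the slot to restore has to be
# chosen by the operator, since the most recent slot is not tracked on disk.
def _example_restore_backup(slot):
    import os
    from shutil import copyfile

    path = cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_path)
    file = os.path.join(path, cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_file))
    backup = '{}.backup_{}'.format(file, slot)
    if os.path.exists(backup):
        copyfile(backup, file)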
def initialize_similarity_index():
    global index
    path = cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_path)
    if not os.path.exists(path):
        os.mkdir(path)

    file = os.path.join(path, cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_file))
    if not os.path.exists(file):
        index = faiss.IndexFlatIP(
            cfg.resolve_int(cfg.FAISS_SETTINGS, cfg.index_size))
        persist_similarity_index()
    else:
        try:
            index = faiss.read_index(file)
            logger.info("Faiss index loaded")
        except (OSError, TypeError, NameError):
            # fall back to a fresh, empty index when the persisted file
            # cannot be read
            index = faiss.IndexFlatIP(
                cfg.resolve_int(cfg.FAISS_SETTINGS, cfg.index_size))
            logger.error("Can't load index! Using default empty index")
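
# Hedged sketch of how the flat inner-product index above behaves (standalone
# example, not called anywhere): IndexFlatIP expects float32 row vectors of
# dimension cfg.index_size, add() appends them in insertion order, and
# search() returns the inner-product scores and positions of the k best
# matches.
def _example_faiss_index_usage():
    import numpy as np
    import faiss

    dim = 4                                    # stands in for cfg.index_size
    demo_index = faiss.IndexFlatIP(dim)
    vectors = np.random.rand(10, dim).astype('float32')
    demo_index.add(vectors)                    # ids are assigned 0..ntotal-1

    query = vectors[:1]
    scores, ids = demo_index.search(query, 3)  # top-3 most similar rows
    return scores, ids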
def cron_job_runner():
    schedule.every(cfg.resolve_int(
        cfg.CRON_JOB, cfg.cron_job_interval)).minutes.do(trigger_backup)
    while True:
        schedule.run_pending()
        time.sleep(1)
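
# Hedged sketch: cron_job_runner() blocks forever, so it has to run off the
# request-handling thread. initialize_cron_job() is not shown here; one
# plausible way to start the loop is a daemon thread like this (an
# assumption, not the confirmed implementation).
def _example_start_cron_job():
    import threading

    worker = threading.Thread(target=cron_job_runner, daemon=True)
    worker.start()
    return worker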
def classify_content(content):
    # create a generator for fetching data
    urls = []
    for asset in content.assets:
        urls.append(asset.url)

    # prepare images for download
    val_generator = UrlGenerator(urls,
                                 cfg.resolve(cfg.RETINANET_MODEL,
                                             cfg.classes_file),
                                 cfg.resolve(cfg.RETINANET_MODEL,
                                             cfg.labels_file))

    response = Response()

    # load image
    for i, asset in enumerate(content.assets):
        logger.info('Running classification on: {}'.format(asset.url))

        # initialize result object
        result = Result()
        result.url = asset.url
        result.asset_id = asset.asset_id

        logger.info('Reading image bgr...')
        try:
            # fetch images
            image = val_generator.read_image_bgr(i)

            # index original image for searching
            if content.insert:
                index_original_image(image, asset)
        except (OSError, ConnectTimeout, HTTPError, ReadTimeout, Timeout,
                ConnectionError):
            logger.warning('Skipped: Unable to reach resource')
            continue
        except Exception:
            err = traceback.format_exc()
            logger.error('Could not read image: {}'.format(err))
            continue

        # copy to draw on
        logger.info('Drawing cvt color...')
        draw = np.asarray(image.copy())
        draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

        # pre-process the image for the network
        logger.info('Processing image...')
        image = val_generator.preprocess_image(image)
        image, scale = val_generator.resize_image(image)

        # classify image
        start = time.time()
        boxes, scores, labels = core.model.predict_on_batch(
            np.expand_dims(image, axis=0))
        elapsed = time.time() - start
        logger.info('Processing time: {}'.format(elapsed))
        result.time = str(elapsed)

        boxes /= scale

        # process and save detections
        idx = 0
        for box, score, label in zip(boxes[0], scores[0], labels[0]):
            if score < cfg.resolve_float(cfg.CLASSIFICATION,
                                         cfg.min_confidence):
                continue

            # get position data
            box = boxes[0, idx, :4].astype(int)
            label_name = val_generator.label_to_name(label)

            # save meta-info for REST API response
            caption = Caption(str(label), label_name, str(score),
                              '{};{}'.format(box[0], box[1]),  # x1;y1
                              '{};{}'.format(box[2], box[3]))  # x2;y2
            result.captions.append(caption)

            # crop image for extraction
            h = box[3] - box[1]
            w = box[2] - box[0]
            cropped_img = draw[box[1]:(box[1] + h), box[0]:(box[0] + w)]

            if content.insert:
                # update sequence to remove previous index if available
                remove_cropped_if_asset_exists(asset)

            # process cropped image fragment for searching
            cropped_file_name = index_cropped_image(asset, cropped_img,
                                                    label_name, idx,
                                                    insert=content.insert)
            features = extract_features(cropped_file_name)
            faiss_features = features.reshape(
                (1, cfg.resolve_int(cfg.FAISS_SETTINGS, cfg.index_size)))

            # add or clean image
            if content.insert:
                # add feature to faiss index
                core.index.add(faiss_features)
            else:
                # clean temp image again
                os.remove(cropped_file_name)

            # index caption
            if content.insert:
                index_asset_meta(asset, idx, caption, features.tolist(),
                                 core.index.ntotal - 1)

            # find similar suggestions and handle response
            asset_metas = get_similar_asset_metas(
                faiss_features,
                cfg.resolve_int(cfg.FAISS_SETTINGS,
                                cfg.index_n_similar_results))
            handle_suggestion_response(result, asset.asset_id, asset_metas)

            idx += 1

        # add result to response list
        response.result_list.append(result)

    return response
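
# Hedged sketch (standalone, not called anywhere): the similarity lookup above
# goes through get_similar_asset_metas(), whose internals are not shown here.
# Against an IndexFlatIP, the core step is a plain k-nearest search on the
# feature row; the returned positions map back to the metadata stored via
# index_asset_meta(..., core.index.ntotal - 1) at insert time. Everything
# beyond the search call is an assumption about that mapping, not the
# confirmed implementation.
def _example_similarity_lookup(faiss_features, k):
    scores, positions = core.index.search(faiss_features, k)
    # positions[0] holds the faiss positions of the k closest stored features;
    # -1 marks empty slots when the index holds fewer than k vectors
    return [int(p) for p in positions[0] if p != -1]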
@app.route('/services/v1/shutdown', methods=['GET'])
def shutdown_hook():
    core.trigger_backup()
    sys.exit()


@app.route('/services/v1/index/init', methods=['GET'])
def init_similarity_index():
    core.initialize_elastic_search()
    return Response(status=200)


@app.before_first_request
def initialize():
    core.initialize_similarity_index()
    core.initialize_blacklist()
    core.initialize_elastic_search()
    core.initialize_retinanet()
    core.initialize_extraction_model()
    core.initialize_cron_job()


if __name__ == '__main__':
    core.initialize_logging()
    logger.info('Server app started!')
    app.run(host=cfg.resolve(cfg.RETINANET_SERVER, cfg.host),
            port=cfg.resolve_int(cfg.RETINANET_SERVER, cfg.port),
            debug=cfg.resolve_bool(cfg.RETINANET_SERVER, cfg.debug),
            threaded=cfg.resolve_bool(cfg.RETINANET_SERVER, cfg.threaded))
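
# Hedged usage notes (comments only): once the server is running, the two
# maintenance endpoints above are plain GETs, e.g. with the requests library:
#
#     requests.get('http://<host>:<port>/services/v1/index/init')   # re-init index
#     requests.get('http://<host>:<port>/services/v1/shutdown')     # backup + exit
#
# <host> and <port> stand for whatever cfg.RETINANET_SERVER resolves to in the
# app.run() call above.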