Example #1
def persist_similarity_index():
    if index is not None:
        file = os.path.join(cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_path),
                            cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_file))
        faiss.write_index(index, file)
        logger.info("Faiss index saved to disk")
    else:
        logger.warning("Can't save, index was not loaded yet!")
Example #2
def persist_blacklist_index():
    path = cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_path)
    file = os.path.join(
        path, cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_blacklist_file))
    with open(file, 'w') as f:
        writer = csv.DictWriter(f, fieldnames=['index'])
        writer.writeheader()
        blacklist_rows = threadsafe_blacklist_operation(
            lambda bl: [{'index': idx} for idx in bl])
        writer.writerows(blacklist_rows)
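
The threadsafe_blacklist_operation helper is referenced above but not part of this listing. A minimal sketch of what it presumably does, assuming it simply runs the given callable against the module-level blacklist while holding the blacklist_mutex created in Example #7:

def threadsafe_blacklist_operation(operation):
    # Hypothetical sketch: serialize all blacklist access through the
    # module-level mutex created in initialize_blacklist() (Example #7).
    with blacklist_mutex:
        return operation(blacklist)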
Example #3
def map_index_ids_to_asset_metas(indices_ids):
    num_entries = np.array(indices_ids).shape[0]
    asset_metas = []
    search = Search(index=cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_prefix) +
                          cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_asset_meta))
    search.query = Q('terms', faiss_idx=indices_ids)
    search = search[:num_entries]
    response = search.execute()
    for hit in response:
        asset_metas.append(AssetMeta(hit.asset_id, hit.cropped_id, hit.faiss_idx))
    return asset_metas if response.hits.total > 0 else []
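
For context, the indices_ids argument would typically be the integer row ids returned by a Faiss search. A hedged usage sketch, assuming the module-level index from Example #9 and a (1, index_size) float32 feature vector named query_features (both names are assumptions here):

# index.search returns (distances, ids); an id of -1 means "no neighbour found"
_, neighbour_ids = index.search(query_features, 5)
valid_ids = [int(i) for i in neighbour_ids[0] if i >= 0]
asset_metas = map_index_ids_to_asset_metas(valid_ids)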
Example #4
def initialize_retinanet():
    global model
    logger.info('Loading retinanet classification model...')
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    logger.info('Setting keras session...')
    keras.backend.tensorflow_backend.set_session(get_session())

    logger.info('Loading model...')
    model = models.load_model(
        cfg.resolve(cfg.RETINANET_MODEL, cfg.model_path) +
        cfg.resolve(cfg.RETINANET_MODEL, cfg.model_name),
        backbone_name=cfg.resolve(cfg.RETINANET_MODEL, cfg.backbone_name))
Example #5
def initialize_elastic_search():
    global db_asset, db_cropped, db_asset_meta
    connections.create_connection(hosts=cfg.resolve(cfg.ELASTICSEARCH_SERVER,
                                                    cfg.host),
                                  port=cfg.resolve(cfg.ELASTICSEARCH_SERVER,
                                                   cfg.port),
                                  timeout=20)

    db_asset = Index(
        cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_prefix) +
        cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_asset))
    if not db_asset.exists():
        db_asset.doc_type(EsAsset)
        db_asset.create()
    db_asset_meta = Index(
        cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_prefix) +
        cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_asset_meta))
    if not db_asset_meta.exists():
        db_asset_meta.doc_type(EsAssetMeta)
        db_asset_meta.create()
    db_cropped = Index(
        cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_prefix) +
        cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_cropped))
    if not db_cropped.exists():
        db_cropped.doc_type(EsCropped)
        db_cropped.create()

    logger.info("Elastic search initialized!")
Example #6
def backup_persisting_files():
    global round_robin_backup_index
    round_robin_backup_index += 1
    round_robin_backup_index %= cfg.resolve_int(
        cfg.CRON_JOB, cfg.cron_job_round_robin_backups)

    # copy faiss file
    path = cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_path)
    file = os.path.join(path, cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_file))
    copyfile(file, file + '.backup_{}'.format(round_robin_backup_index))

    # copy blacklist file
    file = os.path.join(
        path, cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_blacklist_file))
    copyfile(file, file + '.backup_{}'.format(round_robin_backup_index))
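
For illustration, assuming cron_job_round_robin_backups resolves to 3 (a hypothetical value), the suffix cycles through 1, 2, 0, 1, 2, ... so only the last three generations of each file are kept:

n_backups = 3        # hypothetical value of cron_job_round_robin_backups
idx = 0
suffixes = []
for _ in range(5):
    idx = (idx + 1) % n_backups
    suffixes.append(idx)
print(suffixes)      # [1, 2, 0, 1, 2]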
Example #7
def initialize_blacklist():
    global blacklist, blacklist_mutex
    blacklist_mutex = Lock()

    path = cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_path)
    if not os.path.exists(path):
        os.mkdir(path)
    file = os.path.join(
        path, cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_blacklist_file))
    if not os.path.exists(file):
        Path(file).touch()
    with open(file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            blacklist.append(int(row['index']))
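
A hedged sketch of how an id might be blacklisted at runtime and persisted, reusing persist_blacklist_index() from Example #2. The function blacklist_index_id and the use of threadsafe_blacklist_operation here are illustrative assumptions, not part of the listing:

def blacklist_index_id(faiss_id):
    # append under the mutex, then write the CSV back to disk
    threadsafe_blacklist_operation(lambda bl: bl.append(int(faiss_id)))
    persist_blacklist_index()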
Example #8
def remove_cropped_if_asset_exists(asset):
    try:
        search = Search(index=cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_prefix) +
                              cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_asset_meta))
        search.query = Q('match', asset_id=asset.asset_id)
        for hit in search:
            idx = '{}-{}'.format(asset.asset_id, hit.cropped_id)
            s = Search(index=cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_prefix) +
                             cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_cropped))
            s.query = Q('match', id=idx)
            s.delete()
        search.delete()
    except Exception:
        logger.exception('Failed to remove existing cropped entries for asset {}'.format(asset.asset_id))
Example #9
def initialize_similarity_index():
    global index
    path = cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_path)
    if not os.path.exists(path):
        os.mkdir(path)

    file = os.path.join(path, cfg.resolve(cfg.FAISS_SETTINGS, cfg.index_file))
    if not os.path.exists(file):
        index = faiss.IndexFlatIP(
            cfg.resolve_int(cfg.FAISS_SETTINGS, cfg.index_size))
        persist_similarity_index()
    else:
        try:
            index = faiss.read_index(file)
            logger.info("Faiss index loaded")
        except (OSError, TypeError, NameError):
            # fall back to a fresh empty index of the configured dimension
            index = faiss.IndexFlatIP(
                cfg.resolve_int(cfg.FAISS_SETTINGS, cfg.index_size))
            logger.error("Can't load index! Using default empty index")
Example #10
def initialize_logging():
    print('Initializing logging...')
    # set up logging to file
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%m-%d %H:%M',
        filename=cfg.resolve(cfg.DEFAULT, cfg.log_dir) +
        cfg.resolve(cfg.DEFAULT, cfg.log_name),
        filemode='a')
    # define a Handler which writes INFO messages or higher to the sys.stderr
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    # set a format which is simpler for console use
    formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
    # tell the handler to use this format
    console.setFormatter(formatter)
    # add the handler to the root logger
    logging.getLogger('').addHandler(console)
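
Once initialize_logging() has run, any module logger inherits both handlers: DEBUG and above go to the configured log file, while INFO and above also reach the console. A minimal usage sketch (the logger name is illustrative):

logger = logging.getLogger('celum.core')
logger.debug('written to the log file only')
logger.info('written to both the log file and the console')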
Example #11
def index_original_image(img, asset):
    # save original image
    original_dir = cfg.resolve(cfg.CLASSIFICATION, cfg.original_images_path)
    if not os.path.exists(original_dir):
        os.makedirs(original_dir)
        logger.info('Created new dir: {}'.format(original_dir))
    ori_file_name = '{}/{}.png'.format(original_dir, asset.asset_id)
    cv2.imwrite(ori_file_name, img)

    # insert asset into database
    logger.info('Inserting asset into Elasticsearch...')
    es_asset = EsAsset(meta={'id': asset.asset_id},
                       asset_id=asset.asset_id,
                       path=ori_file_name)
    es_asset.save()
    return ori_file_name
Example #12
def index_cropped_image(asset, img, label_name, idx, insert=False):
    # save cropped image
    extraction_dir = '{}/{}'.format(cfg.resolve(cfg.CLASSIFICATION, cfg.extracted_images_path), label_name)
    if not os.path.exists(extraction_dir):
        os.makedirs(extraction_dir)
        logger.info('Created new dir: {}'.format(extraction_dir))
    cropped_file_name = '{}/{}-{}.png'.format(extraction_dir, asset.asset_id, idx)
    logger.info('Extracted image: {}'.format(cropped_file_name))
    converted_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    cv2.imwrite(cropped_file_name, converted_img)
    # insert cropped image into database
    if insert:
        es_cropped = EsCropped(meta={'id': '{}-{}'.format(asset.asset_id, idx)},
                               asset_id=asset.asset_id,
                               path=cropped_file_name)
        es_cropped.save()
    return cropped_file_name
Example #13
@app.route('/services/v1/shutdown', methods=['GET'])
def shutdown_hook():
    core.trigger_backup()
    sys.exit()


@app.route('/services/v1/index/init', methods=['GET'])
def init_similarity_index():
    core.initialize_elastic_search()
    return Response(status=200)


@app.before_first_request
def initialize():
    core.initialize_similarity_index()
    core.initialize_blacklist()
    core.initialize_elastic_search()
    core.initialize_retinanet()
    core.initialize_extraction_model()
    core.initialize_cron_job()


if __name__ == '__main__':
    core.initialize_logging()
    logger.info('Server app started!')
    app.run(host=cfg.resolve(cfg.RETINANET_SERVER, cfg.host),
            port=cfg.resolve_int(cfg.RETINANET_SERVER, cfg.port),
            debug=cfg.resolve_bool(cfg.RETINANET_SERVER, cfg.debug),
            threaded=cfg.resolve_bool(cfg.RETINANET_SERVER, cfg.threaded))
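
Assuming the server is reachable at http://localhost:5000 (the actual host and port come from the RETINANET_SERVER config section), the index init endpoint above could be exercised with the requests library:

import requests

resp = requests.get('http://localhost:5000/services/v1/index/init')
print(resp.status_code)   # 200 once the Elasticsearch indices are (re)initialized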
Example #14
    class Meta:
        index = cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_prefix) + \
                cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_asset_meta)
Example #15
    class Meta:
        index = cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_prefix) + \
                cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_cropped)
Example #16
def classify_content(content):
    # create a generator for fetching data
    urls = []
    for asset in content.assets:
        urls.append(asset.url)
    # prepare images for download
    val_generator = UrlGenerator(urls,
                                 cfg.resolve(cfg.RETINANET_MODEL, cfg.classes_file),
                                 cfg.resolve(cfg.RETINANET_MODEL, cfg.labels_file))

    response = Response()
    # load image
    for i, asset in enumerate(content.assets):
        logger.info('Running classification on: {}'.format(asset.url))
        # initialize result object
        result = Result()
        result.url = asset.url
        result.asset_id = asset.asset_id

        logger.info('Reading image bgr...')
        try:
            # fetch images
            image = val_generator.read_image_bgr(i)
            # index original image for searching
            if content.insert:
                index_original_image(image, asset)
        except (OSError, ConnectTimeout, HTTPError, ReadTimeout, Timeout, ConnectionError):
            logger.warning('Skipped: Unable to reach resource')
            continue
        except Exception:
            err = traceback.format_exc()
            logger.error('Could not read image: {}'.format(err))
            continue

        # copy to draw on
        logger.info('Drawing cvt color...')
        draw = np.asarray(image.copy())
        draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

        # pre-process the image for the network
        logger.info('Processing image...')
        image = val_generator.preprocess_image(image)
        image, scale = val_generator.resize_image(image)

        # classify image
        start = time.time()
        boxes, scores, labels = core.model.predict_on_batch(np.expand_dims(image, axis=0))
        elapsed = time.time() - start
        logger.info('Processing time: {}'.format(elapsed))
        result.time = str(elapsed)
        boxes /= scale
        # process and save detections
        idx = 0
        for box, score, label in zip(boxes[0], scores[0], labels[0]):
            if score < cfg.resolve_float(cfg.CLASSIFICATION, cfg.min_confidence):
                continue
            # get position data
            box = box.astype(int)
            label_name = val_generator.label_to_name(label)
            # save meta-info for REST API response
            caption = Caption(str(label),
                              label_name,
                              str(score),
                              '{};{}'.format(box[0], box[1]),   # x1;y1
                              '{};{}'.format(box[2], box[3]))   # x2;y2
            result.captions.append(caption)
            # Crop image for extraction
            h = box[3] - box[1]
            w = box[2] - box[0]
            cropped_img = draw[box[1]:(box[1] + h), box[0]:(box[0] + w)]

            if content.insert:
                # update sequence to remove previous index if available
                remove_cropped_if_asset_exists(asset)

            # process cropped image fragment for searching
            cropped_file_name = index_cropped_image(asset, cropped_img, label_name, idx, insert=content.insert)
            features = extract_features(cropped_file_name)
            faiss_features = features.reshape((1, cfg.resolve_int(cfg.FAISS_SETTINGS, cfg.index_size)))

            # add or clean image
            if content.insert:
                # add feature to faiss index
                core.index.add(faiss_features)
            else:
                # clean temp image again
                os.remove(cropped_file_name)

            # index caption
            if content.insert:
                index_asset_meta(asset, idx, caption, features.tolist(), core.index.ntotal - 1)

            # find similar suggestions and handle response
            asset_metas = get_similar_asset_metas(faiss_features,
                                                  cfg.resolve_int(cfg.FAISS_SETTINGS, cfg.index_n_similar_results))
            handle_suggestion_response(result, asset.asset_id, asset_metas)
            idx += 1

        # add result to response list
        response.result_list.append(result)
    return response
Example #17
import config_accessor as cfg
from elasticsearch_dsl import DocType, Keyword, Text

import logging
logger = logging.getLogger('celum.models_es')

search_index_prefix = cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_prefix)


class EsAsset(DocType):
    asset_id = Keyword()
    path = Text()

    class Meta:
        index = cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_prefix) + \
                cfg.resolve(cfg.ELASTICSEARCH_SERVER, cfg.index_asset)

    def save(self, **kwargs):
        return super(EsAsset, self).save(**kwargs)


class EsAssetMeta(DocType):
    asset_id = Keyword()
    cropped_id = Keyword()
    faiss_idx = Text()
    label = Text()
    score = Text()
    top_left = Text()
    bottom_right = Text()
    feature = Text()