Ejemplo n.º 1
0
def retrieve_maskings(project, scene_ids):
    """Collect masks for every probe of the given scenes, trying several sources.

    Sources are tried in order and each later source is only asked for the
    probes still missing after the previous one:

    1. the project RDS (GCP or default reader, depending on ``Config``),
    2. ``get_masks`` (logged as the "Mongo" step),
    3. ``get_masks_archive`` (logged as the "Athena" step, best-effort).

    Args:
        project: Project identifier forwarded to every data-access helper.
        scene_ids: Scene ids forwarded to ``get_stitching_data``.

    Returns:
        The stitching data inner-joined on ``probe_id`` with the concatenated
        masks from all sources. Probes with no mask in any source are dropped
        by the inner join (and reported with a warning).
    """
    session = OrmSession(project)
    stitching_data = get_stitching_data(project, scene_ids, session)

    scene_probes = set(stitching_data.probe_id.tolist())

    # First source: the project RDS.
    if Config.get_cloud() == Config.GCP:
        masks_from_rds = get_masks_from_rds_gcp(project,
                                                scene_probes,
                                                session=session)
    else:
        masks_from_rds = get_masks_from_rds(project,
                                            scene_probes,
                                            session=session)

    # An empty frame may not carry a probe_id column, so guard before reading.
    rds_probes = set() if masks_from_rds.empty else set(masks_from_rds.probe_id)
    missing_after_rds = scene_probes - rds_probes
    Log.info("Missing {} out of {} probes from RDS".format(
        len(missing_after_rds), len(scene_probes)))

    # Second source: only queried for probes the RDS did not provide.
    if missing_after_rds:
        probe_masks = get_masks(project, missing_after_rds)
        mongo_probes = (set() if probe_masks.empty
                        else set(probe_masks.probe_id))
    else:
        probe_masks = pd.DataFrame(columns=masks_from_rds.columns)
        mongo_probes = set()

    missing_after_mongo = missing_after_rds - mongo_probes
    Log.info("Missing {} out of {} probes from Mongo".format(
        len(missing_after_mongo), len(missing_after_rds)))

    # Third source: best-effort archive lookup for whatever is still missing.
    if missing_after_mongo:
        try:
            probe_masks_athena = get_masks_archive(project,
                                                   missing_after_mongo,
                                                   session)
            athena_probes = set(probe_masks_athena.probe_id)
        except Exception as exc:
            # Deliberately best-effort: an archive failure must not abort the
            # retrieval. Catch Exception (not a bare except) so that
            # KeyboardInterrupt/SystemExit still propagate, and keep the cause
            # in the log instead of discarding it.
            Log.warning('Unable to connect to AWS Athena: {}'.format(exc))
            probe_masks_athena = pd.DataFrame(columns=masks_from_rds.columns)
            athena_probes = set()
    else:
        probe_masks_athena = pd.DataFrame(columns=masks_from_rds.columns)
        athena_probes = set()

    missing_everywhere = missing_after_mongo - athena_probes
    if missing_everywhere:
        Log.warning("{} Probes are missing masks in all sources: {}".format(
            len(missing_everywhere), missing_everywhere))
    probe_masks = pd.concat([masks_from_rds, probe_masks, probe_masks_athena])

    return stitching_data.merge(probe_masks, on='probe_id', how='inner')
Ejemplo n.º 2
0
def retrieve_maskings_flat(project, scene_ids):
    """Load the RDS masks and the stitching data for the given scenes.

    Only the project RDS is queried (GCP or default reader, depending on
    ``Config``); no other source is consulted here.

    Args:
        project: Project identifier forwarded to the data-access helpers.
        scene_ids: Scene ids forwarded to ``get_stitching_data``.

    Returns:
        A ``(probe_maskings, stitching_data)`` tuple, where the ``x1``, ``x2``,
        ``y1`` and ``y2`` columns of ``probe_maskings`` are cast to float.
    """
    session = OrmSession(project)
    stitching_data = get_stitching_data(project, scene_ids, session)
    probes_in_scenes = set(stitching_data.probe_id.tolist())

    # Pick the RDS reader that matches the configured cloud.
    on_gcp = Config.get_cloud() == Config.GCP
    fetch_masks = get_masks_from_rds_gcp if on_gcp else get_masks_from_rds
    probe_maskings = fetch_masks(project, probes_in_scenes, session=session)

    # Cast the x1/x2/y1/y2 columns to float, one column at a time.
    for column in ('x1', 'x2', 'y1', 'y2'):
        probe_maskings[column] = probe_maskings[column].astype(float)

    return probe_maskings, stitching_data
Ejemplo n.º 3
0
def _probe_ids_in(masks):
    """Return the set of probe ids in *masks*; an empty frame yields an empty set."""
    # An empty frame may have no probe_id column at all, so check first.
    if masks.empty:
        return set()
    return set(masks.probe_id)


def get_masks(project,
              probe_ids,
              limit=None,
              detection_mode=False,
              unique=False,
              orm_session=None,
              allow_athena=False):
    """Fetch masks for the given probes, falling back through several sources.

    The project RDS is queried first; probes it has no masks for are looked up
    in Mongo, and, only when ``allow_athena`` is true, the remaining ones are
    looked up through ``get_masks_archive`` (Athena).

    Args:
        project: Project identifier forwarded to every data-access helper.
        probe_ids: An iterable of probe ids, or a single id (a non-iterable
            value or a single string).
        limit: Not used by this function.
        detection_mode: Must be falsy; detector mode is rejected.
        unique: Not used by this function.
        orm_session: Optional session handed to ``WithAgent`` together with
            ``OrmSession`` and ``project`` (presumably reused when given,
            created otherwise; verify against ``WithAgent``).
        allow_athena: When true, also query Athena for probes still missing
            after RDS and Mongo.

    Returns:
        A DataFrame concatenating the masks found in RDS, Mongo and Athena.

    Raises:
        Exception: If ``detection_mode`` is truthy.
    """
    if detection_mode:
        raise Exception("MaskingEngine detector mode is not supported")

    if isinstance(probe_ids, (str, bytes)):
        # A lone string id is iterable; set() would split it into characters.
        probe_ids = {probe_ids}
    else:
        try:
            probe_ids = set(probe_ids)
        except TypeError:
            # Non-iterable scalar id.
            probe_ids = {probe_ids}

    with WithAgent(orm_session, OrmSession, project) as session:
        # Primary source: the project RDS.
        if Config.get_cloud() == Config.GCP:
            masks_from_rds = get_masks_from_rds_gcp(project,
                                                    probe_ids,
                                                    session=session)
        else:
            masks_from_rds = get_masks_from_rds(project,
                                                probe_ids,
                                                session=session)

        probes_without_masks = probe_ids - _probe_ids_in(masks_from_rds)
        if probes_without_masks:
            Log.info("Missing masks from RDS",
                     extra={
                         'missing': len(probes_without_masks),
                         'searched': len(probe_ids)
                     })

        # First fallback: the project Mongo, only for what RDS lacked.
        probe_ids = probes_without_masks
        if probe_ids:
            probe_masks = get_masks_from_mongo(project, probe_ids)
        else:
            probe_masks = pd.DataFrame(columns=masks_from_rds.columns)

        probes_without_masks = probe_ids - _probe_ids_in(probe_masks)
        if probes_without_masks:
            Log.warning("Missing masks from Mongo",
                        extra={
                            'missing': len(probes_without_masks),
                            'searched': len(probe_ids)
                        })

        # Second fallback: Athena, opt-in via allow_athena.
        probe_ids = probes_without_masks
        if allow_athena and probe_ids:
            probe_masks_athena = get_masks_archive(project, probe_ids, session)
            # Guarded like the RDS and Mongo results: an empty result must not
            # fail on a missing probe_id column.
            athena_probes = _probe_ids_in(probe_masks_athena)
            if not athena_probes:
                Log.warning(
                    "No masks were found in athena (waste of resources)")
        else:
            probe_masks_athena = pd.DataFrame(columns=masks_from_rds.columns)
            athena_probes = set()

        probes_without_masks = probe_ids - athena_probes
        if allow_athena and probes_without_masks:
            Log.error("Missing masks from Athena",
                      extra={
                          'missing': len(probes_without_masks),
                          'searched': len(probe_ids)
                      })

        probe_masks = pd.concat(
            [masks_from_rds, probe_masks, probe_masks_athena])

    return probe_masks