def retrieve_maskings(project, scene_ids):
    """Fetch masks for every probe of the given scenes and join them to the stitching data.

    Sources are tried in order, each one only for the probes that are still
    missing after the previous one:

    1. the project RDS (the GCP variant when ``Config.get_cloud()`` is
       ``Config.GCP``),
    2. ``get_masks`` (reported in the logs as "Mongo"),
    3. ``get_masks_archive`` (reported in the logs as Athena).

    The archive lookup is best-effort: any ``Exception`` raised while querying
    it is logged as a warning and treated as "no masks found".

    Args:
        project: Project identifier passed through to every data-access helper.
        scene_ids: Scenes whose probes should be looked up.

    Returns:
        ``stitching_data`` inner-merged with the collected masks on
        ``probe_id``; probes without a mask in any source are therefore
        absent from the result.
    """
    session = OrmSession(project)
    stitching_data = get_stitching_data(project, scene_ids, session)
    scene_probes = set(stitching_data.probe_id.tolist())

    # get from the project rds
    if Config.get_cloud() == Config.GCP:
        masks_from_rds = get_masks_from_rds_gcp(
            project, scene_probes, session=session)
    else:
        masks_from_rds = get_masks_from_rds(
            project, scene_probes, session=session)
    # An empty frame is not read for its probe_id column.
    rds_probes = (
        set() if masks_from_rds.empty else set(masks_from_rds.probe_id))

    missing_after_rds = scene_probes - rds_probes
    Log.info("Missing {} out of {} probes from RDS".format(
        len(missing_after_rds), len(scene_probes)))

    if missing_after_rds:
        probe_masks = get_masks(project, missing_after_rds)
        mongo_probes = (
            set() if probe_masks.empty else set(probe_masks.probe_id))
    else:
        mongo_probes = set()
        probe_masks = pd.DataFrame(columns=masks_from_rds.columns)

    missing_after_mongo = missing_after_rds - mongo_probes
    Log.info("Missing {} out of {} probes from Mongo".format(
        len(missing_after_mongo), len(missing_after_rds)))

    if missing_after_mongo:
        try:
            probe_masks_athena = get_masks_archive(
                project, missing_after_mongo, session)
            # Same empty-frame guard as for the RDS and Mongo results, so an
            # empty archive answer is not misreported as a connection failure.
            athena_probes = (
                set() if probe_masks_athena.empty
                else set(probe_masks_athena.probe_id))
        except Exception as err:
            # Best-effort source: keep going without archive masks, but do not
            # swallow KeyboardInterrupt/SystemExit and do record the cause.
            Log.warning('Unable to connect to AWS Athena',
                        extra={'error': repr(err)})
            probe_masks_athena = pd.DataFrame(columns=masks_from_rds.columns)
            athena_probes = set()
    else:
        probe_masks_athena = pd.DataFrame(columns=masks_from_rds.columns)
        athena_probes = set()

    unresolved = missing_after_mongo - athena_probes
    if unresolved:
        Log.warning("{} Probes are missing masks in all sources: {}".format(
            len(unresolved), unresolved))

    probe_masks = pd.concat([masks_from_rds, probe_masks, probe_masks_athena])
    return stitching_data.merge(probe_masks, on='probe_id', how='inner')
def retrieve_maskings_flat(project, scene_ids):
    """Return the RDS masks and the stitching data of the given scenes, unmerged.

    Only the project RDS is queried (the GCP variant when
    ``Config.get_cloud()`` is ``Config.GCP``); no other mask source is
    consulted. The ``x1``, ``x2``, ``y1`` and ``y2`` columns of the mask frame
    are cast to ``float`` before returning.

    Args:
        project: Project identifier passed through to the data-access helpers.
        scene_ids: Scenes whose probes should be looked up.

    Returns:
        A ``(probe_maskings, stitching_data)`` tuple.
    """
    session = OrmSession(project)
    stitching_data = get_stitching_data(project, scene_ids, session)
    scene_probes = set(stitching_data.probe_id.tolist())

    # Pick the RDS reader that matches the cloud we are running on.
    on_gcp = Config.get_cloud() == Config.GCP
    fetch_masks = get_masks_from_rds_gcp if on_gcp else get_masks_from_rds
    probe_maskings = fetch_masks(project, scene_probes, session=session)

    for coordinate in ('x1', 'x2', 'y1', 'y2'):
        as_float = probe_maskings[coordinate].astype(float)
        probe_maskings[coordinate] = as_float

    return probe_maskings, stitching_data
def get_masks(project, probe_ids, limit=None, detection_mode=False,
              unique=False, orm_session=None, allow_athena=False):
    """Fetch masks for the given probes, falling back through several sources.

    Sources are tried in order, each one only for the probes still without
    masks after the previous one: the project RDS (the GCP variant when
    ``Config.get_cloud()`` is ``Config.GCP``), then Mongo, then - only when
    ``allow_athena`` is true - the archive via ``get_masks_archive``.

    Args:
        project: Project identifier passed through to the data-access helpers.
        probe_ids: An iterable of probe ids, or a single non-iterable id.
        limit: Not read by this function; kept for interface compatibility.
        detection_mode: Must be falsy; detector mode is rejected.
        unique: Not read by this function; kept for interface compatibility.
        orm_session: Handed to ``WithAgent`` together with ``OrmSession`` and
            ``project`` (presumably an existing session to reuse instead of
            opening a new one - TODO confirm against ``WithAgent``).
        allow_athena: When true, probes missing from RDS and Mongo are also
            looked up in the archive.

    Returns:
        The concatenation of the mask frames obtained from RDS, Mongo and the
        archive, in that order.

    Raises:
        Exception: If ``detection_mode`` is truthy.
    """
    if detection_mode:
        raise Exception("MaskingEngine detector mode is not supported")

    try:
        probe_ids = set(probe_ids)
    except TypeError:
        # A single, non-iterable id was passed.
        probe_ids = {probe_ids}

    with WithAgent(orm_session, OrmSession, project) as session:
        # get from the project rds
        if Config.get_cloud() == Config.GCP:
            masks_from_rds = get_masks_from_rds_gcp(
                project, probe_ids, session=session)
        else:
            masks_from_rds = get_masks_from_rds(
                project, probe_ids, session=session)
        # An empty frame is not read for its probe_id column.
        rds_probes = (
            set() if masks_from_rds.empty else set(masks_from_rds.probe_id))

        probes_without_masks = probe_ids - rds_probes
        if probes_without_masks:
            Log.info("Missing masks from RDS", extra={
                'missing': len(probes_without_masks),
                'searched': len(probe_ids)
            })

        # get from the project mongo as fallback
        probe_ids = probes_without_masks
        if probe_ids:
            probe_masks = get_masks_from_mongo(project, probe_ids)
            mongo_probes = (
                set() if probe_masks.empty else set(probe_masks.probe_id))
        else:
            mongo_probes = set()
            probe_masks = pd.DataFrame(columns=masks_from_rds.columns)

        probes_without_masks = probe_ids - mongo_probes
        if probes_without_masks:
            Log.warning("Missing masks from Mongo", extra={
                'missing': len(probes_without_masks),
                'searched': len(probe_ids)
            })

        # get from the project athena as fallback
        probe_ids = probes_without_masks
        if allow_athena and probe_ids:
            probe_masks_athena = get_masks_archive(project, probe_ids, session)
            # Same empty-frame guard as for RDS and Mongo: an empty archive
            # answer must reach the warning below instead of failing on a
            # missing probe_id column.
            athena_probes = (
                set() if probe_masks_athena.empty
                else set(probe_masks_athena.probe_id))
            if not athena_probes:
                Log.warning(
                    "No masks were found in athena (waste of resources)")
        else:
            probe_masks_athena = pd.DataFrame(columns=masks_from_rds.columns)
            athena_probes = set()

        probes_without_masks = probe_ids - athena_probes
        if allow_athena and probes_without_masks:
            Log.error("Missing masks from Athena", extra={
                'missing': len(probes_without_masks),
                'searched': len(probe_ids)
            })

        probe_masks = pd.concat(
            [masks_from_rds, probe_masks, probe_masks_athena])
        return probe_masks