def generate_insert(self):
    """Return the list of INSERT statements that load the staging table.

    Each statement selects from the transform query wrapped in a
    ``source_transform`` subselect and appends a ``gdw_state_dts_range``
    column: the prioritized aggregate of all ``[gdw_state_start,
    gdw_state_end)`` timestamp ranges for which a row is valid, windowed
    per object key and ordered by ``self.priority_order``.
    """
    staging_alias = catalog.aliases[
        self.target_alias['load']['staging_alias']]

    def build_insert():
        # Build one staging INSERT: subselect the transform query and
        # append the aggregated/prioritized validity-range column.
        source = sqlalchemy.alias(self.select_sql, 'source_transform')
        columns = [
            source.corresponding_column(c) for c in self.select_sql.c
        ]
        gdw_state_dts_range = func.prioritize_ranges(
            func.array_agg(
                func.tsrange(
                    literal_column('gdw_state_start'),
                    literal_column('gdw_state_end'))).over(
                partition_by=[
                    literal_column(c) for c in self.object_key_columns
                ],
                order_by=self.priority_order)).label('gdw_state_dts_range')
        columns.append(gdw_state_dts_range)
        return staging_alias.sql_table.insert().from_select(
            self.stage_col_names, select(columns))

    # First insert: fields for which we have no change record.
    result = [build_insert()]

    # Second insert: the original comment says this should select from the
    # existing dimension, look up the object key in stage and remove the
    # ranges already calculated on stage — but the original code was a
    # byte-identical copy of the first insert.
    # NOTE(review): kept identical to preserve behavior; confirm whether
    # the dimension-side statement was ever actually implemented.
    result.append(build_insert())
    return result
def create_area_filter(user):
    """Return a filter matching changes in the user's filtered areas.

    Returns ``None`` when the user has no area filter configured.
    """
    if not user.has_area_filter:
        return None

    # Aggregate the ids of all areas the user filters on into one array.
    area_ids_subquery = (
        DBSession
        .query(func.array_agg(FilterArea.area_id))
        .filter(FilterArea.user_id == user.id)
        .group_by(FilterArea.user_id)
        .subquery('filtered_area_ids')
    )

    # PostgreSQL `&&`: true when the two arrays overlap.
    return DocumentChange.area_ids.op('&&')(area_ids_subquery)
def create_followed_users_filter(user):
    """Return a filter matching changes made by users that `user` follows.

    Returns ``None`` when the user does not follow anyone.
    """
    if not user.is_following_users:
        return None

    # Aggregate the ids of all followed users into a single array.
    followed_ids_subquery = (
        DBSession
        .query(func.array_agg(FollowedUser.followed_user_id))
        .filter(FollowedUser.follower_user_id == user.id)
        .group_by(FollowedUser.follower_user_id)
        .subquery('followed_users')
    )

    # PostgreSQL `&&`: true when the two arrays overlap.
    return DocumentChange.user_ids.op('&&')(followed_ids_subquery)
def create_area_filter(user):
    """Return a filter matching changes in the user's filtered areas.

    Returns ``None`` when the user has no area filter configured.
    """
    if not user.has_area_filter:
        return None
    # Collect all filtered area ids of the user into one array.
    query = DBSession.query(func.array_agg(FilterArea.area_id))
    query = query.filter(FilterArea.user_id == user.id)
    query = query.group_by(FilterArea.user_id)
    filtered_area_ids = query.subquery("filtered_area_ids")
    # `&&` is the PostgreSQL array-overlap operator.
    return DocumentChange.area_ids.op("&&")(filtered_area_ids)
def create_followed_users_filter(user):
    """Return a filter matching changes made by users that `user` follows.

    Returns ``None`` when the user does not follow anyone.
    """
    if not user.is_following_users:
        return None
    # Collect the ids of all followed users into one array.
    query = DBSession.query(func.array_agg(FollowedUser.followed_user_id))
    query = query.filter(FollowedUser.follower_user_id == user.id)
    query = query.group_by(FollowedUser.follower_user_id)
    followed_users = query.subquery("followed_users")
    # `&&` is the PostgreSQL array-overlap operator.
    return DocumentChange.user_ids.op("&&")(followed_users)
def update_langs_of_changes(document_id):
    """Update the langs of all feed entries of the given document."""
    # Aggregate every locale lang of the document into one enum array.
    langs_subquery = (
        DBSession
        .query(cast(
            func.array_agg(DocumentLocale.lang),
            ArrayOfEnum(enums.lang)))
        .filter(DocumentLocale.document_id == document_id)
        .group_by(DocumentLocale.document_id)
        .subquery('langs')
    )
    update_stmt = DocumentChange.__table__.update() \
        .where(DocumentChange.document_id == document_id) \
        .values(langs=langs_subquery.select())
    DBSession.execute(update_stmt)
def _get_select_users_for_routes_aggregated():
    """ Returns a select which retrieves for every route the ids of
    associated users.
    """
    user_ids = func.array_agg(
        DocumentTag.user_id,
        type_=postgresql.ARRAY(Integer)).label('user_ids')
    return (
        select([
            DocumentTag.document_id.label('route_id'),
            user_ids
        ])
        .select_from(DocumentTag)
        .group_by(DocumentTag.document_id)
    )
def update_areas_of_changes(document):
    """Update the area ids of all feed entries of the given document."""
    # Concatenating with `ARRAY[]::integer[]` yields an empty array
    # instead of NULL when the document has no associated areas.
    empty_array = literal_column('ARRAY[]::integer[]')
    aggregated_ids = func.array_agg(
        AreaAssociation.area_id, type_=postgresql.ARRAY(Integer))
    areas_select = select([empty_array.op('||')(aggregated_ids)]) \
        .where(AreaAssociation.document_id == document.document_id)

    DBSession.execute(
        DocumentChange.__table__.update()
        .where(DocumentChange.document_id == document.document_id)
        .values(area_ids=areas_select.as_scalar()))
def _get_select_waypoints_for_routes_aggregated():
    """ Returns a select which retrieves for every route the ids for the
    waypoints that are associated to the route. It also returns the parent
    and grand-parent of waypoints, so that when searching for routes for a
    waypoint, you also get the routes associated to child waypoints.

    E.g. when searching for routes for Calanques, you also get the routes
    associated to sub-sectors.
    """
    waypoints = _get_select_waypoints_for_routes()
    waypoint_ids = func.array_agg(
        waypoints.c.waypoint_id,
        type_=postgresql.ARRAY(Integer)).label('waypoint_ids')
    return (
        select([
            waypoints.c.route_id.label('route_id'),
            waypoint_ids
        ])
        .select_from(waypoints)
        .group_by(waypoints.c.route_id)
    )
def _get_select_routes_for_outings_aggregated():
    """ Returns a select which retrieves for every outing the ids of
    associated routes.
    """
    # Inline SQL string literals for the association document types.
    outing_type = text("'" + OUTING_TYPE + "'")
    route_type = text("'" + ROUTE_TYPE + "'")

    route_ids = func.array_agg(
        Association.parent_document_id,
        type_=postgresql.ARRAY(Integer)).label('route_ids')
    return (
        select([
            Association.child_document_id.label('outing_id'),
            route_ids
        ])
        .select_from(Association)
        .where(and_(
            Association.parent_document_type == route_type,
            Association.child_document_type == outing_type))
        .group_by(Association.child_document_id)
    )
def update_areas_of_changes(document):
    """Update the area ids of all feed entries of the given document."""
    # `ARRAY[]::integer[] || array_agg(area_id)` keeps the result an
    # empty array (not NULL) when the document has no associated areas.
    area_ids_agg = func.array_agg(
        AreaAssociation.area_id, type_=postgresql.ARRAY(Integer))
    areas_select = select(
        [literal_column('ARRAY[]::integer[]').op('||')(area_ids_agg)]
    ).where(AreaAssociation.document_id == document.document_id)

    update_stmt = DocumentChange.__table__.update() \
        .where(DocumentChange.document_id == document.document_id) \
        .values(area_ids=areas_select.as_scalar())
    DBSession.execute(update_stmt)
def _get_select_waypoints_for_outings_aggregated():
    """ Returns a select which retrieves for every outing the ids for the
    waypoints that are associated to routes associated to the outing. It
    also returns the parent and grand-parent of waypoints, so that when
    searching for outings for a waypoint, you also get the outings
    associated to child waypoints.

    E.g. when searching for outings in Calanques, you also get the outings
    associated to sub-sectors.
    """
    # Inline SQL string literals for the association document types.
    outing_type = text("'" + OUTING_TYPE + "'")
    route_type = text("'" + ROUTE_TYPE + "'")

    route_waypoints = _get_select_waypoints_for_routes()

    # CTE pairing each outing with the waypoints of its routes.
    waypoints_for_outings = (
        select([
            Association.child_document_id.label('outing_id'),
            route_waypoints.c.waypoint_id
        ])
        .select_from(join(
            Association,
            route_waypoints,
            and_(
                Association.parent_document_id ==
                route_waypoints.c.route_id,
                Association.parent_document_type == route_type,
                Association.child_document_type == outing_type)))
        .cte('waypoints_for_outings')
    )

    waypoint_ids = func.array_agg(
        waypoints_for_outings.c.waypoint_id,
        type_=postgresql.ARRAY(Integer)).label('waypoint_ids')
    return (
        select([
            waypoints_for_outings.c.outing_id.label('outing_id'),
            waypoint_ids
        ])
        .select_from(waypoints_for_outings)
        .group_by(waypoints_for_outings.c.outing_id)
    )
def sensi_report(info_role):
    """
    get the UUID report of a dataset

    .. :quickref: Metadata;
    """
    # NOTE(review): the original body repeated the docstring as a second,
    # no-op string statement; the duplicate was removed.
    params = request.args
    ds_id = params["id_dataset"]
    dataset = TDatasets.query.get_or_404(ds_id)
    id_import = params.get("id_import")
    id_module = params.get("id_module")

    # One row per synthese observation of the dataset, with the computed
    # sensitivity nomenclature and the "DEP"-type areas (departments)
    # intersecting the observation.
    query = (
        DB.session.query(
            Synthese,
            func.taxonomie.find_cdref(Synthese.cd_nom).label("cd_ref"),
            func.array_agg(LAreas.area_name).label("codeDepartementCalcule"),
            func.ref_nomenclatures.get_cd_nomenclature(
                Synthese.id_nomenclature_sensitivity).label("cd_sensi"),
            func.ref_nomenclatures.get_nomenclature_label(
                Synthese.id_nomenclature_bio_status,
                "fr").label("occStatutBiologique"),
            func.min(CorSensitivitySynthese.meta_update_date).label(
                "sensiDateAttribution"),
            func.min(CorSensitivitySynthese.sensitivity_comment).label(
                "sensiAlerte"),
            TNomenclatures.cd_nomenclature,
            TNomenclatures.label_fr,
        )
        .select_from(Synthese)
        .outerjoin(
            CorAreaSynthese,
            CorAreaSynthese.id_synthese == Synthese.id_synthese)
        .outerjoin(LAreas, LAreas.id_area == CorAreaSynthese.id_area)
        .outerjoin(
            CorSensitivitySynthese,
            CorSensitivitySynthese.uuid_attached_row ==
            Synthese.unique_id_sinp)
        .outerjoin(
            TNomenclatures,
            TNomenclatures.id_nomenclature ==
            Synthese.id_nomenclature_sensitivity)
        .filter(LAreas.id_type == func.ref_geo.get_id_area_type("DEP"))
    )
    if id_module:
        query = query.filter(Synthese.id_module == id_module)
    query = query.filter(Synthese.id_dataset == ds_id)
    if id_import:
        # Restrict to rows coming from the given import source.
        query = query.outerjoin(
            TSources, TSources.id_source == Synthese.id_source).filter(
                TSources.name_source == "Import(id={})".format(id_import))
    data = query.group_by(
        Synthese.id_synthese,
        TNomenclatures.cd_nomenclature,
        TNomenclatures.label_fr,
    ).all()

    str_productor = ""
    header = ""
    if len(data) > 0:
        # Find the actor with the "producer" role (keeps the last match,
        # as the original loop did).
        index_productor = -1
        if dataset.cor_dataset_actor:
            for index, actor in enumerate(dataset.cor_dataset_actor):
                # cd_nomenclature producteur = 6
                if actor.nomenclature_actor_role.cd_nomenclature == "6":
                    index_productor = index
        productor = (dataset.cor_dataset_actor[index_productor]
                     if index_productor != -1 else None)
        if productor:
            if not productor.organism:
                str_productor = productor.role.nom_complet
            else:
                str_productor = productor.organism.nom_organisme

    # Flatten the query rows into the CSV row dicts.
    data = [{
        "cdNom": row.Synthese.cd_nom,
        "cdRef": row.cd_ref,
        "codeDepartementCalcule": ", ".join(row.codeDepartementCalcule),
        "identifiantOrigine": row.Synthese.entity_source_pk_value,
        "occStatutBiologique": row.occStatutBiologique,
        "identifiantPermanent": row.Synthese.unique_id_sinp,
        "sensiAlerte": row.sensiAlerte,
        "sensible": "Oui" if row.cd_sensi != "0" else "Non",
        "sensiDateAttribution": row.sensiDateAttribution,
        "sensiNiveau": f"{row.cd_nomenclature} = {row.label_fr}",
    } for row in data]

    sensi_version = DB.session.query(
        func.gn_commons.get_default_parameter(
            'ref_sensi_version')).one_or_none()
    if sensi_version:
        sensi_version = sensi_version[0]

    # Hoisted out of the f-string: nested same-type quotes inside an
    # f-string expression are a SyntaxError before Python 3.12, and the
    # count is clearer as a comprehension than len(list(filter(lambda...))).
    report_date = dt.datetime.now().strftime("%d/%m/%Y %Hh%M")
    nb_sensitive = sum(1 for row in data if row["sensible"] == "Oui")
    # set an header only if the rapport is on a dataset
    header = f""""Rapport de sensibilité"
"Jeu de données";"{dataset.dataset_name}"
"Identifiant interne";"{dataset.id_dataset}"
"Identifiant SINP";"{dataset.unique_dataset_id}"
"Organisme/personne fournisseur";"{str_productor}"
"Date de création du rapport";"{report_date}"
"Nombre de données sensibles";"{nb_sensitive}"
"Nombre de données total dans le fichier";"{len(data)}"
"sensiVersionReferentiel";"{sensi_version}"

"""
    return my_csv_resp(
        filename="filename",
        data=data,
        columns=[
            "cdNom",
            "cdRef",
            "codeDepartementCalcule",
            "identifiantOrigine",
            "occStatutBiologique",
            "identifiantPermanent",
            "sensiAlerte",
            "sensible",
            "sensiDateAttribution",
            "sensiNiveau",
        ],
        _header=header,
    )