def test_user_insert(db_test_client):
    from application import db

    db.insert_user("oppilas", "oppilas", "Tessa", "Testaaja")
    # inserting the same username twice must violate the unique constraint
    with pytest.raises(IntegrityError):
        db.insert_user("oppilas", "oppilas", "Tessa", "Testaaja")

    j = db.account.join(db.role)
    sql = Select([func.count(db.account.c.username), db.role.c.name]).select_from(j)
    with db.engine.connect() as conn:
        rs = conn.execute(sql)
        row = rs.first()
        count = row[0]
        role = row[1]
        assert count == 1
        assert role == "USER"

    db.insert_user("opettaja", "opettaja", "Essi", "Esimerkki", role="TEACHER")
    sql = Select([func.count(db.account.c.username), db.role.c.name]) \
        .select_from(j) \
        .where(db.role.c.name == "TEACHER")
    with db.engine.connect() as conn:
        rs = conn.execute(sql)
        row = rs.first()
        count = row[0]
        role = row[1]
        assert count == 1
        assert role == "TEACHER"

    student = db.get_user_by_id(1)
    teacher = db.get_user_by_id(2)
    null = db.get_user_by_id(3)
    assert student.name == "oppilas"
    assert teacher.name == "opettaja"
    assert null is None
def test_blending_2_2(self):
    q1 = Select(
        columns=[
            column('ad_id'),
            column('impressions'),
            column(HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME),
        ],
        from_obj=table('table1'),
    )
    df1 = Dataframe(q1, get_mocked_dataframe_columns_map(['ad_id', 'impressions']), set(), {'SF'})

    q2 = Select(
        columns=[
            column('ad_id'),
            column('campaign_id'),
            column('impressions'),
            column(HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME),
        ],
        from_obj=table('table2'),
    )
    df2 = Dataframe(q2, get_mocked_dataframe_columns_map(['ad_id', 'impressions', 'campaign_id']), set(), {'SF'})

    blended_df = blend_dataframes(SNOWFLAKE_HUSKY_CONTEXT, [df1, df2])

    # note: this regenerates the expectation file just before reading it back,
    # so the snapshot comparison below always passes on this run
    self.write_test_expectations('query.sql', compile_query(blended_df.query))
    expected_query = self.read_test_expectations('query.sql')
    self.assertEqual(expected_query, compile_query(blended_df.query))
    self.assertEqual({'ad_id', 'impressions', 'campaign_id'}, set(blended_df.slug_to_column.keys()))
def test_execute_select_process_result_value(mocked_client, mocker) -> None:
    mocked_client.execute_statement.return_value = {
        'numberOfRecordsUpdated': 0,
        'records': [[{'longValue': 1}, {'stringValue': 'cat'}]],
        'columnMetadata': [
            {
                "arrayBaseColumnType": 0,
                "isAutoIncrement": False,
                "isCaseSensitive": False,
                "isCurrency": False,
                "isSigned": True,
                "label": "id",
                "name": "id",
                "nullable": 1,
                "precision": 11,
                "scale": 0,
                "schemaName": "",
                "tableName": "pets",
                "type": 4,
                "typeName": "INT",
            },
            {
                "arrayBaseColumnType": 0,
                "isAutoIncrement": False,
                "isCaseSensitive": False,
                "isCurrency": False,
                "isSigned": False,
                "label": "name",
                "name": "name",
                "nullable": 1,
                "precision": 255,
                "scale": 0,
                "schemaName": "",
                "tableName": "pets",
                "type": 12,
                "typeName": "VARCHAR",
            },
        ],
    }
    data_api = DataAPI(
        resource_arn='arn:aws:rds:dummy',
        secret_arn='dummy',
        database='test',
        client=mocked_client,
    )
    # the custom column type's process_result_value() prefixes the raw value
    assert list(data_api.execute(Select([Pets]))[0]) == [1, 'my_type_cat']
    assert mocked_client.execute_statement.call_args == mocker.call(
        continueAfterTimeout=True,
        database='test',
        includeResultMetadata=True,
        resourceArn='arn:aws:rds:dummy',
        secretArn='dummy',
        sql="""SELECT pets.id, pets.name 
FROM pets""",
    )
async def get_exercises(workout_id):
    w = WorkoutExercise.__table__
    e = Exercise.__table__
    query = Select(columns=[*w.c, e.c.name.label('exercise_name')]) \
        .select_from(w.join(e)) \
        .where(w.c.workout_id == workout_id)
    return await db.fetch_all(query)
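# A self-contained sketch (not from the source) of the labelled-join pattern used in
# get_exercises above, with minimal stand-in tables and the legacy 1.x-style Select
# constructor these snippets assume; running it prints the compiled SQL.
from sqlalchemy import Column, ForeignKey, Integer, MetaData, String, Table
from sqlalchemy.sql import Select

metadata = MetaData()
exercise = Table(
    'exercise', metadata,
    Column('id', Integer, primary_key=True),
    Column('name', String),
)
workout_exercise = Table(
    'workout_exercise', metadata,
    Column('id', Integer, primary_key=True),
    Column('workout_id', Integer),
    Column('exercise_id', Integer, ForeignKey('exercise.id')),
)

query = Select(columns=[*workout_exercise.c, exercise.c.name.label('exercise_name')]) \
    .select_from(workout_exercise.join(exercise)) \
    .where(workout_exercise.c.workout_id == 42)
print(query)
# roughly:
# SELECT workout_exercise.id, workout_exercise.workout_id, workout_exercise.exercise_id,
#        exercise.name AS exercise_name
# FROM workout_exercise JOIN exercise ON exercise.id = workout_exercise.exercise_id
# WHERE workout_exercise.workout_id = :workout_id_1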
async def list(user_id: str):
    e = Exercise.__table__
    w = Workout.__table__
    query = Select(columns=[*e.c, w.c.date.label('last_workout_date')]) \
        .select_from(e.outerjoin(w)) \
        .where(e.c.user_id == user_id) \
        .where(e.c.is_deleted == false()) \
        .order_by(nullslast(desc(w.c.date)))
    return await db.fetch_all(query)
def create_single_query_mock(data_source_name):
    """Convenience fn to create sqlalchemy's Select clause with some column and table."""
    return Select(
        columns=[
            column(data_source_name + '_column_mock'),
            column(HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME),
        ],
        from_obj=text(data_source_name + '_table_mock'),
    )
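# Hedged usage sketch for the mock above: compiling the returned Select shows the shape
# the tests rely on. HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME's value is assumed here.
from sqlalchemy import text
from sqlalchemy.sql import Select, column

HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME = '__data_source'  # assumed, for illustration only

query_mock = Select(
    columns=[column('SF_column_mock'), column(HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME)],
    from_obj=text('SF_table_mock'),
)
print(query_mock)  # roughly: SELECT "SF_column_mock", __data_source FROM SF_table_mock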
async def view_exercise_history(exercise_id: str):
    we = WorkoutExercise.__table__
    workouts = Workout.__table__
    query = Select(columns=[*we.c,
                            workouts.c.date.label('workout_date'),
                            workouts.c.id.label('workout_id')]) \
        .select_from(we.join(workouts)) \
        .where(workouts.c.is_deleted == false()) \
        .where(we.c.exercise_id == exercise_id) \
        .order_by(desc(workouts.c.date))
    return await db.fetch_all(query)
async def check_entity_belongs_to_user(table: Table, entity_id: str, user_id: str):
    query = Select(columns=[func.count().label('cnt')]) \
        .select_from(table) \
        .where(table.c.user_id == user_id) \
        .where(table.c.is_deleted == false()) \
        .where(table.c.id == entity_id)
    row = await db.fetch_one(query)
    result = dict(row)['cnt'] != 0 if row is not None else False
    if not result:
        raise CustomException('Insufficient permissions to view this record')
def main(**kwargs):
    # use ti for xcom push and pull
    ti = kwargs["ti"]

    # try to read the user buffer csv; if it is not found, keep user_id = 0
    user_id = 0
    buffer_dir = "/Users/muhammadsyamsularifin/airflow/buffer_data/users.csv"
    try:
        df = pd.read_csv(buffer_dir)
        user_id = int(df["id"].iloc[-1])
    except FileNotFoundError:
        pass

    # select data from db, with id greater than the buffered "user_id"
    query = Select([Users]).where(Users.c.id > user_id).limit(3)
    result = conn.execute(query)

    # prepare a new, empty pandas dataframe
    columns = ["id", "username", "address", "is_active", "domicile", "balance", "point"]
    df = pd.DataFrame(columns=columns)

    # insert the fetched rows into the dataframe
    for id, username, address, is_active, domicile, balance, point in result:
        new_row = {
            "id": id,
            "username": username,
            "address": address,
            "is_active": is_active,
            "domicile": domicile,
            "balance": float(balance),
            "point": float(point),
        }
        # DataFrame.append() was removed in pandas 2.0; pd.concat is the supported equivalent
        df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    # if no data was extracted, tell xcom that extraction is done
    if len(df["id"]) == 0:
        ti.xcom_push(key="extract_user_done", value=1)
        return None

    # save to the csv buffer file
    df.to_csv(buffer_dir, index=False)
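# The DAG step above uses a simple watermark: pull only rows with id greater than the
# last id in the CSV buffer, in small batches. A runnable, self-contained sketch of that
# query pattern against an in-memory SQLite table (table and names are illustrative):
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine
from sqlalchemy.sql import Select

engine = create_engine('sqlite://')
metadata = MetaData()
users = Table(
    'users', metadata,
    Column('id', Integer, primary_key=True),
    Column('username', String),
)
metadata.create_all(engine)

with engine.connect() as conn:
    conn.execute(users.insert(), [{'id': i, 'username': f'user{i}'} for i in range(1, 8)])
    last_seen_id = 3  # would be read from the buffer csv in the DAG above
    batch = conn.execute(
        Select([users]).where(users.c.id > last_seen_id).limit(3)
    ).fetchall()
    print(batch)  # [(4, 'user4'), (5, 'user5'), (6, 'user6')]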
def visit(self):
    # tag the select list with its key, dispatch it, then clean the tag up
    self._expr['selectList']['key'] = 'selectList'
    clause = Select(self._expr['selectList']).visit()
    self._expr['selectList'].pop('key')
    # visit the remaining clauses the same way, skipping the bookkeeping entries
    for key, expr in self._expr.items():
        if key == 'key':
            continue
        if key == 'selectList':
            continue
        expr['key'] = key
        clause = self.access(clause, expr)
        expr.pop('key')
    return clause
def test_db_file_insert_constraint(db_test_client):
    from application import db

    s = "nothing"
    db.insert_user(s, s, s, s, role="TEACHER")
    user_id = db.get_user(s, s).get_id()
    with pytest.raises(IntegrityError):
        file_insert_helper(
            db,
            user_id=user_id,
            binary_file=io.BytesIO(b"helvetin turhia bitteja").read(),
        )
    # the failed insert must not leave a row behind
    sql = Select([db.file])
    with db.engine.connect() as conn:
        rs = conn.execute(sql)
        row = rs.first()
        assert row is None
def _project_columns(
    cls, query: Select, dataframe: Dataframe, return_taxons: Dict[TaxonExpressionStr, Taxon]
) -> Tuple[List[ColumnAndDataframeColumn], Select]:
    projected_sql_and_df_columns: List[ColumnAndDataframeColumn] = [
        cls._project_column(query, taxon, dataframe.slug_to_column.get(taxon_slug_expression))
        for taxon_slug_expression, taxon in return_taxons.items()
    ]
    return (
        projected_sql_and_df_columns,
        Select(columns=sort_columns([col for col, _ in projected_sql_and_df_columns])),
    )
def calculate_dataframe(
    cls,
    dimension_formulas: List[PreFormula],
    override_mappings_tel_data: OverrideMappingTelData,
    override_mapping_cte_map: Dict[OverrideMappingSlug, Select],
    df: Dataframe,
) -> Dataframe:
    select_columns = []
    select_columns.extend(df.query.columns)
    for dim_formula in dimension_formulas:
        col = dim_formula.formula.label(dim_formula.label)
        select_columns.append(col)

    # add joins to relevant override mapping CTEs
    select_from_query = OverrideMappingSql.insert_cte_joins(
        df.query, override_mappings_tel_data, override_mapping_cte_map
    )

    query = Select(columns=sort_columns(select_columns)).select_from(select_from_query)
    return Dataframe(query, df.slug_to_column, df.used_model_names, df.used_physical_data_sources)
def render_direct_mapping(cls, mapping: OverrideMapping) -> Select:
    """Renders CTE for direct mapping as union of all values"""
    selects = []
    for original, changed in mapping.definition:
        # using "literal" instead of "literal_column" here to force SQLAlchemy
        # to bind constants as params (safe)
        if original is None:
            original_column = literal_column('CAST(NULL AS VARCHAR)')
        else:
            original_column = literal(original)

        if changed is None:
            changed_column = literal(cls.PANO_NULL)
        else:
            changed_column = literal(changed)

        selects.append(
            Select([
                original_column.label(cls.ORIGINAL_COLUMN_NAME),
                changed_column.label(cls.CHANGED_COLUMN_NAME),
            ])
        )
    return union_all(*selects)
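# For intuition, a minimal sketch (not from the source) of the union-of-literal-selects
# shape render_direct_mapping produces, with illustrative label names and mapping pairs:
from sqlalchemy import literal, literal_column, union_all
from sqlalchemy.sql import Select

pairs = [('US', 'United States'), (None, 'Unknown')]  # (original, changed)
selects = []
for original, changed in pairs:
    original_col = literal_column('CAST(NULL AS VARCHAR)') if original is None else literal(original)
    selects.append(Select([original_col.label('original'), literal(changed).label('changed')]))
print(union_all(*selects))
# roughly: SELECT :param_1 AS original, :param_2 AS changed
#          UNION ALL SELECT CAST(NULL AS VARCHAR) AS original, :param_3 AS changed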
def change_objects_ownership(engine: Engine, database: str, target_role: str) -> None:
    stmt = Select(
        [
            literal_column("table_type"),
            literal_column("table_schema"),
            literal_column("table_name"),
        ],
        from_obj=text(f"{database}.INFORMATION_SCHEMA.TABLES"),
        whereclause=literal_column("table_owner") == "DBT_PRODUCTION",
    )
    with engine.begin() as tx:
        rp = tx.execute(stmt)
        objects = [
            (
                "TABLE" if object_type == "BASE TABLE" else object_type,
                schema,
                object_name,
            )
            for object_type, schema, object_name in rp.fetchall()
        ]
        for object_type, schema, object_name in objects:
            tx.execute(
                f"GRANT OWNERSHIP ON {object_type} {database}.{schema}.{object_name}"
                f" TO ROLE {target_role} REVOKE CURRENT GRANTS"
            ).fetchall()
def query(
    cls,
    select_query: Select,
    taxon_model_info_map: Dict[str, TaxonModelInfo],
    projection_taxons: SlugExprTaxonMap,
    data_source: str,
    order_by: Optional[List[TaxonDataOrder]],
    limit: Optional[int],
    offset: Optional[int],
    used_physical_data_sources: Set[str],
    dimension_templates: Optional[List[SqlFormulaTemplate]] = None,
) -> Dataframe:
    """
    Generates the final projected dataframe

    :param select_query: Original query fetching all necessary fields
    :param taxon_model_info_map: Map of taxon slug expression to taxon model info
    :param projection_taxons: List of taxons meant to be projected by the final query
    :param data_source: Virtual data source for this subrequest
    :param order_by: List of clauses for order by
    :param limit: Limit for the query
    :param offset: Offset for the query
    :param used_physical_data_sources: Set of physical data sources used by the query
    :param dimension_templates: List of dimension templates

    :return: Final dataframe including all requested taxons
    """
    group_by = []
    selectors = []

    projected_df_columns: Dict[TaxonExpressionStr, DataframeColumn] = {}
    for taxon in projection_taxons.values():
        # apply aggregation, if needed
        agg_type = taxon.tel_metadata_aggregation_type
        if agg_type and agg_type in cls._AGGREGATION_FUNCTIONS_MAP:
            col = cls._AGGREGATION_FUNCTIONS_MAP[agg_type](column(taxon.slug_safe_sql_identifier))
        else:
            col = column(taxon.slug_safe_sql_identifier)

        col = col.label(taxon.slug_safe_sql_identifier)

        # create appropriate dataframe column
        value_quality_type = ValueQuantityType.scalar
        if not taxon.calculation and taxon.slug_expr in taxon_model_info_map:
            value_quality_type = taxon_model_info_map[taxon.slug_expr].quantity_type
        df_column_name = TaxonExpressionStr(taxon.slug)
        projected_df_columns[df_column_name] = DataframeColumn(df_column_name, taxon, value_quality_type)

        # make sure we select this column in the query
        selectors.append(col)

        # check whether this taxon should be in group by clause
        if agg_type in cls._GROUP_BY_AGGREGATION_TYPES:
            group_by.append(col)

    # make sure we select all columns for dimension templates
    for dim_template in dimension_templates or []:
        col = column(dim_template.label)
        selectors.append(col)

        # we should group by all dimension templates
        group_by.append(col)

    # On purpose adding this value to emulate USING ON FALSE => PROD-8136
    selectors.append(literal(data_source).label(HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME))
    # using literal_column here because some database engines do not like grouping by constant
    group_by.append(literal_column(HUSKY_QUERY_DATA_SOURCE_COLUMN_NAME))

    # create the final query
    new_query = Select(
        columns=sort_columns(selectors),
        order_by=[nullslast(ORDER_BY_FUNCTIONS[item.type](item.taxon)) for item in (order_by or [])],
        group_by=sort_columns(group_by),
    ).select_from(select_query)

    if limit is not None:
        new_query = new_query.limit(limit)
    if offset is not None:
        new_query = new_query.offset(offset)

    # collect names of all used models
    used_model_names = {
        model_info.model_name
        for model_info in taxon_model_info_map.values()
        if model_info.model_name is not None
    }

    return Dataframe(new_query, projected_df_columns, used_model_names, used_physical_data_sources)
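# Isolated sketch of the ORDER BY construction above. ORDER_BY_FUNCTIONS is assumed here
# to map direction names to sqlalchemy's asc/desc; the real mapping lives elsewhere in
# the codebase, so treat this as illustrative only.
from sqlalchemy import asc, desc, nullslast
from sqlalchemy.sql import Select, column

ORDER_BY_FUNCTIONS = {'asc': asc, 'desc': desc}  # assumed shape
order_by = [('spend', 'desc'), ('campaign_id', 'asc')]  # (taxon slug, direction)

q = Select([column('spend'), column('campaign_id')]).order_by(
    *[nullslast(ORDER_BY_FUNCTIONS[direction](column(slug))) for slug, direction in order_by]
)
print(q)  # SELECT spend, campaign_id ORDER BY spend DESC NULLS LAST, campaign_id ASC NULLS LAST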
# Fragment of the socket server's receive loop:
query_estado_conexion = update(estado_conexion).where(
    estado_conexion.c.id == 1).values(estado="sin conexion")
connection.execute(query_estado_conexion)

# Remove from list for socket.socket()
sockets_list.remove(notified_socket)
# Remove from our list of users
del clients[notified_socket]
continue

# Get user by notified socket, so we will know who sent the message
user = clients[notified_socket]

# Store the message received from the client in datos
datos = message["data"].decode("utf-8")
# Split the comma-separated data string into its respective variables
temp, hum, ch1, ch2, hora = datos.split(",")

# Select the value of the tipo column from the config table
t = Select([configuracion])
# Execute the selection above
get_confi = connection.execute(t).fetchone()

# Check which antenna type it is; 1: DragonWave, 2: Siemens
if int(get_confi.tipo) == 1:
    actual = update(ahora).where(ahora.c.id == 1).values(
        temperatura=temp,
        humedad=hum,
        canal1=ch1,
        canal2=ch2,
        canal3=chan2.voltage,
        canal4=chan3.voltage,
        tempGabinete=obtenerTemp(address),
        hora=time.strftime("%H:%M:%S"))
    connection.execute(actual)
    print("Updated the ahora table")
async def add_missing_logs(bot, sess):
    try:
        channels = Cs.auto_cleanup_targets.gets()
    except KeyError:
        return

    all_logs: LOGS = set()
    for channel in channels:
        logs: LOGS = set()
        # oldest already-recorded timestamp for this channel, used as the history cutoff
        try:
            latest = sess.execute(
                Select([func.min(EventLog.ts)]).where(EventLog.channel == channel.id)
            ).scalar()
        except NoResultFound:
            latest = None

        has_more = True
        cursor = None
        while has_more and len(logs) < 1600:
            try:
                resp = await bot.api.conversations.history(
                    channel,
                    cursor=cursor,
                    latest=latest,
                )
            except APICallError as e:
                await report(bot, exception=e)
                break

            history = resp.body
            if not history['ok']:
                break
            has_more = history['has_more']
            if has_more:
                cursor = history['response_metadata']['next_cursor']

            messages = {(m.get('reply_count', 0), m['ts']) for m in history['messages']}
            while messages:
                reply_count, ts = messages.pop()
                if reply_count:
                    # walk the thread replies with their own cursor pagination
                    has_more_replies = True
                    replies_cursor = None
                    while has_more_replies:
                        try:
                            r = await bot.api.conversations.replies(
                                channel,
                                cursor=replies_cursor,
                                ts=ts,
                            )
                        except APICallError as e:
                            await report(bot, exception=e)
                            break
                        replies = r.body
                        if not replies['ok']:
                            break
                        has_more_replies = replies['has_more']
                        if has_more_replies:
                            replies_cursor = replies['response_metadata']['next_cursor']
                        messages |= {
                            (m.get('reply_count', 0), m['ts'])
                            for m in replies.get('messages', [])
                        }
                logs.add((channel.id, ts))

        all_logs |= logs

    if all_logs:
        with sess.begin():
            sess.execute(
                Insert(EventLog).values([
                    {'channel': c, 'ts': t} for c, t in all_logs
                ]).on_conflict_do_nothing()
            )
def calculate_dataframe(
    self,
    ctx: HuskyQueryContext,
    df: Dataframe,
    physical_data_sources: Set[str],
    grouping_sets: Optional[GroupingSets] = None,
    filter_clause: Optional[FilterClause] = None,
) -> Dataframe:
    """
    Applies in this order:
    - pre aggregation logic
    - aggregation by group by or grouping sets
    - optional step of window function aggregation
    - after aggregation logic
    - filters

    Filters are applied here to simplify the final query and to apply filtering
    before filling date gaps.
    """
    # Columns with applied aggregation function in the aggregation step
    pre_agg_columns = []
    # Columns to select from the window step - columns that are not removed and don't need the window step
    select_from_window_step: List[ColumnClause] = []
    # Final df columns after all steps
    df_columns: List[DataframeColumn] = []
    group_columns = []
    final_columns: List[ColumnClause] = []

    for pre_formula in self.taxon_manager.plan.metric_pre:
        col = pre_formula.formula.label(pre_formula.label)
        aggregation_fn = self.AGGREGATION_FUNCTIONS_MAP.get(pre_formula.aggregation.type)
        if aggregation_fn:
            # we know the aggregation function, so let's use it
            pre_agg_columns.append(aggregation_fn(col).label(pre_formula.label))
        else:
            # if no aggregation function is defined, then we simply group by this formula
            group_columns.append(col)
            select_from_window_step.append(col)

    # taxon slugs used in the group by clause
    dimension_taxon_slugs = {group_column.name for group_column in group_columns}

    for post_formula, taxon in self.taxon_manager.plan.metric_post:
        post_formula_sql = post_formula.render_formula(ctx.dialect, dimension_taxon_slugs)
        col = post_formula_sql.label(taxon.slug_safe_sql_identifier)
        final_columns.append(col)
        df_columns.append(DataframeColumn(taxon.slug_expr, taxon))

    # Aggregation query with column logic. This is the first aggregation step, a regular group by
    # or a common table expression with multiple group by statements in case of grouping sets.
    pre_query = self._add_aggregation(df.query, pre_agg_columns, group_columns, grouping_sets)

    # Post aggregation logic
    post_query = Select(columns=sort_columns(final_columns)).select_from(pre_query)

    slug_to_column = Dataframe.dataframe_columns_to_map(df_columns)

    if filter_clause:
        taxon_model_info = {
            str(slug): TaxonModelInfo(safe_quote_identifier(slug, ctx.dialect))
            for slug in slug_to_column.keys()
        }
        post_query = FilterBuilder.augment_query(ctx, post_query, taxon_model_info, filter_clause)

    return Dataframe(post_query, slug_to_column, df.used_model_names, physical_data_sources)
def _add_aggregation(
    cls,
    inner_query: Select,
    aggregation_columns: List[ColumnClause],
    group_by_columns: List[ColumnClause],
    grouping_sets: Optional[GroupingSets] = None,
) -> Select:
    """
    Aggregates raw metric taxons. Groups by given dimension taxons or grouping sets.

    :param inner_query: Query to aggregate
    :param aggregation_columns: List of columns with applied aggregation function
    :param group_by_columns: List of columns to group by
    :param grouping_sets: Optional list of grouping sets to group by instead

    :return: Aggregated query
    """
    if grouping_sets:
        # Because we union _PANORAMIC_GROUPINGSETS_NULL with a column that can be a date(time) or a number,
        # we must cast all group columns to text. Some DB engines fail when we do casting and grouping in
        # one query, thus we need to stringify the group columns in the CTE, and not in the group by query
        # just below...
        group_by_column_names = {col.name for col in group_by_columns}
        stringified_group_columns = []
        for col in inner_query.columns:
            if col.name in group_by_column_names:
                stringified_group_columns.append(cast(col, sqlalchemy.VARCHAR).label(col.name))
            else:
                stringified_group_columns.append(col)

        # common table expression reused by multiple grouping sets queries
        cte_query = (
            Select(columns=sort_columns(stringified_group_columns))
            .select_from(inner_query)
            .cte('__cte_grouping_sets')
        )

        grouping_sets_queries = []
        for grouping_set in grouping_sets:
            safe_grouping_set = [safe_identifier(col) for col in grouping_set]

            # dimensions in the grouping set, used to aggregate values with group by
            gs_group_columns = [col for col in group_by_columns if col.name in safe_grouping_set]
            # extra dimensions not in the grouping set, returned as custom null values
            gs_null_columns = [
                literal_column(f"'{_PANORAMIC_GROUPINGSETS_NULL}'").label(col.name)
                for col in group_by_columns
                if col.name not in safe_grouping_set
            ]

            grouping_sets_queries.append(
                Select(columns=sort_columns(gs_group_columns + gs_null_columns + aggregation_columns))
                .select_from(cte_query)
                .group_by(*sort_columns(gs_group_columns))
            )
        return union_all(*grouping_sets_queries)

    # If grouping sets are not defined, use all dimensions for grouping.
    return (
        Select(columns=sort_columns(group_by_columns + aggregation_columns))
        .select_from(inner_query)
        .group_by(*sort_columns(group_by_columns))
    )
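# Minimal sketch (illustrative table and names, not from the source) of the grouping-sets
# emulation above: one grouped SELECT per set, absent dimensions emitted as a sentinel
# literal, everything combined with UNION ALL.
from sqlalchemy import func, literal_column, union_all
from sqlalchemy.sql import Select, column, table

facts = table('facts', column('country'), column('device'), column('clicks'))
SENTINEL = "'__gs_null__'"  # assumed stand-in for _PANORAMIC_GROUPINGSETS_NULL

per_set_queries = []
for grouping_set in (['country'], ['device']):
    cols = [
        facts.c[name] if name in grouping_set else literal_column(SENTINEL).label(name)
        for name in ('country', 'device')
    ]
    per_set_queries.append(
        Select(cols + [func.sum(facts.c.clicks).label('clicks')])
        .group_by(*[facts.c[name] for name in grouping_set])
    )
print(union_all(*per_set_queries))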
async def get_teams(self) -> List[int]:
    select = Select([self.table.c.team], distinct=True)
    teams = await self._connection.fetch_all(select)
    return [n for team in teams for n in team.values()]