from datetime import datetime

from dagster_pandas import PandasColumn, create_dagster_pandas_dataframe_type
from pandas import DataFrame, read_csv

from dagster import OutputDefinition, pipeline, solid
from dagster.utils import script_relative_path

# Dagster dataframe type for trip records. Each PandasColumn entry declares
# the constraint attached to one named column; both timestamp columns share
# the same lower bound of 2020-02-10.
TripDataFrame = create_dagster_pandas_dataframe_type(
    name="TripDataFrame",
    columns=[
        PandasColumn.integer_column("bike_id", min_value=0),
        PandasColumn.categorical_column("color", categories={"red", "green", "blue"}),
        PandasColumn.datetime_column(
            "start_time", min_datetime=datetime(year=2020, month=2, day=10)
        ),
        PandasColumn.datetime_column("end_time", min_datetime=datetime(year=2020, month=2, day=10)),
        PandasColumn.string_column("station"),
        # Only the presence of this column is declared; no dtype constraint is given.
        PandasColumn.exists("amount_paid"),
        PandasColumn.boolean_column("was_member"),
    ],
)


@solid(output_defs=[OutputDefinition(name="trip_dataframe", dagster_type=TripDataFrame)])
def load_trip_dataframe(_) -> DataFrame:
    """Read ``ebike_trips.csv`` into a DataFrame.

    The CSV path is resolved with ``script_relative_path``. The ``start_time``
    and ``end_time`` columns are parsed as datetimes using the format
    ``%Y-%m-%d %H:%M:%S.%f``. The result is emitted on the ``trip_dataframe``
    output, which is declared with the ``TripDataFrame`` dagster type.

    Args:
        _: Unused first positional argument (presumably the solid execution
            context supplied by dagster -- confirm against the dagster version
            in use).

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    # NOTE(review): ``date_parser`` is deprecated in pandas >= 2.0 in favour of
    # ``date_format``; it is kept here so behaviour is unchanged on the pandas
    # version this file was written against.
    return read_csv(
        script_relative_path("./ebike_trips.csv"),
        parse_dates=["start_time", "end_time"],
        date_parser=lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f"),
    )
from dagster import solid, SolidExecutionContext, Field, Array, String
from dagster_pandas import PandasColumn, create_dagster_pandas_dataframe_type
from pandas import DataFrame
from typing import Any, Optional, List, TYPE_CHECKING
from azmeta.access.resource_graph import query_dataframe

# ResourcesDataFrame is bound to ``Any`` while type checking and to a
# dagster_pandas dataframe type at runtime. The runtime type declares two
# string columns: ``resource_id`` and ``subscription_id``.
if TYPE_CHECKING:
    ResourcesDataFrame = Any  # DataFrame # Pandas has no type info yet.
else:
    ResourcesDataFrame = create_dagster_pandas_dataframe_type(
        name='ResourcesDataFrame',
        columns=[
            PandasColumn.string_column('resource_id'),
            PandasColumn.string_column('subscription_id'),
        ],
    )


# NOTE(review): the decorator call below is cut off in the visible text; its
# closing brackets and the function it decorates are not shown here.
# The visible config schema has a ``subscriptions`` array of strings plus two
# string fields marked ``is_required=False``: ``filters`` and
# ``custom_projections``.
@solid(
    config_schema={
        'subscriptions': Field(Array(String), description='The subscriptions to query in the Resource Graph.'),
        'filters': Field(String, is_required=False, description='Conditions for a KQL where operator.'),
        'custom_projections': Field(String, is_required=False, description='Assignments for a KQL project operator.'),
# Dataframe type for e-bike trip records: one PandasColumn declaration per
# column, with both timestamp columns bounded below by 2020-02-10.
TripDataFrame = create_dagster_pandas_dataframe_type(
    name='TripDataFrame',
    columns=[
        PandasColumn.integer_column('bike_id', min_value=0),
        PandasColumn.categorical_column('color', categories={'red', 'green', 'blue'}),
        PandasColumn.datetime_column('start_time', min_datetime=datetime(2020, 2, 10)),
        PandasColumn.datetime_column('end_time', min_datetime=datetime(2020, 2, 10)),
        PandasColumn.string_column('station'),
        PandasColumn.exists('amount_paid'),
        PandasColumn.boolean_column('was_member'),
    ],
)


@solid(output_defs=[OutputDefinition(name='trip_dataframe', dagster_type=TripDataFrame)])
def load_trip_dataframe(_) -> DataFrame:
    """Load ``ebike_trips.csv`` and return it as a DataFrame.

    ``start_time`` and ``end_time`` are parsed as datetimes with the format
    ``%Y-%m-%d %H:%M:%S.%f``. The value is emitted on the ``trip_dataframe``
    output declared with the ``TripDataFrame`` dagster type. The single
    positional argument is not used.
    """

    def parse_timestamp(raw):
        # Same conversion the CSV reader applies to each timestamp value.
        return datetime.strptime(raw, '%Y-%m-%d %H:%M:%S.%f')

    csv_path = script_relative_path('./ebike_trips.csv')
    timestamp_columns = ['start_time', 'end_time']
    return read_csv(
        csv_path,
        parse_dates=timestamp_columns,
        date_parser=parse_timestamp,
    )
import functools
from azmeta.access.monitor_logs import (PerformanceCounterSpec, query_dataframe_by_workspace_chunk,
                                        build_perf_counter_percentile_query, build_disk_percentile_query)
from azmeta.access.utils.chunking import build_grouped_chunk_list
from .resources import ResourcesDataFrame
from .specifications import AzureComputeSpecifications

# UtilizationDataFrame is bound to ``Any`` while type checking and to a
# dagster_pandas dataframe type at runtime. The runtime type declares a string
# ``resource_id`` column, float columns for the 50th/80th/90th/95th/99th
# percentiles and ``max``, and an integer ``samples`` column.
if TYPE_CHECKING:
    UtilizationDataFrame = Any  # DataFrame # Pandas has no type info yet.
else:
    UtilizationDataFrame = create_dagster_pandas_dataframe_type(
        name='UtilizationDataFrame',
        columns=[
            PandasColumn.string_column('resource_id'),
            PandasColumn.float_column('percentile_50th'),
            PandasColumn.float_column('percentile_80th'),
            PandasColumn.float_column('percentile_90th'),
            PandasColumn.float_column('percentile_95th'),
            PandasColumn.float_column('percentile_99th'),
            PandasColumn.float_column('max'),
            PandasColumn.integer_column('samples'),
        ],
    )


# Solid that declares a dependency on the ``azure_monitor`` resource key,
# takes a ResourcesDataFrame and is annotated to return a UtilizationDataFrame.
# NOTE(review): only the signature is visible here; the function body is cut
# off in the visible text.
@solid(required_resource_keys={'azure_monitor'})
def query_cpu_utilization(context: SolidExecutionContext, resources: ResourcesDataFrame) -> UtilizationDataFrame:
# NOTE(review): this fragment begins inside a list literal whose opening
# bracket (and the enclosing function) lie before the visible text.
        EventMetadataEntry.text(
            max(dataframe["day"]),
            "max_day",
            "Maximum date of exchange rates",
        ),
        EventMetadataEntry.text(
            str(dataframe["day"].nunique()),
            "num_unique_day",
            "Total unique dates of exchange rates",
        ),
        EventMetadataEntry.text(
            str(dataframe["currency"].nunique()),
            "num_unique_currency",
            "Total unique currencies of exchange rates",
        ),
        EventMetadataEntry.text(str(len(dataframe)), "n_rows", "Number of rows seen in the dataframe"),
    ]


# Dataframe type for exchange rates: string columns ``id``, ``day`` and
# ``currency`` plus a numeric ``rate`` column. The function
# ``compute_exchange_rate_dataframe_summary_statistics`` is passed as
# ``event_metadata_fn``.
ExchangeRateDataFrame = create_dagster_pandas_dataframe_type(
    name="ExchangeRateDataFrame",
    columns=[
        PandasColumn.string_column("id"),
        PandasColumn.string_column("day"),
        PandasColumn.string_column("currency"),
        PandasColumn.numeric_column("rate"),
    ],
    event_metadata_fn=compute_exchange_rate_dataframe_summary_statistics,
)