Ejemplo n.º 1
0
def hello_cereal():
    """Download the cereal CSV, parse it into row dicts, and log the row count.

    Returns:
        A list with one dict per CSV data row, as produced by
        ``csv.DictReader``.
    """
    csv_text = requests.get("https://docs.dagster.io/assets/cereal.csv").text
    reader = csv.DictReader(csv_text.split("\n"))
    cereals = list(reader)
    get_dagster_logger().info(f"Found {len(cereals)} cereals")
    return cereals
def hello_cereal() -> List[dict]:
    """Example of a Dagster op that retrieves data from an HTTP source.

    Returns:
        One dict per data row of the downloaded cereal CSV, keyed by the
        CSV header names.
    """
    url = "https://docs.dagster.io/assets/cereal.csv"
    raw_rows = requests.get(url).text.split("\n")
    cereals = list(csv.DictReader(raw_rows))
    logger = get_dagster_logger()
    logger.info(f"Found {len(cereals)} cereals")
    return cereals
Ejemplo n.º 3
0
def hello_cereal(context):
    """Download the cereal CSV and log the configured date with the row count.

    Args:
        context: Execution context; its ``op_config["date"]`` value is read
            and interpolated into the log message.
    """
    body = requests.get("https://docs.dagster.io/assets/cereal.csv").text
    cereals = list(csv.DictReader(body.split("\n")))
    date = context.op_config["date"]
    logger = get_dagster_logger()
    logger.info(f"Today is {date}. Found {len(cereals)} cereals.")
def find_highest_protein_cereal(cereals: List[dict]) -> str:
    """Example of a Dagster op that takes input and produces output.

    Args:
        cereals: Row dicts, each providing a ``"name"`` and a ``"protein"``
            entry.

    Returns:
        The ``"name"`` of the row whose ``"protein"`` value is numerically
        the greatest (the last such row wins on ties).

    Raises:
        IndexError: If ``cereals`` is empty.
        ValueError: If a ``"protein"`` value cannot be parsed as a number.
    """
    # Compare protein numerically. Rows presumably come from csv.DictReader,
    # whose values are strings; a plain string sort would rank "10" below "4".
    sorted_by_protein = sorted(
        cereals, key=lambda cereal: float(cereal["protein"]))
    highest = sorted_by_protein[-1]["name"]
    get_dagster_logger().info(
        f"{highest} is the cereal that contains the most protein"
    )
    return highest
Ejemplo n.º 5
0
def define_multilevel_logging_pipeline(inside, python):
    """Build a two-solid pipeline whose solids log at several levels.

    Args:
        inside: When truthy, each solid obtains its logger inside its own
            body; otherwise a single logger is obtained here, before the
            solids are defined, and shared with them through the closure.
        python: When truthy, loggers come from ``logging.getLogger``;
            otherwise from ``get_dagster_logger()``.

    Returns:
        The pipeline in which ``my_solid2`` consumes the output of
        ``my_solid1``.
    """
    if not inside:
        if python:
            outside_logger = logging.getLogger("my_logger_outside")
        else:
            outside_logger = get_dagster_logger()

    @solid
    def my_solid1():
        # Pick the shared logger, or fetch one now when ``inside`` is set.
        if not inside:
            logger = outside_logger
        elif python:
            logger = logging.getLogger("my_logger_inside")
        else:
            logger = get_dagster_logger()
        # Emits one DEBUG and one INFO record using %-style arguments.
        for level in (logging.DEBUG, logging.INFO):
            logger.log(level, "foobar%s", "baz")

    @solid
    def my_solid2(_in):
        # Pick the shared logger, or fetch one now when ``inside`` is set.
        if not inside:
            logger = outside_logger
        elif python:
            logger = logging.getLogger("my_logger_inside")
        else:
            logger = get_dagster_logger()
        # Emits WARNING, ERROR and CRITICAL records via keyword arguments.
        for level in (logging.WARNING, logging.ERROR, logging.CRITICAL):
            logger.log(level=level, msg="foobarbaz")

    @pipeline(mode_defs=[default_mode_def_for_test])
    def my_pipeline():
        my_solid2(my_solid1())

    return my_pipeline
Ejemplo n.º 6
0
def sort_by_calories(context, cereals):
    """Sort cereals by calories, write them to a CSV file, and report the file.

    Args:
        context: Execution context; ``context.run_id`` becomes part of the
            output file name.
        cereals: Row dicts with a ``"name"`` entry and a ``"calories"`` entry
            that ``int()`` can parse.

    Yields:
        An ``AssetMaterialization`` describing the written CSV file, followed
        by ``Output(None)``.

    Raises:
        IndexError: If ``cereals`` is empty.
    """
    sorted_cereals = sorted(cereals,
                            key=lambda cereal: int(cereal["calories"]))
    least_caloric = sorted_cereals[0]["name"]
    most_caloric = sorted_cereals[-1]["name"]

    logger = get_dagster_logger()
    logger.info(f"Least caloric cereal: {least_caloric}")
    logger.info(f"Most caloric cereal: {most_caloric}")

    # Column order follows the key order of the first (least caloric) row.
    fieldnames = list(sorted_cereals[0].keys())
    sorted_cereals_csv_path = os.path.abspath(
        f"output/calories_sorted_{context.run_id}.csv")
    os.makedirs(os.path.dirname(sorted_cereals_csv_path), exist_ok=True)

    # newline="" lets the csv module control line endings itself; without it
    # platforms that translate "\n" on write produce extra blank rows.
    with open(sorted_cereals_csv_path, "w", newline="") as fd:
        writer = csv.DictWriter(fd, fieldnames)
        writer.writeheader()
        writer.writerows(sorted_cereals)

    yield AssetMaterialization(
        asset_key="sorted_cereals_csv",
        description="Cereals data frame sorted by caloric content",
        metadata={
            "sorted_cereals_csv_path":
            EventMetadata.path(sorted_cereals_csv_path)
        },
    )
    yield Output(None)
Ejemplo n.º 7
0
def sort_by_calories(cereals):
    """Log the names of the least and the most caloric cereals.

    Args:
        cereals: Row dicts with a ``"name"`` entry and a ``"calories"`` entry
            that ``int()`` can parse.

    Raises:
        IndexError: If ``cereals`` is empty.
        ValueError: If a ``"calories"`` value cannot be parsed as an integer.
    """
    # Sort numerically. Rows presumably come from csv.DictReader, whose values
    # are strings; a string sort would place "100" before "70".
    sorted_cereals = sorted(
        cereals, key=lambda cereal: int(cereal["calories"]))
    logger = get_dagster_logger()
    logger.info(f"Least caloric cereal: {sorted_cereals[0]['name']}")
    logger.info(f"Most caloric cereal: {sorted_cereals[-1]['name']}")
Ejemplo n.º 8
0
    def ambitious_op():
        """Attempt ``1 / 0``; log an error and return ``None`` when it fails.

        Returns:
            The quotient if the division succeeds, otherwise ``None``.
        """
        logger = get_dagster_logger()
        try:
            quotient = 1 / 0
        except ZeroDivisionError:
            logger.error("Couldn't divide by zero!")
            return None
        else:
            return quotient
Ejemplo n.º 9
0
 def my_solid1():
     # ``inside``, ``python`` and ``outside_logger`` are free variables that
     # must be supplied by the enclosing scope.
     if not inside:
         logger = outside_logger
     elif python:
         logger = logging.getLogger("my_logger_inside")
     else:
         logger = get_dagster_logger()
     # Emits one DEBUG and one INFO record using %-style arguments.
     for level in (logging.DEBUG, logging.INFO):
         logger.log(level, "foobar%s", "baz")
Ejemplo n.º 10
0
    def __init__(
        self,
        logger: Optional[logging.Logger] = None,
    ):
        """Store the logger this instance will use.

        Args:
            logger (Optional[logging.Logger]): Logger to inject. When omitted
                (or otherwise falsy), the result of ``get_dagster_logger()``
                is used instead. Default is ``None``.
        """
        if logger:
            self._logger = logger
        else:
            self._logger = get_dagster_logger()
Ejemplo n.º 11
0
 def my_solid2(_in):
     # ``inside``, ``python`` and ``outside_logger`` are free variables that
     # must be supplied by the enclosing scope; ``_in`` is not used.
     if not inside:
         logger = outside_logger
     elif python:
         logger = logging.getLogger("my_logger_inside")
     else:
         logger = get_dagster_logger()
     # Emits WARNING, ERROR and CRITICAL records via keyword arguments.
     for level in (logging.WARNING, logging.ERROR, logging.CRITICAL):
         logger.log(level=level, msg="foobarbaz")
Ejemplo n.º 12
0
    def __init__(
        self,
        api_key: str,
        api_secret: str,
        disable_schedule_on_trigger: bool = True,
        request_max_retries: int = 3,
        request_retry_delay: float = 0.25,
        log: logging.Logger = get_dagster_logger(),
    ):
        """Record the credentials and request settings on the instance.

        Args:
            api_key: Used with ``api_secret`` to build the ``HTTPBasicAuth``
                object kept as ``_auth``.
            api_secret: See ``api_key``.
            disable_schedule_on_trigger: Stored unchanged; how it is used is
                not visible in this constructor.
            request_max_retries: Stored unchanged; presumably the retry limit
                for requests (confirm against the request code).
            request_retry_delay: Stored unchanged; presumably the delay
                between retries (confirm unit against the request code).
            log: Logger kept as ``_log``. The default expression
                ``get_dagster_logger()`` is evaluated once, when this method
                is defined, not on each call.
        """
        # Settings are copied verbatim; only the credentials are transformed.
        self._log = log
        self._request_max_retries, self._request_retry_delay = (
            request_max_retries,
            request_retry_delay,
        )
        self._disable_schedule_on_trigger = disable_schedule_on_trigger

        self._auth = HTTPBasicAuth(api_key, api_secret)
Ejemplo n.º 13
0
    def __init__(
            self,
            host: str,
            port: str,
            use_https: bool,
            request_max_retries: int = 3,
            request_retry_delay: float = 0.25,
            log: logging.Logger = get_dagster_logger(),
    ):
        """Record the connection and request settings on the instance.

        Args:
            host: Stored unchanged as ``_host``.
            port: Stored unchanged as ``_port``.
            use_https: Stored unchanged as ``_use_https``.
            request_max_retries: Stored unchanged; presumably the retry limit
                for requests (confirm against the request code).
            request_retry_delay: Stored unchanged; presumably the delay
                between retries (confirm unit against the request code).
            log: Logger kept as ``_log``. The default expression
                ``get_dagster_logger()`` is evaluated once, when this method
                is defined, not on each call.
        """
        # Connection target.
        self._host, self._port, self._use_https = host, port, use_https

        # Retry behaviour.
        self._request_max_retries, self._request_retry_delay = (
            request_max_retries,
            request_retry_delay,
        )

        self._log = log
Ejemplo n.º 14
0
    def __init__(
        self,
        auth_token: str,
        account_id: int,
        disable_schedule_on_trigger: bool = True,
        request_max_retries: int = 3,
        request_retry_delay: float = 0.25,
        dbt_cloud_host: str = DBT_DEFAULT_HOST,
        log: logging.Logger = get_dagster_logger(),
        log_requests: bool = False,
    ):
        """Record the account, host, and request settings on the instance.

        Args:
            auth_token: Stored unchanged as ``_auth_token``.
            account_id: Stored unchanged as ``_account_id``.
            disable_schedule_on_trigger: Stored unchanged; how it is used is
                not visible in this constructor.
            request_max_retries: Stored unchanged; presumably the retry limit
                for requests (confirm against the request code).
            request_retry_delay: Stored unchanged; presumably the delay
                between retries (confirm unit against the request code).
            dbt_cloud_host: Stored unchanged as ``_dbt_cloud_host``; defaults
                to ``DBT_DEFAULT_HOST``.
            log: Logger kept as ``_log``. The default expression
                ``get_dagster_logger()`` is evaluated once, when this method
                is defined, not on each call.
            log_requests: Stored unchanged as ``_log_requests``.
        """
        # Account identity and target host.
        self._auth_token, self._account_id = auth_token, account_id
        self._dbt_cloud_host = dbt_cloud_host

        # Request behaviour.
        self._disable_schedule_on_trigger = disable_schedule_on_trigger
        self._request_max_retries, self._request_retry_delay = (
            request_max_retries,
            request_retry_delay,
        )

        # Logging.
        self._log = log
        self._log_requests = log_requests
Ejemplo n.º 15
0
def _load_manifest_for_project(
    project_dir: str, profiles_dir: str, target_dir: str, select: str
) -> Tuple[Mapping[str, Any], DbtCliOutput]:
    """Run ``dbt ls`` for a project and load the ``manifest.json`` it leaves.

    Args:
        project_dir: Passed to dbt as the ``project-dir`` flag.
        profiles_dir: Passed to dbt as the ``profiles-dir`` flag.
        target_dir: Passed as ``target_path``; ``manifest.json`` is read from
            this directory afterwards.
        select: Passed to dbt as the ``select`` flag.

    Returns:
        A ``(manifest, cli_output)`` tuple: the parsed JSON content of
        ``manifest.json`` and the value returned by ``execute_cli``.
    """
    ls_flags = {
        "project-dir": project_dir,
        "profiles-dir": profiles_dir,
        "select": select,
        "resource-type": "model",
        "output": "json",
    }
    # Running "dbt ls" regenerates manifest.json, which holds a superset of
    # the actual "dbt ls" output, so the manifest is what gets parsed below.
    cli_output = execute_cli(
        executable="dbt",
        command="ls",
        log=get_dagster_logger(),
        flags_dict=ls_flags,
        warn_error=False,
        ignore_handled_error=False,
        target_path=target_dir,
    )

    with open(os.path.join(target_dir, "manifest.json"), "r") as manifest_file:
        manifest = json.load(manifest_file)
    return manifest, cli_output
Ejemplo n.º 16
0
def find_sugariest(cereals):
    """Log the name of the cereal with the highest sugar content.

    Args:
        cereals: Row dicts, each providing a ``"name"`` and a ``"sugars"``
            entry.

    Raises:
        IndexError: If ``cereals`` is empty.
        ValueError: If a ``"sugars"`` value cannot be parsed as a number.
    """
    # Compare sugars numerically. Rows presumably come from csv.DictReader,
    # whose values are strings; a string sort would rank "10" below "9".
    sorted_by_sugar = sorted(
        cereals, key=lambda cereal: float(cereal["sugars"]))
    get_dagster_logger().info(
        f'{sorted_by_sugar[-1]["name"]} is the sugariest cereal')
Ejemplo n.º 17
0
def bad_download_csv():
    """Download the cereal CSV, log its line count, and return a fixed list.

    Returns:
        Always ``["not_a_dict"]``; the downloaded lines are only counted for
        the log message and are not part of the result.
    """
    csv_text = requests.get("https://docs.dagster.io/assets/cereal.csv").text
    line_count = len(csv_text.split("\n"))
    get_dagster_logger().info(f"Read {line_count} lines")
    return ["not_a_dict"]
Ejemplo n.º 18
0
def display_results(most_calories, most_protein):
    """Log the most caloric and the most protein-rich cereal.

    Args:
        most_calories: Value interpolated into the "Most caloric" message.
        most_protein: Value interpolated into the "Most protein-rich" message.
    """
    log = get_dagster_logger()
    log.info(f"Most caloric cereal: {most_calories}")
    log.info(f"Most protein-rich cereal: {most_protein}")
Ejemplo n.º 19
0
def download_csv():
    """Download the cereal CSV, log its line count, and return the parsed rows.

    Returns:
        A list with one dict per CSV data row, as produced by
        ``csv.DictReader``.
    """
    csv_text = requests.get("https://docs.dagster.io/assets/cereal.csv").text
    lines = csv_text.split("\n")
    get_dagster_logger().info(f"Read {len(lines)} lines")
    return list(csv.DictReader(lines))
def display_results(most_calories: str, most_protein: str) -> Nothing:
    """Example of a Dagster op that takes inputs but does not produce output.

    Args:
        most_calories: Name shown in the "Most caloric cereal" log message.
        most_protein: Name shown in the "Most protein-rich cereal" log message.
    """
    log_info = get_dagster_logger().info
    log_info(f"Most caloric cereal: {most_calories}")
    log_info(f"Most protein-rich cereal: {most_protein}")
Ejemplo n.º 21
0
def sort_by_calories(cereals):
    """Log the name of the cereal with the highest integer calorie count.

    Args:
        cereals: Row dicts with a ``"name"`` entry and a ``"calories"`` entry
            that ``int()`` can parse.
    """
    def calorie_count(cereal):
        return int(cereal["calories"])

    by_calories = sorted(cereals, key=calorie_count)

    logger = get_dagster_logger()
    logger.info(f'Most caloric cereal: {by_calories[-1]["name"]}')