Example #1
from airflow.contrib.kubernetes import pod
from airflow.contrib.operators import kubernetes_pod_operator

# [START composer_kubernetespodoperator_fullconfig]
kubernetes_full_pod = kubernetes_pod_operator.KubernetesPodOperator(
    # The ID specified for the task. The opening arguments (task_id through
    # cmds) are assumed from the upstream Composer sample.
    task_id='ex-all-configs',
    # Name of task you want to run, used to generate Pod ID.
    name='pi',
    # The namespace to run within Kubernetes.
    namespace='default',
    # Docker image from which the Pod's container is created.
    image='perl',
    # Entrypoint of the container, if not specified the Docker container's
    # entrypoint is used. The cmds parameter is templated.
    cmds=['perl'],
    # The environment variables to be initialized in the container.
    # env_vars are templated.
    env_vars={'EXAMPLE_VAR': '/example/value'},
    # If true, logs stdout output of container. Defaults to True.
    get_logs=True,
    # Determines when to pull a fresh image: 'IfNotPresent' causes the
    # Kubelet to skip pulling an image it already has; 'Always' pulls a
    # fresh copy on every run.
    image_pull_policy='Always',
    # Annotations are non-identifying metadata you can attach to the Pod.
    # They can hold a wide range of data, including characters that are not
    # permitted in labels.
    annotations={'key1': 'value1'},
    # Resource specification for the Pod: both CPU and memory requests and
    # limits. The attribute dict of a Resources object supplies the plain
    # dict the contrib operator accepts.
    resources=pod.Resources().__dict__,
    # Path to the Kubernetes config. If no config is specified, it defaults
    # to '~/.kube/config'. The config_file parameter is templated.
    config_file='/home/airflow/composer_kube_config',
    # If true, the content of /airflow/xcom/return.json from the container
    # is also pushed to an XCom when the container ends.
    xcom_push=False,
    # List of Volume objects to pass to the Pod.
    volumes=[],
    # List of VolumeMount objects to pass to the Pod.
    volume_mounts=[],
    # Affinity determines which nodes the Pod can run on based on the
    # config. For more information see:
    # https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
    affinity={})
# [END composer_kubernetespodoperator_fullconfig]
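
# xcom_push above requires the container itself to write
# /airflow/xcom/return.json before it exits; the operator's sidecar then
# pushes that JSON as an XCom. A hedged sketch (image and payload are
# illustrative):
write_xcom = kubernetes_pod_operator.KubernetesPodOperator(
    task_id='write-xcom',
    name='write-xcom',
    namespace='default',
    image='alpine',
    cmds=['sh', '-c',
          'mkdir -p /airflow/xcom && '
          'echo \'{"status": "done"}\' > /airflow/xcom/return.json'],
    # Enable the sidecar that extracts the JSON file into an XCom.
    xcom_push=True,
    config_file='/home/airflow/composer_kube_config')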
Example #2
import datetime

# Imports assumed by the snippets below (Airflow 1.x contrib modules).
from airflow import models
from airflow.contrib.kubernetes import pod
from airflow.contrib.operators import kubernetes_pod_operator
from airflow.models import DAG
from airflow.operators import bash_operator
from airflow.operators import python_operator

default_dag_args = {
    # The start_date describes when a DAG is valid / can be run. Set this to a
    # fixed point in time rather than dynamically, since it is evaluated every
    # time a DAG is parsed. See:
    # https://airflow.apache.org/faq.html#what-s-the-deal-with-start-date
    'start_date': datetime.datetime(2018, 1, 1),  # any fixed date works
}

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG('node_scale_down_3',
                catchup=False,
                schedule_interval='*/1 * * * *',
                default_args=default_dag_args) as dag:

    pod_res = pod.Resources(request_memory='10Mi',
                            request_cpu='10m',
                            limit_memory='15Mi',
                            limit_cpu='15m')
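    # The same limits can also be passed as a plain dict, which the contrib
    # operator accepts directly, e.g.:
    # resources={'request_memory': '10Mi', 'request_cpu': '10m',
    #            'limit_memory': '15Mi', 'limit_cpu': '15m'}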
    # setattr(pod_res, 'request_memory', '1Mi')
    # setattr(pod_res, 'request_cpu', None)
    # setattr(pod_res, 'limit_cpu', None)
    # An instance of an operator is called a task. In this case, the
    # scale_down task runs a container that just echoes a message.
    scale_down = kubernetes_pod_operator.KubernetesPodOperator(
        # The ID specified for the task.
        task_id='node-scale_down',
        # Name of task you want to run, used to generate Pod ID.
        name='scale-down',
        # The namespace to run within Kubernetes.
        namespace='default',
        # Docker image from which the Pod's container is created. The
        # operator requires one; the Ubuntu base image used in the Composer
        # samples is assumed here.
        image='gcr.io/gcp-runtimes/ubuntu_18_0_4',
        # resources=pod_res,
        # Entrypoint of the container, if not specified the Docker container's
        # entrypoint is used. The cmds parameter is templated.
        cmds=["echo", "I am here to scale down"],
        # The environment variables to be initialized in the container
        # env_vars are templated.
        env_vars={'EXAMPLE_VAR': '/example/value'},
        # If true, logs stdout output of container. Defaults to True.
        get_logs=True,
        # Determines when to pull a fresh image: 'IfNotPresent' causes the
        # Kubelet to skip pulling an image it already has; 'Always' pulls a
        # fresh copy on every run.
        image_pull_policy='Always',
        # Annotations are non-identifying metadata you can attach to the Pod.
        # They can hold a wide range of data, including characters that are
        # not permitted in labels.
        annotations={'key1': 'value1'},
        # Resource specification for the Pod: both CPU and memory requests
        # and limits. An empty Resources() sets none of them.
        resources=pod.Resources(),
        # Path to the Kubernetes config. If no config is specified, it
        # defaults to '~/.kube/config'. The config_file parameter is
        # templated.
        config_file='/home/airflow/composer_kube_config',
        # If true, the content of /airflow/xcom/return.json from the
        # container is also pushed to an XCom when the container ends.
        xcom_push=False,
        # List of Volume objects to pass to the Pod.
        volumes=[],
        # List of VolumeMount objects to pass to the Pod.
        volume_mounts=[],
        # Affinity determines which nodes the Pod can run on based on the
        # config. For more information see:
        # https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
        affinity={})
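
    # Affinity takes the native Kubernetes PodSpec structure. A hedged sketch
    # of pinning the Pod to a single GKE node pool (the pool name 'pool-1' is
    # an assumption); defining this before the operator and passing
    # affinity=scale_down_affinity would pin the Pod there:
    scale_down_affinity = {
        'nodeAffinity': {
            'requiredDuringSchedulingIgnoredDuringExecution': {
                'nodeSelectorTerms': [{
                    'matchExpressions': [{
                        'key': 'cloud.google.com/gke-nodepool',
                        'operator': 'In',
                        'values': ['pool-1'],
                    }]
                }]
            }
        }
    }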

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG('fast_dag_3',
                catchup=False,
                schedule_interval='0 */1 * * *',
                default_args=default_dag_args) as dag:

    def greeting():
        import logging
        logging.info('Hello World!')

    pod_res = pod.Resources(request_memory='500Mi',
                            request_cpu='150m',
                            limit_memory='550Mi',
                            limit_cpu='200m')
    # setattr(pod_res, 'request_memory', '1Mi')
    # setattr(pod_res, 'request_cpu', None)
    # setattr(pod_res, 'limit_cpu', None)
    # An instance of an operator is called a task. In this case, the
    # hello_python task calls the "greeting" Python function.
    hello_python = python_operator.PythonOperator(task_id='hello',
                                                  python_callable=greeting)

    hello_python1 = python_operator.PythonOperator(task_id='hello_1',
                                                   python_callable=greeting)
    # Likewise, the goodbye_bash task calls a Bash script.
    goodbye_bash = bash_operator.BashOperator(task_id='bye',
                                              bash_command='echo Goodbye.')
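
    # An assumed ordering (the >> operator sets task dependencies): run the
    # greetings in sequence, then say goodbye.
    hello_python >> hello_python1 >> goodbye_bash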

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with DAG('Ulta_Casper_2',
         catchup=False,
         schedule_interval='0 */1 * * *',
         concurrency=50,
         max_active_runs=1,
         default_args=default_dag_args) as dag:

    pod_res = pod.Resources(request_memory='100Mi',
                            request_cpu='0.05',
                            limit_memory='120Mi',
                            limit_cpu='0.09')
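    # In Kubernetes CPU units, '0.05' of a core equals '50m' (millicores) and
    # '0.09' equals '90m'; both string forms are accepted.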

    goodbye_bash = bash_operator.BashOperator(task_id='bye',
                                              bash_command='echo Goodbye.')

    kubernetes_min_crawl = [
        kubernetes_pod_operator.KubernetesPodOperator(
            # The ID specified for the task.
            task_id='ulta-crawl-2' + str(i),
            # Name of task you want to run, used to generate Pod ID.
            name='ulta-crawl-2' + str(i),
            # resources=pod_res,
            # Entrypoint of the container, if not specified the Docker container's
            # entrypoint is used. The cmds parameter is templated.
            cmds=["scrapy", "runspider", "/home/git/app/crawl.py"],
    # fixed point in time rather than dynamically, since it is evaluated every
    # time a DAG is parsed. See:
    # https://airflow.apache.org/faq.html#what-s-the-deal-with-start-date

}

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with DAG(
        'Ulta_Casper',
        catchup=False,
        schedule_interval='0 */1 * * *',
        default_args=default_dag_args) as dag:

    pod_res = pod.Resources(request_memory='100Mi',
                            request_cpu='0.05',
                            limit_memory='120Mi',
                            limit_cpu='0.09')

    goodbye_bash = bash_operator.BashOperator(
        task_id='bye',
        bash_command='echo Goodbye.')

    kubernetes_min_crawl = kubernetes_pod_operator.KubernetesPodOperator(
        # The ID specified for the task.
        task_id='ulta-crawl',
        # Name of task you want to run, used to generate Pod ID.
        name='ulta-crawl',
        # resources=pod_res,
        # Entrypoint of the container, if not specified the Docker container's
        # entrypoint is used. The cmds parameter is templated.
        cmds=["scrapy", "runspider","/home/git/app/crawl.py"],
        resources=pod_res,
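
    # An assumed ordering: run the crawl, then the farewell task.
    kubernetes_min_crawl >> goodbye_bash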