        # The environment variables to be initialized in the container.
        # env_vars are templated.
        env_vars={'EXAMPLE_VAR': '/example/value'},
        # If true, logs the stdout output of the container. Defaults to True.
        get_logs=True,
        # Determines when to pull a fresh image. 'IfNotPresent' causes the
        # kubelet to skip pulling an image that already exists on the node;
        # if you want to always pull a new image, set it to 'Always'.
        image_pull_policy='Always',
        # Annotations are non-identifying metadata you can attach to the Pod.
        # They can hold a large range of data and can include characters that
        # are not permitted in labels.
        annotations={'key1': 'value1'},
        # Resource specification for the Pod; this lets you set both cpu and
        # memory limits and requests.
        resources=pod.Resources().__dict__,
        # Specifies the path to the kubernetes config. If no config is
        # specified, it defaults to '~/.kube/config'. The config_file is
        # templated.
        config_file='/home/airflow/composer_kube_config',
        # If true, the content of /airflow/xcom/return.json from the container
        # will also be pushed to an XCom when the container ends.
        xcom_push=False,
        # List of Volume objects to pass to the Pod.
        volumes=[],
        # List of VolumeMount objects to pass to the Pod.
        volume_mounts=[],
        # Affinity determines which nodes the Pod can run on based on the
        # config. For more information see:
        # https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
        affinity={})
    # [END composer_kubernetespodoperator_fullconfig]
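    # A sketch, not part of the original sample: the affinity argument above
    # accepts a dict with the same shape as a Kubernetes pod affinity spec.
    # The node pool value 'pool-0' below is purely illustrative.
    example_affinity = {
        'nodeAffinity': {
            'requiredDuringSchedulingIgnoredDuringExecution': {
                'nodeSelectorTerms': [{
                    'matchExpressions': [{
                        'key': 'cloud.google.com/gke-nodepool',
                        'operator': 'In',
                        'values': ['pool-0'],
                    }]
                }]
            }
        }
    }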
    # The start_date describes when a DAG is valid / can be run. Set this to a
    # fixed point in time rather than dynamically, since it is evaluated every
    # time a DAG is parsed. See:
    # https://airflow.apache.org/faq.html#what-s-the-deal-with-start-date
}

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG('node_scale_down_3',
                catchup=False,
                schedule_interval='*/1 * * * *',
                default_args=default_dag_args) as dag:

    pod_res = pod.Resources(request_memory='10Mi',
                            request_cpu='10m',
                            limit_memory='15Mi',
                            limit_cpu='15m')
    # setattr(pod_res, 'request_memory', '1Mi')
    # setattr(pod_res, 'request_cpu', None)
    # setattr(pod_res, 'limit_cpu', None)

    # An instance of an operator is called a task. In this case, the
    # scale_down task runs a container whose entrypoint only echoes a message.
    scale_down = kubernetes_pod_operator.KubernetesPodOperator(
        # The ID specified for the task.
        task_id='node-scale_down',
        # Name of the task you want to run, used to generate the Pod ID.
        name='scale-down',
        # resources=pod_res,
        # Entrypoint of the container; if not specified, the Docker
        # container's entrypoint is used. The cmds parameter is templated.
        cmds=["echo", "I am here to scale down"],
        # The environment variables to be initialized in the container.
        # env_vars are templated.
        env_vars={'EXAMPLE_VAR': '/example/value'},
        # If true, logs the stdout output of the container. Defaults to True.
        get_logs=True,
        # Determines when to pull a fresh image. 'IfNotPresent' causes the
        # kubelet to skip pulling an image that already exists on the node;
        # if you want to always pull a new image, set it to 'Always'.
        image_pull_policy='Always',
        # Annotations are non-identifying metadata you can attach to the Pod.
        # They can hold a large range of data and can include characters that
        # are not permitted in labels.
        annotations={'key1': 'value1'},
        # Resource specification for the Pod; this lets you set both cpu and
        # memory limits and requests.
        resources=pod.Resources(),
        # Specifies the path to the kubernetes config. If no config is
        # specified, it defaults to '~/.kube/config'. The config_file is
        # templated.
        config_file='/home/airflow/composer_kube_config',
        # If true, the content of /airflow/xcom/return.json from the container
        # will also be pushed to an XCom when the container ends.
        xcom_push=False,
        # List of Volume objects to pass to the Pod.
        volumes=[],
        # List of VolumeMount objects to pass to the Pod.
        volume_mounts=[],
        # Affinity determines which nodes the Pod can run on based on the
        # config. For more information see:
        # https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
        affinity={})
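    # A sketch, assuming the Airflow 1.10 contrib API that this file already
    # relies on elsewhere (pod.Resources): Volume and VolumeMount objects of
    # this form could be passed to the volumes= and volume_mounts= arguments
    # above. The claim name and mount path are illustrative only, and the
    # imports are shown here only to keep the sketch self-contained.
    from airflow.contrib.kubernetes.volume import Volume
    from airflow.contrib.kubernetes.volume_mount import VolumeMount
    example_volume = Volume(
        name='example-volume',
        configs={'persistentVolumeClaim': {'claimName': 'example-claim'}})
    example_volume_mount = VolumeMount(
        'example-volume',
        mount_path='/mnt/data',
        sub_path=None,
        read_only=False)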
}

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG('fast_dag_3',
                catchup=False,
                schedule_interval='0 */1 * * *',
                default_args=default_dag_args) as dag:

    def greeting():
        import logging
        logging.info('Hello World!')

    pod_res = pod.Resources(request_memory='500Mi',
                            request_cpu='150m',
                            limit_memory='550Mi',
                            limit_cpu='200m')
    # setattr(pod_res, 'request_memory', '1Mi')
    # setattr(pod_res, 'request_cpu', None)
    # setattr(pod_res, 'limit_cpu', None)

    # An instance of an operator is called a task. In this case, the
    # hello_python task calls the "greeting" Python function.
    hello_python = python_operator.PythonOperator(task_id='hello',
                                                  python_callable=greeting)
    hello_python1 = python_operator.PythonOperator(task_id='hello_1',
                                                   python_callable=greeting)

    # Likewise, the goodbye_bash task runs a Bash command.
    goodbye_bash = bash_operator.BashOperator(task_id='bye',
                                              bash_command='echo Goodbye.')
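    # One possible wiring, shown as a sketch only; the fragment above does not
    # include the original dependency ordering. This chains the two Python
    # tasks and the Bash task so they run one after another.
    hello_python >> hello_python1 >> goodbye_bash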
    # time a DAG is parsed. See:
    # https://airflow.apache.org/faq.html#what-s-the-deal-with-start-date
}

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with DAG('Ulta_Casper_2',
         catchup=False,
         schedule_interval='0 */1 * * *',
         concurrency=50,
         max_active_runs=1,
         default_args=default_dag_args) as dag:

    pod_res = pod.Resources(request_memory='100Mi',
                            request_cpu='0.05',
                            limit_memory='120Mi',
                            limit_cpu='0.09')

    goodbye_bash = bash_operator.BashOperator(task_id='bye',
                                              bash_command='echo Goodbye.')

    kubernetes_min_crawl = [
        kubernetes_pod_operator.KubernetesPodOperator(
            # The ID specified for the task.
            task_id='ulta-crawl-2' + str(i),
            # Name of task you want to run, used to generate Pod ID.
            name='ulta-crawl-2' + str(i),
            # resources=pod_res,
            # Entrypoint of the container, if not specified the Docker
            # container's entrypoint is used. The cmds parameter is templated.
            cmds=["scrapy", "runspider", "/home/git/app/crawl.py"],
    # fixed point in time rather than dynamically, since it is evaluated every
    # time a DAG is parsed. See:
    # https://airflow.apache.org/faq.html#what-s-the-deal-with-start-date
}

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with DAG('Ulta_Casper',
         catchup=False,
         schedule_interval='0 */1 * * *',
         default_args=default_dag_args) as dag:

    pod_res = pod.Resources(request_memory='100Mi',
                            request_cpu='0.05',
                            limit_memory='120Mi',
                            limit_cpu='0.09')

    goodbye_bash = bash_operator.BashOperator(task_id='bye',
                                              bash_command='echo Goodbye.')

    kubernetes_min_crawl = kubernetes_pod_operator.KubernetesPodOperator(
        # The ID specified for the task.
        task_id='ulta-crawl',
        # Name of task you want to run, used to generate Pod ID.
        name='ulta-crawl',
        # resources=pod_res,
        # Entrypoint of the container, if not specified the Docker
        # container's entrypoint is used. The cmds parameter is templated.
        cmds=["scrapy", "runspider", "/home/git/app/crawl.py"],
        resources=pod_res,