def test_volume_mount():
    with mock.patch.object(PodLauncher, 'log') as mock_logger:
        volume_mount = VolumeMount('test-volume',
                                   mount_path='/root/mount_file',
                                   sub_path=None,
                                   read_only=True)

        volume_config = {
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }
        volume = Volume(name='test-volume', configs=volume_config)

        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=["cat /root/mount_file/test.txt"],
            labels={"foo": "bar"},
            volume_mounts=[volume_mount],
            volumes=[volume],
            name="test",
            task_id="task")
        k.execute(None)
        mock_logger.info.assert_any_call(b"retrieved from mount\n")
default_args = {
    "catchup": False,
    "depends_on_past": False,
    "owner": "airflow",
    "on_failure_callback": mm_failed_task,
    "retries": 0,
    "retry_delay": timedelta(minutes=1),
    "sla": timedelta(hours=8),
    "start_date": datetime(2019, 1, 1, 0, 0, 0),
}

# Create the DAG
dag = DAG("blapi", default_args=default_args, schedule_interval="0 * * * *")

volume_config = {"persistentVolumeClaim": {"claimName": "pipelinewise-pv"}}
volume = Volume(name="pipelinewise-volume", configs=volume_config)
volume_mount = VolumeMount(
    "pipelinewise-volume",
    mount_path="/app/.pipelinewise",
    sub_path=None,
    read_only=False,
)

if "cmds" in pod_defaults:
    del pod_defaults["cmds"]

blapi = KubernetesPodOperator(
    **pod_defaults,
    image=PIPELINEWISE_IMAGE,
    task_id="blapi-import",
    name="blapi-import",
"/" + get_user_ids_r_script_path retrieve_user_session_activity_r_script_path = "Services/ELT/DA/getUserIDs.R" retrieve_user_session_activity_r_script_whole_path = script_root_path + \ "/" + retrieve_user_session_activity_r_script_path volume_mount = VolumeMount('git-root-path', mount_path=script_root_path, sub_path=None, read_only=False) volume_config = { "hostPath": { "path": "/home/DA_git_master/DataAnalysis.git", "type": "Directory" } } volume = Volume(name='git-root-path', configs=volume_config) start_date = "{{ ds }}" end_date = "{{ macros.ds_add(next_ds, -1) }}" get_user_ids_task = KubernetesPodOperator( namespace='default', image="bowenkuo/dump-ga-to-bq:1.0.1", cmds=["Rscript"], arguments=[ "--vanilla", get_user_ids_r_script_whole_path, start_date, end_date ], secrets=[service_account_secret_file, client_secret_secret_file], name="main", task_id="get_user_ids", volumes=[volume],
import datetime

from airflow import models
from airflow.contrib.operators import kubernetes_pod_operator
from airflow.operators import dummy_operator
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount

YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)

volume_mount = VolumeMount('test-volume',
                           mount_path='/root/mount_file',
                           sub_path=None,
                           read_only=True)
volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
volume = Volume(name='test-volume', configs=volume_config)

with models.DAG(dag_id='airflow-demo',
                schedule_interval=datetime.timedelta(days=1),
                start_date=YESTERDAY) as dag:
    task1 = kubernetes_pod_operator.KubernetesPodOperator(
        task_id='t1',
        name='task1',
        namespace='airflow',
        image='eu.gcr.io/taiyo-239217/dag:fae4887',
        arguments=["AlphaVantage()"],
        volumes=[],  # the operator argument is "volumes", not "volume"
        volume_mounts=[],
        in_cluster=True,
        xcom_push=True,
        is_delete_operator_pod=True)
###########################
###### Set up volume ######
###########################
volume_mount = VolumeMount('results-volume',
                           mount_path='/outputs',
                           sub_path=None,
                           read_only=False)
volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'results-claim'
    }
}
volume = Volume(name='results-volume', configs=volume_config)

############################
####### Generate DAG #######
############################
default_args = {
    'owner': 'Brandon',
    'depends_on_past': False,
    'start_date': days_ago(0),
    'catchup': False,
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    # equivalent to 1 AWS vCPU, 1 GCP Core, 1 Azure vCore or 1 Hyperthread on
    # a bare-metal intel processor with Hyperthreading.
    # If the CPU request exceeds all of your node's capacities, the Pod will
    # fail to ever get scheduled.
    request_cpu='2',
    # If the memory limit is exceeded, the Pod goes up for termination; if no
    # limit is specified there is no upper bound on the amount of memory it can
    # use. You can also specify a default memory limit on a per-namespace basis.
    limit_memory='100Mi',
    # If the cpu request exceeds your node's capacity, it will fail to ever get
    # scheduled. The m suffix stands for milli-cpus, therefore .5 cpu and 500m
    # cpu are equivalent.
    limit_cpu='500m')

# Creates a volume of type emptyDir without any configs
volumes = [Volume(name='empty-vol', configs={'emptyDir': {}})]
# Used to mount pod level volumes to a running container
volume_mounts = [
    VolumeMount(name='test-vol-mount',
                mount_path='/root/mount_file',
                sub_path=None,
                read_only=False)
]

# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG(dag_id='kubernetes-example',
                default_args=default_dag_args) as dag:
    # Only name, namespace, and image are required to create a
    # KubernetesPodOperator. This operator defaults to using the config file found
start = DummyOperator(task_id='run_this_first', dag=dag)

volume_mount = VolumeMount('airflow-dags',
                           mount_path='/dags',
                           sub_path='dags',
                           read_only=True)
volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'airflow-dags'
    }
}
volume = Volume(name='airflow-dags', configs=volume_config)

file_path = "/root/kubeconfig/kubeconfig"

passing = KubernetesPodOperator(namespace='airflow',
                                image="python:3.6",
                                cmds=["python", "/dags/test-python.py"],
                                labels={"foo": "bar"},
                                name="passing-test",
                                task_id="passing-task",
                                volume_mounts=[volume_mount],
                                volumes=[volume],
                                get_logs=True,
                                in_cluster=True,
                                dag=dag
                                )
import os

from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount

##
# Persistent Volume Configuration
##

## Reference Volume
input_ref_config = {'persistentVolumeClaim': {'claimName': 'pvc-references'}}
input_ref_volume = Volume(name='reference-mount', configs=input_ref_config)
input_ref_mount = VolumeMount(name='reference-mount',
                              mount_path='/rnaseq/ref',
                              sub_path='ref',
                              read_only=True)

# Input Data Volume
input_data_config = {'persistentVolumeClaim': {'claimName': 'pvc-input'}}
input_data_volume = Volume(name='input-mount', configs=input_data_config)
input_data_mount = VolumeMount(name='input-mount',
                               mount_path='/rnaseq/data',
                               sub_path=None,
                               read_only=True)

### Output Volume
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator

##
# Persistent Volume Configuration
##

## Reference Volume
input_ref_mount = VolumeMount(name='reference-mount',
                              mount_path='/mnt/references',
                              sub_path=None,
                              read_only=True)
input_ref_volume = Volume(name='reference-mount',
                          configs={'persistentVolumeClaim': {'claimName': 'pvc-references'}})

# Input Data Volume
input_data_mount = VolumeMount(name='input-mount',
                               mount_path='/mnt/data',
                               sub_path=None,
                               read_only=True)
input_data_volume = Volume(name='input-mount',
                           configs={'persistentVolumeClaim': {'claimName': 'pvc-input'}})

# Temp Data Volume
temp_data_mount = VolumeMount(name='temp-mount',
                              mount_path='/mnt/temp',
                              sub_path=None,
                              read_only=False)
temp_data_volume = Volume(name='temp-mount',
                          configs={'persistentVolumeClaim': {'claimName': 'pvc-airflow1datatemp'}})
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount

with DAG(
    dag_id="chapter11_movielens_kubernetes",
    description="Fetches ratings from the Movielens API using kubernetes.",
    start_date=airflow_utils.dates.days_ago(3),
    schedule_interval="@daily",
) as dag:
    volume_mount = VolumeMount(
        "data-volume", mount_path="/data", sub_path=None, read_only=False
    )

    volume_config = {"persistentVolumeClaim": {"claimName": "data-volume"}}
    volume = Volume(name="data-volume", configs=volume_config)

    fetch_ratings = KubernetesPodOperator(
        task_id="fetch_ratings",
        image="airflowbook/movielens-fetch",
        cmds=["fetch_ratings.py"],
        arguments=[
            "--start_date",
            "{{ds}}",
            "--end_date",
            "{{next_ds}}",
            "--output_path",
            "/data/ratings/{{ds}}.json",
            "--user",
            os.environ["MOVIELENS_USER"],
            "--password",
    }
]

# Input File Volume
volume_in = VolumeMount('inpath',
                        mount_path='/opt/talend/input_files/',
                        sub_path=None,
                        read_only=False)
volume_config_in = {
    'hostPath': {
        'path': '/home/osboxes/talend_kub_airflow/talend/input_files/'
    }
}
volume1 = Volume(name='inpath', configs=volume_config_in)

# Output File Volume
volume_out = VolumeMount('outpath',
                         mount_path='/opt/talend/out_files/',
                         sub_path=None,
                         read_only=False)
volume_config_out = {
    'hostPath': {
        'path': '/home/osboxes/talend_kub_airflow/talend/output_file/'
    }
}
volume2 = Volume(name='outpath', configs=volume_config_out)
    'start_date': datetime(2017, 11, 25),
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=5),
    'provide_context': True
}

dags_volume_mount = VolumeMount('airflow-dags',
                                mount_path='/root/airflow/dags',
                                sub_path=None,
                                read_only=True)
dags_volume_config = {'persistentVolumeClaim': {'claimName': 'airflow-dags'}}
dags_volume = Volume(name='airflow-dags', configs=dags_volume_config)

logs_volume_mount = VolumeMount('airflow-logs',
                                mount_path='/root/airflow/logs',
                                sub_path=None,
                                read_only=True)
logs_volume_config = {'persistentVolumeClaim': {'claimName': 'airflow-logs'}}
logs_volume = Volume(name='airflow-logs', configs=logs_volume_config)

dag = DAG(dag_id='{}'.format(PARENT_DAG_NAME),
          catchup=True,
          default_args=default_args,
          schedule_interval=None)

with dag:
}

volume_mount_code = VolumeMount('my-volume',
                                mount_path='/home/git/',
                                sub_path=None,
                                read_only=True)
volume_mount_key = VolumeMount('google-cloud-key',
                               mount_path='/var/secrets/google',
                               sub_path=None,
                               read_only=False)
volume_config_key = {'secret': {'secretName': 'casper-worker-key'}}
volume_config = {'persistentVolumeClaim': {'claimName': 'my-vol'}}
volume_code = Volume(name='my-volume', configs=volume_config)
volume_key = Volume(name='google-cloud-key', configs=volume_config_key)

default_dag_args = {
    'start_date': datetime(2018, 1, 1),
    'depends_on_past': False,
    'owner': 'Airflow',
    'retries': 2,
    'retry_delay': timedelta(minutes=1),
    # The start_date describes when a DAG is valid / can be run. Set this to a
    # fixed point in time rather than dynamically, since it is evaluated every
    # time a DAG is parsed. See:
    # https://airflow.apache.org/faq.html#what-s-the-deal-with-start-date
}

# Define a DAG (directed acyclic graph) of tasks.
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators import WeatherFileSensor
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.volume_mount import VolumeMount
from airflow.contrib.kubernetes.volume import Volume
from kubernetes import client, config

volume_mount = VolumeMount('test-dir',
                           mount_path='/data',
                           sub_path=None,
                           read_only=False)
volume_config = {'persistentVolumeClaim': {'claimName': 'airflow'}}
volume = Volume(name='test-dir', configs=volume_config)

NAM_BASE_DIR = '/data/weatherdata/nam'
EXECUTE_DIR = '/tmp'
POD_PREFIX = 'run-ungrib'


def clean_completed_pods(**context):
    config.load_kube_config()
    v1 = client.CoreV1Api()
    body = client.V1DeleteOptions()
    ret = v1.list_namespaced_pod('airflow')
    for pod in ret.items:
        if pod.metadata.name.startswith('run-ungrib'):
            v1.delete_namespaced_pod(pod.metadata.name, 'airflow', body)
    'start_date': datetime(2018, 7, 26),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'provide_context': True,
    'retry_delay': timedelta(minutes=5)
}

volume_mount = VolumeMount('cadc-volume',
                           mount_path='/data/cadc',
                           sub_path=None,
                           read_only=False)
volume_config = {'persistentVolumeClaim': {'claimName': 'cadc-volume'}}
volume = Volume(name='cadc-volume', configs=volume_config)

http_conn_id = 'test_netrc'
redis_conn_id = 'redis_default'
output_format = 'csv'
collection = 'VLASS'
dag_id = 'vlass_processing'
datetime_format = '%Y-%m-%d %H:%M:%S'

# TODO - when deploying to have this actually run, catchup=True!!!!!
# and schedule_interval=timedelta(hours=1)
vlass_dag = DAG(dag_id,
                default_args=default_args,
                catchup=True,
                schedule_interval=timedelta(hours=1))
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount
from airflow.operators.dummy_operator import DummyOperator

##
# Persistent Volume Configuration
##

"""
Configuration for PVC claim

Arguments:
    claimName (string): Name of the PVC claim in kubernetes
"""
volume_config = {'persistentVolumeClaim': {'claimName': 'airflow1data'}}
volume = Volume(name='airflow1data', configs=volume_config)

"""
Configuration for Volume Mounting location from PVC

Arguments:
    name (string): Name of the PVC volume request
    mount_path (string): Mount directory in the pod
    sub_path (string): Sub path based on the mount directory
    read_only (boolean): If the mount is read only or not
"""
volume_mount = VolumeMount('airflow1data',
                           mount_path='/mnt/azure',
                           sub_path=None,
                           read_only=False)

args = {'owner': 'airflow', 'start_date': days_ago(2)}
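# A minimal usage sketch (not part of the original snippet): the Volume and
# VolumeMount defined above are normally attached to a task through the
# "volumes" and "volume_mounts" arguments of KubernetesPodOperator. The
# namespace, image, command and task_id here are hypothetical placeholders,
# and in a real DAG file the task would also receive a dag= argument or be
# created inside a DAG context manager.
mount_example = KubernetesPodOperator(
    namespace='default',
    image='ubuntu:18.04',
    cmds=['bash', '-c'],
    arguments=['ls /mnt/azure'],
    name='volume-mount-example',
    task_id='volume_mount_example',
    volumes=[volume],
    volume_mounts=[volume_mount],
    get_logs=True,
)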
# Persistent Volume Configuration
##

"""
Configuration for PVC claim

Arguments:
    claimName (string): Name of the PVC claim in kubernetes
"""
volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'pvc-blobfuse-flexvol'
    }
}
volume = Volume(name='flexvol-mount', configs=volume_config)

"""
Configuration for Volume Mounting location from PVC

Arguments:
    name (string): Name of the PVC volume request
    mount_path (string): Mount directory in the pod
    sub_path (string): Sub path based on the mount directory
    read_only (boolean): If the mount is read only or not
"""
volume_mount = VolumeMount('flexvol-mount',
                           mount_path='/mnt/azure',
                           sub_path=None,
                           read_only=True)

args = {
def convert_volumes(pod_spec: V1PodSpec) -> List[Volume]:
    volumes: List[V1Volume] = pod_spec.volumes
    return [
        Volume(name=volume.name, configs=to_swagger_dict(volume))
        for volume in volumes or []
    ]
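# Hypothetical usage sketch for convert_volumes, assuming it and its
# to_swagger_dict helper are importable from the same module: build a
# V1PodSpec containing one emptyDir volume and convert it into Airflow
# contrib Volume objects.
from kubernetes.client import V1EmptyDirVolumeSource, V1PodSpec, V1Volume

pod_spec = V1PodSpec(
    containers=[],
    volumes=[V1Volume(name='scratch', empty_dir=V1EmptyDirVolumeSource())],
)
airflow_volumes = convert_volumes(pod_spec)
# airflow_volumes is a list with a single Volume named 'scratch' whose
# configs dict is whatever to_swagger_dict produces for that V1Volume.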
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount

##
# Persistent Volume Configuration
##

## Input Volume
volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'pvc-competitions-airflow2'
    }
}
volume = Volume(name='input-mount', configs=volume_config)
volume_mount = VolumeMount('input-mount',
                           mount_path='/mnt/azure',
                           sub_path=None,
                           read_only=True)

## Output Volume
volume2_config = {
    'persistentVolumeClaim': {
        'claimName': 'pvc-competitions-airflow3'
    }
}
volume2 = Volume(name='output-mount', configs=volume2_config)
volume2_mount = VolumeMount('output-mount',
                            mount_path='/mnt/azure2',
    'depends_on_past': False,
    'start_date': '2020-03-20 09:00:00',
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('random_number_generator',
          default_args=default_args,
          schedule_interval='*/5 * * * *',
          catchup=False)

volume_config = {'persistentVolumeClaim': {'claimName': 'file-store'}}
volume = Volume(name='file-store', configs=volume_config)
volume_mount = VolumeMount('file-store',
                           mount_path='/mnt/file-store',
                           sub_path=None,
                           read_only=False)

create_file = KubernetesPodOperator(
    namespace='airflow',
    image="debian:9.4",
    cmds=["bash", "-c"],
    arguments=[
        'echo $(($RANDOM%10000+10000)) >> /mnt/file-store/random_number-$(date "+%Y.%m.%d-%H.%M.%S").txt'
    ],
    name="generate-random-number",
    task_id="generate-random-number",
    volumes=[volume],
    mount_path='/input-dataset',
    sub_path=None,
    read_only=False)
output_volume_mount = VolumeMount('data-volume',
                                  mount_path='/output-dataset',
                                  sub_path=None,
                                  read_only=False)

volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'airflow-dags'  # uses the persistentVolumeClaim given in the Kube yaml
    }
}
in_volume = Volume(name='input-dataset', configs=volume_config)
out_volume = Volume(name='output-dataset', configs=volume_config)

step1 = KubernetesPodOperator(namespace='airflow',
                              image="cestum/airflow-demo:prep-input",
                              cmds=[],
                              arguments=["K8S-Airflow"],
                              labels={"foo": "bar"},
                              name="prep-input",
                              volumes=[in_volume],
                              volume_mounts=[input_volume_mount],
                              task_id="prep-input",
                              get_logs=True,
                              dag=dag,
                              in_cluster=True)