Example #1
from unittest import mock

from airflow.contrib.kubernetes.pod_launcher import PodLauncher
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator


def test_volume_mount():
    with mock.patch.object(PodLauncher, 'log') as mock_logger:
        volume_mount = VolumeMount('test-volume',
                                   mount_path='/root/mount_file',
                                   sub_path=None,
                                   read_only=True)

        volume_config = {
            'persistentVolumeClaim': {
                'claimName': 'test-volume'
            }
        }
        volume = Volume(name='test-volume', configs=volume_config)
        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=["cat /root/mount_file/test.txt"],
            labels={"foo": "bar"},
            volume_mounts=[volume_mount],
            volumes=[volume],
            name="test",
            task_id="task")
        k.execute(None)
        mock_logger.info.assert_any_call(b"retrieved from mount\n")
Example #2
default_args = {
    "catchup": False,
    "depends_on_past": False,
    "owner": "airflow",
    "on_failure_callback": mm_failed_task,
    "retries": 0,
    "retry_delay": timedelta(minutes=1),
    "sla": timedelta(hours=8),
    "start_date": datetime(2019, 1, 1, 0, 0, 0),
}

# Create the DAG
dag = DAG("blapi", default_args=default_args, schedule_interval="0 * * * *")

volume_config = {"persistentVolumeClaim": {"claimName": "pipelinewise-pv"}}
volume = Volume(name="pipelinewise-volume", configs=volume_config)
volume_mount = VolumeMount(
    "pipelinewise-volume",
    mount_path="/app/.pipelinewise",
    sub_path=None,
    read_only=False,
)

if "cmds" in pod_defaults:
    del pod_defaults["cmds"]

blapi = KubernetesPodOperator(
    **pod_defaults,
    image=PIPELINEWISE_IMAGE,
    task_id="blapi-import",
    name="blapi-import",
Example #3
    "/" + get_user_ids_r_script_path
retrieve_user_session_activity_r_script_path = "Services/ELT/DA/getUserIDs.R"
retrieve_user_session_activity_r_script_whole_path = script_root_path + \
    "/" + retrieve_user_session_activity_r_script_path

volume_mount = VolumeMount('git-root-path',
                           mount_path=script_root_path,
                           sub_path=None,
                           read_only=False)
volume_config = {
    "hostPath": {
        "path": "/home/DA_git_master/DataAnalysis.git",
        "type": "Directory"
    }
}
volume = Volume(name='git-root-path', configs=volume_config)

start_date = "{{ ds }}"  # rendered at runtime: the execution date (YYYY-MM-DD)
end_date = "{{ macros.ds_add(next_ds, -1) }}"  # one day before the next execution date

get_user_ids_task = KubernetesPodOperator(
    namespace='default',
    image="bowenkuo/dump-ga-to-bq:1.0.1",
    cmds=["Rscript"],
    arguments=[
        "--vanilla", get_user_ids_r_script_whole_path, start_date, end_date
    ],
    secrets=[service_account_secret_file, client_secret_secret_file],
    name="main",
    task_id="get_user_ids",
    volumes=[volume],
    volume_mounts=[volume_mount])
Example #4
import datetime
from airflow import models
from airflow.contrib.operators import kubernetes_pod_operator
from airflow.operators import dummy_operator
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount

YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)

volume_mount = VolumeMount('test-volume',
                           mount_path='/root/mount_file',
                           sub_path=None,
                           read_only=True)
volume_config = {'persistentVolumeClaim': {'claimName': 'test-volume'}}
volume = Volume(name='test-volume', configs=volume_config)

with models.DAG(dag_id='airflow-demo',
                schedule_interval=datetime.timedelta(days=1),
                start_date=YESTERDAY) as dag:

    task1 = kubernetes_pod_operator.KubernetesPodOperator(
        task_id='t1',
        name='task1',
        namespace='airflow',
        image='eu.gcr.io/taiyo-239217/dag:fae4887',
        arguments=["AlphaVantage()"],
        volumes=[],
        volume_mounts=[],
        in_cluster=True,
        xcom_push=True,
        is_delete_operator_pod=True)
Example #5
###########################
###### Set up volume ######
###########################
volume_mount = VolumeMount('results-volume',
                           mount_path='/outputs',
                           sub_path=None,
                           read_only=False)

volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'results-claim'
    }
}

volume = Volume(name='results-volume', configs=volume_config)


############################
####### Generate DAG #######
############################

default_args = {
    'owner': 'Brandon',
    'depends_on_past': False,
    'start_date': days_ago(0),
    'catchup': False,
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
}
Example #6
    # equivalent to 1 AWS vCPU, 1 GCP Core, 1 Azure vCore or 1 Hyperthread on
    # a bare-metal intel processor with Hyperthreading
    # If CPU request exceeds all of your node's capacities it will fail to ever
    # get scheduled.
    request_cpu='2',
    # If the memory limit is exceeded, the Pod becomes a candidate for
    # termination; if no limit is specified, there is no upper bound on the
    # amount of memory it can use. You can also specify a default memory
    # limit on a per-namespace basis.
    limit_memory='100Mi',
    # If cpu request exceeds your node's capacity, it will fail to ever get
    # scheduled. The m suffix stands for milli-cpus, therefore .5 cpu and 500m
    # cpu are equivalent.
    limit_cpu='500m')
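
# A hedged sketch of where the settings above land: on Airflow 1.10.4+ the
# request/limit keys can be passed to KubernetesPodOperator as a plain dict
# via `resources`; older 1.10.x releases wrapped them in
# airflow.contrib.kubernetes.pod.Resources instead. The task_id, name, and
# image are illustrative, and this assumes the GCP-style import
# `from airflow.contrib.operators import kubernetes_pod_operator`.
sized_pod = kubernetes_pod_operator.KubernetesPodOperator(
    task_id='sized-pod',
    name='sized-pod',
    namespace='default',
    image='ubuntu:16.04',
    resources={'request_cpu': '500m',
               'limit_memory': '500Mi',
               'limit_cpu': '1'})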

# Creates a volume of type emptyDir without any configs
volumes = [Volume(name='empty-vol', configs={'emptyDir': {}})]

# Used to mount pod-level volumes into a running container. Note that a
# VolumeMount's name must match the name of a Volume defined on the pod
# (such as 'empty-vol' above) for the mount to resolve.
volume_mounts = [
    VolumeMount(name='test-vol-mount',
                mount_path='/root/mount_file',
                sub_path=None,
                read_only=False)
]

# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG(dag_id='kubernetes-example',
                default_args=default_dag_args) as dag:
    # Only name, namespace, and image are required to create a
    # KubernetesPodOperator. This operator defaults to using the config file found
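    # A minimal sketch matching the comment above: only name, namespace, and
    # image are required (task_id is needed by Airflow itself). The IDs and
    # image used here are illustrative.
    kubernetes_min_pod = kubernetes_pod_operator.KubernetesPodOperator(
        task_id='pod-ex-minimum',
        name='pod-ex-minimum',
        namespace='default',
        image='ubuntu:16.04')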
Example #7
start = DummyOperator(task_id='run_this_first', dag=dag)

volume_mount = VolumeMount('airflow-dags',
                           mount_path='/dags',
                           sub_path='dags',
                           read_only=True)

volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'airflow-dags'
    }
}

volume = Volume(name='airflow-dags', configs=volume_config)
file_path = "/root/kubeconfig/kubeconfig"

passing = KubernetesPodOperator(namespace='airflow',
                                image="python:3.6",
                                cmds=["python", "/dags/test-python.py"],
                                labels={"foo": "bar"},
                                name="passing-test",
                                task_id="passing-task",
                                volume_mounts=[volume_mount],
                                volumes=[volume],
                                get_logs=True,
                                in_cluster=True,
                                dag=dag)
Example #8
import os

from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount

##
# Persistent Volume Configuration
##

## Reference Volume
input_ref_config = {'persistentVolumeClaim': {'claimName': 'pvc-references'}}

input_ref_volume = Volume(name='reference-mount', configs=input_ref_config)
input_ref_mount = VolumeMount(name='reference-mount',
                              mount_path='/rnaseq/ref',
                              sub_path='ref',
                              read_only=True)

## Input Data Volume
input_data_config = {'persistentVolumeClaim': {'claimName': 'pvc-input'}}

input_data_volume = Volume(name='input-mount', configs=input_data_config)
input_data_mount = VolumeMount(name='input-mount',
                               mount_path='/rnaseq/data',
                               sub_path=None,
                               read_only=True)

## Output Volume
Example #9
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator

##
# Persistent Volume Configuration
##


## Reference Volume
input_ref_mount = VolumeMount(name='reference-mount',
                              mount_path='/mnt/references',
                              sub_path=None,
                              read_only=True)
input_ref_volume = Volume(name='reference-mount', configs={'persistentVolumeClaim': {'claimName': 'pvc-references'}})

# Input Data Volume
input_data_mount = VolumeMount(name='input-mount',
                               mount_path='/mnt/data',
                               sub_path=None,
                               read_only=True)
input_data_volume = Volume(name='input-mount', configs={'persistentVolumeClaim': {'claimName': 'pvc-input'}})

# Temp Data Volume
temp_data_mount = VolumeMount(name='temp-mount',
                              mount_path='/mnt/temp',
                              sub_path=None,
                              read_only=False)
temp_data_volume = Volume(name='temp-mount', configs={'persistentVolumeClaim': {'claimName': 'pvc-airflow1datatemp'}})
Example #10
import os

from airflow import DAG, utils as airflow_utils
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

with DAG(
    dag_id="chapter11_movielens_kubernetes",
    description="Fetches ratings from the Movielens API using kubernetes.",
    start_date=airflow_utils.dates.days_ago(3),
    schedule_interval="@daily",
) as dag:

    volume_mount = VolumeMount(
        "data-volume", mount_path="/data", sub_path=None, read_only=False
    )

    volume_config = {"persistentVolumeClaim": {"claimName": "data-volume"}}
    volume = Volume(name="data-volume", configs=volume_config)

    fetch_ratings = KubernetesPodOperator(
        task_id="fetch_ratings",
        image="airflowbook/movielens-fetch",
        cmds=["fetch_ratings.py"],
        arguments=[
            "--start_date",
            "{{ds}}",
            "--end_date",
            "{{next_ds}}",
            "--output_path",
            "/data/ratings/{{ds}}.json",
            "--user",
            os.environ["MOVIELENS_USER"],
            "--password",
        }
    ]

Example #11
    # Input File Volume
    volume_in = VolumeMount('inpath',
                            mount_path='/opt/talend/input_files/',
                            sub_path=None,
                            read_only=False)

    volume_config_in = {
        'hostPath': {
            'path': '/home/osboxes/talend_kub_airflow/talend/input_files/'
        }
    }
    volume1 = Volume(name='inpath', configs=volume_config_in)


    # Output File Volume
    volume_out = VolumeMount('outpath',
                             mount_path='/opt/talend/out_files/',
                             sub_path=None,
                             read_only=False)

    volume_config_out = {
        'hostPath': {
            'path': '/home/osboxes/talend_kub_airflow/talend/output_file/'
        }
    }
    volume2 = Volume(name='outpath', configs=volume_config_out)
Example #12
default_args = {
    'start_date': datetime(2017, 11, 25),
    'depends_on_past': False,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=5),
    'provide_context': True
}

dags_volume_mount = VolumeMount('airflow-dags',
                                mount_path='/root/airflow/dags',
                                sub_path=None,
                                read_only=True)

dags_volume_config = {'persistentVolumeClaim': {'claimName': 'airflow-dags'}}
dags_volume = Volume(name='airflow-dags', configs=dags_volume_config)

logs_volume_mount = VolumeMount('airflow-logs',
                                mount_path='/root/airflow/logs',
                                sub_path=None,
                                read_only=True)

logs_volume_config = {'persistentVolumeClaim': {'claimName': 'airflow-logs'}}
logs_volume = Volume(name='airflow-logs', configs=logs_volume_config)

dag = DAG(dag_id=PARENT_DAG_NAME,
          catchup=True,
          default_args=default_args,
          schedule_interval=None)

with dag:
Example #13

volume_mount_code = VolumeMount('my-volume',
                                mount_path='/home/git/',
                                sub_path=None,
                                read_only=True)

volume_mount_key = VolumeMount('google-cloud-key',
                               mount_path='/var/secrets/google',
                               sub_path=None,
                               read_only=False)
volume_config_key = {'secret': {'secretName': 'casper-worker-key'}}

volume_config = {'persistentVolumeClaim': {'claimName': 'my-vol'}}

volume_code = Volume(name='my-volume', configs=volume_config)
volume_key = Volume(name='google-cloud-key', configs=volume_config_key)

default_dag_args = {
    # The start_date describes when a DAG is valid / can be run. Set this to a
    # fixed point in time rather than dynamically, since it is evaluated every
    # time a DAG is parsed. See:
    # https://airflow.apache.org/faq.html#what-s-the-deal-with-start-date
    'start_date': datetime(2018, 1, 1),
    'depends_on_past': False,
    'owner': 'Airflow',
    'retries': 2,
    'retry_delay': timedelta(minutes=1),
}

# Define a DAG (directed acyclic graph) of tasks.
Example #14
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators import WeatherFileSensor
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.volume_mount import VolumeMount
from airflow.contrib.kubernetes.volume import Volume

from kubernetes import client, config

volume_mount = VolumeMount('test-dir',
                           mount_path='/data',
                           sub_path=None,
                           read_only=False)

volume_config = {'persistentVolumeClaim': {'claimName': 'airflow'}}
volume = Volume(name='test-dir', configs=volume_config)

NAM_BASE_DIR = '/data/weatherdata/nam'
EXECUTE_DIR = '/tmp'
POD_PREFIX = 'run-ungrib'


def clean_completed_pods(**context):
    """Delete every pod in the 'airflow' namespace whose name starts with POD_PREFIX."""
    config.load_kube_config()
    v1 = client.CoreV1Api()
    body = client.V1DeleteOptions()
    ret = v1.list_namespaced_pod('airflow')

    for pod in ret.items:
        if pod.metadata.name.startswith(POD_PREFIX):
            v1.delete_namespaced_pod(pod.metadata.name, 'airflow', body)
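
# A sketch of one way to schedule the callable above; PythonOperator is
# imported at the top of this example, but the task_id and the `dag` object
# are assumptions, since the excerpt does not show the DAG definition.
clean_pods = PythonOperator(task_id='clean_completed_pods',
                            python_callable=clean_completed_pods,
                            provide_context=True,
                            dag=dag)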
Example #15
default_args = {
    'start_date': datetime(2018, 7, 26),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'provide_context': True,
    'retry_delay': timedelta(minutes=5)
}

volume_mount = VolumeMount('cadc-volume',
                           mount_path='/data/cadc',
                           sub_path=None,
                           read_only=False)

volume_config = {'persistentVolumeClaim': {'claimName': 'cadc-volume'}}
volume = Volume(name='cadc-volume', configs=volume_config)

http_conn_id = 'test_netrc'
redis_conn_id = 'redis_default'
output_format = 'csv'
collection = 'VLASS'
dag_id = 'vlass_processing'
datetime_format = '%Y-%m-%d %H:%M:%S'

# TODO - when deploying this so it actually runs, make sure catchup=True
# and schedule_interval=timedelta(hours=1)

vlass_dag = DAG(dag_id,
                default_args=default_args,
                catchup=True,
                schedule_interval=timedelta(hours=1))
Example #16
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount
from airflow.operators.dummy_operator import DummyOperator

##
# Persistent Volume Configuration
##
"""
Configuration for PVC claim
Arguments:
claimName (string): Name of the PVC claim in kubernetes
"""
volume_config = {'persistentVolumeClaim': {'claimName': 'airflow1data'}}

volume = Volume(name='airflow1data', configs=volume_config)
"""
Configuration for Volume Mounting location from PVC
Arguments:
name (string): Name of the PVC volume request
mount_path (string): Mount directory in the pod
sub_path (string): Sub path based on the mount directory
read_only (boolean): If the mount is read only or not
"""
volume_mount = VolumeMount('airflow1data',
                           mount_path='/mnt/azure',
                           sub_path=None,
                           read_only=False)

args = {'owner': 'airflow', 'start_date': days_ago(2)}
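
The docstrings above describe the two halves of the configuration; a minimal sketch of attaching both to a task, in which the image, cmds, and IDs are illustrative assumptions, might look like:

mounted_pod = KubernetesPodOperator(namespace='default',
                                    image='ubuntu:16.04',
                                    cmds=['ls', '/mnt/azure'],
                                    name='mounted-pod',
                                    task_id='mounted-pod',
                                    volumes=[volume],
                                    volume_mounts=[volume_mount])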
Example #17
##
# Persistent Volume Configuration
##

"""
Configuration for the PVC claim
Arguments:
claimName (string): Name of the PVC claim in Kubernetes
"""
volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'pvc-blobfuse-flexvol'
    }
}

volume = Volume(name='flexvol-mount', configs=volume_config)

"""
Configuration for Volume Mounting location from PVC
Arguments:
name (string): Name of the PVC volume request
mount_path (string): Mount directory in the pod
sub_path (string): Sub path based on the mount directory
read_only (boolean): If the mount is read only or not
"""
volume_mount = VolumeMount('flexvol-mount',
                           mount_path='/mnt/azure',
                           sub_path=None,
                           read_only=True)

args = {
Example #18
from typing import List

from airflow.contrib.kubernetes.volume import Volume
from kubernetes.client import V1PodSpec, V1Volume


def convert_volumes(pod_spec: V1PodSpec) -> List[Volume]:
    volumes: List[V1Volume] = pod_spec.volumes
    return [
        Volume(name=volume.name, configs=to_swagger_dict(volume)) for volume in volumes or []
    ]
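
to_swagger_dict is a project-local helper that the excerpt does not show. A minimal stand-in built on the kubernetes client's own serializer (an assumption, not the original helper) makes the function runnable:

from kubernetes.client import ApiClient, V1Container, V1EmptyDirVolumeSource


def to_swagger_dict(obj):
    # Stand-in: the official client serializes its models to plain camelCase dicts.
    return ApiClient().sanitize_for_serialization(obj)


spec = V1PodSpec(containers=[V1Container(name='main')],
                 volumes=[V1Volume(name='data', empty_dir=V1EmptyDirVolumeSource())])
print(convert_volumes(spec))  # -> [Volume(name='data', configs={'name': 'data', 'emptyDir': {}})]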
Example #19
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.contrib.kubernetes.volume import Volume
from airflow.contrib.kubernetes.volume_mount import VolumeMount

##
# Persistent Volume Configuration
##

## Input Volume
volume_config = {
    'persistentVolumeClaim': {
        'claimName': 'pvc-competitions-airflow2'
    }
}

volume = Volume(name='input-mount', configs=volume_config)
volume_mount = VolumeMount('input-mount',
                           mount_path='/mnt/azure',
                           sub_path=None,
                           read_only=True)

## Output Volume
volume2_config = {
    'persistentVolumeClaim': {
        'claimName': 'pvc-competitions-airflow3'
    }
}

volume2 = Volume(name='output-mount', configs=volume2_config)
volume2_mount = VolumeMount('output-mount',
                            mount_path='/mnt/azure2',
                            sub_path=None,
                            read_only=False)
Example #20
default_args = {
    'depends_on_past': False,
    'start_date': '2020-03-20 09:00:00',
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('random_number_generator',
          default_args=default_args,
          schedule_interval='*/5 * * * *',
          catchup=False)

volume_config = {'persistentVolumeClaim': {'claimName': 'file-store'}}
volume = Volume(name='file-store', configs=volume_config)
volume_mount = VolumeMount('file-store',
                           mount_path='/mnt/file-store',
                           sub_path=None,
                           read_only=False)

create_file = KubernetesPodOperator(
    namespace='airflow',
    image="debian:9.4",
    cmds=["bash", "-c"],
    arguments=[
        'echo $(($RANDOM%10000+10000)) >> /mnt/file-store/random_number-$(date "+%Y.%m.%d-%H.%M.%S").txt'
    ],
    name="generate-random-number",
    task_id="generate-random-number",
    volumes=[volume],
    volume_mounts=[volume_mount])
Example #21
input_volume_mount = VolumeMount('data-volume',
                                 mount_path='/input-dataset',
                                 sub_path=None,
                                 read_only=False)

output_volume_mount = VolumeMount('data-volume',
                                  mount_path='/output-dataset',
                                  sub_path=None,
                                  read_only=False)
volume_config = {
    'persistentVolumeClaim': {
        # uses the persistentVolumeClaim given in the Kube yaml
        'claimName': 'airflow-dags'
    }
}

in_volume = Volume(name='input-dataset', configs=volume_config)
out_volume = Volume(name='output-dataset', configs=volume_config)

step1 = KubernetesPodOperator(namespace='airflow',
                              image="cestum/airflow-demo:prep-input",
                              cmds=[],
                              arguments=["K8S-Airflow"],
                              labels={"foo": "bar"},
                              name="prep-input",
                              volumes=[in_volume],
                              volume_mounts=[input_volume_mount],
                              task_id="prep-input",
                              get_logs=True,
                              dag=dag,
                              in_cluster=True)