Example #1
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes(
         [
             Attribute(
                 attribute_name="module",
                 parent_fields=["task_parameters"],
                 comment="Name of the module to import from. E.g.: airflow.opeartors.python_operator",
             ),
             Attribute(
                 attribute_name="class_name",
                 parent_fields=["task_parameters"],
                 comment="Name of the operator class. E.g.: PythonBranchOperator",
             ),
             Attribute(
                 attribute_name="python",
                 parent_fields=["task_parameters"],
                 required=False,
                 comment="Relative path to python file that implements the function",
             ),
             Attribute(
                 attribute_name="function",
                 parent_fields=["task_parameters"],
                 required=False,
                 comment="Name of the function",
             ),
         ]
     )
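Every example on this page drives the same API. As a reference point, here is a minimal sketch of what the Attribute container might look like, reconstructed purely from the keyword arguments observed in these examples; the real class in the project may well differ.
 from dataclasses import dataclass
 from typing import Any, Callable, List, Optional

 @dataclass
 class Attribute:
     """Hypothetical reconstruction of the Attribute container used in these examples."""
     attribute_name: str
     parent_fields: Optional[List[str]] = None         # nesting path in the config, e.g. ["task_parameters"]
     required: bool = True                              # optional keys are marked with required=False
     nullable: bool = False                             # key should be present but its value may be null
     validator: Optional[Callable[[Any], Any]] = None   # e.g. int, bool, list, or a lambda
     auto_value: Any = None                             # value filled in automatically, e.g. orig_cls.ref_name
     format_help: Optional[str] = None                  # short format hint, e.g. "dictionary" or "list"
     comment: Optional[str] = None                      # free-text explanation rendered next to the key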
Example #2
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(attribute_name="database_type",
                   comment="mysql, postgresql, etc"),
         Attribute(attribute_name="conn_id"),
         Attribute(attribute_name="table"),
     ])
Example #3
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes(
         [
             Attribute(
                 attribute_name="schema", comment="Leave it empty for system tables"
             ),
             Attribute(attribute_name="table"),
         ]
     )
Example #4
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(
             attribute_name="folder",
             format_help="ID shown in the URL of the Google Drive folder",
         ),
         Attribute(attribute_name="file_name"),
     ])
Example #5
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(
             attribute_name="s3_protocol",
             required=False,
             comment="S3 protocol: s3a/s3/s3n",
         ),
         Attribute(attribute_name="bucket"),
         Attribute(attribute_name="path"),
     ])
Example #6
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(attribute_name="type", auto_value=orig_cls.ref_name),
         Attribute(attribute_name="description"),
         Attribute(
             attribute_name="inputs",
             format_help="list",
             comment="Use dagger init-io cli",
         ),
         Attribute(
             attribute_name="outputs",
             format_help="list",
             comment="Use dagger init-io cli",
         ),
         Attribute(attribute_name="pool", required=False),
         Attribute(attribute_name="timeout_in_seconds",
                   required=False,
                   format_help="int",
                   validator=int),
         Attribute(
             attribute_name="airflow_task_parameters",
             nullable=True,
             format_help="dictionary",
         ),
         Attribute(
             attribute_name="template_parameters",
             nullable=True,
             format_help="dictionary",
         ),
         Attribute(attribute_name="task_parameters", nullable=True),
     ])
Example #7
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(
             attribute_name="sql",
             parent_fields=["task_parameters"],
             comment="Relative path to sql file",
         ),
         Attribute(
             attribute_name="postgres_conn_id",
             required=False,
             parent_fields=["task_parameters"],
         ),
     ])
Example #8
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(
             attribute_name="sql",
             parent_fields=["task_parameters"],
             comment="Relative path to sql file",
         ),
         Attribute(
             attribute_name="aws_conn_id",
             required=False,
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="s3_tmp_results_location",
             required=False,
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="s3_output_bucket",
             required=False,
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="s3_output_path",
             required=False,
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="workgroup",
             required=False,
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="is_incremental",
             required=True,
             validator=bool,
             comment="""If set yes then SQL going to be an INSERT INTO\
                            statement, otherwise a DROP TABLE; CTAS statement""",
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="partitioned_by",
             required=False,
             validator=list,
             comment=
             "The list of fields to partition by. These fields should come last in the select statement",
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="output_format",
             required=False,
             validator=str,
             comment="Output file format. One of PARQUET/ORC/JSON/CSV",
             parent_fields=["task_parameters"],
         )
     ])
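Examples #6 through #8 together suggest the shape of a full task definition: the generic attributes from Example #6 sit at the top level, and operator-specific keys nest under task_parameters. Purely as an illustration, and assuming a dict-shaped config (none of these values come from the project), a config satisfying Example #8's Athena-style declarations might look like this:
 # Illustrative only: a dict satisfying the attributes declared in Example #8
 # plus the generic attributes from Example #6. In practice this would
 # presumably be authored as YAML and parsed into a dict.
 athena_task_config = {
     "type": "athena",                 # filled via auto_value=orig_cls.ref_name in Example #6
     "description": "Daily aggregation job",
     "inputs": [],                     # per the comments, populated with the dagger init-io cli
     "outputs": [],
     "airflow_task_parameters": None,  # nullable=True: the key is present, the value may be null
     "template_parameters": None,
     "task_parameters": {
         "sql": "sql/daily_aggregation.sql",  # relative path to the sql file
         "is_incremental": False,             # required bool: DROP TABLE + CTAS when false
         "partitioned_by": ["dt"],            # partition fields come last in the select
         "output_format": "PARQUET",          # one of PARQUET/ORC/JSON/CSV
     },
 }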
Example #9
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes(
         [
             Attribute(
                 attribute_name="executable",
                 parent_fields=["task_parameters"],
                 comment="E.g.: my_code.py",
             ),
             Attribute(
                 attribute_name="executable_prefix",
                 nullable=True,
                 parent_fields=["task_parameters"],
                 comment="E.g.: python",
             ),
             Attribute(attribute_name="job_name", parent_fields=["task_parameters"], required=False),
             Attribute(attribute_name="absolute_job_name", parent_fields=["task_parameters"], required=False),
             Attribute(
                 attribute_name="overrides",
                 parent_fields=["task_parameters"],
                 required=False,
                 validator=dict,
                 comment="Batch overrides dictionary: https://docs.aws.amazon.com/sdkforruby/api/Aws/Batch/Types/ContainerOverrides.html",
             ),
             Attribute(
                 attribute_name="aws_conn_id",
                 parent_fields=["task_parameters"],
                 required=False,
             ),
             Attribute(
                 attribute_name="region_name",
                 parent_fields=["task_parameters"],
                 required=False,
             ),
             Attribute(
                 attribute_name="cluster_name",
                 parent_fields=["task_parameters"],
                 required=False,
             ),
             Attribute(
                 attribute_name="job_queue",
                 parent_fields=["task_parameters"],
                 required=False,
             ),
             Attribute(
                 attribute_name="max_retries",
                 parent_fields=["task_parameters"],
                 required=False,
             ),
         ]
     )
Example #10
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes(
         [
             Attribute(
                 attribute_name="python",
                 parent_fields=["task_parameters"],
                 comment="Relative path to python file that implements the function",
             ),
             Attribute(
                 attribute_name="function",
                 parent_fields=["task_parameters"],
                 comment="Name of the function",
             ),
         ]
     )
Example #11
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(
             attribute_name="channel",
             validator=str,
             comment=
             "Name of slack channel or slack id of user E.g.: #airflow-jobs or UN01EL1RU",
         ),
         Attribute(
             attribute_name="mentions",
             validator=list,
             nullable=True,
             comment=
             "List of slack user ids or slack groups. E.g.: <@UN01EL1RU> for user, @data-eng for slack group",
         ),
     ])
Example #12
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(attribute_name="type", auto_value=orig_cls.ref_name),
         Attribute(attribute_name="name"),
         Attribute(
             attribute_name="has_dependency",
             required=False,
             comment=
             "Weather this i/o should be added to the dependency graph or not. Default is True",
         ),
         Attribute(
             attribute_name="follow_external_dependency",
             required=False,
             comment=
             "Weather an external task sensor should be created if this dataset"
             "is created in another pipeline. Default is False",
         ),
     ])
Example #13
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(
             attribute_name="sql",
             nullable=True,
             parent_fields=["task_parameters"],
             comment=
             "Relative path to sql file. If not present default is SELECT * FROM <input_table>",
         ),
         Attribute(
             attribute_name="iam_role",
             required=False,
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="allow_overwrite",
             required=False,
             parent_fields=["task_parameters"],
             format_help="on/off",
             comment="Default is on",
         ),
         Attribute(
             attribute_name="postgres_conn_id",
             required=False,
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="extra_unload_parameters",
             required=True,
             nullable=True,
             parent_fields=["task_parameters"],
             format_help="dictionary",
             comment=
             "Any additional parameter will be added like <key value> \
                       Check https://docs.aws.amazon.com/redshift/latest/dg/r_UNLOAD.html#unload-parameters",
         ),
     ])
Example #14
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(
             attribute_name="sql",
             parent_fields=["task_parameters"],
             required=False,
         ),
         Attribute(
             attribute_name="where",
             parent_fields=["task_parameters"],
             required=False,
         ),
         Attribute(
             attribute_name="columns",
             parent_fields=["task_parameters"],
             required=False,
         ),
         Attribute(
             attribute_name="num_mappers",
             parent_fields=["task_parameters"],
             required=False,
         ),
         Attribute(
             attribute_name="split_by",
             parent_fields=["task_parameters"],
             required=True,
         ),
         Attribute(
             attribute_name="delete_target_dir",
             parent_fields=["task_parameters"],
             required=False,
             validator=bool,
         ),
         Attribute(
             attribute_name="format",
             parent_fields=["task_parameters"],
             required=False,
         ),
         Attribute(
             attribute_name="emr_master",
             parent_fields=["task_parameters"],
             required=False,
         ),
     ])
Example #15
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(
             attribute_name="owner",
             validator=str,
             format_help="<team|person>@domain.com",
         ),
         Attribute(attribute_name="description", validator=str),
         Attribute(attribute_name="schedule",
                   format_help="crontab e.g.: 0 3 * * *"),
         Attribute(
             attribute_name="start_date",
             format_help="2019-11-01T03:00",
             validator=lambda x: datetime.strptime(x, "%Y-%m-%dT%H:%M"),
         ),
         Attribute(attribute_name="airflow_parameters"),
         Attribute(
             attribute_name="default_args",
             required=True,
             nullable=True,
             validator=dict,
             parent_fields=["airflow_parameters"],
             format_help="dictionary",
         ),
         Attribute(
             attribute_name="dag_parameters",
             required=True,
             nullable=True,
             validator=dict,
             parent_fields=["airflow_parameters"],
             format_help="dictionary",
         ),
         Attribute(
             attribute_name="alerts",
             required=True,
             nullable=True,
             validator=list,
             format_help="list",
         ),
     ])
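The validator arguments above are plain callables, so they can be exercised directly. Assuming the standard-library import that this excerpt omits, the start_date validator from Example #15 behaves as follows:
 from datetime import datetime

 # The start_date validator from Example #15, applied by hand.
 def parse_start_date(value):
     return datetime.strptime(value, "%Y-%m-%dT%H:%M")

 print(parse_start_date("2019-11-01T03:00"))  # 2019-11-01 03:00:00
 # A malformed value raises ValueError, which a config validator
 # could presumably surface as a schema error:
 try:
     parse_start_date("2019-11-01")
 except ValueError as exc:
     print(f"invalid start_date: {exc}")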
Example #16
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(
             attribute_name="spark_engine",
             parent_fields=["task_parameters"],
             required=True,
             comment=
             "Where to run spark job. Accepted values: emr, batch, glue",
         ),
         Attribute(attribute_name="job_file",
                   parent_fields=["task_parameters"],
                   required=True),
         Attribute(attribute_name="cluster_name",
                   parent_fields=["task_parameters"],
                   required=False),
         Attribute(attribute_name="job_bucket",
                   parent_fields=["task_parameters"],
                   required=False),
         Attribute(
             attribute_name="spark_args",
             parent_fields=["task_parameters"],
             required=False,
             format_help="Dictionary",
         ),
         Attribute(
             attribute_name="spark_conf_args",
             parent_fields=["task_parameters"],
             required=False,
             format_help="Dictionary",
         ),
         Attribute(
             attribute_name="extra_py_files",
             parent_fields=["task_parameters"],
             required=False,
         ),
         Attribute(
             attribute_name="emr_conn_id",
             parent_fields=["task_parameters"],
             required=False,
         ),
         Attribute(
             attribute_name="overrides",
             parent_fields=["task_parameters"],
             required=False,
             validator=dict,
             comment=
             "Batch overrides dictionary: https://docs.aws.amazon.com/sdkforruby/api/Aws/Batch/Types/ContainerOverrides.html",
         ),
         Attribute(
             attribute_name="aws_conn_id",
             parent_fields=["task_parameters"],
             required=False,
         ),
         Attribute(
             attribute_name="region_name",
             parent_fields=["task_parameters"],
             required=False,
         ),
         Attribute(
             attribute_name="job_queue",
             parent_fields=["task_parameters"],
             required=False,
         ),
         Attribute(
             attribute_name="max_retries",
             parent_fields=["task_parameters"],
             required=False,
         ),
     ])
Example #17
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes(
         [Attribute(attribute_name="type", auto_value=orig_cls.ref_name)])
Example #18
 def init_attributes(cls, orig_cls):
     cls.add_config_attributes([
         Attribute(
             attribute_name="iam_role",
             required=False,
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="columns",
             required=False,
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="incremental",
             required=True,
             parent_fields=["task_parameters"],
             validator=bool,
             format_help="on/off/yes/no/true/false",
             auto_value="true",
         ),
         Attribute(
             attribute_name="delete_condition",
             required=True,
             nullable=True,
             parent_fields=["task_parameters"],
             format_help="SQL where statement",
             comment="Recommended when doing incremental load",
         ),
         Attribute(
             attribute_name="max_errors",
             required=False,
             parent_fields=["task_parameters"],
             comment="Default is 0",
         ),
         Attribute(
             attribute_name="postgres_conn_id",
             required=False,
             parent_fields=["task_parameters"],
         ),
         Attribute(
             attribute_name="extra_load_parameters",
             required=True,
             nullable=True,
             parent_fields=["task_parameters"],
             format_help="dictionary",
             comment=
             "Any additional parameter will be added like <key value> \
                   Check https://docs.aws.amazon.com/redshift/latest/dg/r_COPY.html",
         ),
         Attribute(
             attribute_name="tmp_table_prefix",
             required=False,
             parent_fields=["task_parameters"],
             format_help="string",
             comment=
             "Only valid if job is truncated. If set table will be loaded into a tmp table prefixed "
             "<tmp_table_prefix> and than it will be moved to it's final destination",
         ),
         Attribute(
             attribute_name="create_table_ddl",
             required=False,
             parent_fields=["task_parameters"],
             format_help="string",
             comment="Path to the file which contains the create table ddl",
         ),
         Attribute(
             attribute_name="copy_ddl_from",
             required=False,
             parent_fields=["task_parameters"],
             format_help="string {schema}.{table}",
             comment=
             "If you have the schema of the table e.g.: in spectrum you can copy the ddl from there",
         ),
         Attribute(
             attribute_name="sort_keys",
             required=False,
             parent_fields=["task_parameters"],
             format_help="Comma separated list of strings. {col1,col2}",
             comment=
             "Redshift sort keys. If this is set, interleaved sort_keys must be null.",
         ),
     ])
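Finally, the recurring init_attributes(cls, orig_cls) signature, together with auto_value=orig_cls.ref_name in Examples #6, #12, and #17, suggests these hooks are invoked by registration machinery that passes both a config-holder class and the original task class. Below is a minimal sketch of such wiring under that assumption; ConfigHolder and register are hypothetical names, not the project's actual API.
 # Hypothetical wiring: how a registration hook could invoke the
 # init_attributes functions shown above. ConfigHolder and register
 # are illustrative names only.
 class ConfigHolder:
     config_attributes: list = []

     @classmethod
     def add_config_attributes(cls, attributes):
         cls.config_attributes.extend(attributes)

 def register(orig_cls):
     # Give each task class its own holder, then let every init_attributes
     # hook in the class hierarchy declare its attributes on it.
     holder = type(orig_cls.__name__ + "Config", (ConfigHolder,), {"config_attributes": []})
     for klass in reversed(orig_cls.__mro__):
         hook = klass.__dict__.get("init_attributes")
         if hook is not None:
             hook(holder, orig_cls)
     orig_cls.config_attributes = holder.config_attributes
     return orig_cls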