Ejemplo n.º 1
0
    def parse_from_str(self, input):
        """
        Parse a task_family using the :class:`~dbnd._core.register.Register`

        When ``self.config_cls`` is an ``EnvConfig`` (an instance of it or a
        subclass of it), ``input`` is resolved through
        ``get_settings().get_env_config``; in every other case it is handed to
        ``build_task_from_config``.

        :param input: the string value to parse.
        :return: whatever the selected resolver returns for ``input``.
        """

        # Kept as a function-level import, as in the original code.
        from dbnd._core.settings.env import EnvConfig

        config_cls = self.config_cls
        # A plain ``isinstance`` check can never match when ``config_cls`` holds
        # a class object (which the attribute name suggests), so subclasses of
        # ``EnvConfig`` are accepted as well. Instances keep matching as before.
        is_env_config = isinstance(config_cls, EnvConfig) or (
            isinstance(config_cls, type) and issubclass(config_cls, EnvConfig)
        )
        if is_env_config:
            return get_settings().get_env_config(input)
        return build_task_from_config(input)
Ejemplo n.º 2
0
    def terminate_engine(cls):
        """
        Build the Airflow operator that deletes the configured Dataproc cluster.

        The cluster name and region are read from ``DataprocConfig``, the
        connection id from the GCP environment config, and the project id from
        a ``DataProcHook`` created with that connection id.

        :return: a ``DataprocClusterDeleteOperator``; this method only
            constructs the operator and returns it.
        """
        from airflow.contrib.hooks.gcp_dataproc_hook import DataProcHook
        from airflow.contrib.operators import dataproc_operator

        config = DataprocConfig()
        conn_id = get_settings().get_env_config(CloudType.gcp).conn_id
        hook = DataProcHook(gcp_conn_id=conn_id)

        operator_kwargs = dict(
            task_id="delete_dataproc_cluster",
            cluster_name=config.cluster,
            project_id=hook.project_id,
            gcp_conn_id=conn_id,
            region=config.region,
        )
        return dataproc_operator.DataprocClusterDeleteOperator(**operator_kwargs)
Ejemplo n.º 3
0
    def create_engine(cls):
        """
        Build the Airflow operator that creates the configured Dataproc cluster.

        The connection id comes from the GCP environment config, the project id
        from a ``DataProcHook`` created with that connection id, and every
        other cluster option from ``DataprocConfig``.

        :return: a ``DataprocClusterCreateOperator``; this method only
            constructs the operator and returns it.
        """
        from airflow.contrib.hooks.gcp_dataproc_hook import DataProcHook
        from airflow.contrib.operators import dataproc_operator

        from dbnd._core.current import get_settings

        # Options whose operator keyword is the same as the DataprocConfig
        # attribute name; they are forwarded unchanged, in this order.
        forwarded_options = (
            "num_workers",
            "zone",
            "network_uri",
            "subnetwork_uri",
            "tags",
            "storage_bucket",
            "init_actions_uris",
            "init_action_timeout",
            "metadata",
            "image_version",
            "properties",
            "master_machine_type",
            "master_disk_size",
            "worker_machine_type",
            "worker_disk_size",
            "num_preemptible_workers",
            "labels",
            "delegate_to",
            "service_account",
            "service_account_scopes",
            "idle_delete_ttl",
            "auto_delete_time",
            "auto_delete_ttl",
        )

        env_config = get_settings().get_env_config(CloudType.gcp)
        conn_id = env_config.conn_id

        config = DataprocConfig()
        hook = DataProcHook(gcp_conn_id=conn_id)

        operator_kwargs = {
            "task_id": "create_dataproc_cluster",
            "project_id": hook.project_id,
            # The operator calls it ``cluster_name``; the config calls it ``cluster``.
            "cluster_name": config.cluster,
            "gcp_conn_id": conn_id,
        }
        for option in forwarded_options:
            operator_kwargs[option] = getattr(config, option)

        return dataproc_operator.DataprocClusterCreateOperator(**operator_kwargs)
Ejemplo n.º 4
0
def should_flatten(operator, attr_name):
    """
    Tell whether ``attr_name`` is listed as a flattened field for ``operator``.

    The ``tracking.flatten_operator_fields`` setting is scanned in iteration
    order. The first key for which ``is_instance_by_class_name(operator, key)``
    holds decides the answer: the result is whether ``attr_name`` is contained
    in the value stored under that key. ``False`` is returned when no key
    matches.
    """
    fields_by_class_name = get_settings().tracking.flatten_operator_fields

    # Private sentinel so that any key value, even ``None``, can be a match.
    no_match = object()
    matched_name = next(
        (
            class_name
            for class_name in fields_by_class_name
            if is_instance_by_class_name(operator, class_name)
        ),
        no_match,
    )
    if matched_name is no_match:
        return False
    return attr_name in fields_by_class_name[matched_name]