def change_import_paths_to_deprecated():
    """
    Rewrites imports and DAG calls so that the code uses the deprecated module paths.

    The Bowler selectors are queued on a single ``Query`` and applied by the final
    ``execute(write=True, silent=False, interactive=False)`` call:

    * the module renames listed in ``changes`` (new path -> deprecated path),
    * imports of ``airflow.utils.python_virtualenv`` and ``airflow.utils.process_utils``
      are pointed at ``airflow.providers.google.cloud.utils``,
    * the ``tags`` keyword argument is removed from ``DAG(...)`` calls,
    * ``KubernetesPodOperator`` imports are pointed at the contrib module,
    * ``airflow.models`` imports in ``bigquery.py``/``mlengine.py`` are renamed to
      ``airflow.models.baseoperator``.

    The two utility files themselves are copied immediately with ``copyfile`` into
    ``airflow/providers/google/cloud/utils`` below this script's directory.
    """
    from bowler import LN, TOKEN, Capture, Filename, Query
    from fissix.pytree import Leaf

    def remove_tags_modifier(node: LN, capture: Capture,
                             filename: Filename) -> None:
        """Remove every ``tags=...`` keyword argument from the matched call."""
        arguments = capture.get('function_arguments')
        if not arguments:
            # Call without any arguments - there is nothing to strip.
            return
        # Snapshot the traversal first: remove() edits the very children lists
        # that post_order() walks, which would make the walk skip siblings.
        for leaf in list(arguments[0].post_order()):
            if not (isinstance(leaf, Leaf) and leaf.type == TOKEN.NAME
                    and leaf.value == "tags"):
                continue
            equals = leaf.next_sibling
            if not (isinstance(equals, Leaf) and equals.value == "="):
                # "tags" used as a value or positional name, not as a keyword.
                continue
            keyword_arg = leaf.parent
            separator = keyword_arg.next_sibling
            if isinstance(separator, Leaf) and separator.value == ",":
                separator.remove()
            keyword_arg.remove()

    def pure_airflow_models_filter(node: LN, capture: Capture,
                                   filename: Filename) -> bool:
        """Check if select is exactly [airflow, . , models]"""
        return len(list(node.children[1].leaves())) == 3

    # (new module path, deprecated module path)
    changes = [
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
    ]

    qry = Query()
    for new, old in changes:
        qry.select_module(new).rename(old)

    # Move and refactor imports for Dataflow
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils",
                     "python_virtualenv.py"),
        os.path.join(dirname(__file__), "airflow", "providers", "google",
                     "cloud", "utils", "python_virtualenv.py"))
    (qry.select_module("airflow.utils.python_virtualenv").rename(
        "airflow.providers.google.cloud.utils.python_virtualenv"))
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils",
                     "process_utils.py"),
        os.path.join(dirname(__file__), "airflow", "providers", "google",
                     "cloud", "utils", "process_utils.py"))
    (qry.select_module("airflow.utils.process_utils").rename(
        "airflow.providers.google.cloud.utils.process_utils"))

    # Remove tags
    qry.select_method("DAG").is_call().modify(remove_tags_modifier)

    # Fix KubernetesPodOperator imports to use old path
    qry.select_module(
        "airflow.providers.cncf.kubernetes.operators.kubernetes_pod").rename(
            "airflow.contrib.operators.kubernetes_pod_operator")

    # Fix BaseOperatorLinks imports
    files = r"bigquery\.py|mlengine\.py"  # noqa
    qry.select_module("airflow.models").is_filename(include=files).filter(
        pure_airflow_models_filter).rename("airflow.models.baseoperator")

    qry.execute(write=True, silent=False, interactive=False)
def change_import_paths_to_deprecated():
    """
    Queues all import/DAG rewrites on one Bowler ``Query`` and executes it.

    Renames new module paths to their deprecated names, copies two utility modules
    into ``airflow/providers/google/cloud/utils`` (below this script's directory) and
    points imports at the copies, strips ``tags`` from ``DAG`` calls, and fixes the
    KubernetesPodOperator and BaseOperatorLinks imports.
    """
    qry = Query()

    # (new module path, deprecated module path)
    deprecated_names = (
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
    )
    for current_path, deprecated_path in deprecated_names:
        qry.select_module(current_path).rename(deprecated_path)

    # Move and refactor imports for Dataflow
    # (file name, module path in core, module path inside the google provider)
    relocated_utils = (
        ("python_virtualenv.py",
         "airflow.utils.python_virtualenv",
         "airflow.providers.google.cloud.utils.python_virtualenv"),
        ("process_utils.py",
         "airflow.utils.process_utils",
         "airflow.providers.google.cloud.utils.process_utils"),
    )
    for file_name, core_module, provider_module in relocated_utils:
        source_path = os.path.join(dirname(__file__), os.pardir, "airflow", "utils", file_name)
        target_path = os.path.join(
            dirname(__file__), "airflow", "providers", "google", "cloud", "utils", file_name
        )
        copyfile(source_path, target_path)
        qry.select_module(core_module).rename(provider_module)

    # Remove tags
    dag_calls = qry.select_method("DAG").is_call()
    dag_calls.modify(remove_tags_modifier)

    # Fix KubernetesPodOperator imports to use old path
    pod_operator_imports = qry.select_module("airflow.providers.cncf.kubernetes.operators.kubernetes_pod")
    pod_operator_imports.rename("airflow.contrib.operators.kubernetes_pod_operator")

    # Fix BaseOperatorLinks imports
    files = r"bigquery\.py|mlengine\.py"  # noqa
    models_imports = qry.select_module("airflow.models").is_filename(include=files)
    models_imports.filter(pure_airflow_models_filter).rename("airflow.models.baseoperator")

    qry.execute(write=True, silent=False, interactive=False)
# Example no. 3
# 0
class RefactorTool:
    """Queues method renames and import fixes for one file on a Bowler ``Query``."""

    def __init__(self, input, nobackups, show_diffs):
        """
        Args:
            input: path to file to be refactored.
            nobackups: If true no backup '.bak' files will be created for those
                files that are being refactored.
            show_diffs: Should diffs of the refactoring be printed to stdout?
        """
        self.nobackups = nobackups
        self.show_diffs = show_diffs
        self.fn = input

        self.query = Query([self.fn])

    def rename_methods(self):
        """Queue a rename for every ``(old_name, new_name)`` pair in ``methods``."""
        # ``methods`` is not defined in this class; it is looked up as a global.
        for old_name, new_name in methods:
            self.query.select_method(old_name).rename(new_name)

    def fix_imports(self):
        """Queue the rewrite of the old-style playwright import."""
        # Fix old style: from playwright import sync_playwright
        self.query.select_module("sync_playwright").select_module(
            "playwright").rename("playwright.sync_api")

    def output_diffs(self):
        """Ask the query to produce the diff of the queued changes."""
        self.query.diff()

    def write_file(self):
        """
        Write the queued changes, first copying the file to ``<path>.bak``
        unless ``nobackups`` is set.

        Backup handling is best effort: a failure to remove a stale backup or
        to create the new one is logged and the write still goes ahead.
        """
        if not self.nobackups:
            # Make a backup before refactor
            backup = self.fn + ".bak"
            if os.path.lexists(backup):
                try:
                    os.remove(backup)
                except OSError as err:
                    self.log_message(
                        "Cannot remove backup %s: %s" % (backup, err))
            try:
                shutil.copyfile(self.fn, backup)
            except OSError as err:
                self.log_message(
                    "Cannot copy %s to %s: %s" % (self.fn, backup, err))
        self.query.write()

    def log_message(self, msg):
        """Print an informational message to stdout."""
        print("Info: " + msg)

    def log_error(self, msg):
        """Print an error message to stdout."""
        print("Error: " + msg)
class RefactorBackportPackages:
    """
    Refactors the code of providers, so that it works in 1.10.

    """
    def __init__(self) -> None:
        """Creates the Bowler query that all refactoring steps of this class register with."""
        self.qry = Query()

    def remove_class(self, class_name) -> None:
        """
        Removes every node that the class selector matches for the given name.
        Example diff generated:


        .. code-block:: diff

            --- ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            +++ ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            @@ -179,86 +179,3 @@
            -
            -class GKEStartPodOperator(KubernetesPodOperator):
            -
            - ...

        :param class_name: name of the class to remove
        """
        def _drop_matched_node(node: LN, capture: Capture, filename: Filename) -> None:
            # Only the side effect is wanted; nothing is returned to Bowler.
            node.remove()

        class_selector = self.qry.select_class(class_name)
        class_selector.modify(_drop_matched_node)

    def rename_deprecated_modules(self) -> None:
        """
        Renames back to deprecated modules imported. Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/dingding/operators/dingding.py
            +++ ./airflow/providers/dingding/operators/dingding.py
            @@ -16,7 +16,7 @@
             # specific language governing permissions and limitations
             # under the License.

            -from airflow.operators.bash import BaseOperator
            +from airflow.operators.bash_operator import BaseOperator
             from airflow.providers.dingding.hooks.dingding import DingdingHook
             from airflow.utils.decorators import apply_defaults

        """
        changes = [
            ("airflow.operators.bash", "airflow.operators.bash_operator"),
            ("airflow.operators.python", "airflow.operators.python_operator"),
            ("airflow.utils.session", "airflow.utils.db"),
            ("airflow.providers.cncf.kubernetes.operators.kubernetes_pod",
             "airflow.contrib.operators.kubernetes_pod_operator"),
        ]
        for new, old in changes:
            self.qry.select_module(new).rename(old)

    def add_provide_context_to_python_operators(self) -> None:
        """

        Adds ``provide_context=True`` to calls of Python/BranchPython Operators - mostly in
        example_dags. Note that those changes apply to example DAGs, not to the operators/hooks etc.
        We package the example DAGs together with the provider classes and they should serve as
        examples independently of the version of Airflow they will be installed in.
        The provide_context feature in Python operators was added in 2.0.0 and we are still
        using the "Core" operators from the Airflow version that the provider packages are installed
        in - the "Core" operators do not have (for now) their own provider package.

        The core operators are:

            * Python
            * BranchPython
            * Bash
            * Branch
            * Dummy
            * LatestOnly
            * ShortCircuit
            * PythonVirtualEnv


        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/amazon/aws/example_dags/example_google_api_to_s3_transfer_advanced.py
            +++ ./airflow/providers/amazon/aws/example_dags/example_google_api_to_s3_transfer_advanced.py
            @@ -105,7 +105,8 @@
                         task_video_ids_to_s3.google_api_response_via_xcom,
                         task_video_ids_to_s3.task_id
                     ],
            -        task_id='check_and_transform_video_ids'
            +        task_id='check_and_transform_video_ids',
            +        provide_context=True
                 )

        """
        def add_provide_context_to_python_operator(node: LN, capture: Capture,
                                                   filename: Filename) -> None:
            call_args = capture['function_arguments'][0]
            if call_args.children:
                last_child = call_args.children[-1]
                ends_with_comma = isinstance(last_child, Leaf) and last_child.type == token.COMMA
                if not ends_with_comma:
                    # Separate the new keyword from the current last argument.
                    call_args.append_child(Comma())

            new_kwarg = KeywordArg(Name('provide_context'), Name('True'))
            # Reuse the whitespace of the first argument so the new one lines up with it.
            new_kwarg.prefix = call_args.children[0].prefix
            call_args.append_child(new_kwarg)

        for operator_name in ("PythonOperator", "BranchPythonOperator"):
            operator_calls = self.qry.select_function(operator_name).is_call()
            operator_calls.modify(add_provide_context_to_python_operator)

    def remove_super_init_call(self):
        r"""
        Removes super().__init__() call from Hooks.

        In airflow 1.10 almost none of the Hooks call super().init(). It was always broken in Airflow 1.10 -
        the BaseHook() has its own __init__() which is wrongly implemented and requires source
        parameter to be passed:

        .. code-block:: python

            def __init__(self, source):
                pass

        We fixed it in 2.0, but for the entire 1.10 line calling super().init() is not a good idea -
        and it basically does nothing even if you do. And it's bad because it does not initialize
        LoggingMixin (BaseHook derives from LoggingMixin). And it is the main reason why Hook
        logs are not working as they are supposed to sometimes:

        .. code-block:: python

            class LoggingMixin(object):
                \"\"\"
                Convenience super-class to have a logger configured with the class name
                \"\"\"
                def __init__(self, context=None):
                    self._set_context(context)


        There are two Hooks in 1.10 that call super.__init__ :

        .. code-block:: python

               super(CloudSqlDatabaseHook, self).__init__(source=None)
               super(MongoHook, self).__init__(source='mongo')

        Not that it helps with anything because init in BaseHook does nothing. So we remove
        the super().init() in Hooks when backporting to 1.10.

        Only expressions that start with ``super`` and reference ``__init__`` are removed;
        other ``super()`` usages in the class are left untouched.

        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/apache/druid/hooks/druid.py
            +++ ./airflow/providers/apache/druid/hooks/druid.py
            @@ -49,7 +49,7 @@
                         timeout=1,
                         max_ingestion_time=None):

            -        super().__init__()
            +
                     self.druid_ingest_conn_id = druid_ingest_conn_id
                     self.timeout = timeout
                     self.max_ingestion_time = max_ingestion_time

        """
        def remove_super_init_call_modifier(node: LN, capture: Capture,
                                            filename: Filename) -> None:
            # Snapshot the traversal first: remove() edits the children lists
            # that post_order() walks, which would make the walk skip siblings.
            for ch in list(node.post_order()):
                if not (isinstance(ch, Leaf) and ch.value == "super"):
                    continue
                expression = ch.parent
                if expression is None:
                    continue
                # The previous guard, any(c.value ...), was true for every
                # expression (the "super" leaf itself is non-empty), so all
                # super() usages were dropped. Require an __init__ reference.
                if any(leaf.value == "__init__" for leaf in expression.leaves()):
                    expression.remove()

        self.qry.select_subclass("BaseHook").modify(
            remove_super_init_call_modifier)

    def remove_tags(self):
        """
        Removes the ``tags`` keyword argument from ``DAG(...)`` calls (in example_dags). Note that
        those changes apply to example DAGs, not to the operators/hooks etc. We package the example
        DAGs together with the provider classes and they should serve as examples independently of
        the version of Airflow they will be installed in. The tags are a feature added in 1.10.10
        and occasionally we will want to run example DAGs as system tests in pre-1.10.10 version so
        we want to remove the tags here.

        Only a ``tags`` name that is directly followed by ``=`` is treated as the keyword; a
        variable or positional argument that merely happens to be called ``tags`` is left alone.


        Example diff generated:

        .. code-block:: diff


            --- ./airflow/providers/amazon/aws/example_dags/example_datasync_2.py
            +++ ./airflow/providers/amazon/aws/example_dags/example_datasync_2.py
            @@ -83,8 +83,7 @@
             with models.DAG(
                 "example_datasync_2",
                 default_args=default_args,
            -    schedule_interval=None,  # Override to match your needs
            -    tags=['example'],
            +    schedule_interval=None,
             ) as dag:

        """
        def remove_tags_modifier(_: LN, capture: Capture,
                                 filename: Filename) -> None:
            arguments = capture.get('function_arguments')
            if not arguments:
                # Call without any arguments - there is nothing to strip.
                return
            # Snapshot the traversal first: remove() edits the children lists
            # that post_order() walks, which would make the walk skip siblings.
            for leaf in list(arguments[0].post_order()):
                if not (isinstance(leaf, Leaf) and leaf.type == TOKEN.NAME
                        and leaf.value == "tags"):
                    continue
                equals = leaf.next_sibling
                if not (isinstance(equals, Leaf) and equals.value == "="):
                    # "tags" used as a value or positional name, not as a keyword.
                    continue
                keyword_arg = leaf.parent
                separator = keyword_arg.next_sibling
                if isinstance(separator, Leaf) and separator.value == ",":
                    separator.remove()
                keyword_arg.remove()

        # Remove tags
        self.qry.select_method("DAG").is_call().modify(remove_tags_modifier)

    def remove_poke_mode_only_decorator(self):
        r"""
        Removes @poke_mode_only decorator. The decorator is only available in Airflow 2.0.

        When the decorator is found on a class next to a ``BaseSensorOperator`` subclass match,
        every ``poke_mode_only`` name in the whole file is removed as well, which takes care of
        the import.

        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/sensors/gcs.py
            +++ ./airflow/providers/google/cloud/sensors/gcs.py
            @@ -189,7 +189,6 @@
                 return datetime.now()


            -@poke_mode_only
             class GCSUploadSessionCompleteSensor(BaseSensorOperator):
                 \"\"\"
                Checks for changes in the number of objects at prefix in Google Cloud Storage

        """
        def find_and_remove_poke_mode_only_import(node: LN):
            # Iterate over a copy: remove() edits node.children while we walk it.
            for child in list(node.children):
                if isinstance(child, Leaf) and child.type == TOKEN.NAME \
                        and child.value == 'poke_mode_only':
                    import_node = child.parent
                    preceding = child.prev_sibling
                    following = child.next_sibling
                    # Set when other names remain, so the enclosing node must stay.
                    has_other_names = False
                    if isinstance(preceding, Leaf) and preceding.value == ",":
                        # Not the first name: drop the comma in front of it and
                        # keep the one after it (if any) for the remaining names.
                        preceding.remove()
                        has_other_names = True
                    elif isinstance(following, Leaf) and following.value == ",":
                        # First name of several: drop the comma behind it so the
                        # list does not start with a dangling comma.
                        following.remove()
                        has_other_names = True
                    # remove the name itself
                    child.remove()
                    if not has_other_names:
                        # remove the enclosing node if there were no siblings
                        import_node.remove()
                else:
                    find_and_remove_poke_mode_only_import(child)

        def find_root_remove_import(node: LN):
            # Walk up to the root of the tree and clean the whole file from there.
            current_node = node
            while current_node.parent:
                current_node = current_node.parent
            find_and_remove_poke_mode_only_import(current_node)

        def is_poke_mode_only_decorator(node: LN) -> bool:
            children = node.children
            return (
                len(children) >= 2
                and isinstance(children[0], Leaf) and children[0].value == '@'
                and isinstance(children[1], Leaf) and children[1].value == 'poke_mode_only'
            )

        def remove_poke_mode_only_modifier(node: LN, capture: Capture,
                                           filename: Filename) -> None:
            # Iterate over a copy: the decorator is removed from this very list.
            for child in list(capture['node'].parent.children):
                if is_poke_mode_only_decorator(child):
                    find_root_remove_import(child)
                    child.remove()

        self.qry.select_subclass("BaseSensorOperator").modify(
            remove_poke_mode_only_modifier)

    def refactor_amazon_package(self):
        """
        Fixes to "amazon" providers package.

        Copies some of the modules used from core Airflow to the "common.utils" package of
        the provider and renames imports to use them from there.

        We copy typing_compat.py (and email.py) and change imports as in the example diff:

        .. code-block:: diff

            --- ./airflow/providers/amazon/aws/operators/ecs.py
            +++ ./airflow/providers/amazon/aws/operators/ecs.py
            @@ -24,7 +24,7 @@
             from airflow.models import BaseOperator
             from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
             from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook
            -from airflow.typing_compat import Protocol, runtime_checkable
            +from airflow.providers.amazon.common.utils.typing_compat import Protocol, runtime_checkable
             from airflow.utils.decorators import apply_defaults

        """
        def amazon_package_filter(node: LN, capture: Capture,
                                  filename: Filename) -> bool:
            return filename.startswith("./airflow/providers/amazon/")

        def core_path(*parts):
            # Path below the "airflow" directory of the source folder.
            return os.path.join(get_source_airflow_folder(), "airflow", *parts)

        def common_path(*parts):
            # Path below the "common" directory of the target amazon package folder.
            return os.path.join(get_target_providers_package_folder("amazon"), "common", *parts)

        os.makedirs(common_path("utils"), exist_ok=True)

        # Both new packages get a copy of airflow/utils/__init__.py as their __init__.py.
        copyfile(core_path("utils", "__init__.py"), common_path("__init__.py"))
        copyfile(core_path("utils", "__init__.py"), common_path("utils", "__init__.py"))

        copyfile(core_path("typing_compat.py"), common_path("utils", "typing_compat.py"))
        typing_compat_imports = self.qry.select_module("airflow.typing_compat").filter(
            callback=amazon_package_filter)
        typing_compat_imports.rename("airflow.providers.amazon.common.utils.typing_compat")

        copyfile(core_path("utils", "email.py"), common_path("utils", "email.py"))
        email_imports = self.qry.select_module("airflow.utils.email").filter(
            callback=amazon_package_filter)
        email_imports.rename("airflow.providers.amazon.common.utils.email")

    def refactor_google_package(self):
        r"""
        Fixes to "google" providers package.

        Copies some of the modules used from core Airflow to the "common.utils" package of
        the provider and renames imports to use them from there. Note that in this case we also
        rename the imports in the copied files.

        For example we copy python_virtualenv.py, process_utils.py and change import as in example diff:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            +++ ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            @@ -28,11 +28,11 @@

             from airflow.exceptions import AirflowException
             from airflow.models import BaseOperator
            -from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
            +from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
             from airflow.providers.google.cloud.hooks.kubernetes_engine import GKEHook
             from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
             from airflow.utils.decorators import apply_defaults
            -from airflow.utils.process_utils import execute_in_subprocess, patch_environ
            +from airflow.providers.google.common.utils.process_utils import execute_in_subprocess


        And in the copied python_virtualenv.py we also change import to process_utils.py. This happens
        automatically and is solved by Pybowler.


        .. code-block:: diff

            --- ./airflow/providers/google/common/utils/python_virtualenv.py
            +++ ./airflow/providers/google/common/utils/python_virtualenv.py
            @@ -21,7 +21,7 @@
             \"\"\"
            from typing import List, Optional

            -from airflow.utils.process_utils import execute_in_subprocess
            +from airflow.providers.google.common.utils.process_utils import execute_in_subprocess


            def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, system_site_packages: bool)


        We also rename Base operator links to deprecated names:


        .. code-block:: diff

            --- ./airflow/providers/google/cloud/operators/mlengine.py
            +++ ./airflow/providers/google/cloud/operators/mlengine.py
            @@ -24,7 +24,7 @@
             from typing import List, Optional

             from airflow.exceptions import AirflowException
            -from airflow.models import BaseOperator, BaseOperatorLink
            +from airflow.models.baseoperator import BaseOperator, BaseOperatorLink
             from airflow.models.taskinstance import TaskInstance
             from airflow.providers.google.cloud.hooks.mlengine import MLEngineHook
             from airflow.utils.decorators import apply_defaults


        We remove GKEStartPodOperator (example in remove_class method)


        We also copy (google.common.utils) and rename imports to the helpers.

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/example_dags/example_datacatalog.py
            +++ ./airflow/providers/google/cloud/example_dags/example_datacatalog.py
            @@ -37,7 +37,7 @@
                 CloudDataCatalogUpdateTagTemplateOperator,
             )
             from airflow.utils.dates import days_ago
            -from airflow.utils.helpers import chain
            +from airflow.providers.google.common.utils.helpers import chain

             default_args = {"start_date": days_ago(1)}

        And also module_loading, which is used by helpers

        .. code-block:: diff

            --- ./airflow/providers/google/common/utils/helpers.py
            +++ ./airflow/providers/google/common/utils/helpers.py
            @@ -26,7 +26,7 @@
             from jinja2 import Template

             from airflow.exceptions import AirflowException
            -from airflow.utils.module_loading import import_string
            +from airflow.providers.google.common.utils.module_loading import import_string

             KEY_REGEX = re.compile(r'^[\\w.-]+$')

        """
        def google_package_filter(node: LN, capture: Capture,
                                  filename: Filename) -> bool:
            return filename.startswith("./airflow/providers/google/")

        def pure_airflow_models_filter(node: LN, capture: Capture,
                                       filename: Filename) -> bool:
            """Check if select is exactly [airflow, . , models]"""
            leaf_count = sum(1 for _ in node.children[1].leaves())
            return leaf_count == 3

        def core_utils_path(file_name):
            # Path of a module in airflow/utils below the source folder.
            return os.path.join(get_source_airflow_folder(), "airflow", "utils", file_name)

        def common_utils_path(*parts):
            # Path below common/utils of the target google package folder.
            return os.path.join(get_target_providers_package_folder("google"),
                                "common", "utils", *parts)

        def rename_google_import(core_module, provider_module):
            # Queue the rename of one module import, limited to google provider files.
            self.qry.select_module(core_module).filter(
                callback=google_package_filter).rename(provider_module)

        os.makedirs(common_utils_path(), exist_ok=True)
        copyfile(core_utils_path("__init__.py"), common_utils_path("__init__.py"))
        copyfile(core_utils_path("python_virtualenv.py"), common_utils_path("python_virtualenv.py"))

        copy_helper_py_file(common_utils_path("helpers.py"))

        copyfile(core_utils_path("module_loading.py"), common_utils_path("module_loading.py"))
        rename_google_import("airflow.utils.python_virtualenv",
                             "airflow.providers.google.common.utils.python_virtualenv")
        copyfile(core_utils_path("process_utils.py"), common_utils_path("process_utils.py"))
        rename_google_import("airflow.utils.process_utils",
                             "airflow.providers.google.common.utils.process_utils")

        rename_google_import("airflow.utils.helpers",
                             "airflow.providers.google.common.utils.helpers")

        rename_google_import("airflow.utils.module_loading",
                             "airflow.providers.google.common.utils.module_loading")

        # Fix BaseOperatorLinks imports
        models_imports = self.qry.select_module("airflow.models").is_filename(
            include=r"bigquery\.py|mlengine\.py")
        models_imports = models_imports.filter(callback=google_package_filter)
        models_imports = models_imports.filter(pure_airflow_models_filter)
        models_imports.rename("airflow.models.baseoperator")

        self.remove_class("GKEStartPodOperator")

        example_usages = self.qry.select_class("GKEStartPodOperator").filter(
            callback=google_package_filter)
        example_usages = example_usages.is_filename(include=r"example_kubernetes_engine\.py")
        example_usages.rename("GKEPodOperator")

    def refactor_odbc_package(self):
        """
        Fixes to "odbc" providers package.

        Copies some of the modules used from core Airflow to the "utils" package of
        the provider and renames imports to use them from there.

        We copy helpers.py and change import as in example diff:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/example_dags/example_datacatalog.py
            +++ ./airflow/providers/google/cloud/example_dags/example_datacatalog.py
            @@ -37,7 +37,7 @@
                 CloudDataCatalogUpdateTagTemplateOperator,
             )
             from airflow.utils.dates import days_ago
            -from airflow.utils.helpers import chain
            +from airflow.providers.odbc.utils.helpers import chain

             default_args = {"start_date": days_ago(1)}


        """
        def odbc_package_filter(node: LN, capture: Capture,
                                filename: Filename) -> bool:
            return filename.startswith("./airflow/providers/odbc/")

        def odbc_utils_path(file_name):
            # Path of a file in the "utils" directory of the target odbc package folder.
            return os.path.join(get_target_providers_package_folder("odbc"), "utils", file_name)

        utils_dir = os.path.join(get_target_providers_folder(), "odbc", "utils")
        os.makedirs(utils_dir, exist_ok=True)

        core_utils_init = os.path.join(get_source_airflow_folder(), "airflow", "utils", "__init__.py")
        copyfile(core_utils_init, odbc_utils_path("__init__.py"))
        copy_helper_py_file(odbc_utils_path("helpers.py"))

        helpers_imports = self.qry.select_module("airflow.utils.helpers").filter(
            callback=odbc_package_filter)
        helpers_imports.rename("airflow.providers.odbc.utils.helpers")

    def do_refactor(self, in_process: bool = False) -> None:  # noqa
        self.rename_deprecated_modules()
        self.refactor_amazon_package()
        self.refactor_google_package()
        self.refactor_odbc_package()
        self.remove_tags()
        self.remove_super_init_call()
        self.add_provide_context_to_python_operators()
        self.remove_poke_mode_only_decorator()
        # In order to debug Bowler - set in_process to True
        self.qry.execute(write=True,
                         silent=False,
                         interactive=False,
                         in_process=in_process)
def change_import_paths_to_deprecated():
    """Rewrite the copied sources so they import from the deprecated paths.

    Two utility modules are first copied into the Google provider package.
    A single Bowler ``Query`` is then built that renames modules and classes
    to their old locations, strips ``tags`` from ``DAG`` calls, removes
    ``super`` calls in ``BaseHook`` subclasses and adds
    ``provide_context=True`` to selected operator calls.  The query is
    executed in write mode, and finally the old ``GKEPodOperator`` import is
    appended to ``kubernetes_engine.py``.
    """
    from bowler import LN, TOKEN, Capture, Filename, Query
    from fissix.pytree import Leaf
    from fissix.fixer_util import KeywordArg, Name, Comma

    def remove_tags_modifier(node: LN, capture: Capture, filename: Filename) -> None:
        """Remove the parent of each ``tags`` NAME leaf in the call arguments.

        A ``,`` sibling directly after that parent is removed as well.
        """
        # A distinct loop name keeps the ``node`` parameter from being shadowed.
        for arg_leaf in capture['function_arguments'][0].post_order():
            if isinstance(arg_leaf, Leaf) and arg_leaf.value == "tags" and arg_leaf.type == TOKEN.NAME:
                argument = arg_leaf.parent
                following = argument.next_sibling
                if following and following.value == ",":
                    following.remove()
                argument.remove()

    def pure_airflow_models_filter(node: LN, capture: Capture, filename: Filename) -> bool:
        """Check if select is exactly [airflow, . , models]"""
        return len(list(node.children[1].leaves())) == 3

    def remove_super_init_call(node: LN, capture: Capture, filename: Filename) -> None:
        """Remove the parent node of every ``super`` leaf found under ``node``.

        The parent is only removed when its subtree holds at least one leaf
        with a truthy value.
        """
        for ch in node.post_order():
            if isinstance(ch, Leaf) and ch.value == "super":
                if any(c.value for c in ch.parent.post_order() if isinstance(c, Leaf)):
                    ch.parent.remove()

    def add_provide_context_to_python_operator(node: LN, capture: Capture, filename: Filename) -> None:
        """Append ``, provide_context=True`` to the matched call's arguments."""
        fn_args = capture['function_arguments'][0]
        fn_args.append_child(Comma())

        provide_context_arg = KeywordArg(Name('provide_context'), Name('True'))
        # Reuse the first argument's prefix so the new argument is laid out the same way.
        provide_context_arg.prefix = fn_args.children[0].prefix
        fn_args.append_child(provide_context_arg)

    def remove_class(qry, class_name) -> None:
        """Register a modifier on ``qry`` that removes matches of ``class_name``."""

        def _remover(node: LN, capture: Capture, filename: Filename) -> None:
            # NOTE(review): 300 and 311 are presumably fissix grammar symbol
            # numbers for non-definition nodes (imports/usages) - TODO confirm.
            if node.type not in (300, 311):  # remove only definition
                node.remove()

        qry.select_class(class_name).modify(_remover)

    # Each pair is (new module path, deprecated module path).
    changes = [
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
        (
            "airflow.providers.cncf.kubernetes.operators.kubernetes_pod",
            "airflow.contrib.operators.kubernetes_pod_operator"
        ),
    ]

    qry = Query()
    for new, old in changes:
        qry.select_module(new).rename(old)

    # Move and refactor imports for Dataflow
    script_dir = dirname(__file__)
    # Each entry is (file name, current module path, module path inside the provider).
    moved_utils = [
        (
            "python_virtualenv.py",
            "airflow.utils.python_virtualenv",
            "airflow.providers.google.cloud.utils.python_virtualenv",
        ),
        (
            "process_utils.py",
            "airflow.utils.process_utils",
            "airflow.providers.google.cloud.utils.process_utils",
        ),
    ]
    for file_name, current_module, provider_module in moved_utils:
        copyfile(
            os.path.join(script_dir, os.pardir, "airflow", "utils", file_name),
            os.path.join(script_dir, "airflow", "providers", "google", "cloud", "utils", file_name),
        )
        qry.select_module(current_module).rename(provider_module)

    # Remove tags
    qry.select_method("DAG").is_call().modify(remove_tags_modifier)

    # Fix AWS import in Google Cloud Transfer Service
    (
        qry
        .select_module("airflow.providers.amazon.aws.hooks.base_aws")
        .is_filename(include=r"cloud_storage_transfer_service\.py")
        .rename("airflow.contrib.hooks.aws_hook")
    )

    (
        qry
        .select_class("AwsBaseHook")
        .is_filename(include=r"cloud_storage_transfer_service\.py")
        .filter(lambda n, c, f: n.type == 300)
        .rename("AwsHook")
    )

    # Fix BaseOperatorLinks imports
    files = r"bigquery\.py|mlengine\.py"  # noqa
    qry.select_module("airflow.models").is_filename(include=files).filter(pure_airflow_models_filter).rename(
        "airflow.models.baseoperator")

    # Fix super().__init__() call in hooks
    qry.select_subclass("BaseHook").modify(remove_super_init_call)

    (
        qry.select_function("PythonOperator")
        .is_call()
        .is_filename(include=r"mlengine_operator_utils.py$")
        .modify(add_provide_context_to_python_operator)
    )

    (
        qry.select_function("BranchPythonOperator")
        .is_call()
        .is_filename(include=r"example_google_api_to_s3_transfer_advanced.py$")
        .modify(add_provide_context_to_python_operator)
    )

    # Remove new class and rename usages of old
    remove_class(qry, "GKEStartPodOperator")
    (
        qry
        .select_class("GKEStartPodOperator")
        .is_filename(include=r"example_kubernetes_engine\.py")
        .rename("GKEPodOperator")
    )

    qry.execute(write=True, silent=False, interactive=False)

    # Add old import to GKE
    gke_path = os.path.join(
        script_dir, "airflow", "providers", "google", "cloud", "operators", "kubernetes_engine.py"
    )
    with open(gke_path, "a") as f:
        # ``writelines`` adds no line separators, so write the newlines
        # explicitly: the import always starts on its own line and the file
        # ends with a newline.
        f.write("\nfrom airflow.contrib.operators.gcp_container_operator import GKEPodOperator\n")
Esempio n. 6
0
def change_import_paths_to_deprecated():
    """Point the copied sources back at the deprecated import locations.

    Copies two utility modules into the Google provider package, registers
    all renames and tree modifications on one Bowler ``Query`` and executes
    that query in write mode.
    """
    from bowler import LN, TOKEN, Capture, Filename, Query
    from fissix.fixer_util import Comma, KeywordArg, Name
    from fissix.pytree import Leaf

    def remove_tags_modifier(node: LN, capture: Capture, filename: Filename) -> None:
        """Remove the parent of each ``tags`` NAME leaf and a ``,`` right after it."""
        for leaf in capture['function_arguments'][0].post_order():
            if not isinstance(leaf, Leaf):
                continue
            if leaf.value != "tags" or leaf.type != TOKEN.NAME:
                continue
            argument = leaf.parent
            trailing = argument.next_sibling
            if trailing and trailing.value == ",":
                trailing.remove()
            argument.remove()

    def pure_airflow_models_filter(node: LN, capture: Capture, filename: Filename) -> bool:
        """Tell whether the selected module is exactly [airflow, . , models]."""
        leaf_count = sum(1 for _ in node.children[1].leaves())
        return leaf_count == 3

    def remove_super_init_call(node: LN, capture: Capture, filename: Filename) -> None:
        """Remove the parent of every ``super`` leaf below ``node``.

        The parent is removed only if its subtree contains a leaf with a
        truthy value.
        """
        for candidate in node.post_order():
            if not isinstance(candidate, Leaf) or candidate.value != "super":
                continue
            owner = candidate.parent
            has_valued_leaf = any(
                inner.value for inner in owner.post_order() if isinstance(inner, Leaf)
            )
            if has_valued_leaf:
                owner.remove()

    def add_provide_context_to_python_operator(node: LN, capture: Capture, filename: Filename) -> None:
        """Append ``, provide_context=True`` to the matched call's arguments."""
        keyword = KeywordArg(Name('provide_context'), Name('True'))
        arguments = capture['function_arguments'][0]
        arguments.append_child(Comma())
        # The prefix is read after the comma has been appended, as before.
        keyword.prefix = arguments.children[0].prefix
        arguments.append_child(keyword)

    def remove_class(qry, class_name) -> None:
        """Register a modifier on ``qry`` that strips ``class_name`` from the tree."""

        def _remover(node: LN, capture: Capture, filename: Filename) -> None:
            # NOTE(review): 300 and 311 are presumably fissix grammar symbol
            # numbers (import statement / usage) - TODO confirm.
            if node.type == 311:
                node.parent.remove()
                return
            if node.type != 300:
                node.remove()
                return
            # Type 300: drop only the leaves naming the class, plus a comma
            # directly after each of them.
            for leaf in node.post_order():
                if not (isinstance(leaf, Leaf) and leaf.value == class_name):
                    continue
                follower = leaf.next_sibling
                if follower and follower.value == ",":
                    follower.remove()
                leaf.remove()

        qry.select_class(class_name).modify(_remover)

    # Pairs of (new module path, deprecated module path).
    renamed_modules = (
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
    )

    qry = Query()
    for current_path, deprecated_path in renamed_modules:
        qry.select_module(current_path).rename(deprecated_path)

    # Move and refactor imports for Dataflow
    script_dir = dirname(__file__)
    source_utils = os.path.join(script_dir, os.pardir, "airflow", "utils")
    target_utils = os.path.join(script_dir, "airflow", "providers", "google", "cloud", "utils")

    copyfile(
        os.path.join(source_utils, "python_virtualenv.py"),
        os.path.join(target_utils, "python_virtualenv.py"),
    )
    virtualenv_selector = qry.select_module("airflow.utils.python_virtualenv")
    virtualenv_selector.rename("airflow.providers.google.cloud.utils.python_virtualenv")

    copyfile(
        os.path.join(source_utils, "process_utils.py"),
        os.path.join(target_utils, "process_utils.py"),
    )
    process_utils_selector = qry.select_module("airflow.utils.process_utils")
    process_utils_selector.rename("airflow.providers.google.cloud.utils.process_utils")

    # Remove tags
    dag_calls = qry.select_method("DAG").is_call()
    dag_calls.modify(remove_tags_modifier)

    # Fix KubernetesPodOperator imports to use old path
    pod_module = qry.select_module("airflow.providers.cncf.kubernetes.operators.kubernetes_pod")
    pod_module.rename("airflow.contrib.operators.kubernetes_pod_operator")

    # Fix BaseOperatorLinks imports
    link_files = r"bigquery\.py|mlengine\.py"  # noqa
    (
        qry
        .select_module("airflow.models")
        .is_filename(include=link_files)
        .filter(pure_airflow_models_filter)
        .rename("airflow.models.baseoperator")
    )

    # Fix super().__init__() call in hooks
    hook_subclasses = qry.select_subclass("BaseHook")
    hook_subclasses.modify(remove_super_init_call)

    (
        qry
        .select_function("PythonOperator")
        .is_call()
        .is_filename(include=r"mlengine_operator_utils.py$")
        .modify(add_provide_context_to_python_operator)
    )

    remove_class(qry, "GKEStartPodOperator")

    qry.execute(write=True, silent=False, interactive=False)