def _patch_imports(self, resolver, output_path: Path) -> int:
    # select modules to patch imports
    query = Query()
    query.paths = []
    for package in resolver.graph.metainfo.package.packages:
        for module_path in package:
            query.paths.append(str(module_path))

    # patch vendors if it's outside of main package
    package_path = resolver.graph.metainfo.package.packages[0].path
    if package_path.resolve() not in output_path.resolve().parents:
        query.paths.append(str(output_path))

    # set renamings
    root = Path(self.config['project'])
    for library in output_path.iterdir():
        if library.name in self.config['vendor']['exclude']:
            continue
        library_module = '.'.join(library.resolve().relative_to(str(root)).parts)
        self.logger.debug('patch imports', extra=dict(
            old_name=library.name,
            new_name=library_module,
        ))
        query = transform_imports(
            query=query,
            old_name=library.name,
            new_name=library_module,
        )

    # execute renaming
    query.execute(interactive=False, write=True, silent=True)
    return len(query.paths)
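# `transform_imports` is defined elsewhere in the project.  A minimal sketch of
# what such a helper plausibly looks like, assuming it only chains Bowler's
# select_module()/rename() (the same calls used throughout this section) onto
# the accumulated query -- a hypothetical reconstruction, not the real code:
def transform_imports(query, old_name, new_name):
    # Rewrite both `import old_name` and `from old_name import ...`
    # statements so they point at new_name.
    return query.select_module(old_name).rename(new_name)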
def run_bowler_modifier( self, input_text, selector=None, modifier=None, selector_func=None, modifier_func=None, in_process=True, ): """Returns the modified text.""" if not (selector or selector_func): raise ValueError("Pass selector") if not (modifier or modifier_func): raise ValueError("Pass modifier") exception_queue = multiprocessing.Queue() def local_modifier(node, capture, filename): # When in_process=False, this runs in another process. See notes below. try: return modifier(node, capture, filename) except Exception as e: exception_queue.put(e) with tempfile.NamedTemporaryFile(suffix=".py") as f: # TODO: I'm almost certain this will not work on Windows, since # NamedTemporaryFile has it already open for writing. Consider # using mktemp directly? with open(f.name, "w") as fw: fw.write(input_text + "\n") if selector_func: query = selector_func([f.name]) else: query = Query([f.name]).select(selector) if modifier_func: # N.b. exceptions may not work query = modifier_func(query) else: query = query.modify(local_modifier) # We require the in_process parameter in order to record coverage properly, # but it also helps in bubbling exceptions and letting tests read state set # by modifiers. query.execute(interactive=False, write=True, silent=False, in_process=in_process) # In the case of in_process=False (mirroring normal use of the tool) we use # the queue to ship back exceptions from local_process, which can actually # fail the test. Normally exceptions in modifiers are not printed unless # you pass --debug. if not exception_queue.empty(): raise AssertionError from exception_queue.get() with open(f.name, "r") as fr: return fr.read().rstrip()
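# A hypothetical test built on the helper above.  The function names and the
# final assertion are illustrative; select_function()/rename() are the same
# Bowler calls used elsewhere in this section.
def test_rename_function(self):
    input_text = "def foo():\n    return 1\n\nfoo()"
    output = self.run_bowler_modifier(
        input_text,
        # selector_func receives the list of paths and returns a selected Query
        selector_func=lambda paths: Query(paths).select_function("foo"),
        # modifier_func receives the Query and returns it with modifiers attached
        modifier_func=lambda q: q.rename("bar"),
    )
    assert "def bar()" in output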
def change_import_paths_to_deprecated():
    import os
    from os.path import dirname
    from shutil import copyfile
    from bowler import LN, TOKEN, Capture, Filename, Query
    from fissix.pytree import Leaf

    def remove_tags_modifier(node: LN, capture: Capture, filename: Filename) -> None:
        for node in capture['function_arguments'][0].post_order():
            if isinstance(node, Leaf) and node.value == "tags" and node.type == TOKEN.NAME:
                if node.parent.next_sibling and node.parent.next_sibling.value == ",":
                    node.parent.next_sibling.remove()
                node.parent.remove()

    def pure_airflow_models_filter(node: LN, capture: Capture, filename: Filename) -> bool:
        """Check if select is exactly [airflow, ., models]"""
        return len([ch for ch in node.children[1].leaves()]) == 3

    changes = [
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
    ]
    qry = Query()
    for new, old in changes:
        qry.select_module(new).rename(old)

    # Move and refactor imports for Dataflow
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils", "python_virtualenv.py"),
        os.path.join(dirname(__file__), "airflow", "providers", "google", "cloud", "utils",
                     "python_virtualenv.py"))
    (qry.select_module("airflow.utils.python_virtualenv").rename(
        "airflow.providers.google.cloud.utils.python_virtualenv"))
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils", "process_utils.py"),
        os.path.join(dirname(__file__), "airflow", "providers", "google", "cloud", "utils",
                     "process_utils.py"))
    (qry.select_module("airflow.utils.process_utils").rename(
        "airflow.providers.google.cloud.utils.process_utils"))

    # Remove tags
    qry.select_method("DAG").is_call().modify(remove_tags_modifier)

    # Fix KubernetesPodOperator imports to use old path
    qry.select_module(
        "airflow.providers.cncf.kubernetes.operators.kubernetes_pod").rename(
            "airflow.contrib.operators.kubernetes_pod_operator")

    # Fix BaseOperatorLinks imports
    files = r"bigquery\.py|mlengine\.py"  # noqa
    qry.select_module("airflow.models").is_filename(include=files).filter(
        pure_airflow_models_filter).rename("airflow.models.baseoperator")

    qry.execute(write=True, silent=False, interactive=False)
def change_import_paths_to_deprecated():
    # Assumes the same imports (os, dirname, copyfile, Query) and the same
    # remove_tags_modifier / pure_airflow_models_filter helpers as the variant above.
    changes = [
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
    ]
    qry = Query()
    for new, old in changes:
        qry.select_module(new).rename(old)

    # Move and refactor imports for Dataflow
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils", "python_virtualenv.py"),
        os.path.join(
            dirname(__file__), "airflow", "providers", "google", "cloud", "utils",
            "python_virtualenv.py"
        )
    )
    (
        qry
        .select_module("airflow.utils.python_virtualenv")
        .rename("airflow.providers.google.cloud.utils.python_virtualenv")
    )
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils", "process_utils.py"),
        os.path.join(
            dirname(__file__), "airflow", "providers", "google", "cloud", "utils",
            "process_utils.py"
        )
    )
    (
        qry
        .select_module("airflow.utils.process_utils")
        .rename("airflow.providers.google.cloud.utils.process_utils")
    )

    # Remove tags
    qry.select_method("DAG").is_call().modify(remove_tags_modifier)

    # Fix KubernetesPodOperator imports to use old path
    qry.select_module(
        "airflow.providers.cncf.kubernetes.operators.kubernetes_pod").rename(
            "airflow.contrib.operators.kubernetes_pod_operator"
    )

    # Fix BaseOperatorLinks imports
    files = r"bigquery\.py|mlengine\.py"  # noqa
    qry.select_module("airflow.models").is_filename(include=files).filter(
        pure_airflow_models_filter).rename("airflow.models.baseoperator")

    qry.execute(write=True, silent=False, interactive=False)
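# A self-contained sketch of the core technique used in both variants above:
# select_module(old).rename(new) rewrites import statements in place.  The
# temp-file scaffolding is illustrative; only the Query chain and execute()
# keywords come from the snippets above.
import tempfile
from bowler import Query

with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f:
    f.write("from airflow.operators.bash import BashOperator\n")

(
    Query([f.name])
    .select_module("airflow.operators.bash")
    .rename("airflow.operators.bash_operator")
    .execute(interactive=False, write=True, silent=True)
)

with open(f.name) as result:
    print(result.read())
    # expected: from airflow.operators.bash_operator import BashOperator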
def main(): parser = argparse.ArgumentParser() parser.add_argument("--log-level", dest="log_level", type=str, choices=["DEBUG", "INFO", "WARNING", "ERROR"], help="set log level, default is INFO") parser.add_argument("--no-log-file", dest="no_log_file", action='store_true', default=False, help="don't log to file") parser.add_argument("--log-filepath", dest="log_filepath", type=str, help='set log file path, default is "report.log"') parser.add_argument("--inpath", required=True, type=str, help='the file or directory path you want to upgrade.') parser.add_argument("--backup", type=str, nargs='?', default=None, const=None, help='backup directory, default is the "~/.paddle1to2/".') parser.add_argument("--write", action='store_true', default=False, help='modify files in-place.') parser.add_argument("--no-confirm", dest="no_confirm", action='store_true', default=False, help='write files in-place without confirm, ignored without --write.') parser.add_argument("--refactor", action='append', choices=refactor.__all__, help='this is a debug option. Specify refactor you want to run. If none, all refactors will be run.') parser.add_argument("--print-match", action='store_true', default=False, help='this is a debug option. Print matched code and node for each file.') args = parser.parse_args() if args.refactor: args.refactor = set(args.refactor) if args.backup is None: home = os.path.expanduser('~') args.backup = os.path.join(home, '.paddle1to2') else: args.backup = os.path.expanduser(args.backup) if args.log_level: logger.setLevel(args.log_level) if not args.no_log_file: log_to_file(args.log_filepath) if not should_convert(args.inpath): logger.error("convert abort!") sys.exit(1) # refactor code via "Query" step by step. q = Query(args.inpath) for fn in refactor.__all__: refactor_func = getattr(refactor, fn) if args.refactor and fn not in args.refactor: continue assert callable(refactor_func), "{} is not callable.".format(fn) logger.debug("run refactor: {}".format(fn)) if args.print_match: refactor_func(q, change_spec).filter(filters.print_match) else: refactor_func(q, change_spec) if args.write: # backup args.inpath backup = backup_inpath(args.inpath, args.backup) # print diff to stdout, and modify file in place. if utils.is_windows(): q.execute(write=True, silent=False, need_confirm=not args.no_confirm, backup=backup, in_process=True) else: q.execute(write=True, silent=False, need_confirm=not args.no_confirm, backup=backup) else: # print diff to stdout if utils.is_windows(): q.execute(write=False, silent=False, in_process=True) else: q.execute(write=False, silent=False) click.secho('Refactor finished without touching source files, add "--write" to modify source files in-place if everything is ok.', fg="red", bold=True)
# Module-level imports (bowler Query/LN/Capture/Filename, fissix Leaf/TOKEN/token,
# fixer_util helpers, shutil.copyfile and the get_*_folder helpers) are assumed
# from the surrounding script.
class RefactorBackportPackages:
    """
    Refactors the code of providers, so that it works in 1.10.
    """

    def __init__(self):
        self.qry = Query()

    def remove_class(self, class_name) -> None:
        """
        Removes the class altogether. Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            +++ ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            @@ -179,86 +179,3 @@
            -
            -class GKEStartPodOperator(KubernetesPodOperator):
            -
            -    ...

        :param class_name: name to remove
        """

        def _remover(node: LN, capture: Capture, filename: Filename) -> None:
            node.remove()

        self.qry.select_class(class_name).modify(_remover)

    def rename_deprecated_modules(self) -> None:
        """
        Renames imports back to the deprecated modules. Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/dingding/operators/dingding.py
            +++ ./airflow/providers/dingding/operators/dingding.py
            @@ -16,7 +16,7 @@
             # specific language governing permissions and limitations
             # under the License.

            -from airflow.operators.bash import BaseOperator
            +from airflow.operators.bash_operator import BaseOperator
             from airflow.providers.dingding.hooks.dingding import DingdingHook
             from airflow.utils.decorators import apply_defaults
        """
        changes = [
            ("airflow.operators.bash", "airflow.operators.bash_operator"),
            ("airflow.operators.python", "airflow.operators.python_operator"),
            ("airflow.utils.session", "airflow.utils.db"),
            (
                "airflow.providers.cncf.kubernetes.operators.kubernetes_pod",
                "airflow.contrib.operators.kubernetes_pod_operator",
            ),
        ]
        for new, old in changes:
            self.qry.select_module(new).rename(old)

    def add_provide_context_to_python_operators(self) -> None:
        """
        Adds provide_context to usages of Python/BranchPython operators - mostly in example_dags.
        Note that those changes apply to example DAGs, not to the operators/hooks etc.
        We package the example DAGs together with the provider classes, and they should serve as
        examples independently of the version of Airflow they will be installed in.
        In Airflow 2.0.0 the context is passed to Python-operator callables automatically, but we
        are still using the "Core" operators from the Airflow version that the provider packages
        are installed in - the "Core" operators do not have (for now) their own provider package.

        The core operators are:

            * Python
            * BranchPython
            * Bash
            * Branch
            * Dummy
            * LatestOnly
            * ShortCircuit
            * PythonVirtualEnv

        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/amazon/aws/example_dags/example_google_api_to_s3_transfer_advanced.py
            +++ ./airflow/providers/amazon/aws/example_dags/example_google_api_to_s3_transfer_advanced.py
            @@ -105,7 +105,8 @@
                     task_video_ids_to_s3.google_api_response_via_xcom,
                     task_video_ids_to_s3.task_id
                 ],
            -    task_id='check_and_transform_video_ids'
            +    task_id='check_and_transform_video_ids',
            +    provide_context=True
             )
        """

        def add_provide_context_to_python_operator(
                node: LN, capture: Capture, filename: Filename) -> None:
            fn_args = capture['function_arguments'][0]
            # Append a trailing comma first, unless one is already there.
            if len(fn_args.children) > 0 and (
                    not isinstance(fn_args.children[-1], Leaf)
                    or fn_args.children[-1].type != token.COMMA):
                fn_args.append_child(Comma())

            provide_context_arg = KeywordArg(Name('provide_context'), Name('True'))
            provide_context_arg.prefix = fn_args.children[0].prefix
            fn_args.append_child(provide_context_arg)

        (self.qry.select_function("PythonOperator").is_call().modify(
            add_provide_context_to_python_operator))
        (self.qry.select_function("BranchPythonOperator").is_call().modify(
            add_provide_context_to_python_operator))

    def remove_super_init_call(self):
        r"""
        Removes the super().__init__() call from Hooks.

        In Airflow 1.10 almost none of the Hooks call super().__init__(). It was always broken
        in Airflow 1.10 - BaseHook has its own __init__() which is wrongly implemented and
        requires a source parameter to be passed:

        .. code-block:: python

            def __init__(self, source):
                pass

        We fixed it in 2.0, but for the entire 1.10 line calling super().__init__() is not a
        good idea - it basically does nothing, and worse, it does not initialize LoggingMixin
        (BaseHook derives from LoggingMixin), which is the main reason why Hook logs sometimes
        do not work the way they are supposed to:

        .. code-block:: python

            class LoggingMixin(object):
                \"\"\"
                Convenience super-class to have a logger configured with the class name
                \"\"\"
                def __init__(self, context=None):
                    self._set_context(context)

        There are two Hooks in 1.10 that call super().__init__():

        .. code-block:: python

            super(CloudSqlDatabaseHook, self).__init__(source=None)
            super(MongoHook, self).__init__(source='mongo')

        Not that it helps with anything, because __init__ in BaseHook does nothing. So we
        remove the super().__init__() call from Hooks when backporting to 1.10.

        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/apache/druid/hooks/druid.py
            +++ ./airflow/providers/apache/druid/hooks/druid.py
            @@ -49,7 +49,7 @@
                              timeout=1,
                              max_ingestion_time=None):

            -        super().__init__()
            +
                     self.druid_ingest_conn_id = druid_ingest_conn_id
                     self.timeout = timeout
                     self.max_ingestion_time = max_ingestion_time
        """

        def remove_super_init_call_modifier(node: LN, capture: Capture, filename: Filename) -> None:
            for ch in node.post_order():
                if isinstance(ch, Leaf) and ch.value == "super":
                    if any(c.value for c in ch.parent.post_order() if isinstance(c, Leaf)):
                        ch.parent.remove()

        self.qry.select_subclass("BaseHook").modify(remove_super_init_call_modifier)

    def remove_tags(self):
        """
        Removes tags from execution of the operators (in example_dags).
        Note that those changes apply to example DAGs, not to the operators/hooks etc.
        We package the example DAGs together with the provider classes, and they should serve as
        examples independently of the version of Airflow they will be installed in.

        Tags are a feature added in 1.10.10, and occasionally we will want to run example DAGs
        as system tests on a pre-1.10.10 version, so we remove the tags here.

        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/amazon/aws/example_dags/example_datasync_2.py
            +++ ./airflow/providers/amazon/aws/example_dags/example_datasync_2.py
            @@ -83,8 +83,7 @@
                 with models.DAG(
                     "example_datasync_2",
                     default_args=default_args,
            -        schedule_interval=None,  # Override to match your needs
            -        tags=['example'],
            +        schedule_interval=None,
                 ) as dag:
        """

        def remove_tags_modifier(_: LN, capture: Capture, filename: Filename) -> None:
            for node in capture['function_arguments'][0].post_order():
                if isinstance(node, Leaf) and node.value == "tags" and node.type == TOKEN.NAME:
                    if node.parent.next_sibling and node.parent.next_sibling.value == ",":
                        node.parent.next_sibling.remove()
                    node.parent.remove()

        # Remove tags
        self.qry.select_method("DAG").is_call().modify(remove_tags_modifier)

    def remove_poke_mode_only_decorator(self):
        r"""
        Removes the @poke_mode_only decorator. The decorator is only available in Airflow 2.0.

        Example diff generated:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/sensors/gcs.py
            +++ ./airflow/providers/google/cloud/sensors/gcs.py
            @@ -189,7 +189,6 @@
                     return datetime.now()

            -@poke_mode_only
             class GCSUploadSessionCompleteSensor(BaseSensorOperator):
                 \"\"\"
                 Checks for changes in the number of objects at prefix in Google Cloud Storage
        """

        def find_and_remove_poke_mode_only_import(node: LN):
            for child in node.children:
                if isinstance(child, Leaf) and child.type == 1 and child.value == 'poke_mode_only':
                    # child.type == 1 is token.NAME
                    import_node = child.parent
                    # remove the import by default
                    skip_import_remove = False
                    if isinstance(child.prev_sibling, Leaf) and child.prev_sibling.value == ",":
                        # remove the comma before the imported name
                        child.prev_sibling.remove()
                        # do not remove the whole import - there are other imported names
                        skip_import_remove = True
                    if isinstance(child.next_sibling, Leaf) and child.next_sibling.value == ",":
                        # keep the name after ours and do not remove the whole import
                        skip_import_remove = True
                    # remove the imported name
                    child.remove()
                    if not skip_import_remove:
                        # remove the import if there were no sibling names
                        import_node.remove()
                else:
                    find_and_remove_poke_mode_only_import(child)

        def find_root_remove_import(node: LN):
            current_node = node
            while current_node.parent:
                current_node = current_node.parent
            find_and_remove_poke_mode_only_import(current_node)

        def is_poke_mode_only_decorator(node: LN) -> bool:
            return node.children and len(node.children) >= 2 and \
                isinstance(node.children[0], Leaf) and node.children[0].value == '@' and \
                isinstance(node.children[1], Leaf) and node.children[1].value == 'poke_mode_only'

        def remove_poke_mode_only_modifier(node: LN, capture: Capture, filename: Filename) -> None:
            for child in capture['node'].parent.children:
                if is_poke_mode_only_decorator(child):
                    find_root_remove_import(child)
                    child.remove()

        self.qry.select_subclass("BaseSensorOperator").modify(remove_poke_mode_only_modifier)

    def refactor_amazon_package(self):
        """
        Fixes to the "amazon" providers package.

        Copies some of the classes used from core Airflow to the "common.utils" package of
        the provider and renames imports to use them from there.

        We copy typing_compat.py and change the import as in this example diff:

        .. code-block:: diff

            --- ./airflow/providers/amazon/aws/operators/ecs.py
            +++ ./airflow/providers/amazon/aws/operators/ecs.py
            @@ -24,7 +24,7 @@
             from airflow.models import BaseOperator
             from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
             from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook
            -from airflow.typing_compat import Protocol, runtime_checkable
            +from airflow.providers.amazon.common.utils.typing_compat import Protocol, runtime_checkable
             from airflow.utils.decorators import apply_defaults
        """

        def amazon_package_filter(node: LN, capture: Capture, filename: Filename) -> bool:
            return filename.startswith("./airflow/providers/amazon/")

        os.makedirs(os.path.join(get_target_providers_package_folder("amazon"), "common", "utils"),
                    exist_ok=True)
        copyfile(
            os.path.join(get_source_airflow_folder(), "airflow", "utils", "__init__.py"),
            os.path.join(get_target_providers_package_folder("amazon"), "common", "__init__.py"))
        copyfile(
            os.path.join(get_source_airflow_folder(), "airflow", "utils", "__init__.py"),
            os.path.join(get_target_providers_package_folder("amazon"), "common", "utils",
                         "__init__.py"))
        copyfile(
            os.path.join(get_source_airflow_folder(), "airflow", "typing_compat.py"),
            os.path.join(get_target_providers_package_folder("amazon"), "common", "utils",
                         "typing_compat.py"))
        (self.qry.select_module("airflow.typing_compat").filter(
            callback=amazon_package_filter).rename(
                "airflow.providers.amazon.common.utils.typing_compat"))
        copyfile(
            os.path.join(get_source_airflow_folder(), "airflow", "utils", "email.py"),
            os.path.join(get_target_providers_package_folder("amazon"), "common", "utils",
                         "email.py"))
        (self.qry.select_module("airflow.utils.email").filter(
            callback=amazon_package_filter).rename(
                "airflow.providers.amazon.common.utils.email"))

    def refactor_google_package(self):
        r"""
        Fixes to the "google" providers package.

        Copies some of the classes used from core Airflow to the "common.utils" package of
        the provider and renames imports to use them from there. Note that in this case we
        also rename the imports in the copied files.

        For example we copy python_virtualenv.py and process_utils.py and change the imports
        as in this example diff:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            +++ ./airflow/providers/google/cloud/operators/kubernetes_engine.py
            @@ -28,11 +28,11 @@

             from airflow.exceptions import AirflowException
             from airflow.models import BaseOperator
            -from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
            +from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
             from airflow.providers.google.cloud.hooks.kubernetes_engine import GKEHook
             from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
             from airflow.utils.decorators import apply_defaults
            -from airflow.utils.process_utils import execute_in_subprocess, patch_environ
            +from airflow.providers.google.common.utils.process_utils import execute_in_subprocess

        And in the copied python_virtualenv.py we also change the import to process_utils.py.
        This happens automatically and is solved by Pybowler:

        .. code-block:: diff

            --- ./airflow/providers/google/common/utils/python_virtualenv.py
            +++ ./airflow/providers/google/common/utils/python_virtualenv.py
            @@ -21,7 +21,7 @@
             \"\"\"
             from typing import List, Optional

            -from airflow.utils.process_utils import execute_in_subprocess
            +from airflow.providers.google.common.utils.process_utils import execute_in_subprocess

             def _generate_virtualenv_cmd(tmp_dir: str, python_bin: str, system_site_packages: bool)

        We also rename Base operator links to deprecated names:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/operators/mlengine.py
            +++ ./airflow/providers/google/cloud/operators/mlengine.py
            @@ -24,7 +24,7 @@
             from typing import List, Optional

             from airflow.exceptions import AirflowException
            -from airflow.models import BaseOperator, BaseOperatorLink
            +from airflow.models.baseoperator import BaseOperator, BaseOperatorLink
             from airflow.models.taskinstance import TaskInstance
             from airflow.providers.google.cloud.hooks.mlengine import MLEngineHook
             from airflow.utils.decorators import apply_defaults

        We remove GKEStartPodOperator (see the example in the remove_class method).

        We also copy helpers.py (to google.common.utils) and rename imports to use the copy:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/example_dags/example_datacatalog.py
            +++ ./airflow/providers/google/cloud/example_dags/example_datacatalog.py
            @@ -37,7 +37,7 @@
                 CloudDataCatalogUpdateTagTemplateOperator,
             )
             from airflow.utils.dates import days_ago
            -from airflow.utils.helpers import chain
            +from airflow.providers.google.common.utils.helpers import chain

             default_args = {"start_date": days_ago(1)}

        And also module_loading, which is used by helpers:

        .. code-block:: diff

            --- ./airflow/providers/google/common/utils/helpers.py
            +++ ./airflow/providers/google/common/utils/helpers.py
            @@ -26,7 +26,7 @@
             from jinja2 import Template

             from airflow.exceptions import AirflowException
            -from airflow.utils.module_loading import import_string
            +from airflow.providers.google.common.utils.module_loading import import_string

             KEY_REGEX = re.compile(r'^[\w.-]+$')
        """

        def google_package_filter(node: LN, capture: Capture, filename: Filename) -> bool:
            return filename.startswith("./airflow/providers/google/")

        def pure_airflow_models_filter(node: LN, capture: Capture, filename: Filename) -> bool:
            """Check if select is exactly [airflow, ., models]"""
            return len(list(node.children[1].leaves())) == 3

        os.makedirs(os.path.join(get_target_providers_package_folder("google"), "common", "utils"),
                    exist_ok=True)
        copyfile(
            os.path.join(get_source_airflow_folder(), "airflow", "utils", "__init__.py"),
            os.path.join(get_target_providers_package_folder("google"), "common", "utils",
                         "__init__.py"))
        copyfile(
            os.path.join(get_source_airflow_folder(), "airflow", "utils", "python_virtualenv.py"),
            os.path.join(get_target_providers_package_folder("google"), "common", "utils",
                         "python_virtualenv.py"))
        copy_helper_py_file(
            os.path.join(get_target_providers_package_folder("google"), "common", "utils",
                         "helpers.py"))
        copyfile(
            os.path.join(get_source_airflow_folder(), "airflow", "utils", "module_loading.py"),
            os.path.join(get_target_providers_package_folder("google"), "common", "utils",
                         "module_loading.py"))
        (self.qry.select_module("airflow.utils.python_virtualenv").filter(
            callback=google_package_filter).rename(
                "airflow.providers.google.common.utils.python_virtualenv"))
        copyfile(
            os.path.join(get_source_airflow_folder(), "airflow", "utils", "process_utils.py"),
            os.path.join(get_target_providers_package_folder("google"), "common", "utils",
                         "process_utils.py"))
        (self.qry.select_module("airflow.utils.process_utils").filter(
            callback=google_package_filter).rename(
                "airflow.providers.google.common.utils.process_utils"))
        (self.qry.select_module("airflow.utils.helpers").filter(
            callback=google_package_filter).rename(
                "airflow.providers.google.common.utils.helpers"))
        (self.qry.select_module("airflow.utils.module_loading").filter(
            callback=google_package_filter).rename(
                "airflow.providers.google.common.utils.module_loading"))
        (
            # Fix BaseOperatorLinks imports
            self.qry.select_module("airflow.models").is_filename(
                include=r"bigquery\.py|mlengine\.py"
            ).filter(callback=google_package_filter
                     ).filter(pure_airflow_models_filter).rename("airflow.models.baseoperator"))
        self.remove_class("GKEStartPodOperator")
        (self.qry.select_class("GKEStartPodOperator").filter(
            callback=google_package_filter).is_filename(
                include=r"example_kubernetes_engine\.py").rename("GKEPodOperator"))

    def refactor_odbc_package(self):
        """
        Fixes to the "odbc" providers package.

        Copies some of the classes used from core Airflow to the "utils" package of
        the provider and renames imports to use them from there.

        We copy helpers.py and change the import as in this example diff:

        .. code-block:: diff

            --- ./airflow/providers/google/cloud/example_dags/example_datacatalog.py
            +++ ./airflow/providers/google/cloud/example_dags/example_datacatalog.py
            @@ -37,7 +37,7 @@
                 CloudDataCatalogUpdateTagTemplateOperator,
             )
             from airflow.utils.dates import days_ago
            -from airflow.utils.helpers import chain
            +from airflow.providers.odbc.utils.helpers import chain

             default_args = {"start_date": days_ago(1)}
        """

        def odbc_package_filter(node: LN, capture: Capture, filename: Filename) -> bool:
            return filename.startswith("./airflow/providers/odbc/")

        os.makedirs(os.path.join(get_target_providers_folder(), "odbc", "utils"), exist_ok=True)
        copyfile(
            os.path.join(get_source_airflow_folder(), "airflow", "utils", "__init__.py"),
            os.path.join(get_target_providers_package_folder("odbc"), "utils", "__init__.py"))
        copy_helper_py_file(
            os.path.join(get_target_providers_package_folder("odbc"), "utils", "helpers.py"))
        (self.qry.select_module("airflow.utils.helpers").filter(
            callback=odbc_package_filter).rename("airflow.providers.odbc.utils.helpers"))

    def do_refactor(self, in_process: bool = False) -> None:  # noqa
        self.rename_deprecated_modules()
        self.refactor_amazon_package()
        self.refactor_google_package()
        self.refactor_odbc_package()
        self.remove_tags()
        self.remove_super_init_call()
        self.add_provide_context_to_python_operators()
        self.remove_poke_mode_only_decorator()
        # In order to debug Bowler - set in_process to True
        self.qry.execute(write=True, silent=False, interactive=False, in_process=in_process)
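# The class above is driven entirely through do_refactor(); the backport
# packaging scripts would invoke it roughly like this (a sketch):
if __name__ == "__main__":
    # Set in_process=True to debug Bowler, as noted in do_refactor().
    RefactorBackportPackages().do_refactor()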
def change_import_paths_to_deprecated():
    import os
    from os.path import dirname
    from shutil import copyfile
    from bowler import LN, TOKEN, Capture, Filename, Query
    from fissix.pytree import Leaf
    from fissix.fixer_util import KeywordArg, Name, Comma

    def remove_tags_modifier(node: LN, capture: Capture, filename: Filename) -> None:
        for node in capture['function_arguments'][0].post_order():
            if isinstance(node, Leaf) and node.value == "tags" and node.type == TOKEN.NAME:
                if node.parent.next_sibling and node.parent.next_sibling.value == ",":
                    node.parent.next_sibling.remove()
                node.parent.remove()

    def pure_airflow_models_filter(node: LN, capture: Capture, filename: Filename) -> bool:
        """Check if select is exactly [airflow, ., models]"""
        return len([ch for ch in node.children[1].leaves()]) == 3

    def remove_super_init_call(node: LN, capture: Capture, filename: Filename) -> None:
        for ch in node.post_order():
            if isinstance(ch, Leaf) and ch.value == "super":
                if any(c.value for c in ch.parent.post_order() if isinstance(c, Leaf)):
                    ch.parent.remove()

    def add_provide_context_to_python_operator(
            node: LN, capture: Capture, filename: Filename) -> None:
        fn_args = capture['function_arguments'][0]
        fn_args.append_child(Comma())

        provide_context_arg = KeywordArg(Name('provide_context'), Name('True'))
        provide_context_arg.prefix = fn_args.children[0].prefix
        fn_args.append_child(provide_context_arg)

    def remove_class(qry, class_name) -> None:
        def _remover(node: LN, capture: Capture, filename: Filename) -> None:
            if node.type not in (300, 311):  # remove only the definition, not usages
                node.remove()

        qry.select_class(class_name).modify(_remover)

    changes = [
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
        (
            "airflow.providers.cncf.kubernetes.operators.kubernetes_pod",
            "airflow.contrib.operators.kubernetes_pod_operator"
        ),
    ]
    qry = Query()
    for new, old in changes:
        qry.select_module(new).rename(old)

    # Move and refactor imports for Dataflow
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils", "python_virtualenv.py"),
        os.path.join(
            dirname(__file__), "airflow", "providers", "google", "cloud", "utils",
            "python_virtualenv.py"
        )
    )
    (
        qry
        .select_module("airflow.utils.python_virtualenv")
        .rename("airflow.providers.google.cloud.utils.python_virtualenv")
    )
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils", "process_utils.py"),
        os.path.join(
            dirname(__file__), "airflow", "providers", "google", "cloud", "utils",
            "process_utils.py"
        )
    )
    (
        qry
        .select_module("airflow.utils.process_utils")
        .rename("airflow.providers.google.cloud.utils.process_utils")
    )

    # Remove tags
    qry.select_method("DAG").is_call().modify(remove_tags_modifier)

    # Fix AWS import in Google Cloud Transfer Service
    (
        qry
        .select_module("airflow.providers.amazon.aws.hooks.base_aws")
        .is_filename(include=r"cloud_storage_transfer_service\.py")
        .rename("airflow.contrib.hooks.aws_hook")
    )
    (
        qry
        .select_class("AwsBaseHook")
        .is_filename(include=r"cloud_storage_transfer_service\.py")
        .filter(lambda n, c, f: n.type == 300)  # rename usages, not the definition
        .rename("AwsHook")
    )

    # Fix BaseOperatorLinks imports
    files = r"bigquery\.py|mlengine\.py"  # noqa
    qry.select_module("airflow.models").is_filename(include=files).filter(
        pure_airflow_models_filter).rename("airflow.models.baseoperator")

    # Fix super().__init__() call in hooks
    qry.select_subclass("BaseHook").modify(remove_super_init_call)

    (
        qry.select_function("PythonOperator")
        .is_call()
        .is_filename(include=r"mlengine_operator_utils.py$")
        .modify(add_provide_context_to_python_operator)
    )
    (
        qry.select_function("BranchPythonOperator")
        .is_call()
        .is_filename(include=r"example_google_api_to_s3_transfer_advanced.py$")
        .modify(add_provide_context_to_python_operator)
    )

    # Remove new class and rename usages of old
    remove_class(qry, "GKEStartPodOperator")
    (
        qry
        .select_class("GKEStartPodOperator")
        .is_filename(include=r"example_kubernetes_engine\.py")
        .rename("GKEPodOperator")
    )

    qry.execute(write=True, silent=False, interactive=False)

    # Add old import to GKE
    gke_path = os.path.join(
        dirname(__file__), "airflow", "providers", "google", "cloud", "operators",
        "kubernetes_engine.py"
    )
    with open(gke_path, "a") as f:
        f.writelines(
            ["\n", "from airflow.contrib.operators.gcp_container_operator import GKEPodOperator\n"]
        )
def change_import_paths_to_deprecated():
    import os
    from os.path import dirname
    from shutil import copyfile
    from bowler import LN, TOKEN, Capture, Filename, Query
    from fissix.pytree import Leaf
    from fissix.fixer_util import KeywordArg, Name, Comma

    def remove_tags_modifier(node: LN, capture: Capture, filename: Filename) -> None:
        for node in capture['function_arguments'][0].post_order():
            if isinstance(node, Leaf) and node.value == "tags" and node.type == TOKEN.NAME:
                if node.parent.next_sibling and node.parent.next_sibling.value == ",":
                    node.parent.next_sibling.remove()
                node.parent.remove()

    def pure_airflow_models_filter(node: LN, capture: Capture, filename: Filename) -> bool:
        """Check if select is exactly [airflow, ., models]"""
        return len([ch for ch in node.children[1].leaves()]) == 3

    def remove_super_init_call(node: LN, capture: Capture, filename: Filename) -> None:
        for ch in node.post_order():
            if isinstance(ch, Leaf) and ch.value == "super":
                if any(c.value for c in ch.parent.post_order() if isinstance(c, Leaf)):
                    ch.parent.remove()

    def add_provide_context_to_python_operator(
            node: LN, capture: Capture, filename: Filename) -> None:
        fn_args = capture['function_arguments'][0]
        fn_args.append_child(Comma())

        provide_context_arg = KeywordArg(Name('provide_context'), Name('True'))
        provide_context_arg.prefix = fn_args.children[0].prefix
        fn_args.append_child(provide_context_arg)

    def remove_class(qry, class_name) -> None:
        # 300 and 311 are fissix grammar symbol ids for usage sites rather than
        # the class definition (see the diagnostic snippet after this function).
        def _remover(node: LN, capture: Capture, filename: Filename) -> None:
            if node.type == 300:
                # a reference to the class: remove the name itself,
                # together with a trailing comma if present
                for ch in node.post_order():
                    if isinstance(ch, Leaf) and ch.value == class_name:
                        if ch.next_sibling and ch.next_sibling.value == ",":
                            ch.next_sibling.remove()
                        ch.remove()
            elif node.type == 311:
                node.parent.remove()
            else:
                node.remove()

        qry.select_class(class_name).modify(_remover)

    changes = [
        ("airflow.operators.bash", "airflow.operators.bash_operator"),
        ("airflow.operators.python", "airflow.operators.python_operator"),
        ("airflow.utils.session", "airflow.utils.db"),
    ]
    qry = Query()
    for new, old in changes:
        qry.select_module(new).rename(old)

    # Move and refactor imports for Dataflow
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils", "python_virtualenv.py"),
        os.path.join(dirname(__file__), "airflow", "providers", "google", "cloud", "utils",
                     "python_virtualenv.py"))
    (qry.select_module("airflow.utils.python_virtualenv").rename(
        "airflow.providers.google.cloud.utils.python_virtualenv"))
    copyfile(
        os.path.join(dirname(__file__), os.pardir, "airflow", "utils", "process_utils.py"),
        os.path.join(dirname(__file__), "airflow", "providers", "google", "cloud", "utils",
                     "process_utils.py"))
    (qry.select_module("airflow.utils.process_utils").rename(
        "airflow.providers.google.cloud.utils.process_utils"))

    # Remove tags
    qry.select_method("DAG").is_call().modify(remove_tags_modifier)

    # Fix KubernetesPodOperator imports to use old path
    qry.select_module(
        "airflow.providers.cncf.kubernetes.operators.kubernetes_pod").rename(
            "airflow.contrib.operators.kubernetes_pod_operator")

    # Fix BaseOperatorLinks imports
    files = r"bigquery\.py|mlengine\.py"  # noqa
    qry.select_module("airflow.models").is_filename(include=files).filter(
        pure_airflow_models_filter).rename("airflow.models.baseoperator")

    # Fix super().__init__() call in hooks
    qry.select_subclass("BaseHook").modify(remove_super_init_call)

    (qry.select_function("PythonOperator").is_call().is_filename(
        include=r"mlengine_operator_utils.py$").modify(
            add_provide_context_to_python_operator))

    remove_class(qry, "GKEStartPodOperator")

    qry.execute(write=True, silent=False, interactive=False)
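# The bare numbers 300 and 311 compared against node.type above are fissix
# grammar symbol ids (node types), not token types.  Which symbols they map to
# in the installed grammar can be checked with a small diagnostic like this
# (the exact numbering may vary between fissix versions):
from fissix.pygram import python_grammar

number2symbol = {num: name for name, num in python_grammar.symbol2number.items()}
print(number2symbol.get(300), number2symbol.get(311))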
def main():
    parser = argparse.ArgumentParser(
        description="Converts GDAL's test assertions to be pytest-style where possible.")
    parser.add_argument(
        "--no-input",
        dest="interactive",
        default=True,
        action="store_false",
        help="Non-interactive mode",
    )
    parser.add_argument(
        "--no-write",
        dest="write",
        default=True,
        action="store_false",
        help="Don't write the changes to the source file, just output a diff to stdout",
    )
    parser.add_argument(
        "--debug",
        default=False,
        action="store_true",
        help="Spit out debugging information",
    )
    parser.add_argument(
        "--silent",
        default=False,
        action="store_true",
        help="Don't spit out a diff, just write changes to files",
    )
    parser.add_argument(
        "--step",
        default=None,  # must be None (not False) so the `is not None` check below runs all steps
        action="store",
        type=int,
        help="Which step to run",
    )
    parser.add_argument(
        "files", nargs="+", help="The python source file(s) to operate on.")

    args = parser.parse_args()

    # No way to pass this to .modify() callables, so we just set it at module level
    flags["debug"] = args.debug

    query = Query(*args.files)

    steps = {
        # Rename all tests `test_*`
        0: lambda q: q.select("""
            expr_stmt< "gdaltest_list" "=" atom< "[" testnames=( listmaker | NAME ) "]" > >
            """).modify(rename_tests),
        # `if x() != 'success'` --> `x()` (the 'success' return value gets removed further down)
        1: lambda q: q.select("""
            if_stmt<
                "if" comparison< x=any "!=" ( "'success'" | '"success"' ) > ":"
                suite<
                    any any
                    [
                        simple_stmt<
                            power<
                                ( "gdaltest" trailer< "." "post_reason" > | "post_reason" )
                                trailer<
                                    "("
                                    reason=(
                                        "'failure'" | "'fail'" | "'failed'"
                                        | '"fail"' | '"failed"' | '"failure"'
                                    )
                                    ")"
                                >
                            >
                            any
                        >
                    ]
                    simple_stmt< return_stmt< "return" returntype=any > any >
                    dedent=any
                >
            >
            """).modify(callback=remove_success_expectations),
        # Remove useless `post_reason('fail')` calls
        2: lambda q: q.select("""
            simple_stmt<
                power<
                    ( "gdaltest" trailer< "." "post_reason" > | "post_reason" )
                    trailer< "(" reason=STRING ")" >
                >
                any
            >
            """).modify(callback=remove_useless_post_reason_calls),
        # Turn basic if/post_reason clauses into assertions
        3: lambda q: q.select(f"""
            if_stmt<
                "if" condition=any ":"
                suite<
                    any any
                    reason_candidates=(
                        simple_stmt<
                            (
                                power<
                                    ( "gdaltest" trailer< "." "post_reason" > | "post_reason" )
                                    trailer
                                >
                                | power< "print" trailer >
                            )
                            any
                        >
                    )*
                    simple_stmt<
                        return_call=return_stmt< "return" returntype=STRING >
                        any
                    >
                    dedent=any
                >
            >
            """.format(
            print='''print=simple_stmt< power< "print" trailer< "(" reason=any ")" > > any >'''
        )).modify(callback=gdaltest_fail_reason_to_assert),
        # Replace further post_reason calls and skip/fail returns
        4: lambda q: (
            q.select("""
            any<
                any*
                post_reason_call=simple_stmt<
                    power<
                        ( "gdaltest" trailer< "." "post_reason" > | "post_reason" )
                        trailer< "(" reason=any ")" >
                    >
                    any
                >
                any*
                return_stmt=simple_stmt<
                    return_call=return_stmt< "return" returntype=STRING >
                    any
                >
                any*
            >
            """).modify(callback=gdaltest_other_skipfails)
            # (still part of step 4)
            # same as above, but get the reason from `print(reason)`
            # if we didn't find a post_reason clause.
            # (and, now, the reason is optional)
            .select("""
            any<
                any*
                [
                    post_reason_call=simple_stmt<
                        power< ( "print" ) trailer< "(" reason=any ")" > >
                        any
                    >
                    any*
                ]
                return_stmt=simple_stmt<
                    return_call=return_stmt< "return" returntype=STRING >
                    any
                >
                any*
            >
            """).modify(callback=gdaltest_other_skipfails)
        ),
        # Remove all `return 'success'`, or convert ternary ones to asserts.
        5: lambda q: q.select("""
            simple_stmt<
                return_call=return_stmt<
                    "return"
                    (
                        test< true_result=STRING "if" comparison=any "else" false_result=STRING >
                        | returnvalue=STRING
                    )
                >
                any
            >
            """).modify(callback=remove_return_success),
        # Remove gdaltest_list from each test module
        6: lambda q: q.select("""
            simple_stmt<
                expr_stmt< "gdaltest_list" "=" atom< "[" [ testnames=( listmaker | NAME ) ] "]" > >
                any
            >
            """).modify(remove_test_lists),
        # Remove the __main__ block from each test module
        7: lambda q: q.select("""
            if_stmt< "if" comparison< "__name__" "==" "'__main__'" > any* >
            """).modify(remove_main_block),
        # Find pytest.fail() inside `try` blocks
        # where the 'except' bit is just "pass",
        # and turn them into `with pytest.raises(...)` blocks
        8: lambda q: q.select("""
            try_stmt<
                "try" ":"
                try_suite=suite<
                    any any any*
                    fail_stmt=simple_stmt<
                        power<
                            "pytest" trailer< "." "fail" >
                            trailer< "(" reason=any* ")" >
                        >
                        any
                    >
                    any
                >
                ( "except" | except_clause< "except" exc_class=NAME any* > ) ":"
                suite< any any simple_stmt< "pass" any > dedent=any >
            >
            """).modify(make_pytest_raises_blocks),
    }

    if args.step is not None:
        query = steps[args.step](query)
    else:
        for i in sorted(steps.keys()):
            query = steps[i](query)

    query.execute(
        # interactive diff implies write (for the bits the user says 'y' to)
        interactive=(args.interactive and args.write),
        write=args.write,
        silent=args.silent,
    )
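# A self-contained sketch of step 7 above (removing the __main__ block), using
# the same selector pattern; the callback here is a stand-in for the script's
# remove_main_block, and the temp-file scaffolding is illustrative.
import tempfile
from bowler import Query

MAIN_BLOCK = """
    if_stmt< "if" comparison< "__name__" "==" "'__main__'" > any* >
"""

def drop_main_block(node, capture, filename):
    node.remove()

with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f:
    f.write("def test_x():\n    pass\n\nif __name__ == '__main__':\n    test_x()\n")

Query([f.name]).select(MAIN_BLOCK).modify(drop_main_block).execute(
    interactive=False, write=True, silent=True
)
# f.name now contains only the test function.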