def test_pipeline_tree_creation(parsed_ordered_dict, sample_metadata, sample_image_metadata):
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE)
    ordered_dict = parsed_ordered_dict

    assert len(ordered_dict.keys()) == len(pipeline_json['pipelines'][0]['nodes'])

    # Verify tree structure is correct
    assert not ordered_dict['cded6818-e601-4fd8-b6b9-c9fdf1fd1fca'].get('parent_operations')
    assert ordered_dict['bb9606ca-29ec-4133-a36a-67bd2a1f6dc3'].get(
        'parent_operations').pop() == 'cded6818-e601-4fd8-b6b9-c9fdf1fd1fca'
    assert ordered_dict['6f5c2ece-1977-48a1-847f-099b327c6ed1'].get(
        'parent_operations').pop() == 'cded6818-e601-4fd8-b6b9-c9fdf1fd1fca'
    assert ordered_dict['4ef63a48-a27c-4d1e-a0ee-2fbbdbe3be74'].get(
        'parent_operations').pop() == 'cded6818-e601-4fd8-b6b9-c9fdf1fd1fca'
    assert ordered_dict['4f7ae91b-682e-476c-8664-58412336b31f'].get(
        'parent_operations').pop() == 'bb9606ca-29ec-4133-a36a-67bd2a1f6dc3'
    assert ordered_dict['f82c4699-b392-4a3e-92b0-45d9e11126fe'].get(
        'parent_operations').pop() == 'bb9606ca-29ec-4133-a36a-67bd2a1f6dc3'
    assert ordered_dict['137d3d2f-4224-42d9-b8c6-cbee9ff2872d'].get(
        'parent_operations') == ['4ef63a48-a27c-4d1e-a0ee-2fbbdbe3be74',
                                 '0a7eff92-fe2a-411c-92a6-73d6f3810516']
    assert not ordered_dict['779c2630-64bf-47ca-8a98-9ac8a60e85f7'].get('parent_operations')
    assert ordered_dict['0a7eff92-fe2a-411c-92a6-73d6f3810516'].get(
        'parent_operations').pop() == '779c2630-64bf-47ca-8a98-9ac8a60e85f7'
    assert ordered_dict['92a7a247-1131-489c-8c3e-1e2389d4c673'].get(
        'parent_operations') == ['f82c4699-b392-4a3e-92b0-45d9e11126fe',
                                 '137d3d2f-4224-42d9-b8c6-cbee9ff2872d',
                                 '6f5c2ece-1977-48a1-847f-099b327c6ed1']

    for key in ordered_dict.keys():
        for node in pipeline_json['pipelines'][0]['nodes']:
            if node['id'] == key:
                assert ordered_dict[key]['runtime_image'] == node['app_data']['runtime_image']
                for image in sample_image_metadata:
                    if ordered_dict[key]['runtime_image'] == image.metadata['image_name']:
                        assert ordered_dict[key]['image_pull_policy'] == image.metadata['pull_policy']
                assert ordered_dict[key]['filename'] == node['app_data']['filename']
                for env in node['app_data']['env_vars']:
                    var, value = env.split("=")
                    assert ordered_dict[key]['pipeline_envs'][var] == value
                assert ordered_dict[key]['cos_endpoint'] == sample_metadata['cos_endpoint']
                assert ordered_dict[key]['cos_bucket'] == sample_metadata['cos_bucket']
                assert ordered_dict[key]['pipeline_envs']['AWS_ACCESS_KEY_ID'] == sample_metadata['cos_username']
                assert ordered_dict[key]['pipeline_envs']['AWS_SECRET_ACCESS_KEY'] == sample_metadata['cos_password']
                for arg in ["inputs", "outputs"]:
                    if node['app_data'].get(arg):
                        for file in node['app_data'][arg]:
                            assert file in ordered_dict[key]["pipeline_" + arg]
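# For reference, the parent/child relationships asserted above describe this DAG
# (node IDs abbreviated to their first segment; derived purely from the assertions
# in test_pipeline_tree_creation, not from the pipeline file itself):
#
#   roots:                          cded6818, 779c2630
#   cded6818                     -> bb9606ca, 6f5c2ece, 4ef63a48
#   bb9606ca                     -> 4f7ae91b, f82c4699
#   779c2630                     -> 0a7eff92
#   4ef63a48, 0a7eff92           -> 137d3d2f
#   f82c4699, 137d3d2f, 6f5c2ece -> 92a7a247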
@pytest.fixture
def parsed_pipeline():
    pipeline_resource = _read_pipeline_resource(PIPELINE_FILE)
    return PipelineParser().parse(pipeline_definitions=pipeline_resource)
def test_create_file(monkeypatch, processor, parsed_pipeline, parsed_ordered_dict, sample_metadata):
    pipeline_json = _read_pipeline_resource(PIPELINE_FILE)

    export_pipeline_name = "some-name"
    export_file_type = "py"

    mocked_runtime = Metadata(name="test-metadata",
                              display_name="test",
                              schema_name="airflow",
                              metadata=sample_metadata)

    monkeypatch.setattr(processor, "_get_metadata_configuration",
                        lambda name=None, namespace=None: mocked_runtime)
    monkeypatch.setattr(processor, "_upload_dependencies_to_object_store", lambda x, y, z: True)
    monkeypatch.setattr(processor, "_cc_pipeline", lambda x, y: parsed_ordered_dict)

    with tempfile.TemporaryDirectory() as temp_dir:
        export_pipeline_output_path = os.path.join(temp_dir, f'{export_pipeline_name}.py')

        response = processor.create_pipeline_file(parsed_pipeline,
                                                  pipeline_export_format=export_file_type,
                                                  pipeline_export_path=export_pipeline_output_path,
                                                  pipeline_name=export_pipeline_name)

        assert export_pipeline_output_path == response
        assert os.path.isfile(export_pipeline_output_path)

        with open(response) as fh:
            file_as_lines = fh.read().splitlines()

        # Check DAG project name
        for i in range(len(file_as_lines)):
            if "args = {" == file_as_lines[i]:
                assert "project_id" == read_key_pair(file_as_lines[i + 1], sep=':')['key']
                assert export_pipeline_name == read_key_pair(file_as_lines[i + 1], sep=':')['value']

        # For every node in the original pipeline json
        for node in pipeline_json['pipelines'][0]['nodes']:
            for i in range(len(file_as_lines)):
                # Matches an op with a node ID
                if "notebook_op_" + node['id'].replace("-", "_") + " = NotebookOp(" in file_as_lines[i]:
                    sub_list_line_counter = 0
                    # Gets sub-list slice starting where the Notebook Op starts
                    for line in file_as_lines[i + 1:]:
                        if 'namespace=' in line:
                            assert sample_metadata['user_namespace'] == read_key_pair(line)['value']
                        elif 'cos_endpoint=' in line:
                            assert sample_metadata['cos_endpoint'] == read_key_pair(line)['value']
                        elif 'cos_bucket=' in line:
                            assert sample_metadata['cos_bucket'] == read_key_pair(line)['value']
                        elif 'name=' in line:
                            assert node['app_data']['ui_data']['label'] == read_key_pair(line)['value']
                        elif 'notebook=' in line:
                            assert node['app_data']['filename'] == read_key_pair(line)['value']
                        elif 'image=' in line:
                            assert node['app_data']['runtime_image'] == read_key_pair(line)['value']
                        elif 'env_vars=' in line:
                            for env in node['app_data']['env_vars']:
                                var, value = env.split("=")
                                # Gets sub-list slice starting where the env vars start
                                for env_line in file_as_lines[i + sub_list_line_counter + 2:]:
                                    if "AWS_ACCESS_KEY_ID" in env_line:
                                        assert sample_metadata['cos_username'] == \
                                            read_key_pair(env_line, sep=':')['value']
                                    elif "AWS_SECRET_ACCESS_KEY" in env_line:
                                        assert sample_metadata['cos_password'] == \
                                            read_key_pair(env_line, sep=':')['value']
                                    elif var in env_line:
                                        assert var == read_key_pair(env_line, sep=':')['key']
                                        assert value == read_key_pair(env_line, sep=':')['value']
                                    elif env_line.strip() == '},':  # end of env vars
                                        break
                        elif 'pipeline_inputs=' in line and node['app_data'].get('inputs'):
                            for input in node['app_data']['inputs']:
                                assert input in string_to_list(read_key_pair(line)['value'])
                        elif 'pipeline_outputs=' in line and node['app_data'].get('outputs'):
                            for output in node['app_data']['outputs']:
                                assert output in string_to_list(read_key_pair(line)['value'])
                        elif line == ')':  # End of this Notebook Op
                            break
                        sub_list_line_counter += 1
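# The helpers used above (read_key_pair, string_to_list, _read_pipeline_resource)
# live in the test utility module. A minimal sketch of the two line parsers,
# assuming they split generated DAG-file lines such as `name='value',` or
# `'project_id': 'some-name',` into key/value pairs -- the behavior here is
# inferred from the call sites in this file, not copied from the real utilities:
import ast


def read_key_pair(key_pair, sep='='):
    """Split a generated line on the first `sep` into an unquoted key/value pair."""
    key, _, value = key_pair.strip().partition(sep)
    return {'key': key.strip().strip("'\""),
            'value': value.strip().rstrip(',').strip("'\"")}


def string_to_list(stringed_list):
    """Turn a stringified list such as "['a.csv', 'b.csv']" back into a list."""
    return ast.literal_eval(stringed_list)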
@pytest.fixture
def pipeline():
    pipeline_resource = _read_pipeline_resource('resources/sample_pipelines/pipeline_3_node_sample.json')
    return PipelineParser().parse(pipeline_definitions=pipeline_resource)