Example #1
    # Assumes BigQueryHook is patched where the operator module imports it.
    @mock.patch('airflow.contrib.operators.bigquery_operator.BigQueryHook')
    def test_execute(self, mock_hook):
        operator = BigQueryDeleteDatasetOperator(
            task_id=TASK_ID,
            dataset_id=TEST_DATASET,
            project_id=TEST_PROJECT_ID
        )

        operator.execute(None)
        mock_hook.return_value \
            .get_conn() \
            .cursor() \
            .delete_dataset \
            .assert_called_once_with(
                dataset_id=TEST_DATASET,
                project_id=TEST_PROJECT_ID
            )
Example #2

    # Assumes BigQueryHook is patched where the operator module imports it.
    @mock.patch('airflow.contrib.operators.bigquery_operator.BigQueryHook')
    def test_execute(self, mock_hook):
        operator = BigQueryDeleteDatasetOperator(
            task_id=TASK_ID,
            dataset_id=TEST_DATASET,
            project_id=TEST_GCP_PROJECT_ID
        )

        operator.execute(None)
        mock_hook.return_value \
            .get_conn() \
            .cursor() \
            .delete_dataset \
            .assert_called_once_with(
                dataset_id=TEST_DATASET,
                project_id=TEST_GCP_PROJECT_ID
            )
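Both tests assume module-level constants and a mock-decorated test class that the excerpts omit. A minimal scaffold under those assumptions (the constant values below are placeholders, not the originals):

import unittest
from unittest import mock

from airflow.contrib.operators.bigquery_operator import BigQueryDeleteDatasetOperator

# Placeholder values; the original test module defines its own.
TASK_ID = 'test-bq-delete-dataset-operator'
TEST_DATASET = 'test-dataset'
TEST_PROJECT_ID = 'test-project'
TEST_GCP_PROJECT_ID = 'test-project'


class TestBigQueryDeleteDatasetOperator(unittest.TestCase):
    ...  # the test_execute methods shown above belong here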
Example #3
    patch_dataset = BigQueryPatchDatasetOperator(
        task_id="patch-dataset",
        dataset_id=DATASET_NAME,
        dataset_resource={
            "friendlyName": "Patched Dataset",
            "description": "Patched dataset"
        },
    )

    update_dataset = BigQueryUpdateDatasetOperator(
        task_id="update-dataset",
        dataset_id=DATASET_NAME,
        dataset_resource={"description": "Updated dataset"},
    )

    delete_dataset = BigQueryDeleteDatasetOperator(
        task_id="delete-dataset",
        dataset_id=DATASET_NAME,
        delete_contents=True,
    )

    delete_dataset_with_location = BigQueryDeleteDatasetOperator(
        task_id="delete_dataset_with_location",
        dataset_id=LOCATION_DATASET_NAME,
        delete_contents=True,
    )

    create_dataset >> execute_query_save >> delete_dataset
    create_dataset >> create_table >> delete_dataset
    create_dataset >> get_dataset >> delete_dataset
    create_dataset >> patch_dataset >> update_dataset >> delete_dataset
    execute_query_save >> get_data >> get_dataset_result
    get_data >> delete_dataset
    create_dataset >> create_external_table >> execute_query_external_table >> \
        copy_from_selected_data >> delete_dataset
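The dependency chains above reference tasks (create_dataset, create_table, get_dataset, and so on) defined earlier in the example DAG. A minimal sketch of the create_dataset task they all start from, assuming the stock BigQueryCreateEmptyDatasetOperator and the same DATASET_NAME constant:

from airflow.contrib.operators.bigquery_operator import BigQueryCreateEmptyDatasetOperator

# Hypothetical upstream task; the real DAG defines this before the excerpt shown.
create_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id="create-dataset",
    dataset_id=DATASET_NAME,
)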
Example #4
    )

    patch_dataset = BigQueryPatchDatasetOperator(
        task_id="patch-dataset",
        dataset_id=DATASET_NAME,
        dataset_resource={
            "friendlyName": "Patchet Dataset",
            "description": "Patched dataset"
        },
    )

    update_dataset = BigQueryUpdateDatasetOperator(
        task_id="update-dataset",
        dataset_id=DATASET_NAME,
        dataset_resource={"description": "Updated dataset"},
    )

    delete_dataset = BigQueryDeleteDatasetOperator(
        task_id="delete-dataset",
        dataset_id=DATASET_NAME,
        delete_contents=True,
    )

    create_dataset >> execute_query_save >> delete_dataset
    create_dataset >> create_table >> delete_dataset
    create_dataset >> get_dataset >> delete_dataset
    create_dataset >> patch_dataset >> update_dataset >> delete_dataset
    execute_query_save >> get_data >> get_dataset_result
    get_data >> delete_dataset
    create_dataset >> create_external_table >> execute_query_external_table >> \
        copy_from_selected_data >> delete_dataset
    execute_query_external_table >> bigquery_to_gcs >> delete_dataset
    create_table >> delete_table >> delete_dataset
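The last two chains reference bigquery_to_gcs and delete_table tasks that the excerpt omits. A hypothetical sketch of the export step, assuming the stock BigQueryToCloudStorageOperator with placeholder table and bucket names:

from airflow.contrib.operators.bigquery_to_gcs import BigQueryToCloudStorageOperator

# Hypothetical export task; the source table and destination URI are placeholders.
bigquery_to_gcs = BigQueryToCloudStorageOperator(
    task_id="bigquery-to-gcs",
    source_project_dataset_table="{}.test_table".format(DATASET_NAME),
    destination_cloud_storage_uris=["gs://example-bucket/export/table-*.csv"],
)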
Example #5
# Paths to the temporary GCS decompressed files (t3 >> t4) and to the GCS
# aggregated query results (t8 >> t9).
deletePaths = [aggQuery_tempFolder_gcs, tempGCS_dir_temp]

'''
Clean-up tasks:
    -- delete the temporary GCS data (staging output from the aggregation
       queries to BigTable and the decompressed API data; NOT the archived
       API data)
    -- delete the local filesystem data from the API download and decompression
    -- delete the BigQuery dataset hosting the aggregation query results and
       the initial decompressed API data uploaded from GCS

t10_UploadToBigTable >> [t11_cleanBQ, t11_cleanGCS, t11_cleanLocalFS]
'''

# Delete the BigQuery dataset that hosted the aggregation query results.
BigQueryCLeanUp_task = BigQueryDeleteDatasetOperator(
    task_id='DeleteHostingBQDataset_{}'.format(datetime.today().strftime("%Y%m%d_%H_%M")),
    dataset_id=dataset_id,
    project_id=project_id,
    bigquery_conn_id=bigquery_conn_id,
    dag=dag)

# Delete the temporary GCS data; each path in deletePaths is passed to the
# callable as a separate positional argument.
GCSCleanUp_task = PythonOperator(
    task_id="deleteGCStempData_{}".format(datetime.today().strftime("%Y%m%d_%H_%M")),
    python_callable=deleleGCSdata,
    op_args=deletePaths,
    dag=dag)

# Remove the downloaded and decompressed API data from the local filesystem.
localFSCLeanUp_task = BashOperator(
    task_id='deleteLocalFS_{}'.format(datetime.today().strftime("%Y%m%d_%H_%M")),
    bash_command="rm -r {} && rm -r {}".format(apiDownloadPath, decompressedPath),
    dag=dag)
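The PythonOperator above calls a deleleGCSdata function that is not shown in this excerpt. A hypothetical sketch, assuming it takes each GCS path as a positional argument (matching op_args=deletePaths) and deletes every object under that prefix with the google.cloud.storage client:

from google.cloud import storage

# Hypothetical implementation; the original callable is not shown in the excerpt.
def deleleGCSdata(*gcs_paths):
    """Delete every object under each gs://bucket/prefix path passed in."""
    client = storage.Client()
    for path in gcs_paths:
        bucket_name, _, prefix = path.replace("gs://", "", 1).partition("/")
        bucket = client.bucket(bucket_name)
        for blob in bucket.list_blobs(prefix=prefix):
            blob.delete()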