def publish() -> str:
    """Build and publish all output tables to the production bucket.

    Pulls the staged combined tables from the test bucket, prepares the
    public copies, generates the joint main table plus its API-style
    subsets, and uploads the lot to the prod bucket under "v2".

    Returns:
        The literal string "OK" once the upload has finished.
    """
    with TemporaryDirectory() as tmp:
        root = Path(tmp)
        tables_folder = root / "tables"
        public_folder = root / "public"
        for folder in (tables_folder, public_folder):
            folder.mkdir(parents=True, exist_ok=True)

        # Download all the combined tables into our local storage.
        # NOTE(review): source is the TEST bucket while the upload below
        # targets PROD — presumably the intended promotion flow; confirm.
        download_folder(GCS_BUCKET_TEST, "tables", tables_folder)

        # Prepare all files for publishing and add them to the public folder
        copy_tables(tables_folder, public_folder)
        print("Output tables copied to public folder")

        # Create the joint main table for all records
        main_table_path = public_folder / "main.csv"
        make_main_table(tables_folder, main_table_path)
        print("Main table created")

        # Create subsets for easy API-like access to slices of data.
        # list() drains the generator so all subset files get written.
        list(create_table_subsets(main_table_path, public_folder))
        print("Table subsets created")

        # Upload the results to the prod bucket
        upload_folder(GCS_BUCKET_PROD, "v2", public_folder)

    return "OK"
def publish_subset_tables() -> Response:
    """Regenerate the API-style table subsets from the production main table.

    Downloads only "main.csv" from the prod bucket's "v2" prefix, slices
    it into the subset files, and uploads them back to the same location.

    Returns:
        An HTTP 200 Response with body "OK" when the upload completes.
    """
    with temporary_directory() as workdir:
        src = workdir / "input"
        dst = workdir / "output"
        for folder in (src, dst):
            folder.mkdir(parents=True, exist_ok=True)

        # Download the main table only — the filter skips every other blob.
        download_folder(GCS_BUCKET_PROD, "v2", src, lambda x: str(x) == "main.csv")

        # Create subsets for easy API-like access to slices of data.
        # list() drains the generator so all subset files get written.
        list(create_table_subsets(src / "main.csv", dst))
        logger.log_info("Table subsets created")

        # Upload the results to the prod bucket
        upload_folder(GCS_BUCKET_PROD, "v2", dst)

    return Response("OK", status=200)