import os

from caldp import file_ops, log, messages, process  # assumed: these helpers live in the caldp package

# get_inputs, get_preview_inputs, create_previews, and copy_previews are defined
# elsewhere in this module.


def main(ipppssoot, input_uri_prefix, output_uri_prefix):
    """Generate previews for a dataset based on the input and output
    directories derived from the specified args.
    """
    output_path = messages.get_local_outpath(output_uri_prefix, ipppssoot)
    msg = messages.Messages(output_uri_prefix, output_path, ipppssoot)
    msg.preview_message()
    # processing
    logger = log.CaldpLogger(enable_console=False, log_file="preview.txt")
    input_dir = file_ops.get_input_path(input_uri_prefix, ipppssoot)
    # append process.txt to trailer file
    # file_ops.append_trailer(input_dir, output_path, ipppssoot)
    input_paths = get_inputs(ipppssoot, input_dir)
    instr = process.get_instrument(ipppssoot)
    preview_inputs = get_preview_inputs(instr, input_paths)
    # create previews
    previews = create_previews(input_dir, preview_inputs)
    # upload/copy previews
    log.info("Saving previews...")
    if output_uri_prefix.startswith("s3"):
        preview_output = process.get_output_path("file:outputs", ipppssoot) + "/previews"
        os.makedirs(preview_output, exist_ok=True)
        copy_previews(previews, preview_output)
        log.info("Preparing files for s3 upload...")
        file_ops.tar_outputs(ipppssoot, input_uri_prefix, output_uri_prefix)
    elif output_uri_prefix.startswith("file"):
        preview_output = process.get_output_path(output_uri_prefix, ipppssoot) + "/previews"
        os.makedirs(preview_output, exist_ok=True)
        copy_previews(previews, preview_output)
    else:
        return
    del logger
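# A minimal sketch of how main() might be driven from the command line,
# assuming a plain sys.argv interface; caldp's actual console entry point
# (if any) may parse arguments differently, so treat this as illustrative.
if __name__ == "__main__":
    import sys

    if len(sys.argv) != 4:
        print("usage: create_previews <ipppssoot> <input_uri_prefix> <output_uri_prefix>")
        sys.exit(1)
    main(sys.argv[1], sys.argv[2], sys.argv[3])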
# FAIL_OUTPUTS, parse_results, list_inputs, list_logs, and check_outputs are
# defined elsewhere in this test module.


def check_failed_job_tarball(ipppssoot, input_uri, output_uri):
    """In the case of a processing error, tar the input files and upload
    them to s3 for debugging.

    test case: j8f54obeq, astroquery:, file:outputs

    Note: if caldp fails during processing, the .fits and .tra files are
    never copied over to the /outputs folder, but the (partially) processed
    input files are available in /inputs.
    """
    if ipppssoot == "j8f54obeq" and input_uri.startswith("astroquery"):
        working_dir = os.getcwd()
        fail_outputs = dict(FAIL_OUTPUTS)
        expected = {}
        for name, size in parse_results(fail_outputs[ipppssoot]):
            expected[name] = size
        # Manually search for and delete output files so the tarball is
        # forced to use the inputs.
        output_dir = file_ops.get_output_dir(output_uri)
        os.chdir(output_dir)
        output_files = file_ops.find_output_files(ipppssoot)
        # assert len(output_files) == 6
        if len(output_files) > 0:
            print("Removing outputs for failed job test:")
            for f in output_files:
                print(f)
                os.remove(f)
        empty_outputs = file_ops.find_output_files(ipppssoot)
        print("Files remaining in outputs dir: ", len(empty_outputs))
        assert len(empty_outputs) == 0
        os.chdir(working_dir)
        tar, file_list = file_ops.tar_outputs(ipppssoot, input_uri, output_uri)
        assert len(file_list) == 7
        assert os.path.exists(os.path.join("inputs", tar))
        actual = list_inputs(ipppssoot, input_uri)
        log_path = os.path.join("outputs", ipppssoot, "logs")
        assert os.path.exists(log_path)
        actual.update(list_logs(log_path))
        check_outputs(output_uri, expected, actual)
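# A hedged sketch of how check_failed_job_tarball() could be wired into a
# parametrized pytest run. The test_failed_job name and the assumption that
# a prior processing run has already failed for this dataset are illustrative;
# the real test module drives these checks from its own harness.
import pytest


@pytest.mark.parametrize(
    "ipppssoot,input_uri,output_uri",
    [("j8f54obeq", "astroquery:", "file:outputs")],
)
def test_failed_job(ipppssoot, input_uri, output_uri):
    # Assumes an earlier (failed) processing run left files in /inputs.
    check_failed_job_tarball(ipppssoot, input_uri, output_uri)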
def check_tarball_out(ipppssoot, input_uri, output_uri):
    """Create a tarfile from outputs - only runs for a single dataset (test_io).

    Workaround to improve test coverage when s3 bucket access is unavailable.
    """
    if output_uri.startswith("file"):
        # This call to tar_outputs also exercises file_ops.clean_up, so
        # technically there is no need to test it again further below. The
        # problem with doing it here is that we need a lot of logic to find
        # "all" of the files to clean up, and that logic is itself what
        # really needs to be tested, meaning it should live in caldp, not
        # in the test.
        tar, file_list = file_ops.tar_outputs(ipppssoot, input_uri, output_uri)
        assert len(file_list) > 0
        tarpath = os.path.join("outputs", tar)
        assert os.path.exists(tarpath)
        all_files = list_files(os.path.dirname(tarpath), ipppssoot)
        actual_tarfiles = {}
        for name, size in all_files.items():
            if name.endswith(".tar.gz"):
                actual_tarfiles[name] = size
        return actual_tarfiles
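# A usage sketch for the dict returned by check_tarball_out(), assuming the
# caller holds an expected {name: size} table. The helper name and the 10%
# size tolerance are assumptions for illustration, not caldp's actual policy.
def _verify_tarball_sizes(ipppssoot, input_uri, output_uri, expected):
    actual = check_tarball_out(ipppssoot, input_uri, output_uri)
    if actual is None:
        # output_uri was not file-based, so no local tarball was produced.
        return
    for name, size in expected.items():
        assert name in actual, f"missing tarball {name}"
        # Allow modest drift, since compressed sizes can vary between runs.
        assert abs(actual[name] - size) <= 0.1 * size, f"size drift for {name}"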