Example No. 1
# Assumes module-level "import os" plus the caldp modules file_ops, log,
# messages, and process, and the local preview helpers used below.
def main(ipppssoot, input_uri_prefix, output_uri_prefix):
    """Generates previews based on input and output directories
    according to specified args
    """
    output_path = messages.get_local_outpath(output_uri_prefix, ipppssoot)
    msg = messages.Messages(output_uri_prefix, output_path, ipppssoot)
    msg.preview_message()  # processing
    logger = log.CaldpLogger(enable_console=False, log_file="preview.txt")
    input_dir = file_ops.get_input_path(input_uri_prefix, ipppssoot)
    # append process.txt to trailer file
    # file_ops.append_trailer(input_dir, output_path, ipppssoot)
    input_paths = get_inputs(ipppssoot, input_dir)
    instr = process.get_instrument(ipppssoot)
    preview_inputs = get_preview_inputs(instr, input_paths)
    # create previews
    previews = create_previews(input_dir, preview_inputs)
    # upload/copy previews
    log.info("Saving previews...")
    if output_uri_prefix.startswith("s3"):
        preview_output = process.get_output_path("file:outputs",
                                                 ipppssoot) + "/previews"
        os.makedirs(preview_output, exist_ok=True)
        copy_previews(previews, preview_output)
        log.info("Preparing files for s3 upload...")
        file_ops.tar_outputs(ipppssoot, input_uri_prefix, output_uri_prefix)
    elif output_uri_prefix.startswith("file"):
        preview_output = process.get_output_path(output_uri_prefix,
                                                 ipppssoot) + "/previews"
        os.makedirs(preview_output, exist_ok=True)
        copy_previews(previews, preview_output)
    else:
        return
    del logger  # drop the logger reference so its log file handle can be released
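A minimal usage sketch for the function above; the dataset name is borrowed from the test notes in Example No. 2, and the s3 bucket name is a placeholder, not a value from caldp itself:

# Hypothetical invocations; the bucket name is a placeholder.
main("iacs01t4q", "file:inputs", "file:outputs")           # copy previews locally
main("iacs01t4q", "astroquery:", "s3://my-caldp-bucket")   # tar outputs for s3 upload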
Example No. 2
# Assumes module-level "import os", caldp's file_ops, and the test helpers
# parse_results, list_inputs, list_logs, check_outputs, and FAIL_OUTPUTS.
def check_failed_job_tarball(ipppssoot, input_uri, output_uri):
    """In the case of a processing error, tar the input files and upload to s3 for debugging.
    test case: iacs01t4q, astroquery:, file:outputs
    Note: if caldp fails during processing, the .fits and .tra files are never copied over to /outputs folder but the (partially) processed input files are available in /inputs.
    """
    if ipppssoot == "j8f54obeq" and input_uri.startswith("astroquery"):
        working_dir = os.getcwd()
        fail_outputs = dict(FAIL_OUTPUTS)
        expected = {}
        for (name, size) in parse_results(fail_outputs[ipppssoot]):
            expected[name] = size
        # Manually find and delete any output files so that tar_outputs is forced to use the inputs.
        output_dir = file_ops.get_output_dir(output_uri)
        os.chdir(output_dir)
        output_files = file_ops.find_output_files(ipppssoot)
        # assert len(output_files) == 6
        if len(output_files) > 0:
            print("Removing outputs for failed job test:")
            for f in output_files:
                print(f)
                os.remove(f)
            empty_outputs = file_ops.find_output_files(ipppssoot)
            print("Files remaining in outputs dir: ", len(empty_outputs))
            assert len(empty_outputs) == 0
        os.chdir(working_dir)
        tar, file_list = file_ops.tar_outputs(ipppssoot, input_uri, output_uri)
        assert len(file_list) == 7
        assert os.path.exists(os.path.join("inputs", tar))
        actual = list_inputs(ipppssoot, input_uri)
        log_path = os.path.join("outputs", ipppssoot, "logs")
        assert os.path.exists(log_path)
        actual.update(list_logs(log_path))
        check_outputs(output_uri, expected, actual)
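For context, a minimal sketch of the shapes the test above assumes for FAIL_OUTPUTS and parse_results, inferred purely from their usage here; the real definitions in the caldp test suite may differ, and the file sizes are placeholder values:

# Hypothetical shapes, inferred from usage; not the actual caldp definitions.
FAIL_OUTPUTS = {
    "j8f54obeq": "12345 j8f54obeq_raw.fits\n67890 j8f54obeq.tra",
}

def parse_results(block):
    """Yield (name, size) pairs from a whitespace-delimited size/name listing."""
    for line in block.strip().splitlines():
        size, name = line.split()
        yield name, int(size)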
Example No. 3
def check_tarball_out(ipppssoot, input_uri, output_uri):
    """Create a tarfile from outputs - only runs for a single dataset (test_io)
    Workaround to improve test coverage when s3 bucket access is unavailable.
    """
    if output_uri.startswith("file"):
        # tar_outputs takes (ipppssoot, input_uri, output_uri), as in the other examples;
        # the original call here dropped input_uri and left that parameter unused.
        tar, file_list = file_ops.tar_outputs(ipppssoot, input_uri, output_uri)
        assert len(file_list) > 0
        tarpath = os.path.join("outputs", tar)
        assert os.path.exists(tarpath)
        all_files = list_files(os.path.dirname(tarpath), ipppssoot)
        actual_tarfiles = {}
        for name, size in all_files.items():
            if name.endswith(".tar.gz"):
                actual_tarfiles[name] = size
        return actual_tarfiles
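A minimal sketch of the list_files helper as its usage above implies, mapping file names to sizes under a directory; the filtering on ipppssoot is an assumption, not the actual test-suite code:

import os

def list_files(dirpath, ipppssoot):
    """Hypothetical helper: map file name -> size in bytes under dirpath."""
    files = {}
    for root, _, names in os.walk(dirpath):
        for name in names:
            if ipppssoot in name:  # assumption: restrict to the dataset's files
                files[name] = os.path.getsize(os.path.join(root, name))
    return files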
Example No. 4
def check_tarball_out(ipppssoot, input_uri, output_uri):
    """Create a tarfile from outputs - only runs for a single dataset (test_io)
    Workaround to improve test coverage when s3 bucket access is unavailable.
    """
    if output_uri.startswith("file"):
        """this call to tar_outputs will actually test file_ops.clean_up
        so there's technically no need to do it further below
        the problem with doing it here is we need a lot of logic
        to find "all" of the files to cleanup, and that logic
        in and of itself is what really needs to be tested...
        meaning it should be caldp, not in the test
        """
        tar, file_list = file_ops.tar_outputs(ipppssoot, input_uri, output_uri)
        assert len(file_list) > 0
        tarpath = os.path.join("outputs", tar)
        assert os.path.exists(tarpath)
        all_files = list_files(os.path.dirname(tarpath), ipppssoot)
        actual_tarfiles = {}
        for name, size in all_files.items():
            if name.endswith(".tar.gz"):
                actual_tarfiles[name] = size
        return actual_tarfiles
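As a follow-up, the returned tarball can be verified beyond its name and size by inspecting its members directly; a minimal sketch using the standard-library tarfile module, with the path construction mirroring the example above:

import os
import tarfile

def inspect_tarball(tarpath):
    """Print the name and size of each member in a .tar.gz produced by tar_outputs."""
    with tarfile.open(tarpath, "r:gz") as t:
        for member in t.getmembers():
            print(member.name, member.size)

# e.g. inspect_tarball(os.path.join("outputs", tar))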