Example #1
def test_driver(tmp_path_factory):
    # Act: Process the input data by executing runsinglehap (a time-consuming activity)

    current_dt = datetime.datetime.now()
    print(str(current_dt))

    # Read the "poller file" and download the input files, as necessary
    input_names = gather_data_for_processing(tmp_path_factory)

    # Construct the manifest filename for later
    manifest_filename = construct_manifest_filename(input_names)

    # Run the SVM processing
    path = os.path.join(os.path.dirname(__file__), POLLER_FILE)
    try:
        status = runsinglehap.perform(path, log_level="debug")

        output_files = gather_output_data(manifest_filename)

        # Check the output primary WCSNAME includes FIT_SVM_GAIA as part of the string value
        tdp_files = [
            f for f in output_files
            if "total" in f.lower() and f.lower().endswith(".fits")
        ]

        for tdp in tdp_files:
            wcsname = fits.getval(tdp, "WCSNAME", ext=1).upper()
            print("\ntest_svm_wcs.  WCSNAME: {} Output file: {}".format(
                wcsname, tdp))
            assert WCS_SUB_NAME in wcsname, f"WCSNAME is not as expected for file {tdp}."

    # Catch anything that happens and report it.  This is meant to catch unexpected errors and
    # generate sufficient output exception information so algorithmic problems can be addressed.
    except Exception as except_details:
        print(except_details)
        pytest.fail("\ntest_driver. Exception Visit: {}\n".format(path))

    current_dt = datetime.datetime.now()
    print(str(current_dt))
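
The helpers gather_data_for_processing, construct_manifest_filename, and gather_output_data are defined elsewhere in the test module. To illustrate the contract this test assumes, here is a minimal sketch of gather_output_data under the assumption that the SVM run writes a plain-text manifest listing one output product per line (a hypothetical implementation, not the project's actual code):

def gather_output_data(manifest_filename):
    # Hypothetical sketch: assumes the manifest is a plain-text file with
    # one output filename per line.
    with open(manifest_filename) as fin:
        return [line.strip() for line in fin if line.strip()]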
Example #2
def svm_setup(gather_data_for_processing):
    # Act: Process the input data by executing runsinglehap (a time-consuming activity)

    current_dt = datetime.datetime.now()
    print(str(current_dt))
    print("\nsvm_setup fixture")

    # Read the "poller file" and download the input files, as necessary
    input_names = gather_data_for_processing

    # Run the SVM processing
    path = os.path.join(os.path.dirname(__file__), POLLER_FILE)
    try:
        status = runsinglehap.perform(path)

    # Catch anything that happens and report it.  This is meant to catch unexpected errors and
    # generate sufficient output exception information so algorithmic problems can be addressed.
    except Exception as except_details:
        print(except_details)
        pytest.fail("\nsvm_setup. Exception Visit: {}\n".format(path))

    current_dt = datetime.datetime.now()
    print(str(current_dt))
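
As written, svm_setup reads like a module-scoped pytest fixture that performs the expensive SVM run once for all tests in the file. A minimal sketch of how it might be declared and consumed (the decorator, the scope, and the test below are assumptions, not taken from the original):

import os
import pytest

@pytest.fixture(scope="module")
def svm_setup(gather_data_for_processing):
    ...  # body as in Example #2

def test_after_svm_processing(svm_setup):
    # Hypothetical test: runs only after the fixture has completed the SVM run
    assert os.path.exists("expected_output.fits")  # placeholder filename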
Example #3
def test_run_svmpoller(tmpdir, dataset):
    """ Tests to read a series of poller files and process the contents of each as Single Visit Mosaic

        Characteristics of these tests:

        Success Criteria:
            The SVM processing returns a value of 0: Success or 1: Failure

        The input master_list file is a list of poller filenames, one filename per line.
        Each poller file must be obtained from a specified directory and read to obtain the
        names of the data files which need to be processed.

        This test file can be executed in the following manner:
            $ pytest -n # -s --basetemp=/internal/hladata/yourUniqueDirectoryHere --bigdata --slow
              --master_list /internal/hladata/input/master_poller_list.txt test_run_svmpoller.py >&
              test_svmpoller_output.txt &
            $ tail -f test_svmpoller_output.txt
          * The `-n #` option can be used to run tests in parallel if `pytest-xdist` has
            been installed, where `#` is the number of CPUs to use. Running these tests
            in parallel is NOT advised.
          * Note: When running this test, the `--basetemp` directory should be set to a unique
            existing directory to avoid deleting previous test output.
          * A default master list exists in the tests/hla directory and contains 121 datasets.  This
            is probably NOT the list you want to use, but it allows you to see what this file should
            contain.

    """
    print("TEST_RUN_SVMPOLLER. Dataset: ", dataset)

    current_dt = datetime.datetime.now()
    print(str(current_dt))

    subdir = ""
    prevdir = os.getcwd()

    # create working directory specified for the test
    if not tmpdir.ensure(subdir, dir=True):
        curdir = tmpdir.mkdir(subdir).strpath
    else:
        curdir = tmpdir.join(subdir).strpath
    os.chdir(curdir)

    return_value = 1

    try:

        # The dataset variable is a fully qualified poller file (CSV) with no header
        # Copy the poller file to the current working directory
        shutil.copy2(dataset, ".")

        # Get the poller file path, as well as its simple name
        poller_path = os.path.dirname(dataset)
        poller_file = os.path.basename(dataset)
        table = Table.read(poller_file, format="ascii")

        # Column "col8" contains fully qualified constituent filenames for the single visit, and
        # the fully qualified path designated in the poller file is the shared cache.
        file_list = table["col8"].tolist()

        # Check if the files to be processed are in the same directory as the poller
        # file; otherwise, they need to be copied from the on-line cache
        for full_filename in file_list:
            filename = os.path.basename(full_filename)
            log.info("Looking for file {}".format(filename))
            local_path = os.path.join(poller_path, filename)
            if os.path.exists(local_path):
                shutil.copy2(local_path, ".")
            else:
                shutil.copy2(full_filename, ".")

        log.info("Obtained all input files for dataset {}.".format(dataset))

        # Run SVM pipeline processing
        return_value = runsinglehap.perform(poller_file)

    # Catch anything that happens as this dataset will be considered a failure, but
    # the processing of datasets should continue.  This is meant to catch
    # unexpected errors and generate sufficient output exception
    # information so algorithmic problems can be addressed.
    except Exception:
        traceback.print_exc()
        pytest.fail("TEST_RUN_SVMPOLLER. Exception Dataset: {}\n".format(dataset))

    # Return to the original directory before asserting so a failure does not
    # strand subsequent tests in the temporary directory
    os.chdir(prevdir)

    assert return_value == 0
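
The dataset argument implies a parametrized fixture driven by the --master_list option shown in the docstring. A minimal conftest.py sketch of how such parametrization is commonly wired up in pytest (the default filename below is an assumption):

# conftest.py (sketch)
def pytest_addoption(parser):
    parser.addoption("--master_list", action="store",
                     default="master_poller_list.txt",
                     help="File listing one poller filename per line")

def pytest_generate_tests(metafunc):
    # Give test_run_svmpoller one invocation per poller file in the list
    if "dataset" in metafunc.fixturenames:
        with open(metafunc.config.getoption("--master_list")) as f:
            datasets = [line.strip() for line in f if line.strip()]
        metafunc.parametrize("dataset", datasets)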
Example #4
def test_run_svmpoller(tmpdir, dataset):
    """ Tests to read a series of poller files and process the contents of each as Single Visit Mosaic

        Characteristics of these tests:

        Success Criteria:
            The SVM processing returns a value of 0: Success or 1: Failure

        The input svm_list file is a list of poller filenames, one filename per line.
        Each poller file must be obtained from a specified directory and read to obtain the
        names of the data files which need to be processed.

        This test file can be executed in the following manner:
            $ pytest -n # -s --basetemp=/internal/hladata/yourUniqueDirectoryHere --bigdata --slow
              --svm_list svm_input.lst test_run_svmpoller.py >& test_svmpoller_output.txt &
            $ tail -f test_svmpoller_output.txt
          * The `-n #` option can be used to run tests in parallel if `pytest-xdist` has
            been installed, where `#` is the number of CPUs to use. Running these tests
            in parallel is NOT advised.
          * Note: When running this test, the `--basetemp` directory should be set to a unique
            existing directory to avoid deleting previous test output.
          * A default master list, svm_input.lst, exists in the tests/hla directory and contains 3 datasets.
            This specific list may NOT be the list you want to use, but it allows you to see what
            this file should contain.  Please note these tests should be kept to runtimes which
            are not excessive.

    """
    print("TEST_RUN_SVMPOLLER. Dataset: ", dataset)

    current_dt = datetime.datetime.now()
    print(str(current_dt))

    subdir = ""
    prevdir = os.getcwd()

    # create working directory specified for the test
    if not tmpdir.ensure(subdir, dir=True):
        curdir = tmpdir.mkdir(subdir).strpath
    else:
        curdir = tmpdir.join(subdir).strpath
    os.chdir(curdir)

    return_value = 1

    try:

        # Read the CSV poller file residing in the tests directory to extract the individual visit FLT/FLC filenames
        path = os.path.join(os.path.dirname(__file__), dataset)
        table = ascii.read(path, format="no_header")
        filename_column = table.colnames[0]
        filenames = list(table[filename_column])
        print("\nread_csv_for_filenames. Filesnames from poller: {}".format(
            filenames))

        # Establish FLC/FLT lists and obtain the requested data
        flc_flag = ""
        flt_flag = ""
        # In order to obtain individual FLC or FLT images from MAST (if the files do not reside on disk),
        # which may be part of an ASN, use only IPPPSS with a wildcard.  The unwanted images have to be
        # removed after the fact.
        for fn in filenames:
            if fn.lower().endswith("flc.fits") and flc_flag == "":
                flc_flag = fn[0:6] + "*"
            elif fn.lower().endswith("flt.fits") and flt_flag == "":
                flt_flag = fn[0:6] + "*"

            # If both flags have been set, then break out of the loop early.  It may be
            # that all files have to be checked which means the for loop continues
            # until its natural completion.
            if flc_flag and flt_flag:
                break

        # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files
        # (e.g., j*_flc.fits) as necessary. The logic here and the above for loop is an attempt to
        # avoid downloading too many images which are not needed for processing.
        flcfiles = []
        fltfiles = []
        if flc_flag:
            flcfiles = aqutils.retrieve_observation(flc_flag,
                                                    suffix=["FLC"],
                                                    product_type="pipeline")
        if flt_flag:
            fltfiles = aqutils.retrieve_observation(flt_flag,
                                                    suffix=["FLT"],
                                                    product_type="pipeline")

        flcfiles.extend(fltfiles)

        # Keep only the files which exist in BOTH lists for processing
        files_to_process = set(filenames).intersection(set(flcfiles))

        # Identify unwanted files from the download list and remove from disk
        files_to_remove = set(filenames).symmetric_difference(set(flcfiles))

        try:
            for ftr in files_to_remove:
                os.remove(ftr)
        except Exception as x_cept:
            print("")
            print("Exception encountered: {}.".format(x_cept))
            print("The file {} could not be deleted from disk. ".format(ftr))
            print(
                "Remove files which are not used for processing from disk manually."
            )

        # Run the SVM processing (`path` was already computed above)

        return_value = runsinglehap.perform(path)

    # Catch anything that happens and report it.  This is meant to catch unexpected errors and
    # generate sufficient output exception information so algorithmic problems can be addressed.
    except Exception:
        traceback.print_exc()
        pytest.fail("TEST_RUN_SVMPOLLER. Exception Dataset: {}\n".format(dataset))

    current_dt = datetime.datetime.now()
    print(str(current_dt))

    # Return to the original directory before asserting so a failure does not
    # strand subsequent tests in the temporary directory
    os.chdir(prevdir)

    assert return_value == 0
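
The fn[0:6] slice in the loop above keeps the IPPPSS portion of an HST exposure rootname (instrument, program, and observation set), so a single wildcarded astroquery request can retrieve every exposure in the visit. A quick worked example with a made-up rootname:

fn = "j6lq01naq_flc.fits"   # hypothetical exposure filename
flag = fn[0:6] + "*"        # -> "j6lq01*", i.e. IPPPSS plus a wildcard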