Example #1
0
def main(arguments = None):
    """ main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:] # ignore argv[0], the script name
    (options, args) = parser.parse_args(args = arguments)

    # Adjust the logger level to WARNING, INFO or DEBUG, depending on the
    # given number of -v options (none, one or two or more, respectively)
    logging_level = logging.WARNING
    if options.verbose == 1:
        logging_level = logging.INFO
    elif options.verbose >= 2:
        logging_level = logging.DEBUG
    logging.basicConfig(format = style.LOG_FORMAT, level = logging_level)

    # Print the help and abort the execution if there are not two positional
    # arguments left after parsing the options, as the user must specify at
    # least one (only one?) input FITS file and the output directory
    if len(args) < 2:
        parser.print_help()
        return 2     # 2 is generally used for command line syntax errors
    else:
        input_paths = args[:-1]
        output_dir = args[-1]

    # No index can be within the search area if the radius is not > 0
    if options.radius <= 0:
        msg = "%sError: --radius must a positive number of degrees"
        print msg % style.prefix
        sys.exit(style.error_exit_message)

    # Make sure that the output directory exists; create it if it doesn't.
    methods.determine_output_dir(output_dir)

    print "%sUsing a local build of Astrometry.net." % style.prefix
    msg = "%sDoing astrometry on the %d paths given as input."
    print msg % (style.prefix, len(input_paths))

    pool = multiprocessing.Pool(options.ncores)
    map_async_args = ((path, output_dir, options) for path in input_paths)
    result = pool.map_async(parallel_astrometry, map_async_args)

    while not result.ready():
        time.sleep(1)
        methods.show_progress(queue.qsize() / len(input_paths) * 100)
        # Do not update the progress bar when debugging; instead, print it
        # on a new line each time. This prevents the next logging message,
        # if any, from being printed on the same line that the bar.
        if logging_level < logging.WARNING:
            print

    result.get() # reraise exceptions of the remote call, if any
    methods.show_progress(100) # in case the queue was ready too soon
    print

    # Results in the process shared queue were only necessary to accurately
    # update the progress bar. They are no longer needed, so empty it now.
    queue.clear()

    print "%sYou're done ^_^" % style.prefix
    return 0
Example #2
0
def main(arguments = None):
    """ main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:] # ignore argv[0], the script name
    (options, args) = parser.parse_args(args = arguments)

    # Print the help message and abort the execution if there are not two
    # positional arguments left after parsing the options, as the user must
    # specify the path to both the input and output directories.

    if len(args) < 2:
        parser.print_help()
        return 2  # 2 is generally used for command line syntax errors
    else:
        input_dirs = args[:-1]
        output_dir = args[-1]

    # Make sure that all the input directories exist, abort otherwise.
    for path in input_dirs:
        if not os.path.exists(path):
            print "%sThe input directory, '%s', does not exist. Exiting." % \
                  (style.prefix, path)
            return 1

    # The input and output directories must be different, as otherwise some
    # files (especially if the filename of the output files is automatically
    # detected) could be overwritten.
    for path in input_dirs:
        if os.path.abspath(path) == os.path.abspath(output_dir):
            print "%s[INPUT_DIRS] and OUTPUT_DIR must be different. " \
                  "Exiting." % style.prefix
            return 1

    # Make sure that the output directory exists, create it otherwise
    methods.determine_output_dir(output_dir)

    # Recursively walk down the input directories, obtaining a list of all the
    # regular files. Then, and while a progress bar is shown to let the user
    # estimate how much longer it is, detect which among them are FITS files.

    print "%sIndexing regular files within directory trees starting at " \
          "INPUT_DIRS..." % style.prefix ,
    files_paths = fitsimage.find_files(input_dirs,
                                       followlinks = options.followlinks,
                                       pattern = options.pattern)
    print 'done.'

    print "%sDetecting FITS images among the %d indexed regular files..." % \
          (style.prefix, len(files_paths))

    images_set = set()
    methods.show_progress(0.0)
    for path_index, path in enumerate(files_paths):
        try:
            images_set.add(fitsimage.FITSImage(path))
            fraction = (path_index + 1) / len(files_paths) * 100
            methods.show_progress(fraction)
        except fitsimage.NonStandardFITS:
            pass
    else:
        methods.show_progress(100)
        print

    if not len(images_set):
        print "%sNo FITS files were found. Exiting." % style.prefix
        return 1
    else:
        print "%s%d FITS files detected." % (style.prefix, len(images_set))

    # All the images must have the same size; otherwise, only those with the
    # most common dimensions will be imported, while the rest will be ignored
    print style.prefix
    print "%sChecking the sizes of the detected images..." % style.prefix,
    img_sizes = collections.defaultdict(int) # dimensions counter
    for img in images_set:
        img_sizes[img.size] += 1
    print 'done.'

    # The most common size is the only one element in case len(img_sizes) == 1
    x_size, y_size = max(img_sizes.iterkeys(), key = img_sizes.get)[:2]

    if len(img_sizes) == 1:
        print "%sAll the FITS images have the same size: %d x %d pixels" % \
              (style.prefix, x_size, y_size)
    else:

        print "%sMultiple sizes were detected among the FITS images." % style.prefix
        print "%sDiscarding images with a size other than %d x %d pixels, " \
              "the most common..." % (style.prefix, x_size, y_size) ,
        old_size = len(images_set)
        images_set = set(img for img in images_set if img.size == (x_size, y_size))
        print 'done.'

        if not images_set:
            print "%sThere are no FITS files left. Exiting." % style.prefix
            return 1
        else:
            print "%s%d FITS files were discarded because of their size, " \
                  "%s remain." % (style.prefix, old_size - len(images_set),
                                  len(images_set))

    # Those FITS images whose object names do not match any of the given
    # patterns, or which do not even have the keyword which contains the
    # name for the object observed, are discarded.
    print style.prefix
    print "%sImporting only those FITS files whose %s keyword can be found " \
          "and matches" % (style.prefix, options.objectk)
    print "%sone of the following Unix patterns: %s ..." % \
          (style.prefix, options.objectn)

    # We first test that the keyword exists (hence the pass for the KeyError
    # exception, which means that the image is filtered out) and, after that,
    # check whether its value matches one of the regular expressions which
    # define the object names to be imported.
    object_set = set()

    # Keep the track of how many images are ignored for each reason
    saturated_excluded = 0
    non_match_excluded = 0

    for img in images_set:

        try:
            object_name = img.read_keyword(options.objectk)
            for pattern in options.objectn:
                regexp = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
                if regexp.match(object_name):
                    # Even if the object name matchs, the median number of
                    # counts must still be below the threshold, if any. If the
                    # number of ADUs is irrelevant we can avoid having to
                    # unnecessarily compute it.
                    if options.max_counts:
                        with pyfits.open(img.path, readonly = True) as hdu:
                            median_counts = numpy.median(hdu[0].data)
                        if median_counts > options.max_counts:
                            print "%s%s excluded (matched, but saturated " \
                                  "with %d ADUs)" % (style.prefix, img.path,
                                                     median_counts)
                            saturated_excluded += 1
                            break

                    # This point reached if median number of ADUs of image is
                    # above the threshold or irrelevant, so it can be imported.
                    print "%s%s imported (%s matches '%s')" % (style.prefix,
                           img.path, object_name, pattern)

                    object_set.add(img)
                    break

            else: # only executed if for loop exited cleanly
                print "%s%s excluded (%s does not match anything)" % \
                      (style.prefix, img.path, object_name)
                non_match_excluded += 1
        except KeyError:
            pass

    if not saturated_excluded and not non_match_excluded:
        print "%sNo images were filtered out. Hooray!" % style.prefix
    if saturated_excluded:
        print "%s%d files were discarded because they were saturated " \
              "(> %d ADUs)." % (style.prefix, saturated_excluded,
                                options.max_counts)
    if non_match_excluded:
        print "%s%d files were discarded because of their non-matching " \
              "object names." % (style.prefix, non_match_excluded)

    # Abort the execution if all the FITS files were filtered out
    if not object_set:
        print "%sThere are no FITS files left. Exiting." % style.prefix
        return 1

    # Sort the FITS files by their date of observation, according to the header
    print style.prefix
    print "%sSorting the FITS files by their date of observation " \
          "[keyword: %s]..." % (style.prefix, options.datek) ,

    kwargs = dict(date_keyword = options.datek,
                  time_keyword = options.timek,
                  exp_keyword = options.exptimek)
    get_date = operator.methodcaller('date', **kwargs)
    sorted_imgs = sorted(object_set, key = get_date)

    # Let the user know if one or more images could not be sorted (because of
    # problems when parsing the FITS keywords from which the observation date
    # is derived) and thus discarded.
    difference = len(object_set) - len(sorted_imgs)
    assert difference >= 0
    if difference:
        print
        print "%s%d files were discarded as the observation date keyword " \
              "was not found or the " % (style.prefix, difference)
        print "%sdate in it represented did not conform to the FITS " \
              "standard." % style.prefix

        # Execution is aborted if all the FITS files were filtered out
        if not sorted_imgs:
            print "%sThere are no FITS files left. Exiting." % style.prefix
            return 1
    else:
        print 'done.'

    # If no filename for the output images was specified, attempt to
    # automatically detect the most common basename among the FITS files.
    # This is doing by extracting the leftmost non-numeric substring of
    # all the filenames and taking that which repeats the most.

    if not options.filename:
        print style.prefix
        print "%sDetecting the most common name among input files..." % \
              style.prefix ,
        sys.stdout.flush()

        # Use a dictionary in order to keep the track of how many times we
        # have come across each prefix (leftmost non-numeric substring in
        # the filename) and select that with most occurrences.

        prefixes = collections.defaultdict(int)
        for prefix in (img.prefix for img in sorted_imgs):
            prefixes[prefix] += 1

        # Select the prefix (key) that is repeated the most
        options.filename = max(prefixes, key = prefixes.get)
        print 'done.'

    print "%sImported FITS filenames will start with the string: '%s'" % \
          (style.prefix, options.filename)

    # Now we have to copy the FITS files. The basename of each imported file
    # will be options.filename + its sequence number. Filling zeros will be
    # affixed to each number so that the lenth of all the basenames is the
    # same. Following Dijkstra's teachings, we start numbering at zero.

    assert len(sorted_imgs)
    ndigits = len(str(len(sorted_imgs) - 1))
    print "%s%d digits are needed in order to enumerate %d files." % \
          (style.prefix, ndigits, len(sorted_imgs))

    print style.prefix
    print "%sCopying the FITS files to '%s'..." % \
          (style.prefix, output_dir)

    for index, fits_file in enumerate(sorted_imgs):

        # i.e., 'ferM_' + '0000' + '.fits' = 'ferM_0000.fits'
        dest_name = '%s%0*d.fits' % (options.filename, ndigits, index)
        dest_path = os.path.join(output_dir, dest_name)

        shutil.copy2(fits_file.path, dest_path)

        # The permission bits have been copied, but we need to make sure
        # that the copy of the FITS file is always writable, no matter what
        # the original permissions were. This is equivalent to `chmod u+w`
        methods.owner_writable(dest_path, True)

        dest_img = fitsimage.FITSImage(dest_path)

        # Add some information to the FITS header...
        if not options.exact:

            msg1 = "File imported by LEMON on %s" % methods.utctime()
            dest_img.add_history(msg1)

            # If the --uik option is given, store in this keyword the absolute
            # path to the image of which we made a copy. This allows other
            # LEMON commands, if necessary, to access the original FITS files
            # in case the imported images are modified (e.g., bias subtraction
            # or flat-fielding) before these other commands are executed.

            if options.uncimgk:

                comment = "before any calibration task"
                dest_img.update_keyword(options.uncimgk,
                                        os.path.abspath(dest_img.path),
                                        comment = comment)

                msg2 = "[Import] Original image: %s"
                dest_img.add_history(msg2 % os.path.abspath(fits_file.path))

        # ... unless we want an exact copy of the images. If that is the case,
        # verify that the SHA-1 checksum of the original and the copy matches
        elif fits_file.sha1sum != dest_img.sha1sum:
            msg = "copy of %s not identical (SHA-1 differs)" % fits_file.path
            raise IOError(msg)

        # Show which file has been copied, using the format of the
        # 'cp -v' command: `./ultra2/ferM_11.fits' -> `imported/img_01.fits'
        print  "%s`%s' -> `%s'" % (style.prefix, fits_file.path, dest_path)

    # Finally, let the user know how many FITS images, and the fraction of
    # the total, that were imported, as well as their size in megabytes.
    print style.prefix
    ifraction = len(sorted_imgs) / len(images_set) * 100
    print "%sFITS files detected: %d" % (style.prefix, len(images_set))
    print "%sFITS files successfully imported: %d (%.2f%%)" % \
          (style.prefix, len(sorted_imgs), ifraction)

    total_size = 0.0
    for fits_file in sorted_imgs:
        total_size += os.path.getsize(fits_file.path) # in bytes

    print "%sTotal size of imported files: %.2f MB" % \
          (style.prefix, total_size / (1024.0 ** 2))
    print "%sYou're done ^_^" % style.prefix
    return 0
Example #3
0
def main(arguments = None):
    """ main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:] # ignore argv[0], the script name
    (options, args) = parser.parse_args(args = arguments)

    # Adjust the logger level to WARNING, INFO or DEBUG, depending on the
    # given number of -v options (none, one or two or more, respectively)
    logging_level = logging.WARNING
    if options.verbose == 1:
        logging_level = logging.INFO
    elif options.verbose >= 2:
        logging_level = logging.DEBUG
    logging.basicConfig(format = style.LOG_FORMAT, level = logging_level)

    # Print the help and abort the execution if there are not two positional
    # arguments left after parsing the options, as the user must specify at
    # least one (only one?) input FITS file and the output directory
    if len(args) < 2:
        parser.print_help()
        return 2     # 2 is generally used for command line syntax errors
    else:
        input_paths = args[:-1]
        output_dir = args[-1]

    # No index can be within the search area if the radius is not > 0
    if options.radius <= 0:
        msg = "%sError: --radius must a positive number of degrees"
        print msg % style.prefix
        sys.exit(style.error_exit_message)

    # Make sure that the output directory exists; create it if it doesn't.
    methods.determine_output_dir(output_dir)

    print "%sUsing a local build of Astrometry.net." % style.prefix
    msg = "%sDoing astrometry on the %d paths given as input."
    print msg % (style.prefix, len(input_paths))

    pool = multiprocessing.Pool(options.ncores)
    map_async_args = ((path, output_dir, options) for path in input_paths)
    result = pool.map_async(parallel_astrometry, map_async_args)

    while not result.ready():
        time.sleep(1)
        methods.show_progress(queue.qsize() / len(input_paths) * 100)
        # Do not update the progress bar when debugging; instead, print it
        # on a new line each time. This prevents the next logging message,
        # if any, from being printed on the same line that the bar.
        if logging_level < logging.WARNING:
            print

    result.get() # reraise exceptions of the remote call, if any
    methods.show_progress(100) # in case the queue was ready too soon
    print

    # Results in the process shared queue were only necessary to accurately
    # update the progress bar. They are no longer needed, so empty it now.
    queue.clear()

    print "%sYou're done ^_^" % style.prefix
    return 0
Example #4
0
def main(arguments = None):
    """ main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:] # ignore argv[0], the script name
    (options, args) = parser.parse_args(args = arguments)

    # Adjust the logger level to WARNING, INFO or DEBUG, depending on the
    # given number of -v options (none, one or two or more, respectively)
    logging_level = logging.WARNING
    if options.verbose == 1:
        logging_level = logging.INFO
    elif options.verbose >= 2:
        logging_level = logging.DEBUG
    logging.basicConfig(format = style.LOG_FORMAT, level = logging_level)

    # Print the help and abort the execution if there are not two positional
    # arguments left after parsing the options, as the user must specify at
    # least one (only one?) input FITS file and the output directory
    if len(args) < 2:
        parser.print_help()
        return 2     # 2 is generally used for command line syntax errors
    else:
        input_paths = args[:-1]
        output_dir = args[-1]

    # Make sure that the output directory exists, and create it if it doesn't.
    # The subdirectories for discarded images are not yet created; we put this
    # off until we know that at least one image is indeed going to be excluded.
    methods.determine_output_dir(output_dir)
    fwhm_dir = os.path.join(output_dir, options.fwhm_dir)
    elong_dir = os.path.join(output_dir, options.elong_dir)

    print "%s%d paths given as input, on which sources will be detected." % \
          (style.prefix, len(input_paths))
    print "%sRunning SExtractor on all the FITS images..." % style.prefix

    # Use a pool of workers and run SExtractor on the images in parallel!
    pool = multiprocessing.Pool(options.ncores)
    map_async_args = ((path, options) for path in input_paths if os.path.isfile(path))
    result = pool.map_async(parallel_sextractor, map_async_args)

    methods.show_progress(0.0)
    while not result.ready():
        time.sleep(1)
        methods.show_progress(queue.qsize() / len(input_paths) * 100)
        # Do not update the progress bar when debugging; instead, print it
        # on a new line each time. This prevents the next logging message,
        # if any, from being printed on the same line that the bar.
        if logging_level < logging.WARNING:
            print

    result.get()      # reraise exceptions of the remote call, if any
    methods.show_progress(100) # in case the queue was ready too soon
    print

    # Three sets, to keep the track of all the images on which SExtractor
    # has been run and also of which have been discarded because of their
    # unnaceptable FWHM or elongation ratio.
    all_images = set()
    fwhm_discarded = set()
    elong_discarded = set()

    # Dictionary mapping each input image to the temporary output file: a copy
    # of the input image but whose FITS header has been updated with the path
    # to the SExtractor catalog and the MD5 hash of the configuration files.
    seeing_tmp_paths = dict()

    # Extract the four-element tuples (path to the image, FWHM, elongation and
    # number of sources detected by SExtractor) from the multiprocessing' queue
    # and store the values in three independent dictionaries; these provide
    # fast access, with O(1) lookup, to the data.
    fwhms  = {}
    elongs = {}
    nstars = {}

    for _ in xrange(queue.qsize()):
        path, output_tmp_path, fwhm, elong, stars = queue.get()
        all_images.add(path)
        seeing_tmp_paths[path] = output_tmp_path

        # The clean-up function cannot be registered in parallel_sextractor()
        # because it would remove the temporary FITS file when the process
        # terminates (instead of when our program exits, which is what we
        # need). Do it here, to make sure that whatever happens next these
        # temporary files are always deleted.
        atexit.register(methods.clean_tmp_files, output_tmp_path)

        fwhms[path]  = fwhm
        elongs[path] = elong
        nstars[path] = stars

    if not all_images:
        print "%sError. No FITS images were detected." % style.prefix
        print style.error_exit_message
        return 1

    # Let's first discard those images with a bad full width at half maximum.
    # In order to to this, we fit a normal distribution (assuming the FWHMs to
    # be Gaussian distributed) and define the maximum allowed value as that
    # which exceeds the specified number of standard deviations of the mean.

    print "%sFitting a Gaussian distribution to the FWHMs..." % style.prefix ,
    sys.stdout.flush()
    logging.debug("Fitting a Gaussian distribution to the %d FWHMs" % len(fwhms))
    mu, sigma = scipy.stats.norm.fit(fwhms.values())
    logging.debug("FWHMs mean = %.3f" % mu)
    logging.debug("FWHMs sigma = %.3f" % sigma)
    print 'done.'
    sys.stdout.flush()

    print "%sFWHMs mean = %.3f, sigma = %.3f pixels" % (style.prefix, mu, sigma)
    maximum_fwhm = mu + (options.fwhm_sigma * sigma)
    logging.debug("Maximum allowed FWHM = %.3f + %.1f x %.3f = %.3f pixels" % \
                 (mu, options.fwhm_sigma, sigma, maximum_fwhm))
    print "%sDiscarding images with a FWHM > %.3f + %.1f x %.3f = %.3f pixels..." % \
          (style.prefix, mu, options.fwhm_sigma, sigma, maximum_fwhm)

    # Exclude images by adding them to the FWHM-discarded set
    for path, fwhm in sorted(fwhms.iteritems()):
        if fwhm > maximum_fwhm:
            fwhm_discarded.add(path)
            logging.debug("%s discarded (FWHM = %.3f > %.3f" % \
                         (path, fwhm, maximum_fwhm))
            print "%s%s discarded (FWHM = %.3f)" % (style.prefix, path, fwhm)

    logging.info("Images discarded by FWHM: %d" % len(fwhm_discarded))
    if not fwhm_discarded:
        print "%sNo images were discarded because of their FWHM. Hooray!" % style.prefix
    else:
        discarded_fraction = len(fwhm_discarded) / len(all_images) * 100
        nleft = len(all_images) - len(fwhm_discarded)  # non-discarded images
        print "%s%d FITS images (%.2f %%) discarded, %d remain" % \
              (style.prefix,  len(fwhm_discarded), discarded_fraction, nleft)


    # Repeat the same approach, now with the elongation ratios. Images already
    # discarded because of their FWHM are not even considered -- why discard
    # them twice? They can simply be ignored.

    print "%sFitting a Gaussian distribution to the elongations..." % style.prefix ,
    sys.stdout.flush()
    mu, sigma = scipy.stats.norm.fit(elongs.values())
    logging.debug("Elongations mean = %.3f" % mu)
    logging.debug("Elongations sigma = %.3f" % sigma)
    print 'done.'
    sys.stdout.flush()

    print "%sElongation mean = %.3f, sigma = %.3f pixels" % (style.prefix, mu, sigma)
    maximum_elong = mu + (options.elong_sigma * sigma)
    logging.debug("Maximum allowed elongation = %.3f + %.1f x %.3f = %.3f pixels" % \
                 (mu, options.elong_sigma, sigma, maximum_elong))
    print "%sDiscarding images with an elongation > %.3f + %.1f x %.3f = %.3f ..." % \
          (style.prefix, mu, options.elong_sigma, sigma, maximum_elong)

    for path, elong in sorted(elongs.iteritems()):
        # Ignore FWHM-discarded images
        if path in fwhm_discarded:
            logging.debug("%s ignored (already discarded by FWHM)" % path)
            continue
        elif elong > maximum_elong:
            elong_discarded.add(path)
            logging.debug("%s discarded (elongation = %.3f > %.3f" % \
                         (path, fwhm, maximum_elong))
            print "%s%s discarded (elongation = %.3f)" % (style.prefix, path, elong)

    logging.info("Images discarded by elongation: %d" % len(elong_discarded))
    if not elong_discarded:
        print "%sNo images were discarded because of their elongation. Yay!" % style.prefix
    else:
        initial_size = len(all_images) - len(fwhm_discarded)
        discarded_fraction = len(elong_discarded) / initial_size * 100
        nleft = initial_size - len(elong_discarded)
        print "%s%d FITS images (%.2f %%) discarded, %d remain" % \
              (style.prefix,  len(elong_discarded), discarded_fraction, nleft)


    # Finally, take the images whose number of stars is at the 'stars_per'
    # percentile and select the one with the best FWHM. This will be our
    # 'best-seeing' image, in which sources may be detected. Taking directly
    # the image with the best FWHM may not work as we need the best-seeomg
    # image to also be one of the most populated.

    print "%sIdentifying the images whose number of detected sources it at " \
          "the %.2f percentile..." % (style.prefix, options.stars_per) ,
    sys.stdout.flush()
    # Ignore discarded images, for whatever reason
    logging.debug("Finding the %.2f percentile of the number of stars " \
                 "detected by SExtractor"  % options.stars_per)
    for path in fwhm_discarded.union(elong_discarded):
        del nstars[path]
        reason = 'FWHM' if path in fwhm_discarded else 'elongation'
        logging.debug("%s ignored (was discarded by %s)" % (path, reason))
    min_nstars = scipy.stats.scoreatpercentile(nstars.values(), options.stars_per)
    print 'done.'

    print "%sNumber of stars at percentile = %d, taking the images with at " \
          "least this number of sources..." % (style.prefix, min_nstars) ,
    sys.stdout.flush()
    most_populated_images = [path
                             for path, stars in nstars.iteritems()
                             if stars >= min_nstars]

    logging.debug("There are %s images with a number of stars at the %.2f " \
                 "percentile" % (len(most_populated_images), options.stars_per))
    logging.debug("Identifying the image with the lowest FWHM")
    print 'done.'

    print "%sFinally, finding the image with the lowest FWHM among these " \
          "%d images..." % (style.prefix, len(most_populated_images)),
    sys.stdout.flush()

    # Find the image with the best seeing (lowest FWHM)
    best_seeing = min(most_populated_images, key = lambda path: fwhms[path])
    logging.debug("Best-seeing image: %s" % path)
    logging.debug("Best-seeing image FWHM = %.3f" % fwhms[best_seeing])
    logging.debug("Best-seeing image elongation = %.3f" % elongs[best_seeing])
    logging.debug("Best-seeing image sources = %d" % nstars[best_seeing])
    assert best_seeing not in fwhm_discarded
    assert best_seeing not in elong_discarded
    print 'done.'

    print "%sBest-seeing image = %s, with %d sources and a FWHM of %.3f pixels" % \
          (style.prefix, best_seeing, nstars[best_seeing], fwhms[best_seeing])

    # The subdirectories are created only if at least one image is going to be
    # discarded. We do not want empty directories in case no image is discarded
    # because of its full-width at half maximum (FWHM) or elongation.

    if fwhm_discarded:
        methods.determine_output_dir(fwhm_dir, quiet = True)

    if elong_discarded:
        methods.determine_output_dir(elong_dir, quiet = True)

    # Finally, copy all the FITS images to the output directory
    processed = 0
    for path in sorted(all_images):
        # Add the suffix to the basename of the FITS image
        root, ext = os.path.splitext(os.path.basename(path))
        output_filename = root + options.suffix + ext
        logging.debug("Basename '%s' + '%s' becomes '%s'" % \
                     (path, options.suffix, output_filename))

        if path in fwhm_discarded:
            output_path = os.path.join(fwhm_dir, output_filename)
            logging.debug("%s was discarded because of its FWHM" % path)
            logging.debug("%s to be copied to subdirectory %s" % (path, fwhm_dir))
            history_msg1 = "Image discarded by LEMON on %s" % methods.utctime()
            history_msg2 = "[Discarded] FWHM = %.3f pixels, maximum allowed value = %.3f" % \
                           (fwhms[path], maximum_fwhm)

        elif path in elong_discarded:
            output_path = os.path.join(elong_dir, output_filename)
            logging.debug("%s was discarded because of its elongation ratio" % path)
            logging.debug("%s to be copied to subdirectory %s" % (path, elong_dir))
            history_msg1 = "Image discarded by LEMON on %s" % methods.utctime()
            history_msg2 = "[Discarded] Elongation = %.3f, maximum allowed value = %.3f" % \
                           (elongs[path], maximum_elong)

        elif path == best_seeing:

            # Retain original name if --filename is an empty string
            if not options.bseeingfn:
                filename = output_filename
            else:
                filename = options.bseeingfn

            output_path = os.path.join(output_dir, filename)
            logging.debug("%s is the best-seeing image" % path)
            logging.debug("%s to be copied to directory %s with name %s" % \
                         (path, output_dir, options.bseeingfn))
            history_msg1 = "Image identified by LEMON as the 'best-seeing' one"
            history_msg2 = "FWHM = %.3f | Elongation = %.3f | Sources: %d (at %.2f percentile)" % \
                           (fwhms[path], elongs[path], nstars[path], options.stars_per)

        else:
            output_path = os.path.join(output_dir, output_filename)
            logging.debug("%s to be copied to %s" % (path, output_dir))
            history_msg1 = "Image FWHM = %.3f" % fwhms[path]
            history_msg2 = "Image elongation = %.3f" % elongs[path]

        if os.path.exists(output_path) and not options.overwrite:
            msg = ("%sError. Output FITS file '%s' already exists. "
                   "You need to use --overwrite.")
            args = style.prefix, output_path
            print msg % args
            print style.error_exit_message
            return 1

        else:
            src = seeing_tmp_paths[path]
            shutil.move(src, output_path)

        methods.owner_writable(output_path, True) # chmod u+w
        logging.debug("%s copied to %s" % (path, output_path))
        output_img = fitsimage.FITSImage(output_path)
        output_img.add_history(history_msg1)
        output_img.add_history(history_msg2)
        logging.debug("%s: FITS header updated (HISTORY keywords)" % path)

        # Copy the FWHM to the FITS header, for future reference
        comment = "Margin = %d, SNR percentile = %.3f" % (options.margin, options.per)
        output_img.update_keyword(options.fwhmk, fwhms[path], comment = comment)
        logging.debug("%s: FITS header updated (%s keyword)" % (path, options.fwhmk))

        print "%sFITS image %s saved to %s" % (style.prefix, path, output_path)
        processed += 1

    print "%sA total of %d images was saved to directory '%s'." % (style.prefix, processed, output_dir)
    print "%sWe're done ^_^" % style.prefix
    return 0
Example #5
0
def main(arguments=None):
    """ main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:]  # ignore argv[0], the script name
    (options, args) = parser.parse_args(args=arguments)

    # Adjust the logger level to WARNING, INFO or DEBUG, depending on the
    # given number of -v options (none, one or two or more, respectively)
    logging_level = logging.WARNING
    if options.verbose == 1:
        logging_level = logging.INFO
    elif options.verbose >= 2:
        logging_level = logging.DEBUG
    logging.basicConfig(format=style.LOG_FORMAT, level=logging_level)

    # Print the help and abort the execution if there are not two positional
    # arguments left after parsing the options, as the user must specify at
    # least one (only one?) input FITS file and the output directory
    if len(args) < 2:
        parser.print_help()
        return 2  # 2 is generally used for command line syntax errors
    else:
        input_paths = args[:-1]
        output_dir = args[-1]

    # Make sure that the output directory exists, and create it if it doesn't.
    # The subdirectories for discarded images are not yet created; we put this
    # off until we know that at least one image is indeed going to be excluded.
    methods.determine_output_dir(output_dir)
    fwhm_dir = os.path.join(output_dir, options.fwhm_dir)
    elong_dir = os.path.join(output_dir, options.elong_dir)

    print "%s%d paths given as input, on which sources will be detected." % \
          (style.prefix, len(input_paths))
    print "%sRunning SExtractor on all the FITS images..." % style.prefix

    # Use a pool of workers and run SExtractor on the images in parallel!
    pool = multiprocessing.Pool(options.ncores)
    map_async_args = ((path, options) for path in input_paths
                      if os.path.isfile(path))
    result = pool.map_async(parallel_sextractor, map_async_args)

    methods.show_progress(0.0)
    while not result.ready():
        time.sleep(1)
        methods.show_progress(queue.qsize() / len(input_paths) * 100)
        # Do not update the progress bar when debugging; instead, print it
        # on a new line each time. This prevents the next logging message,
        # if any, from being printed on the same line that the bar.
        if logging_level < logging.WARNING:
            print

    result.get()  # reraise exceptions of the remote call, if any
    methods.show_progress(100)  # in case the queue was ready too soon
    print

    # Three sets, to keep the track of all the images on which SExtractor
    # has been run and also of which have been discarded because of their
    # unnaceptable FWHM or elongation ratio.
    all_images = set()
    fwhm_discarded = set()
    elong_discarded = set()

    # Dictionary mapping each input image to the temporary output file: a copy
    # of the input image but whose FITS header has been updated with the path
    # to the SExtractor catalog and the MD5 hash of the configuration files.
    seeing_tmp_paths = dict()

    # Extract the four-element tuples (path to the image, FWHM, elongation and
    # number of sources detected by SExtractor) from the multiprocessing' queue
    # and store the values in three independent dictionaries; these provide
    # fast access, with O(1) lookup, to the data.
    fwhms = {}
    elongs = {}
    nstars = {}

    for _ in xrange(queue.qsize()):
        path, output_tmp_path, fwhm, elong, stars = queue.get()
        all_images.add(path)
        seeing_tmp_paths[path] = output_tmp_path

        # The clean-up function cannot be registered in parallel_sextractor()
        # because it would remove the temporary FITS file when the process
        # terminates (instead of when our program exits, which is what we
        # need). Do it here, to make sure that whatever happens next these
        # temporary files are always deleted.
        atexit.register(methods.clean_tmp_files, output_tmp_path)

        fwhms[path] = fwhm
        elongs[path] = elong
        nstars[path] = stars

    if not all_images:
        print "%sError. No FITS images were detected." % style.prefix
        print style.error_exit_message
        return 1

    # Let's first discard those images with a bad full width at half maximum.
    # In order to to this, we fit a normal distribution (assuming the FWHMs to
    # be Gaussian distributed) and define the maximum allowed value as that
    # which exceeds the specified number of standard deviations of the mean.

    print "%sFitting a Gaussian distribution to the FWHMs..." % style.prefix,
    sys.stdout.flush()
    logging.debug("Fitting a Gaussian distribution to the %d FWHMs" %
                  len(fwhms))
    mu, sigma = scipy.stats.norm.fit(fwhms.values())
    logging.debug("FWHMs mean = %.3f" % mu)
    logging.debug("FWHMs sigma = %.3f" % sigma)
    print 'done.'
    sys.stdout.flush()

    print "%sFWHMs mean = %.3f, sigma = %.3f pixels" % (style.prefix, mu,
                                                        sigma)
    maximum_fwhm = mu + (options.fwhm_sigma * sigma)
    logging.debug("Maximum allowed FWHM = %.3f + %.1f x %.3f = %.3f pixels" % \
                 (mu, options.fwhm_sigma, sigma, maximum_fwhm))
    print "%sDiscarding images with a FWHM > %.3f + %.1f x %.3f = %.3f pixels..." % \
          (style.prefix, mu, options.fwhm_sigma, sigma, maximum_fwhm)

    # Exclude images by adding them to the FWHM-discarded set
    for path, fwhm in sorted(fwhms.iteritems()):
        if fwhm > maximum_fwhm:
            fwhm_discarded.add(path)
            logging.debug("%s discarded (FWHM = %.3f > %.3f" % \
                         (path, fwhm, maximum_fwhm))
            print "%s%s discarded (FWHM = %.3f)" % (style.prefix, path, fwhm)

    logging.info("Images discarded by FWHM: %d" % len(fwhm_discarded))
    if not fwhm_discarded:
        print "%sNo images were discarded because of their FWHM. Hooray!" % style.prefix
    else:
        discarded_fraction = len(fwhm_discarded) / len(all_images) * 100
        nleft = len(all_images) - len(fwhm_discarded)  # non-discarded images
        print "%s%d FITS images (%.2f %%) discarded, %d remain" % \
              (style.prefix,  len(fwhm_discarded), discarded_fraction, nleft)

    # Repeat the same approach, now with the elongation ratios. Images already
    # discarded because of their FWHM are not even considered -- why discard
    # them twice? They can simply be ignored.

    print "%sFitting a Gaussian distribution to the elongations..." % style.prefix,
    sys.stdout.flush()
    mu, sigma = scipy.stats.norm.fit(elongs.values())
    logging.debug("Elongations mean = %.3f" % mu)
    logging.debug("Elongations sigma = %.3f" % sigma)
    print 'done.'
    sys.stdout.flush()

    print "%sElongation mean = %.3f, sigma = %.3f pixels" % (style.prefix, mu,
                                                             sigma)
    maximum_elong = mu + (options.elong_sigma * sigma)
    logging.debug("Maximum allowed elongation = %.3f + %.1f x %.3f = %.3f pixels" % \
                 (mu, options.elong_sigma, sigma, maximum_elong))
    print "%sDiscarding images with an elongation > %.3f + %.1f x %.3f = %.3f ..." % \
          (style.prefix, mu, options.elong_sigma, sigma, maximum_elong)

    for path, elong in sorted(elongs.iteritems()):
        # Ignore FWHM-discarded images
        if path in fwhm_discarded:
            logging.debug("%s ignored (already discarded by FWHM)" % path)
            continue
        elif elong > maximum_elong:
            elong_discarded.add(path)
            logging.debug("%s discarded (elongation = %.3f > %.3f" % \
                         (path, fwhm, maximum_elong))
            print "%s%s discarded (elongation = %.3f)" % (style.prefix, path,
                                                          elong)

    logging.info("Images discarded by elongation: %d" % len(elong_discarded))
    if not elong_discarded:
        print "%sNo images were discarded because of their elongation. Yay!" % style.prefix
    else:
        initial_size = len(all_images) - len(fwhm_discarded)
        discarded_fraction = len(elong_discarded) / initial_size * 100
        nleft = initial_size - len(elong_discarded)
        print "%s%d FITS images (%.2f %%) discarded, %d remain" % \
              (style.prefix,  len(elong_discarded), discarded_fraction, nleft)

    # Finally, take the images whose number of stars is at the 'stars_per'
    # percentile and select the one with the best FWHM. This will be our
    # 'best-seeing' image, in which sources may be detected. Taking directly
    # the image with the best FWHM may not work as we need the best-seeomg
    # image to also be one of the most populated.

    print "%sIdentifying the images whose number of detected sources it at " \
          "the %.2f percentile..." % (style.prefix, options.stars_per) ,
    sys.stdout.flush()
    # Ignore discarded images, for whatever reason
    logging.debug("Finding the %.2f percentile of the number of stars " \
                 "detected by SExtractor"  % options.stars_per)
    for path in fwhm_discarded.union(elong_discarded):
        del nstars[path]
        reason = 'FWHM' if path in fwhm_discarded else 'elongation'
        logging.debug("%s ignored (was discarded by %s)" % (path, reason))
    min_nstars = scipy.stats.scoreatpercentile(nstars.values(),
                                               options.stars_per)
    print 'done.'

    print "%sNumber of stars at percentile = %d, taking the images with at " \
          "least this number of sources..." % (style.prefix, min_nstars) ,
    sys.stdout.flush()
    most_populated_images = [
        path for path, stars in nstars.iteritems() if stars >= min_nstars
    ]

    logging.debug("There are %s images with a number of stars at the %.2f " \
                 "percentile" % (len(most_populated_images), options.stars_per))
    logging.debug("Identifying the image with the lowest FWHM")
    print 'done.'

    print "%sFinally, finding the image with the lowest FWHM among these " \
          "%d images..." % (style.prefix, len(most_populated_images)),
    sys.stdout.flush()

    # Find the image with the best seeing (lowest FWHM)
    best_seeing = min(most_populated_images, key=lambda path: fwhms[path])
    logging.debug("Best-seeing image: %s" % path)
    logging.debug("Best-seeing image FWHM = %.3f" % fwhms[best_seeing])
    logging.debug("Best-seeing image elongation = %.3f" % elongs[best_seeing])
    logging.debug("Best-seeing image sources = %d" % nstars[best_seeing])
    assert best_seeing not in fwhm_discarded
    assert best_seeing not in elong_discarded
    print 'done.'

    print "%sBest-seeing image = %s, with %d sources and a FWHM of %.3f pixels" % \
          (style.prefix, best_seeing, nstars[best_seeing], fwhms[best_seeing])

    # The subdirectories are created only if at least one image is going to be
    # discarded. We do not want empty directories in case no image is discarded
    # because of its full-width at half maximum (FWHM) or elongation.

    if fwhm_discarded:
        methods.determine_output_dir(fwhm_dir, quiet=True)

    if elong_discarded:
        methods.determine_output_dir(elong_dir, quiet=True)

    # Finally, copy all the FITS images to the output directory
    processed = 0
    for path in sorted(all_images):
        # Add the suffix to the basename of the FITS image
        root, ext = os.path.splitext(os.path.basename(path))
        output_filename = root + options.suffix + ext
        logging.debug("Basename '%s' + '%s' becomes '%s'" % \
                     (path, options.suffix, output_filename))

        if path in fwhm_discarded:
            output_path = os.path.join(fwhm_dir, output_filename)
            logging.debug("%s was discarded because of its FWHM" % path)
            logging.debug("%s to be copied to subdirectory %s" %
                          (path, fwhm_dir))
            history_msg1 = "Image discarded by LEMON on %s" % methods.utctime()
            history_msg2 = "[Discarded] FWHM = %.3f pixels, maximum allowed value = %.3f" % \
                           (fwhms[path], maximum_fwhm)

        elif path in elong_discarded:
            output_path = os.path.join(elong_dir, output_filename)
            logging.debug("%s was discarded because of its elongation ratio" %
                          path)
            logging.debug("%s to be copied to subdirectory %s" %
                          (path, elong_dir))
            history_msg1 = "Image discarded by LEMON on %s" % methods.utctime()
            history_msg2 = "[Discarded] Elongation = %.3f, maximum allowed value = %.3f" % \
                           (elongs[path], maximum_elong)

        elif path == best_seeing:

            # Retain original name if --filename is an empty string
            if not options.bseeingfn:
                filename = output_filename
            else:
                filename = options.bseeingfn

            output_path = os.path.join(output_dir, filename)
            logging.debug("%s is the best-seeing image" % path)
            logging.debug("%s to be copied to directory %s with name %s" % \
                         (path, output_dir, options.bseeingfn))
            history_msg1 = "Image identified by LEMON as the 'best-seeing' one"
            history_msg2 = "FWHM = %.3f | Elongation = %.3f | Sources: %d (at %.2f percentile)" % \
                           (fwhms[path], elongs[path], nstars[path], options.stars_per)

        else:
            output_path = os.path.join(output_dir, output_filename)
            logging.debug("%s to be copied to %s" % (path, output_dir))
            history_msg1 = "Image FWHM = %.3f" % fwhms[path]
            history_msg2 = "Image elongation = %.3f" % elongs[path]

        if os.path.exists(output_path) and not options.overwrite:
            msg = ("%sError. Output FITS file '%s' already exists. "
                   "You need to use --overwrite.")
            args = style.prefix, output_path
            print msg % args
            print style.error_exit_message
            return 1

        else:
            src = seeing_tmp_paths[path]
            shutil.move(src, output_path)

        methods.owner_writable(output_path, True)  # chmod u+w
        logging.debug("%s copied to %s" % (path, output_path))
        output_img = fitsimage.FITSImage(output_path)
        output_img.add_history(history_msg1)
        output_img.add_history(history_msg2)
        logging.debug("%s: FITS header updated (HISTORY keywords)" % path)

        # Copy the FWHM to the FITS header, for future reference
        comment = "Margin = %d, SNR percentile = %.3f" % (options.margin,
                                                          options.per)
        output_img.update_keyword(options.fwhmk, fwhms[path], comment=comment)
        logging.debug("%s: FITS header updated (%s keyword)" %
                      (path, options.fwhmk))

        print "%sFITS image %s saved to %s" % (style.prefix, path, output_path)
        processed += 1

    print "%sA total of %d images was saved to directory '%s'." % (
        style.prefix, processed, output_dir)
    print "%sWe're done ^_^" % style.prefix
    return 0