def draw_hulls_and_images(master_hull,
                          stackhulls,
                          hullmap,
                          stkevt3dir,
                          xmdat3dir,
                          outdir,
                          ensemble,
                          ensemblemap,
                          revision,
                          qahulls=None,
                          evtscale='log10',
                          master_color='gold',
                          qa_color='cyan',
                          axscale=0.5,
                          show_other_stack_hulls=False,
                          colorbar=True):
    """Draw individidual hull+image for all members of an ensemble.

    An image is created per stack-hull; this means that if there are
    2 hulls from the same stack then there will be 2 images. This is
    a change to the original implementation, party necessitated by
    the desire to show the correct band. The display is limited to
    a "zoomed" in section around the master hull (controlled
    by the axscale). Other hulls from the stack are drawn, but will
    only be visible if they fall in this area.

    Parameters
    ----------
    master_hull: dict
        Contains the master hull: fields are 'master_id', 'status',
        and 'eqpos'. The 'status' field should be one of:
        'todo', 'okay', 'qa[-...]'.
    stackhulls : list
        What stack-level hulls form this master hull? Each entry
        is a dictionary with the keys 'stack' and 'component'.
    hullmap : dict
        The stack-level hull data, stored by the stack id. The values
        are those returned by read_hulls_from_mrgsrc3.
    stkevt3dir : str
        The location of the stkevt3 files.
    xmdat3dir : str
        The location of the xmdat3 files (per stack, optional).
    outdir : str
        The output directory, which must exist.
    ensemble : str
        The ensemble value.
    ensemblemap : dict
        The keys are stack ids, and the values are the STKIDxxx
        value (i.e. the integer value of xxx).
    revision : int
        The revision number of the file.
    qahulls : None or list of dict, optional
        This is only used if master_hull['status'] is set to 'qa[-...]'.
        Each entry represents a hull, and has the 'eqpos' field
        which contains the polygon.
    evtscale : {'none', 'log10', 'log', 'sqrt'}, optional
        The scaling to apply to the event data before displaying it.
    master_color : str, optional
        The color for the master hull.
    qa_color : str, optional
        The color for any QA hulls.
    axscale : float, optional
        The additional space around the hull, as a fraction of the
        width/height of the hull. This value refers to the delta
        added to each side (so twice this is added overall). Note
        that the space added will be more than this fraction for the
        "smaller" side of the hull, as the plot aspect ratio is
        maintained.
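        For example, axscale=0.5 adds half of the hull's width (and
        height) on each side, so the plotted region is roughly twice
        the size of the hull in each direction.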
    show_other_stack_hulls : bool, optional
        Should the hulls from other stacks be drawn on the image?
    colorbar : bool, optional
        Should each image have a color bar?

    Notes
    -----
    For the initial revision we do not need to worry about Match_Type
    being ambiguous (this is the 'match_type' keyword of each entry
    in stackhulls). If we re-run after some master-match work then
    it may be necessary to disambiguate visually.

    """

    if not os.path.isdir(outdir):
        raise ValueError("outdir={} is not a directory".format(outdir))

    masterid = master_hull['master_id']

    # It is easier if we can map the stack hull data via the
    # key: stack, cpt. Actually, the following code is very unclear
    # about data ownership, and really could do with a clean up.
    #
    # It is also useful to know what stacks we have.
    #
    stacks = set()
    for shull in stackhulls:
        stacks.add(shull['stack'])

    stacks = sorted(list(stacks))

    # How many hulls are there in this master hull?
    nhulls = len(stackhulls)
    nstacks = len(stacks)

    # Extract all the hulls from the stacks we are interested in,
    # whether they are related to the given master hull or not.
    #
    hulldata = {}
    for stack, hulls in six.iteritems(hullmap):
        if stack not in stacks:
            continue

        for hull in hulls:
            assert hull['stack'] == stack
            key = stack, hull['component']
            assert key not in hulldata
            hulldata[key] = hull

    # Do we have any xmdat3 files for these stacks? We want to
    # store the None to mark stacks as having no data.
    #
    xmdat3map = {}
    for stack in stacks:
        xmdat3map[stack] = read_xmdat3(xmdat3dir, stack)

    # Need limits for determining how much of the event file
    # to read in.
    #
    # Technically the QA and master hulls should be included here, but
    # they should all, by definition, be no larger than the
    # stack-level hulls.
    #
    data_lims = find_hull_limits(stackhulls, hulldata, axscale=axscale)
    sfilt = data_lims['dmfilter']

    # Find the event files for the stacks. Note that we check
    # all files and then report errors at the end (so you don't
    # have to download a file, run, download a file, run, ...
    # to get them all). Of course, if there are multiple hulls
    # in the ensemble this only handles one of them.
    #
    evtfiles = {}
    failed = []
    for stack in stacks:
        path = os.path.join(stkevt3dir, '{}*_evt3.fits*'.format(stack))
        try:
            evtfiles[stack] = utils.find_single_match(path)
        except IOError as exc:
            failed.append((stack, str(exc)))

    if len(failed) > 0:
        emsg = "\n".join([f[1] for f in failed])
        raise IOError("The following event file(s) are " +
                      "missing:\n{}".format(emsg))

    # Could go for a more-adaptive scheme
    #
    # Note that we special-case ensemble 0019200_001, which has
    # ~ 35 stack-level hulls, and is also known as Cas A, to
    # only have a 2 by 2 grid in the hope this saves enough
    # memory. It isn't enough, because there are multiple evt files
    # whose size is >~ 4 Gb for this particular source.
    #
    if ensemble == 'ens0019200_001':
        print('Note: special case Cas A to 2 by 2 grid')
        nsize = 2
        nplots = nsize * nsize
        npages = np.ceil(nhulls / (nplots * 1.0)).astype(int)

    elif nhulls <= 9:
        nsize = np.ceil(np.sqrt(nhulls)).astype(int)
        nplots = nsize * nsize
        npages = 1
    else:
        nsize = 3
        nplots = nsize * nsize
        npages = np.ceil(nhulls / (nplots * 1.0)).astype(int)
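    # As an illustration of the layout: with 12 stack-level hulls the
    # else branch gives nsize=3, nplots=9, and npages=ceil(12 / 9) = 2,
    # so the first page holds 9 hull images and the second page the
    # remaining 3.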

    def save_plot(escale, pagenum):
        """Create PNG output then destroy the window"""

        fmt = 'hull.{}.{:03d}.p{:03d}.v{:03d}.{}.png'
        outfile = fmt.format(ensemble, masterid, pagenum, revision, escale)
        outfile = os.path.join(outdir, outfile)
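        # The output name follows the format string above, giving
        # something like hull.ens0019200_001.001.p001.v001.log10.png
        # (master 1, page 1, revision 1, log10 scaling).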

        # Hide any warnings about square root of a negative number.
        #
        oldsettings = np.seterr(all='ignore')
        try:
            plt.savefig(outfile)
        finally:
            np.seterr(**oldsettings)

        print("Created: {}".format(outfile))
        plt.close()

    title = "{} {:03d}".format(ensemble, masterid) + \
        "  #stacks={} #hulls= {}".format(nstacks, nhulls)
    qatitle = None

    if chs_status.is_qa(master_hull['status']):
        qatitle = master_hull['status'].upper()

    page_idx = 0

    # Are these correct?
    efilts = {
        'w': '',
        'b': '[energy=500:7000]',
        'u': '[energy=300:500]',
        's': '[energy=500:1200]',
        'm': '[energy=1200:2000]',
        'h': '[energy=2000:7000]'
    }

    # Work out the event file + VFS specification to use for each
    # stack-level hull before reading anything else, since this lets
    # us cache the result, which may help out with really-large
    # datasets (but only if the band matches).
    #
    # TODO: look at this
    # Note that this cache is fairly pointless as we repeat this
    # code several times (for different scalings), when we should
    # perhaps just change the scaling here (to avoid re-creating
    # everything).
    #
    # key = (stack, cpt), value = file name incl VFS
    evt_name = {}

    # keys = file name incl VFS, values = number of times
    evt_count = defaultdict(int)

    for hull_idx, shull in enumerate(stackhulls):

        stack = shull['stack']
        cpt = shull['component']
        key = stack, cpt

        evtfile = evtfiles[stack]

        # What band to use?
        #
        bname = hulldata[key]['eband']

        if bname == 'w':
            bspec = '[bin sky=::32]'
        else:
            bspec = '[bin sky=::8]'

        efilt = efilts[bname]
        iname = evtfile + efilt + sfilt + bspec

        evt_name[key] = iname
        evt_count[iname] += 1
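        # iname is now a CIAO Data Model virtual-file specification
        # built from the pieces above; schematically (the spatial
        # filter comes from find_hull_limits, so its exact form is not
        # shown here) a 'b'-band hull would look like
        #   <stack>_evt3.fits[energy=500:7000]<sfilt>[bin sky=::8]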

    evt_cache = {}
    fig = None
    axplot = None
    for hull_idx, shull in enumerate(stackhulls):

        r = add_component_image(nhulls,
                                nsize,
                                npages,
                                title,
                                qatitle,
                                fig,
                                axplot,
                                page_idx,
                                hull_idx,
                                nplots,
                                evtscale,
                                shull,
                                hulldata,
                                xmdat3map,
                                hullmap,
                                ensemblemap,
                                stacks,
                                master_hull,
                                qahulls,
                                save_plot,
                                evt_name,
                                evt_count,
                                evt_cache,
                                master_color=master_color,
                                qa_color=qa_color,
                                show_other_stack_hulls=show_other_stack_hulls,
                                colorbar=colorbar)
        fig, axplot, page_idx = r

    # Don't forget to save the last page.
    save_plot(evtscale, page_idx)
    assert page_idx == npages
    return {'npages': npages}
def update_review_products(chsfile, outdir, mrgsrc3dir, stkevt3dir, stkfov3dir,
                           xmdat3dir):
    """Update the review products.

    Parameters
    ----------
    chsfile : str
        The FITS file containing the master hull data.
    outdir : str
        The output directory, which must exist.
    mrgsrc3dir, stkevt3dir, stkfov3dir, xmdat3dir : str
        The directory names containing the mrgsrc3, evt3, fov3, and
        xmdat3 files for the stacks. The names must match
        <stack>*<type>.fits[.gz] and there can only be one per stack
        per type. The xmdat3 files are optional and are stored as
        <stack>/<stack>N000_xmdat3.fits.

    Notes
    -----
    Should the "context" plot - i.e. all hulls from an ensemble - also
    include the current master hulls? I worry that it will make it
    harder to see the details, but it would let you see at a glance
    if there are potential issues.
    """

    # Since we are assumed to be running on updated files, the output
    # directory should already exist.
    #
    if not os.path.isdir(outdir):
        sys.stderr.write("ERROR: outdir does not exist ")
        sys.stderr.write("{}\n".format(outdir))
        sys.exit(1)

    chsdir = os.path.dirname(chsfile)

    hullmatch, hulllist, metadata = utils.read_master_hulls(
        chsfile, mrgsrc3dir)

    ensemble = metadata['ensemble']
    ensemblemap = metadata['ensemblemap']
    revision = metadata['revision']

    filename = 'field.{}.v{:03d}.png'.format(ensemble, revision)
    ctxfile = os.path.join(outdir, filename)
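    # For example, revision 1 of ensemble ens0019200_001 would give
    # field.ens0019200_001.v001.png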
    if os.path.exists(ctxfile):
        sys.stderr.write("ERROR: context image already exists ")
        sys.stderr.write("{}\n".format(ctxfile))
        sys.exit(1)

    mids = sorted(hulllist.keys())

    # What stacks do we care about (those with hulls)
    stacks = set([])

    # ncpts is the number of stack hulls in the ensemble
    ncpts = 0
    for stkhulls in hullmatch.values():
        for stkhull in stkhulls:
            stacks.add(stkhull['stack'])
            ncpts += 1

    assert len(stacks) > 0

    # It might be more useful to sort the stacks by "number of fov
    # files" say, or some other criteria.
    #
    stacks = list(stacks)
    stacks = sorted(stacks)

    # Force a check that we can find these files before any
    # processing.
    #
    fov3files = [utils.find_stkfov3(s, stkfov3dir) for s in stacks]

    # Read in all the QAs, labelled by mid.
    #
    qas = {}
    for mid in mids:
        src = hulllist[mid]
        if chs_status.is_qa(src['status']):
            qas[mid] = read_qa_hulls(chsdir, revision, src['master_id'])

    # Ideally I would remove hullmap, and use hullmatch directly,
    # but the format is slightly different, so recreate hullmap
    # (it used to be formed by calling read_hulls_from_mrgsrc3
    # but this is now part of read_master_hulls).
    #
    hullmap = defaultdict(list)
    for masterhull in hullmatch.values():
        for hull in masterhull:
            stack = hull['stack']
            hullmap[stack].append(hull)

    # Context image: FOV + all the hulls
    #
    hulls = [hullmap[s] for s in stacks]
    plots.draw_ensemble_outline(ensemble, hulllist, hulls, qas, fov3files)

    plt.savefig(ctxfile)
    print("Created: {}".format(ctxfile))

    # Per-master hulls
    #
    for mid in mids:

        src = hulllist[mid]
        try:
            qahulls = qas[mid]
        except KeyError:
            qahulls = None

        # a lot of repeated work to support different scalings,
        # but not worth the complexity of avoiding this
        for evtscale in ['log10', 'sqrt', 'none']:
            plots.draw_hulls_and_images(src,
                                        hullmatch[mid],
                                        hullmap,
                                        stkevt3dir,
                                        xmdat3dir,
                                        outdir,
                                        ensemble,
                                        ensemblemap,
                                        revision,
                                        evtscale=evtscale,
                                        qahulls=qahulls)
def draw_ensemble_outline(ensemble,
                          mhulls,
                          hulls,
                          qas,
                          fov3files,
                          qa_color='cyan'):
    """Draw up the FOVs for the ensembles and the different stack-level hulls.

    Parameters
    ----------
    ensemble : str
        The ensemble name; it is used for the plot title and error
        messages.
    mhulls : dict
        The master hulls - the second argument of
        utils.read_master_hulls - which is a dictionary keyed by
        the master id and the values give the master-hull details.
    hulls : list of list of stack hulls
        The stack-level hulls; each entry in the list is the output of
        read_hulls_from_mrgsrc3
    qas : None or dict
        If not None then the QA hulls for this ensemble, where the
        key is the master id and the value is the return value
        of chs_create_review_images_mpl:read_qa_hulls.
    fov3files : list of str
        The FOV3 files for the stacks in the ensemble. This is assumed
        to be the FOV files for those stacks containing hulls (i.e.
        they act as a reasonable bounding box for the display).
    qa_color : str, optional
        The color for any QA hulls and labels.

    """

    # Use the first transform we find as the base.
    #
    tr = None
    for hlist in hulls:
        for hull in hlist:
            tr = hull['transform']
            break
        if tr is not None:
            break

    if tr is None:
        raise IOError("No hulls found for ensemble: {}".format(ensemble))

    wcs = tr2wcs(tr)
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(1, 1, 1, projection=wcs)
    ax_trans = ax.get_transform('world')

    # We want a different color for each stack, but note that there
    # are multiple polygons per stack, so we can not just rely on
    # matplotlib's automatic cycling of colors.
    #
    # Use the default matplotlib set of colors for cycling.
    #
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    _nextcolor = cycler(color=colors)()

    def nextcolor():
        # return _nextcolor.next()['color']
        return next(_nextcolor)['color']
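    # next(_nextcolor) yields dicts such as {'color': '#1f77b4'}, so
    # each FOV file below is drawn with the next color in the default
    # matplotlib cycle (repeating once the cycle is exhausted).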

    for fovfile in fov3files:
        # assume everything is a polygon
        cr = pycrates.read_file(fovfile)
        color = nextcolor()
        for eqpos in cr.get_column('EQPOS').values:
            ra = eqpos[0]
            dec = eqpos[1]
            idx = np.isfinite(ra)

            ra = ra[idx]
            dec = dec[idx]

            plt.plot(ra, dec, color=color, alpha=0.07, transform=ax_trans)

    # Draw these somewhat transparent so that the numbering added
    # later can be seen (it can get busy).
    #
    # stack_color = 'orange'
    # main_color = 'gold'
    stack_color = 'black'
    main_color = 'black'

    for i, hlist in enumerate(hulls):

        for stkhull in hlist:
            if stkhull['mancode']:
                lstyle = 'dotted'
            else:
                lstyle = 'solid'

            # should this use chs_status.DELETE?
            if stkhull['match_type'] == "deleted":
                use_color = 'red'
            else:
                use_color = stack_color

            eqpos = stkhull['eqpos']
            ra = eqpos[0]
            dec = eqpos[1]

            plt.plot(ra,
                     dec,
                     linewidth=1,
                     linestyle=lstyle,
                     alpha=0.6,
                     transform=ax_trans,
                     color=use_color)

    # Now the master hulls; these are going to overwrite the stack-level
    # hulls for the majority of cases (single stack hull), so use a
    # thinner line width, which is not ideal.
    #
    title_col = 'k'
    for mid, mhull in mhulls.items():
        if chs_status.is_qa(mhull['status']):
            title_col = 'r'
            continue

        eqpos = mhull['eqpos']
        ra = eqpos[0]
        dec = eqpos[1]

        plt.plot(ra,
                 dec,
                 linewidth=1,
                 linestyle='solid',
                 alpha=0.6,
                 transform=ax_trans,
                 color=main_color)

        label_hull(ax_trans, mhull, "{}".format(mid), main_color)

    # Do we have any qa hulls?
    #
    if qas is not None:
        for mid, qahulls in qas.items():

            # We label each "qa component"
            #
            nqas = len(qahulls)
            for i, qahull in enumerate(qahulls):
                draw_hull(ax_trans, qahull, qa_color, 1, 'dashed')

                lbl = "QA {}".format(mid)
                if nqas > 1:
                    lbl += " [{}]".format(i + 1)

                label_hull(ax_trans, qahull, lbl, qa_color)

    # Report on the number of stacks with hulls, not the number
    # in the ensemble.
    #
    lbl = "Ensemble {}   #stacks={}  #hulls={}".format(ensemble,
                                                       len(fov3files),
                                                       len(mhulls))
    plt.title(lbl, fontsize=18, color=title_col)

    ax.coords['ra'].set_major_formatter('hh:mm:ss')
    ax.coords['dec'].set_major_formatter('dd:mm:ss')

    ax.set_aspect('equal')
def add_hulls(stack,
              cpt,
              hullmap,
              master_hull,
              qahulls,
              axes,
              master_color='gold',
              qa_color='cyan'):
    """Draw the hulls for this stack.

    draw the hulls for this stack

    draw the other hulls first, as reference
    (i.e. those from other stacks)

    NOTE: this draws on all hulls, so can be useful
          if nearby ones overlap.

    Parameters
    ----------
    stack : str
        The stack name.
    cpt
        The component number for the hull.
    hullmap : dict
        The stack-level hull data, stored by the stack id. The values
        are those returned by read_hulls_from_mrgsrc3.
    master_hull: dict
        Contains the master hull: fields are 'master_id', 'status',
        and 'eqpos'. The 'status' field should be one of:
        'todo', 'okay', 'qa[-...]'.
    qahulls : None or list of dict
        This is only used if master_hull['status'] is set to 'qa[-...]'.
        Each entry represents a hull, and has the 'eqpos' field
        which contains the polygon.
    axes
        The plot axes.
    master_color : str, optional
        The color for the master hull.
    qa_color : str, optional
        The color for any QA hulls.
    """

    ax_trans = axes.get_transform('world')

    # Draw the hull for this component in orange and the
    # others in the stack as a red-ish color. Trying to
    # match masterhull.js behavior.
    #
    for hull in hullmap[stack]:
        if hull['component'] == cpt:
            hullcol = 'orange'
        else:
            hullcol = '#cc3333'

        draw_hull(ax_trans, hull, hullcol, 2, 'solid')

    # NOTE: these are drawn thinner than the stack-level hulls so
    # they do not obscure them (for cases when the two contours
    # are the same or very similar).
    #
    if chs_status.is_qa(master_hull['status']):
        for qahull in qahulls:
            draw_hull(ax_trans, qahull, qa_color, 1, 'dashed')
    else:
        draw_hull(ax_trans, master_hull, master_color, 1, 'solid')
def create_review_products(chsfile, outdir,
                           mrgsrc3dir, stkevt3dir,
                           stkfov3dir, xmdat3dir,
                           ignorestatus=False,
                           ignorenvertex=False):
    """Create the review products.

    Parameters
    ----------
    chsfile : str
        The FITS file containing the master hull data.
    outdir : str
        The output directory, which may be created by the routine.
        The last component *must* be the ensemble name.
    mrgsrc3dir, stkevt3dir, stkfov3dir, xmdat3dir : str
        The directory names containing the mrgsrc3, evt3, fov3, and
        xmdat3 files for the stacks. The names must match
        <stack>*<type>.fits[.gz] and there can only be one per stack
        per type. The xmdat3 files are optional and are stored as
        <stack>/<stack>N000_xmdat3.fits.
    ignorestatus : bool, optional
        If set, the STATUS value for each master hull is set to
        chs_utils.DONE, and a screen message is written out saying
        what the old value was.
    ignorenvertex : bool, optional
        If set, the NVERTEX value of the HULLLIST block is not used
        to filter the position array. Instead a manual check is used.
        This is because the output from Joe's code hasn't adjusted
        this value.

    Notes
    -----
    Should the "context" plot - i.e. all hulls from an ensemble - also
    include the current master hulls? I worry that it will make it
    harder to see the details, but it would let you see at a glance
    if there are potential issues.
    """

    chsdir = os.path.dirname(chsfile)

    hullmatch, hulllist, metadata = utils.read_master_hulls(chsfile,
                                                            mrgsrc3dir,
                                                            ignorestatus=ignorestatus,
                                                            ignorenvertex=ignorenvertex)

    ensemble = metadata['ensemble']
    ensemblemap = metadata['ensemblemap']
    revision = metadata['revision']

    # NOTE: utils.save_master requires the "user directory" - i.e. the
    # parent of outdir - which means that we assume that outdir
    # ends in the ensemble name.
    #
    if os.path.basename(outdir) != ensemble:
        raise IOError("Expected outdir={} ".format(outdir) +
                      "to end with {}".format(ensemble))

    userdir = os.path.normpath(os.path.join(outdir, '..'))

    mids = sorted(hulllist.keys())

    # What stacks do we care about (those with hulls)
    stacks = set([])

    # ncpts is the number of stack hulls in the ensemble
    ncpts = 0
    for stkhulls in hullmatch.values():
        for stkhull in stkhulls:
            stacks.add(stkhull['stack'])
            ncpts += 1

    assert len(stacks) > 0

    # It might be more useful to sort the stacks by "number of fov
    # files" say, or some other criteria.
    #
    stacks = list(stacks)
    stacks = sorted(stacks)

    # Force a check that we can find these files before any
    # processing.
    #
    fov3files = [utils.find_stkfov3(s, stkfov3dir) for s in stacks]

    # Read in all the QAs, labelled by mid.
    #
    qas = {}
    for mid in mids:
        src = hulllist[mid]
        if chs_status.is_qa(src['status']):
            qas[mid] = read_qa_hulls(chsdir, revision,
                                     src['master_id'])

    # Ideally I would remove hullmap, and use hullmatch directly,
    # but the format is slightly different, so recreate hullmap
    # (it used to be formed by calling read_hulls_from_mrgsrc3
    # but this is now part of read_master_hulls).
    #
    hullmap = defaultdict(list)
    for masterhull in hullmatch.values():
        for hull in masterhull:
            stack = hull['stack']
            hullmap[stack].append(hull)

    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    # Context image: FOV + all the hulls
    #
    hulls = [hullmap[s] for s in stacks]
    plots.draw_ensemble_outline(ensemble, hulllist, hulls, qas,
                                fov3files)

    filename = 'field.{}.v{:03d}.png'.format(ensemble, revision)
    outfile = os.path.join(outdir, filename)

    plt.savefig(outfile)
    print("Created: {}".format(outfile))

    # Create the ensemble JSON file
    #
    revstr = "{:03d}".format(revision)
    ensdata = {'name': ensemble,
               'revision': revstr,
               'nmasters': len(mids),
               'nstacks': len(stacks),
               'ncpts': ncpts,
               'stackmap': ensemblemap,
               'status': chs_status.TODO,
               'lastmodified': '',  # could add date string here
               'usernotes': ''
               }

    filename = utils.make_field_name_json(ensemble, revision)
    outfile = os.path.join(outdir, filename)
    with open(outfile, 'w') as ofh:
        ofh.write(json.dumps(ensdata))
    print("Created: {}".format(outfile))

    # Create the per-component JSON files
    #
    for stkhulls in hullmatch.values():
        for stkhull in stkhulls:

            # Need to convert from NumPy booleans to Python ones
            # otherwise the serialization to JSON fails.
            #
            stkdata = {'lastmodified': '',
                       'stack': stkhull['stack'],
                       'component': stkhull['component'],
                       'key': stkhull['key'],
                       'ensemble': ensemble,
                       'revision': revstr,
                       # Note: need an array for master ids (in case
                       #       of ambiguous links)
                       'master_id': [stkhull['master_id']],
                       'likelihood': stkhull['likelihood'],
                       'eband': stkhull['eband'],
                       'mrg3rev': stkhull['mrg3rev'],
                       'mancode':
                       bool(stkhull['mancode']),
                       'stksvdqa':
                       bool(stkhull['stksvdqa']),
                       'include_in_centroid':
                       bool(stkhull['include_in_centroid'])}

            filename = utils.make_component_name_json(ensemble,
                                                      stkhull['stack'],
                                                      stkhull['component'],
                                                      revision)
            outfile = os.path.join(outdir, filename)
            with open(outfile, 'w') as ofh:
                ofh.write(json.dumps(stkdata))
            print("Created: {}".format(outfile))

    # Per-master hulls
    #
    for mid in mids:

        src = hulllist[mid]
        try:
            qahulls = qas[mid]
        except KeyError:
            qahulls = None

        # a lot of repeated work to support different scalings,
        # but not worth the complexity of avoiding this
        for evtscale in ['log10', 'sqrt', 'none']:
            pinfo = plots.draw_hulls_and_images(src,
                                                hullmatch[mid],
                                                hullmap,
                                                stkevt3dir,
                                                xmdat3dir,
                                                outdir,
                                                ensemble,
                                                ensemblemap,
                                                revision,
                                                evtscale=evtscale,
                                                qahulls=qahulls)

        if chs_status.is_qa(src['status']):
            action = 'manual'
        else:
            action = ''

        ensdata = {'ensemble': ensemble,
                   'masterid': mid,
                   'revision': revstr,
                   'ncpts': len(hullmatch[mid]),
                   'npages': pinfo['npages'],
                   'useraction': action,
                   'usernotes': ''
                   }

        outfile = utils.save_master(userdir, ensdata)
        print("Created: {}".format(outfile))
def make_merged_hull(hullcpts,
                     mrgsrc3dir,
                     acceptfrac=0.2,
                     maxcount="cohorts",
                     sigma=3,
                     nsigma=5,
                     tmpdir=None):
    """Create the merged hull.

    Parameters
    ----------
    hullcpts : sequence of (stackid, component)
        The stack-level hulls to merge
    mrgsrc3dir : str
        The location of the mrgsrc3 files.
    acceptfrac : float, optional
        The fraction at which to draw the merged polygon
        (when multiple hulls are present). The value used
        is acceptfrac * n, where n is determined by maxcount.
    maxcount : {'cohorts', 'hulls'}
        Should n be the number of different cohorts in the
        list of overlapping hulls (maxcount='cohorts') or the
        number of convex hulls (maxcount='hulls')? This only
        makes a difference when there are multiple hulls from
        a single cohort in the list.
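        For example, acceptfrac=0.2 with maxcount='cohorts' and five
        contributing cohorts gives a contour level of 0.2 * 5 = 1.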
    nsigma, sigma : float
        The number of sigma (the box size) and the sigma, in pixels,
        of the gaussian used to smooth the image. If either is None
        then no smoothing is done. Note that the smoothing scale,
        sigma, is given in ACIS pixels, so is multiplied by 3.8
        before being applied to HRC data (so that the physical
        scale being smoothed is similar).
    tmpdir : None or str
        The directory to use for temporary files. If None, the default
        is used (the Python tempfile isn't very explicit about
        what this default is, but it tends to be /tmp on Linux).

    Returns
    -------
    outline : dict
        The outline of the merged hull. The keys are:
        status, eqpos, pos, base_stack. If status is 'error' then
        eqpos, pos, and base_stack are all None; if status is
        'qa-<reason>' then eqpos and pos are 3D arrays with shape
        (npolygons, 2, npts).

    Notes
    -----
    Information flow on failure (including unexpected or qa) could
    be better.
    """

    if len(hullcpts) == 0:
        raise ValueError("hullcpts is empty")

    hulls = []
    for stkid, cpt in hullcpts:
        hulls.append(read_hull(stkid, cpt, mrgsrc3dir))

    # If there is only one hull then just promote the stack-level
    # data.
    #
    if len(hulls) == 1:
        h0 = hulls[0]
        outline = {
            'status': 'todo',
            'eqpos': h0['eqsrc'],
            'pos': h0['pos'],
            'base_stack': h0['stkid']
        }
        return outline

    out, tr_stkid = merge_polygons(hulls,
                                   acceptfrac=acceptfrac,
                                   maxcount=maxcount,
                                   sigma=sigma,
                                   nsigma=nsigma,
                                   tmpdir=tmpdir)

    # ARGH: the keys are different to read_hull, which makes this
    #       needlessly different, but not worth changing now.
    #
    if out['status'] == 'okay':
        outline = {
            'status': 'todo',
            'eqpos': out['hull_cel'],
            'pos': out['hull_sky'],
            'base_stack': tr_stkid
        }
        return outline

    print("WARNING: when calculating merged hull got")
    print("status= {}".format(out['status']))
    print("reason= {}".format(out['reason']))

    # Possible failures are
    #    status=failed   - may not have any data
    #    status=error    - returned unexpected data
    #    status=qa...    - there's a problem
    #
    if chs_status.is_qa(out['status']):
        outline = {
            'status': out['status'],
            'eqpos': out['hulls_cel'],
            'pos': out['hulls_sky'],
            'base_stack': tr_stkid
        }
        return outline

    # There could be some data to return here, but for now
    # error out everything.
    return {'status': 'error', 'eqpos': None, 'pos': None, 'base_stack': None}
def process_ensemble(ensemblefile, ensemble, outdir,
                     mrgsrc3dir,
                     svdqafile=None,
                     centroidfile=None,
                     compzero=0,
                     revision=1,
                     creator=None,
                     master_color='green',
                     stack_color='green',
                     qa_color='green'):
    """Process the ensemble to create the CHS review products.

    Parameters
    ----------
    ensemblefile : str
        The file should contain columns ensemble and stack, and is
        used to find what stacks to look for.
    ensemble : str
        The ensemble name.
    outdir : str
        The output directory, which will be created by the routine.
    mrgsrc3dir : str
        The directory name containing the mrgsrc3 files for the
        stacks. The names must match
        <stack>*mrgsrc3.fits* and there can only be one file per
        stack.
    svdqafile : str or None, optional
        If given then the name of the file containing the stack ids
        that went to SVD QA. The first column of this file is used
        as the stack id. The full path is written to the SVDQAFIL
        header keyword (or the string NONE if not given). If there
        is no such file then the STKSVDQA column is not written out.
    centroidfile : str or None, optional
        If given then the name of the file containing the stack,cpt,
        include_centroid information (a partial list). Used to set
        up the INCLUDE_IN_CENTROID column.
    compzero : int, optional
        The value of the COMPONENT column in the HULLMATCH block
        for a stack-level hull which has a component value of 0
        (from the mrgsrc3 MEXTSRC block); note that there are no
        such component values since they are 1 based. Must be >= 0.
    revision : int, optional
        The value to write out to the header as the CHSVER
        keyword.
    creator : str or None, optional
        If set it is used as the CREATOR keyword value in the output
        file.
    """

    if compzero < 0:
        raise ValueError("compzero must be >= 0, sent {}".format(compzero))

    if os.path.exists(outdir):
        raise IOError("The output directory already exists: {}".format(outdir))

    # validate input before creating the output
    #
    stacks = find_ensemble_stacks(ensemblefile, ensemble)
    nstacks = len(stacks)

    # I need to pass around eband,mancode info to write_hulls and
    # I am too lazy to re-architect the code, so I am adding a
    # "global" dict which contains the stack-level information indexed
    # by (STACKID, COMPONENT), since that is used in write_hulls.
    #
    hull_store = {}
    transform_store = {}

    hulls = []
    stacks_with_hulls = 0
    for stack in stacks:
        shulls = read_hulls(stack, mrgsrc3dir)
        if shulls == []:
            continue

        hulls.extend(shulls)
        stacks_with_hulls += 1

        for shull in shulls:
            key = (shull['stack'], shull['component'])
            assert key not in hull_store
            hull_store[key] = shull

        # Add in the stack transform
        transform_store[stack] = shulls[0]['transform']

    if hulls == []:
        print("No hulls were found in ensemble {}".format(ensemble))
        no_hulls(outdir)
        return

    os.mkdir(outdir)

    nhulls_stack = len(hulls)
    print("There are {} stack-level hulls ".format(nhulls_stack) +
          "in {} stacks".format(stacks_with_hulls))
    if stacks_with_hulls != nstacks:
        print("Total number of stacks: {}".format(nstacks))

    # Dump the stack-level hulls as a DS9 region file and extract
    # the hull areas.
    hull_areas = {}
    for hull in hulls:
        write_stack_hull_as_ds9(hull, outdir, revision=revision,
                                color=stack_color)

        hull_areas[hull['stack'], hull['component']] = hull['area']

    print("Created stack-level region files in {}".format(outdir))

    # What hulls overlap and what don't?
    #
    overlap_gr, singles = identify.find_overlap_graph(hulls)
    overlaps = identify.get_nodes(overlap_gr)

    # Order the overlaps so that
    # a) longest first - that is, the overlap with the most
    #    (stack,cpt) pairs comes first
    #
    # This also changes the overlap from a set to a list.
    #
    # Using -len ensures that we go from longest to shortest.
    #
    overlaps = sorted([sorted(list(overlap)) for overlap in overlaps],
                      key=lambda x: (-len(x), x))
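    # For example, the (illustrative) input [{('s2', 1)}, {('s1', 2),
    # ('s1', 1)}] becomes [[('s1', 1), ('s1', 2)], [('s2', 1)]]: the
    # two-element overlap comes first, with its members sorted by
    # (stack, cpt).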

    master_hulls = []

    # Overlap hulls
    #
    seen = set([])
    for overlap in overlaps:
        master_hulls.append([(stack, cpt) for stack, cpt in overlap])
        for k in overlap:
            assert k not in seen, \
                'repeat occurrence of {}'.format(k)
            seen.add(k)

    # Single hulls
    #
    for key in singles:
        master_hulls.append([key])

        assert key not in seen, \
            'repeat occurrence of {}'.format(key)
        seen.add(key)

    nfound = len(seen)
    assert nhulls_stack == nfound, 'Lost (or gained) a hull'

    noverlap = len(overlaps)
    nsingle = len(singles)
    print("Found {} overlap and {} single hulls".format(noverlap,
                                                        nsingle))

    # Create the initial version of the master hulls and dump out
    # any QA cases here.
    #
    # The master hulls are created, then a check is made to
    # ensure they do not overlap (marking them as qa cases if
    # they do), then they are written out.
    #
    outlines = []
    for hullcpts in master_hulls:
        outline = merge.make_merged_hull(hullcpts, mrgsrc3dir)
        outlines.append(outline)

    noverlap = 0
    for m1, m2 in itertools.combinations(outlines, 2):
        if merge.do_masters_overlap(m1, m2, transform_store):
            noverlap += 1

    if noverlap > 0:
        print("NOTE: found {} master overlaps -> QA".format(noverlap))

    for i, outline in enumerate(outlines):
        if chs_status.is_qa(outline['status']):
            dump_qa(ensemble, outdir, i + 1, outline,
                    creator=creator,
                    revision=revision,
                    color=qa_color)
        else:
            if outline['status'] != chs_status.TODO:
                print("NOTE: status = {}".format(outline['status']))

            outfile = os.path.join(outdir,
                                   'master.{}.v{:03d}.reg'.format(i + 1,
                                                                  revision))
            with open(outfile, 'w') as ofh:
                ds9_header(ofh, color=master_color)
                ostr = ds9_shape(outline['eqpos'])
                ostr += ' # text={{Master_Id={}}}\n'.format(i + 1)
                ofh.write(ostr)

            print("Created: {}".format(outfile))

    # Write out the "cmst3" file, as a FITS file.
    #
    filename = utils.make_mhull_name(ensemble, revision)
    outfile = os.path.join(outdir, filename)
    write_hulls(ensemble, outfile, master_hulls, hull_areas,
                outlines,
                hull_store,
                svdqafile, centroidfile, stacks,
                compzero=compzero,
                revision=revision,
                creator=creator)
def dump_qa(ensemble, outdir, ctr, outline,
            creator=None, revision=1, color='green'):
    """Dump the QA data to a region-like FITS file

    The output files are qa.<ctr>.v<version>.[fits|reg], where the
    integers are written as 3-character, zero-padded values.

    Parameters
    ----------
    ensemble : string
        The ensemble value, written to the header as the ENSEMBLE
        keyword.
    outdir : string
        The output directory.
    ctr : int
        Used for the file name and added to the header as the
        HULLCPT keyword.
    outline : dict
        The hull data to write out.
    creator : None or str, optional
        The name to use for the CREATOR field in the header.
    revision : int
        The value to write out to the header as the CHSVER
        keyword.
    color : str
        The color for the DS9 region files.

    Notes
    -----
    I'd like to add a transform to POS but it is a little bit involved
    and I don't have time to dig up the code, so just add an explicit
    column instead of a virtual one. It should be the case that the
    base stack is the same for each row, but leave as a column rather
    than move to a header keyword for now.

    """

    assert chs_status.is_qa(outline['status'])
    assert outline['eqpos'] is not None
    assert outline['pos'] is not None
    assert outline['base_stack'] is not None

    idx = outline['status'].find('-')
    if idx == -1:
        reason = "unknown"
    else:
        reason = outline['status'][idx + 1:]
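    # For example, a status of 'qa-overlap' would give reason='overlap',
    # while a status with no '-' separator falls back to 'unknown'.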

    outfile = os.path.join(outdir,
                           'qa.{:03d}.v{:03d}.fits'.format(ctr,
                                                           revision))

    cr = pycrates.TABLECrate()
    cr.name = 'QACASE'

    utils.add_standard_header(cr, creator=creator, revision=revision)
    utils.add_header(cr, [('ENSEMBLE', ensemble,
                           'The ensemble'),
                          ('HULLCPT', ctr,
                           'The Master_Id of the hull'),
                          ('QREASON', reason,
                           'Why is this a QA case?')])

    eqpos = outline['eqpos']
    pos = outline['pos']
    assert len(pos) == len(eqpos)

    ncpts = len(pos)

    utils.add_col(cr, 'COMPONENT', np.arange(1, ncpts + 1))
    utils.add_col(cr, 'SHAPE', ['Polygon'] * ncpts)
    utils.add_col(cr, 'BASE_STK', [outline['base_stack']] * ncpts,
                  desc='The stack used for SKY coord system, or NONE')

    nvertex = []
    for poly in pos:
        x = poly[0]
        y = poly[1]
        xidx = np.isfinite(x)
        yidx = np.isfinite(y)
        assert (xidx == yidx).all()
        nvertex.append(xidx.sum())

    utils.add_col(cr, 'NVERTEX', nvertex,
                  desc='The number of vertexes in the closed hull')

    nmax = max(nvertex)
    pos_out = np.full((ncpts, 2, nmax), np.nan, dtype=np.float64)
    eqpos_out = np.full((ncpts, 2, nmax), np.nan, dtype=np.float64)

    for i, dvals in enumerate(zip(pos, eqpos)):

        npts = nvertex[i]
        phys, cel = dvals
        pos_out[i, :, :npts] = phys
        eqpos_out[i, :, :npts] = cel
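    # For example, with nvertex = [4, 6] both arrays have shape
    # (2, 2, 6) and the first polygon's trailing two vertex slots are
    # left as NaN padding.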

    col = pycrates.create_vector_column('POS', ['X', 'Y'])
    col.desc = 'The master hull vertices'
    col.unit = 'pixel'
    col.values = pos_out
    cr.add_column(col)

    col = pycrates.create_vector_column('EQPOS', ['RA', 'DEC'])
    col.desc = 'The master hull vertices'
    col.unit = 'degree'
    col.values = eqpos_out
    cr.add_column(col)

    cr.write(outfile)
    print("Created: {}".format(outfile))

    outfile = os.path.join(outdir,
                           'qa.{:03d}.v{:03d}.reg'.format(ctr,
                                                          revision))
    with open(outfile, 'w') as ofh:
        ds9_header(ofh, color=color)

        for i, cel in enumerate(eqpos):

            npts = nvertex[i]
            ostr = ds9_shape(cel[:, :npts])
            ostr += ' # text={{Id={} {}}}\n'.format(ctr,
                                                    i + 1)
            ofh.write(ostr)

    print("Created: {}".format(outfile))
def write_hulls(ensemble, outfile, hullcpts, hullareas, outlines,
                hull_store,
                svdqafile,
                centroidfile,
                stacks,
                compzero=0,
                revision=1,
                creator=None):
    """Create the "CHS mst3" file.

    Parameters
    ----------
    ensemble : string
        The ensemble value, written to the header as the ENSEMBLE
        keyword.
    outfile : string
        This file is overwritten if it exists.
    hullcpts : sequence of (stack, cpt) sequences
        The stack-level hulls that form each master hull.
    hullareas : dict
        The stack hull areas, in arcsec^2, where the key is the
        pair (stack, component) and the value is the area.
    outlines : list of dict
        The corresponding outline (can be an error state).
    hull_store : dict
        The keys are (stack, component) and the values are the
        stack-level data read from the mrgsrc3 file.
    svdqafile : str
        The name of the file containing the stack ids that went to
        SVD QA. The first column of this file is used as the stack
        id. The full path is written to the SVDQAFIL
        header keyword.
    centroidfile : str
        The name of the file containing the stack, cpt,
        include_centroid information (a partial list). Used to set
        up the INCLUDE_IN_CENTROID column. It is stored in the
        CENFILE header keyword.
    stacks : list of str
        The stacks that form this ensemble (whether or not they
        have a stack-level convex hull). This is used to create
        the STKIDNUM and STKIDxxx header values in the output file.
        This list is sorted by the routine (i.e. the order of input
        is not guaranteed).
    compzero : int, optional
        The value of the COMPONENT column in the HULLMATCH block
        for a stack-level hull which has a component value of 0
        (from the mrgsrc3 MEXTSRC block); note that there are no
        such component values since they are 1 based. Must be >= 0.
    revision : int
        The value to write out to the header as the CHSVER
        keyword.
    creator : None or str, optional
        The name to use for the CREATOR field in the header.
    """

    assert len(hullcpts) == len(outlines), \
        'len = {} vs {}'.format(len(hullcpts), len(outlines))

    header = {'compzero': compzero}
    header['stacks'] = sorted(list(stacks))

    svdqafile = os.path.abspath(svdqafile)
    header['svdqafile'] = svdqafile
    svdqas = utils.read_svdqafile(svdqafile)

    cenfile = os.path.abspath(centroidfile)
    header['centroidfile'] = cenfile
    exclude_cens = utils.read_centroidfile(cenfile)

    # What data do we want to store?
    mid = []
    nhulls = []
    stks = []
    cpts = []
    mtypes = []
    areas = []
    ebands = []
    lhoods = []
    mancodes = []
    revnums = []

    incl_centroids = []
    stksvdqas = []

    for i, hcpts in enumerate(hullcpts):

        m = i + 1
        nh = len(hcpts)

        # sort to try and make the file easier to scan by the user
        for key in sorted(hcpts):
            assert key in hull_store

            stack, cpt = key

            stored = hull_store[key]

            mid.append(m)
            nhulls.append(nh)
            stks.append(stack)
            cpts.append(cpt)
            mtypes.append('Unambiguous')
            areas.append(hullareas[key])
            ebands.append(stored['eband'])
            lhoods.append(stored['likelihood'])

            # would like to keep as a bit array
            mancodes.append(stored['man_code'])

            revnums.append(stored['mrgsrc3rev'])

            # These are case-sensitive checks
            incl_centroids.append(key not in exclude_cens)
            stksvdqas.append(stack in svdqas)

    hullmatch = {}
    hullmatch['master_id'] = mid
    hullmatch['nhulls'] = nhulls
    hullmatch['stackid'] = stks
    hullmatch['component'] = cpts
    hullmatch['match_type'] = mtypes
    hullmatch['area'] = areas
    hullmatch['eband'] = ebands
    hullmatch['likelihood'] = lhoods
    hullmatch['man_code'] = mancodes
    hullmatch['mrg3rev'] = revnums
    hullmatch['include_in_centroid'] = incl_centroids
    hullmatch['stksvdqa'] = stksvdqas

    # What is the maximum number of points in a hull?
    #
    nmax = 0
    for outline in outlines:
        if chs_status.is_qa(outline['status']):
            continue

        eqpos = outline['eqpos']
        nmax = max(nmax, eqpos.shape[1])

    # It is okay for nmax=0 - if all master hulls are QA cases - so we
    # only want to warn if we have an unusually-small polygon.
    #
    if nmax < 3:
        if nmax > 0:
            print("WARNING: max number of vertices in a " +
                  "hull={}".format(nmax))
        nmax = 3

    # Some of these columns are a single value, but put in here
    # so we can see the proposed file structure, and to make
    # downstream processing a bit easier (i.e. not having to worry
    # about whether a file exists).
    #
    mid = []
    status = []
    base_stack = []
    man_reg = []
    man_match = []

    nhulls = len(outlines)

    # I have moved this logic into create_mhull_file
    # eqpos = np.full((nhulls, 2, nmax), np.nan,
    #                 dtype=np.float64)
    eqpos = []

    # Ugh: loop getting messy
    nstkhull = [len(c) for c in hullcpts]
    nvertex = []

    for i, outline in enumerate(outlines):

        # TODO: could calculate the number of stacks that contribute/
        #       number with no data
        mid.append(i + 1)
        status.append(outline['status'])

        man_reg.append(False)
        man_match.append(False)

        bs = outline['base_stack']
        if bs is None:
            bs = "NONE"

        base_stack.append(bs)

        if chs_status.is_qa(outline['status']):
            nvertex.append(0)
            eqpos.append(None)

            print("Skipping data for " +
                  "Match_Id={} status={}".format(i + 1,
                                                 outline['status']))
        else:
            vs = outline['eqpos']
            assert vs.ndim == 2
            assert vs.shape[0] == 2
            npts = vs.shape[1]

            nvertex.append(npts)
            # eqpos[i, :, :npts] = vs
            eqpos.append(vs)

    hulllist = {}
    hulllist['master_id'] = mid
    hulllist['status'] = status
    hulllist['base_stk'] = base_stack
    hulllist['manmatch'] = man_match
    hulllist['manreg'] = man_reg
    hulllist['nvertex'] = nvertex
    hulllist['nstkhull'] = nstkhull
    hulllist['eqpos'] = eqpos

    utils.create_mhull_file(ensemble, revision, outfile,
                            hullmatch, hulllist, header,
                            creator=creator)