def draw_hulls_and_images(master_hull,
                          stackhulls,
                          hullmap,
                          stkevt3dir,
                          xmdat3dir,
                          outdir,
                          ensemble,
                          ensemblemap,
                          revision,
                          qahulls=None,
                          evtscale='log10',
                          master_color='gold',
                          qa_color='cyan',
                          axscale=0.5,
                          show_other_stack_hulls=False,
                          colorbar=True):
    """Draw individual hull+image for all members of an ensemble.

    An image is created per stack-hull; this means that if there are
    2 hulls from the same stack then there will be 2 images. This is
    a change to the original implementation, partly necessitated by
    the desire to show the correct band.

    The display is limited to a "zoomed" in section around the master
    hull (controlled by the axscale). Other hulls from the stack are
    drawn, but will only be visible if they fall in this area.

    Parameters
    ----------
    master_hull: dict
        Contains the master hull: fields are 'master_id', 'status',
        and 'eqpos'. The 'status' field should be one of:
        'todo', 'okay', 'qa[-...]'.
    stackhulls : list
        What stack-level hulls form this master hull? Each entry
        is a dictionary with the keys 'stack' and 'component'.
    hullmap : dict
        The stack-level hull data, stored by the stack id. The values
        are those returned by read_hulls_from_mrgsrc3.
    stkevt3dir : str
        The location of the stkevt3 files.
    xmdat3dir : str
        The location of the xmdat3 files (per stack, optional).
    outdir : str
        The output directory, which must exist.
    ensemble : str
        The ensemble value.
    ensemblemap : dict
        The keys are stack ids, and the values are the STKIDxxx
        value (i.e. the integer value of xxx).
    revision : int
        The revision number of the file.
    qahulls : None or list of dict, optional
        This is only used if master_hull['status'] is set to
        'qa[-...]'. Each entry represents a hull, and has the
        'eqpos' field which contains the polygon.
    evtscale : {'none', 'log10', 'log', 'sqrt'}, optional
        The scaling to apply to the event data before displaying it.
    master_color : str, optional
        The color for the master hull.
    qa_color : str, optional
        The color for any QA hulls.
    axscale : float, optional
        The additional space around the hull, as a fraction of the
        width/height of the hull. This value refers to the delta
        added to each side (so twice this is added overall). Note
        that the space added will be more than this fraction for the
        "smaller" side of the hull, as the plot aspect ratio is
        maintained.
    show_other_stack_hulls : bool, optional
        Should the hulls from other stacks be drawn on the image?
    colorbar : bool, optional
        Should each image have a color bar?

    Returns
    -------
    pinfo : dict
        The 'npages' key gives the number of pages, as a Python int.

    Raises
    ------
    ValueError
        If outdir is not a directory.
    IOError
        If the event file for one or more stacks can not be found.

    Notes
    -----
    For the initial revision we do not need to worry about Match_Type
    being ambiguous (this is the 'match_type' keyword of each entry
    in stackhulls). If we re-run after some master-match work then
    it may be necessary to disambiguate visually.
    """

    if not os.path.isdir(outdir):
        raise ValueError("outdir={} is not a directory".format(outdir))

    masterid = master_hull['master_id']

    # It is easier if we can map the stack hull data via the
    # key: stack, cpt. Actually, the following code is very unclear
    # about data ownership, and really could do with a clean up.
    #
    # It is also useful to know what stacks we have.
    #
    stacks = sorted({shull['stack'] for shull in stackhulls})

    # How many hulls are there in this master hull?
    nhulls = len(stackhulls)
    nstacks = len(stacks)

    # Extract all the hulls from the stacks we are interested in,
    # whether they are related to the given master hull or not.
    #
    hulldata = {}
    for stack, hulls in hullmap.items():
        if stack not in stacks:
            continue

        for hull in hulls:
            assert hull['stack'] == stack
            key = stack, hull['component']
            assert key not in hulldata
            hulldata[key] = hull

    # Do we have any xmdat3 files for these stacks? We want to
    # store the None to mark stacks as having no data.
    #
    xmdat3map = {stack: read_xmdat3(xmdat3dir, stack)
                 for stack in stacks}

    # Need limits for determining how much of the event file
    # to read in.
    #
    # Technically the QA and master hulls should be included here, but
    # they should all, by definition, be no larger than the
    # stack-level hulls.
    #
    data_lims = find_hull_limits(stackhulls, hulldata, axscale=axscale)
    sfilt = data_lims['dmfilter']

    # Find the event files for the stacks. Note that we check
    # all files and then report errors at the end (so you don't
    # have to download a file, run, download a file, run, ...
    # to get them all). Of course, if there are multiple hulls
    # in the ensemble this only handles one of them.
    #
    evtfiles = {}
    failed = []
    for stack in stacks:
        path = os.path.join(stkevt3dir,
                            '{}*_evt3.fits*'.format(stack))
        try:
            evtfiles[stack] = utils.find_single_match(path)
        except IOError as exc:
            failed.append((stack, str(exc)))

    if failed:
        emsg = "\n".join([f[1] for f in failed])
        raise IOError("The following event file(s) are " +
                      "missing:\n{}".format(emsg))

    # Could go for a more-adaptive scheme
    #
    # Note that we special-case ensemble 0019200_001, which has
    # ~ 35 stack-level hulls, and is also known as Cas A, to
    # only have a 2 by 2 grid in the hope this saves enough
    # memory. It isn't, because there are multiple evt files
    # whose size is >~ 4 Gb for this particular source.
    #
    # The sizes are converted with int() rather than the NumPy
    # np.int alias, which is no longer available in recent NumPy
    # releases; this also means npages is a plain Python int.
    #
    if ensemble == 'ens0019200_001':
        print('Note: special case Cas A to 2 by 2 grid')
        nsize = 2
        nplots = nsize * nsize
        npages = int(np.ceil(nhulls / (nplots * 1.0)))

    elif nhulls <= 9:
        nsize = int(np.ceil(np.sqrt(nhulls)))
        nplots = nsize * nsize
        npages = 1

    else:
        nsize = 3
        nplots = nsize * nsize
        npages = int(np.ceil(nhulls / (nplots * 1.0)))

    def save_plot(escale, pagenum):
        """Create PNG output then destroy the window"""

        fmt = 'hull.{}.{:03d}.p{:03d}.v{:03d}.{}.png'
        outfile = fmt.format(ensemble, masterid, pagenum,
                             revision, escale)
        outfile = os.path.join(outdir, outfile)

        # Hide any warnings about square root of a negative number.
        #
        oldsettings = np.seterr(all='ignore')
        try:
            plt.savefig(outfile)
        finally:
            np.seterr(**oldsettings)

        print("Created: {}".format(outfile))
        plt.close()

    title = "{} {:03d}".format(ensemble, masterid) + \
            " #stacks={} #hulls= {}".format(nstacks, nhulls)

    qatitle = None
    if chs_status.is_qa(master_hull['status']):
        qatitle = master_hull['status'].upper()

    page_idx = 0

    # Are these correct?
    efilts = {
        'w': '',
        'b': '[energy=500:7000]',
        'u': '[energy=300:500]',
        's': '[energy=500:1200]',
        'm': '[energy=1200:2000]',
        'h': '[energy=2000:7000]'
    }

    # Work out the event file + VFS stacks to use before reading
    # anything else, since this lets us cache the result, which may
    # help out with really-large datasets (but only if the
    # band matches).
    #
    # TODO: look at this
    # Note that this cache is fairly pointless as we repeat this
    # code several times (for different scalings), when we should
    # perhaps just change the scaling here (to avoid re-creating
    # everything).
    #
    # key = (stack, cpt), value = file name incl VFS
    evt_name = {}

    # keys = file name incl VFS, values = number of times
    evt_count = defaultdict(int)

    for shull in stackhulls:

        stack = shull['stack']
        cpt = shull['component']
        key = stack, cpt

        evtfile = evtfiles[stack]

        # What band to use?
        #
        bname = hulldata[key]['eband']
        if bname == 'w':
            bspec = '[bin sky=::32]'
        else:
            bspec = '[bin sky=::8]'

        efilt = efilts[bname]

        iname = evtfile + efilt + sfilt + bspec
        evt_name[key] = iname
        evt_count[iname] += 1

    evt_cache = {}
    fig = None
    axplot = None
    for hull_idx, shull in enumerate(stackhulls):

        r = add_component_image(nhulls, nsize, npages,
                                title, qatitle,
                                fig, axplot,
                                page_idx, hull_idx, nplots,
                                evtscale,
                                shull, hulldata, xmdat3map,
                                hullmap, ensemblemap,
                                stacks,
                                master_hull, qahulls,
                                save_plot,
                                evt_name, evt_count, evt_cache,
                                master_color=master_color,
                                qa_color=qa_color,
                                show_other_stack_hulls=show_other_stack_hulls,
                                colorbar=colorbar)

        fig, axplot, page_idx = r

    # Don't forget to save the last page.
    save_plot(evtscale, page_idx)

    assert page_idx == npages
    return {'npages': npages}
def update_review_products(chsfile, outdir,
                           mrgsrc3dir, stkevt3dir,
                           stkfov3dir, xmdat3dir):
    """Update the review products.

    The context ("field") image and the per-master hull images are
    re-created for the revision stored in chsfile. The routine exits
    (status 1) if outdir is missing or the context image for this
    revision has already been written.

    Parameters
    ----------
    chsfile : str
        The FITS file containing the master hull data.
    outdir : str
        The output directory, which must exist.
    mrgsrc3dir, stkevt3dir, stkfov3dir, xmdat3dir : str
        The directory names containing the mrgsrc3, evt3, fov3,
        and xmdat3 files for the stacks. The names must match
        <stack>*<type>.fits[.gz] and there can only be one per
        stack per type. The xmdat3 files are optional and are
        stored as <stack>/<stack>N000_xmdat3.fits.

    Notes
    -----
    Should the "context" plot - i.e. all hulls from an ensemble -
    also include the current master hulls? I worry that it will make
    it harder to see the details, but it lets you see at a glance
    if there are potential issues.
    """

    # We are assumed to be running on updated files, in which case
    # the output directory has already been created.
    #
    if not os.path.isdir(outdir):
        sys.stderr.write("ERROR: outdir does not exist ")
        sys.stderr.write("{}\n".format(outdir))
        sys.exit(1)

    chsdir = os.path.dirname(chsfile)

    master_data = utils.read_master_hulls(chsfile, mrgsrc3dir)
    hullmatch, hulllist, metadata = master_data

    ensemble = metadata['ensemble']
    ensemblemap = metadata['ensemblemap']
    revision = metadata['revision']

    ctxfile = os.path.join(outdir,
                           'field.{}.v{:03d}.png'.format(ensemble,
                                                         revision))
    if os.path.exists(ctxfile):
        sys.stderr.write("ERROR: context image already exists ")
        sys.stderr.write("{}\n".format(ctxfile))
        sys.exit(1)

    mids = sorted(hulllist.keys())

    # Only the stacks that contain a hull are of interest.
    #
    stacks = {stkhull['stack']
              for stkhulls in hullmatch.values()
              for stkhull in stkhulls}
    assert len(stacks) > 0

    # It might be more useful to sort the stacks by "number of fov
    # files" say, or some other criteria.
    #
    stacks = sorted(stacks)

    # Locate the FOV files up front, so a missing file is found
    # before any processing is done.
    #
    fov3files = [utils.find_stkfov3(stack, stkfov3dir)
                 for stack in stacks]

    # The QA hulls, keyed by master id.
    #
    qas = {}
    for mid in mids:
        src = hulllist[mid]
        if not chs_status.is_qa(src['status']):
            continue

        qas[mid] = read_qa_hulls(chsdir, revision, src['master_id'])

    # hullmatch is keyed by master id whereas the plotting code wants
    # the stack-level hulls keyed by stack, so regroup them (this used
    # to come from read_hulls_from_mrgsrc3, which is now part of
    # read_master_hulls).
    #
    hullmap = defaultdict(list)
    for stkhulls in hullmatch.values():
        for stkhull in stkhulls:
            hullmap[stkhull['stack']].append(stkhull)

    # Context image: FOV + all the hulls
    #
    hulls = [hullmap[stack] for stack in stacks]
    plots.draw_ensemble_outline(ensemble, hulllist, hulls, qas,
                                fov3files)

    plt.savefig(ctxfile)
    print("Created: {}".format(ctxfile))

    # Per-master hulls. Each scaling repeats a lot of work, but
    # avoiding this is not worth the added complexity.
    #
    for mid in mids:
        src = hulllist[mid]
        qahulls = qas.get(mid)

        for evtscale in ['log10', 'sqrt', 'none']:
            plots.draw_hulls_and_images(src,
                                        hullmatch[mid],
                                        hullmap,
                                        stkevt3dir,
                                        xmdat3dir,
                                        outdir,
                                        ensemble,
                                        ensemblemap,
                                        revision,
                                        evtscale=evtscale,
                                        qahulls=qahulls)
def draw_ensemble_outline(ensemble, mhulls, hulls, qas, fov3files,
                          qa_color='cyan'):
    """Draw up the FOVs for the ensembles and the different stack-level
    hulls.

    A new 10 by 10 figure is created and left as the current
    matplotlib figure; it is up to the caller to save it.

    Parameters
    ----------
    ensemble : str
        The ensemble name; it is used for the plot title and
        error messages.
    mhulls : dict
        The master hulls - the second argument of
        utils.read_master_hulls - which is a dictionary keyed by
        the master id and the values give the master-hull details.
    hulls : list of list of stack hulls
        The stack-level hulls; each entry in the list is the
        output of read_hulls_from_mrgsrc3
    qas : None or dict
        If not None then the QA hulls for this ensemble, where
        the key is the master id and the value is the return
        value of chs_create_review_images_mpl:read_qa_hulls.
    fov3files : list of str
        The FOV3 files for the stacks in the ensemble. This is assumed
        to be the FOV files for those stacks containing hulls
        (i.e. they act as a reasonable bounding box for the display).
    qa_color : str, optional
        The color for any QA hulls and labels.

    Raises
    ------
    IOError
        If no stack-level hull provides a transform (e.g. hulls is
        empty).
    """

    # Use the first transform we find as the base. Both loops must
    # stop once a transform has been found, otherwise a later stack
    # would replace it.
    #
    tr = None
    for hlist in hulls:
        for hull in hlist:
            tr = hull['transform']
            break

        if tr is not None:
            break

    if tr is None:
        raise IOError("No hulls found for " +
                      "ensemble: {}".format(ensemble))

    wcs = tr2wcs(tr)

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(1, 1, 1, projection=wcs)
    ax_trans = ax.get_transform('world')

    # We want a different color for each stack, but note that there
    # are multiple polygons per stack, so we can not just rely on
    # matplotlib's automatic cycling of colors.
    #
    # Use the default matplotlib set of colors for cycling.
    #
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    _nextcolor = cycler(color=colors)()

    def nextcolor():
        return next(_nextcolor)['color']

    for fovfile in fov3files:

        # assume everything is a polygon
        cr = pycrates.read_file(fovfile)

        color = nextcolor()
        for eqpos in cr.get_column('EQPOS').values:
            ra = eqpos[0]
            dec = eqpos[1]

            # Only the finite RA values are used to select the
            # points to plot.
            idx = np.isfinite(ra)
            ra = ra[idx]
            dec = dec[idx]

            plt.plot(ra, dec, color=color, alpha=0.07,
                     transform=ax_trans)

    # Draw these somewhat transparent so that the numbering added
    # later can be seen (it can get busy).
    #
    stack_color = 'black'
    main_color = 'black'

    for hlist in hulls:
        for stkhull in hlist:

            if stkhull['mancode']:
                lstyle = 'dotted'
            else:
                lstyle = 'solid'

            # should this use chs_status.DELETE?
            if stkhull['match_type'] == "deleted":
                use_color = 'red'
            else:
                use_color = stack_color

            eqpos = stkhull['eqpos']
            ra = eqpos[0]
            dec = eqpos[1]
            plt.plot(ra, dec, linewidth=1, linestyle=lstyle,
                     alpha=0.6,
                     transform=ax_trans, color=use_color)

    # Now the master hulls; these are going to overwrite the stack-level
    # hulls for the majority of cases (single stack hull), so use a
    # thinner line width, which is not ideal.
    #
    # QA masters are not drawn here, but their presence changes the
    # title color.
    #
    title_col = 'k'
    for mid, mhull in mhulls.items():

        if chs_status.is_qa(mhull['status']):
            title_col = 'r'
            continue

        eqpos = mhull['eqpos']
        ra = eqpos[0]
        dec = eqpos[1]
        plt.plot(ra, dec, linewidth=1, linestyle='solid',
                 alpha=0.6,
                 transform=ax_trans, color=main_color)

        label_hull(ax_trans, mhull, "{}".format(mid), main_color)

    # Do we have any qa hulls?
    #
    if qas is not None:
        for mid, qahulls in qas.items():

            # We label each "qa component"
            #
            nqas = len(qahulls)
            for i, qahull in enumerate(qahulls):
                draw_hull(ax_trans, qahull, qa_color, 1, 'dashed')

                lbl = "QA {}".format(mid)
                if nqas > 1:
                    lbl += " [{}]".format(i + 1)

                label_hull(ax_trans, qahull, lbl, qa_color)

    # Report on the number of stacks with hulls, not the number
    # in the ensemble.
    #
    lbl = "Ensemble {}  #stacks={}  #hulls={}".format(ensemble,
                                                      len(fov3files),
                                                      len(mhulls))
    plt.title(lbl, fontsize=18, color=title_col)

    ax.coords['ra'].set_major_formatter('hh:mm:ss')
    ax.coords['dec'].set_major_formatter('dd:mm:ss')

    ax.set_aspect('equal')
def add_hulls(stack, cpt, hullmap, master_hull, qahulls, axes,
              master_color='gold',
              qa_color='cyan'):
    """Draw the stack, master, and QA hulls on the axes.

    Every hull stored for the stack is drawn - not only the
    requested component - which can be useful if nearby hulls
    overlap. They are followed by either the QA hulls (when the
    master hull has a QA status) or the master hull itself.

    Parameters
    ----------
    stack : str
        The stack name.
    cpt
        The component number for the hull.
    hullmap : dict
        The stack-level hull data, stored by the stack id. The values
        are those returned by read_hulls_from_mrgsrc3.
    master_hull: dict
        Contains the master hull: fields are 'master_id', 'status',
        and 'eqpos'. The 'status' field should be one of:
        'todo', 'okay', 'qa[-...]'.
    qahulls : None or list of dict, optional
        This is only used if master_hull['status'] is set to
        'qa[-...]'. Each entry represents a hull, and has the
        'eqpos' field which contains the polygon.
    axes
        The plot axes.
    master_color : str, optional
        The color for the master hull.
    qa_color : str, optional
        The color for any QA hulls.

    """

    world = axes.get_transform('world')

    # The requested component is drawn in orange and the other hulls
    # from the stack in a red-ish color, to try and match the
    # masterhull.js behavior.
    #
    for shull in hullmap[stack]:
        is_requested = shull['component'] == cpt
        color = 'orange' if is_requested else '#cc3333'
        draw_hull(world, shull, color, 2, 'solid')

    # NOTE: the master and QA hulls use a thinner line than the
    #       stack-level hulls so that they do not hide them when
    #       the contours are the same, or very similar.
    #
    if not chs_status.is_qa(master_hull['status']):
        draw_hull(world, master_hull, master_color, 1, 'solid')
        return

    for qahull in qahulls:
        draw_hull(world, qahull, qa_color, 1, 'dashed')
def create_review_products(chsfile, outdir,
                           mrgsrc3dir, stkevt3dir,
                           stkfov3dir, xmdat3dir,
                           ignorestatus=False,
                           ignorenvertex=False):
    """Create the review products.

    The products are the context ("field") image, the ensemble and
    per-component JSON files, the per-master hull images, and the
    per-master data saved by utils.save_master.

    Parameters
    ----------
    chsfile : str
        The FITS file containing the master hull data.
    outdir : str
        The output directory, which may be created by the routine.
        The last component *must* be the ensemble name.
    mrgsrc3dir, stkevt3dir, stkfov3dir, xmdat3dir : str
        The directory names containing the mrgsrc3, evt3, fov3,
        and xmdat3 files for the stacks. The names must match
        <stack>*<type>.fits[.gz] and there can only be one per
        stack per type. The xmdat3 files are optional and are
        stored as <stack>/<stack>N000_xmdat3.fits.
    ignorestatus : bool, optional
        If set, the STATUS value for each master hull is set to
        chs_utils.DONE, and a screen message is written out saying
        what the old value was.
    ignorenvertex : bool, optional
        If set, the NVERTEX value of the HULLLIST block is not used
        to filter the position array. Instead a manual check is
        used. This is because the output from Joe's code hasn't
        adjusted this value.

    Raises
    ------
    IOError
        If the last component of outdir is not the ensemble name.

    Notes
    -----
    Should the "context" plot - i.e. all hulls from an ensemble -
    also include the current master hulls? I worry that it will make
    it harder to see the details, but it lets you see at a glance
    if there are potential issues.
    """

    chsdir = os.path.dirname(chsfile)

    hullmatch, hulllist, metadata = utils.read_master_hulls(
        chsfile,
        mrgsrc3dir,
        ignorestatus=ignorestatus,
        ignorenvertex=ignorenvertex)

    ensemble = metadata['ensemble']
    ensemblemap = metadata['ensemblemap']
    revision = metadata['revision']

    # NOTE: utils.save_master requires the "user directory" - i.e. the
    #       parent of outdir - and this means that we assume that outdir
    #       ends in the ensemble name.
    #
    if os.path.basename(outdir) != ensemble:
        raise IOError("Expected outdir={} ".format(outdir) +
                      "to end with {}".format(ensemble))

    userdir = os.path.normpath(os.path.join(outdir, '..'))

    mids = sorted(hulllist.keys())

    # What stacks do we care about (those with hulls)
    stacks = set()

    # ncpts is the number of stack hulls in the ensemble
    ncpts = 0

    for stkhulls in hullmatch.values():
        for stkhull in stkhulls:
            stacks.add(stkhull['stack'])
            ncpts += 1

    assert len(stacks) > 0

    # It might be more useful to sort the stacks by "number of fov
    # files" say, or some other criteria.
    #
    stacks = sorted(stacks)

    # Force a check that we can find these files before any
    # processing.
    #
    fov3files = [utils.find_stkfov3(s, stkfov3dir) for s in stacks]

    # Read in all the QAs, labelled by mid.
    #
    qas = {}
    for mid in mids:
        src = hulllist[mid]
        if chs_status.is_qa(src['status']):
            qas[mid] = read_qa_hulls(chsdir, revision,
                                     src['master_id'])

    # Ideally I would remove hullmap, and use hullmatch directly,
    # but the format is slightly different, so recreate hullmap
    # (it used to be formed by calling read_hulls_from_mrgsrc3
    # but this is now part of read_master_hulls).
    #
    hullmap = defaultdict(list)
    for masterhull in hullmatch.values():
        for hull in masterhull:
            stack = hull['stack']
            hullmap[stack].append(hull)

    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    # Context image: FOV + all the hulls
    #
    hulls = [hullmap[s] for s in stacks]
    plots.draw_ensemble_outline(ensemble, hulllist, hulls, qas,
                                fov3files)

    filename = 'field.{}.v{:03d}.png'.format(ensemble, revision)
    outfile = os.path.join(outdir, filename)
    plt.savefig(outfile)
    print("Created: {}".format(outfile))

    # Create the ensemble JSON file
    #
    revstr = "{:03d}".format(revision)
    ensdata = {'name': ensemble,
               'revision': revstr,
               'nmasters': len(mids),
               'nstacks': len(stacks),
               'ncpts': ncpts,
               'stackmap': ensemblemap,
               'status': chs_status.TODO,
               'lastmodified': '',  # could add date string here
               'usernotes': ''
               }

    filename = utils.make_field_name_json(ensemble, revision)
    outfile = os.path.join(outdir, filename)

    # Use a context manager so that the file is flushed and closed
    # before it is reported as created.
    with open(outfile, 'w') as ofh:
        ofh.write(json.dumps(ensdata))

    print("Created: {}".format(outfile))

    # Create the per-component JSON files
    #
    for stkhulls in hullmatch.values():
        for stkhull in stkhulls:

            # Need to convert from NumPy booleans to Python ones
            # otherwise the serialization to JSON fails.
            #
            stkdata = {'lastmodified': '',
                       'stack': stkhull['stack'],
                       'component': stkhull['component'],
                       'key': stkhull['key'],
                       'ensemble': ensemble,
                       'revision': revstr,
                       # Note: need an array for master ids (in case
                       #       of ambiguous links)
                       'master_id': [stkhull['master_id']],
                       'likelihood': stkhull['likelihood'],
                       'eband': stkhull['eband'],
                       'mrg3rev': stkhull['mrg3rev'],
                       'mancode': bool(stkhull['mancode']),
                       'stksvdqa': bool(stkhull['stksvdqa']),
                       'include_in_centroid':
                       bool(stkhull['include_in_centroid'])}

            filename = utils.make_component_name_json(
                ensemble,
                stkhull['stack'],
                stkhull['component'],
                revision)
            outfile = os.path.join(outdir, filename)
            with open(outfile, 'w') as ofh:
                ofh.write(json.dumps(stkdata))

            print("Created: {}".format(outfile))

    # Per-master hulls
    #
    for mid in mids:
        src = hulllist[mid]

        try:
            qahulls = qas[mid]
        except KeyError:
            qahulls = None

        # a lot of repeated work to support different scalings,
        # but not worth the complexity of avoiding this
        for evtscale in ['log10', 'sqrt', 'none']:
            pinfo = plots.draw_hulls_and_images(src,
                                                hullmatch[mid],
                                                hullmap,
                                                stkevt3dir,
                                                xmdat3dir,
                                                outdir,
                                                ensemble,
                                                ensemblemap,
                                                revision,
                                                evtscale=evtscale,
                                                qahulls=qahulls)

        if chs_status.is_qa(src['status']):
            action = 'manual'
        else:
            action = ''

        # The page count is taken from the last scaling drawn.
        ensdata = {'ensemble': ensemble,
                   'masterid': mid,
                   'revision': revstr,
                   'ncpts': len(hullmatch[mid]),
                   'npages': pinfo['npages'],
                   'useraction': action,
                   'usernotes': ''
                   }

        outfile = utils.save_master(userdir, ensdata)
        print("Created: {}".format(outfile))
def make_merged_hull(hullcpts, mrgsrc3dir,
                     acceptfrac=0.2,
                     maxcount="cohorts",
                     sigma=3,
                     nsigma=5,
                     tmpdir=None):
    """Create the merged hull.

    Parameters
    ----------
    hullcpts : sequence of (stackid, component)
        The stack-level hulls to merge
    mrgsrc3dir : str
        The location of the mrgsrc3 files.
    acceptfrac : float, optional
        The fraction at which to draw the merged polygon (when
        multiple hulls are present). The value used is
        acceptfrac * n, where n is determined by maxcount.
    maxcount : {'cohorts', 'hulls'}
        Should n be the number of different cohorts in the list of
        overlapping hulls (maxcount='cohorts'), or the number of
        convex hulls (maxcount='hulls'). This only makes a
        difference when there are multiple hulls from a single
        cohort in the list.
    nsigma, sigma : float
        The number of sigma (the box size) and the sigma, in pixels,
        of the gaussian used to smooth the image. If either is None
        then no smoothing is done. Note that the smoothing scale,
        sigma, is given in ACIS pixels, so is multiplied by 3.8
        before being applied to HRC data (so that the physical
        scale being smoothed is similar).
    tmpdir : None or str
        The directory to use for temporary files. If None, the default
        is used (the Python tempfile isn't very explicit about what
        this default is, but it tends to be /tmp on Linux).

    Returns
    -------
    outline : dict
        The outline of the merged hull. The keys are:
        status, eqpos, pos, base_stack.
        If status is not 'todo' then eqpos and pos can be None
        (status is 'error', in which case base_stack will also be
        None) or 3D shapes (i.e. npolygons, 2, npts), when status
        is 'qa-<reason>'.

    Raises
    ------
    ValueError
        If hullcpts is empty.

    Notes
    -----
    Information flow on failure (including unexpected or qa) could
    be better.

    """

    if len(hullcpts) == 0:
        raise ValueError("hullcpts is empty")

    hulls = [read_hull(stkid, cpt, mrgsrc3dir)
             for stkid, cpt in hullcpts]

    # If there is only one hull then just promote the stack-level
    # data.
    #
    if len(hulls) == 1:
        h0 = hulls[0]
        return {'status': 'todo',
                'eqpos': h0['eqsrc'],
                'pos': h0['pos'],
                'base_stack': h0['stkid']}

    # Forward the caller's settings rather than the defaults, so that
    # the acceptfrac, maxcount, sigma, nsigma, and tmpdir arguments
    # are honored.
    #
    out, tr_stkid = merge_polygons(hulls,
                                   acceptfrac=acceptfrac,
                                   maxcount=maxcount,
                                   sigma=sigma,
                                   nsigma=nsigma,
                                   tmpdir=tmpdir)

    # ARGH: the keys are different to read_hull, which makes this
    #       needlessly different, but not worth changing now.
    #
    if out['status'] == 'okay':
        return {'status': 'todo',
                'eqpos': out['hull_cel'],
                'pos': out['hull_sky'],
                'base_stack': tr_stkid}

    print("WARNING: when calculating merged hull got")
    print("status= {}".format(out['status']))
    print("reason= {}".format(out['reason']))

    # Possible failures are
    #    status=failed   - may not have any data
    #    status=error    - returned unexpected data
    #    status=qa...    - there's a problem
    #
    if chs_status.is_qa(out['status']):
        return {'status': out['status'],
                'eqpos': out['hulls_cel'],
                'pos': out['hulls_sky'],
                'base_stack': tr_stkid}

    # There could be some data to return here, but for now
    # error out everything.
    return {'status': 'error', 'eqpos': None, 'pos': None,
            'base_stack': None}
def process_ensemble(ensemblefile, ensemble, outdir,
                     mrgsrc3dir,
                     svdqafile=None,
                     centroidfile=None,
                     compzero=0,
                     revision=1,
                     creator=None,
                     master_color='green',
                     stack_color='green',
                     qa_color='green'):
    """Process the ensemble to create the CHS review products.

    Parameters
    ----------
    ensemblefile : str
        The file should contain columns ensemble and stack, and is
        used to find what stacks to look for.
    ensemble : str
        The ensemble name.
    outdir : str
        The output directory, which will be created by the routine
        (so it must not already exist).
    mrgsrc3dir : str
        The directory name containing the mrgsrc3 files for the
        stacks. The names must match <stack>*mrgsrc3.fits* and
        there can only be one file per stack.
    svdqafile : str or None, optional
        If given then the name of the file containing the stack ids
        that went to SVD QA. The first column of this file is used
        as the stack id. The full path is written to the SVDQAFIL
        header keyword (or the string NONE if not given). If there
        is no such file then the STKSVDQA column is not written out.
    centroidfile : str or None, optional
        If given then the name of the file containing the stack, cpt,
        include_centroid information (a partial list). Used to set
        up the INCLUDE_IN_CENTROID column.
    compzero : int, optional
        The value of the COMPONENT column in the HULLMATCH block
        for a stack-level hull which has a component value of 0
        (from the mrgsrc3 MEXTSRC block); note that there are no
        such component values since they are 1 based. Must be >= 0.
    revision : int, optional
        The value to write out to the header as the CHSVER
        keyword.
    creator : str or None, optional
        If set it is used as the CREATOR keyword value in the output
        file.
    master_color, stack_color, qa_color : str, optional
        The colors passed on when writing the master, stack-level,
        and QA region files respectively.

    Raises
    ------
    ValueError
        If compzero is negative.
    IOError
        If outdir already exists.

    """

    if compzero < 0:
        raise ValueError("compzero must be >= 0, sent {}".format(compzero))

    if os.path.exists(outdir):
        raise IOError("The output directory already exists: {}".format(outdir))

    # All the input is read in before the output directory is made.
    #
    stacks = find_ensemble_stacks(ensemblefile, ensemble)
    nstacks = len(stacks)

    # write_hulls needs the eband, mancode, ... information for each
    # stack-level hull, so keep the stack-level records in a store
    # keyed by (STACKID, COMPONENT) rather than re-architect the code.
    #
    hull_store = {}
    transform_store = {}

    hulls = []
    stacks_with_hulls = 0
    for stack in stacks:
        shulls = read_hulls(stack, mrgsrc3dir)
        if shulls == []:
            continue

        stacks_with_hulls += 1
        hulls.extend(shulls)

        for shull in shulls:
            key = (shull['stack'], shull['component'])
            assert key not in hull_store
            hull_store[key] = shull

        # The first hull supplies the transform for the stack.
        transform_store[stack] = shulls[0]['transform']

    if hulls == []:
        print("No hulls were found in ensemble {}".format(ensemble))
        no_hulls(outdir)
        return

    os.mkdir(outdir)

    nhulls_stack = len(hulls)
    print("There are {} stack-level hulls ".format(nhulls_stack) +
          "in {} stacks".format(stacks_with_hulls))
    if stacks_with_hulls != nstacks:
        print("Total number of stacks: {}".format(nstacks))

    # Write each stack-level hull out as a DS9 region file and
    # record its area.
    #
    hull_areas = {}
    for hull in hulls:
        write_stack_hull_as_ds9(hull, outdir, revision=revision,
                                color=stack_color)
        hull_areas[hull['stack'], hull['component']] = hull['area']

    print("Created stack-level region files in {}".format(outdir))

    # Separate the hulls into those that overlap and those that
    # do not.
    #
    overlap_gr, singles = identify.find_overlap_graph(hulls)
    overlaps = identify.get_nodes(overlap_gr)

    # Each overlap becomes a list sorted by (stack, cpt), and the
    # overlaps are ordered from the most members to the fewest (hence
    # the -len in the key), with ties broken by the members.
    #
    def overlap_order(members):
        return (-len(members), members)

    overlaps = sorted([sorted(list(overlap)) for overlap in overlaps],
                      key=overlap_order)

    master_hulls = []
    seen = set([])

    # Overlap hulls
    #
    for overlap in overlaps:
        master_hulls.append([(stk, cpt) for stk, cpt in overlap])
        for member in overlap:
            assert member not in seen, \
                'repeat occurence of {}'.format(member)
            seen.add(member)

    # Single hulls
    #
    for single in singles:
        master_hulls.append([single])
        assert single not in seen, \
            'repeat occurence of {}'.format(single)
        seen.add(single)

    nfound = len(seen)
    assert nhulls_stack == nfound, 'Lost (or gained) a hull'

    print("Found {} overlap and {} single hulls".format(len(overlaps),
                                                        len(singles)))

    # Create the initial version of the master hulls and dump out
    # any QA cases here.
    #
    # The master hulls are created, then a check is made to
    # ensure they do not overlap (marking them as qa cases if
    # they do), then they are written out.
    #
    outlines = [merge.make_merged_hull(hullcpts, mrgsrc3dir)
                for hullcpts in master_hulls]

    noverlap = sum(1 for m1, m2 in itertools.combinations(outlines, 2)
                   if merge.do_masters_overlap(m1, m2, transform_store))
    if noverlap > 0:
        print("NOTE: found {} master overlaps -> QA".format(noverlap))

    # Master ids are 1 based.
    for master_id, outline in enumerate(outlines, 1):

        status = outline['status']
        if chs_status.is_qa(status):
            dump_qa(ensemble, outdir, master_id, outline,
                    creator=creator, revision=revision,
                    color=qa_color)
            continue

        if status != chs_status.TODO:
            print("NOTE: status = {}".format(status))

        regname = 'master.{}.v{:03d}.reg'.format(master_id, revision)
        outfile = os.path.join(outdir, regname)
        with open(outfile, 'w') as ofh:
            ds9_header(ofh, color=master_color)

            ostr = ds9_shape(outline['eqpos'])
            ostr += ' # text={{Master_Id={}}}\n'.format(master_id)
            ofh.write(ostr)

        print("Created: {}".format(outfile))

    # Write out the "cmst3" file, as a FITS file.
    #
    filename = utils.make_mhull_name(ensemble, revision)
    outfile = os.path.join(outdir, filename)
    write_hulls(ensemble, outfile, master_hulls, hull_areas,
                outlines, hull_store, svdqafile, centroidfile,
                stacks,
                compzero=compzero,
                revision=revision, creator=creator)
def dump_qa(ensemble, outdir, ctr, outline,
            creator=None,
            revision=1,
            color='green'):
    """Dump the QA data to a region-like FITS file

    The output files are qa.<ctr>.v<version>.[fits|reg], where the
    integers are written as 3-character, zero-padded values.

    Parameters
    ----------
    ensemble : string
        The ensemble value, written to the header as the ENSEMBLE
        keyword.
    outdir : string
        The output directory.
    ctr : int
        Used for the file name and added to the header as the
        HULLCPT keyword.
    outline : dict
        The hull data to write out. It must have a QA status and
        the eqpos, pos, and base_stack fields must not be None.
    creator : None or str, optional
        The name to use for the CREATOR field in the header.
    revision : int
        The value to write out to the header as the CHSVER
        keyword.
    color : str
        The color for the DS9 region files.

    Notes
    -----
    I'd like to add a transform to POS but it is a little-bit involved
    and I don't have time to dig up the code, so just add an explicit
    column instead of a virtual one. It should be the case that the
    base stack is the same for each row, but leave as a column
    rather than move to a header keyword for now.

    The number of vertexes of each polygon is the number of finite
    X values, and the polygons are assumed to store these values
    first (any non-finite padding comes at the end).

    """

    assert chs_status.is_qa(outline['status'])
    assert outline['eqpos'] is not None
    assert outline['pos'] is not None
    assert outline['base_stack'] is not None

    # The reason is whatever follows the first '-' in the status.
    #
    idx = outline['status'].find('-')
    if idx == -1:
        reason = "unknown"
    else:
        reason = outline['status'][idx + 1:]

    outfile = os.path.join(outdir,
                           'qa.{:03d}.v{:03d}.fits'.format(ctr,
                                                           revision))

    cr = pycrates.TABLECrate()
    cr.name = 'QACASE'

    utils.add_standard_header(cr, creator=creator, revision=revision)

    utils.add_header(cr, [('ENSEMBLE', ensemble,
                           'The ensemble'),
                          ('HULLCPT', ctr,
                           'The Master_Id of the hull'),
                          ('QREASON', reason,
                           'Why is this a QA case?')])

    eqpos = outline['eqpos']
    pos = outline['pos']
    assert len(pos) == len(eqpos)
    ncpts = len(pos)

    utils.add_col(cr, 'COMPONENT', np.arange(1, ncpts + 1))
    utils.add_col(cr, 'SHAPE', ['Polygon'] * ncpts)
    utils.add_col(cr, 'BASE_STK', [outline['base_stack']] * ncpts,
                  desc='The stack used for SKY coord system, or NONE')

    nvertex = []
    for poly in pos:
        x = poly[0]
        y = poly[1]
        xidx = np.isfinite(x)
        yidx = np.isfinite(y)
        assert (xidx == yidx).all()
        nvertex.append(xidx.sum())

    utils.add_col(cr, 'NVERTEX', nvertex,
                  desc='The number of vertexes in the closed hull')

    nmax = max(nvertex)
    pos_out = np.full((ncpts, 2, nmax), np.nan, dtype=np.float64)
    eqpos_out = np.full((ncpts, 2, nmax), np.nan, dtype=np.float64)

    for i, (phys, cel) in enumerate(zip(pos, eqpos)):
        npts = nvertex[i]

        # Restrict to the vertexes, as done for the region file
        # below, so that a polygon which is padded out past npts
        # does not fail the assignment (the slice has no effect
        # when the polygon is not padded).
        #
        pos_out[i, :, :npts] = np.asarray(phys)[:, :npts]
        eqpos_out[i, :, :npts] = np.asarray(cel)[:, :npts]

    col = pycrates.create_vector_column('POS', ['X', 'Y'])
    col.desc = 'The master hull vertices'
    col.unit = 'pixel'
    col.values = pos_out
    cr.add_column(col)

    col = pycrates.create_vector_column('EQPOS', ['RA', 'DEC'])
    col.desc = 'The master hull vertices'
    col.unit = 'degree'
    col.values = eqpos_out
    cr.add_column(col)

    cr.write(outfile)
    print("Created: {}".format(outfile))

    outfile = os.path.join(outdir,
                           'qa.{:03d}.v{:03d}.reg'.format(ctr,
                                                          revision))
    with open(outfile, 'w') as ofh:
        ds9_header(ofh, color=color)

        for i, cel in enumerate(eqpos):
            npts = nvertex[i]
            ostr = ds9_shape(cel[:, :npts])
            ostr += ' # text={{Id={} {}}}\n'.format(ctr, i + 1)
            ofh.write(ostr)

    print("Created: {}".format(outfile))
def write_hulls(ensemble, outfile, hullcpts, hullareas, outlines,
                hull_store, svdqafile, centroidfile, stacks,
                compzero=0, revision=1, creator=None):
    """Create the "CHS mst3" file.

    Two tables are assembled - one row per stack-level hull (the
    hullmatch data) and one row per master hull (the hulllist data) -
    and passed, along with the header values, to
    utils.create_mhull_file, which writes the file.

    Parameters
    ----------
    ensemble : string
        The ensemble value, written to the header as the ENSEMBLE
        keyword.
    outfile : string
        The output file name. It is overwritten if it exists.
    hullcpts : sequence of (stack, cpt) sequences
        The stack-level hulls that form each master hull. It must
        have the same length as outlines.
    hullareas : dict
        The stack hull areas, in arcsec^2, keyed by the pair
        (stack, component).
    outlines : list of dict
        The outline for each master hull (can be an error state).
        The 'status' and 'base_stack' keys are always read; 'eqpos'
        is read - and must be a 2 by n array - when the status is
        not a QA case.
    hull_store : dict
        The keys are (stack, component) and the values are the
        stack-level data read from the mrgsrc3 file. The 'eband',
        'likelihood', 'man_code', and 'mrgsrc3rev' fields are used.
    svdqafile : str
        The name of the file containing the stack ids that went to
        SVD QA. The first column of this file is used as the stack
        id. The full path is written to the SVDQAFIL header keyword.
    centroidfile : str
        The name of the file containing the stack, cpt,
        include_centroid information (a partial list). It is used to
        set up the INCLUDE_IN_CENTROID column and is stored in the
        CENFILE header keyword.
    stacks : list of str
        The stacks that form this ensemble (whether or not they have
        a stack-level convex hull). This is used to create the
        STKIDNUM and STKIDxxx header values in the output file. The
        list is sorted by this routine, so the input order does not
        matter.
    compzero : int, optional
        The value of the COMPONENT column in the HULLMATCH block for
        a stack-level hull which has a component value of 0 (from the
        mrgsrc3 MEXTSRC block); note that there are no such component
        values since they are 1 based. Must be >= 0.
    revision : int, optional
        The value to write out to the header as the CHSVER keyword.
    creator : None or str, optional
        The name to use for the CREATOR field in the header.

    """

    assert len(hullcpts) == len(outlines), \
        'len = {} vs {}'.format(len(hullcpts), len(outlines))

    # Gather the header values; the two auxiliary files are read
    # using their absolute paths, which are also what gets recorded.
    #
    stack_names = sorted(stacks)

    svdqa_path = os.path.abspath(svdqafile)
    svdqa_stacks = utils.read_svdqafile(svdqa_path)

    centroid_path = os.path.abspath(centroidfile)
    excluded_centroids = utils.read_centroidfile(centroid_path)

    header = {'compzero': compzero,
              'stacks': stack_names,
              'svdqafile': svdqa_path,
              'centroidfile': centroid_path}

    # The per-stack-hull table: one row for each (stack, component)
    # pair of each master hull. The column order here matches the
    # order of the values in each row built below.
    #
    match_columns = ('master_id', 'nhulls', 'stackid', 'component',
                     'match_type', 'area', 'eband', 'likelihood',
                     'man_code', 'mrg3rev', 'include_in_centroid',
                     'stksvdqa')
    hullmatch = {}
    for colname in match_columns:
        hullmatch[colname] = []

    for master_id, members in enumerate(hullcpts, 1):
        nmembers = len(members)

        # Sorting the members makes the file easier for a user to scan.
        for key in sorted(members):
            assert key in hull_store
            stack, cpt = key
            stack_hull = hull_store[key]

            # The man_code value would ideally be kept as a bit array.
            # The two membership tests are case sensitive.
            #
            row = (master_id,
                   nmembers,
                   stack,
                   cpt,
                   'Unambiguous',
                   hullareas[key],
                   stack_hull['eband'],
                   stack_hull['likelihood'],
                   stack_hull['man_code'],
                   stack_hull['mrgsrc3rev'],
                   key not in excluded_centroids,
                   stack in svdqa_stacks)

            for colname, value in zip(match_columns, row):
                hullmatch[colname].append(value)

    # Find the largest number of vertices in the non-QA hulls. This
    # is only used to warn about an unusually-small polygon; a value
    # of 0 is fine as it means that all master hulls are QA cases.
    #
    vertex_counts = [outline['eqpos'].shape[1]
                     for outline in outlines
                     if not chs_status.is_qa(outline['status'])]
    largest = max(vertex_counts) if vertex_counts else 0
    if 0 < largest < 3:
        print("WARNING: max number of vertices in a " +
              "hull={}".format(largest))

    # The per-master-hull table. Some of these columns only ever hold
    # a single value, but they are included to show the proposed file
    # structure and to make downstream processing easier (i.e. not
    # having to worry about whether a column exists).
    #
    # TODO: could calculate the number of stacks that contribute/
    #       number with no data
    #
    nstkhull = [len(members) for members in hullcpts]

    master_ids = []
    statuses = []
    base_stacks = []
    nvertex = []

    # The vertex arrays are stored as is (None for a QA case); the
    # original note says that combining them into a single array was
    # moved into create_mhull_file.
    #
    eqpos = []

    for master_id, outline in enumerate(outlines, 1):
        status = outline['status']
        master_ids.append(master_id)
        statuses.append(status)

        base = outline['base_stack']
        base_stacks.append("NONE" if base is None else base)

        if chs_status.is_qa(status):
            nvertex.append(0)
            eqpos.append(None)
            print("Skipping data for " +
                  "Match_Id={} status={}".format(master_id, status))
            continue

        vertices = outline['eqpos']
        assert vertices.ndim == 2
        assert vertices.shape[0] == 2
        nvertex.append(vertices.shape[1])
        eqpos.append(vertices)

    # No manual adjustments are recorded by this routine.
    man_match = [False] * len(master_ids)
    man_reg = [False] * len(master_ids)

    hulllist = {'master_id': master_ids,
                'status': statuses,
                'base_stk': base_stacks,
                'manmatch': man_match,
                'manreg': man_reg,
                'nvertex': nvertex,
                'nstkhull': nstkhull,
                'eqpos': eqpos}

    utils.create_mhull_file(ensemble, revision, outfile,
                            hullmatch, hulllist, header,
                            creator=creator)