def _bin_column(self, weights, obs_name, err_name):
    """Rebin one observable and its error with inverse-variance weighting.

    Entries that are flagged (``self.FLAG``) or whose data value is NaN are
    masked and therefore excluded from both weighted sums.

    Parameters
    ----------
    weights : array-like
        Rebinning matrix used as the second operand of ``ma.dot``; its
        columns define the output channels.
    obs_name : str
        Name of the observable column in ``self.data``.
    err_name : str
        Name of the associated error column in ``self.data``.

    Returns
    -------
    list of `astropy.io.fits.Column`
        ``[binned_data, binned_error]``.  Both keep their original column
        name and reuse the unit and format letter of the observable column.

    Raises
    ------
    ValueError
        If zero errors are present and no non-zero error exists to replace
        them with.
    """
    from astropy.io.fits import Column

    data = self.data[obs_name]

    # Work on a copy: patching the errors below must not alter the table
    # stored on this object.
    error = np.array(self.data[err_name])

    # Spurious zero errors are replaced by the minimum non-zero error.  For
    # GRAVITY it only seems that's for flagged data anyway...
    zero = error == 0
    if zero.any():
        error[zero] = error[~zero].min()

    datamask = self.FLAG | np.isnan(data)
    data = ma.masked_array(data, mask=datamask)
    inverror2 = ma.masked_array(error**-2, mask=datamask)

    # Inverse-variance weighted mean and its propagated uncertainty.
    wsum = ma.dot(inverror2, weights)
    dsum = ma.dot(data * inverror2, weights)
    new_data = dsum / wsum
    new_error = wsum**-0.5

    # Same element type as the input column, new number of channels.
    nchan = new_data.shape[1]
    col = self.columns[obs_name]
    unit = col.unit
    new_fmt = f"{nchan}{col.format[-1]}"

    new_data = Column(obs_name, new_fmt, unit, array=new_data)
    new_error = Column(err_name, new_fmt, unit, array=new_error)
    return [new_data, new_error]
def toFits(self, fits):
    """Append a ``TARGET`` binary table describing this target

    The table holds one row per entry of ``self.fiberMags`` (filter name and
    magnitude); the attributes listed in ``self._attributes`` are stored as
    upper-cased header keywords, together with the ``TargetType`` headers.

    Parameters
    ----------
    fits : `astropy.io.fits.HDUList`
        Opened FITS file; the new HDU is appended to it.
    """
    from astropy.io.fits import BinTableHDU, Column

    # An empty mapping still needs a valid string width for the "A" format.
    if self.fiberMags:
        nameWidth = max(len(name) for name in self.fiberMags.keys())
    else:
        nameWidth = 1

    keywords = {attr.upper(): getattr(self, attr) for attr in self._attributes}
    header = astropyHeaderFromDict(keywords)
    header.update(TargetType.getFitsHeaders())

    filterNames = list(self.fiberMags.keys())
    magnitudes = np.array(list(self.fiberMags.values()))
    columns = [
        Column("filterName", "%dA" % nameWidth, array=filterNames),
        Column("fiberMag", "E", array=magnitudes),
    ]
    fits.append(BinTableHDU.from_columns(columns, header=header, name="TARGET"))
def write_stdstar_model(norm_modelfile, normalizedFlux, wave, fibers, data,
                        header=None):
    """Writes the normalized flux for the best model.

    Args:
        norm_modelfile : output file path; an existing file is overwritten
        normalizedFlux : flux array stored in the primary (FLUX) HDU
        wave : wavelength array stored in the WAVE image HDU
        fibers : fiber array stored in the FIBERS image HDU
        data : mapping providing 'BESTMODEL', 'TEMPLATEID' and 'CHI2DOF',
            written as the BESTMODELINDEX, TEMPLATEID and CHI2DOF columns
            of the METADATA table
        header : optional input handed to fitsheader() to seed every HDU
            header
    """
    from astropy.io.fits import Column

    hdr = fitsheader(header)

    # Each HDU gets its own snapshot of the header, so the edits made for
    # the following extensions cannot affect the ones already built.
    hdr['EXTNAME'] = ('FLUX', 'erg/s/cm2/A')
    hdr['BUNIT'] = ('erg/s/cm2/A', 'Flux units')
    hdu1 = fits.PrimaryHDU(normalizedFlux, header=hdr.copy())

    hdr['EXTNAME'] = ('WAVE', '[Angstroms]')
    hdr['BUNIT'] = ('Angstrom', 'Wavelength units')
    hdu2 = fits.ImageHDU(wave, header=hdr.copy())

    # The remaining extensions are dimensionless: drop the wavelength unit
    # instead of letting it leak into their headers.
    del hdr['BUNIT']
    hdr['EXTNAME'] = ('FIBERS', 'no dimension')
    hdu3 = fits.ImageHDU(fibers, header=hdr.copy())

    hdr['EXTNAME'] = ('METADATA', 'no dimension')
    cols = fits.ColDefs([
        Column(name='BESTMODELINDEX', format='K', array=data['BESTMODEL']),
        Column(name='TEMPLATEID', format='K', array=data['TEMPLATEID']),
        Column(name='CHI2DOF', format='D', array=data['CHI2DOF']),
    ])
    tbhdu = fits.BinTableHDU.from_columns(cols, header=hdr)

    hdulist = fits.HDUList([hdu1, hdu2, hdu3, tbhdu])
    # 'overwrite' is the supported spelling of the former 'clobber' option.
    hdulist.writeto(norm_modelfile, overwrite=True)
def beams_to_bintable(beams):
    """
    Convert a list of beams to a CASA-style BinTableHDU

    Parameters
    ----------
    beams : iterable of beam objects
        Each beam must expose ``major``, ``minor`` and ``pa`` quantities and
        a ``meta`` mapping.  ``meta['CHAN']`` and ``meta['POL']`` are
        optional and default to 0.

    Returns
    -------
    BinTableHDU
        Table named ``BEAMS`` with columns BMAJ and BMIN (arcsec),
        BPA (deg), CHAN and POL.  ``NCHAN`` is the number of beams and
        ``NPOL`` the number of distinct values in the POL column.
    """
    arcsec = u.arcsec.to_string('FITS')
    degree = u.deg.to_string('FITS')

    chan = [bm.meta['CHAN'] if 'CHAN' in bm.meta else 0 for bm in beams]
    pol = [bm.meta['POL'] if 'POL' in bm.meta else 0 for bm in beams]

    c1 = Column(name='BMAJ', format='1E',
                array=[bm.major.to(u.arcsec).value for bm in beams],
                unit=arcsec)
    c2 = Column(name='BMIN', format='1E',
                array=[bm.minor.to(u.arcsec).value for bm in beams],
                unit=arcsec)
    c3 = Column(name='BPA', format='1E',
                array=[bm.pa.to(u.deg).value for bm in beams],
                unit=degree)
    c4 = Column(name='CHAN', format='1J', array=chan)
    c5 = Column(name='POL', format='1J', array=pol)

    bmhdu = BinTableHDU.from_columns([c1, c2, c3, c4, c5])
    bmhdu.header['EXTNAME'] = 'BEAMS'
    bmhdu.header['EXTVER'] = 1
    bmhdu.header['XTENSION'] = 'BINTABLE'
    bmhdu.header['NCHAN'] = len(beams)
    # Count polarizations from the values actually written to the POL
    # column, so beams lacking a 'POL' entry no longer raise KeyError.
    bmhdu.header['NPOL'] = len(set(pol))
    return bmhdu
def cat_m67(outfil=None, ZP_fil=None):
    """Combine the M67 SExtractor catalogs into a single FITS table.

    Every file matching ``Sex/sex_M67*.dat`` is read, its ``MAG_BEST``
    column is shifted by a zero point taken from the zero-point table, and
    ``FILTER`` and ``FIELD`` columns derived from fixed positions in the
    file name are added.  The stacked result is written to ``outfil``.

    Parameters
    ----------
    outfil : str, optional
        Output FITS file (overwritten if present).  Defaults to
        'M67_catalog.fits'.
    ZP_fil : str, optional
        FITS table with 'Filter' and 'ZP' columns.  Defaults to
        'Std/ZP_SA104.fits'.

    Raises
    ------
    IOError
        If no SExtractor file is found.
    """
    from astropy.table import Table, Column, vstack

    # ZP file
    if ZP_fil is None:
        ZP_fil = 'Std/ZP_SA104.fits'
    zp_dat = Table.read(ZP_fil, format='fits')

    # Sex files
    m67_sex_files = glob.glob('Sex/sex_M67*.dat')
    if not m67_sex_files:
        raise IOError("cat_m67: no files match 'Sex/sex_M67*.dat'")

    tables = []
    for ff in m67_sex_files:
        dat = Table.read(ff, format='ascii.sextractor')
        ndat = len(dat)

        # Alter by ZeroPoint
        filt = ff[-5]  # filter letter just before the '.dat' extension
        # NOTE(review): the zero point is always looked up for filter 'B',
        # not for `filt` -- confirm whether that is intended.
        idx = np.where(zp_dat['Filter'] == 'B')[0]
        ZPval = float(zp_dat['ZP'][idx])
        dat['MAG_BEST'] += ZPval

        # Filter and field columns (constant within one file)
        fcolm = Column(name='FILTER', data=ndat * [filt])
        fldcolm = Column(name='FIELD', data=ndat * [ff[12:14]])
        dat.add_columns([fcolm, fldcolm])

        tables.append(dat)

    all_dat = tables[0] if len(tables) == 1 else vstack(tables)

    # Write
    if outfil is None:
        outfil = 'M67_catalog.fits'
    all_dat.write(outfil, format='fits', overwrite=True)
    print('cat_m67: Wrote {} with {} entries'.format(outfil, len(all_dat)))
    return
def _bin_helper(self, R):
    """Build the wavelength table rebinned to spectral resolution ``R``.

    The current mean resolution ``R0`` is the central wavelength divided by
    the mean channel width.  Rebinning only happens when ``R0 > R`` and
    there is more than one channel.

    Parameters
    ----------
    R : float
        Requested spectral resolution.

    Returns
    -------
    new
        Table built with ``self.from_columns`` holding the new ``EFF_WAVE``
        and ``EFF_BAND`` columns (equal-width channels) plus every column
        not listed by ``self._get_oi_colnames()``; a plain copy of ``self``
        when no rebinning takes place.
    weights : ndarray or None
        Array of shape (old channels, new channels) giving the fraction of
        each old channel's bandwidth covered by each new channel; ``None``
        when no rebinning takes place.
    """
    from astropy.io.fits import Column

    # convert to double precision (rounding errors in weights...)
    wave = np.asarray(self.get_wave(shape='wavelength'), dtype=float)
    band = np.asarray(self.get_band(shape='wavelength'), dtype=float)

    # Lower and upper edge of every existing channel.
    winf = wave - band / 2
    wsup = wave + band / 2
    wmin = winf[0]
    wmax = wsup[-1]
    wmed = (wmin + wmax) / 2
    R0 = wmed / ((wmax - wmin) / len(wave))

    if not (R0 > R and len(wave) > 1):
        return self.copy(), None

    nwave = int(np.ceil(len(wave) * R / R0))
    dw = (wmax - wmin) / nwave
    first_centre = winf[0] + dw / 2
    last_centre = wsup[-1] - dw / 2
    new_wave = np.linspace(first_centre, last_centre, nwave)
    new_winf = new_wave - dw / 2
    new_wsup = new_wave + dw / 2
    new_band = np.full((nwave, ), dw)

    # Overlap of each (new, old) channel pair, clipped at zero and expressed
    # as a fraction of the old channel's bandwidth.
    upper = np.minimum(wsup, new_wsup[:, None])
    lower = np.maximum(winf, new_winf[:, None])
    weights = (np.maximum(upper - lower, 0) / band).T

    oi_colnames = self._get_oi_colnames()
    fmt = f"{nwave}E"
    cols = [
        Column('EFF_WAVE', fmt, 'm', array=new_wave),
        Column('EFF_BAND', fmt, 'm', array=new_band),
    ]
    cols.extend(c for c in self.columns if c.name not in oi_colnames)
    new = self.from_columns(cols, header=self.header)
    return new, weights
def table_to_bintablehdu(table, extname=None):
    """
    Turn an astropy Table into a BinTableHDU ready to be written to disk.

    The column definitions are rebuilt from the ``T*n`` keywords of the
    header stored in ``table.meta['header']`` after
    ``add_header_to_table(table)`` has been called on the table.

    Parameters
    ----------
    table: astropy.table.Table instance
        the table to be converted to a BinTableHDU
    extname: str
        name to go in the EXTNAME field of the FITS header

    Returns
    -------
    BinTableHDU
    """
    add_header_to_table(table)
    records = table.as_array()
    header = table.meta['header'].copy()
    if extname:
        header['EXTNAME'] = (extname, 'added by AstroData')

    def keyword(prefix, index):
        # Value of e.g. TFORM3, or None when the keyword is absent.
        return header.get('{}{}'.format(prefix, index))

    coldefs = [
        Column(name=keyword('TTYPE', idx),
               format=keyword('TFORM', idx),
               unit=keyword('TUNIT', idx),
               null=keyword('TNULL', idx),
               bscale=keyword('TSCAL', idx),
               bzero=keyword('TZERO', idx),
               disp=keyword('TDISP', idx),
               start=keyword('TBCOL', idx),
               dim=keyword('TDIM', idx),
               array=records[field])
        for idx, field in enumerate(records.dtype.names, start=1)
    ]
    return BinTableHDU(data=FITS_rec.from_columns(coldefs), header=header)
def _bin_helper(self, weights):
    """Return this table rebinned with the channel matrix ``weights``.

    Parameters
    ----------
    weights : array-like or None
        Rebinning matrix; ``None`` means no rebinning and yields a copy.

    Returns
    -------
    Table built with ``self.from_columns`` containing, in order, the binned
    observable/error columns, the new ``FLAG`` column and every other
    column unchanged.  A new channel is flagged when the summed weight of
    its unflagged input channels is below 1.
    """
    from astropy.io.fits import Column

    if weights is None:
        return self.copy()

    present = self.columns.names
    obs_names = [name for name in self.get_observable_names() if name in present]
    err_names = [name for name in self.get_error_names() if name in present]
    replaced = [*obs_names, *err_names, 'FLAG']

    new_mask = np.dot(~self.FLAG, weights) < 1

    binned = []
    for pair in zip(obs_names, err_names):
        binned.extend(self._bin_column(weights, *pair))

    flag_col = Column('FLAG', f"{new_mask.shape[1]}L", array=new_mask)
    untouched = [c for c in self.columns if c.name not in replaced]

    return self.from_columns([*binned, flag_col, *untouched],
                             header=self.header)
def _writeImpl(self, fits):
    """Implementation for writing to FITS file

    Appends a ``FLUXTBL`` binary table (wavelength, flux, mask) whose header
    comes from ``self.flags``, then lets ``self.target`` add its own HDU.

    Parameters
    ----------
    fits : `astropy.io.fits.HDUList`
        List of FITS HDUs. This has a Primary HDU already, the header of
        which may be supplemented with additional keywords.
    """
    from astropy.io.fits import BinTableHDU, Column

    columns = [
        Column("wavelength", "D", array=self.wavelength),
        Column("flux", "D", array=self.flux),
        Column("mask", "K", array=self.mask),
    ]
    header = astropyHeaderFromDict(self.flags.toFitsHeader())
    table = BinTableHDU.from_columns(columns, header=header, name="FLUXTBL")
    fits.append(table)
    self.target.toFits(fits)
def write_stdstar_models(norm_modelfile, normalizedFlux, wave, fibers, data,
                         fibermap, input_frames, header=None):
    """Writes the normalized flux for the best models.

    The file is first written to ``norm_modelfile + ".tmp"`` and then renamed
    to its final name.

    Args:
        norm_modelfile : output file path
        normalizedFlux : 2D array of flux[nstdstars, nwave]
        wave : 1D array of wavelengths[nwave] in Angstroms
        fibers : 1D array of fiberids for these spectra
        data : meta data table about which templates best fit
        fibermap : fibermaps rows for the input standard stars
        input_frames : Table with NIGHT, EXPID, CAMERA of input frames used
        header : optional input handed to fitsheader() for the primary HDU
    """
    from astropy.io.fits import Column

    log = get_logger()
    hdr = fitsheader(header)
    add_dependencies(hdr)

    #- support input Table, np.array, and dict
    data = Table(data)

    hdr['EXTNAME'] = ('FLUX', '[10**-17 erg/(s cm2 Angstrom)]')
    hdr['BUNIT'] = ('10**-17 erg/(s cm2 Angstrom)', 'Flux units')
    flux_hdu = fits.PrimaryHDU(normalizedFlux.astype('f4'), header=hdr)

    wave_hdu = fits.ImageHDU(wave.astype('f4'))
    wave_hdu.header['EXTNAME'] = ('WAVELENGTH', '[Angstrom]')
    wave_hdu.header['BUNIT'] = ('Angstrom', 'Wavelength units')

    fibers_hdu = fits.ImageHDU(fibers, name='FIBERS')

    # metadata: only the 1D columns go into the table, all as doubles
    meta_cols = [
        Column(name=key, format='D', array=data[key])
        for key in data.colnames
        if data[key].ndim == 1
    ]
    meta_hdu = fits.BinTableHDU.from_columns(fits.ColDefs(meta_cols),
                                             name='METADATA')

    hdulist = fits.HDUList([flux_hdu, wave_hdu, fibers_hdu, meta_hdu])

    # add coefficients
    if "COEFF" in data.colnames:
        hdulist.append(fits.ImageHDU(data["COEFF"], name="COEFF"))

    for contents, extname in ((fibermap, 'FIBERMAP'),
                              (input_frames, 'INPUT_FRAMES')):
        table_hdu = table_to_hdu(Table(contents))
        table_hdu.name = extname
        hdulist.append(table_hdu)

    start = time.time()
    tmpfile = norm_modelfile + ".tmp"
    hdulist.writeto(tmpfile, overwrite=True, checksum=True)
    os.rename(tmpfile, norm_modelfile)
    duration = time.time() - start
    log.info(iotime.format('write', norm_modelfile, duration))
def write_stdstar_models(norm_modelfile, normalizedFlux, wave, fibers, data,
                         header=None):
    """Writes the normalized flux for the best models.

    The file is first written to ``norm_modelfile + ".tmp"`` and then renamed
    to its final name.

    Args:
        norm_modelfile : output file path
        normalizedFlux : 2D array of flux[nstdstars, nwave]
        wave : 1D array of wavelengths[nwave] in Angstroms
        fibers : 1D array of fiberids for these spectra
        data : meta data table about which templates best fit; should include
            BESTMODEL, TEMPLATEID, CHI2DOF, REDSHIFT
        header : optional input handed to fitsheader() to seed every HDU
            header
    """
    from astropy.io.fits import Column

    hdr = fitsheader(header)
    add_dependencies(hdr)

    hdr['EXTNAME'] = ('FLUX', 'erg/s/cm2/A')
    hdr['BUNIT'] = ('erg/s/cm2/A', 'Flux units')
    hdu1 = fits.PrimaryHDU(normalizedFlux.astype('f4'), header=hdr.copy())

    hdr['EXTNAME'] = ('WAVELENGTH', '[Angstroms]')
    hdr['BUNIT'] = ('Angstrom', 'Wavelength units')
    hdu2 = fits.ImageHDU(wave.astype('f4'), header=hdr.copy())

    # The remaining extensions are dimensionless: drop the wavelength unit
    # instead of letting it leak into their headers.
    del hdr['BUNIT']
    hdr['EXTNAME'] = ('FIBERS', 'no dimension')
    hdu3 = fits.ImageHDU(fibers, header=hdr.copy())

    hdr['EXTNAME'] = ('METADATA', 'no dimension')
    cols = fits.ColDefs([
        Column(name='BESTMODEL', format='K', array=data['BESTMODEL']),
        Column(name='TEMPLATEID', format='K', array=data['TEMPLATEID']),
        Column(name='CHI2DOF', format='D', array=data['CHI2DOF']),
        Column(name='REDSHIFT', format='D', array=data['REDSHIFT']),
    ])
    tbhdu = fits.BinTableHDU.from_columns(cols, header=hdr)

    hdulist = fits.HDUList([hdu1, hdu2, hdu3, tbhdu])
    tmpfile = norm_modelfile + ".tmp"
    # 'overwrite' is the supported spelling of the former 'clobber' option.
    hdulist.writeto(tmpfile, overwrite=True, checksum=True)
    os.rename(tmpfile, norm_modelfile)
def toFits(self, fits):
    """Write to a FITS file

    Appends an ``OBSERVATIONS`` binary table with the columns ``identity``,
    ``fiberId``, ``pfiNominal`` and ``pfiCenter``.

    Parameters
    ----------
    fits : `astropy.io.fits.HDUList`
        Opened FITS file.
    """
    from astropy.io.fits import BinTableHDU, Column

    # Width of the string column; fall back to one character when there are
    # no observations, since max() of an empty sequence would raise.
    identityLength = max((len(str(ident)) for ident in self.identity),
                         default=1)
    hdu = BinTableHDU.from_columns([
        Column("identity", "%dA" % identityLength, array=self.identity),
        Column("fiberId", "K", array=self.fiberId),
        Column("pfiNominal", "2D", array=self.pfiNominal),
        Column("pfiCenter", "2D", array=self.pfiCenter),
    ], name="OBSERVATIONS")
    fits.append(hdu)
def toFits(self, fits):
    """Append this object's spectrum table to a FITS file

    The table is named ``self._hduName``, carries the header produced by
    ``self.flags`` and holds the wavelength, flux, error and mask columns.

    Parameters
    ----------
    fits : `astropy.io.fits.HDUList`
        Opened FITS file.
    """
    from astropy.io.fits import BinTableHDU, Column

    header = self.flags.toFitsHeader()
    layout = (
        ("wavelength", "E", self.wavelength),
        ("flux", "E", self.flux),
        ("error", "E", self.error),
        ("mask", "K", self.mask),
    )
    columns = [Column(name, fmt, array=values) for name, fmt, values in layout]
    table = BinTableHDU.from_columns(columns,
                                     header=astropyHeaderFromDict(header),
                                     name=self._hduName)
    fits.append(table)
def update_hdrtab(image, level, total_obj_list, input_exposures):
    """Build HAP entry table extension for product

    The 'hdrtab' extension of ``image`` is replaced by a new ``HDRTAB``
    extension which, when matching product names are found, has one extra
    string column (``HAPCOLNAME``) holding those names.

    Parameters
    ----------
    image : HDUList-like
        Opened product; needs a 'hdrtab' extension and a 'filename' keyword
        in its primary header.  Modified in place.
    level : object
        Not used by this implementation.
    total_obj_list : list
        Product objects providing ``find_member()`` and ``edp_list``.
    input_exposures : list of str
        Names of the input exposures.
    """
    # Convert input_exposure filenames into HAP product filenames
    name_col = []
    orig_tab = image['hdrtab'].data

    # get the name of the product so it can be selected from
    # the total_obj_list for updating
    update_filename = image[0].header['filename']

    for tot_obj in total_obj_list:
        # Get the HAPProduct object for the input image to be updated
        # The '.find_member()' method looks for exposure, filter and
        # total level product.
        img_obj = tot_obj.find_member(update_filename)
        if img_obj is None:
            # Didn't find the input image in this total_obj instance,
            # try another...
            continue

        # Only for the total_obj_list entry that matches the input image
        # should we build the list of new rootnames
        for row in orig_tab:
            # The rootname is ipppssoot, but the expname only contains
            # ipppssoo, so remove the last character for the comparisons
            rootname = str(row['rootname'])[0:-1]

            for expname in input_exposures:
                if rootname in expname:
                    # Convert input exposure names into HAP names
                    for exposure in tot_obj.edp_list:
                        if rootname in exposure.full_filename:
                            name_col.append(exposure.product_basename)
                            break

    hdrtab_cols = orig_tab.columns

    if name_col:
        # define new column with HAP expname
        max_len = min(max(len(name) for name in name_col), 51)
        # The builtin ``str`` replaces the ``np.str`` alias, which no longer
        # exists in current NumPy releases.
        hapcol = Column(array=np.array(name_col, dtype=str),
                        name=HAPCOLNAME,
                        format='{}A'.format(max_len + 4))
        newcol = fits.ColDefs([hapcol])
        hdrtab_cols += newcol

    # define new extension
    haphdu = fits.BinTableHDU.from_columns(hdrtab_cols)
    haphdu.header['extname'] = 'HDRTAB'
    haphdu.header['extver'] = 1
    # remove old extension
    del image['hdrtab']
    # replace with new extension
    image.append(haphdu)
def wcs_to_asdftablehdu(wcs, extver=None):
    """
    Serialize a gWCS object as a FITS TableHDU (ASCII) extension.

    The table content is a small ASDF file, stored one text line per row in
    a single ``gWCS`` column.  The constituent AstroPy models must have
    associated ASDF "tags" that specify how to serialize them.

    If the ASDF output cannot be decoded as ASCII (this should not happen),
    the raw bytes are stored in a BinTableHDU instead and a warning is
    logged.

    Raises TypeError when ASDF rejects the object with a schema validation
    error.
    """
    label = extver or ''

    # There is no public API for generating only the relevant YAML section,
    # so build a complete in-memory ASDF file, which also takes care of the
    # "tags".
    try:
        af = asdf.AsdfFile({"wcs": wcs})
    except jsonschema.exceptions.ValidationError:
        # (The original traceback also gets printed here)
        raise TypeError(
            "Cannot serialize model(s) for 'WCS' extension {}".format(label))

    # ASDF only writes to binary file objects; dump there, keeping any
    # binary arrays as inline text, and take the bytes back out.
    with BytesIO() as fd:
        with af:
            af.write_to(fd, all_array_storage='inline')
        raw = fd.getvalue()

    try:
        lines = raw.decode('ascii').splitlines()
    except UnicodeDecodeError:
        # Unexpected binary content despite 'inline' storage: fall back to a
        # non-human-readable byte table.
        LOGGER.warning("Could not convert WCS {} ASDF to ASCII; saving table "
                       "as binary".format(label))
        hduclass, fmt = BinTableHDU, 'B'
        contents = np.frombuffer(raw, dtype=np.uint8)
    else:
        hduclass = TableHDU
        fmt = 'A{0}'.format(max(len(line) for line in lines))
        contents = lines

    # Construct the FITS table extension:
    col = Column(name='gWCS', format=fmt, array=contents,
                 ascii=hduclass is TableHDU)
    return hduclass.from_columns([col], name='WCS', ver=extver)
def slits_HDUtable(slit_pos, order):
    '''
    Build the binary table HDU describing the slits, for the output image
    and the slit image.

    The table has a ``spline_order`` column, a ``slitnum`` column (number of
    slits) and, for slit ``i``, the columns ``slit_i_left_edge`` and
    ``slit_i_right_edge`` filled from ``slit_pos[i][1]`` and
    ``slit_pos[i][2]``.

    All columns use the FITS binary table format code ``K`` (64-bit
    integer).
    NOTE(review): if the edge values are floating point, ``D`` would be the
    matching code -- confirm against the callers.
    '''
    columns = [
        Column(name='spline_order', format='K', array=[order]),
        Column(name='slitnum', format='K', array=[len(slit_pos)]),
    ]
    for i in range(len(slit_pos)):
        slit = slit_pos[i]
        columns.extend([
            Column(name='slit_%i_left_edge' % i, format='K', array=slit[1]),
            Column(name='slit_%i_right_edge' % i, format='K', array=slit[2]),
        ])
    return fits.BinTableHDU.from_columns(columns)
def write_stdstar_models(norm_modelfile, normalizedFlux, wave, fibers, data,
                         header=None):
    """Writes the normalized flux for the best models.

    The file is first written to ``norm_modelfile + ".tmp"`` and then renamed
    to its final name.

    Args:
        norm_modelfile : output file path
        normalizedFlux : 2D array of flux[nstdstars, nwave]
        wave : 1D array of wavelengths[nwave] in Angstroms
        fibers : 1D array of fiberids for these spectra
        data : meta data table about which templates best fit
        header : optional input handed to fitsheader() for the primary HDU
    """
    from astropy.io.fits import Column

    hdr = fitsheader(header)
    add_dependencies(hdr)

    #- support input Table, np.array, and dict
    data = Table(data)

    hdr['EXTNAME'] = ('FLUX', '[10**-17 erg/(s cm2 Angstrom)]')
    hdr['BUNIT'] = ('10**-17 erg/(s cm2 Angstrom)', 'Flux units')
    primary = fits.PrimaryHDU(normalizedFlux.astype('f4'), header=hdr)

    wavelength = fits.ImageHDU(wave.astype('f4'))
    wavelength.header['EXTNAME'] = ('WAVELENGTH', '[Angstrom]')
    wavelength.header['BUNIT'] = ('Angstrom', 'Wavelength units')

    fiber_ids = fits.ImageHDU(fibers, name='FIBERS')

    # metadata: only the 1D columns go into the table, all as doubles
    one_dim = [key for key in data.colnames if data[key].ndim == 1]
    cols = [Column(name=key, format='D', array=data[key]) for key in one_dim]
    metadata = fits.BinTableHDU.from_columns(fits.ColDefs(cols),
                                             name='METADATA')

    hdulist = fits.HDUList([primary, wavelength, fiber_ids, metadata])

    # add coefficients
    if "COEFF" in data.colnames:
        hdulist.append(fits.ImageHDU(data["COEFF"], name="COEFF"))

    tmpfile = norm_modelfile + ".tmp"
    hdulist.writeto(tmpfile, overwrite=True, checksum=True)
    os.rename(tmpfile, norm_modelfile)
def update_hdrtab(image, level, total_obj_list, input_exposures):
    """Build HAP entry table extension for product

    The 'hdrtab' extension of ``image`` is replaced by a new ``HDRTAB``
    extension which, when matching names are found, has one extra string
    column (``HAPCOLNAME``) holding those names.

    Parameters
    ----------
    image : HDUList-like
        Opened product with a 'hdrtab' extension.  Modified in place.
    level : int
        With ``level == 1`` the matching entries of ``input_exposures`` are
        stored as they are; otherwise they are translated into the
        ``drizzle_filename`` of the matching exposure product.
    total_obj_list : list
        Product objects providing ``edp_list``.
    input_exposures : list of str
        Names of the input exposures.
    """
    # Convert input_exposure filenames into HAP product filenames
    name_col = []
    orig_tab = image['hdrtab'].data

    for row in orig_tab:
        # The rootname is ipppssoot, but the expname only contains ipppssoo,
        # so remove the last character for the comparisons
        rootname = str(row['rootname'])[0:-1]

        for expname in input_exposures:
            if rootname not in expname:
                continue

            if level == 1:
                # Interpret inputs as exposures (FLT/FLC) filename not HAP names
                name_col.append(expname)
                continue

            # Convert input exposure names into HAP names
            foundit = False
            for tot_obj in total_obj_list:
                for exposure in tot_obj.edp_list:
                    if rootname in exposure.full_filename:
                        name_col.append(exposure.drizzle_filename)
                        foundit = True
                        break
                if foundit:
                    # One name per exposure: do not keep scanning the
                    # remaining total products.
                    break

    hdrtab_cols = orig_tab.columns

    # Without any match there is nothing to add (and max() below would fail
    # on an empty list).
    if name_col:
        # define new column with HAP expname
        max_len = min(max(len(name) for name in name_col), 51)
        # The builtin ``str`` replaces the ``np.str`` alias, which no longer
        # exists in current NumPy releases.
        hapcol = Column(array=np.array(name_col, dtype=str),
                        name=HAPCOLNAME,
                        format='{}A'.format(max_len + 4))
        hdrtab_cols = hdrtab_cols + fits.ColDefs([hapcol])

    # define new extension
    haphdu = fits.BinTableHDU.from_columns(hdrtab_cols)
    haphdu.header['extname'] = 'HDRTAB'
    haphdu.header['extver'] = 1
    # remove old extension
    del image['hdrtab']
    # replace with new extension
    image.append(haphdu)
def toFits(self, fits):
    """Write to a FITS file

    Appends an ``OBSERVATIONS`` binary table with the columns ``visit``,
    ``arm``, ``spectrograph``, ``fiberId``, ``pfsDesignId``, ``pfiNominal``
    and ``pfiCenter``.

    Parameters
    ----------
    fits : `astropy.io.fits.HDUList`
        Opened FITS file.
    """
    # Width of the string column; fall back to one character when there are
    # no observations, since max() of an empty sequence would raise.
    armLength = max((len(arm) for arm in self.arm), default=1)
    columns = [
        Column("visit", "J", array=self.visit),
        Column("arm", f"{armLength}A", array=self.arm),
        Column("spectrograph", "J", array=self.spectrograph),
        Column("fiberId", "J", array=self.fiberId),
        Column("pfsDesignId", "K", array=self.pfsDesignId),
        Column("pfiNominal", "2E", array=self.pfiNominal),
        Column("pfiCenter", "2E", array=self.pfiCenter),
    ]
    hdu = BinTableHDU.from_columns(columns, name="OBSERVATIONS")
    fits.append(hdu)
# NOTE(review): the loop header owning this first statement lies above the
# visible part of the file -- confirm its range before editing.
vf.rename_column(oldnames[i], 'ferengi-13;a-{}'.format(i - 49))
# Rename the remaining answer columns to 'ferengi-<question>;a-<answer>',
# with the answer index restarting at 0 for every question.
for i in range(53, 59):
    vf.rename_column(oldnames[i], 'ferengi-14;a-{}'.format(i - 53))
for i in range(60, 63):
    vf.rename_column(oldnames[i], 'ferengi-15;a-{}'.format(i - 60))
for i in range(64, 66):
    vf.rename_column(oldnames[i], 'ferengi-16;a-{}'.format(i - 64))
for i in range(67, 69):
    vf.rename_column(oldnames[i], 'ferengi-17;a-{}'.format(i - 67))

# Table with one row per distinct user: a blank 50-character name plus
# 'kappa' and 'weight' columns that both start from the same zero array.
users = list(set(data['user']))
strcolumn = np.array([' '] * len(users), dtype='S50')
floatcolumn = np.zeros(len(users), dtype=float)
c1 = Column(name='user', format='A50', array=strcolumn)
c2 = Column(name='kappa', format='D', array=floatcolumn)
c3 = Column(name='weight', format='D', array=floatcolumn)
# NOTE(review): pyfits.new_table is a legacy constructor; newer pyfits and
# astropy releases use BinTableHDU.from_columns instead -- confirm the
# targeted version.
weightcols = pyfits.new_table([c1, c2, c3])
weight_table = pyfits.new_table(weightcols.columns)


def get_kappa(usersanswer, allanswers, X, N_answers):
    """Return the mean per-answer score of one user answer to question X.

    For each answer index ``j`` in ``range(N_answers)`` the score is the
    first element of ``allanswers['ferengi-X;a-j']`` when ``usersanswer`` is
    ``'a-j'``, and one minus that value otherwise.

    Raises ZeroDivisionError when ``N_answers`` is 0.
    """
    kappa = []
    for j in range(0, N_answers):
        if usersanswer == 'a-%i' % j:
            # The answer the user picked counts with its stored value ...
            kappa.append(allanswers['ferengi-%i;a-%i' % (X, j)][0])
        else:
            # ... every other answer counts with the complement.
            kappa.append(1 - allanswers['ferengi-%i;a-%i' % (X, j)][0])
    return (sum(kappa) / len(kappa))
# The URLs to different versions of the subject images. # # Now set up the collated classification columns. For each question there is a number of classifications # as well as vote fractions for each possible answer. # Each question has a question number T (T00 to T16) # Each of those questions has a number of answers A. # In previous iterations the answer numbers were themselves unique but in CANDELS # they appear to start at A01 for each question number, so they're not. # print 'Creating columns for vote fractions...' intcolumn = map(int, subjinfo.magnitude - subjinfo.magnitude) floatcolumn = subjinfo.magnitude - subjinfo.magnitude c01 = Column(name='num_classifications', format='J', array=intcolumn) c02 = Column(name='t00_smooth_or_featured_a0_smooth_frac', format='D', array=floatcolumn) c03 = Column(name='t00_smooth_or_featured_a1_features_frac', format='D', array=floatcolumn) c04 = Column(name='t00_smooth_or_featured_a2_artifact_frac', format='D', array=floatcolumn) c05 = Column(name='t00_smooth_or_featured_count', format='J', array=intcolumn) c06 = Column(name='t01_how_rounded_a0_completely_frac', format='D', array=floatcolumn)
def api_search(request, query): import numpy as np from astropy.io import fits from astropy.io.fits import Column result = unsqurl(query) if result['returncode'] != '200': #return error return HttpResponse(result['error']) print('UNSqurl:', result) #run the query cursor = connections['cosmo'].cursor() cursor.execute(result['sql']) rows = cursor.fetchall() #return FITS file priheader = fits.Header() priheader['COMMENT'] = "This file was generated by the Cosmo web portal." prihdu = fits.PrimaryHDU(header=priheader) # this works if the fields all map directly with no arrays # for i in range (0, nrows): # for j in range (0, len(dtypes)): # data[i][dtypes[j][0]] = rows[i][j] nrows = len(rows) if result['table'] == "DEFAULT": # try building from columns data = { 'cand_id': [], 'brickid': [], 'objid': [], 'type': [], 'ra': [], 'ra_ivar': [], 'dec': [], 'dec_ivar': [], 'bx': [], 'by': [], 'bx0': [], 'by0': [], 'ebv': [], 'dchisq': [], 'fracDev': [], 'fracDev_ivar': [], 'shapeExp_r': [], 'shapeExp_r_ivar': [], 'shapeExp_e1': [], 'shapeExp_e1_ivar': [], 'shapeExp_e2': [], 'shapeExp_e2_ivar': [], 'shapeDev_r': [], 'shapeDev_r_ivar': [], 'shapeDev_e1': [], 'shapeDev_e1_ivar': [], 'shapeDev_e2': [], 'shapeDev_e2_ivar': [], 'decam_flux': [], 'decam_flux_ivar': [], 'decam_fracflux': [], 'decam_fracmasked': [], 'decam_fracin': [], 'decam_rchi2': [], 'decam_nobs': [], 'decam_anymask': [], 'decam_allmask': [], 'decam_mw_transmission': [], 'wise_flux': [], 'wise_flux_ivar': [], 'wise_fracflux': [], 'wise_rchi2': [], 'wise_nobs': [], 'wise_mw_transmission': [], 'decam_apflux': [], 'decam_apflux_resid': [], 'decam_apflux_ivar': [], } for i in range(0, nrows): data['cand_id'].append(rows[i][0]) data['brickid'].append(rows[i][3]) data['objid'].append(rows[i][4]) data['type'].append(rows[i][5]) data['ra'].append(rows[i][6]) data['ra_ivar'].append(rows[i][7]) data['dec'].append(rows[i][8]) data['dec_ivar'].append(rows[i][9]) data['bx'].append(rows[i][10]) data['by'].append(rows[i][11]) 
data['bx0'].append(rows[i][12]) data['by0'].append(rows[i][13]) data['ebv'].append(rows[i][14]) data['dchisq'].append( [rows[i][15], rows[i][16], rows[i][17], rows[i][18]]) data['fracDev'].append(rows[i][19]) data['fracDev_ivar'].append(rows[i][20]) data['shapeExp_r'].append(rows[i][21]) data['shapeExp_r_ivar'].append(rows[i][22]) data['shapeExp_e1'].append(rows[i][23]) data['shapeExp_e1_ivar'].append(rows[i][24]) data['shapeExp_e2'].append(rows[i][25]) data['shapeExp_e2_ivar'].append(rows[i][26]) data['shapeDev_r'].append(rows[i][27]) data['shapeDev_r_ivar'].append(rows[i][28]) data['shapeDev_e1'].append(rows[i][29]) data['shapeDev_e1_ivar'].append(rows[i][30]) data['shapeDev_e2'].append(rows[i][31]) data['shapeDev_e2_ivar'].append(rows[i][32]) data['decam_flux'].append([ rows[i][33], rows[i][34], rows[i][35], rows[i][36], rows[i][37], rows[i][38] ]) data['decam_flux_ivar'].append([ rows[i][39], rows[i][40], rows[i][41], rows[i][42], rows[i][43], rows[i][44] ]) data['decam_fracflux'].append([ rows[i][45], rows[i][46], rows[i][47], rows[i][48], rows[i][49], rows[i][50] ]) data['decam_fracmasked'].append([ rows[i][51], rows[i][52], rows[i][53], rows[i][54], rows[i][55], rows[i][56] ]) data['decam_fracin'].append([ rows[i][57], rows[i][58], rows[i][59], rows[i][60], rows[i][61], rows[i][62] ]) data['decam_rchi2'].append([ rows[i][63], rows[i][64], rows[i][65], rows[i][66], rows[i][67], rows[i][68] ]) data['decam_nobs'].append([ rows[i][69], rows[i][70], rows[i][71], rows[i][72], rows[i][73], rows[i][74] ]) data['decam_anymask'].append([ rows[i][75], rows[i][76], rows[i][77], rows[i][78], rows[i][79], rows[i][80] ]) data['decam_allmask'].append([ rows[i][81], rows[i][82], rows[i][83], rows[i][84], rows[i][85], rows[i][86] ]) data['decam_mw_transmission'].append([ rows[i][87], rows[i][88], rows[i][89], rows[i][90], rows[i][91], rows[i][92] ]) data['wise_flux'].append( [rows[i][93], rows[i][99], rows[i][105], rows[i][111]]) data['wise_flux_ivar'].append( [rows[i][94], 
rows[i][100], rows[i][106], rows[i][112]]) data['wise_fracflux'].append( [rows[i][95], rows[i][101], rows[i][107], rows[i][113]]) data['wise_rchi2'].append( [rows[i][96], rows[i][102], rows[i][108], rows[i][114]]) data['wise_nobs'].append( [rows[i][97], rows[i][103], rows[i][109], rows[i][115]]) data['wise_mw_transmission'].append( [rows[i][98], rows[i][104], rows[i][110], rows[i][116]]) data['decam_apflux'].append([ rows[i][118], rows[i][119], rows[i][120], rows[i][121], rows[i][122], rows[i][123], rows[i][124], rows[i][125], rows[i][142], rows[i][143], rows[i][144], rows[i][145], rows[i][146], rows[i][147], rows[i][148], rows[i][149], rows[i][166], rows[i][167], rows[i][168], rows[i][169], rows[i][170], rows[i][171], rows[i][172], rows[i][173], rows[i][190], rows[i][191], rows[i][192], rows[i][193], rows[i][194], rows[i][195], rows[i][196], rows[i][197], rows[i][214], rows[i][215], rows[i][216], rows[i][217], rows[i][218], rows[i][219], rows[i][220], rows[i][221], rows[i][238], rows[i][239], rows[i][240], rows[i][241], rows[i][242], rows[i][243], rows[i][244], rows[i][245] ]) data['decam_apflux_resid'].append([ rows[i][126], rows[i][127], rows[i][128], rows[i][129], rows[i][130], rows[i][131], rows[i][132], rows[i][133], rows[i][150], rows[i][151], rows[i][152], rows[i][153], rows[i][154], rows[i][155], rows[i][156], rows[i][157], rows[i][174], rows[i][175], rows[i][176], rows[i][177], rows[i][178], rows[i][179], rows[i][180], rows[i][181], rows[i][198], rows[i][199], rows[i][200], rows[i][201], rows[i][202], rows[i][203], rows[i][204], rows[i][205], rows[i][222], rows[i][223], rows[i][224], rows[i][225], rows[i][226], rows[i][227], rows[i][228], rows[i][229], rows[i][246], rows[i][247], rows[i][248], rows[i][249], rows[i][250], rows[i][251], rows[i][252], rows[i][253] ]) data['decam_apflux_ivar'].append([ rows[i][134], rows[i][135], rows[i][136], rows[i][137], rows[i][138], rows[i][139], rows[i][140], rows[i][141], rows[i][158], rows[i][159], rows[i][160], 
rows[i][161], rows[i][162], rows[i][163], rows[i][164], rows[i][165], rows[i][182], rows[i][183], rows[i][184], rows[i][185], rows[i][186], rows[i][187], rows[i][188], rows[i][189], rows[i][206], rows[i][207], rows[i][208], rows[i][209], rows[i][210], rows[i][211], rows[i][212], rows[i][213], rows[i][230], rows[i][231], rows[i][232], rows[i][233], rows[i][234], rows[i][235], rows[i][236], rows[i][237], rows[i][254], rows[i][255], rows[i][256], rows[i][257], rows[i][258], rows[i][259], rows[i][260], rows[i][261] ]) c0 = Column(name='cand_id', format='J', array=data['cand_id']) c1 = Column(name='brickid', format='J', array=data['brickid']) c2 = Column(name='objid', format='J', array=data['objid']) c3 = Column(name='type', format='10A', array=data['type']) c4 = Column(name='ra', format='D', array=data['ra']) c5 = Column(name='ra_ivar', format='E', array=data['ra_ivar']) c6 = Column(name='dec', format='D', array=data['dec']) c7 = Column(name='dec_ivar', format='E', array=data['dec_ivar']) c8 = Column(name='bx', format='D', array=data['bx']) c9 = Column(name='by', format='D', array=data['by']) c10 = Column(name='bx0', format='E', array=data['bx0']) c11 = Column(name='by0', format='E', array=data['by0']) c12 = Column(name='ebv', format='E', array=data['ebv']) c13 = Column(name='dchisq', format='4D', array=data['dchisq']) c14 = Column(name='fracDev', format='E', array=data['fracDev']) c15 = Column(name='fracDev_ivar', format='E', array=data['fracDev_ivar']) c16 = Column(name='shapeExp_r', format='E', array=data['shapeExp_r']) c17 = Column(name='shapeExp_r_ivar', format='E', array=data['shapeExp_r_ivar']) c18 = Column(name='shapeExp_e1', format='E', array=data['shapeExp_e1']) c19 = Column(name='shapeExp_e1_ivar', format='E', array=data['shapeExp_e1_ivar']) c20 = Column(name='shapeExp_e2', format='E', array=data['shapeExp_e2']) c21 = Column(name='shapeExp_e2_ivar', format='E', array=data['shapeExp_e2_ivar']) c22 = Column(name='shapeDev_r', format='E', 
array=data['shapeDev_r']) c23 = Column(name='shapeDev_r_ivar', format='E', array=data['shapeDev_r_ivar']) c24 = Column(name='shapeDev_e1', format='E', array=data['shapeDev_e1']) c25 = Column(name='shapeDev_e1_ivar', format='E', array=data['shapeDev_e1_ivar']) c26 = Column(name='shapeDev_e2', format='E', array=data['shapeDev_e2']) c27 = Column(name='shapeDev_e2_ivar', format='E', array=data['shapeDev_e2_ivar']) c28 = Column(name='decam_flux', format='6E', array=data['decam_flux']) c29 = Column(name='decam_flux_ivar', format='6E', array=data['decam_flux_ivar']) c30 = Column(name='decam_fracflux', format='6E', array=data['decam_fracflux']) c31 = Column(name='decam_fracmasked', format='6E', array=data['decam_fracmasked']) c32 = Column(name='decam_fracin', format='6E', array=data['decam_fracin']) c33 = Column(name='decam_rchi2', format='6E', array=data['decam_rchi2']) c34 = Column(name='decam_nobs', format='6I', array=data['decam_nobs']) c35 = Column(name='decam_anymask', format='6I', array=data['decam_anymask']) c36 = Column(name='decam_allmask', format='6I', array=data['decam_allmask']) c37 = Column(name='decam_mw_transmission', format='6E', array=data['decam_mw_transmission']) c38 = Column(name='wise_flux', format='4E', array=data['wise_flux']) c39 = Column(name='wise_flux_ivar', format='4E', array=data['wise_flux_ivar']) c40 = Column(name='wise_fracflux', format='4E', array=data['wise_fracflux']) c41 = Column(name='wise_rchi2', format='4E', array=data['wise_rchi2']) c42 = Column(name='wise_nobs', format='4I', array=data['wise_nobs']) c43 = Column(name='wise_mw_transmission', format='4E', array=data['wise_mw_transmission']) c44 = Column(name='decam_apflux', format='48E', array=data['decam_apflux']) c45 = Column(name='decam_apflux_resid', format='48E', array=data['decam_apflux_resid']) c46 = Column(name='decam_apflux_ivar', format='48E', array=data['decam_apflux_ivar']) hdu = fits.BinTableHDU.from_columns([ c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, 
c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37, c38, c39, c40, c41, c42, c43, c44, c45, c46 ]) elif result['table'] == 'CANDIDATE': dtypes = [('brickid', 'i4'), ('objid', 'i4'), ('blob', 'i8'), ('type', 'S10'), ('ra', 'float64'), ('ra_ivar', 'float64'), ('dec', 'float64'), ('dec_ivar', 'float64'), ('bx', 'float64'), ('by', 'float64'), ('bx0', 'float64'), ('by0', 'float64'), ('ebv', 'float64'), ('dchisq1', 'float64'), ('dchisq2', 'float64'), ('dchisq3', 'float64'), ('dchisq4', 'float64'), ('fracdev', 'float64'), ('fracdev_ivar', 'float64'), ('shapeexp_r', 'float64'), ('shapeexp_r_ivar', 'float64'), ('shapeexp_e1', 'float64'), ('shapeexp_e1_ivar', 'float64'), ('shapeexp_e2', 'float64'), ('shapeexp_e2_ivar', 'float64'), ('shapedev_r', 'float64'), ('shapedev_r_ivar', 'float64'), ('shapedev_e1', 'float64'), ('shapedev_e1_ivar', 'float64'), ('shapedev_e2', 'float64'), ('shapedev_e2_ivar', 'float64')] data = np.zeros(nrows, dtype=dtypes) for i in range(0, nrows): data[i]['brickid'] = rows[i][0] data[i]['objid'] = rows[i][1] data[i]['blob'] = rows[i][2] data[i]['type'] = str(rows[i][3]) data[i]['ra'] = rows[i][4] data[i]['ra_ivar'] = rows[i][5] data[i]['dec'] = rows[i][6] data[i]['dec_ivar'] = rows[i][7] data[i]['bx'] = rows[i][8] data[i]['by'] = rows[i][9] data[i]['bx0'] = rows[i][10] data[i]['by0'] = rows[i][11] data[i]['ebv'] = rows[i][12] data[i]['dchisq1'] = rows[i][13] data[i]['dchisq2'] = rows[i][14] data[i]['dchisq3'] = rows[i][15] data[i]['dchisq4'] = rows[i][16] data[i]['fracdev'] = rows[i][17] data[i]['fracdev_ivar'] = rows[i][18] data[i]['shapeexp_r'] = rows[i][19] data[i]['shapeexp_r_ivar'] = rows[i][20] data[i]['shapeexp_e1'] = rows[i][21] data[i]['shapeexp_e1_ivar'] = rows[i][22] data[i]['shapeexp_e2'] = rows[i][23] data[i]['shapeexp_e2_ivar'] = rows[i][24] data[i]['shapedev_r'] = rows[i][25] data[i]['shapedev_r_ivar'] = rows[i][26] data[i]['shapedev_e1'] = rows[i][27] 
data[i]['shapedev_e1_ivar'] = rows[i][28] data[i]['shapedev_e2'] = rows[i][29] data[i]['shapedev_e2_ivar'] = rows[i][30] hdu = fits.BinTableHDU(data, header=priheader) elif result['table'] == 'DECAM': # try building from columns data = { 'cand_id': [], 'decam_flux': [], 'decam_flux_ivar': [], 'decam_fracflux': [], 'decam_fracmasked': [], 'decam_fracin': [], 'decam_rchi2': [], 'decam_nobs': [], 'decam_anymask': [], 'decam_allmask': [], 'decam_ext': [] } for i in range(0, nrows): data['cand_id'].append(rows[i][0]) data['decam_flux'].append([ rows[i][1], rows[i][11], rows[i][21], rows[i][31], rows[i][41], rows[i][51] ]) data['decam_flux_ivar'].append([ rows[i][2], rows[i][12], rows[i][22], rows[i][32], rows[i][42], rows[i][52] ]) data['decam_fracflux'].append([ rows[i][3], rows[i][13], rows[i][23], rows[i][33], rows[i][43], rows[i][53] ]) data['decam_fracmasked'].append([ rows[i][4], rows[i][14], rows[i][24], rows[i][34], rows[i][44], rows[i][54] ]) data['decam_fracin'].append([ rows[i][5], rows[i][15], rows[i][25], rows[i][35], rows[i][45], rows[i][55] ]) data['decam_rchi2'].append([ rows[i][6], rows[i][16], rows[i][26], rows[i][36], rows[i][46], rows[i][56] ]) data['decam_nobs'].append([ rows[i][7], rows[i][17], rows[i][27], rows[i][37], rows[i][47], rows[i][57] ]) data['decam_anymask'].append([ rows[i][8], rows[i][18], rows[i][28], rows[i][38], rows[i][48], rows[i][58] ]) data['decam_allmask'].append([ rows[i][9], rows[i][19], rows[i][29], rows[i][39], rows[i][49], rows[i][59] ]) data['decam_ext'].append([ rows[i][10], rows[i][20], rows[i][30], rows[i][40], rows[i][50], rows[i][60] ]) c1 = Column(name='cand_id', format='J', array=data['cand_id']) c2 = Column(name='decam_flux', format='6D', array=data['decam_flux']) c3 = Column(name='decam_flux_ivar', format='6D', array=data['decam_flux_ivar']) c4 = Column(name='decam_fracflux', format='6D', array=data['decam_fracflux']) c5 = Column(name='decam_fracmasked', format='6D', array=data['decam_fracmasked']) c6 = 
Column(name='decam_fracin', format='6D', array=data['decam_fracin']) c7 = Column(name='decam_rchi2', format='6D', array=data['decam_rchi2']) c8 = Column(name='decam_nobs', format='6D', array=data['decam_nobs']) c9 = Column(name='decam_anymask', format='6D', array=data['decam_anymask']) c10 = Column(name='decam_allmask', format='6D', array=data['decam_allmask']) c11 = Column(name='decam_ext', format='6D', array=data['decam_ext']) hdu = fits.BinTableHDU.from_columns( [c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11]) # dtypes = [('cand_id', 'i4'),('decam_flux',np.float64(6,)), # ('decam_flux_ivar',np.float64(6,)),('decam_fracflux',np.float64(6,)),('decam_fracmasked',np.float64(6,)),('decam_fracin',np.float64(6,)), # ('decam_rchi2',np.float64(6,)),('decam_nobs',np.float64(6,)),('decam_anymask',np.float64(6,)),('decam_allmask',np.float64(6,)),('decam_ext',np.float64(6,))] # data = np.zeros((nrows,6), dtype=dtypes) elif result['table'] == 'WISE': dtypes = [()] #line_cand = [ tbdata['brickid'][i], tbdata['objid'][i], tbdata['blob'][i], tbdata['type'][i], tbdata['ra'][i], tbdata['ra_ivar'][i], tbdata['dec'][i], tbdata['dec_ivar'][i], tbdata['bx'][i], tbdata['by'][i], tbdata['bx0'][i], tbdata['by0'][i], bool(lb), bool(oob), tbdata['ebv'][i], tbdata['dchisq'][i][0], tbdata['dchisq'][i][1], tbdata['dchisq'][i][2], tbdata['dchisq'][i][3], tbdata['fracDev'][i], tbdata['fracDev_ivar'][i], tbdata['shapeExp_r'][i], tbdata['shapeExp_r_ivar'][i], tbdata['shapeExp_e1'][i], tbdata['shapeExp_e1_ivar'][i], tbdata['shapeExp_e2'][i], tbdata['shapeExp_e2_ivar'][i], tbdata['shapeDev_r'][i], tbdata['shapeDev_r_ivar'][i], tbdata['shapeDev_e1'][i], tbdata['shapeDev_e1_ivar'][i], tbdata['shapeDev_e2'][i], tbdata['shapeDev_e2_ivar'][i] ] # for later # ('decam_flux','f8',(3,4)),('decam_flux_ivar','f8',(3,4)),('decam_apflux','f8',(3,4)), # ('decam_apflux_resid','f8',(3,4)),('decam_apflux_ivar','f8',(3,4)),('decam_mw_transmission','f8',(3,4)), # 
('decam_nobs','f8',(3,4)),('decam_rchi2','f8',(3,4)),('decam_fracflux','f8',(3,4)),('decam_fracmasked','f8',(3,4)), # ('decam_fracin','f8',(3,4)),('decam_saturated','f8',(3,4)),('out_of_bounds','f8',(3,4)),('decam_anymask','f8',(3,4)), # ('decam_allmask','f8',(3,4)) outfile = 'data.fits' fits.writeto(outfile, hdu.data, hdu.header, clobber=True) fsock = open(outfile, "rb") response = StreamingHttpResponse(fsock, content_type='application/fits') response['Content-Disposition'] = 'attachment; filename="' + outfile + '"' return response
def main():
    """Build the extended-source archive from the master archive YAML file.

    Command line: ``--outname`` and ``--vernum`` (both required) plus the
    positional path of the master YAML file.

    The YAML file is loaded into ``sources``; every catalog column is filled
    by ``build_column_array(column_name, sources, npar_max)``; the resulting
    table is sorted by ``RAJ2000`` and written to
    ``<outname>_v<vernum>/LAT_extended_sources_v<vernum>.fits``.  One XML
    file per source is then written to ``<outname>_v<vernum>/XML`` via
    ``to_xml``.
    """
    # argparse prepends "usage: " on its own, so the template must not
    # repeat it.
    usage = "%(prog)s [archive file]"
    description = "Build the extended archive from the master archive YAML file."

    parser = argparse.ArgumentParser(usage=usage, description=description)
    parser.add_argument('--outname', default=None, required=True)
    parser.add_argument('--vernum', default=0, required=True)
    parser.add_argument('masterfile',
                        help='Extended archive master YAML file.')
    args = parser.parse_args()

    # Maximum number of spectral parameters stored per source.
    npar_max = 5

    # Close the file deterministically and use the safe loader: a bare
    # yaml.load() needs an explicit Loader on recent PyYAML and can construct
    # arbitrary Python objects.
    # NOTE(review): assumes the master file only contains plain
    # mappings/sequences/scalars -- confirm no custom YAML tags are used.
    with open(args.masterfile) as masterfile:
        sources = yaml.safe_load(masterfile)

    cols = [
        Column(name='Source_Name', format='18A'),
        Column(name='RAJ2000', format='E', unit='deg', disp='F8.4'),
        Column(name='DEJ2000', format='E', unit='deg', disp='F8.4'),
        Column(name='GLON', format='E', unit='deg', disp='F8.4'),
        Column(name='GLAT', format='E', unit='deg', disp='F8.4'),
        Column(name='Photon_Flux', format='E', unit='ph cm-2 s-1',
               disp='E8.2'),
        Column(name='Energy_Flux', format='E', unit='erg cm-2 s-1',
               disp='E8.2'),
        Column(name='Model_Form', format='12A'),
        Column(name='Model_SemiMajor', format='E', unit='deg', disp='E7.3'),
        Column(name='Model_SemiMinor', format='E', unit='deg', disp='E7.3'),
        Column(name='Model_PosAng', format='E', unit='deg', disp='E6.1'),
        Column(name='Spatial_Function', format='15A'),
        Column(name='Spatial_Filename', format='50A'),
        Column(name='Spectral_Function', format='12A'),
        Column(name='Spectral_Filename', format='40A'),
        Column(name='Name_1FGL', format='18A'),
        Column(name='Name_2FGL', format='18A'),
        Column(name='Name_3FGL', format='18A'),
        Column(name='Spectral_Param_Name', format='45A9'),
        Column(name='Spectral_Param_Value', format='E', dim=str(npar_max),
               disp='E9.4'),
        Column(name='Spectral_Param_Error', format='E', dim=str(npar_max),
               disp='E9.4'),
        Column(name='Spectral_Param_Scale', format='E', dim=str(npar_max)),
    ]

    for c in cols:
        c.array = build_column_array(c.name, sources, npar_max)

    record = FITS_rec.from_columns(cols)
    record.sort(order="RAJ2000")

    outdir = "{}_v{}".format(args.outname, args.vernum)
    mkdir(outdir)

    fitsname = "LAT_extended_sources_v{}.fits".format(args.vernum)
    output = BinTableHDU(record)
    output.writeto(os.path.join(outdir, fitsname), overwrite=True)

    xmldir = os.path.join(outdir, 'XML')
    mkdir(xmldir)

    # One XML file per source, named after the source with blanks removed.
    for source in sources.values():
        xmlname = source['Source_Name'].replace(' ', '') + '.xml'
        to_xml(os.path.join(xmldir, xmlname), source['Source_Name'], source)
# FERENGI decision tree, listed in output-column order.  Each entry is
#   (question id in the classification CSV,
#    prefix of the output column names,
#    prefix of the answer keys,
#    answer labels in answer-index order).
# Answer number N of a question is stored in the CSV as '<key prefix>-N' and
# collated into the column '<column prefix>_<label>_frac'; the per-question
# vote total goes into '<column prefix>_count'.
# Note the answer order in the csv is not the same as the task numbers in
# hubble zoo; it's based on
# https://github.com/zooniverse/Galaxy-Zoo/blob/master/app/lib/ferengi_tree.coffee
_FERENGI_TASKS = [
    ('ferengi-0', 't01_smooth_or_features', 'a',
     ['a01_smooth', 'a02_features', 'a03_artifact']),
    ('ferengi-9', 't02_disk_edge_on', 'a',
     ['a04_yes', 'a05_no']),
    ('ferengi-11', 't03_bar', 'a',
     ['a06_bar', 'a07_no_bar']),
    ('ferengi-12', 't04_spiral', 'a',
     ['a08_spiral', 'a09_no_spiral']),
    ('ferengi-15', 't05_bulge_prominence', 'a',
     ['a10_no_bulge', 'a11_just_noticeable', 'a12_obvious', 'a13_dominant']),
    ('ferengi-17', 't06_odd', 'a',
     ['a14_yes', 'a15_no']),
    ('ferengi-1', 't07_rounded', 'a',
     ['a16_completely_round', 'a17_in_between', 'a18_cigar_shaped']),
    # Multiple-choice check boxes: answers are keyed 'x-N', not 'a-N'.
    ('ferengi-18', 't08_odd_feature', 'x',
     ['a19_ring', 'a20_lens', 'a21_disturbed', 'a22_irregular', 'a23_other',
      'a24_merger', 'a38_dustlane']),
    ('ferengi-10', 't09_bulge_shape', 'a',
     ['a25_rounded', 'a26_boxy', 'a27_no_bulge']),
    ('ferengi-13', 't10_arms_winding', 'a',
     ['a28_tight', 'a29_medium', 'a30_loose']),
    ('ferengi-14', 't11_arms_number', 'a',
     ['a31_1', 'a32_2', 'a33_3', 'a34_4', 'a36_more_than_4',
      'a37_cant_tell']),
    ('ferengi-2', 't14_clumpy', 'a',
     ['a39_yes', 'a40_no']),
    ('ferengi-5', 't16_bright_clump', 'a',
     ['a43_yes', 'a44_no']),
    ('ferengi-6', 't17_bright_clump_central', 'a',
     ['a45_yes', 'a46_no']),
    ('ferengi-4', 't18_clumps_arrangement', 'a',
     ['a47_line', 'a48_chain', 'a49_cluster', 'a59_spiral']),
    ('ferengi-3', 't19_clumps_count', 'a',
     ['a50_2', 'a51_3', 'a52_4', 'a53_more_than_4', 'a54_cant_tell',
      'a60_1']),
    ('ferengi-7', 't20_clumps_symmetrical', 'a',
     ['a55_yes', 'a56_no']),
    ('ferengi-8', 't21_clumps_embedded', 'a',
     ['a57_yes', 'a58_no']),
    ('ferengi-16', 't22_discuss', 'a',
     ['a61_yes', 'a62_no']),
]


def collate_classifications(ferengi_filename):
    """Collate raw FERENGI classifications into per-subject vote fractions.

    Reads the classification CSV ``ferengi_filename`` (one row per
    classification, with a ``subject_id`` column and one column per question
    ``ferengi-0`` .. ``ferengi-18``), and builds a FITS table with one row per
    unique subject.  For every question the table holds the number of
    non-blank answers (``*_count``) and, for every possible answer, the
    fraction of those answers that chose it (``*_frac``).

    ``ferengi-18`` (odd features) is treated separately: several boxes can be
    ticked at once, so each stored answer is a ';'-joined combination and the
    resulting fractions need not add up to 1.

    The table is written to
    ``<path_class>/ferengi_classifications_collated.fits`` where
    ``path_class`` is a module-level name; nothing is returned.
    """
    print('')
    print('Reading %s ...' % ferengi_filename)

    # using pandas is faster even without low-memory shortcut
    this_data = pd.read_csv(ferengi_filename, low_memory=False)
    subjects = this_data.subject_id.unique()

    print('')
    print('Creating columns for vote fractions...')

    # Template arrays: integer zeros, float zeros and blank 24-char strings.
    intcolumn = np.zeros(len(subjects), dtype=int)
    floatcolumn = np.zeros(len(subjects), dtype=float)
    strcolumn = np.array([' '] * len(subjects), dtype='S24')

    # Build the FITS columns and the {question: {answer key: column name}}
    # lookup from the same table so the two can never disagree.  Every count
    # column is an integer ('J') column backed by the integer template.
    columns = [Column(name='subject_id', format='A24', array=strcolumn)]
    frac_dict = {}
    for question, task, key_prefix, labels in _FERENGI_TASKS:
        colnames = {}
        for i_answer, label in enumerate(labels):
            frac_name = '%s_%s_frac' % (task, label)
            colnames['%s-%i' % (key_prefix, i_answer)] = frac_name
            columns.append(
                Column(name=frac_name, format='D', array=floatcolumn))
        count_name = '%s_count' % task
        colnames['count'] = count_name
        columns.append(Column(name=count_name, format='J', array=intcolumn))
        frac_dict[question] = colnames

    weird_question = 'ferengi-18'

    classifications = pyfits.new_table(columns)
    subjDB = pyfits.new_table(classifications.columns)

    for idx, s in enumerate(subjects):
        subjDB.data.field('subject_id')[idx] = s

    questions = ['ferengi-%i' % j for j in range(len(frac_dict))]
    questions.remove(weird_question)

    print('Counting classifications...')
    print('new')

    for q in questions:
        print('%s %s' % (q, datetime.datetime.now().strftime('%H:%M:%S.%f')))

        # Series indexed by (subject_id, answer) holding the number of times
        # each non-blank answer was given for this question.
        this_question = this_data[q].groupby(this_data.subject_id).apply(
            lambda x: x.value_counts())
        # Total number of non-blank answers to this question per subject_id.
        N_answer_total = this_question.groupby(level=0).sum()

        count_col = frac_dict[q]['count']
        answer_cols = [(key, name) for key, name in frac_dict[q].items()
                       if key != 'count']

        # for some reason about 1/4 of the objects weren't actually
        # classified; those have no entry in the counts, so skip them
        # (but count them)
        errors = 0
        for idx, s in enumerate(subjects):
            try:
                n_total = N_answer_total[s]
                votes = this_question[s]
            except KeyError:
                errors += 1
                continue

            subjDB.data.field(count_col)[idx] = n_total

            for key, colname in answer_cols:
                try:
                    n_votes = votes[key]
                except KeyError:
                    # nobody picked this answer for this subject
                    continue
                subjDB.data.field(colname)[idx] = (
                    n_votes / float(n_total) if n_total > 0 else 0.)

        if errors:
            print('%s: %i subjects without any answer' % (q, errors))

    # now do the weird question(s)
    print('%s %s' % (weird_question,
                     datetime.datetime.now().strftime('%H:%M:%S.%f')))
    this_question = this_data[weird_question].groupby(
        this_data.subject_id).apply(lambda x: x.value_counts())
    # here's why this question is weird: users can click on more than one
    # option, and answers are stored as unique combinations of answer
    # choices, e.g. 'a-0;x-2;x-4;x-6'.  So each combination has to be parsed
    # for each subject separately.
    # Computed once up front rather than once per subject.
    n_answers_by_subject = this_question.groupby(level=0).sum()

    # Columns must be looked up under weird_question explicitly: `q` still
    # names the last single-answer question processed by the loop above.
    weird_cols = frac_dict[weird_question]
    weird_answer_cols = [name for key, name in weird_cols.items()
                         if key != 'count']

    for idx, s in enumerate(subjects):
        try:
            n_answers = n_answers_by_subject[s]
            combos = this_question[s]
        except KeyError:
            # subject has no answer to this question
            continue

        subjDB.data.field(weird_cols['count'])[idx] = n_answers

        # First accumulate raw vote counts per ticked box ...
        for combo, n_votes in zip(combos.index, combos.values):
            for this_ans in combo.split(';'):
                # note there is an a-0, which is clicking the "next" button;
                # as you *must* click next to advance, its fraction would
                # always be 1.0, so it has no column and is skipped here.
                colname = weird_cols.get(this_ans)
                if colname is None:
                    continue
                subjDB.data.field(colname)[idx] += n_votes

        # ... then turn the counts into fractions (which need not add to 1).
        for colname in weird_answer_cols:
            field = subjDB.data.field(colname)
            field[idx] = field[idx] / float(n_answers) if n_answers > 0 else 0.

    print('Finished looping over classifications %s'
          % datetime.datetime.now().strftime('%H:%M:%S.%f'))

    # Write final data to FITS file
    subjDB.writeto('%s/ferengi_classifications_collated.fits' % path_class,
                   clobber=True)
'mu_max_i'] > 0: #these have measurements for z and SB, put in NEI category_list_ferengi.append({ 'objid': row['subject_id'], 'Correctable_Category': 'nei' }) else: #these have nan or infinite values of z or mu, put in need_redshift_list category_list_ferengi.append({ 'objid': row['subject_id'], 'Correctable_Category': 'nei_needs_redshift' }) #create fits file of galaxies with Correctable_Category Label for FERENGI strcolumn = np.array([' '] * len(category_list_ferengi), dtype='S24') c0 = Column(name='objid', format='A24', array=strcolumn) c01 = Column(name='Correctable_Category', format='A24', array=strcolumn) cols = fits.TableHDU.from_columns([c0, c01]) category_table_ferengi = fits.TableHDU.from_columns(cols.columns) for i, gal in enumerate(category_list_ferengi): category_table_ferengi.data.field('objid')[i] = gal['objid'] category_table_ferengi.data.field( 'Correctable_Category')[i] = gal['Correctable_Category'] #category_table_ferengi.writeto('ferengi_debiasable_data.fits',clobber=True) return p_range_correctable_dct, p_range_uncorrectable_dct, interval_dct def categorize_hubble_data(p_range_correctable_dct, p_range_uncorrectable_dct, interval_dct):
weight_data=weights[1].data print 'organizing subjects...' subjects = set(data['subject_id']) print 'Creating columns for vote fractions...' # Create column of integer zeros and float zeros intcolumn = np.zeros(len(subjects),dtype=int) floatcolumn = np.zeros(len(subjects),dtype=float) strcolumn = np.array([' ']*len(subjects),dtype='S24') #S24=24 character string #c01 = Column(name='num_classifications', format='J', array=intcolumn) # c05 = c01, by definition #format for Columns: D = double precision floating point, J = integer c01 = Column(name='subject_id', format='A24', array=strcolumn) # c05 = c01, by definition c02 = Column(name='t00_smooth_or_features_a0_smooth_frac_weighted_%s'%run, format='D', array=floatcolumn) c03 = Column(name='t00_smooth_or_features_a1_features_frac_weighted_%s'%run, format='D', array=floatcolumn) c04 = Column(name='t00_smooth_or_features_a2_artifact_frac_weighted_%s'%run, format='D', array=floatcolumn) c05 = Column(name='t00_smooth_or_features_count_weighted_%s'%run, format='D', array=floatcolumn) c06 = Column(name='t01_rounded_a0_completely_round_frac_weighted_%s'%run, format='D', array=floatcolumn) c07 = Column(name='t01_rounded_a1_in_between_frac_weighted_%s'%run, format='D', array=floatcolumn) c08 = Column(name='t01_rounded_a2_cigar_shaped_frac_weighted_%s'%run, format='D', array=floatcolumn) c09 = Column(name='t01_rounded_count_weighted_%s'%run, format='D', array=floatcolumn) c10 = Column(name='t02_clumps_a0_yes_frac_weighted_%s'%run, format='D', array=floatcolumn) c11 = Column(name='t02_clumps_a1_no_frac_weighted_%s'%run, format='D', array=floatcolumn) c12 = Column(name='t02_clumps_count_weighted_%s'%run, format='D', array=floatcolumn)
def _ferengi_bin_keys(mu, z, yedges, redshifts):
    """Yield the (redshift, lower SB edge) key of each bin containing (mu, z).

    A surface-brightness bin is ``(yedges[y], yedges[y + 1]]`` and a redshift
    bin is ``(redshift - .05, redshift + .05]``.
    """
    for y in range(len(yedges) - 1):
        if yedges[y] < mu <= yedges[y + 1]:
            for z_bin in redshifts:
                if z_bin - .05 < z <= z_bin + .05:
                    yield z_bin, yedges[y]


def categorize_hubble_data(p_range_correctable_dct, p_range_uncorrectable_dct,
                           interval_dct):
    """Label every Hubble galaxy by how correctable its p_features is.

    Run zeta method on Hubble data to determine which galaxies are
    correctable as function of SB, z, p_features based on the FERENGI values.

    Parameters
    ----------
    p_range_correctable_dct, p_range_uncorrectable_dct : dict
        Keyed by ``(redshift, lower SB edge)``; element 0 and 1 of each value
        are used as the exclusive lower / inclusive upper ``p_features``
        bound of the (un)correctable range in that bin.
    interval_dct : dict
        Keyed like the above; each value is an iterable of dicts with keys
        ``bin_bottom``, ``bin_top``, ``low_limit`` and ``hi_limit``.

    The galaxies are read from ``../data/Hubble_t01_data.fits`` (HDU 1) and
    the bin edges come from ``bins()``.  Per-survey and total counts of each
    category are printed.  Always returns None.
    """
    from collections import Counter

    hubble_data = fits.getdata('../data/Hubble_t01_data.fits', 1)
    yedges, redshifts = bins()

    mu_min, mu_max = yedges[0], yedges[-1]
    z_max = redshifts[-1] + .05

    # Possible outcomes for a galaxy:
    #   1. within FERENGI space, correctable
    #   2. within FERENGI space, uncorrectable
    #   3. within FERENGI space, not enough information (nei)
    #   4. outside FERENGI space, redshift z < 0.3
    #   5. outside FERENGI space, no redshift
    #   6. outside FERENGI space, nei
    # Looping over every row is slow (reportedly 5-10 minutes).
    category_list = []
    for row in hubble_data:
        mu, z, p_features = row['MU_HI'], row['Z'], row['p_features']

        if mu_min < mu <= mu_max and redshifts[0] < z <= z_max:
            # Within FERENGI space: locate the SB/z bin and compare
            # p_features against the ranges measured there.
            for key in _ferengi_bin_keys(mu, z, yedges, redshifts):
                correctable = p_range_correctable_dct[key]
                if correctable[0] < p_features <= correctable[1]:
                    category = 'correctable'
                else:
                    uncorrectable = p_range_uncorrectable_dct[key]
                    if uncorrectable[0] < p_features <= uncorrectable[1]:
                        category = 'uncorrectable'
                    else:
                        # in neither range, so nei
                        category = 'nei'
                category_list.append({
                    'objid': row['OBJNO'],
                    'Correctable_Category': category,
                    'Imaging': row['Imaging']
                })
            continue

        # Outside FERENGI SB and z limits - still need to have measurable z
        # and SB to possibly correct.
        if .3 <= z < 9 and mu > 0:
            # these have measurements for z and SB, put in NEI
            category = 'nei'
        elif 0 < z < .3:
            # don't need to be corrected, z < z0
            category = 'z_lt_3'
        else:
            # these have nan or infinite values of z or mu
            category = 'nei_needs_redshift'
        category_list.append({
            'objid': row['OBJNO'],
            'Correctable_Category': category,
            'Imaging': row['Imaging']
        })

    # Lower/upper p_features limits.  Note the redshift floor here is
    # redshifts[0] - .05, i.e. the full lowest redshift bin is included.
    low_hi_limit_list = []
    for row in hubble_data:
        mu, z, p_features = row['MU_HI'], row['Z'], row['p_features']
        if not (mu_min < mu <= mu_max and redshifts[0] - .05 < z <= z_max):
            continue
        for key in _ferengi_bin_keys(mu, z, yedges, redshifts):
            for bin_range in interval_dct[key]:
                if bin_range['bin_bottom'] <= p_features < bin_range['bin_top']:
                    low_hi_limit_list.append({
                        'objid': row['OBJNO'],
                        'low_limit': bin_range['low_limit'],
                        'hi_limit': bin_range['hi_limit']
                    })

    # Tally categories per survey in one pass over category_list.
    per_survey = Counter((gal['Imaging'], gal['Correctable_Category'])
                         for gal in category_list)
    survey_report = [
        ('correctable',
         'the number of correctable galaxies in %s is %i'),
        ('uncorrectable',
         'the number of uncorrectable galaxies in %s is %i'),
        ('z_lt_3',
         'the number of galaxies with z < 0.3 in %s is %i'),
        ('nei',
         'the number of NEI galaxies in %s is (due to not enough FERENGI galaxies in bin) is %i'),
        ('nei_needs_redshift',
         'the number of NEI galaxies in %s is (due to needing redshift measurements) is %i'),
    ]

    imaging_list = set(hubble_data['Imaging'])
    totals = Counter()
    for survey in imaging_list:
        for category, message in survey_report:
            n_survey = per_survey[(survey, category)]
            totals[category] += n_survey
            print(message % (survey, n_survey))

    print('total correctable: %i' % totals['correctable'])
    print('total uncorrectable: %i' % totals['uncorrectable'])
    print('total z less than .3: %i' % totals['z_lt_3'])
    print('total nei: %i' % totals['nei'])
    print('total nr: %i' % totals['nei_needs_redshift'])
    print('total: %i' % len(category_list))

    # Table of galaxies with their Correctable_Category label.
    # NOTE: the table is built but not written; the writeto call below is
    # disabled.
    strcolumn = np.array([' '] * len(category_list), dtype='S24')
    c0 = Column(name='objid', format='A24', array=strcolumn)
    c01 = Column(name='Correctable_Category', format='A24', array=strcolumn)
    cols = fits.TableHDU.from_columns([c0, c01])
    category_table = fits.TableHDU.from_columns(cols.columns)
    for i, gal in enumerate(category_list):
        category_table.data.field('objid')[i] = gal['objid']
        category_table.data.field(
            'Correctable_Category')[i] = gal['Correctable_Category']
    #category_table.writeto('category_table.fits',clobber=True)

    # Table of lower and upper limits for p_features (also not written).
    floatcolumn = np.zeros(len(low_hi_limit_list), dtype=float)
    strcolumn = np.array([' '] * len(low_hi_limit_list), dtype='S24')
    c2 = Column(name='objid', format='A24', array=strcolumn)
    c3 = Column(name='low_limit', format='D', array=floatcolumn)
    c4 = Column(name='high_limit', format='D', array=floatcolumn)
    cols2 = fits.TableHDU.from_columns([c2, c3, c4])
    limit_table = fits.TableHDU.from_columns(cols2.columns)
    for i, gal in enumerate(low_hi_limit_list):
        limit_table.data.field('objid')[i] = gal['objid']
        limit_table.data.field('low_limit')[i] = gal['low_limit']
        limit_table.data.field('high_limit')[i] = gal['hi_limit']
    #limit_table.writeto('limits_table.fits',clobber=True)

    return None
def create_fits_file(msid, data, dtype):
    """
    create a binary table fits file from a list of column data
    input:  msid    --- msid; also used as the name of the second column
            data    --- a list of list of data; the first 16 entries are used,
                        one per column, in the order of col_names / col_format
            dtype   --- data type (week, short, or others)
    output: ./<msid>_<dtype>_data.fits
            (./<msid>_data.fits when dtype is neither week nor short)
    """
    #
    #--- work on a copy: assigning into col_names itself would leave the msid
    #--- of this call in the module-level template for every later user
    #
    cols = list(col_names)
    cols[1] = msid
    #
    #--- the table always has 16 columns; a shorter col_names, col_format
    #--- or data raises IndexError
    #
    n_cols = 16
    columns = [
        Column(name=cols[k], format=col_format[k], array=data[k])
        for k in range(n_cols)
    ]

    coldefs = pyfits.ColDefs(columns)
    tbhdu = pyfits.BinTableHDU.from_columns(coldefs)

    if dtype in ('week', 'short'):
        ofits = msid + '_' + dtype + '_data.fits'
    else:
        ofits = msid + '_data.fits'
    #
    #--- presumably removes a previous version of the file so that writeto
    #--- does not refuse to overwrite it --- confirm against mcf.rm_files
    #
    mcf.rm_files(ofits)
    tbhdu.writeto(ofits)
# Decision tree of the project, one entry per question:
#   (question id, column-name prefix, ((answer id, answer label), ...))
# Each question has a question number from ukidss-0 to ukidss-11 and its answer
# ids restart at a-0 (in GZ2/GZH the answer numbers were themselves unique, in
# Ouroboros they are not). The x-N ids of ukidss-6 are click boxes.
_UKIDSS_TREE = (
    ('ukidss-0', 't00_smooth_or_features',
     (('a-0', 'a0_smooth'), ('a-1', 'a1_features'), ('a-2', 'a2_artifact'))),
    ('ukidss-1', 't01_disk_edge_on',
     (('a-0', 'a0_yes'), ('a-1', 'a1_no'))),
    ('ukidss-2', 't02_bar',
     (('a-0', 'a0_bar'), ('a-1', 'a1_no_bar'))),
    ('ukidss-3', 't03_spiral',
     (('a-0', 'a0_spiral'), ('a-1', 'a1_no_spiral'))),
    ('ukidss-4', 't04_bulge_prominence',
     (('a-0', 'a0_no_bulge'), ('a-1', 'a1_just_noticeable'),
      ('a-2', 'a2_obvious'), ('a-3', 'a3_dominant'))),
    ('ukidss-5', 't05_odd',
     (('a-0', 'a0_yes'), ('a-1', 'a1_no'))),
    ('ukidss-6', 't06_odd_feature',
     (('x-0', 'x0_ring'), ('x-1', 'x1_lens'), ('x-2', 'x2_disturbed'),
      ('x-3', 'x3_irregular'), ('x-4', 'x4_other'), ('x-5', 'x5_merger'),
      ('x-6', 'x6_dustlane'), ('a-0', 'a0_discuss'))),
    ('ukidss-7', 't07_rounded',
     (('a-0', 'a0_completely_round'), ('a-1', 'a1_in_between'),
      ('a-2', 'a2_cigar_shaped'))),
    ('ukidss-8', 't08_bulge_shape',
     (('a-0', 'a0_rounded'), ('a-1', 'a1_boxy'), ('a-2', 'a2_no_bulge'))),
    ('ukidss-9', 't09_arms_winding',
     (('a-0', 'a0_tight'), ('a-1', 'a1_medium'), ('a-2', 'a2_loose'))),
    ('ukidss-10', 't10_arms_number',
     (('a-0', 'a0_1'), ('a-1', 'a1_2'), ('a-2', 'a2_3'), ('a-3', 'a3_4'),
      ('a-4', 'a4_more_than_4'), ('a-5', 'a5_cant_tell'))),
    ('ukidss-11', 't11_discuss',
     (('a-0', 'a0_yes'), ('a-1', 'a1_no'))),
)

# Questions where one classification can carry several ';'-separated answer ids.
_UKIDSS_MULTI_ANSWER = ('ukidss-6',)


def _tally_answers(answers, answer_columns, multiple=False):
    """Return (number of answers, {column name: vote fraction}) for one question.

    `answers` is an iterable of the answer values given for one subject and
    `answer_columns` maps an answer id to its output column name. Answer ids
    that have no column are ignored. With `multiple` set, each answer value is
    split on ';' and every part is credited with the full count of that value.
    """
    counts = Counter(answers)
    n_votes = sum(counts.values())
    fractions = {}
    for answer, n_answer in counts.items():
        choices = str(answer).split(';') if multiple else [answer]
        for choice in choices:
            column = answer_columns.get(choice)
            if column is None:
                continue
            # Accumulate: with multiple answers the same option can be reached
            # through several distinct answer values ('x-0' and 'x-0;x-3').
            fractions[column] = (fractions.get(column, 0.)
                                 + n_answer / float(n_votes))
    return n_votes, fractions


def collate_classifications(ukidss_filename):
    """Collate individual classifications into per-subject vote fractions.

    The table in `ukidss_filename` is read, and for every distinct
    `subject_id` one output row is filled with, per question, the number of
    answers (`*_count`) and the fraction of those answers that went to each
    option (`*_frac`). Rows with no answers for a question keep 0 in all of
    that question's columns.

    For ukidss-6 (odd features) more than one option can be selected; such an
    answer is stored as the option ids joined by ';', and every listed option
    is credited with that classification. Its fractions can therefore add up
    to more than 1.

    The result is written to
    '<path_class>/ukidss_classifications_collated.fits', replacing an existing
    file; `path_class` is a module-level name. Nothing is returned.
    """
    print('')
    print('Reading %s ...' % ukidss_filename)
    # NOTE(review): the second positional argument 'b' is passed through as in
    # the original code -- confirm what ascii.read does with it.
    data = ascii.read(ukidss_filename, 'b')
    subjects = set(data['subject_id'])

    print('Creating columns for vote fractions...')

    # Zero / blank templates used to initialise every column of the table.
    n_subjects = len(subjects)
    intcolumn = np.zeros(n_subjects, dtype=int)
    floatcolumn = np.zeros(n_subjects, dtype=float)
    strcolumn = np.array([' '] * n_subjects, dtype='S24')

    # Columns and the answer-id -> column-name lookup are generated from the
    # same tree, so the two cannot drift apart. Column order per question is
    # all *_frac columns followed by the *_count column.
    columns = [Column(name='subject_id', format='A24', array=strcolumn)]
    questions = []
    for question, prefix, answers in _UKIDSS_TREE:
        answer_columns = {}
        for answer_id, label in answers:
            frac_name = '%s_%s_frac' % (prefix, label)
            answer_columns[answer_id] = frac_name
            columns.append(Column(name=frac_name, format='D',
                                  array=floatcolumn))
        count_name = '%s_count' % prefix
        columns.append(Column(name=count_name, format='J', array=intcolumn))
        questions.append((question, count_name, answer_columns,
                          question in _UKIDSS_MULTI_ANSWER))

    # NOTE(review): new_table is deprecated in newer PyFITS/astropy releases in
    # favour of BinTableHDU.from_columns -- kept to match the installed API.
    # The second call is kept from the original code; presumably it detaches
    # the table from the shared template arrays -- TODO confirm it is needed.
    classifications = pyfits.new_table(columns)
    subjDB = pyfits.new_table(classifications.columns)
    table = subjDB.data

    print('Counting classifications...')
    print('new')

    for idx, s in enumerate(subjects):

        # Progress report with a time stamp every 1000 subjects.
        if idx % 1000 == 0:
            print('%s %s' % (idx,
                             datetime.datetime.now().strftime('%H:%M:%S.%f')))

        # Find each classification for this subject (selected once and reused
        # for every question).
        this_subj = (data['subject_id'] == s)
        votes = data[this_subj]

        table.field('subject_id')[idx] = s

        # Loop over each question in the tree and record count, vote fractions
        for question, count_name, answer_columns, multiple in questions:
            n_votes, fractions = _tally_answers(votes[question],
                                                answer_columns, multiple)
            table.field(count_name)[idx] = n_votes
            for frac_name, fraction in fractions.items():
                table.field(frac_name)[idx] = fraction

    print('Finished looping over classifications')

    # Write final data to FITS file
    subjDB.writeto('%s/ukidss_classifications_collated.fits' % path_class,
                   clobber=True)