Example #1
def xmlColorData(infile, outfile, excludeList=None):
    """
    xmlColorData takes a color catalog file produced by the colorCatalog module and marks it with the 
    pipeline/archive protocol markup. Must use at least the xmlStartCat function above to initialise
    the xml file and use the xmlEndCat function to close things up nicely.
    This function is a variation of the xmlData function above but designed specifically for 
    the multicolor catalog produced by the colorCatalog module.  This was necessitated by the
    non-generalised nature of the multicolor catalog, where column headers were dependent on
    the nature of the data (i.e. different filter names).

    Usage:
      xmlColorData('catalogfile','outputfile')

    K Anderson 02-01-02
      
    """

    if excludeList:
        for field in excludeList:
            print "Note:", field, "being excluded from xml markup", os.path.basename(
                infile)

    catalog = open(infile)
    rows = catalog.readlines()
    catalog.close()

    headerList = fillHeader(infile)

    # Number of sources is the first field of the last row of the rows list.
    lastrow = rows[-1]
    # Strip any decimal part, e.g. '1234.000' -> '1234'
    det_sources = string.split(string.split(lastrow)[0], '.')[0]
    xmlcatalog = open(outfile, 'a')
    xmlcatalog.write(" <data Nobjects=\"%s\">\n" % det_sources)

    # The function fillHeader returns a list of tuples from the header of
    # the color catalog.  fillHeader _may_ return tuples of length 2 or 3,
    # but in the case of the color catalog only length 2 is expected.
    # The crux of the next block is to find all occurrences of different
    # filter names which might be present in a color catalog header and to
    # split such filter-qualified column names into a filter name and a
    # field name; only the field name is used in the markup.  This is done
    # for the general case where MAG and MAGERR columns are not necessarily paired.
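    # For illustration (hypothetical filter names), fillHeader output here
    # might look like:
    #   [(1, 'NUMBER'),
    #    (2, 'HST_ACS_HRC_F250W_MAG_BPZ'),
    #    (3, 'HST_ACS_HRC_F250W_MAGERR_BPZ'), ...]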

    format2 = '\t<field name=\"%s\">%s</field>\n'

    for row in rows:
        if '#' in row: continue
        cols = string.split(row)
        source = cols[0]
        xmlcatalog.write("  <object number=\"" + source + "\">\n")
        for j in range(1, len(headerList)):
            param = cols[j]
            colId = headerList[j]
            if excludeList and colId[1] in excludeList:
                continue
            if string.find(colId[1], "_BPZ") != -1 or string.find(
                    colId[1], "_APER_") != -1:
                # The length of the split filter name will be based on this format:
                # single filter,   len == 6: HST_ACS_HRC_F250W_APER_CORR
                # single filter,   len == 6: HST_ACS_HRC_F250W_MAG_BPZ
                # crossed filters, len == 7: HST_ACS_HRC_POL120UV_F435W_MAGERR_BPZ
                # Raise an exception if len is not one of these values, since
                # anything else means the header is malformed.
                filterId = string.split(colId[1], "_")
                if len(filterId) == 6:
                    filterName = string.join(filterId[:4], "_")
                    fieldName = string.join(filterId[4:], "_")
                    xmlcatalog.write(format2 % (fieldName, param))
                elif len(filterId) == 7:
                    filterName = string.join(filterId[:5], "_")
                    fieldName = string.join(filterId[5:], "_")
                    xmlcatalog.write(format2 % (fieldName, param))
                else:
                    raise SyntaxError, "Error: unexpected filter syntax in multicolor catalog header."
            else:
                column_no, name = colId
                xmlcatalog.write(format2 % (name, param))
        xmlcatalog.write("  </object>\n")
    xmlcatalog.write(" </data>\n")
    xmlcatalog.close()
    return
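
A minimal usage sketch, assuming the companion xmlStartCat and xmlEndCat functions mentioned in the docstring (their exact signatures are an assumption here):

# hypothetical driver; xmlStartCat/xmlEndCat signatures are assumed
xmlStartCat('multicolor.xml')
xmlColorData('multicolor.cat', 'multicolor.xml', excludeList=['NUMBER'])
xmlEndCat('multicolor.xml')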
Example #2
def xmlData(infile, outfile, excludeList=None):
    """
    xmlData takes a SExtractor catalog file and marks it with the pipeline/archive
    protocol markup. Must use at least the xmlStartCat function above to initialise
    the xml file and use the xmlEndCat function to close things up nicely.
    Caller can pass a list of fields to exclude from the xml markup if that is
    desired; if not, excludeList defaults to None.

    Usage:
      xmlData('catalog','outputfile'[,excludeList=['blah']])

      where catalog is the catalog's filename, outputfile is just that, and
      excludeList is an optional list of field names to skip.

      
    """

    if excludeList:
        for field in excludeList:
            print "Note:", field, "being excluded from xml markup of", os.path.basename(
                infile)

    catalog = open(infile)
    rows = catalog.readlines()
    catalog.close()

    headerList = fillHeader(infile)

    # Number of sources is the first field of the last row of the rows list.
    lastrow = rows[-1]
    # Strip any decimal part, e.g. '1234.000' -> '1234'
    det_sources = string.split(string.split(lastrow)[0], '.')[0]
    xmlcatalog = open(outfile, 'a')
    xmlcatalog.write(" <data Nobjects=\"%s\">\n" % det_sources)

    # The function fillHeader returns a list of tuples from the header of
    # the SExtractor catalog.  These tuples may be of length 2 or 3, depending
    # on whether a particular parameter has multiple occurrences (e.g. FLUX_APER).
    # The formats will be used to handle those cases for writing the xml catalog.
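    # e.g. (2, 'X_IMAGE') for a singleton parameter, or (14, 'FLUX_APER', 1)
    # for the first of several FLUX_APER columns.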

    format2 = '\t<field name=\"%s\">%s</field>\n'
    format3 = '\t<field name=\"%s\" number=\"%s\">%s</field>\n'

    for row in rows:
        if '#' in row: continue
        cols = string.split(row)
        source = cols[0]
        xmlcatalog.write("  <object number=\"" + source + "\">\n")
        for j in range(1, len(headerList)):
            param = cols[j]
            if param.lower() == "nan":
                param = "-9999"
                print "Found a nan in catalog ", os.path.basename(infile)
                print "set to -9999 in the markup."
            if len(headerList[j]) == 2:
                column_no, name = headerList[j]
                if excludeList and name in excludeList:
                    continue
                xmlcatalog.write(format2 % (name, param))

            elif len(headerList[j]) == 3:
                column_no, name, number = headerList[j]
                if excludeList and name in excludeList:
                    continue
                xmlcatalog.write(format3 % (name, str(number), param))
            else:
                raise TypeError, "Incompatible tuple size"

        # In the case where some parameter has multiple columns of output and is
        # the last parameter listed, this loop attaches that last parameter name
        # to all remaining values.  This behaviour is seen when the parameter
        # VIGNET is the last one listed and all subsequent columns in the
        # SExtractor catalog are elements of the VIGNET array.

        for j in range(len(headerList), len(cols)):
            param = cols[j]
            xmlcatalog.write(format2 % (name, param))
        xmlcatalog.write("  </object>\n")
    xmlcatalog.write(" </data>\n")
    xmlcatalog.close()
    return
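
A similar bracketing applies here; a minimal sketch (xmlStartCat/xmlEndCat signatures are again assumptions), with the rough shape of the markup each catalog row produces:

# hypothetical driver; xmlStartCat/xmlEndCat signatures are assumed
xmlStartCat('detection.xml')
xmlData('detection.cat', 'detection.xml', excludeList=['VIGNET'])
xmlEndCat('detection.xml')

# each catalog row then appears in the xml roughly as (values hypothetical):
#  <object number="1">
#	<field name="X_IMAGE">512.3</field>
#	<field name="FLUX_APER" number="1">1034.2</field>
#  </object>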
Example #3
    def _magFix(self, catalogFile):
        """This private method receives a path to a catalog file and sifts through the
        MAGERR field looking for values > 10.  It sets the corresponding MAG field = -99 and
        sets that object's MAGERR field to 0.0.  catalogFile is a path not a file object."""

        # fillHeader will return a list of tuples which looks like
        #
        # [(1, 'NUMBER'),
        # (2, 'X_IMAGE'),
        # (3, 'Y_IMAGE'),
        # ...
        # (12, 'MAG_ISOCOR'),
        # (13, 'MAGERR_ISOCOR'),
        # (14, 'FLUX_APER', 1),
        # (15, 'FLUX_APER', 2),
        # (16, 'FLUX_APER', 3),
        # ...
        # ]
        #
        # The tuples are either of length 2 or 3.  If len is 3, the 3rd item of the
        # tuple is the nth occurrence of that column identifier.  This occurs on those
        # columns of MAGs and MAGERRs for a series of increasingly large apertures.

        # newFieldList will be a list of Numeric arrays containing the columns of the catalogs.
        # This list will contain fields which have not been altered, i.e. all fields other than
        # MAG_* and MAGERR_*, and the new MAG and MAGERR fields which have been corrected.
        # Once the list is complete, it is tuple-ized and sent to the tableio put_data function.

        newFieldList = []
        newMagsList = []
        newMagErrsList = []
        newMagHeaders = []
        newMagErrHeaders = []
        newHeaders = []
        magCols = []
        magErrCols = []
        selectSet = fillHeader(catalogFile)

        print "Searching catalog for required columns, MAG, MAGERR"
        for i in range(len(selectSet)):
            if len(selectSet[i]) == 2:
                column, name = selectSet[i]
                paramNames = name.split("_")
                if "MAG" in paramNames:
                    magCols.append((column, name))
                elif "MAGERR" in paramNames:
                    magErrCols.append((column, name))
                else:
                    oldField = tableio.get_data(catalogFile, (column - 1))
                    newFieldList.append(oldField)
                    newHeaders.append(name)
                    continue
            else:
                column, name, id = selectSet[i]
                paramNames = name.split("_")
                if "MAG" in paramNames:
                    magCols.append((column, name, id))
                elif "MAGERR" in paramNames:
                    magErrCols.append((column, name, id))
                else:
                    oldField = tableio.get_data(catalogFile, (column - 1))
                    newFieldList.append(oldField)
                    newHeaders.append(name)
                    continue

        # We now have
        #  catalog field  --> list
        # --------------------------------
        #        MAG_*    --> magCols
        #     MAGERR_*    --> magErrCols
        #
        # The algorithm will be to step through the magErrCols columns, extracting those
        # fields via get_data and getting Numeric arrays.  The matching mag columns are
        # slurped as well.  We search the magErrCols arrays looking for values >= 10 and
        # then mark those mags as -99.0 and the matching magerrs as 0.0.
        # See Bugzilla bug #2700.

        for item in magErrCols:
            magErrAperId = None
            # item may be of len 2 or 3
            if len(item) == 2:
                magErrColId, magErrColName = item
            else:
                magErrColId, magErrColName, magErrAperId = item

            magErrKind = magErrColName.split("_")[1]  # ISO, ISOCOR, etc.

            print "\n\nMAG type:", magErrKind
            if magErrAperId: print magErrColName, "Aper id is", magErrAperId
            print "Getting\t", magErrColName, "\tfield", magErrColId

            # MAGERR array:
            magErrs = tableio.get_data(catalogFile, magErrColId - 1)

            matchingMagColName = None
            matchingMagColId = None

            #----------------------- Search for matching MAG_* field -----------------------#

            for magitems in magCols:

                # We know that the magErrColName is a MAGERR field and if magErrAperId is
                # true then the tuple is of len 3, i.e. a MAGERR_APER field.  We look for
                # the matching MAG_APER field id, 1, 2, 3... etc.

                if len(magitems) == 3:
                    magColId, magColName, magAperId = magitems
                    if magColName == "MAG_" + magErrKind:
                        matchingMagColName = magColName
                        #print "Found matching field type:",magColName,"in field",magColId
                        if magAperId == magErrAperId:
                            print "Found matching aperture id."
                            print "MAG_APER id: ", magAperId, "MAGERR_APER id: ", magErrAperId
                            matchingMagColId = magColId
                            matchingMags = tableio.get_data(
                                catalogFile, magColId - 1)
                            break
                    else:
                        continue
                else:
                    magColId, magColName = magitems
                    if magColName == "MAG_" + magErrKind:
                        print "Found matching field type:", magColName, "in field", magColId
                        matchingMagColName = magColName
                        matchingMagColId = magColId
                        matchingMags = tableio.get_data(
                            catalogFile, magColId - 1)
                        break
                    else:
                        continue

            #--------------------------------------------------------------------------------#

            print " MAG err field:", magErrColName, magErrColId
            print "     Mag field:", matchingMagColName, matchingMagColId

            # Now the grunt work on the arrays,
            # magErrs, matchingMags
            #
            # update: flagging all MAGs as -99 when the corresponding MAGERR >= 10
            # introduced a bug which unintentionally reset the magnitudes
            # SExtractor had flagged with a MAG = 99.0 and a MAGERR = 99.0.
            # This now checks for a MAGERR of 99 and does not reset the MAG value
            # if MAGERR = 99.0, but does for all other MAGERRs >= 10.0.

            badMagErrs1 = Numeric.where(magErrs >= 10, 1, 0)
            badMagErrs2 = Numeric.where(magErrs != 99.0, 1, 0)
            badMagErrs = badMagErrs1 * badMagErrs2
            del badMagErrs1, badMagErrs2
            newMags = Numeric.where(badMagErrs, -99.0, matchingMags)
            newMagErrs = Numeric.where(badMagErrs, 0.0, magErrs)

            newMagsList.append(newMags)
            newMagHeaders.append(matchingMagColName)
            newMagErrsList.append(newMagErrs)
            newMagErrHeaders.append(magErrColName)

        # concatenate the lists.  This is done to preserve the MAG_APER and MAGERR_APER
        # grouping of the original SExtractor catalog.

        newFieldList = newFieldList + newMagsList
        newFieldList = newFieldList + newMagErrsList
        newHeaders = newHeaders + newMagHeaders
        newHeaders = newHeaders + newMagErrHeaders

        newVariables = tuple(newFieldList)

        # rename the old catalog file as catalogFile.old
        os.rename(catalogFile, catalogFile + ".old")
        self.outputList[os.path.basename(catalogFile) +
                        ".old"] = [os.path.basename(catalogFile)]
        fob = open(catalogFile, 'w')
        fob.write("## " + ptime() + "\n")
        fob.write("## " + self.modName +
                  " catalog regenerated by _magFix method.\n")
        fob.write(
            '## (This file was generated automatically by the ACS Pipeline.)\n##\n'
        )
        fob.write(
            "## This catalog has been photometrically corrected to remove\n")
        fob.write("## 'bad' magnitude values.\n")
        fob.write("##\n")
        for i in range(len(newHeaders)):
            fob.write("# " + str(i + 1) + "\t" + newHeaders[i] + "\n")
        fob.close()
        tableio.put_data(catalogFile, newVariables, append="yes")

        return
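
A minimal sketch of the masking arithmetic used above, on toy arrays (values hypothetical; requires the legacy Numeric module):

import Numeric

magErrs = Numeric.array([0.02, 12.5, 99.0])
matchingMags = Numeric.array([21.3, 24.8, 99.0])

# flag MAGERR >= 10, except SExtractor's own 99.0 sentinel
badMagErrs = Numeric.where(magErrs >= 10, 1, 0) * Numeric.where(magErrs != 99.0, 1, 0)
newMags = Numeric.where(badMagErrs, -99.0, matchingMags)  # -> [21.3, -99.0, 99.0]
newMagErrs = Numeric.where(badMagErrs, 0.0, magErrs)      # -> [0.02, 0.0, 99.0]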
Example #4
    def splice(self):
        """Method splices the photo-z catalog (.bpz) file and the multicolor.cat
	   file to produce a final photometric redshift catalog.  Raises an exception
	   if one or both files cannot be found.  All these files will be in dir defined 
	   by self.obsCats.
        """

        self.bpzCat = os.path.join(self.obsCats, 'bpz.cat')
        if not os.path.exists(self.colorCat):
            raise IOError, "Multicolor catalog file not found."
        elif not os.path.exists(self.bpzCat):
            raise IOError, "BPZ catalog file not found."

        # Use the fillHeader function to get a list of header lines from each catalog.

        bpzHeaders = fillHeader(self.bpzCat)
        colorHeaders = fillHeader(self.colorCat)
        allH = bpzHeaders + colorHeaders

        # delete the extra element containing the 'NUMBER' column.

        for i in range(len(allH)):
            col, name = allH[i]
            if name == 'NUMBER':
                del allH[i]
                break

        # Renumber the columns via a counter

        for i in range(len(allH)):
            col, name = allH[i]
            allH[i] = (i + 1, name)

        # open the new catalog file and write these headers

        newCat = open(os.path.join(self.obsCats, 'final_photometry.cat'), 'w')
        newCat.write('## Photometry Catalog for Observation: ' + self.obsName +
                     '\n')
        newCat.write('## Generated by the ACS Pipeline, ' + ptime() + '\n')
        newCat.write('##\n')

        f1 = open(self.bpzCat)
        while 1:
            line = f1.readline()
            fields = string.split(line)
            if fields and fields[0] == '##':
                newCat.write(line)
            else:
                break

        f1.close()
        del f1

        for col, name in allH:
            newCat.write('# ' + str(col) + '\t' + name + '\n')

        # slurp up the data from each catalog.

        cat1 = open(self.bpzCat).readlines()
        cat2 = open(self.colorCat).readlines()

        # grab just the data lines
        cat1Data = []
        cat2Data = []

        for line in cat1:
            if '#' not in line:
                cat1Data.append(string.rstrip(line))

        # Delete the extra field 'NUMBER' from colorCat data as was done (above) for the header.

        for line in cat2:
            if '#' not in line:
                fields = string.split(string.rstrip(line))
                del fields[0]
                newline = string.joinfields(fields)
                cat2Data.append(newline)

        # Write the concatenated line to the new catalog

        if len(cat1Data) != len(cat2Data):
            raise IndexError, "Catalog length mismatch."

        for i in range(len(cat1Data)):
            newCat.write(cat1Data[i] + '  ' + cat2Data[i] + '\n')
        newCat.close()
        return
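
    # A toy illustration of the header bookkeeping in splice (hypothetical names):
    #   bpzHeaders   = [(1, 'ID'), (2, 'Z_B')]
    #   colorHeaders = [(1, 'NUMBER'), (2, 'HST_ACS_HRC_F435W_MAG_BPZ')]
    #   allH = bpzHeaders + colorHeaders
    # after deleting the extra 'NUMBER' entry and renumbering:
    #   allH -> [(1, 'ID'), (2, 'Z_B'), (3, 'HST_ACS_HRC_F435W_MAG_BPZ')]
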
    def _hackit(self, cat, keep_apertures=[1, 2, 3]):
        """hack the detectionCatalog.cat file to take out a bunch of the aperture data. 
        Default is only to keep the first three apertures in the final catalog but caller
        can change this by passing a keep_apertures list (this is aperture number and *not*
        the radius).  This will hack the columns indicated by 

        MAG_APER
        MAGERR_APER
        FLUX_APER
        FLUXERR_APER
        """
        dir, old_file = os.path.split(cat)
        headerList = pUtil.fillHeader(cat)  # returns a list of the catalog header tuples.

        # Go through the header and find the columns to keep.  We are looking for
        # ('FLUX_APER', 1)
        # ('FLUX_APER', 2)
        # ('FLUX_APER', 3)
        # ('FLUXERR_APER', 1)
        # ('FLUXERR_APER', 2)
        # ('FLUXERR_APER', 3)
        # ('MAG_APER', 1)
        # ('MAG_APER', 2)
        # ('MAG_APER', 3)
        # ('MAGERR_APER', 1)
        # ('MAGERR_APER', 2)
        # ('MAGERR_APER', 3)

        newheader = []
        for i in headerList:
            if len(i) == 2 or i[2] in keep_apertures:
                newheader.append(i)


        cols = []
        for i in newheader:
            cols.append(i[0] - 1)  # convert 1-based header ids to 0-based indices

        new_rows = []
        for row in open(cat).readlines():
            if '#' in row: continue
            fields = row.split()
            arow = ''
            for column in cols:
                arow += '  ' + fields[column]
            new_rows.append(arow)

        # OK, we have the newheader and the new data.
        # We need to renumber the columns in the header.  The newheader
        # list still has the old catalog's column identifiers, and that needs to be fixed.

        new_newheader = []
        for i in range(len(newheader)):
            if len(newheader[i]) == 2:
                new_newheader.append((i + 1, newheader[i][1]))
            else:
                new_newheader.append((i + 1, newheader[i][1], newheader[i][2]))

        # Now we are just going to overwrite the original detectionImage.cat file
        # (well, at least whatever was passed to this func, anyway)

        fob = open(cat, 'w')
        self.logfile.write(
            "private method _hackit, trimming aperture parameters")
        self.logfile.write("_hackit overwriting detectionImage.cat file.")
        fob.write("## Date: " + pUtil.ptime() + "\n")
        fob.write(
            "## This file has been modified from its original form by the WFP Pipeline.\n"
        )
        fob.write("## Some aperture fields have been removed.\n")
        fob.write("## This file written by the WFP Pipeline.  Do not edit.\n")
        for item in new_newheader:
            fob.write('# ' + str(item[0]) + '\t' + str(item[1]) + '\n')
        for row in new_rows:
            fob.write(row + '\n')
        fob.close()
        return
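
A minimal sketch of the aperture-trimming selection used by _hackit, on a hypothetical header list with keep_apertures=[1, 2]:

headerList = [(1, 'NUMBER'), (2, 'MAG_APER', 1), (3, 'MAG_APER', 2),
              (4, 'MAG_APER', 3), (5, 'FLUX_ISO')]
keep_apertures = [1, 2]

newheader = []
for i in headerList:
    if len(i) == 2 or i[2] in keep_apertures:
        newheader.append(i)

print newheader
# -> [(1, 'NUMBER'), (2, 'MAG_APER', 1), (3, 'MAG_APER', 2), (5, 'FLUX_ISO')]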