コード例 #1
ファイル: xmlUtil.py プロジェクト: XingxingHuang/APLUS
def xmlColorData(infile, outfile, excludeList=None):
    """Append the <data> element for a multicolor catalog to an xml file.

    xmlColorData takes a color catalog file produced by the colorCatalog
    module and marks it with the pipeline/archive protocol markup.  The
    caller must use at least the xmlStartCat function to initialise the xml
    file and the xmlEndCat function to close things up nicely; this function
    only appends the " <data ...> ... </data>" section.

    This is a variation of the xmlData function, designed specifically for
    the multicolor catalog, whose column headers depend on the nature of
    the data (i.e. different filter names).

    Arguments:
        infile      -- path of the multicolor catalog to read.
        outfile     -- path of the xml file; it is opened in append mode.
        excludeList -- optional list of column names to leave out of the
                       markup.  None (the default) excludes nothing.

    Raises:
        SyntaxError -- a column name containing "_BPZ" or "_APER_" does not
                       split into 6 or 7 underscore-separated parts.

    K Anderson 02-01-02
    """
    # Normalise once so the membership test below is safe when the caller
    # passes nothing (a bare "x in None" raises TypeError).
    excluded = excludeList or []
    for field in excluded:
        print("Note: %s being excluded from xml markup %s" %
              (field, os.path.basename(infile)))

    with open(infile) as catalog:
        rows = catalog.readlines()

    # fillHeader returns a list of tuples built from the catalog header;
    # element [1] of each tuple is the column name.
    headerList = fillHeader(infile)

    # Number of sources is the first field of the last row of the catalog.
    # Splitting on '.' drops any fractional part (e.g. "12.0" -> "12").
    det_sources = rows[-1].split()[0].split('.')[0]

    format2 = '\t<field name="%s">%s</field>\n'

    with open(outfile, 'a') as xmlcatalog:
        xmlcatalog.write(" <data Nobjects=\"%s\">\n" % det_sources)

        for row in rows:
            if '#' in row:
                continue  # header / comment line
            cols = row.split()
            xmlcatalog.write("  <object number=\"" + cols[0] + "\">\n")

            # Column 0 is the object number written above, so start at 1.
            for j in range(1, len(headerList)):
                param = cols[j]
                name = headerList[j][1]
                if name in excluded:
                    continue

                if "_BPZ" in name or "_APER_" in name:
                    # The length of the filter names is based on this format:
                    # single filter,   6 parts: HST_ACS_HRC_F250W_APER_CORR
                    # single filter,   6 parts: HST_ACS_HRC_F250W_MAG_BPZ
                    # crossed filters, 7 parts:
                    #                  HST_ACS_HRC_POL120UV_F435W_MAGERR_BPZ
                    # In both layouts the field name is the last two parts;
                    # the leading parts are the filter name, which is not
                    # written to the markup.
                    filterId = name.split("_")
                    if len(filterId) not in (6, 7):
                        raise SyntaxError(
                            "Error: unexpected filter syntax in multicolor catalog header."
                        )
                    fieldName = filterId[-2] + "_" + filterId[-1]
                    xmlcatalog.write(format2 % (fieldName, param))
                else:
                    xmlcatalog.write(format2 % (name, param))

            xmlcatalog.write("  </object>\n")
        xmlcatalog.write(" </data>\n")
コード例 #2
ファイル: xmlUtil.py プロジェクト: XingxingHuang/APLUS
def xmlData(infile, outfile, excludeList=None):
    """Append the <data> element for a SExtractor catalog to an xml file.

    xmlData takes a SExtractor catalog file and marks it with the
    pipeline/archive protocol markup.  The caller must use at least the
    xmlStartCat function to initialise the xml file and the xmlEndCat
    function to close things up nicely; this function only appends the
    " <data ...> ... </data>" section.

    Arguments:
        infile      -- path of the catalog to read.
        outfile     -- path of the xml file; it is opened in append mode.
        excludeList -- optional list of field names to exclude from the
                       markup.  None (the default) excludes nothing.

    Raises:
        TypeError -- a header tuple returned by fillHeader is neither of
                     length 2 nor of length 3.
    """
    excluded = excludeList or []
    for field in excluded:
        print("Note: %s being excluded from xml markup of %s" %
              (field, os.path.basename(infile)))

    with open(infile) as catalog:
        rows = catalog.readlines()

    # fillHeader returns a list of tuples from the header of the catalog.
    # These tuples may be of length 2 or 3, depending on whether a particular
    # parameter has multiple occurrences (eg. FLUX_APER); the third element
    # is then written as the "number" attribute.
    headerList = fillHeader(infile)

    # Number of sources is the first field of the last row of the catalog.
    # Splitting on '.' drops any fractional part (e.g. "12.0" -> "12").
    det_sources = rows[-1].split()[0].split('.')[0]

    format2 = '\t<field name="%s">%s</field>\n'
    format3 = '\t<field name="%s" number="%s">%s</field>\n'

    with open(outfile, 'a') as xmlcatalog:
        xmlcatalog.write(" <data Nobjects=\"%s\">\n" % det_sources)

        for row in rows:
            if '#' in row:
                continue  # header / comment line
            cols = row.split()
            xmlcatalog.write("  <object number=\"" + cols[0] + "\">\n")

            # Column 0 is the object number written above, so start at 1.
            for j in range(1, len(headerList)):
                param = cols[j]
                if param.lower() == "nan":
                    param = "-9999"
                    print("Found a nan in catalog  %s" %
                          os.path.basename(infile))
                    print("set to -9999 in the markup.")

                header = headerList[j]
                if len(header) == 2:
                    name = header[1]
                    markup = format2 % (name, param)
                elif len(header) == 3:
                    name = header[1]
                    markup = format3 % (name, str(header[2]), param)
                else:
                    raise TypeError("Incompatible tuple size")

                if name in excluded:
                    continue
                xmlcatalog.write(markup)

            # In the case where some parameter has multiple columns of output
            # and is the last parameter listed, attach that last parameter
            # name to all remaining values.  This behaviour is seen where the
            # parameter VIGNET is the last parameter listed and all subsequent
            # columns in the catalog are elements of the VIGNET array.
            # NOTE(review): these values are written even if that last name
            # is in excludeList, and "nan" is not replaced here -- confirm
            # whether that is intended.
            extras = cols[len(headerList):]
            if extras:
                lastName = headerList[-1][1]
                for param in extras:
                    xmlcatalog.write(format2 % (lastName, param))

            xmlcatalog.write("  </object>\n")
        xmlcatalog.write(" </data>\n")
コード例 #3
    def _magFix(self, catalogFile):
        """Rewrite a catalog with 'bad' magnitudes flagged.

        This private method receives a path to a catalog file and sifts
        through the MAGERR_* fields looking for values >= 10 other than
        exactly 99.0.  For those objects the corresponding MAG_* value is set
        to -99.0 and the MAGERR_* value to 0.0.  A MAGERR of 99.0 is left
        alone because it is SExtractor's own flag value and its MAG of 99.0
        must not be reset (see Bugzilla bug #2700).

        catalogFile is a path, not a file object.  The original file is
        renamed to catalogFile + ".old", recorded in self.outputList, and a
        new catalog is written to catalogFile: untouched fields first, then
        the corrected MAG fields, then the corrected MAGERR fields.

        NOTE(review): MAG_* columns are only emitted when paired with a
        MAGERR_* column; an unpaired MAG_* column does not reach the new
        catalog -- confirm this is acceptable.
        """
        # fillHeader returns a list of tuples which looks like
        # [(1, 'NUMBER'),
        #  (2, 'X_IMAGE'),
        #  ...
        #  (12, 'MAG_ISOCOR'),
        #  (13, 'MAGERR_ISOCOR'),
        #  (14, 'FLUX_APER', 1),
        #  (15, 'FLUX_APER', 2),
        #  ...
        # ]
        # The tuples are either of length 2 or 3.  If len is 3, the 3rd item
        # is the nth occurrence of that column identifier (aperture series).
        newFieldList = []      # untouched columns, as arrays
        newHeaders = []        # names matching newFieldList
        newMagsList = []
        newMagHeaders = []
        newMagErrsList = []
        newMagErrHeaders = []
        magCols = []
        magErrCols = []

        print("Searching catalog for required columns, MAG, MAGERR")
        for entry in fillHeader(catalogFile):
            column, name = entry[0], entry[1]
            paramNames = name.split("_")
            if "MAG" in paramNames:
                magCols.append(entry)
            elif "MAGERR" in paramNames:
                magErrCols.append(entry)
            else:
                # Any other field is carried over unchanged; catalog columns
                # are 1-based while tableio.get_data is called 0-based.
                newFieldList.append(tableio.get_data(catalogFile, column - 1))
                newHeaders.append(name)

        # Step through the MAGERR columns, pair each with its MAG column and
        # flag the bad entries in both.
        for item in magErrCols:
            magErrColId, magErrColName = item[0], item[1]
            magErrAperId = None
            if len(item) == 3:
                magErrAperId = item[2]

            magErrKind = magErrColName.split("_")[1]  # ISO, ISOCORR, etc.

            print("\n\nMAG type: %s" % magErrKind)
            if magErrAperId:
                print("%s Aper id is %s" % (magErrColName, magErrAperId))
            print("Getting\t%s \tfield %s" % (magErrColName, magErrColId))

            # MAGERR array:
            magErrs = tableio.get_data(catalogFile, magErrColId - 1)

            matchingMagColName = None
            matchingMagColId = None
            # Reset per MAGERR column so a previous iteration's array can
            # never be paired with this column by accident.
            matchingMags = None

            #------------- Search for matching MAG_* field -------------#
            for magitems in magCols:
                magColId, magColName = magitems[0], magitems[1]
                if magColName != "MAG_" + magErrKind:
                    continue
                if len(magitems) == 3:
                    # Aperture series: the occurrence number must match too.
                    matchingMagColName = magColName
                    magAperId = magitems[2]
                    if magAperId == magErrAperId:
                        print("Found matching aperture id.")
                        print("MAG_APER id:  %s MAGERR_APER id:  %s" %
                              (magAperId, magErrAperId))
                        matchingMagColId = magColId
                        matchingMags = tableio.get_data(
                            catalogFile, magColId - 1)
                else:
                    print("Found matching field type: %s in field %s" %
                          (magColName, magColId))
                    matchingMagColName = magColName
                    matchingMagColId = magColId
                    matchingMags = tableio.get_data(
                        catalogFile, magColId - 1)

            print(" MAG err field: %s %s" % (magErrColName, magErrColId))
            print("     Mag field: %s %s" %
                  (matchingMagColName, matchingMagColId))

            if matchingMags is None:
                # No partner MAG column: nothing can be flagged, so keep the
                # MAGERR column as it is instead of failing on an undefined
                # array.
                print("No matching MAG field for %s; column left unchanged." %
                      magErrColName)
                newMagErrsList.append(magErrs)
                newMagErrHeaders.append(magErrColName)
                continue

            # Bad entries: MAGERR >= 10 but not the 99.0 flag value.
            badMagErrs = (Numeric.where(magErrs >= 10, 1, 0) *
                          Numeric.where(magErrs != 99.0, 1, 0))
            newMags = Numeric.where(badMagErrs, -99.0, matchingMags)
            newMagErrs = Numeric.where(badMagErrs, 0.0, magErrs)

            # NOTE(review): collecting the corrected columns here is implied
            # by the concatenation below -- confirm header naming against the
            # upstream module.
            newMagsList.append(newMags)
            newMagHeaders.append(matchingMagColName)
            newMagErrsList.append(newMagErrs)
            newMagErrHeaders.append(magErrColName)

        # Concatenate the lists.  This is done to preserve the MAG_APER and
        # MAGERR_APER grouping of the original SExtractor catalog.
        newFieldList = newFieldList + newMagsList + newMagErrsList
        newHeaders = newHeaders + newMagHeaders + newMagErrHeaders
        newVariables = tuple(newFieldList)

        # Rename the old catalog file as catalogFile.old
        os.rename(catalogFile, catalogFile + ".old")
        self.outputList[os.path.basename(catalogFile) +
                        ".old"] = [os.path.basename(catalogFile)]

        # The header must be closed (flushed) before put_data appends the
        # data columns to the same path.
        with open(catalogFile, 'w') as fob:
            fob.write("## " + ptime() + "\n")
            fob.write("## " + self.modName +
                      " catalog regenerated by _magFix method.\n")
            fob.write(
                '## (This file was generated automatically by the ACS Pipeline.)\n##\n'
                "## This catalog has been photometrically corrected to remove\n")
            fob.write("## 'bad' magnitude values.\n")
            for i in range(len(newHeaders)):
                fob.write("# " + str(i + 1) + "\t" + newHeaders[i] + "\n")
        tableio.put_data(catalogFile, newVariables, append="yes")

コード例 #4
ファイル: photoz_acex.py プロジェクト: XingxingHuang/APLUS
    def splice(self):
        """Splice the photo-z catalog and the multicolor catalog together.

        Method splices the BPZ catalog (bpz.cat in the directory self.obsCats,
        stored as self.bpzCat) and the multicolor catalog (self.colorCat) to
        produce a final photometric redshift catalog, final_photometry.cat,
        in self.obsCats.  Each output row is the BPZ row followed by the
        multicolor row with its first field (NUMBER) removed.

        Raises:
            IOError    -- one or both input catalogs cannot be found.
            IndexError -- the two catalogs have different numbers of data
                          rows.
        """
        self.bpzCat = os.path.join(self.obsCats, 'bpz.cat')
        if not os.path.exists(self.colorCat):
            raise IOError("Multicolor catalog file not found.")
        elif not os.path.exists(self.bpzCat):
            raise IOError("BPZ catalog file not found.")

        # Use the fillHeader function to get the header entries of each
        # catalog; element [1] of each entry is the column name.
        # The multicolor 'NUMBER' column is dropped because its data field is
        # dropped below.  Building a new list avoids deleting from a list
        # while indexing over its original length.
        names = [entry[1] for entry in fillHeader(self.bpzCat)]
        names += [entry[1] for entry in fillHeader(self.colorCat)
                  if entry[1] != 'NUMBER']

        # Renumber the columns via a counter.
        allH = [(i + 1, name) for i, name in enumerate(names)]

        # Slurp up the data from each catalog.
        with open(self.bpzCat) as f1:
            cat1 = f1.readlines()
        with open(self.colorCat) as f2:
            cat2 = f2.readlines()

        # Grab just the data lines.  The first field of each multicolor row
        # is deleted, matching the header handling above.
        cat1Data = [line.rstrip() for line in cat1 if '#' not in line]
        cat2Data = [' '.join(line.split()[1:])
                    for line in cat2 if '#' not in line]

        # Validate before creating the output so a mismatch does not leave a
        # half-written catalog behind.
        if len(cat1Data) != len(cat2Data):
            raise IndexError("Catalog length mismatch.")

        # Leading '##' lines of the BPZ catalog.
        # NOTE(review): copying these lines into the new catalog is the
        # presumed intent of the original read loop -- confirm.
        bpzComments = []
        for line in cat1:
            fields = line.split()
            if fields and fields[0] == '##':
                bpzComments.append(line)
            else:
                break

        # Open the new catalog file and write the headers, then the data.
        newCatPath = os.path.join(self.obsCats, 'final_photometry.cat')
        with open(newCatPath, 'w') as newCat:
            newCat.write('## Photometry Catalog for Observation: ' +
                         self.obsName + '\n')
            newCat.write('## Generated by the ACS Pipeline, ' + ptime() + '\n')
            for line in bpzComments:
                newCat.write(line)
            for col, name in allH:
                newCat.write('# ' + str(col) + '\t' + name + '\n')

            # Write the concatenated lines to the new catalog.
            for i in range(len(cat1Data)):
                newCat.write(cat1Data[i] + '  ' + cat2Data[i] + '\n')
コード例 #5
    def _hackit(self, cat, keep_apertures=[1, 2, 3]):
        """Trim aperture columns out of a catalog file, in place.

        Hack the detectionCatalog.cat file to take out a bunch of the
        aperture data.  Default is only to keep the first three apertures in
        the final catalog, but the caller can change this by passing a
        keep_apertures list (this is aperture number and *not* the radius).

        Header entries of length 2 are always kept.  Entries of length 3
        (e.g. ('FLUX_APER', n), ('FLUXERR_APER', n), ('MAG_APER', n),
        ('MAGERR_APER', n) with their column number) are kept only when
        their third element is in keep_apertures.  The file at path cat is
        overwritten with the trimmed, renumbered catalog and two messages
        are written to self.logfile.

        keep_apertures is only read, never modified, so the shared default
        list is safe.
        """
        # This returns a list of the catalog header as tuples:
        # (column, name) or (column, name, aperture number).
        headerList = pUtil.fillHeader(cat)

        # Go through the header and find the columns to keep.
        newheader = [entry for entry in headerList
                     if len(entry) == 2 or entry[2] in keep_apertures]

        # Header columns are 1-based; row fields are indexed 0-based.
        cols = [entry[0] - 1 for entry in newheader]

        with open(cat) as catalog:
            rows = catalog.readlines()

        new_rows = []
        for row in rows:
            if '#' in row:
                continue  # header / comment line
            fields = row.split()
            new_rows.append(''.join(['  ' + fields[column]
                                     for column in cols]))

        # OK, we have the newheader and the new data.
        # We need to renumber the columns in the header: newheader still
        # carries the old catalog's column identifiers.
        new_newheader = []
        for i in range(len(newheader)):
            if len(newheader[i]) == 2:
                new_newheader.append((i + 1, newheader[i][1]))
            else:
                new_newheader.append(
                    (i + 1, newheader[i][1], newheader[i][2]))

        # Now we are just going to overwrite the original detectionImage.cat
        # file (well, at least whatever was passed to this func, anyway).
        self.logfile.write(
            "private method _hackit, trimming aperture parameters")
        self.logfile.write("_hackit overwriting detectionImage.cat file.")
        with open(cat, 'w') as out:
            out.write("## Date: " + pUtil.ptime() + "\n")
            out.write(
                "## This file has been modified from its original form by the WFP Pipeline.\n"
            )
            out.write("## Some aperture fields have been removed.\n")
            out.write(
                "## This file written by the WFP Pipeline.  Do not edit.\n")
            for item in new_newheader:
                out.write('# ' + str(item[0]) + '\t' + str(item[1]) + '\n')
            for row in new_rows:
                out.write(row + '\n')