Example #1
0
def _ipac_header_cells(line):
    '''Return the cells of a pipe-delimited IPAC header row, spaces removed.'''
    return line.replace(" ", "").split("|")[1:-1]


def _ipac_smallest_dtype(values, ipac_type, default):
    '''
    Return the smallest numpy type whose range contains every item of
    *values* for a column declared with the IPAC type *ipac_type*.

    *default* is returned for column types that are not narrowed, for
    columns without any usable value, and when no candidate type fits.
    '''

    if ipac_type in ('i', 'int', 'integer'):
        candidates = [np.uint8, np.int8, np.uint16, np.int16,
                      np.uint32, np.int32, np.uint64, np.int64]
        convert, limits = int, np.iinfo
    elif ipac_type in ('long',):
        candidates = [np.uint64, np.int64]
        convert, limits = int, np.iinfo
    elif ipac_type in ('float', 'real'):
        candidates = [np.float32, np.float64]
        convert, limits = float, np.finfo
    else:
        return default

    # The items are still text at this point: convert them before taking the
    # extrema, otherwise "10" would sort below "9".
    numbers = [convert(item) for item in values]

    # NaN compares false against everything and would poison min()/max()
    numbers = [number for number in numbers if number == number]

    if not numbers:
        return default

    low, high = min(numbers), max(numbers)

    for candidate in candidates:
        if low >= limits(candidate).min and high <= limits(candidate).max:
            return candidate

    return default


def read(self, filename, definition=3, verbose=True, smart_typing=False):
    '''
    Read a table from a IPAC file

    Required Arguments:

        *filename*: [ string ]
            The IPAC file to read the table from

    Optional Keyword Arguments:

        *definition*: [ 1 | 2 | 3 ]

            The definition to use to read IPAC tables:

            1: any character below a pipe symbol belongs to the
               column on the left, and any characters below the
               first pipe symbol belong to the first column.
            2: any character below a pipe symbol belongs to the
               column on the right.
            3: no characters should be present below the pipe
               symbols (default).

        *verbose*: [ True | False ]

            Whether to print a warning when the null value of a
            column has to be guessed or changed.

        *smart_typing*: [ True | False ]

            Whether to try and save memory by using the smallest
            integer type that can contain a column. For example,
            a column containing only values between 0 and 255 can
            be stored as an unsigned 8-bit integer column. Integer
            columns containing only 0 and 1 are stored as booleans.
            The default is false, so that all integer columns are
            stored as 64-bit integers.

    Raises:

        Exception if *definition* is invalid, if the column headers are
        missing or inconsistent, or if the text 'null' is found in a
        column for which a different null value was declared.
    '''

    if definition not in (1, 2, 3):
        raise Exception("definition should be one of 1/2/3")

    self.reset()

    names, pipes = [], []
    types, units, nulls = {}, {}, {}

    # Text mode, so that the string comparisons below behave the same on
    # Python 2 and Python 3. The 'with' block closes the file even when one
    # of the checks below raises.
    with open(filename, 'r') as f:

        line = f.readline()

        # Comments and keywords: the leading run of lines that start with
        # a backslash
        while line[0:1] == '\\':

            if line[1:2] == ' ' or '=' not in line:  # comment
                self.add_comment(line[1:])
            else:  # keyword
                pos = line.index('=')
                key, value = line[1:pos], line[pos + 1:]
                value = value.replace("'", "").replace('"', '')
                self.add_keyword(key.strip(), value.strip())

            line = f.readline()

        # Column headers: names, data types, units, null values (in that
        # order); any further header row overwrites the null values.
        header_row = 0

        while line[0:1] == '|':

            if header_row == 0:  # Column names
                line = line.replace('-', ' ').strip()
                pipes = [i for i, c in enumerate(line) if c == '|']
                names = _ipac_header_cells(line)
            elif header_row == 1:  # Data types
                line = line.replace('-', ' ').strip()
                types = dict(zip(names, _ipac_header_cells(line)))
            elif header_row == 2:  # Units
                units = dict(zip(names, _ipac_header_cells(line)))
            else:  # Null values (dashes kept: they may be minus signs)
                nulls = dict(zip(names, _ipac_header_cells(line)))

            line = f.readline()
            header_row += 1

        # Also catches a file without any header row (no pipes, no names)
        if len(pipes) != len(names) + 1:
            raise Exception("An error occured while reading the IPAC table")

        for name in names:
            if name not in types:
                raise Exception("no data type given for column " + name)

        # Units and null values are optional: default to empty strings
        nulls_given = len(nulls) > 0
        for name in names:
            units.setdefault(name, '')
            nulls.setdefault(name, '')

        # Pre-compute numpy column types
        numpy_types = {}
        for name in names:
            numpy_types[name] = type_dict[types[name]]

        # Data: one list of text items per column. Reading stops at the
        # first blank line or at the end of the file.
        array = dict((name, []) for name in names)
        last_column = len(names) - 1

        while line.strip() != '':

            for i, name in enumerate(names):

                first, last = pipes[i] + 1, pipes[i + 1]

                if definition == 1:
                    last = last + 1
                    if first == 1:
                        first = 0
                elif definition == 2:
                    first = first - 1

                # The last column extends to the end of the line
                if i == last_column:
                    item = line[first:].strip()
                else:
                    item = line[first:last].strip()

                if item.lower() == 'null' and nulls[name] != 'null':
                    if nulls[name] == '':
                        if verbose:
                            print("WARNING: found unexpected 'null' value. Setting null value for column " + name + " to 'null'")
                        nulls[name] = 'null'
                        nulls_given = True
                    else:
                        raise Exception("null value for column " + name + " is set to " + nulls[name] + " but found value 'null'")

                array[name].append(item)

            line = f.readline()

    # Check that null values are of the correct type
    if nulls_given:
        for name in names:
            try:
                nulls[name] = numpy_types[name](nulls[name])
            except (ValueError, TypeError, OverflowError):
                # The declared null value cannot be represented in the
                # column type: substitute the type's invalid value, both
                # in the data and as the column's null value.
                n = invalid[numpy_types[name]]
                for i, item in enumerate(array[name]):
                    if item == nulls[name]:
                        array[name][i] = n
                if verbose:
                    if len(str(nulls[name]).strip()) == 0:
                        print("WARNING: empty null value for column " + name + " set to " + str(n))
                    else:
                        print("WARNING: null value for column " + name + " changed from " + str(nulls[name]) + " to " + str(n))
                nulls[name] = n

    # Convert to numpy arrays
    for name in names:

        values = array[name]
        dtype = type_dict[types[name]]

        if smart_typing:
            dtype = _ipac_smallest_dtype(values, types[name], dtype)
        elif dtype == np.int64 and values:
            # Values above the int64 maximum only fit in uint64
            if max(int(item) for item in values) > np.iinfo(np.int64).max:
                dtype = np.uint64
                warnings.warn("using type uint64 for column %s" % name)

        column = np.array(values, dtype=dtype)

        # Store 0/1 integer columns as booleans. Restricted to integer
        # columns: '== 1' would destroy fractional values of a float column.
        if smart_typing and column.dtype.kind in ('i', 'u') and column.size > 0:
            if column.min() >= 0 and column.max() <= 1:
                column = column == 1

        if self._masked:
            self.add_column(name, column,
                            mask=smart_mask(column, nulls[name]),
                            unit=units[name], fill=nulls[name])
        else:
            self.add_column(name, column,
                            null=nulls[name], unit=units[name])
Example #2
0
def read(self, filename, hdu=None, memmap=False, verbose=True):
    '''
    Read a table from a FITS file

    Required Arguments:

        *filename*: [ string ]
            The FITS file to read the table from

    Optional Keyword Arguments:

        *hdu*: [ integer ]
            The HDU to read from the FITS file (this is only required
            if there are more than one table in the FITS file)

        *memmap*: [ bool ]
            Whether PyFITS should use memory mapping

        *verbose*: [ bool ]
            Accepted but not used by this function

    Raises:

        Exception if the file contains no table, and TableException if
        it contains several tables and *hdu* was not given.
    '''

    _check_pyfits_installed()

    self.reset()

    # If no hdu is requested, check that there is only one table.
    # NOTE: any falsy value (None, but also 0) triggers the auto-detection.
    if not hdu:
        tables = _list_tables(filename)
        if len(tables) == 0:
            raise Exception("No tables in file")
        elif len(tables) == 1:
            # list() so that the lookup also works with Python 3 key views
            hdu = list(tables.keys())[0]
        else:
            raise TableException(tables, 'hdu')

    hdulist = pyfits.open(filename, memmap=memmap)

    # try/finally so that the file is closed even if reading fails part-way
    try:

        table_hdu = hdulist[hdu]

        table = table_hdu.data
        header = table_hdu.header
        columns = table_hdu.columns

        # Construct dtype for table

        dtype = []

        for i, name in enumerate(table.dtype.names):

            col_type = table.dtype[name]
            if col_type.subdtype:
                col_type, shape = col_type.subdtype
            else:
                shape = ()

            # Get actual FITS format and zero-point
            fmt, bzero = columns[i].format, columns[i].bzero

            # Remove numbers from format, to find just type
            fmt = fmt.strip("1234567890.")

            # Numeric column held as text in the record array: use the
            # numeric type named by the format code instead
            if col_type.kind == 'S' and fmt in ('I', 'F', 'E', 'D'):
                if fmt == 'I':
                    col_type = np.int64
                elif fmt in ('F', 'E'):
                    col_type = np.float32
                elif fmt == 'D':
                    col_type = np.float64

            # Bit columns: 'X' data held as uint8 is declared as booleans,
            # with eight entries per stored byte
            if fmt == 'X' and col_type.type == np.uint8:
                col_type = np.bool_
                if len(shape) == 1:
                    shape = (shape[0] * 8,)

            if fmt == 'L':
                col_type = np.bool_

            # Integer columns whose zero-point shifts them into another
            # integer range (signed bytes, unsigned 16/32-bit integers)
            if fmt == 'B' and bzero == -128:
                col_type = np.int8
            elif fmt == 'I' and bzero == -np.iinfo(np.int16).min:
                col_type = np.uint16
            elif fmt == 'J' and bzero == -np.iinfo(np.int32).min:
                col_type = np.uint32

            dtype.append((name, col_type, shape))

        dtype = np.dtype(dtype)

        if self._masked:
            self._setup_table(len(table), dtype, units=columns.units)
        else:
            self._setup_table(len(table), dtype, units=columns.units,
                              nulls=columns.nulls)

        # Populate the table

        for i, name in enumerate(columns.names):

            # NOTE(review): the format code is taken from the last character
            # here but by stripping digits in the dtype loop above; the two
            # only agree for formats that end with the type letter.
            fmt, bzero = columns[i].format[-1], columns[i].bzero

            data = None

            if bzero and fmt in ('B', 'I', 'J'):
                # Apply the zero-point by hand, in a wider integer type so
                # that the addition cannot overflow before the final cast
                raw = pyfits.rec.recarray.field(table, i)
                if fmt == 'B' and bzero == -128:
                    data = (raw.astype(np.int16) + bzero).astype(np.int8)
                elif fmt == 'I' and bzero == -np.iinfo(np.int16).min:
                    data = (raw.astype(np.int32) + bzero).astype(np.uint16)
                elif fmt == 'J' and bzero == -np.iinfo(np.int32).min:
                    data = (raw.astype(np.int64) + bzero).astype(np.uint32)

            if data is None:
                data = table.field(name)

            self.data[name][:] = data[:]

            if self._masked:
                null = columns.nulls[i]
                if null == 'NAN.0':
                    null = np.nan
                elif null == 'INF.0':
                    null = np.inf
                self.data[name].mask = smart_mask(data, null)
                self.data[name].set_fill_value(null)

        # Keep every header keyword that is neither a column description
        # nor one of the standard keys
        for key in header.keys():
            if key[:4] not in ('TFOR', 'TDIS', 'TDIM', 'TTYP', 'TUNI') \
                    and key not in standard_keys:
                self.add_keyword(key, header[key])

        for comment in header.get_comment():
            if isinstance(comment, pyfits.Card):
                self.add_comment(comment.value)
            else:
                self.add_comment(comment)

        if table_hdu.name:
            self.table_name = str(table_hdu.name)

    finally:
        hdulist.close()