def _load_data(cls, fileobj, coldefs=None):
    """
    Read the table data from the ASCII file output by BinTableHDU.dump().

    Parameters
    ----------
    fileobj : str or file-like object
        The ASCII dump to read; if given as a filename the file is opened
        here and closed again before returning (even on error).

    coldefs : ColDefs, optional
        Known column definitions for the table.  If omitted, the column
        formats are inferred from the dumped values themselves.

    Returns
    -------
    The populated ``FITS_rec`` data array of a newly created table HDU.
    """

    close_file = False

    if isinstance(fileobj, basestring):
        fileobj = open(fileobj, 'r')
        close_file = True

    # If we opened the file ourselves, guarantee it is closed even when
    # parsing raises part-way through (previously the handle leaked on
    # error because the close only happened on the success path).
    try:
        initialpos = fileobj.tell()  # We'll be returning here later
        linereader = csv.reader(fileobj, dialect=FITSTableDumpDialect)

        # First we need to do some preprocessing on the file to find out
        # how much memory we'll need to reserve for the table.  This is
        # necessary even if we already have the coldefs in order to
        # determine how many rows to reserve memory for
        vla_lengths = []
        recformats = []
        names = []
        nrows = 0
        if coldefs is not None:
            recformats = coldefs._recformats
            names = coldefs.names

        def update_recformats(value, idx):
            # Widen the inferred record format for column `idx` whenever
            # `value` needs a more general scalar format than seen so far.
            fitsformat = _scalar_to_format(value)
            recformat = _convert_format(fitsformat)
            if idx >= len(recformats):
                recformats.append(recformat)
            else:
                if _cmp_recformats(recformats[idx], recformat) < 0:
                    recformats[idx] = recformat

        # Pass 1: count rows and (if no coldefs were given) infer each
        # column's format and any variable-length-array (VLA) lengths.
        # TODO: The handling of VLAs could probably be simplified a bit
        for row in linereader:
            nrows += 1
            if coldefs is not None:
                continue
            col = 0
            idx = 0
            while idx < len(row):
                if row[idx] == 'VLA_Length=':
                    # A VLA column is dumped as the marker token, the
                    # element count, then that many values.
                    if col < len(vla_lengths):
                        vla_length = vla_lengths[col]
                    else:
                        vla_length = int(row[idx + 1])
                        vla_lengths.append(vla_length)
                    idx += 2
                    while vla_length:
                        update_recformats(row[idx], col)
                        vla_length -= 1
                        idx += 1
                    col += 1
                else:
                    if col >= len(vla_lengths):
                        vla_lengths.append(None)
                    update_recformats(row[idx], col)
                    col += 1
                    idx += 1

        # Update the recformats for any VLAs
        for idx, length in enumerate(vla_lengths):
            if length is not None:
                recformats[idx] = str(length) + recformats[idx]

        dtype = np.rec.format_parser(recformats, names, None).dtype

        # TODO: In the future maybe enable loading a bit at a time so that
        # we can convert from this format to an actual FITS file on disk
        # without needing enough physical memory to hold the entire thing
        # at once; new_table() could use a similar feature.
        hdu = new_table(np.recarray(shape=1, dtype=dtype), nrows=nrows,
                        fill=True)
        data = hdu.data

        # Convert any VLA columns to their _FormatP representation so that
        # per-row arrays can be assigned below.
        for idx, length in enumerate(vla_lengths):
            if length is not None:
                arr = data.columns._arrays[idx]
                dt = recformats[idx][len(str(length)):]
                recformats[idx] = _FormatP(dt, max=length)
                data.columns._recformats[idx] = recformats[idx]
                data._convert[idx] = _makep(arr, arr, recformats[idx])

        # Pass 2: jump back to the start of the data and create a new line
        # reader, then fill in the actual cell values.
        fileobj.seek(initialpos)
        linereader = csv.reader(fileobj, dialect=FITSTableDumpDialect)
        for row, line in enumerate(linereader):
            col = 0
            idx = 0
            while idx < len(line):
                if line[idx] == 'VLA_Length=':
                    vla_len = vla_lengths[col]
                    idx += 2
                    data[row][col][:] = line[idx:idx + vla_len]
                    idx += vla_len
                else:
                    # TODO: This won't work for complex-valued types; fix
                    # this
                    # Kind of silly special handling for bools
                    val = line[idx]
                    if recformats[col] == FITS2NUMPY['L']:
                        val = bool(int(val))
                    elif recformats[col] == FITS2NUMPY['M']:
                        # For some reason, in arrays/fields where numpy
                        # expects a complex it's not happy to take a string
                        # representation (though it's happy to do that in
                        # other contexts), so we have to convert the string
                        # representation for it:
                        val = complex(val)
                    data[row][col] = val
                    idx += 1
                col += 1
    finally:
        if close_file:
            fileobj.close()

    return data
def new_table(input, header=None, nrows=0, fill=False, tbtype='BinTableHDU'):
    """
    Create a new table from the input column definitions.

    Warning: Creating a new table using this method creates an in-memory
    *copy* of all the column arrays in the input.  This is because if they
    are separate arrays they must be combined into a single contiguous
    array.

    If the column data is already in a single contiguous array (such as an
    existing record array) it may be better to create a BinTableHDU
    instance directly.  See the PyFITS documentation for more details.

    Parameters
    ----------
    input : sequence of Column or ColDefs objects
        The data to create a table from.

    header : Header instance
        Header to be used to populate the non-required keywords.

    nrows : int
        Number of rows in the new table.

    fill : bool
        If `True`, will fill all cells with zeros or blanks.  If `False`,
        copy the data from input, undefined cells will still be filled
        with zeros/blanks.

    tbtype : str
        Table type to be created ("BinTableHDU" or "TableHDU").
    """

    # construct a table HDU
    # TODO: Something needs to be done about this as part of #60....
    # Resolve the HDU class by a plain name lookup rather than eval()-ing
    # the string, which would execute an arbitrary expression supplied in
    # tbtype.  Behavior is unchanged for the documented values, which are
    # names defined at this module's top level.
    hdu = globals()[tbtype](header=header)

    if isinstance(input, ColDefs):
        # NOTE: This previously raised an error if the tbtype didn't match
        # the tbtype of the input ColDefs.  This should no longer be
        # necessary, but just beware.
        columns = hdu.columns = ColDefs(input)
    elif isinstance(input, FITS_rec):  # input is a FITS_rec
        # Create a new ColDefs object from the input FITS_rec's ColDefs
        # object and assign it to the ColDefs attribute of the new hdu.
        columns = hdu.columns = ColDefs(input._coldefs, tbtype)
    else:  # input is a list of Columns or possibly a recarray
        # Create a new ColDefs object from the input list of Columns and
        # assign it to the ColDefs attribute of the new hdu.
        columns = hdu.columns = ColDefs(input, tbtype)

    # read the delayed data
    for idx in range(len(columns)):
        arr = columns._arrays[idx]
        if isinstance(arr, Delayed):
            if arr.hdu.data is None:
                columns._arrays[idx] = None
            else:
                columns._arrays[idx] = np.rec.recarray.field(arr.hdu.data,
                                                             arr.field)

    # use the largest column shape as the shape of the record
    if nrows == 0:
        for arr in columns._arrays:
            if arr is not None:
                dim = arr.shape[0]
            else:
                dim = 0
            if dim > nrows:
                nrows = dim

    if tbtype == 'TableHDU':
        # ASCII table: build a fixed-width string record array sized from
        # the column spans/starts.
        columns = hdu.columns = _ASCIIColDefs(hdu.columns)
        _itemsize = columns.spans[-1] + columns.starts[-1] - 1
        dtype = {}

        for j in range(len(columns)):
            data_type = 'S' + str(columns.spans[j])
            dtype[columns.names[j]] = (data_type, columns.starts[j] - 1)

        hdu.data = np.rec.array((' ' * _itemsize * nrows).encode('ascii'),
                                dtype=dtype, shape=nrows).view(FITS_rec)
        hdu.data.setflags(write=True)
    else:
        # Binary table: allocate an empty record array from the column
        # record formats.
        formats = ','.join(columns._recformats)
        hdu.data = np.rec.array(None, formats=formats,
                                names=columns.names,
                                shape=nrows).view(FITS_rec)

    hdu.data._coldefs = hdu.columns
    hdu.data.formats = hdu.columns.formats

    # Populate data to the new table from the ndarrays in the input
    # ColDefs object.
    for idx in range(len(columns)):
        # For each column in the ColDef object, determine the number of
        # rows in that column.  This will be either the number of rows in
        # the ndarray associated with the column, or the number of rows
        # given in the call to this function, which ever is smaller.  If
        # the input FILL argument is true, the number of rows is set to
        # zero so that no data is copied from the original input data.
        arr = columns._arrays[idx]
        recformat = columns._recformats[idx]

        if arr is None:
            size = 0
        else:
            size = len(arr)

        n = min(size, nrows)
        if fill:
            n = 0

        # Get any scale factors from the FITS_rec
        scale, zero, bscale, bzero, dim = \
            hdu.data._get_scale_factors(idx)[3:]

        field = np.rec.recarray.field(hdu.data, idx)

        if n > 0:
            # Only copy data if there is input data to copy
            # Copy all of the data from the input ColDefs object for this
            # column to the new FITS_rec data array for this column.
            if isinstance(recformat, _FormatX):
                # Data is a bit array
                if arr[:n].shape[-1] == recformat._nx:
                    _wrapx(arr[:n], field[:n], recformat._nx)
                else:
                    # from a table parent data, just pass it
                    field[:n] = arr[:n]
            elif isinstance(recformat, _FormatP):
                hdu.data._convert[idx] = _makep(arr[:n], field, recformat,
                                                nrows=nrows)
            elif recformat[-2:] == FITS2NUMPY['L'] and arr.dtype == bool:
                # column is boolean; store as the FITS 'T'/'F' byte codes
                field[:n] = np.where(arr == False, ord('F'), ord('T'))
            else:
                if tbtype == 'TableHDU':
                    # string no need to convert,
                    if isinstance(arr, chararray.chararray):
                        field[:n] = arr[:n]
                    else:
                        # Numeric ASCII column: apply any scale/zero on a
                        # copy so the caller's array is not mutated.
                        hdu.data._convert[idx] = \
                            np.zeros(nrows, dtype=arr.dtype)
                        if scale or zero:
                            arr = arr.copy()
                        if scale:
                            arr *= bscale
                        if zero:
                            arr += bzero
                        hdu.data._convert[idx][:n] = arr[:n]
                else:
                    field[:n] = arr[:n]

        if n < nrows:
            # If there are additional rows in the new table that were not
            # copied from the input ColDefs object, initialize the new
            # data
            if tbtype == 'BinTableHDU':
                if isinstance(field, np.ndarray):
                    # Zero in physical units once bscale/bzero are applied
                    field[n:] = -bzero / bscale
                else:
                    field[n:] = ''
            else:
                field[n:] = ' ' * hdu.data._coldefs.spans[idx]

    # Update the HDU header to match the data
    hdu.update()

    # Make the ndarrays in the Column objects of the ColDefs object of the
    # HDU reference the same ndarray as the HDU's FITS_rec object.
    for idx in range(len(columns)):
        hdu.columns[idx].array = hdu.data.field(idx)

    # Delete the _arrays attribute so that it is recreated to point to the
    # new data placed in the column objects above
    del hdu.columns._arrays

    return hdu