def write_import_file(self, fname, data):
    """
    Write `data` to the file `fname` using recfile.Recfile.

    The file is opened in write mode with '|' as the delimiter and
    padnull=True.  When self.verbose is true, a message naming the file is
    written to stdout before the file is opened.
    """
    from esutil import recfile

    if self.verbose:
        stdout.write("Writing to temporary file: %s\n" % fname)

    # gather the fixed Recfile options in one place
    recfile_options = dict(mode='w', delim='|', padnull=True)

    with recfile.Recfile(fname, **recfile_options) as writer:
        writer.write(data)
def read_header(self):
    """
    Name:
        read_header()

    Calling Sequence:
        sf = sfile.Open(file)
        hdr = sf.read_header()

    Read the header from a simple self-describing file format with an
    ascii header.  See the write() function for information about writing
    this file format, and read() for reading.

    Returns:
        The header as a dictionary.  The size taken from the first line is
        stored under '_SIZE', and '_VERSION' is set to '1.0' when neither
        '_version' nor '_VERSION' is present.

    Side effects:
        self._data_start is set to the offset reported by the low-level
        header reader.

    Raises:
        ValueError: if the object was opened with filename None.
        ImportError: if numpy could not be imported.
        TypeError: if the header text does not evaluate to a dictionary.

    The file format:

      First line:
          SIZE = --------------number

      where if possible the number should be formatted as %20d.  This is
      large enough to hold a 64-bit number.  This exact formatting is
      required so SIZE can be updated *in place* when appending rows to a
      file.  Note the file can always be read as long as the first line
      reads 'SIZE = some_number' but appending requires the exact format.

      Last two lines of the header region must be:
          END
          blank line
      case does not matter.

      In between the SIZE and END lines is the header data.  This is a
      string that must eval() to a dictionary.  It must contain the
      following entry:

          _DTYPE = array data type description in list of tuples or
            string form (case does not matter, can also be called _dtype).
              [('field1', 'f8'), ('f2','2i4')]

      There should also be a _VERSION tag.

          '_VERSION': '1.0'

      If '_VERSION' is not present, it is assumed that the version is 1.0,
      but you should always set this.  If you use this module to write
      data, it will always be set.

      If the file holds a simple array, and the dtype field is a simple
      string, then the following keyword, if present, will be used to
      reshape the array:

          '_SHAPE'

      If the total elements in the _shape field matches the size then it
      will be used to reshape the array before returning or when using
      memory maps.

      If the data are ascii then delimiter must be given in the keyword

          _DELIM

      This can be for example ',', ' ', or a tab character.  Again, case
      does not matter.

      The rest of the keywords can be any variable, as long as it can be
      eval()d.

    An example header:
        SIZE =                   10
        {'_VERSION': '1.0',
         '_DELIM': ',',
         '_DTYPE': [('x', 'f4'),
                    ('y', 'f4'),
                    ('ra', 'f8'),
                    ('dec', 'f8'),
                    ('exposurename', 'S20'),
                    ('ccd', 'i1'),
                    ('size_flags', 'i4'),
                    ('magi', 'f4'),
                    ('sigma0', 'f4'),
                    ('star_flag', 'i4'),
                    ('shear_flags', 'i4'),
                    ('shapelet_sigma', 'f4'),
                    ('shear1', 'f4'),
                    ('shear2', 'f4'),
                    ('shear_cov00', 'f4'),
                    ('shear_cov01', 'f4'),
                    ('shear_cov11', 'f4')],
         'listvar': [1, 2, 3],
         'subd': {'subd1': 'subfield', 'sublist': [8.5, 6.6]},
         'svar': 'hello',
         'test1': 35}
        END

        -- data begins --
    """

    if self._filename is None:
        raise ValueError("you opened with filename None")

    if not have_numpy:
        raise ImportError("numpy could not be imported")

    # Recfile is used here only to extract the header text; the dtype is
    # presumably a placeholder needed to construct the object.
    dummy_dtype = [('ra', 'f8')]
    with recfile.Recfile(self._filename, dtype=dummy_dtype) as robj:
        hdrstring, offset = robj.robj.read_sfile_header()

    self._data_start = offset

    lines = hdrstring.split('\n')

    # first line should be
    #   SIZE = .....
    # or
    #   NROWS = ...
    size = self._extract_size_from_string(lines[0])

    # Keep what lies between the first line and the last three split
    # entries (presumably END, the blank line and the empty string after
    # the final newline -- TODO confirm against read_sfile_header).
    hdrdict_string = ' '.join(lines[1:len(lines) - 3])

    # SECURITY NOTE: eval() executes arbitrary Python found in the file
    # header, so only read files from trusted sources.  It is kept, rather
    # than swapped for ast.literal_eval, because the format is documented
    # as "anything that can be eval()d" and existing files may rely on it.
    hdr = eval(hdrdict_string)

    if not isinstance(hdr, dict):
        # fail with a clear message instead of an obscure item-assignment
        # TypeError on the next statement
        raise TypeError(
            "header of '%s' must evaluate to a dict, got %s"
            % (self._filename, type(hdr).__name__)
        )

    hdr['_SIZE'] = size

    # Defaulting the version here leaves open the possibility of changing
    # the header or other details later.
    if '_version' not in hdr and '_VERSION' not in hdr:
        hdr['_VERSION'] = '1.0'

    return hdr
def open(self, filename, mode='r', delim=None, padnull=False,
         ignorenull=False, **keys):
    """
    Open the file.

    If the mode starts with 'r' ('r' or 'r+') a read of the header is
    attempted.  The delimiter, size and dtype are then taken from the
    header and the delim= keyword is ignored.

    If the mode is 'r+' but the file does not exist, the mode is switched
    to 'w+' so the file is created rather than read.

    For any other mode no header is read; delim is stored and passed on
    for writing later.

    Parameters:
        filename: path of the file; '~' and environment variables are
            expanded.  If None, the attributes are reset and nothing is
            opened.
        mode: the file mode, default 'r'.
        delim: delimiter, used only when no header is read.
        padnull, ignorenull: passed through to recfile.Recfile.
        **keys: accepted but not used by this method.

    Raises:
        ImportError: if numpy or the recfile package is unavailable.
    """

    if not have_numpy:
        raise ImportError("numpy could not be imported")
    if not have_recfile:
        raise ImportError("the recfile package is required")

    self.close()

    self._padnull = padnull
    self._ignorenull = ignorenull
    self._mode = mode
    self._filename = filename

    if filename is None:
        return

    # expand shortcut variables
    fpath = os.path.expanduser(filename)
    fpath = os.path.expandvars(fpath)
    self._filename = fpath

    if mode == 'r+' and not os.path.exists(self._filename):
        # path doesn't exist but we want to append.  Change the mode to
        # write.  self._mode must be updated too: it is what the rest of
        # this method uses, and changing only the local variable left the
        # code trying to read a header from a file that does not exist.
        mode = 'w+'
        self._mode = mode

    if self._mode[0] == 'r':
        # For 'r' and 'r+' try to read the header
        self._hdr = self.read_header()

        self._delim = _match_key(self._hdr, '_delim')
        self._size = _match_key(self._hdr, '_size', require=True)
        self._descr = _match_key(self._hdr, '_dtype', require=True)
        self._dtype = numpy.dtype(self._descr)

        self._robj = recfile.Recfile(
            self._filename,
            mode=self._mode,
            delim=self._delim,
            dtype=self._dtype,
            nrows=self.get_nrows(),
            offset=self._data_start,
            padnull=self._padnull,  # these get sent in case mode is r+
            ignorenull=self._ignorenull,
        )
    else:
        # we are starting from scratch, so we can't read a header
        #
        # get delim from the keyword.  This will be used for writing later
        self._delim = delim
        self._robj = recfile.Recfile(
            self._filename,
            mode=self._mode,
            delim=self._delim,
            padnull=self._padnull,
            ignorenull=self._ignorenull,
        )