Esempio n. 1
0
    def write_import_file(self, fname, data):
        """
        Write data to the file fname through esutil's recfile.Recfile.

        The file is opened in mode 'w' with delim='|' and padnull=True.
        When self.verbose is set, the file name is announced on stdout
        before writing.
        """
        from esutil import recfile

        if self.verbose:
            message = "Writing to temporary file: %s\n" % fname
            stdout.write(message)

        recfile_options = dict(mode='w', delim='|', padnull=True)
        with recfile.Recfile(fname, **recfile_options) as writer:
            writer.write(data)
Esempio n. 2
0
    def read_header(self):
        """
        Name:
            read_header()

        Calling Sequence:
            sf = sfile.Open(file)
            hdr = sf.read_header()

        Purpose:
            Read the ascii header of a simple self-describing file and return
            it as a dictionary.  See write() for writing this file format and
            read() for reading the data.

        Returns:
            The header dictionary.  '_SIZE' is set to the number parsed from
            the first header line, and '_VERSION' is set to '1.0' when
            neither '_version' nor '_VERSION' is present.  As a side effect,
            the offset reported by the header reader is stored in
            self._data_start.

        Raises:
            ValueError if the object was opened with filename None.
            ImportError if numpy could not be imported.

        The file format:
          First line:
              SIZE = --------------number

        where if possible the number should be formatted as %20d.  This is
        large enough to hold a 64-bit number.  This exact formatting is
        required so SIZE can be updated *in place* when appending rows to a
        file.  Note the file can always be read as long as the first line
        reads 'SIZE = some_number', but appending requires the exact format.

        The last two lines of the header region must be:
                END
                blank line
        Case does not matter.

        In between the SIZE and END lines is the header data.  This is a
        string that must eval() to a dictionary.  It must contain the
        following entry:

              _DTYPE = array data type description in list of tuples or
                string form (case does not matter, can also be called _dtype).

                    [('field1', 'f8'), ('f2','2i4')]

        There should also be a _VERSION tag.

              '_VERSION': '1.0'

        If '_VERSION' is not present, it is assumed that the version is 1.0,
        but you should always set this.  If you use this module to write
        data, it will always be set.

        If the file holds a simple array, and the dtype field is a simple
        string, then the following keyword, if present, will be used to
        reshape the array:

              '_SHAPE'

        If the total number of elements in the _shape field matches the size
        then it will be used to reshape the array before returning or when
        using memory maps.

        If the data are ascii then the delimiter must be given in the keyword

              _DELIM

        This can be for example ',', ' ', or a tab character.  Again, case
        does not matter.

        The remaining keywords can hold any value, as long as it can be
        eval()d.

        An example header:
            SIZE =                   10
            {'_VERSION': '1.0',
             '_DELIM': ',',
             '_DTYPE': [('x', 'f4'),
                        ('y', 'f4'),
                        ('ra', 'f8'),
                        ('dec', 'f8'),
                        ('exposurename', 'S20'),
                        ('ccd', 'i1'),
                        ('size_flags', 'i4'),
                        ('magi', 'f4'),
                        ('sigma0', 'f4'),
                        ('star_flag', 'i4'),
                        ('shear_flags', 'i4'),
                        ('shapelet_sigma', 'f4'),
                        ('shear1', 'f4'),
                        ('shear2', 'f4'),
                        ('shear_cov00', 'f4'),
                        ('shear_cov01', 'f4'),
                        ('shear_cov11', 'f4')],
             'listvar': [1, 2, 3],
             'subd': {'subd1': 'subfield', 'sublist': [8.5, 6.6]},
             'svar': 'hello',
             'test1': 35}
            END

            -- data begins --
        """

        if self._filename is None:
            raise ValueError("you opened with filename None")

        if not have_numpy:
            raise ImportError("numpy could not be imported")

        # The dtype passed here is only a placeholder so that a Recfile can
        # be constructed; the real dtype is stored in the header being read.
        placeholder_dtype = [('ra', 'f8')]
        with recfile.Recfile(self._filename, dtype=placeholder_dtype) as rec:
            header_text, data_offset = rec.robj.read_sfile_header()

        self._data_start = data_offset

        # The first line should be "SIZE = ....." or "NROWS = ..."
        header_lines = header_text.split('\n')
        nrows = self._extract_size_from_string(header_lines[0])

        # Everything after the size line, minus the last three lines of the
        # header text, is the dictionary literal.
        dict_source = ' '.join(header_lines[1:-3])

        # NOTE(review): eval() runs arbitrary code found in the file header;
        # only read files that come from a trusted source.
        hdr = eval(dict_source)

        hdr['_SIZE'] = nrows

        # Defaulting the version leaves open the possibility of changing the
        # header or other details later.
        if '_version' not in hdr and '_VERSION' not in hdr:
            hdr['_VERSION'] = '1.0'

        return hdr
Esempio n. 3
0
    def open(self,
             filename,
             mode='r',
             delim=None,
             padnull=False,
             ignorenull=False,
             **keys):
        """
        Open the file.

        self.close() is called first.  If the mode starts with 'r' ('r' or
        'r+') the header is read from the file; delim, size and dtype are
        taken from the header and the delim= keyword is ignored.  For any
        other mode no header is read and delim= is stored for writing later.

        If mode is 'r+' but the file does not exist, the stored mode is
        switched to 'w+' so that no header read is attempted on a missing
        file.

        Parameters:
            filename: path to the file, or None.  '~' and environment
                variables are expanded.  If None, the options are recorded
                and nothing is opened.
            mode: file mode string, 'r' by default.
            delim: delimiter, used only when no header is read.
            padnull, ignorenull: stored and passed on to recfile.Recfile.
            **keys: accepted but not used by this method.

        Raises:
            ImportError if numpy or the recfile package is unavailable.
        """

        if not have_numpy:
            raise ImportError("numpy could not be imported")
        if not have_recfile:
            raise ImportError("the recfile package is required")

        self.close()

        self._padnull = padnull
        self._ignorenull = ignorenull
        self._mode = mode

        self._filename = filename

        if filename is None:
            return

        # expand shortcut variables
        fpath = os.path.expanduser(filename)
        fpath = os.path.expandvars(fpath)
        self._filename = fpath

        if mode == 'r+' and not os.path.exists(self._filename):
            # The path doesn't exist but we want to append, so there is no
            # header to read.  Change the stored mode to write; everything
            # below consults self._mode, so updating only the local name
            # would have no effect.
            self._mode = 'w+'

        if self._mode[0] == 'r':
            # For 'r' and 'r+' try to read the header
            self._hdr = self.read_header()

            self._delim = _match_key(self._hdr, '_delim')
            self._size = _match_key(self._hdr, '_size', require=True)
            self._descr = _match_key(self._hdr, '_dtype', require=True)
            self._dtype = numpy.dtype(self._descr)

            self._robj = recfile.Recfile(
                self._filename,
                mode=self._mode,
                delim=self._delim,
                dtype=self._dtype,
                nrows=self.get_nrows(),
                offset=self._data_start,
                padnull=self._padnull,  # these get sent in case mode is r+
                ignorenull=self._ignorenull,
            )

        else:
            # we are starting from scratch, so we can't read a header
            #
            # get delim from the keyword.  This will be used for writing later
            self._delim = delim

            self._robj = recfile.Recfile(
                self._filename,
                mode=self._mode,
                delim=self._delim,
                padnull=self._padnull,
                ignorenull=self._ignorenull,
            )