def get_file_obj(fname, mode='r', encoding=None): """ Light wrapper to handle strings and let files (anything else) pass through. It also handle '.gz' files. Parameters ---------- fname: string or file-like object File to open / forward mode: string Argument passed to the 'open' or 'gzip.open' function encoding: string For Python 3 only, specify the encoding of the file Returns ------- A file-like object that is always a context-manager. If the `fname` was already a file-like object, the returned context manager *will not close the file*. """ if _is_string_like(fname): return _open(fname, mode, encoding) try: # Make sure the object has the write methods if 'r' in mode: assert hasattr(fname, 'read') if 'w' in mode or 'a' in mode: assert hasattr(fname, 'write') except AssertionError: # pragma: no cover raise ValueError('fname must be a string or a file-like object') return EmptyContextManager(fname)
def drop_fields(base, drop_names, usemask=True, asrecarray=False): """ Return a new array with fields in `drop_names` dropped. Nested fields are supported. Parameters ---------- base : array Input array drop_names : string or sequence String or sequence of strings corresponding to the names of the fields to drop. usemask : {False, True}, optional Whether to return a masked array or not. asrecarray : string or sequence Whether to return a recarray or a mrecarray (`asrecarray=True`) or a plain ndarray or masked array with flexible dtype (`asrecarray=False`) Examples -------- >>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) >>> drop_fields(a, 'a') array([((2.0, 3),), ((5.0, 6),)], dtype=[('b', [('ba', '<f8'), ('bb', '<i4')])]) >>> drop_fields(a, 'ba') array([(1, (3,)), (4, (6,))], dtype=[('a', '<i4'), ('b', [('bb', '<i4')])]) >>> drop_fields(a, ['ba', 'bb']) array([(1,), (4,)], dtype=[('a', '<i4')]) """ if _is_string_like(drop_names): drop_names = [drop_names,] else: drop_names = set(drop_names) # def _drop_descr(ndtype, drop_names): names = ndtype.names newdtype = [] for name in names: current = ndtype[name] if name in drop_names: continue if current.names: descr = _drop_descr(current, drop_names) if descr: newdtype.append((name, descr)) else: newdtype.append((name, current)) return newdtype # newdtype = _drop_descr(base.dtype, drop_names) if not newdtype: return None # output = np.empty(base.shape, dtype=newdtype) output = recursive_fill_fields(base, output) return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
def get_file_obj(fname, mode='r', encoding=None): """ Light wrapper to handle strings and let files (anything else) pass through. It also handle '.gz' files. Parameters ========== fname: string or file-like object File to open / forward mode: string Argument passed to the 'open' or 'gzip.open' function encoding: string For Python 3 only, specify the encoding of the file Returns ======= A file-like object that is always a context-manager. If the `fname` was already a file-like object, the returned context manager *will not close the file*. """ if _is_string_like(fname): return _open(fname, mode, encoding) try: # Make sure the object has the write methods if 'r' in mode: fname.read if 'w' in mode or 'a' in mode: fname.write except AttributeError: raise ValueError('fname must be a string or a file-like object') return EmptyContextManager(fname)
def get_file_obj(fname, mode="r", encoding=None): """ Light wrapper to handle strings, path objects and let files (anything else) pass through. It also handle '.gz' files. Parameters ---------- fname : str, path object or file-like object File to open / forward mode : str Argument passed to the 'open' or 'gzip.open' function encoding : str For Python 3 only, specify the encoding of the file Returns ------- A file-like object that is always a context-manager. If the `fname` was already a file-like object, the returned context manager *will not close the file*. """ if _is_string_like(fname): fname = Path(fname) if isinstance(fname, Path): return fname.open(mode=mode, encoding=encoding) elif hasattr(fname, "open"): return fname.open(mode=mode, encoding=encoding) try: return open(fname, mode, encoding=encoding) except TypeError: try: # Make sure the object has the write methods if "r" in mode: fname.read if "w" in mode or "a" in mode: fname.write except AttributeError: raise ValueError("fname must be a string or a file-like object") return EmptyContextManager(fname)
def savetxt(fname, X, names=None, fmt='%.18e', delimiter=' '): """ Save an array to a text file. This is just a copy of numpy.savetxt patched to support structured arrays or a header of names. Does not include py3 support now in savetxt. Parameters ---------- fname : filename or file handle If the filename ends in ``.gz``, the file is automatically saved in compressed gzip format. `loadtxt` understands gzipped files transparently. X : array_like Data to be saved to a text file. names : list, optional If given names will be the column header in the text file. If None and X is a structured or recarray then the names are taken from X.dtype.names. fmt : str or sequence of strs A single format (%10.5f), a sequence of formats, or a multi-format string, e.g. 'Iteration %d -- %10.5f', in which case `delimiter` is ignored. delimiter : str Character separating columns. See Also -------- save : Save an array to a binary file in NumPy ``.npy`` format savez : Save several arrays into a ``.npz`` compressed archive Notes ----- Further explanation of the `fmt` parameter (``%[flag]width[.precision]specifier``): flags: ``-`` : left justify ``+`` : Forces to preceed result with + or -. ``0`` : Left pad the number with zeros instead of space (see width). width: Minimum number of characters to be printed. The value is not truncated if it has more characters. precision: - For integer specifiers (eg. ``d,i,o,x``), the minimum number of digits. - For ``e, E`` and ``f`` specifiers, the number of digits to print after the decimal point. - For ``g`` and ``G``, the maximum number of significant digits. - For ``s``, the maximum number of characters. specifiers: ``c`` : character ``d`` or ``i`` : signed decimal integer ``e`` or ``E`` : scientific notation with ``e`` or ``E``. ``f`` : decimal floating point ``g,G`` : use the shorter of ``e,E`` or ``f`` ``o`` : signed octal ``s`` : string of characters ``u`` : unsigned decimal integer ``x,X`` : unsigned hexadecimal integer This explanation of ``fmt`` is not complete, for an exhaustive specification see [1]_. References ---------- .. [1] `Format Specification Mini-Language <http://docs.python.org/library/string.html# format-specification-mini-language>`_, Python Documentation. Examples -------- >>> savetxt('test.out', x, delimiter=',') # x is an array >>> savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays >>> savetxt('test.out', x, fmt='%1.4e') # use exponential notation """ if _is_string_like(fname): if fname.endswith('.gz'): import gzip fh = gzip.open(fname, 'wb') else: fh = file(fname, 'w') elif hasattr(fname, 'seek'): fh = fname else: raise ValueError('fname must be a string or file handle') X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = len(X.dtype.descr) else: ncol = X.shape[1] # `fmt` can be a string with multiple insertion points or a list of formats. # E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') if type(fmt) in (list, tuple): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) format = delimiter.join(fmt) elif type(fmt) is str: if fmt.count('%') == 1: fmt = [fmt, ]*ncol format = delimiter.join(fmt) elif fmt.count('%') != ncol: raise AttributeError('fmt has wrong number of %% formats. %s' % fmt) else: format = fmt # handle names if names is None and X.dtype.names: names = X.dtype.names if names is not None: fh.write(delimiter.join(names) + '\n') for row in X: fh.write(format % tuple(row) + '\n')
def savetxt(fname, X, names=None, fmt='%.18e', delimiter=' '): """ Save an array to a text file. This is just a copy of numpy.savetxt patched to support structured arrays or a header of names. Does not include py3 support now in savetxt. Parameters ---------- fname : filename or file handle If the filename ends in ``.gz``, the file is automatically saved in compressed gzip format. `loadtxt` understands gzipped files transparently. X : array_like Data to be saved to a text file. names : list, optional If given names will be the column header in the text file. If None and X is a structured or recarray then the names are taken from X.dtype.names. fmt : str or sequence of strs A single format (%10.5f), a sequence of formats, or a multi-format string, e.g. 'Iteration %d -- %10.5f', in which case `delimiter` is ignored. delimiter : str Character separating columns. See Also -------- save : Save an array to a binary file in NumPy ``.npy`` format savez : Save several arrays into a ``.npz`` compressed archive Notes ----- Further explanation of the `fmt` parameter (``%[flag]width[.precision]specifier``): flags: ``-`` : left justify ``+`` : Forces to preceed result with + or -. ``0`` : Left pad the number with zeros instead of space (see width). width: Minimum number of characters to be printed. The value is not truncated if it has more characters. precision: - For integer specifiers (eg. ``d,i,o,x``), the minimum number of digits. - For ``e, E`` and ``f`` specifiers, the number of digits to print after the decimal point. - For ``g`` and ``G``, the maximum number of significant digits. - For ``s``, the maximum number of characters. specifiers: ``c`` : character ``d`` or ``i`` : signed decimal integer ``e`` or ``E`` : scientific notation with ``e`` or ``E``. ``f`` : decimal floating point ``g,G`` : use the shorter of ``e,E`` or ``f`` ``o`` : signed octal ``s`` : string of characters ``u`` : unsigned decimal integer ``x,X`` : unsigned hexadecimal integer This explanation of ``fmt`` is not complete, for an exhaustive specification see [1]_. References ---------- .. [1] `Format Specification Mini-Language <http://docs.python.org/library/string.html# format-specification-mini-language>`_, Python Documentation. Examples -------- >>> savetxt('test.out', x, delimiter=',') # x is an array >>> savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays >>> savetxt('test.out', x, fmt='%1.4e') # use exponential notation """ if _is_string_like(fname): if fname.endswith('.gz'): import gzip fh = gzip.open(fname, 'wb') else: fh = file(fname, 'w') elif hasattr(fname, 'seek'): fh = fname else: raise ValueError('fname must be a string or file handle') X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = len(X.dtype.descr) else: ncol = X.shape[1] # `fmt` can be a string with multiple insertion points or a list of formats. # E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') if type(fmt) in (list, tuple): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) format = delimiter.join(fmt) elif type(fmt) is str: if fmt.count('%') == 1: fmt = [ fmt, ] * ncol format = delimiter.join(fmt) elif fmt.count('%') != ncol: raise AttributeError('fmt has wrong number of %% formats. %s' % fmt) else: format = fmt # handle names if names is None and X.dtype.names: names = X.dtype.names if names is not None: fh.write(delimiter.join(names) + '\n') for row in X: fh.write(format % tuple(row) + '\n')
def savetxt_nice(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# '): """ Reimplmenentation of numpy's savetxt that doesn't complain about bytes when saving to unicode files in Python 3. Save an array to a text file. Parameters ---------- fname : filename or file handle If the filename ends in ``.gz``, the file is automatically saved in compressed gzip format. `loadtxt` understands gzipped files transparently. X : array_like Data to be saved to a text file. fmt : str or sequence of strs, optional A single format (%10.5f), a sequence of formats, or a multi-format string, e.g. 'Iteration %d -- %10.5f', in which case `delimiter` is ignored. For complex `X`, the legal options for `fmt` are: a) a single specifier, `fmt='%.4e'`, resulting in numbers formatted like `' (%s+%sj)' % (fmt, fmt)` b) a full string specifying every real and imaginary part, e.g. `' %.4e %+.4j %.4e %+.4j %.4e %+.4j'` for 3 columns c) a list of specifiers, one per column - in this case, the real and imaginary part must have separate specifiers, e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns delimiter : str, optional String or character separating columns. newline : str, optional String or character separating lines. .. versionadded:: 1.5.0 header : str, optional String that will be written at the beginning of the file. .. versionadded:: 1.7.0 footer : str, optional String that will be written at the end of the file. .. versionadded:: 1.7.0 comments : str, optional String that will be prepended to the ``header`` and ``footer`` strings, to mark them as comments. Default: '# ', as expected by e.g. ``numpy.loadtxt``. .. versionadded:: 1.7.0 See Also -------- save : Save an array to a binary file in NumPy ``.npy`` format savez : Save several arrays into an uncompressed ``.npz`` archive savez_compressed : Save several arrays into a compressed ``.npz`` archive Notes ----- Further explanation of the `fmt` parameter (``%[flag]width[.precision]specifier``): flags: ``-`` : left justify ``+`` : Forces to precede result with + or -. ``0`` : Left pad the number with zeros instead of space (see width). width: Minimum number of characters to be printed. The value is not truncated if it has more characters. precision: - For integer specifiers (eg. ``d,i,o,x``), the minimum number of digits. - For ``e, E`` and ``f`` specifiers, the number of digits to print after the decimal point. - For ``g`` and ``G``, the maximum number of significant digits. - For ``s``, the maximum number of characters. specifiers: ``c`` : character ``d`` or ``i`` : signed decimal integer ``e`` or ``E`` : scientific notation with ``e`` or ``E``. ``f`` : decimal floating point ``g,G`` : use the shorter of ``e,E`` or ``f`` ``o`` : signed octal ``s`` : string of characters ``u`` : unsigned decimal integer ``x,X`` : unsigned hexadecimal integer This explanation of ``fmt`` is not complete, for an exhaustive specification see [1]_. References ---------- .. [1] `Format Specification Mini-Language <http://docs.python.org/library/string.html# format-specification-mini-language>`_, Python Documentation. Examples -------- >>> x = y = z = np.arange(0.0,5.0,1.0) >>> np.savetxt('test.out', x, delimiter=',') # X is an array >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation """ # Py3 conversions first if isinstance(fmt, bytes): fmt = asstr(fmt) delimiter = asstr(delimiter) own_fh = False if _is_string_like(fname): own_fh = True if fname.endswith('.gz'): import gzip fh = gzip.open(fname, 'wb') else: if sys.version_info[0] >= 3: fh = open(fname, 'wb') else: fh = open(fname, 'w') elif hasattr(fname, 'write'): fh = fname else: raise ValueError('fname must be a string or file handle') try: X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = len(X.dtype.descr) else: ncol = X.shape[1] iscomplex_X = np.iscomplexobj(X) # `fmt` can be a string with multiple insertion points or a # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') if isinstance(fmt, (list, tuple)): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) txt_format = asstr(delimiter).join(map(asstr, fmt)) elif isinstance(fmt, str): n_fmt_chars = fmt.count('%') error = ValueError('fmt has wrong number of %% formats: %s' % fmt) if n_fmt_chars == 1: if iscomplex_X: fmt = [ ' (%s+%sj)' % (fmt, fmt), ] * ncol else: fmt = [ fmt, ] * ncol txt_format = delimiter.join(fmt) elif iscomplex_X and n_fmt_chars != (2 * ncol): raise error elif ((not iscomplex_X) and n_fmt_chars != ncol): raise error else: txt_format = fmt else: raise ValueError('invalid fmt: %r' % (fmt, )) if len(header) > 0: header = header.replace('\n', '\n' + comments) fh.write(asbytes(comments + header + newline)) if iscomplex_X: for row in X: row2 = [] for number in row: row2.append(number.real) row2.append(number.imag) fh.write(asbytes(txt_format % tuple(row2) + newline)) else: for row in X: try: #print('txt_format = %r' % txt_format, type(txt_format)) fh.write(asbytes(txt_format % tuple(row) + newline)) except TypeError: raise TypeError("Mismatch between array dtype ('%s') and " "format specifier ('%s')" % (str(X.dtype), txt_format)) if len(footer) > 0: footer = footer.replace('\n', '\n' + comments) fh.write(asbytes(comments + footer + newline)) finally: if own_fh: fh.close()
def savetxt_nice(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None): """ Save an array to a text file. This is 95% a backport of numpy 1.15.1's savetxt. It does not support: - DataSource's URL - pathlib fnames - gz files Parameters ---------- fname : filename or file handle If the filename ends in ``.gz``, the file is automatically saved in compressed gzip format. `loadtxt` understands gzipped files transparently. X : 1D or 2D array_like Data to be saved to a text file. fmt : str or sequence of strs, optional A single format (%10.5f), a sequence of formats, or a multi-format string, e.g. 'Iteration %d -- %10.5f', in which case `delimiter` is ignored. For complex `X`, the legal options for `fmt` are: * a single specifier, `fmt='%.4e'`, resulting in numbers formatted like `' (%s+%sj)' % (fmt, fmt)` * a full string specifying every real and imaginary part, e.g. `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns * a list of specifiers, one per column - in this case, the real and imaginary part must have separate specifiers, e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns delimiter : str, optional String or character separating columns. newline : str, optional String or character separating lines. .. versionadded:: 1.5.0 header : str, optional String that will be written at the beginning of the file. .. versionadded:: 1.7.0 footer : str, optional String that will be written at the end of the file. .. versionadded:: 1.7.0 comments : str, optional String that will be prepended to the ``header`` and ``footer`` strings, to mark them as comments. Default: '# ', as expected by e.g. ``numpy.loadtxt``. .. versionadded:: 1.7.0 encoding : {None, str}, optional Encoding used to encode the outputfile. Does not apply to output streams. If the encoding is something other than 'bytes' or 'latin1' you will not be able to load the file in NumPy versions < 1.14. Default is 'latin1'. .. versionadded:: 1.14.0 See Also -------- save : Save an array to a binary file in NumPy ``.npy`` format savez : Save several arrays into an uncompressed ``.npz`` archive savez_compressed : Save several arrays into a compressed ``.npz`` archive Notes ----- Further explanation of the `fmt` parameter (``%[flag]width[.precision]specifier``): flags: ``-`` : left justify ``+`` : Forces to precede result with + or -. ``0`` : Left pad the number with zeros instead of space (see width). width: Minimum number of characters to be printed. The value is not truncated if it has more characters. precision: - For integer specifiers (eg. ``d,i,o,x``), the minimum number of digits. - For ``e, E`` and ``f`` specifiers, the number of digits to print after the decimal point. - For ``g`` and ``G``, the maximum number of significant digits. - For ``s``, the maximum number of characters. specifiers: ``c`` : character ``d`` or ``i`` : signed decimal integer ``e`` or ``E`` : scientific notation with ``e`` or ``E``. ``f`` : decimal floating point ``g,G`` : use the shorter of ``e,E`` or ``f`` ``o`` : signed octal ``s`` : string of characters ``u`` : unsigned decimal integer ``x,X`` : unsigned hexadecimal integer This explanation of ``fmt`` is not complete, for an exhaustive specification see [1]_. References ---------- .. [1] `Format Specification Mini-Language <http://docs.python.org/library/string.html# format-specification-mini-language>`_, Python Documentation. Examples -------- >>> x = y = z = np.arange(0.0,5.0,1.0) >>> np.savetxt('test.out', x, delimiter=',') # X is an array >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation """ # Py3 conversions first if isinstance(fmt, bytes): fmt = asstr(fmt) delimiter = asstr(delimiter) class WriteWrap: """Convert to unicode in py2 or to bytes on bytestream inputs.""" def __init__(self, fh, encoding): self.fh = fh self.encoding = encoding self.do_write = self.first_write def close(self): self.fh.close() def write(self, v): self.do_write(v) def write_bytes(self, v): if isinstance(v, bytes): self.fh.write(v) else: self.fh.write(v.encode(self.encoding)) def write_normal(self, v): self.fh.write(asunicode(v)) def first_write(self, v): try: self.write_normal(v) self.write = self.write_normal except TypeError: # input is probably a bytestream self.write_bytes(v) self.write = self.write_bytes own_fh = False #if is_pathlib_path(fname): #fname = str(fname) if _is_string_like(fname): # datasource doesn't support creating a new file ... open(fname, 'wt').close() fh = open(fname, 'wt', encoding=encoding) own_fh = True # need to convert str to unicode for text io output if sys.version_info[0] == 2: fh = WriteWrap(fh, encoding or 'latin1') elif hasattr(fname, 'write'): # wrap to handle byte output streams fh = WriteWrap(fname, encoding or 'latin1') #if _is_string_like(fname): #own_fh = True #if fname.endswith('.gz'): #import gzip #fh = gzip.open(fname, 'wb') #else: #if sys.version_info[0] >= 3: #fh = open(fname, 'wb') #else: #fh = open(fname, 'w') #elif hasattr(fname, 'write'): #fh = fname else: raise ValueError('fname must be a string or file handle') try: X = np.asarray(X) # Handle 1-dimensional arrays if X.ndim == 0 or X.ndim > 2: raise ValueError( "Expected 1D or 2D array, got %dD array instead" % X.ndim) elif X.ndim == 1: # Common case -- 1d array of numbers if X.dtype.names is None: X = np.atleast_2d(X).T ncol = 1 # Complex dtype -- each field indicates a separate column else: ncol = len(X.dtype.descr) else: ncol = X.shape[1] iscomplex_X = np.iscomplexobj(X) # `fmt` can be a string with multiple insertion points or a # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') if isinstance(fmt, (list, tuple)): if len(fmt) != ncol: raise AttributeError('fmt has wrong shape. %s' % str(fmt)) txt_format = asstr(delimiter).join(map(asstr, fmt)) elif isinstance(fmt, str): n_fmt_chars = fmt.count('%') error = ValueError('fmt has wrong number of %% formats: %s' % fmt) if n_fmt_chars == 1: if iscomplex_X: fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol else: fmt = [fmt, ] * ncol txt_format = delimiter.join(fmt) elif iscomplex_X and n_fmt_chars != (2 * ncol): raise error elif ((not iscomplex_X) and n_fmt_chars != ncol): raise error else: txt_format = fmt else: raise ValueError('invalid fmt: %r' % (fmt,)) if len(header) > 0: header = header.replace('\n', '\n' + comments) fh.write(comments + header + newline) if iscomplex_X: for row in X: row2 = [] for number in row: row2.append(number.real) row2.append(number.imag) s = txt_format % tuple(row2) + newline fh.write(s.replace('+-', '-')) else: for row in X: try: v = txt_format % tuple(row) + newline except TypeError: raise TypeError("Mismatch between array dtype ('%s') and " "format specifier ('%s')" % (str(X.dtype), txt_format)) fh.write(v) if len(footer) > 0: footer = footer.replace('\n', '\n' + comments) fh.write(comments + footer + newline) finally: if own_fh: fh.close()
def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False,count = -1): """ Load data from a text file. Each row in the text file must have the same number of values. Parameters ---------- fname : file or string File or filename to read. If the filename extension is ``.gz`` or ``.bz2``, the file is first decompressed. dtype : data-type Data type of the resulting array. If this is a record data-type, the resulting array will be 1-dimensional, and each row will be interpreted as an element of the array. In this case, the number of columns used must match the number of fields in the data-type. comments : string, optional The character used to indicate the start of a comment. delimiter : string, optional The string used to separate values. By default, this is any whitespace. converters : {} A dictionary mapping column number to a function that will convert that column to a float. E.g., if column 0 is a date string: ``converters = {0: datestr2num}``. Converters can also be used to provide a default value for missing data: ``converters = {3: lambda s: float(s or 0)}``. skiprows : int Skip the first `skiprows` lines. usecols : sequence Which columns to read, with 0 being the first. For example, ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns. unpack : bool If True, the returned array is transposed, so that arguments may be unpacked using ``x, y, z = loadtxt(...)`` Returns ------- out : ndarray Data read from the text file. See Also -------- scipy.io.loadmat : reads Matlab(R) data files Examples -------- >>> from StringIO import StringIO # StringIO behaves like a file object >>> c = StringIO("0 1\\n2 3") >>> np.loadtxt(c) array([[ 0., 1.], [ 2., 3.]]) >>> d = StringIO("M 21 72\\nF 35 58") >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'), ... 'formats': ('S1', 'i4', 'f4')}) array([('M', 21, 72.0), ('F', 35, 58.0)], dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')]) >>> c = StringIO("1,0,2\\n3,0,4") >>> x,y = np.loadtxt(c, delimiter=',', usecols=(0,2), unpack=True) >>> x array([ 1., 3.]) >>> y array([ 2., 4.]) """ user_converters = converters if usecols is not None: usecols = list(usecols) isstring = False if _is_string_like(fname): isstring = True if fname.endswith('.gz'): import gzip fh = seek_gzip_factory(fname) elif fname.endswith('.bz2'): import bz2 fh = bz2.BZ2File(fname) else: fh = file(fname) elif hasattr(fname, 'readline'): fh = fname else: raise ValueError('fname must be a string or file handle') X = [] def flatten_dtype(dt): """Unpack a structured data-type.""" if dt.names is None: return [dt] else: types = [] for field in dt.names: tp, bytes = dt.fields[field] flat_dt = flatten_dtype(tp) types.extend(flat_dt) return types def split_line(line): """Chop off comments, strip, and split at delimiter.""" line = line.split(comments)[0].strip() if line: return line.split(delimiter) else: return [] try: # Make sure we're dealing with a proper dtype dtype = np.dtype(dtype) defconv = _getconv(dtype) # Skip the first `skiprows` lines for i in xrange(skiprows): fh.readline() # Read until we find a line with some values, and use # it to estimate the number of columns, N. first_vals = None while not first_vals: first_line = fh.readline() if first_line == '': # EOF reached raise IOError('End-of-file reached before encountering data.') first_vals = split_line(first_line) N = len(usecols or first_vals) dtype_types = flatten_dtype(dtype) if len(dtype_types) > 1: # We're dealing with a structured array, each field of # the dtype matches a column converters = [_getconv(dt) for dt in dtype_types] else: # All fields have the same dtype converters = [defconv for i in xrange(N)] # By preference, use the converters specified by the user for i, conv in (user_converters or {}).iteritems(): if usecols: try: i = usecols.index(i) except ValueError: # Unused converter specified continue converters[i] = conv # Parse each line, including the first vals_gen = ( split_line(line) for line in itertools.chain([first_line], fh) ) if usecols: vals_gen = ( [x for i,x in enumerate(vals) if i in usecols] for vals in vals_gen ) data_gen = ( tuple([conv(val) for (conv, val) in zip(converters, vals)]) for vals in vals_gen if len(vals)>0 ) if len(dtype_types) > 1: X = np.fromiter(data_gen, dtype = np.dtype([('', t) for t in dtype_types]),count=count) X = X.view(dtype) else: X = np.fromiter(itertools.chain(*data_gen), dtype = dtype,count=count) X.shape = (-1,N) finally: if isstring: fh.close() X = np.squeeze(X) if unpack: return X.T else: return X