def time_records(data, dates=None, start_date=None, freq=None, mask=nomask,
                 dtype=None, copy=False, fill_value=None, keep_mask=True,
                 hard_mask=False):
    """
    Build a time series and return it viewed as a TimeSeriesRecords.

    This is a thin wrapper: every argument is forwarded unchanged to
    `time_series`, and the resulting series is viewed as
    `TimeSeriesRecords`. The parameter semantics described below are
    therefore those of `time_series`.

    Parameters
    ----------
    data : array_like
        Data portion of the array. Any data valid for constructing a
        MaskedArray can be used. May also be a TimeSeries object.
    dates : {None, DateArray}, optional
        Sequence of dates, one per entry.
        If None, a DateArray as long as ``data`` is built, starting at
        ``start_date`` with frequency ``freq``.
    start_date : {Date}, optional
        Date of the first entry (index 0). Must be a valid Date object;
        mandatory if ``dates`` is None and ``data`` has at least one entry.
    freq : {freq_spec}, optional
        Frequency specification, as a string or an integer.
        Mandatory if ``dates`` is None.
    mask : {nomask, sequence}, optional
        Mask. Must be convertible to an array of booleans with the same
        shape as ``data``; True flags a masked (invalid) entry.
    dtype : {dtype}, optional
        Data type of the output. If None, ``data.dtype`` is used. If given
        and different from ``data.dtype``, a copy is performed.
    copy : {False, True}, optional
        Whether to copy the input data (True) or to reference it (False).
        Data are NOT copied by default.
    fill_value : {var}, optional
        Value used to fill in the masked values when necessary.
        If None, a default based on the datatype is used.
    keep_mask : {True, boolean}, optional
        Whether to combine ``mask`` with the mask of the input data, if any
        (True), or to use only ``mask`` for the output (False).
    hard_mask : {False, boolean}, optional
        Whether to use a hard mask. With a hard mask, masked values cannot
        be unmasked.

    Returns
    -------
    out : TimeSeriesRecords
        View of the series built by `time_series`.

    Notes
    -----
    * Only the parameters listed above are accepted and forwarded.
    * The date portion of the time series must be specified in one of the
      following ways:

       * pass a TimeSeries object as ``data``;
       * pass a DateArray as ``dates``;
       * pass a ``start_date`` (a continuous DateArray is then constructed
         for the dates portion);
       * pass just a frequency (for a TimeSeries of size zero).

    """
    # Gather the forwarded options once, then delegate the construction.
    ts_options = {
        'dates': dates,
        'start_date': start_date,
        'freq': freq,
        'mask': mask,
        'dtype': dtype,
        'copy': copy,
        'fill_value': fill_value,
        'keep_mask': keep_mask,
        'hard_mask': hard_mask,
    }
    base_series = time_series(data, **ts_options)
    return base_series.view(TimeSeriesRecords)
def tsfromtxt(fname,
              dtype=None,
              freq='U',
              comments='#',
              delimiter=None,
              skip_header=0,
              skip_footer=0,
              skiprows=0,
              converters=None,
              dateconverter=None,
              missing='',
              missing_values=None,
              filling_values=None,
              usecols=None,
              datecols=None,
              names=None,
              excludelist=None,
              deletechars=None,
              autostrip=True,
              case_sensitive=True,
              defaultfmt="f%i",
              unpack=None,
              loose=True,
              asrecarray=False,
              invalid_raise=True):
    """
    Load a TimeSeries from a text file.

    The file is parsed with `genfromtxt` (always with ``usemask=True``).
    The columns holding the date information are then converted to dates
    with `date_array`, the records are sorted by date, and the remaining
    columns become the data of the series built with `time_series`.

    The date columns are either given explicitly with ``datecols`` or, when
    ``datecols`` is None, detected from the field names: every field whose
    name contains ``date`` (case insensitive) is used.
    If the date information spans several columns (for example, year in
    col #1, month in col #2...), the date converter must accept as many
    arguments as there are date columns and return a valid :class:`Date`.

    Parameters
    ----------
    fname : file or string
        File or filename to read, handed to `genfromtxt`.
        If the file extension is ``.gz`` or ``.bz2``, the file is first
        decompressed.
    dtype : data-type, optional
        Data type of the data columns. It must NOT describe the date
        columns, and ``datecols`` must be given along with it.
        If it is a structured data-type, the resulting array is
        1-dimensional, each row is one element of the array, and the field
        names are taken from the dtype.
        If None, the type of each column is determined from its contents.
    freq : {freq_spec}, optional
        Frequency of the output dates. It is used by the default date
        converter and passed to `date_array`.
    comments : {string}, optional
        The character used to indicate the start of a comment.
        All the characters occurring on a line after a comment are
        discarded.
    delimiter : {string}, optional
        The string used to separate values. By default, any consecutive
        whitespace acts as delimiter.
    skip_header : int, optional
        The number of lines to skip at the beginning of the file.
    skip_footer : int, optional
        The number of lines to skip at the end of the file.
    skiprows : int, optional
        Forwarded unchanged to `genfromtxt`.
    converters : dict or None, optional
        Functions converting the data of a column to a value, keyed by
        column. The converters can also be used to provide a default value
        for missing data: ``converters = {3: lambda s: float(s or 0)}``.
        An entry stored under the key ``'dates'`` is used as date converter
        and takes precedence over ``dateconverter``.
        The dictionary passed in is never modified.
    dateconverter : {function}, optional
        The function converting the date information to a :class:`Date`
        object. It receives as many arguments as there are date columns.
        If omitted (and no ``'dates'`` converter is given), each date string
        ``s`` is converted with ``Date(freq, string=s)``.
    missing : string, optional
        Forwarded unchanged to `genfromtxt`.
    missing_values : variable or None, optional
        The set of strings corresponding to missing data.
    filling_values : variable or None, optional
        The set of values to be used as default when the data are missing.
    usecols : sequence or None, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
    datecols : {None, int, sequence}, optional
        Which columns store the date information. A comma-separated string
        is split into its components.
    names : {None, True, str, sequence}, optional
        If `names` is True, the field names are read from the first valid
        line after the skipped ones.
        If `names` is a sequence or a single string of comma-separated
        names, the names are used as field names. When ``datecols`` is
        given, the sequence must NOT include the date columns; when
        ``datecols`` is None, it must name the date column(s) so that they
        can be detected.
        If `names` is None, the names of the dtype fields are used, if any.
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default
        list ['return','file','print']. Excluded names are appended an
        underscore: for example, `file` would become `file_`.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    autostrip : bool, optional
        Whether to automatically strip white spaces from the variables.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        If True, field names are case sensitive.
        If False or 'upper', field names are converted to upper case.
        If 'lower', field names are converted to lower case.
    defaultfmt : str, optional
        A format used to define default field names, such as "f%i" or
        "f_%02i".
    unpack : bool, optional
        Forwarded unchanged to `genfromtxt`.
    loose : bool, optional
        Forwarded unchanged to `genfromtxt`.
    asrecarray : {False, True}, optional
        Whether to return a TimeSeriesRecords (True) or the series as built
        by `time_series` (False).
    invalid_raise : bool, optional
        If True, an exception is raised if an inconsistency is detected in
        the number of columns.
        If False, a warning is emitted and the offending lines are skipped.

    Returns
    -------
    out : TimeSeries or TimeSeriesRecords
        The series built by `time_series` from the data columns, sorted by
        date; viewed as TimeSeriesRecords if ``asrecarray`` is True.

    Raises
    ------
    TypeError
        If ``dtype`` is given without ``datecols``, or if ``datecols`` is
        None and no field name designates a date column.

    See Also
    --------
    numpy.lib.io.genfromtxt
        Equivalent function for standard arrays

    Notes
    -----
    * When spaces are used as delimiters, or when no delimiter has been
      given as input, there should not be any missing data between two
      fields.
    * When the variables are named (either by a flexible dtype or with
      `names`), there must not be any header in the file (else a
      :exc:`ValueError` exception is raised).
    * If ``names`` is True or a sequence of strings, these names overwrite
      the field names of a structured array.
    * If the ``dtype`` is given explicitly, it must NOT refer to the date
      columns.
    * By default, the types of variables are defined from the values
      encountered in the file (``dtype=None``). This is *NOT* the default
      for np.genfromtxt.

    Examples
    --------
    >>> data = "year, month, a, b\\n 2001, 01, 0.0, 10.\\n 2001, 02, 1.1, 11."
    >>> dateconverter = lambda y, m: Date('M', year=int(y), month=int(m))
    >>> series = tsfromtxt(StringIO.StringIO(data), delimiter=',', names=True,
    ...                    datecols=(0,1), dateconverter=dateconverter,)
    >>> series
    timeseries([(0.0, 10.0) (1.1, 11.0)],
       dtype = [('a', '<f8'), ('b', '<f8')],
       dates = [Jan-2001 Feb-2001],
       freq  = M)
    >>> series = tsfromtxt(StringIO.StringIO(data), delimiter=",",
    ...                    datecols=(0, 1), dateconverter=dateconverter,
    ...                    names="A, B", skip_header=1)
    >>> series
    timeseries([(0.0, 10.0) (1.1000000000000001, 11.0)],
       dtype = [('A', '<f8'), ('B', '<f8')],
       dates = [Jan-2001 Feb-2001],
       freq  = M)

    """
    # Update the date converter ...........................
    # Work on a private copy: entries are removed and added below, and these
    # changes must not leak into the dictionary owned by the caller.
    converters = dict(converters or {})
    dateconv = dateconverter or None
    if dateconv is None:
        dateconv = lambda s: Date(freq, string=s)
    if 'dates' in converters:
        # A converter registered under 'dates' wins over `dateconverter`.
        dateconv = converters.pop('dates')

    # Make sure `datecols` is a sequence ..................
    if datecols is not None:
        try:
            datecols = [_.strip() for _ in datecols.split(",")]
        except AttributeError:
            # Not a string: either an iterable of columns or a single one.
            try:
                datecols = list(datecols)
            except TypeError:
                datecols = [datecols]
        # ... and update the converters: the date columns are read as
        # strings and converted to dates afterwards.
        converters.update((i, str) for i in datecols)

    # Save the initial names and dtypes ...................
    idtype = dtype
    if isinstance(names, basestring):
        names = names.split(",")
    inames = names

    # Update the dtype (if needed) ........................
    if dtype is not None:
        # Crash if we can't find the datecols
        if datecols is None:
            raise TypeError("No column selected for the dates!")
        # Make sure dtype is a valid np.dtype and make a copy
        dtype = easy_dtype(dtype, names=names)
        idtype = dtype
        inames = dtype.names
        if inames is not None:
            nbfields = len(inames) + len(datecols)
            # Create a new dtype description and a set of names
            dtype = [''] * nbfields
            names = [''] * nbfields
            # Explicit list, so that entries can be removed whatever the
            # type returned by `range`.
            idx = list(range(nbfields))
            for i in datecols:
                if i < 0:
                    i += nbfields
                idx.remove(i)
                # Set the default dtype for date columns, as object
                # (we can't use string as we don't know the final size)
                dtype[i] = ('', object)
            # Map the character code of each field to a Python converter
            convdict = {
                'b': bool,
                'i': int,
                'l': int,
                'u': int,
                'f': float,
                'd': float,
                'g': float,
                'c': complex,
                'D': complex,
                'S': str,
                'U': str,
                'a': str,
            }
            converter_update = []
            # The remaining positions (in order) receive the data fields
            for (i, name) in zip(idx, inames):
                field = idtype[name]
                dtype[i] = (name, field)
                converter_update.append((i, convdict[field.char]))
                names[i] = name
            converters.update(converter_update)
    elif names not in (True, None) and datecols is not None:
        # The user names only describe the data columns: interleave them
        # with placeholder names for the date columns.
        # (Without `datecols`, the names are passed as they are and the date
        # columns are detected from them after the file has been read.)
        # Make sure that we saved the names as a list
        inames = list(inames)
        # Get the list of columns to use
        if usecols is None:
            nbcols = len(datecols) + len(inames)
            names = [''] * nbcols
            ucols = range(nbcols)
        else:
            names = [''] * (max(usecols) + 1)
            ucols = usecols
        # Fill the list of names:
        for i in ucols:
            if i in datecols:
                names[i] = "__%i" % i
            else:
                names[i] = inames.pop(0)
    #
    # Update the optional arguments ...
    kwargs = dict(dtype=dtype,
                  comments=comments,
                  delimiter=delimiter,
                  skiprows=skiprows,
                  converters=converters,
                  skip_header=skip_header,
                  skip_footer=skip_footer,
                  missing=missing,
                  missing_values=missing_values,
                  filling_values=filling_values,
                  usecols=usecols,
                  unpack=unpack,
                  names=names,
                  excludelist=excludelist,
                  deletechars=deletechars,
                  case_sensitive=case_sensitive,
                  defaultfmt=defaultfmt,
                  autostrip=autostrip,
                  loose=loose,
                  invalid_raise=invalid_raise,
                  usemask=True)
    # Get the raw data ................
    mrec = genfromtxt(fname, **kwargs)
    if not mrec.shape:
        # A 0-d result (single record) is made 1-d
        mrec.shape = -1
    names = mrec.dtype.names
    # Revert to the original dtype.....
    dtype = idtype
    # Get the date columns ................................
    if datecols is None:
        import re
        datespattern = re.compile("'?_?dates?'?", re.IGNORECASE)
        datecols = [
            i for (i, name) in enumerate(names or ())
            if datespattern.search(name)
        ]
        if not datecols:
            raise TypeError("No column selected for the dates!")
    else:
        # We have `datecols` already, make sure the indices are positive
        # (the nb of fields might still be undefined)
        nbfields = len(names)
        for (i, v) in enumerate(datecols):
            if (v < 0):
                datecols[i] = v + nbfields
    # Fix the date columns if usecols was given: translate the file columns
    # into positions among the columns actually read
    if usecols is not None:
        datecols = tuple([list(usecols).index(d) for d in datecols])
    # Get the date info ...............
    if names:
        _dates = [mrec[names[i]] for i in datecols]
    else:
        _dates = [mrec[:, i] for i in datecols]
    # Convert the date columns to a date_array
    if len(_dates) == 1:
        _dates = np.array(_dates[0], copy=False, ndmin=1)
        dates = date_array([dateconv(args) for args in _dates],
                           freq=freq,
                           autosort=False)
    else:
        # One converter call per record, with one argument per date column
        dates = date_array([dateconv(*args) for args in zip(*_dates)],
                           freq=freq,
                           autosort=False)
    # Resort the array according to the dates
    sortidx = dates.argsort()
    dates = dates[sortidx]
    mrec = mrec[sortidx]
    # Get the dtype from the named columns (if any), or just use the initial one
    mdtype = mrec.dtype
    if mdtype.names:
        # Keep the description of the data fields only
        newdescr = [
            descr for (i, descr) in enumerate(mdtype.descr)
            if i not in datecols
        ]
        output = time_series(ma.empty((len(mrec), ), dtype=newdescr),
                             dates=dates)
        for name in output.dtype.names:
            output[name] = mrec[name]
        if (idtype is not None):
            if (idtype.names is None):
                dtype = (idtype, len(output.dtype.names))
            else:
                dtype = idtype
            output = output.view(dtype)
    else:
        dataidx = [i for i in range(mrec.shape[-1]) if i not in datecols]
        if len(dataidx) == 1:
            dataidx = dataidx[0]
        output = time_series(mrec[:, dataidx], dates=dates)
    #
    if asrecarray:
        from trecords import TimeSeriesRecords
        return output.view(TimeSeriesRecords)
    return output
Beispiel #3
0
def time_records(
    data,
    dates=None,
    start_date=None,
    freq=None,
    mask=nomask,
    dtype=None,
    copy=False,
    fill_value=None,
    keep_mask=True,
    hard_mask=False,
):
    """
    Build a time series and return it viewed as a TimeSeriesRecords.

    This is a thin wrapper: every argument is forwarded unchanged to
    `time_series`, and the resulting series is viewed as
    `TimeSeriesRecords`. The parameter semantics described below are
    therefore those of `time_series`.

    Parameters
    ----------
    data : array_like
        Data portion of the array. Any data valid for constructing a
        MaskedArray can be used. May also be a TimeSeries object.
    dates : {None, DateArray}, optional
        Sequence of dates, one per entry.
        If None, a DateArray as long as ``data`` is built, starting at
        ``start_date`` with frequency ``freq``.
    start_date : {Date}, optional
        Date of the first entry (index 0). Must be a valid Date object;
        mandatory if ``dates`` is None and ``data`` has at least one entry.
    freq : {freq_spec}, optional
        Frequency specification, as a string or an integer.
        Mandatory if ``dates`` is None.
    mask : {nomask, sequence}, optional
        Mask. Must be convertible to an array of booleans with the same
        shape as ``data``; True flags a masked (invalid) entry.
    dtype : {dtype}, optional
        Data type of the output. If None, ``data.dtype`` is used. If given
        and different from ``data.dtype``, a copy is performed.
    copy : {False, True}, optional
        Whether to copy the input data (True) or to reference it (False).
        Data are NOT copied by default.
    fill_value : {var}, optional
        Value used to fill in the masked values when necessary.
        If None, a default based on the datatype is used.
    keep_mask : {True, boolean}, optional
        Whether to combine ``mask`` with the mask of the input data, if any
        (True), or to use only ``mask`` for the output (False).
    hard_mask : {False, boolean}, optional
        Whether to use a hard mask. With a hard mask, masked values cannot
        be unmasked.

    Returns
    -------
    out : TimeSeriesRecords
        View of the series built by `time_series`.

    Notes
    -----
    * Only the parameters listed above are accepted and forwarded.
    * The date portion of the time series must be specified in one of the
      following ways:

       * pass a TimeSeries object as ``data``;
       * pass a DateArray as ``dates``;
       * pass a ``start_date`` (a continuous DateArray is then constructed
         for the dates portion);
       * pass just a frequency (for a TimeSeries of size zero).

    """
    # Gather the forwarded options once, then delegate the construction.
    ts_options = {
        'dates': dates,
        'start_date': start_date,
        'freq': freq,
        'mask': mask,
        'dtype': dtype,
        'copy': copy,
        'fill_value': fill_value,
        'keep_mask': keep_mask,
        'hard_mask': hard_mask,
    }
    base_series = time_series(data, **ts_options)
    return base_series.view(TimeSeriesRecords)
Beispiel #4
0
def tsfromtxt(fname, dtype=None, freq='U', comments='#', delimiter=None,
              skip_header=0, skip_footer=0, skiprows=0,
              converters=None, dateconverter=None,
              missing='', missing_values=None, filling_values=None,
              usecols=None, datecols=None,
              names=None, excludelist=None, deletechars=None, autostrip=True,
              case_sensitive=True, defaultfmt="f%i", unpack=None, loose=True,
              asrecarray=False, invalid_raise=True):
    """
    Load a TimeSeries from a text file.

    The file is parsed with `genfromtxt` (always with ``usemask=True``).
    The columns holding the date information are then converted to dates
    with `date_array`, the records are sorted by date, and the remaining
    columns become the data of the series built with `time_series`.

    The date columns are either given explicitly with ``datecols`` or, when
    ``datecols`` is None, detected from the field names: every field whose
    name contains ``date`` (case insensitive) is used.
    If the date information spans several columns (for example, year in
    col #1, month in col #2...), the date converter must accept as many
    arguments as there are date columns and return a valid :class:`Date`.

    Parameters
    ----------
    fname : file or string
        File or filename to read, handed to `genfromtxt`.
        If the file extension is ``.gz`` or ``.bz2``, the file is first
        decompressed.
    dtype : data-type, optional
        Data type of the data columns. It must NOT describe the date
        columns, and ``datecols`` must be given along with it.
        If it is a structured data-type, the resulting array is
        1-dimensional, each row is one element of the array, and the field
        names are taken from the dtype.
        If None, the type of each column is determined from its contents.
    freq : {freq_spec}, optional
        Frequency of the output dates. It is used by the default date
        converter and passed to `date_array`.
    comments : {string}, optional
        The character used to indicate the start of a comment.
        All the characters occurring on a line after a comment are
        discarded.
    delimiter : {string}, optional
        The string used to separate values. By default, any consecutive
        whitespace acts as delimiter.
    skip_header : int, optional
        The number of lines to skip at the beginning of the file.
    skip_footer : int, optional
        The number of lines to skip at the end of the file.
    skiprows : int, optional
        Forwarded unchanged to `genfromtxt`.
    converters : dict or None, optional
        Functions converting the data of a column to a value, keyed by
        column. The converters can also be used to provide a default value
        for missing data: ``converters = {3: lambda s: float(s or 0)}``.
        An entry stored under the key ``'dates'`` is used as date converter
        and takes precedence over ``dateconverter``.
        The dictionary passed in is never modified.
    dateconverter : {function}, optional
        The function converting the date information to a :class:`Date`
        object. It receives as many arguments as there are date columns.
        If omitted (and no ``'dates'`` converter is given), each date string
        ``s`` is converted with ``Date(freq, string=s)``.
    missing : string, optional
        Forwarded unchanged to `genfromtxt`.
    missing_values : variable or None, optional
        The set of strings corresponding to missing data.
    filling_values : variable or None, optional
        The set of values to be used as default when the data are missing.
    usecols : sequence or None, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
    datecols : {None, int, sequence}, optional
        Which columns store the date information. A comma-separated string
        is split into its components.
    names : {None, True, str, sequence}, optional
        If `names` is True, the field names are read from the first valid
        line after the skipped ones.
        If `names` is a sequence or a single string of comma-separated
        names, the names are used as field names. When ``datecols`` is
        given, the sequence must NOT include the date columns; when
        ``datecols`` is None, it must name the date column(s) so that they
        can be detected.
        If `names` is None, the names of the dtype fields are used, if any.
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default
        list ['return','file','print']. Excluded names are appended an
        underscore: for example, `file` would become `file_`.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    autostrip : bool, optional
        Whether to automatically strip white spaces from the variables.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        If True, field names are case sensitive.
        If False or 'upper', field names are converted to upper case.
        If 'lower', field names are converted to lower case.
    defaultfmt : str, optional
        A format used to define default field names, such as "f%i" or
        "f_%02i".
    unpack : bool, optional
        Forwarded unchanged to `genfromtxt`.
    loose : bool, optional
        Forwarded unchanged to `genfromtxt`.
    asrecarray : {False, True}, optional
        Whether to return a TimeSeriesRecords (True) or the series as built
        by `time_series` (False).
    invalid_raise : bool, optional
        If True, an exception is raised if an inconsistency is detected in
        the number of columns.
        If False, a warning is emitted and the offending lines are skipped.

    Returns
    -------
    out : TimeSeries or TimeSeriesRecords
        The series built by `time_series` from the data columns, sorted by
        date; viewed as TimeSeriesRecords if ``asrecarray`` is True.

    Raises
    ------
    TypeError
        If ``dtype`` is given without ``datecols``, or if ``datecols`` is
        None and no field name designates a date column.

    See Also
    --------
    numpy.lib.io.genfromtxt
        Equivalent function for standard arrays

    Notes
    -----
    * The data are always read as a masked array: there is no ``usemask``
      parameter, ``usemask=True`` is passed to `genfromtxt`.
    * When spaces are used as delimiters, or when no delimiter has been
      given as input, there should not be any missing data between two
      fields.
    * When the variables are named (either by a flexible dtype or with
      `names`), there must not be any header in the file (else a
      :exc:`ValueError` exception is raised).
    * If ``names`` is True or a sequence of strings, these names overwrite
      the field names of a structured array.
    * If the ``dtype`` is given explicitly, it must NOT refer to the date
      columns.

    Examples
    --------
    >>> data = "year, month, a, b\\n 2001, 01, 0.0, 10.\\n 2001, 02, 1.1, 11."
    >>> dateconverter = lambda y, m: Date('M', year=int(y), month=int(m))
    >>> series = tsfromtxt(StringIO.StringIO(data), delimiter=',', names=True,
    ...                    datecols=(0,1), dateconverter=dateconverter,)
    >>> series
    timeseries([(0.0, 10.0) (1.1, 11.0)],
       dtype = [('a', '<f8'), ('b', '<f8')],
       dates = [Jan-2001 Feb-2001],
       freq  = M)
    >>> series = tsfromtxt(StringIO.StringIO(data), delimiter=",",
    ...                    datecols=(0, 1), dateconverter=dateconverter,
    ...                    names="A, B", skip_header=1)
    >>> series
    timeseries([(0.0, 10.0) (1.1000000000000001, 11.0)],
       dtype = [('A', '<f8'), ('B', '<f8')],
       dates = [Jan-2001 Feb-2001],
       freq  = M)

    """
    # Update the date converter ...........................
    # Work on a private copy: entries are removed and added below, and these
    # changes must not leak into the dictionary owned by the caller.
    converters = dict(converters or {})
    dateconv = dateconverter or None
    if dateconv is None:
        dateconv = lambda s: Date(freq, string=s)
    if 'dates' in converters:
        # A converter registered under 'dates' wins over `dateconverter`.
        dateconv = converters.pop('dates')

    # Make sure `datecols` is a sequence ..................
    if datecols is not None:
        try:
            datecols = [_.strip() for _ in datecols.split(",")]
        except AttributeError:
            # Not a string: either an iterable of columns or a single one.
            try:
                datecols = list(datecols)
            except TypeError:
                datecols = [datecols]
        # ... and update the converters: the date columns are read as
        # strings and converted to dates afterwards.
        converters.update((i, str) for i in datecols)

    # Save the initial names and dtypes ...................
    idtype = dtype
    if isinstance(names, basestring):
        names = names.split(",")
    inames = names

    # Update the dtype (if needed) ........................
    if dtype is not None:
        # Crash if we can't find the datecols
        if datecols is None:
            raise TypeError("No column selected for the dates!")
        # Make sure dtype is a valid np.dtype and make a copy
        dtype = easy_dtype(dtype, names=names)
        idtype = dtype
        inames = dtype.names
        if inames is not None:
            nbfields = len(inames) + len(datecols)
            # Create a new dtype description and a set of names
            dtype = [''] * nbfields
            names = [''] * nbfields
            # Explicit list, so that entries can be removed whatever the
            # type returned by `range`.
            idx = list(range(nbfields))
            for i in datecols:
                if i < 0:
                    i += nbfields
                idx.remove(i)
                # Set the default dtype for date columns, as object
                # (we can't use string as we don't know the final size)
                dtype[i] = ('', object)
            # Map the character code of each field to a Python converter
            convdict = {'b': bool, 'i': int, 'l': int, 'u': int,
                        'f': float, 'd': float, 'g': float,
                        'c': complex, 'D': complex,
                        'S': str, 'U': str, 'a': str}
            converter_update = []
            # The remaining positions (in order) receive the data fields
            for (i, name) in zip(idx, inames):
                field = idtype[name]
                dtype[i] = (name, field)
                converter_update.append((i, convdict[field.char]))
                names[i] = name
            converters.update(converter_update)
    elif names not in (True, None) and datecols is not None:
        # The user names only describe the data columns: interleave them
        # with temporary names for the date columns.
        # (Without `datecols`, the names are passed as they are and the date
        # columns are detected from them after the file has been read.)
        nbnames = len(datecols) + len(inames)
        names = [''] * nbnames
        # Find where the names should go in the new list
        idx = list(range(nbnames))
        for (i, k) in enumerate(datecols):
            if k < 0:
                k += nbnames
            idx.remove(k)
            names[k] = "_tmp%i" % i
        for (i, k) in zip(idx, inames):
            names[i] = k
    #
    # Update the optional arguments ...
    kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter,
                  skiprows=skiprows, converters=converters,
                  skip_header=skip_header, skip_footer=skip_footer,
                  missing=missing, missing_values=missing_values,
                  filling_values=filling_values,
                  usecols=usecols, unpack=unpack, names=names,
                  excludelist=excludelist, deletechars=deletechars,
                  case_sensitive=case_sensitive, defaultfmt=defaultfmt,
                  autostrip=autostrip, loose=loose, invalid_raise=invalid_raise,
                  usemask=True)
    # Get the raw data ................
    mrec = genfromtxt(fname, **kwargs)
    if not mrec.shape:
        # A 0-d result (single record) is made 1-d
        mrec.shape = -1
    names = mrec.dtype.names
    # Revert to the original dtype.....
    dtype = idtype
    # Get the date columns ................................
    if datecols is None:
        import re
        datespattern = re.compile("'?_?dates?'?", re.IGNORECASE)
        datecols = [i for (i, name) in enumerate(names or ())
                    if datespattern.search(name)]
        if not datecols:
            raise TypeError("No column selected for the dates!")
    else:
        # We have `datecols` already, make sure the indices are positive
        # (the nb of fields might still be undefined)
        nbfields = len(names)
        for (i, v) in enumerate(datecols):
            if (v < 0):
                datecols[i] = v + nbfields
    # Fix the date columns if usecols was given: translate the file columns
    # into positions among the columns actually read
    if usecols is not None:
        datecols = tuple([list(usecols).index(d) for d in datecols])
    # Get the date info ...............
    if names:
        _dates = [mrec[names[i]] for i in datecols]
    else:
        _dates = [mrec[:, i] for i in datecols]
    # Convert the date columns to a date_array
    if len(_dates) == 1:
        _dates = np.array(_dates[0], copy=False, ndmin=1)
        dates = date_array([dateconv(args) for args in _dates],
                           freq=freq, autosort=False)
    else:
        # One converter call per record, with one argument per date column
        dates = date_array([dateconv(*args) for args in zip(*_dates)],
                           freq=freq, autosort=False)
    # Resort the array according to the dates
    sortidx = dates.argsort()
    dates = dates[sortidx]
    mrec = mrec[sortidx]
    # Get the dtype from the named columns (if any), or just use the initial one
    mdtype = mrec.dtype
    if mdtype.names:
        # Keep the description of the data fields only
        newdescr = [descr for (i, descr) in enumerate(mdtype.descr)
                    if i not in datecols]
        output = time_series(ma.empty((len(mrec),), dtype=newdescr),
                             dates=dates)
        for name in output.dtype.names:
            output[name] = mrec[name]
        if (idtype is not None):
            if (idtype.names is None):
                dtype = (idtype, len(output.dtype.names))
            else:
                dtype = idtype
            output = output.view(dtype)
    else:
        dataidx = [i for i in range(mrec.shape[-1]) if i not in datecols]
        if len(dataidx) == 1:
            dataidx = dataidx[0]
        output = time_series(mrec[:, dataidx], dates=dates)
    #
    if asrecarray:
        from trecords import TimeSeriesRecords
        return output.view(TimeSeriesRecords)
    return output
Beispiel #5
0
################################################################################
if __name__ == "__main__":
    import numpy as N
    from maskedarray.testutils import assert_equal

    if 1:
        d = N.arange(5)
        m = MA.make_mask([1, 0, 0, 1, 1])
        base_d = N.r_[d, d[::-1]].reshape(2, -1).T
        base_m = N.r_[[m, m[::-1]]].T
        base = MA.array(base_d, mask=base_m)
        mrec = MR.fromarrays(base.T)
        dlist = ["2007-%02i" % (i + 1) for i in d]
        dates = date_array(dlist)
        ts = time_series(mrec, dates)
        mts = MultiTimeSeries(mrec, dates)
        self_data = [d, m, mrec, dlist, dates, ts, mts]

        assert isinstance(mts.f0, TimeSeries)

    if 0:
        mts[:2] = 5
        assert_equal(mts.f0._data, [5, 5, 2, 3, 4])
        assert_equal(mts.f1._data, [5, 5, 2, 1, 0])
        assert_equal(mts.f0._mask, [0, 0, 0, 1, 1])
        assert_equal(mts.f1._mask, [0, 0, 0, 0, 1])
        mts.harden_mask()
        mts[-2:] = 5
        assert_equal(mts.f0._data, [5, 5, 2, 3, 4])
        assert_equal(mts.f1._data, [5, 5, 2, 5, 0])