Example #1
0
    def test_frame_tz_convert(self):
        rng = date_range('1/1/2011', periods=200, freq='D', tz='US/Eastern')

        df = DataFrame({'a': 1}, index=rng)
        result = df.tz_convert('Europe/Berlin')
        expected = DataFrame({'a': 1}, rng.tz_convert('Europe/Berlin'))
        assert result.index.tz.zone == 'Europe/Berlin'
        tm.assert_frame_equal(result, expected)

        df = df.T
        result = df.tz_convert('Europe/Berlin', axis=1)
        assert result.columns.tz.zone == 'Europe/Berlin'
        tm.assert_frame_equal(result, expected.T)
Example #2
0
    def test_frame_align_aware(self):
        """Check time-zone handling of ``align`` for tz-aware indexes.

        Aligning objects that share a time zone must keep that zone;
        aligning objects in different zones must produce UTC indexes.
        The mixed-zone case is checked for frame/frame, frame/Series and
        Series/frame.
        """
        idx1 = date_range('2001', periods=5, freq='H', tz='US/Eastern')
        idx2 = date_range('2001', periods=5, freq='2H', tz='US/Eastern')
        df1 = DataFrame(np.random.randn(len(idx1), 3), idx1)
        df2 = DataFrame(np.random.randn(len(idx2), 3), idx2)

        # Same time zone on both sides: the zone is preserved.
        new1, new2 = df1.align(df2)
        assert df1.index.tz == new1.index.tz
        assert df2.index.tz == new2.index.tz

        # different timezones convert to UTC

        # frame with frame
        df1_central = df1.tz_convert('US/Central')
        new1, new2 = df1.align(df1_central)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC

        # frame with Series
        new1, new2 = df1.align(df1_central[0], axis=0)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC

        # Series with frame.  The result has to be captured here: without
        # the assignment the assertions below would silently re-check the
        # objects from the previous case and this call would go untested.
        new1, new2 = df1[0].align(df1_central, axis=0)
        assert new1.index.tz == pytz.UTC
        assert new2.index.tz == pytz.UTC
Example #3
0
    def update(self, df_in: pd.DataFrame, symbol: str=None, datatype: str=None,
               barsize: str=None, tz: str=None, standardize_index=True):
        """Merge ``df_in`` into ``self.df``.

        Where both frames hold a value for the same cell, the non-null
        value from ``df_in`` wins; index and columns become the union of
        both frames. After merging, remaining NaN cells are set to -1 and
        any column named 'barcount' or 'volume' (case-insensitive) is cast
        to int64.

        Parameters
        ----------
        df_in : pandas.DataFrame
            New data to merge.
        symbol, datatype, barsize, tz : str, optional
            Forwarded to ``self._standardize_index`` when
            ``standardize_index`` is true.
        standardize_index : bool
            When true, a copy of ``df_in`` is passed through
            ``self._standardize_index`` before merging.

        Returns
        -------
        ``self`` when ``df_in`` is empty (nothing is changed); otherwise
        the method falls off the end and returns ``None``.

        Raises
        ------
        TypeError
            If ``df_in`` is not a ``pandas.DataFrame``.
        """
        # Guard clauses: reject wrong types, ignore empty input.
        if not isinstance(df_in, pd.DataFrame):
            raise TypeError('Input data must be a pandas.DataFrame.')
        if df_in.empty:
            return self

        incoming = df_in
        if standardize_index:
            # Work on a copy so the caller's frame is left untouched.
            incoming = self._standardize_index(
                incoming.copy(), symbol=symbol, datatype=datatype,
                barsize=barsize, tz=tz)

        if self.df.empty:
            # First data ever stored: adopt it as-is.
            merged = incoming
        else:
            # Bring the new data into this block's time zone, then let its
            # non-null values take precedence over the stored ones.
            incoming = incoming.tz_convert(
                self.tzinfo, level=self.__class__.dtlevel)
            merged = incoming.combine_first(self.df)
        self.df = merged.sort_index()

        # Post-processing: NaN -> -1, then force integer count columns.
        self.df.fillna(-1, inplace=True)
        for name in self.df.columns:
            if name.lower() not in ('barcount', 'volume'):
                continue
            self.df[name] = self.df[name].astype(np.int64)
Example #4
0
    def test_tz_convert(self):
        """Check ``tz_convert('utc')`` on a Series and on both frame axes.

        The converted axis must report the UTC zone, and the converted
        frame must equal one built directly on the converted index.

        NOTE(review): the index is created without a ``tz``; this test
        relies on the pandas version it was written for accepting
        ``tz_convert`` on such an index -- confirm against the targeted
        pandas release.
        """
        rng = date_range('1/1/2011', periods=100, freq='H')
        ts = Series(1, index=rng)

        # ``assertEqual`` replaces the deprecated ``assert_`` alias (removed
        # in Python 3.12) and reports both values when the check fails.
        result = ts.tz_convert('utc')
        self.assertEqual(result.index.tz.zone, 'UTC')

        # Row axis.
        df = DataFrame({'a': 1}, index=rng)
        result = df.tz_convert('utc')
        expected = DataFrame({'a': 1}, rng.tz_convert('UTC'))
        self.assertEqual(result.index.tz.zone, 'UTC')
        assert_frame_equal(result, expected)

        # Column axis: same data transposed, converted with axis=1.
        df = df.T
        result = df.tz_convert('utc', axis=1)
        self.assertEqual(result.columns.tz.zone, 'UTC')
        assert_frame_equal(result, expected.T)
Example #5
0
    def test_frame_add_tz_mismatch_converts_to_utc(self):
        """Adding frames indexed in different time zones must give a UTC index.

        The same data is indexed once in US/Eastern and once in
        Europe/Moscow; the sum is checked with the operands in both orders.
        """
        eastern = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern')
        df_eastern = DataFrame(
            np.random.randn(len(eastern)), index=eastern, columns=['a'])
        df_moscow = df_eastern.tz_convert('Europe/Moscow')

        # Operand order must not influence the resulting time zone.
        for left, right in ((df_eastern, df_moscow), (df_moscow, df_eastern)):
            total = left + right
            assert total.index.tz is pytz.utc
Example #6
0
    def test_to_records_datetimeindex_with_tz(self, tz):
        # GH13937
        dr = date_range('2016-01-01', periods=10,
                        freq='S', tz=tz)

        df = DataFrame({'datetime': dr}, index=dr)

        expected = df.to_records()
        result = df.tz_convert("UTC").to_records()

        # both converted to UTC, so they are equal
        tm.assert_numpy_array_equal(result, expected)
    def test_series_frame_tz_convert(self):
        """Check ``tz_convert`` on a Series, on both frame axes, and on naive data.

        A US/Eastern daily index is converted to Europe/Berlin. A tz-naive
        Series is expected to raise ``TypeError`` with a
        "Cannot convert tz-naive" message.
        """
        berlin = "Europe/Berlin"
        aware = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern")

        # Series.
        converted = Series(1, index=aware).tz_convert(berlin)
        self.assertEqual(converted.index.tz.zone, berlin)

        # DataFrame, row axis.
        frame = DataFrame({"a": 1}, index=aware)
        converted = frame.tz_convert(berlin)
        expected = DataFrame({"a": 1}, aware.tz_convert(berlin))
        self.assertEqual(converted.index.tz.zone, berlin)
        assert_frame_equal(converted, expected)

        # DataFrame, column axis.
        transposed = frame.T
        converted = transposed.tz_convert(berlin, axis=1)
        self.assertEqual(converted.columns.tz.zone, berlin)
        assert_frame_equal(converted, expected.T)

        # can't convert tz-naive
        naive = Series(1, index=date_range("1/1/2011", periods=200, freq="D"))
        tm.assertRaisesRegexp(
            TypeError, "Cannot convert tz-naive", naive.tz_convert, "US/Eastern")
Example #8
0
    def test_series_frame_tz_convert(self):
        rng = date_range('1/1/2011', periods=200, freq='D',
                         tz='US/Eastern')
        ts = Series(1, index=rng)

        result = ts.tz_convert('Europe/Berlin')
        self.assert_(result.index.tz.zone == 'Europe/Berlin')

        df = DataFrame({'a': 1}, index=rng)
        result = df.tz_convert('Europe/Berlin')
        expected = DataFrame({'a': 1}, rng.tz_convert('Europe/Berlin'))
        self.assert_(result.index.tz.zone == 'Europe/Berlin')
        assert_frame_equal(result, expected)

        df = df.T
        result = df.tz_convert('Europe/Berlin', axis=1)
        self.assert_(result.columns.tz.zone == 'Europe/Berlin')
        assert_frame_equal(result, expected.T)

        # can't convert tz-naive
        rng = date_range('1/1/2011', periods=200, freq='D')
        ts = Series(1, index=rng)
        self.assertRaises(Exception, ts.tz_convert, 'US/Eastern')
Example #9
0
    def _standardize_index(
            self, df_in: pd.DataFrame, symbol: str=None, datatype: str=None,
            barsize: str=None, tz: str=None):
        """Normalize input DataFrame index to MarketDataBlock standard.

        The frame is re-indexed on the columns listed in
        ``MarketDataBlock.data_index`` and its datetime index level is made
        tz-aware when necessary.

        ``df_in`` is modified in place (index reset, columns renamed and
        inserted, index set); pass a copy if the original must be kept.

        Parameters
        ----------
        df_in : pandas.DataFrame
            Frame to normalize.
        symbol, datatype, barsize : str, optional
            Fallback values. For each name in ``MarketDataBlock.data_index``
            that is missing from the (renamed) columns, the argument whose
            name equals the lower-cased column name is inserted as a
            constant column.
        tz : str, optional
            Time zone used when the datetime index level is naive
            (localized to ``tz``) or carries a ``timezone`` /
            ``pytz._FixedOffset`` tzinfo (converted to ``tz``).

        Returns
        -------
        pandas.DataFrame
            The frame indexed by ``MarketDataBlock.data_index``.

        Raises
        ------
        KeyError
            A ``data_index`` column is missing and no matching non-None
            argument was given.
        ValueError
            ``tz`` is None although the datetime level is naive or has a
            ``timezone`` / ``pytz._FixedOffset`` tzinfo.
        """
        # Add or standardize index names in the input.
        # A MultiIndex is flattened back into ordinary columns first.
        if isinstance(df_in.index, pd.MultiIndex):
            df_in.reset_index(inplace=True)

        # Rename ambiguous column names.
        # Names are stripped and lower-cased, then looked up in col_rename;
        # names without an entry keep their stripped, lower-cased form.
        # NOTE(review): presumably col_rename maps lower-case aliases to the
        # canonical data_index spellings -- confirm at its definition.
        df_in.columns = [
            col_rename.get(col.strip().lower(), col.strip().lower())
            for col in df_in.columns]

        # Insert Symbol, DataType, Barsize columns from arguments if not
        # found in the input dataframe.
        # The argument is fetched by name through locals(), so the set of
        # local variable names in this method is part of the lookup: do not
        # add a local whose name equals a lower-cased data_index entry.
        for col in MarketDataBlock.data_index:
            if col not in df_in.columns:
                if locals().get(col.lower(), None) is None:
                    raise KeyError(
                        'No {0} argument and no {0} column in the DataFrame.'
                        .format(col))
                df_in.insert(0, col, locals()[col.lower()])

        # Convert datetime strings to pandas DatetimeIndex
        # (each value is passed through pd.Timestamp individually).
        df_in['TickerTime'] = pd.DatetimeIndex(
            df_in['TickerTime'].apply(pd.Timestamp))

        # Standardize BarSize strings
        df_in['BarSize'] = df_in['BarSize'].map(timedur_standardize)

        # Set index to class-defined MultiIndex
        df_in.set_index(MarketDataBlock.data_index, inplace=True)

        # Set time zone so all DatetimeIndex are tz-aware
        # dtlevel is the position of the datetime level within the index.
        # NOTE(review): `timezone` is presumably datetime.timezone (the
        # error message below names it) -- confirm against the imports.
        df_in_tz = df_in.index.levels[self.__class__.dtlevel].tz
        if df_in_tz is None or isinstance(df_in_tz, timezone) or \
           isinstance(df_in_tz, pytz._FixedOffset):
            # Input df has naive time index, or tzinfo is not pytz.timezone()
            if tz is None:
                raise ValueError(
                    'Argument tz=None, and TickerTime.tzinfo is None(naive),'
                    'datetime.timezone, or pytz._FixedOffset.')
            if df_in_tz is None:
                # Naive timestamps: attach tz without shifting wall time.
                df_in = df_in.tz_localize(tz, level=self.__class__.dtlevel)
            else:
                # Fixed-offset tzinfo: convert to the requested zone.
                df_in = df_in.tz_convert(tz, level=self.__class__.dtlevel)

        # Any other tzinfo on the datetime level is returned unchanged.
        return df_in
Example #10
0
    def test_equal_join_ensure_utc(self):
        """Adding objects indexed in different time zones must give a UTC index.

        The same data is indexed once in US/Eastern and once in
        Europe/Moscow; the sum is checked for Series and for DataFrame,
        with the operands in both orders.
        """
        rng = date_range('1/1/2011', periods=10, freq='H', tz='US/Eastern')
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts_moscow = ts.tz_convert('Europe/Moscow')

        # ``assertIs`` replaces the deprecated ``assert_`` alias (removed in
        # Python 3.12) and reports both objects when the check fails.
        result = ts + ts_moscow
        self.assertIs(result.index.tz, pytz.utc)

        result = ts_moscow + ts
        self.assertIs(result.index.tz, pytz.utc)

        # Same checks with the Series wrapped in a one-column frame.
        df = DataFrame({'a': ts})
        df_moscow = df.tz_convert('Europe/Moscow')
        result = df + df_moscow
        self.assertIs(result.index.tz, pytz.utc)

        result = df_moscow + df
        self.assertIs(result.index.tz, pytz.utc)
    def test_equal_join_ensure_utc(self):
        """Adding objects indexed in different time zones must give a UTC index.

        Checked for a Series pair and for a DataFrame pair, each with the
        operands in both orders.
        """
        eastern = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern")
        ts = Series(np.random.randn(len(eastern)), index=eastern)
        ts_moscow = ts.tz_convert("Europe/Moscow")

        # Series: operand order must not matter.
        for left, right in ((ts, ts_moscow), (ts_moscow, ts)):
            total = left + right
            self.assertIs(total.index.tz, pytz.utc)

        # DataFrame built from the same Series.
        df = DataFrame({"a": ts})
        df_moscow = df.tz_convert("Europe/Moscow")
        for left, right in ((df, df_moscow), (df_moscow, df)):
            total = left + right
            self.assertIs(total.index.tz, pytz.utc)
class Timeseries(object):
    """Thin wrapper around a pandas DataFrame that remembers column order.

    The wrapped frame is available as ``dataframe``; the remembered column
    names as ``columns``. Column names may carry a unit after a ``'||'``
    separator (see ``label`` / ``unit``).
    """

    def __init__(self, data):
        """
        Can be called with either:

        A DataFrame. Preferred.

        timeseries_dict, a dict with UTC datetimes as keys and floats
        as values.

        A list of such dicts.

        This works like a pandas DataFrame, except we keep track of
        the order of column names.

        A DataFrame is stored as given (not copied). A single dict becomes
        one column named 'data'; any other iterable becomes columns
        'data_0', 'data_1', ... in iteration order.
        """
        if isinstance(data, DataFrame):
            self._dataframe = data
            self._columns = tuple(data.columns)
        elif isinstance(data, dict):
            series = Series(data)
            self._dataframe = DataFrame({'data': series})
            self._columns = ('data',)
        else:
            # Materialise first: ``data`` may be a one-shot iterator, and it
            # is needed both for the frame and for the column names.
            all_series = list(data)
            names = tuple(
                'data_{0}'.format(i) for i in range(len(all_series)))
            self._dataframe = DataFrame(dict(zip(names, all_series)))
            self._columns = names

    def add(self, timeseries):
        """Add the columns from timeseries to the dataframe of this
        timeseries.

        The frames are added element-wise with missing values treated as
        0, so a column present in only one of them is carried over
        unchanged while a column present in both is summed. The other
        object's column names are appended to ``columns`` as-is.
        """
        # ``add(..., fill_value=0)`` is the documented replacement for the
        # removed ``DataFrame.combineAdd``.
        self._dataframe = self._dataframe.add(
            timeseries._dataframe, fill_value=0)
        self._columns = self.columns + timeseries.columns

    @property
    def dataframe(self):
        """The wrapped pandas DataFrame."""
        return self._dataframe

    @property
    def timeseries(self):
        """Return the first of the series in dataframe"""
        return self._dataframe[self._columns[0]].dropna()

    def get_series(self, columnname):
        """Return the column ``columnname`` with missing values dropped."""
        return self._dataframe[columnname].dropna()

    def to_csv(self, outfile, sep=',', timezone=None, date_format='%Y-%m-%d %H:%M',
               header_date_format='Datum + tijd'):
        """Note: changes the timezone of all datetimes!

        Write the data of all timeseries to a CSV file.

        ``outfile`` is a writable text file object. A header line is
        written first: ``header_date_format`` (the heading of the date
        column) followed by ``label_and_unit`` of every column, joined by
        ``sep``. The data rows follow without a pandas-generated header.
        When ``timezone`` is given, ``set_timezone`` is applied to this
        object before writing.
        """
        if timezone is not None:
            self.set_timezone(timezone)
        headers = [header_date_format] + [
            self.label_and_unit(column) for column in self.columns]
        outfile.write(sep.join(headers) + "\n")
        self._dataframe.to_csv(outfile, sep=sep, mode='a', header=None,
                               date_format=date_format)

    def set_timezone(self, timezone):
        """Sets this timezone on all datetimes. Timezone is a pytz timezone
        object."""
        self._dataframe = self._dataframe.tz_convert(timezone)

    @property
    def columns(self):
        """Tuple of column names in the order they were registered."""
        return self._columns

    def label(self, series_name):
        """Only the part of the columns before '||'."""
        return series_name.split('||')[0]

    def unit(self, series_name):
        """Only the part of the columns after '||', or None."""
        return series_name.split('||')[1] if '||' in series_name else None

    def label_and_unit(self, series_name):
        """Return 'label (unit)', or just the label when there is no unit."""
        unit = self.unit(series_name)
        if unit:
            return "{} ({})".format(self.label(series_name), unit)
        else:
            return self.label(series_name)

    def dates(self):
        """Index keys of the first series (after dropping missing values)."""
        return self.timeseries.keys()

    def values(self):
        """Values of the first series (after dropping missing values), as a list."""
        return list(self.timeseries)

    def latest(self):
        """Last entry of the first series, as a one-element Series."""
        return self.timeseries.tail(1)

    def data(self):
        """List of ``[key, value]`` pairs of the first series."""
        # Builtin zip gives the same pairs as itertools.izip here and is
        # available on both Python 2 and Python 3.
        return [[key, value]
                for key, value in zip(self.dates(), self.values())]

    def __len__(self):
        """Number of rows in the wrapped frame (0 if there is none)."""
        return len(self._dataframe) if self._dataframe is not None else 0