Beispiel #1
0
    def test_frame_tz_localize(self):
        rng = date_range("1/1/2011", periods=100, freq="H")

        df = DataFrame({"a": 1}, index=rng)
        result = df.tz_localize("utc")
        expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
        assert result.index.tz.zone == "UTC"
        tm.assert_frame_equal(result, expected)

        df = df.T
        result = df.tz_localize("utc", axis=1)
        assert result.columns.tz.zone == "UTC"
        tm.assert_frame_equal(result, expected.T)
Beispiel #2
0
    def test_frame_tz_localize(self):
        rng = date_range('1/1/2011', periods=100, freq='H')

        df = DataFrame({'a': 1}, index=rng)
        result = df.tz_localize('utc')
        expected = DataFrame({'a': 1}, rng.tz_localize('UTC'))
        assert result.index.tz.zone == 'UTC'
        tm.assert_frame_equal(result, expected)

        df = df.T
        result = df.tz_localize('utc', axis=1)
        assert result.columns.tz.zone == 'UTC'
        tm.assert_frame_equal(result, expected.T)
Beispiel #3
0
    def test_frame_tz_localize(self):
        rng = date_range('1/1/2011', periods=100, freq='H')

        df = DataFrame({'a': 1}, index=rng)
        result = df.tz_localize('utc')
        expected = DataFrame({'a': 1}, rng.tz_localize('UTC'))
        assert result.index.tz.zone == 'UTC'
        tm.assert_frame_equal(result, expected)

        df = df.T
        result = df.tz_localize('utc', axis=1)
        assert result.columns.tz.zone == 'UTC'
        tm.assert_frame_equal(result, expected.T)
Beispiel #4
0
 def correct_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
     """Localize ``df`` to the source zone, convert it and sort by index.

     The index is localized to ``self.tz_source`` (using ``self.ambiguous``
     for ambiguous wall times) and converted to ``self.tz_dest``. When
     ``self.neutralize_tz`` is truthy the zone information is dropped again.
     For a ``MultiIndex`` the operations target ``self.level_name``.

     Returns the converted frame, sorted by its index.
     """
     # ``level`` is only meaningful for a MultiIndex.
     target_level = None
     if isinstance(df.index, pd.MultiIndex):
         target_level = self.level_name

     localized = df.tz_localize(self.tz_source,
                                ambiguous=self.ambiguous,
                                copy=False,
                                level=target_level)
     converted = localized.tz_convert(self.tz_dest, level=target_level)
     if self.neutralize_tz:
         # Converting to ``None`` strips the zone from the index.
         converted = converted.tz_convert(None, level=target_level)
     converted.sort_index(inplace=True)
     return converted
Beispiel #5
0
    def _standardize_index(self,
                           df_in: pd.DataFrame,
                           symbol: str = None,
                           datatype: str = None,
                           barsize: str = None,
                           tz: str = None):
        """Normalize input DataFrame index to MarketDataBlock standard.

        ``df_in`` is modified in place (index reset, columns renamed and
        inserted) before the re-indexed frame is returned.

        :param df_in: Frame to normalize.
        :param symbol: Value for a missing ``Symbol`` column.
        :param datatype: Value for a missing ``DataType`` column.
        :param barsize: Value for a missing ``BarSize`` column.
        :param tz: Time zone applied when the time level is naive, a
            ``datetime.timezone`` or a ``pytz._FixedOffset``.
        :raises KeyError: An index column is missing and no matching
            argument was given.
        :raises ValueError: ``tz`` is required (see above) but is None.
        :return: Frame indexed by ``MarketDataBlock.data_index``.
        """
        # Add or standardize index names in the input.
        if isinstance(df_in.index, pd.MultiIndex):
            df_in.reset_index(inplace=True)

        # Rename ambiguous column names.
        df_in.columns = [
            col_rename.get(col.strip().lower(),
                           col.strip().lower()) for col in df_in.columns
        ]

        # Insert Symbol, DataType, Barsize columns from arguments if not
        # found in the input dataframe. An explicit mapping replaces the
        # former ``locals()`` lookup, which silently depended on local
        # variable names.
        fallbacks = {
            'symbol': symbol,
            'datatype': datatype,
            'barsize': barsize,
        }
        for col in MarketDataBlock.data_index:
            if col not in df_in.columns:
                fallback = fallbacks.get(col.lower())
                if fallback is None:
                    raise KeyError(
                        'No {0} argument and no {0} column in the DataFrame.'.
                        format(col))
                df_in.insert(0, col, fallback)

        # Convert datetime strings to pandas DatetimeIndex
        df_in['TickerTime'] = pd.DatetimeIndex(df_in['TickerTime'].apply(
            pd.Timestamp))

        # Standardize BarSize strings
        df_in['BarSize'] = df_in['BarSize'].map(timedur_standardize)

        # Set index to class-defined MultiIndex
        df_in.set_index(MarketDataBlock.data_index, inplace=True)

        # Set time zone so all DatetimeIndex are tz-aware
        df_in_tz = df_in.index.levels[self.__class__.dtlevel].tz
        if df_in_tz is None or isinstance(df_in_tz,
                                          (timezone, pytz._FixedOffset)):
            # Input df has naive time index, or tzinfo is not pytz.timezone()
            if tz is None:
                raise ValueError(
                    'Argument tz=None, and TickerTime.tzinfo is None(naive), '
                    'datetime.timezone, or pytz._FixedOffset.')
            if df_in_tz is None:
                df_in = df_in.tz_localize(tz, level=self.__class__.dtlevel)
            else:
                df_in = df_in.tz_convert(tz, level=self.__class__.dtlevel)

        return df_in
    def test_tz_localize(self, frame_or_series):
        rng = date_range("1/1/2011", periods=100, freq="H")

        obj = DataFrame({"a": 1}, index=rng)
        obj = tm.get_obj(obj, frame_or_series)

        result = obj.tz_localize("utc")
        expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
        expected = tm.get_obj(expected, frame_or_series)

        assert result.index.tz.zone == "UTC"
        tm.assert_equal(result, expected)
Beispiel #7
0
    def test_tz_localize(self, frame_or_series):
        rng = date_range("1/1/2011", periods=100, freq="H")

        obj = DataFrame({"a": 1}, index=rng)
        if frame_or_series is not DataFrame:
            obj = obj["a"]

        result = obj.tz_localize("utc")
        expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
        if frame_or_series is not DataFrame:
            expected = expected["a"]

        assert result.index.tz.zone == "UTC"
        tm.assert_equal(result, expected)
Beispiel #8
0
    def test_series_frame_tz_localize(self):

        rng = date_range('1/1/2011', periods=100, freq='H')
        ts = Series(1, index=rng)

        result = ts.tz_localize('utc')
        self.assert_(result.index.tz.zone == 'UTC')

        df = DataFrame({'a': 1}, index=rng)
        result = df.tz_localize('utc')
        expected = DataFrame({'a': 1}, rng.tz_localize('UTC'))
        self.assert_(result.index.tz.zone == 'UTC')
        assert_frame_equal(result, expected)

        df = df.T
        result = df.tz_localize('utc', axis=1)
        self.assert_(result.columns.tz.zone == 'UTC')
        assert_frame_equal(result, expected.T)

        # Can't localize if already tz-aware
        rng = date_range('1/1/2011', periods=100, freq='H', tz='utc')
        ts = Series(1, index=rng)
        self.assertRaises(Exception, ts.tz_localize, 'US/Eastern')
Beispiel #9
0
    def test_series_frame_tz_localize(self):

        rng = date_range('1/1/2011', periods=100, freq='H')
        ts = Series(1, index=rng)

        result = ts.tz_localize('utc')
        self.assert_(result.index.tz.zone == 'UTC')

        df = DataFrame({'a': 1}, index=rng)
        result = df.tz_localize('utc')
        expected = DataFrame({'a': 1}, rng.tz_localize('UTC'))
        self.assert_(result.index.tz.zone == 'UTC')
        assert_frame_equal(result, expected)

        df = df.T
        result = df.tz_localize('utc', axis=1)
        self.assert_(result.columns.tz.zone == 'UTC')
        assert_frame_equal(result, expected.T)

        # Can't localize if already tz-aware
        rng = date_range('1/1/2011', periods=100, freq='H', tz='utc')
        ts = Series(1, index=rng)
        self.assertRaises(Exception, ts.tz_localize, 'US/Eastern')
    def test_series_frame_tz_localize(self):

        rng = date_range("1/1/2011", periods=100, freq="H")
        ts = Series(1, index=rng)

        result = ts.tz_localize("utc")
        self.assertEqual(result.index.tz.zone, "UTC")

        df = DataFrame({"a": 1}, index=rng)
        result = df.tz_localize("utc")
        expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
        self.assertEqual(result.index.tz.zone, "UTC")
        assert_frame_equal(result, expected)

        df = df.T
        result = df.tz_localize("utc", axis=1)
        self.assertEqual(result.columns.tz.zone, "UTC")
        assert_frame_equal(result, expected.T)

        # Can't localize if already tz-aware
        rng = date_range("1/1/2011", periods=100, freq="H", tz="utc")
        ts = Series(1, index=rng)
        tm.assertRaisesRegexp(TypeError, "Already tz-aware", ts.tz_localize, "US/Eastern")
Beispiel #11
0
    def _standardize_index(
            self, df_in: pd.DataFrame, symbol: str=None, datatype: str=None,
            barsize: str=None, tz: str=None):
        """Normalize input DataFrame index to MarketDataBlock standard.

        ``df_in`` is modified in place (index reset, columns renamed and
        inserted) before the re-indexed frame is returned.

        :param df_in: Frame to normalize.
        :param symbol: Value for a missing ``Symbol`` column.
        :param datatype: Value for a missing ``DataType`` column.
        :param barsize: Value for a missing ``BarSize`` column.
        :param tz: Time zone applied when the time level is naive, a
            ``datetime.timezone`` or a ``pytz._FixedOffset``.
        :raises KeyError: An index column is missing and no matching
            argument was given.
        :raises ValueError: ``tz`` is required (see above) but is None.
        :return: Frame indexed by ``MarketDataBlock.data_index``.
        """
        # Add or standardize index names in the input.
        if isinstance(df_in.index, pd.MultiIndex):
            df_in.reset_index(inplace=True)

        # Rename ambiguous column names.
        df_in.columns = [
            col_rename.get(col.strip().lower(), col.strip().lower())
            for col in df_in.columns]

        # Insert Symbol, DataType, Barsize columns from arguments if not
        # found in the input dataframe. The arguments are looked up in an
        # explicit mapping instead of ``locals()``, which broke silently
        # whenever a local variable was renamed.
        fallbacks = {
            'symbol': symbol, 'datatype': datatype, 'barsize': barsize}
        for col in MarketDataBlock.data_index:
            if col not in df_in.columns:
                fallback = fallbacks.get(col.lower())
                if fallback is None:
                    raise KeyError(
                        'No {0} argument and no {0} column in the DataFrame.'
                        .format(col))
                df_in.insert(0, col, fallback)

        # Convert datetime strings to pandas DatetimeIndex
        df_in['TickerTime'] = pd.DatetimeIndex(
            df_in['TickerTime'].apply(pd.Timestamp))

        # Standardize BarSize strings
        df_in['BarSize'] = df_in['BarSize'].map(timedur_standardize)

        # Set index to class-defined MultiIndex
        df_in.set_index(MarketDataBlock.data_index, inplace=True)

        # Set time zone so all DatetimeIndex are tz-aware
        df_in_tz = df_in.index.levels[self.__class__.dtlevel].tz
        if df_in_tz is None or isinstance(
                df_in_tz, (timezone, pytz._FixedOffset)):
            # Input df has naive time index, or tzinfo is not pytz.timezone()
            if tz is None:
                raise ValueError(
                    'Argument tz=None, and TickerTime.tzinfo is None(naive), '
                    'datetime.timezone, or pytz._FixedOffset.')
            if df_in_tz is None:
                df_in = df_in.tz_localize(tz, level=self.__class__.dtlevel)
            else:
                df_in = df_in.tz_convert(tz, level=self.__class__.dtlevel)

        return df_in
Beispiel #12
0
def get_home(data: pd.DataFrame, periods: dict) -> tuple:
    """Prepare location samples and delegate home inference to ``infer_home``.

    ``data`` is renamed in place, reduced to the timestamp/latitude/longitude
    columns, sorted by timestamp, passed through ``convert_timezone`` and
    re-indexed by a tz-naive ``time`` index. Each entry of ``periods`` is
    turned into a ``[start, end]`` pair of epoch seconds.

    NOTE(review): ``periods`` is annotated as ``dict`` but is iterated and
    each item is indexed with ``'start'``/``'end'`` -- confirm the expected
    container type with the callers.
    """
    # NOTE: the library expects these column names. Ideally it would use the
    #       same names as the AWARE database schema.
    column_aliases = {
        'double_latitude': 'latitude',
        'double_longitude': 'longitude'
    }
    data.rename(index=str, columns=column_aliases, inplace=True)

    # NOTE: mirrors CMU's use of the library, where columns have sometimes
    #       (e.g. screen) been accessed by position rather than by name.
    data = data[['timestamp', 'latitude', 'longitude']]

    # NOTE: also mirrors CMU's usage, out of caution; it is unknown whether
    #       the library assumes sorted input.
    data.sort_values(by='timestamp', ascending=True, inplace=True)

    convert_timezone(data, 'US/Pacific', {'timestamp': 'time'})
    data.set_index("time", inplace=True)
    data = data.tz_localize(None)
    # QUESTION: why set the timezone to UTC first and then change it back,
    #           instead of using the local time zone from the beginning?

    def _epoch_seconds(stamp):
        # Build a naive datetime from the parts and let mktime interpret it.
        moment = datetime.datetime(stamp['year'], stamp['month'],
                                   stamp['day'], stamp['hour'],
                                   stamp['minute'], stamp['second'])
        return time.mktime(moment.timetuple())

    periodranges = np.ndarray(shape=(len(periods), 2), dtype=np.int64)
    for row, period in enumerate(periods):
        period_start = _epoch_seconds(period['start'])
        period_end = _epoch_seconds(period['end'])
        periodranges[row, 0] = period_start
        periodranges[row, 1] = period_end

    nightranges = getDaywiseSplitsForEpoch("night")
    return infer_home(data, periodranges, nightranges)
Beispiel #13
0
    def __init__(self,
                 name: str,
                 data: pd.DataFrame,
                 interval: str = DEFAULT_FREQ,
                 indicators: List[str] = None,
                 drop_columns=None):
        """Store the data in UTC, resample it and attach indicators.

        ``data`` is localized to ``DEFAULT_TIMEZONE`` (ambiguous times are
        inferred) and converted to UTC. It is upsampled to ``DEFAULT_FREQ``,
        downsampled to ``interval`` when that differs, and cast to
        ``COLUMN_DTYPES``. One ``Function`` is created per indicator name,
        and ``drop_columns`` are removed last.
        """
        # --- OHLC data -----------------------------------------------------
        self.name = name
        localized = data.tz_localize(DEFAULT_TIMEZONE, ambiguous='infer')
        self._data = localized.tz_convert('UTC')
        self.upsample(DEFAULT_FREQ)
        if interval != DEFAULT_FREQ:
            self.downsample(interval)
        self._data = self._data.astype(COLUMN_DTYPES)

        # --- indicators ----------------------------------------------------
        built = {}
        for indicator_name in indicators or []:
            built[indicator_name] = Function(indicator_name)
        self._indicators = built
        self._apply_indicators()

        if drop_columns:
            self._data = self._data.drop(drop_columns, axis=1)
    def test_getitem_with_datestring_with_UTC_offset(self, start, end):
        """Slice a tz-aware frame with offset-carrying ``start``/``end``.

        Also checks that a slice whose bounds carry different offsets, and a
        slice of the tz-naive version of the frame, raise ``ValueError``.
        """
        # GH 24076
        index = date_range(
            start="2018-12-02 14:50:00-07:00",
            end="2018-12-02 14:50:00-07:00",
            freq="1min",
        )
        frame = DataFrame(1, index=index, columns=["A"])
        tm.assert_frame_equal(frame[start:end], frame.iloc[0:3, :])

        # GH 16785
        start_text, end_text = str(start), str(end)
        with pytest.raises(ValueError, match="Both dates must"):
            # Swap the tail of the end bound so the two offsets disagree.
            frame[start_text:end_text[:-4] + "1:00"]

        with pytest.raises(ValueError, match="The index must be timezone"):
            naive = frame.tz_localize(None)
            naive[start_text:end_text]
Beispiel #15
0
# Build a random (underlying, contract, field) column MultiIndex for
# ``source`` and wrap it in a futures data source.
# NOTE(review): ``source``, ``cols`` and ``sym`` are not defined in this part
# of the script -- presumably they are set up earlier; confirm.
# NOTE(review): ``xrange``, ``string.letters`` and the use of ``/`` as a
# repeat/range count look like Python 2 semantics -- confirm the interpreter.

# Draw a random sample from the items of ``x``; the sample size is itself
# drawn from ``[1]``, so it is always 1.
month = lambda x: np.random.choice([abc for abc in x],
                                   np.random.choice([1]))

# One random contract code (a letter followed by 14, 15 or 16) per group,
# repeated ``len(cols)`` times and flattened.
contracts = np.ravel([[(''.join(month(string.letters[:26])) +
                        str(np.random.choice([14, 15, 16])))] * len(cols)
                      for x in xrange(len(source.columns) / len(cols) / 2)])

# Repeat the contract codes to span the columns of ``source``.
level_1 = len(source.columns) / len(contracts) * list(contracts)

# One generated underlying name per block of (distinct contracts x fields).
numsyms = len(source.columns) / (len(set(level_1)) * len(cols))
underlyings = [''.join(sym(string.letters[:26])) for x in xrange(numsyms)]
level_0 = np.ravel([[sym] * len(set(level_1)) * len(cols) for sym in underlyings])

# Replace the flat columns with (underlying, contract, original name) tuples
# and give the rows a daily index that ends at the current UTC time.
source.columns = pd.MultiIndex.from_tuples(zip(level_0, level_1, source.columns))
source.index = pd.date_range(start=dt.datetime.utcnow() - dt.timedelta(days=len(source.index) - 1),
                             end=dt.datetime.utcnow(), freq='D')

# The data source receives the frame with its index localized to UTC.
futdata = FuturesDataFrameSource(source.tz_localize('UTC'))


class FrontTrader(TradingAlgorithm):
    """Algorithm that orders 2 units of every symbol's contract per event."""

    @roll(lambda bars: bars[bars['open_interest'] == bars['open_interest'].max()])
    def handle_data(self, data):
        """Order ``(symbol, contract)`` for each key of ``data``; return it.

        The ``roll`` decorator receives a selector that keeps the rows whose
        ``open_interest`` equals the maximum.
        """
        for symbol in data.keys():
            contract = data[symbol]['contract']
            self.order((symbol, contract), 2)
        return data


# Instantiate the algorithm and run it over the futures data source.
bot = FrontTrader()
stats = bot.run(futdata)
Beispiel #16
0
def get_canonical_data(
        df: pd.DataFrame,
        time_col: str = TIME_COL,
        value_col: str = VALUE_COL,
        freq: str = None,
        date_format: str = None,
        tz: str = None,
        train_end_date: datetime = None,
        regressor_cols: List[str] = None,
        anomaly_info: Optional[Union[Dict, List[Dict]]] = None):
    """Loads data to internal representation. Parses date column,
    sets timezone aware index.
    Checks for irregularities and raises an error if input is invalid.
    Adjusts for anomalies according to ``anomaly_info``.

    Parameters
    ----------
    df : `pandas.DataFrame`
        Input timeseries. A data frame which includes the timestamp column
        as well as the value column.
    time_col : `str`
        The column name in ``df`` representing time for the time series data.
        The time column can be anything that can be parsed by pandas DatetimeIndex.
    value_col: `str`
        The column name which has the value of interest to be forecasted.
    freq : `str`, optional, default None
        Timeseries frequency, DateOffset alias. If None, automatically inferred.
    date_format : `str`, optional, default None
        strftime format to parse time column, eg ``%m/%d/%Y``.
        Note that ``%f`` will parse all the way up to nanoseconds.
        If None (recommended), inferred by `pandas.to_datetime`.
    tz : `str` or pytz.timezone object, optional, default None
        Passed to `pandas.DataFrame.tz_localize` to localize the timestamp.
    train_end_date : `datetime.datetime`, optional, default None
        Last date to use for fitting the model. Forecasts are generated after this date.
        If None, it is set to the minimum of ``last_date_for_val`` and
        ``last_date_for_reg`` (see Returns).
    regressor_cols: `list` [`str`], optional, default None
        A list of regressor columns used in the training and prediction DataFrames.
        If None, no regressor columns are used.
        Regressor columns that are unavailable in ``df`` are dropped.
    anomaly_info : `dict` or `list` [`dict`] or None, default None
        Anomaly adjustment info. Anomalies in ``df``
        are corrected before any forecasting is done.

        If None, no adjustments are made.

        A dictionary containing the parameters to
        `~greykite.common.features.adjust_anomalous_data.adjust_anomalous_data`.
        See that function for details.
        The possible keys are:

            ``"value_col"`` : `str`
                The name of the column in ``df`` to adjust. You may adjust the value
                to forecast as well as any numeric regressors.
            ``"anomaly_df"`` : `pandas.DataFrame`
                Adjustments to correct the anomalies.
            ``"start_date_col"``: `str`, default START_DATE_COL
                Start date column in ``anomaly_df``.
            ``"end_date_col"``: `str`, default END_DATE_COL
                End date column in ``anomaly_df``.
            ``"adjustment_delta_col"``: `str` or None, default None
                Impact column in ``anomaly_df``.
            ``"filter_by_dict"``: `dict` or None, default None
                Used to filter ``anomaly_df`` to the relevant anomalies for
                the ``value_col`` in this dictionary.
                Key specifies the column name, value specifies the filter value.
            ``"filter_by_value_col"``: `str` or None, default None
                Adds ``{filter_by_value_col: value_col}`` to ``filter_by_dict``
                if not None, for the ``value_col`` in this dictionary.
            ``"adjustment_method"`` : `str` ("add" or "subtract"), default "add"
                How to make the adjustment, if ``adjustment_delta_col`` is provided.

        Accepts a list of such dictionaries to adjust multiple columns in ``df``.

    Returns
    -------
    canonical_data_dict : `dict`
        Dictionary containing the dataset in canonical form, and information such as
        train end date. Keys:

            ``"df"`` : `pandas.DataFrame`
                Data frame containing timestamp and value, with standardized column names for internal use
                (TIME_COL, VALUE_COL). Rows are sorted by time index, and missing gaps between dates are filled
                in so that dates are spaced at regular intervals. Values are adjusted for anomalies
                according to ``anomaly_info``.
                The index can be timezone aware (but TIME_COL is not).
            ``"df_before_adjustment"`` : `pandas.DataFrame` or None
                ``df`` before adjustment by ``anomaly_info``.
                If ``anomaly_info`` is None, this is None.
            ``"fit_df"`` : `pandas.DataFrame`
                A subset of the returned ``df``, with data up until ``train_end_date``.
            ``"freq"`` : `str`
                timeseries frequency, inferred if not provided
            ``"time_stats"`` : `dict`
                Information about the time column:

                    ``"gaps"``: missing_dates
                    ``"added_timepoints"``: added_timepoints
                    ``"dropped_timepoints"``: dropped_timepoints

            ``"regressor_cols"`` : `list` [`str`]
                A list of regressor columns.
            ``"fit_cols"`` : `list` [`str`]
                Names of time column, value column, and regressor columns.
            ``"train_end_date"`` : `datetime.datetime`
                Last date or timestamp for training. It is always less than or equal to
                minimum non-null values of ``last_date_for_val`` and ``last_date_for_reg``.
            ``"last_date_for_val"`` : `datetime.datetime`
                Date or timestamp corresponding to last non-null value in ``df[value_col]``.
            ``"last_date_for_reg"`` : `datetime.datetime` or None
                Date or timestamp corresponding to last non-null value in ``df[regressor_cols]``.
                If ``regressor_cols`` is None, ``last_date_for_reg`` is None.

    Raises
    ------
    ValueError
        If ``time_col`` or ``value_col`` is not a column of ``df``,
        or if ``df`` has fewer than 3 rows.
    """
    if time_col not in df.columns:
        raise ValueError(f"{time_col} column is not in input data")
    if value_col not in df.columns:
        raise ValueError(f"{value_col} column is not in input data")
    if df.shape[0] <= 2:
        raise ValueError(
            "Time series has < 3 observations. More data are needed for forecasting.")

    # Standardizes the time column name.
    # `value_col` is standardized after anomalies are adjusted.
    df_standardized = df.rename({
        time_col: TIME_COL,
    }, axis=1)
    df_standardized[TIME_COL] = pd.to_datetime(
        df_standardized[TIME_COL],
        format=date_format,
        infer_datetime_format=True)
    # Drops data points from duplicate time stamps
    df_standardized.drop_duplicates(
        subset=[TIME_COL],
        keep='first',
        inplace=True)
    if df.shape[0] > df_standardized.shape[0]:
        warnings.warn(
            "Duplicate timestamps have been removed.",
            UserWarning)
    df = df_standardized.sort_values(by=TIME_COL)
    # Infers data frequency
    inferred_freq = pd.infer_freq(df[TIME_COL])
    if freq is None:
        freq = inferred_freq
    elif inferred_freq is not None and freq != inferred_freq:
        warnings.warn(
            f"Provided frequency '{freq}' does not match inferred frequency '{inferred_freq}'."
            f" Using '{freq}'.", UserWarning)  # NB: with missing data, it's better to provide freq
    # Handles gaps in time series
    missing_dates = find_missing_dates(df[TIME_COL])
    df, added_timepoints, dropped_timepoints = fill_missing_dates(
        df,
        time_col=TIME_COL,
        freq=freq)
    time_stats = {
        "gaps": missing_dates,
        "added_timepoints": added_timepoints,
        "dropped_timepoints": dropped_timepoints
    }
    # Creates index with localized timestamp
    df.index = df[TIME_COL]
    df.index.name = None
    if tz is not None:
        df = df.tz_localize(tz)

    df_before_adjustment = None
    if anomaly_info is not None:
        # Saves values before adjustment.
        df_before_adjustment = df.copy()
        # Adjusts columns in df (e.g. `value_col`, `regressor_cols`)
        # using the anomaly info. One dictionary of parameters
        # for `adjust_anomalous_data` is provided for each column to adjust.
        if not isinstance(anomaly_info, (list, tuple)):
            anomaly_info = [anomaly_info]
        for single_anomaly_info in anomaly_info:
            adjusted_df_dict = adjust_anomalous_data(
                df=df,
                time_col=TIME_COL,
                **single_anomaly_info)
            # `df` with values for single_anomaly_info["value_col"] adjusted.
            df = adjusted_df_dict["adjusted_df"]

        # Standardizes `value_col` name
        df_before_adjustment.rename({
            value_col: VALUE_COL
        }, axis=1, inplace=True)
    # Standardizes `value_col` name
    df.rename({
        value_col: VALUE_COL
    }, axis=1, inplace=True)

    # Finds date of last available value
    last_date_available = df[TIME_COL].max()
    last_date_for_val = df[df[VALUE_COL].notnull()][TIME_COL].max()
    last_date_for_reg = None
    if regressor_cols:
        available_regressor_cols = [col for col in df.columns if col not in [TIME_COL, VALUE_COL]]
        cols_not_selected = set(regressor_cols) - set(available_regressor_cols)
        regressor_cols = [col for col in regressor_cols if col in available_regressor_cols]
        if cols_not_selected:
            warnings.warn(f"The following columns are not available to use as "
                          f"regressors: {sorted(cols_not_selected)}")
        last_date_for_reg = df[df[regressor_cols].notnull().any(axis=1)][TIME_COL].max()
        max_train_end_date = min(last_date_for_val, last_date_for_reg)
    else:
        max_train_end_date = last_date_for_val
    # Chooses appropriate train_end_date
    if train_end_date is None:
        train_end_date = max_train_end_date
        if train_end_date < last_date_available:
            warnings.warn(
                f"{value_col} column of the provided TimeSeries contains "
                f"null values at the end. Setting 'train_end_date' to the last timestamp with a "
                f"non-null value ({train_end_date}).",
                UserWarning)
    elif train_end_date > max_train_end_date:
        # The trailing space after "or" keeps the column name from being
        # glued onto the preceding word in the emitted message.
        warnings.warn(
            f"Input timestamp for the parameter 'train_end_date' "
            f"({train_end_date}) either exceeds the last available timestamp or "
            f"{value_col} column of the provided TimeSeries contains null "
            f"values at the end. Setting 'train_end_date' to the last timestamp with a "
            f"non-null value ({max_train_end_date}).",
            UserWarning)
        train_end_date = max_train_end_date

    if regressor_cols is None:
        regressor_cols = []
    fit_cols = [TIME_COL, VALUE_COL] + regressor_cols
    fit_df = df[df[TIME_COL] <= train_end_date][fit_cols]

    return {
        "df": df,
        "df_before_adjustment": df_before_adjustment,
        "fit_df": fit_df,
        "freq": freq,
        "time_stats": time_stats,
        "regressor_cols": regressor_cols,
        "fit_cols": fit_cols,
        "train_end_date": train_end_date,
        "last_date_for_val": last_date_for_val,
        "last_date_for_reg": last_date_for_reg,
    }
Beispiel #17
0
def make_tz_unaware_df(df: DataFrame) -> DataFrame:
    """Return ``df`` with the time zone removed from its index."""
    # Localizing to ``None`` drops the zone from a tz-aware index.
    naive_df = df.tz_localize(tz=None)
    return naive_df
Beispiel #18
0
    (''.join(month(string.letters[:26])) + str(np.random.choice([14, 15, 16])))
] * len(cols) for x in range(len(source.columns) / len(cols) / 2)])

# Repeat the contract codes to span the columns of ``source``.
# Floor division keeps the repeat count an int: under Python 3 ``/`` yields
# a float, which cannot multiply a list or be passed to ``range``.
level_1 = len(source.columns) // len(contracts) * list(contracts)

# One generated underlying name per block of (distinct contracts x fields).
numsyms = len(source.columns) // (len(set(level_1)) * len(cols))
# ``string.letters`` does not exist in Python 3; its first 26 characters
# were the lower-case ASCII letters.
underlyings = [''.join(sym(string.ascii_lowercase)) for _ in range(numsyms)]
level_0 = np.ravel([[sym] * len(set(level_1)) * len(cols)
                    for sym in underlyings])

# Replace the flat columns with (underlying, contract, original name) tuples
# and give the rows a daily index that ends at the current UTC time.
source.columns = pd.MultiIndex.from_tuples(
    list(zip(level_0, level_1, source.columns)))
source.index = pd.date_range(start=dt.datetime.utcnow() -
                             dt.timedelta(days=len(source.index) - 1),
                             end=dt.datetime.utcnow(),
                             freq='D')

# The data source receives the frame with its index localized to UTC.
futdata = FuturesDataFrameSource(source.tz_localize('UTC'))


class FrontTrader(TradingAlgorithm):
    """Algorithm that places an order of size 2 for every symbol it sees."""

    @roll(lambda rows: rows[rows['open_interest'] == rows['open_interest'].max()])
    def handle_data(self, data):
        """Order each symbol's ``'contract'`` entry, then return ``data``.

        The keys are copied into a list before iterating. The ``roll``
        decorator is given a selector that keeps the rows whose
        ``open_interest`` equals the maximum.
        """
        symbols = list(data.keys())
        for symbol in symbols:
            asset = (symbol, data[symbol]['contract'])
            self.order(asset, 2)
        return data


# Create the trading algorithm and run it against the futures data source.
bot = FrontTrader()
stats = bot.run(futdata)
 def set_timezone(self, df: pd.DataFrame) -> pd.DataFrame:
     """Return ``df`` with its index localized to ``self.data_tz``."""
     zone = self.data_tz
     localized = df.tz_localize(zone)
     return localized