Example #1
    def test_subset(self):
        N = 10
        rng = date_range('1/1/1990', periods=N, freq='53s')
        df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
                       index=rng)
        df.loc[df.index[4:8], 'A'] = np.nan
        dates = date_range('1/1/1990', periods=N * 3,
                           freq='25s')

        # with a subset of A should be the same
        result = df.asof(dates, subset='A')
        expected = df.asof(dates)
        tm.assert_frame_equal(result, expected)

        # same with A/B
        result = df.asof(dates, subset=['A', 'B'])
        expected = df.asof(dates)
        tm.assert_frame_equal(result, expected)

        # with subset='B' (no NaNs in B) every row is valid, so asof matches a plain ffill
        result = df.asof(dates, subset='B')
        expected = df.resample('25s', closed='right').ffill().reindex(dates)
        expected.iloc[20:] = 9

        tm.assert_frame_equal(result, expected)
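The subset argument only controls which columns must be non-NaN for a row to count as the "last valid" one; the returned frame still contains every column. A minimal, self-contained sketch of that behaviour (the data below is made up for illustration):

import numpy as np
import pandas as pd

idx = pd.date_range('2021-01-01', periods=5, freq='1min')
df = pd.DataFrame({'A': [0.0, np.nan, np.nan, 3.0, 4.0],
                   'B': np.arange(5.0)}, index=idx)
where = idx + pd.Timedelta(seconds=30)

# Rows where A is NaN are skipped when subset='A', so the last valid
# row is carried forward for both columns.
print(df.asof(where, subset='A'))
# With subset='B' (no NaNs in B) every preceding row counts as valid.
print(df.asof(where, subset='B'))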
Example #2
    def test_subset(self):

        N = 10
        rng = date_range('1/1/1990', periods=N, freq='53s')
        df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
                       index=rng)
        df.loc[df.index[4:8], 'A'] = np.nan
        dates = date_range('1/1/1990', periods=N * 3,
                           freq='25s')

        # with a subset of A should be the same
        result = df.asof(dates, subset='A')
        expected = df.asof(dates)
        assert_frame_equal(result, expected)

        # same with A/B
        result = df.asof(dates, subset=['A', 'B'])
        expected = df.asof(dates)
        assert_frame_equal(result, expected)

        # with subset='B' (no NaNs in B) every row is valid, so asof matches a plain ffill
        result = df.asof(dates, subset='B')
        expected = df.resample('25s', closed='right').ffill().reindex(dates)
        expected.iloc[20:] = 9

        assert_frame_equal(result, expected)
Example #3
    def test_asof_periodindex_mismatched_freq(self):
        N = 50
        rng = period_range("1/1/1990", periods=N, freq="H")
        df = DataFrame(np.random.randn(N), index=rng)

        # Mismatched freq
        msg = "Input has different freq"
        with pytest.raises(IncompatibleFrequency, match=msg):
            df.asof(rng.asfreq("D"))
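When the frame is indexed by a PeriodIndex, the where argument has to use the same frequency, otherwise IncompatibleFrequency is raised as above. A minimal sketch of the matching-frequency case (data made up for illustration):

import numpy as np
import pandas as pd

rng = pd.period_range('1/1/1990', periods=5, freq='H')
df = pd.DataFrame(np.random.randn(5), index=rng)

# Same-frequency Period queries are fine; passing rng.asfreq('D') here
# would raise IncompatibleFrequency instead.
print(df.asof(rng[2:]))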
Example #4
    def test_missing(self):
        # GH 15118
        # no match found - `where` value before earliest date in index
        N = 10
        rng = date_range('1/1/1990', periods=N, freq='53s')
        df = DataFrame({'A': np.arange(N), 'B': np.arange(N)}, index=rng)
        result = df.asof('1989-12-31')

        expected = Series(index=['A', 'B'], name=Timestamp('1989-12-31'),
                          dtype='float64')
        tm.assert_series_equal(result, expected)

        result = df.asof(to_datetime(['1989-12-31']))
        expected = DataFrame(index=to_datetime(['1989-12-31']),
                             columns=['A', 'B'],
                             dtype='float64')
        tm.assert_frame_equal(result, expected)
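The two asserts above hinge on asof's return shape: a scalar where gives a Series indexed by the columns and named after the timestamp, while a list-like where gives a DataFrame indexed by where; with where before the first index entry, every value is NaN. A small sketch (data made up for illustration):

import numpy as np
import pandas as pd

idx = pd.date_range('1/1/1990', periods=3, freq='53s')
df = pd.DataFrame({'A': np.arange(3), 'B': np.arange(3)}, index=idx)

print(df.asof('1989-12-31'))                    # all-NaN Series named by the timestamp
print(df.asof(pd.to_datetime(['1989-12-31'])))  # single all-NaN row, columns A and B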
Example #5
    def test_missing(self):
        # GH 15118
        # no match found - `where` value before earliest date in index
        N = 10
        rng = date_range('1/1/1990', periods=N, freq='53s')
        df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
                       index=rng)
        result = df.asof('1989-12-31')

        expected = Series(index=['A', 'B'], name=Timestamp('1989-12-31'),
                          dtype='float64')
        tm.assert_series_equal(result, expected)

        result = df.asof(to_datetime(['1989-12-31']))
        expected = DataFrame(index=to_datetime(['1989-12-31']),
                             columns=['A', 'B'], dtype='float64')
        tm.assert_frame_equal(result, expected)
Example #6
    def test_time_zone_aware_index(self, stamp, expected):
        # GH21194
        # Test that DataFrame.asof handles a timezone-aware index when the
        # queried timestamp is given in UTC or another timezone
        df = DataFrame(data=[1, 2],
                       index=[Timestamp('2018-01-01 21:00:05.001+00:00'),
                              Timestamp('2018-01-01 22:35:10.550+00:00')])
        result = df.asof(stamp)
        tm.assert_series_equal(result, expected)
Example #7
    def test_time_zone_aware_index(self, stamp, expected):
        # GH21194
        # Test that DataFrame.asof handles a timezone-aware index when the
        # queried timestamp is given in UTC or another timezone
        df = DataFrame(data=[1, 2],
                       index=[
                           Timestamp('2018-01-01 21:00:05.001+00:00'),
                           Timestamp('2018-01-01 22:35:10.550+00:00')
                       ])
        result = df.asof(stamp)
        tm.assert_series_equal(result, expected)
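The stamp/expected pair comes from a parametrization that is not shown in these snippets; a self-contained sketch of the same behaviour, with a UTC timestamp chosen purely for illustration:

from pandas import DataFrame, Timestamp

df = DataFrame(data=[1, 2],
               index=[Timestamp('2018-01-01 21:00:05.001+00:00'),
                      Timestamp('2018-01-01 22:35:10.550+00:00')])

# A tz-aware `where` is compared against the tz-aware index; the last
# row at or before 23:00 UTC is the second one (value 2).
print(df.asof(Timestamp('2018-01-01 23:00:00+00:00')))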
Example #8
from typing import List, Union

import pandas as pd


def _concat_executions(market_data: pd.DataFrame,
                       executions: Union[List, pd.DataFrame]):
    # Normalize the executions input to a DataFrame indexed by 'datetime'.
    if isinstance(executions, List):
        executions_df = pd.DataFrame(executions).set_index('datetime')
    elif isinstance(executions, pd.DataFrame):
        executions_df = executions.set_index('datetime')
    else:
        raise Exception(
            f'executions only supports these types: '
            f'{_concat_executions.__annotations__["executions"]}'
        )

    executions_df.index = pd.to_datetime(executions_df.index)
    executions_df = executions_df.sort_index()
    market_data = market_data.sort_index()
    # Re-key each execution to the last market-data bar at or before it,
    # using that bar's 'datetime' column value as the new index.
    executions_df.index = market_data.asof(executions_df.index)['datetime']
    # Collect all executions mapped to the same bar into a list of dicts.
    executions_df_grouped = executions_df.groupby('datetime').apply(
        lambda df: df.to_dict('records'))
    executions_df_grouped.name = 'trades'
    # Left-join the grouped executions onto the market data as a 'trades' column.
    market_data = market_data.merge(executions_df_grouped,
                                    'left',
                                    left_index=True,
                                    right_index=True)
    return market_data
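A hypothetical call (column names and values invented) showing the inputs the function expects: market_data indexed by time and also carrying a 'datetime' column, and executions given as a list of dicts keyed by 'datetime':

import pandas as pd

market_data = pd.DataFrame(
    {'datetime': pd.to_datetime(['2021-01-04 09:30:00',
                                 '2021-01-04 09:31:00']),
     'close': [100.0, 101.0]}).set_index('datetime', drop=False)

executions = [
    {'datetime': '2021-01-04 09:30:30', 'price': 100.5, 'volume': 1},
    {'datetime': '2021-01-04 09:31:15', 'price': 101.2, 'volume': 2},
]

# Each execution is attached to the last market-data bar at or before it.
merged = _concat_executions(market_data, executions)
print(merged['trades'])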
Example #9
class AsOfDataFrame(object):
    goal_time = 0.2

    def setup(self):
        self.N = 10000
        self.M = 100
        self.rng = date_range(start='1/1/1990', periods=self.N, freq='53s')
        self.dates = date_range(start='1/1/1990',
                                periods=(self.N * 10),
                                freq='5s')
        self.ts = DataFrame(np.random.randn(self.N, self.M), index=self.rng)
        self.ts2 = self.ts.copy()
        self.ts2.iloc[250:5000] = np.nan
        self.ts3 = self.ts.copy()
        self.ts3.iloc[-5000:] = np.nan

    # test speed of pre-computing NAs.
    def time_asof(self):
        self.ts.asof(self.dates)

    # should be roughly the same as above.
    def time_asof_nan(self):
        self.ts2.asof(self.dates)

    # test speed of the code path for a scalar index
    # with pre-computing all NAs.
    def time_asof_single(self):
        self.ts.asof(self.dates[0])

    # should be roughly the same as above.
    def time_asof_nan_single(self):
        self.ts3.asof(self.dates[-1])

    # test speed of the code path for a scalar index
    # before the start. should be without the cost of
    # pre-computing all the NAs.
    def time_asof_single_early(self):
        self.ts.asof(self.dates[0] - dt.timedelta(10))
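These timing methods look like asv-style benchmarks; assuming the numpy/pandas names the class references (plus datetime as dt) are imported, they can also be timed ad hoc:

import timeit

bench = AsOfDataFrame()
bench.setup()

# Time the vectorized asof lookup over the 100,000 query timestamps.
print(timeit.timeit(bench.time_asof, number=10))
print(timeit.timeit(bench.time_asof_nan, number=10))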
Example #10
def split_into_examples(df: pd.DataFrame, label: str, examples: [np.ndarray],
                        labels_of_examples: [str], time_series_length,
                        interval_in_seconds, config,
                        failure_times_of_examples: [str], failure_time,
                        window_times_of_examples: [str], y, i_dataset):
    thread_list = []

    # sample time_series_length values from each interval if its length is close to the configured value
    if not config.use_over_lapping_windows:

        # split case into single intervals with the configured length
        interval_list = [
            g for c, g in df.groupby(
                pd.Grouper(level='timestamp',
                           freq=str(interval_in_seconds) + 's'))
        ]

        for g in interval_list:
            g_len = (g.index[-1] - g.index[0]).total_seconds()

            # keep only intervals whose length is within 0.5 s of the configured value
            if interval_in_seconds - 0.5 <= g_len <= interval_in_seconds + 0.5:
                t = DFConverter(g, time_series_length, False)
                thread_list.append(t)
    else:
        # print("df.index[0]: ", df.index[0], "df.index[-1]: ", df.index[-1])
        start_time = df.index[0]
        end_time = df.index[-1]
        # slide over data frame and extract windows until the window would exceed the last time step
        while start_time + pd.to_timedelta(
                config.over_lapping_window_interval_in_seconds,
                unit='s') < end_time:
            # generate a list with indexes for window
            index = pd.date_range(start_time,
                                  periods=config.time_series_length,
                                  freq=config.resample_frequency)
            # print("from: ", index[0], "to: ", index[-1])

            # with use_over_lapping_windows this does no more than convert the
            # relevant part of the df into a numpy array; the converter thread
            # is reused so no further special handling is needed downstream
            t = DFConverter(df.asof(index), time_series_length, True)
            thread_list.append(t)

            # update next start time for next window
            start_time = start_time + pd.to_timedelta(
                config.over_lapping_window_interval_in_seconds, unit='s')

    # sampling is done multi-threaded with at most the configured number of cores
    thread_limit = min(len(thread_list), config.max_parallel_cores)
    threads_finished = 0

    while threads_finished < len(thread_list):
        if threads_finished + thread_limit > len(thread_list):
            thread_limit = len(thread_list) - threads_finished

        r = threads_finished + thread_limit
        for i in range(threads_finished, r):
            thread_list[i].start()

        for i in range(threads_finished, r):
            thread_list[i].join()

        for i in range(threads_finished, r):
            examples.append(thread_list[i].result)
            labels_of_examples.append(label)

            if failure_time == "":
                failure_times_of_examples.append("noFailure-" +
                                                 str(i_dataset) + "-" + str(y))
            else:
                failure_times_of_examples.append(str(failure_time))

            window_times_of_examples.append(thread_list[i].windowTimesAsString)

        threads_finished += thread_limit
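The overlapping-window branch above relies on DataFrame.asof to materialize each window on a regular time grid; a minimal illustration of that step in isolation (values and frequencies invented, no threading):

import numpy as np
import pandas as pd

ts = pd.date_range('2021-01-01', periods=20, freq='250ms')
df = pd.DataFrame({'sensor': np.arange(20.0)}, index=ts)

# One fixed-length window starting at the first timestamp; asof fills
# each grid slot with the last observation at or before it.
window_index = pd.date_range(df.index[0], periods=8, freq='500ms')
print(df.asof(window_index).to_numpy())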