Code example #1
def download_owid(run_date: pandas.Timestamp = None) -> pandas.DataFrame:
    """ Downloads the OurWorldInData COVID-19 dataset.

    Parameters
    ----------
    run_date : pandas.Timestamp, optional
        the date for which to download the data
        THIS IS CURRENTLY NOT IMPLEMENTED

    Raises
    ------
    ValueError
        when a run_date in the future is passed
    NotImplementedError
        when a run_date earlier than today is passed
    """
    if run_date.date() > datetime.date.today():
        raise ValueError("Run date is in the future. Nice try.")
    if run_date.date() < datetime.date.today():
        # TODO: implement downloading of historic data
        raise NotImplementedError(
            "Downloading with a run_date is not yet supported. "
            f"Today: {datetime.date.today()}, run_date: {run_date}"
        )

    df_raw = pandas.read_csv(
        "https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-data.csv?raw=true",
        parse_dates=["date"],
    ).rename(columns={"iso_code": "iso_alpha3"})
    df_raw["iso_alpha2"] = [
        iso3166.countries.get(alpha3).alpha2 if alpha3 in iso3166.countries else None
        for alpha3 in df_raw.iso_alpha3
    ]
    df_raw["region"] = "all"
    return df_raw.set_index(["iso_alpha2", "region", "date"])
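
A minimal usage sketch for the function above (not part of the original snippet; it assumes network access and the `pandas`/`iso3166` packages used by the code):

import pandas

# Hypothetical call: only today's date is supported by the snippet above.
df = download_owid(run_date=pandas.Timestamp.today())
# The result is indexed by (iso_alpha2, region, date); select one country level.
print(df.xs("DE", level="iso_alpha2").tail())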
Code example #2
File: data_us.py  Project: rtcovidlive/rtlive-global
def get_raw_covidtracking_data(run_date: pd.Timestamp):
    """ Gets the current daily CSV from COVIDTracking """
    if run_date.date() > datetime.date.today():
        raise ValueError("Run date is in the future. Nice try.")
    if run_date.date() < datetime.date.today():
        # TODO: implement downloading of historic data
        raise NotImplementedError(
            "Downloading with a run_date is not yet supported. "
            f"Today: {datetime.date.today()}, run_date: {run_date}")

    url = "https://covidtracking.com/api/v1/states/daily.csv"
    data = pd.read_csv(url).rename(columns={
        "state": "region",
    })
    data["date"] = pd.to_datetime(data["date"], format="%Y%m%d")
    data = data.set_index(["region", "date"]).sort_index()

    # Too little data or unreliable reporting in the data source.
    df_raw = data.drop(["MP", "GU", "AS", "PR", "VI"])

    # The data in these columns is unreliable, but it is corrected by the process_covidtracking_data function.
    # Here we just add the columns so the original data is kept.
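    # NOTE: `idx` below is assumed to be pandas.IndexSlice, defined at module level in the original file.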
    for region in df_raw.reset_index().region.unique():
        df_raw.loc[idx[region, :],
                   "new_cases"] = df_raw.xs(region).positive.diff().values
        df_raw.loc[idx[region, :],
                   "new_tests"] = df_raw.xs(region).total.diff().values

    # calculate the sum over all states
    df_all = df_raw.sum(level='date')
    df_all.insert(0, column='region', value='all')
    df_all = df_all.reset_index().set_index(['region', 'date'])
    df_merged = pd.concat([df_raw, df_all]).sort_index()
    return df_merged
Code example #3
    def load_to_cache(self, start_time: pd.Timestamp, end_time: pd.Timestamp):
        """
        Downloads data for the range of (end_time - start_time) days
        from the server into a CSV file.
        start_time - start of the range (type: pd.Timestamp())
        end_time   - end of the range (type: pd.Timestamp())
        """
        # TODO: Needs reworking, because bitmex does not publish the last day's data completely
        #assert end_time < (pd.Timestamp.today() - dt.timedelta(days=1))

        assert start_time < end_time

        if start_time.date() == end_time.date():
            if not self.check_cache(start_time):
                self.load_bar_day(start_time).to_csv(
                    path.join(self.path_cash, self.symbol, self.data_frequency,
                              start_time.strftime("%Y-%m-%d") + '.csv'))
        else:
            for day in pd.date_range(start_time,
                                     end_time,
                                     freq='D',
                                     closed='left'):
                if not self.check_cache(day):
                    self.load_bar_day(day).to_csv(
                        path.join(self.path_cash, self.symbol,
                                  self.data_frequency,
                                  day.strftime("%Y-%m-%d") + '.csv'))
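
A hedged usage sketch (the class name and constructor arguments below are assumptions; only the method above is from the original):

import pandas as pd

# Hypothetical loader instance exposing symbol, data_frequency, path_cash,
# check_cache() and load_bar_day() as used by load_to_cache above.
loader = BarDataLoader(symbol="XBTUSD", data_frequency="1m", path_cash="cache")
loader.load_to_cache(pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-10"))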
Code example #4
File: datatype.py  Project: doncat99/FinanceCenter
    def is_finished_kdata_timestamp(cls, timestamp: pd.Timestamp, level: IntervalLevel):

        timestamp = pd.Timestamp(timestamp)

        for t in cls.get_interval_timestamps(timestamp.date(), timestamp.date(), level=level):
            if is_same_time(t, timestamp):
                return True

        return False
Code example #5
File: time.py  Project: newlyedward/qlib
def cal_sam_minute(x: pd.Timestamp,
                   sam_minutes: int,
                   region: str = REG_CN) -> pd.Timestamp:
    """
    align the minute-level data to a down-sampled calendar

    e.g. align 10:38 to 10:35 in 5 minute-level (10:30 in 10 minute-level)

    Parameters
    ----------
    x : pd.Timestamp
        datetime to be aligned
    sam_minutes : int
        align to `sam_minutes` minute-level calendar
    region: str
        Region, for example, "cn", "us"

    Returns
    -------
    pd.Timestamp:
        the aligned datetime
    """
    cal = get_min_cal(C.min_data_shift, region)[::sam_minutes]
    idx = bisect.bisect_right(cal, x.time()) - 1
    _date, new_time = x.date(), cal[idx]
    return concat_date_time(_date, new_time)
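
A usage sketch mirroring the docstring example (it assumes a configured qlib environment so that `C.min_data_shift` and `get_min_cal` resolve):

import pandas as pd

# Hypothetical call: align 10:38 to the 5-minute calendar.
aligned = cal_sam_minute(pd.Timestamp("2021-01-04 10:38"), sam_minutes=5)
print(aligned)  # expected: 2021-01-04 10:35:00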
Code example #6
def weather_adjustment(start, end, meter, basis_dates):
    '''
    Provides the gross adjustment factor for weather adjusted baselines
    
    Parameters:
        start (str) : A str coercible to a timestamp for the start of the event
        end (str) : A str coercible to a timestamp for the end of the event
        meter (dataframe): A dataframe consisting of datetime and load values
        basis_dates (list) : A list of dates

    Returns:
        float : A float which gives the weather adjustment factor
        
    '''
    start = Timestamp(start)
    end = Timestamp(end)
    # adjustment hours 4 hour prior to start of event
    adj_hrs = list(range(start.hour -4, start.hour - 2))
    # get adjustment on the day of the event
    adj_usage = meter[(meter.date == start.date()) & (meter.hour.isin(adj_hrs))]
    adj_usage = adj_usage.groupby('hour').mean()
    adj_usage = adj_usage.mean()
    
    adj_basis = meter[(meter.date.isin(basis_dates)) & (meter.hour.isin(adj_hrs))]
    adj_basis = adj_basis.groupby('hour').mean()
    adj_basis = adj_basis.mean()
    
    return adj_usage/adj_basis
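
The factor returned above is just the event-day average load in the adjustment window divided by the basis-day average in the same window; a standalone illustration of that arithmetic with made-up numbers:

# Made-up loads (kW) for the two adjustment hours, on the event day and averaged over basis days.
event_day_avg = (120.0 + 130.0) / 2
basis_avg = (100.0 + 110.0) / 2
gross_adjustment_factor = event_day_avg / basis_avg
print(round(gross_adjustment_factor, 3))  # 1.19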
Code example #7
File: schema.py  Project: doncat99/FinanceAnalysis
    def is_finished_kdata_timestamp(cls, timestamp: pd.Timestamp, level: IntervalLevel):
        """
        :param timestamp: the timestamp could be recorded in kdata of the level
        :type timestamp: pd.Timestamp
        :param level:
        :type level: zvt.domain.common.IntervalLevel
        :return:
        :rtype: bool
        """
        timestamp = pd.Timestamp(timestamp)

        for t in cls.get_interval_timestamps(timestamp.date(), timestamp.date(), level=level):
            if is_same_time(t, timestamp):
                return True

        return False
Code example #8
File: pdf.py  Project: ildarin22/poptimizer
def make_report_files_path(report_name: str, date: pd.Timestamp):
    """Прокладывает путь и возвращает путь к файлу pdf-отчета и xlsx-отчета."""
    date = date.date()
    file_name = f"{report_name} {date}"
    report_folder = PDF_PATH / f"{file_name}"
    if not report_folder.exists():
        report_folder.mkdir(parents=True)
    return report_folder / f"{date}.pdf", report_folder / f"{date}.xlsx"
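
A usage sketch (assuming `PDF_PATH` points at an existing reports directory, as in the original module):

import pandas as pd

# Hypothetical call: creates "<PDF_PATH>/dividends 2020-10-01/" if needed and
# returns the pdf and xlsx paths inside it.
pdf_file, xlsx_file = make_report_files_path("dividends", pd.Timestamp("2020-10-01"))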
Code example #9
File: decision.py  Project: you-n-g/qlib
 def clip_time_range(
         self, start_time: pd.Timestamp,
         end_time: pd.Timestamp) -> Tuple[pd.Timestamp, pd.Timestamp]:
     start_date = start_time.date()
     val_start, val_end = concat_date_time(
         start_date,
         self.start_time), concat_date_time(start_date, self.end_time)
     # NOTE: `end_date` should not be used. Because the `end_date` is for slicing. It may be in the next day
     # Assumption: start_time and end_time is for intraday trading. So it is OK for only using start_date
     return max(val_start, start_time), min(val_end, end_time)
Code example #10
 def _download_many(self, aliases: List[str],
                    reg_date: pd.Timestamp) -> List[Dict[str, Any]]:
     with futures.ThreadPoolExecutor(max_workers=len(aliases)) as executor:
         rez = [
             executor.submit(
                 apimoex.get_market_candles,
                 self._session,
                 ticker,
                 start=reg_date.date(),
                 end=self.LAST_HISTORY_DATE.date(),
             ) for ticker in aliases
         ]
         data = []
         for future in rez:
             data.extend(future.result())
     return self._clean_up(data)
Code example #11
File: data.py  Project: zkwiatkowska/buses-warsaw
def _adjust_date(column: pd.Series, start_from: pd.Timestamp):
    validate_data_is_type(column, pd.Series)
    validate_data_is_time_column(column)

    pre_start = column[column.dt.time < start_from.time()]
    post_start = column[column.dt.time >= start_from.time()]

    post_start = pd.to_datetime(
        post_start.apply(lambda x: f'{start_from.date()} {x.time()}'))
    day_after = start_from.date() + pd.Timedelta(days=1)
    pre_start = pd.to_datetime(
        pre_start.apply(lambda x: f'{day_after} {x.time()}'))

    output = pd.concat((pre_start, post_start))
    assert output.shape == column.shape

    return output
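
A sketch of the effect (the inputs are assumptions and the module's `validate_data_is_type` / `validate_data_is_time_column` helpers must be importable): times at or after `start_from`'s time keep its date, earlier times roll over to the following day.

import pandas as pd

# Hypothetical input: raw times without a meaningful date, service day starting at 04:00.
times = pd.Series(pd.to_datetime(["1900-01-01 23:50", "1900-01-01 04:10", "1900-01-01 01:30"]))
adjusted = _adjust_date(times, pd.Timestamp("2021-03-01 04:00"))
# expected: 23:50 -> 2021-03-01 23:50, 04:10 -> 2021-03-01 04:10, 01:30 -> 2021-03-02 01:30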
Code example #12
    def isRebalanceTriggered(self, current_date: pd.Timestamp=None,
        log_flag: bool=True) -> bool:
        """Checks if a rebalance is triggered at the current backtesting
        date (indicated by `get_datetime`).
        
        Keyword Arguments:
            current_date {pd.Timestamp} -- Current date to use as an override
                                           (default: {None}).
            log_flag {bool} -- Flag for logging (default: {True}).

        Returns:
            bool -- True if rebalance is triggered, false otherwise.
        """

        # Override current date with sim date if not provided
        if not current_date:
            current_date = get_datetime()

        # Checking if correct week
        if (self.reb_week_start < current_date.day <= self.reb_week_end):
            # Flag if specific day matches
            is_triggered = (current_date.weekday_name ==
                config.rebalance_trigger['day'])

            # Handling wildcard (need to check flag)
            if ((config.rebalance_trigger['day'] == '*') and
                (self.last_month_rebalance != current_date.month)):
                # Set flag to true
                is_triggered = True
                # Update flag
                self.last_month_rebalance = current_date.month

            # Log and return
            if is_triggered and log_flag:
                logging.info('ETF Portfolio rebalance triggered on {0} ({1})'
                    .format(current_date.weekday_name, current_date.date()))
        
            return is_triggered
        # Not in correct week
        return False
Code example #13
File: data_fr.py  Project: davideferre/rtlive-global
def get_data_FR(run_date: pandas.Timestamp) -> pandas.DataFrame:
    """
    Retrieve the daily CSV from
    https://www.data.gouv.fr/fr/datasets/donnees-relatives-aux-resultats-des-tests-virologiques-covid-19
    for all French regions.
    Limitations:
    * Data by region only contain tests for which the residence region of the tested
    person could be determined. Hence, countrywide data contain more tests than the sum of all
    regions.
    * Data transmission can sometimes exceed 9 days. Indicators are updated daily as test
    results are received.

    Parameters
    ----------
    run_date : pandas.Timestamp
        use the data as it was released on that day

    Returns
    -------
    result : pandas.DataFrame
        [region, date]-indexed table that has rows for every region & date combination in [
        2020-05-13, run_date - 1].
        Contains columns "new_cases" and "new_tests" that are the number of NEW positives /
        total tests for each (day-region) couple.
        "all" region is the sum over all regions.
    """
    if run_date.date() > datetime.date.today():
        raise ValueError("Run date is in the future. Nice try.")
    if run_date.date() < datetime.date.today():
        # TODO: implement downloading of historic data
        raise NotImplementedError(
            "Downloading with a run_date is not yet supported. "
            f"Today: {datetime.date.today()}, run_date: {run_date}")

    content = requests.get(
        "https://www.data.gouv.fr/fr/datasets/r/001aca18-df6a-45c8-89e6-f82d689e6c01",
        verify=False,
    ).content
    data = pandas.read_csv(
        io.StringIO(content.decode("utf-8")),
        sep=";",
        dtype={
            "reg": str
        },
        parse_dates=["jour"],
        usecols=["reg", "jour", "P", "T", "cl_age90"],
    ).rename(
        columns={
            "reg": "region",
            "jour": "date",
            "cl_age90": "ageclass",
            "P": "new_cases",
            "T": "new_tests",
        })
    # Drop data by age class ('0' age class is the sum of all age classes) and truncate data after
    # run_date
    data = (data[data.ageclass == 0].drop(
        "ageclass", axis=1).set_index("date").sort_index().truncate(
            after=run_date - pandas.DateOffset(1)).reset_index().set_index(
                ["region", "date"]).sort_index())
    # compute and append national data, and restrict to existing regions to get rid of data
    # errors that creep in from the original link
    df_all = data.reset_index(level=1).groupby("date").sum().reset_index()
    df_all["region"] = "all"
    true_region_codes = get_regions_metadata()[0].keys()
    data = (data.append(df_all.set_index(
        ["region", "date"])).loc[true_region_codes].sort_index())

    assert isinstance(data, pandas.DataFrame)
    assert data.index.names == ("region", "date")
    assert "new_cases" in data.columns, f"Columns were: {data.columns}"
    assert "new_tests" in data.columns, f"Columns were: {data.columns}"
    for col in ["new_cases", "new_tests"]:
        if any(data[col] < 0):
            _log.warning(
                f"Column {col} has {sum(data[col] < 0)} negative entries!! Overriding with NaN..."
            )
            data.loc[data[col] < 0, col] = numpy.nan

    return data
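
A usage sketch (an assumption, not part of the original file): the function only supports being run on the release day itself and needs network access to data.gouv.fr.

import pandas

# Hypothetical call returning a (region, date)-indexed frame with new_cases / new_tests.
df = get_data_FR(run_date=pandas.Timestamp.today())
print(df.xs("all", level="region").tail())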
Code example #14
    expiry_month = randint(1, 12)
    daysInMonth = Timestamp(1990, expiry_month, 1).daysinmonth
    expiry_day = randint(1, daysInMonth)
    expiry_date_ts = Timestamp(expiry_year, expiry_month, expiry_day)

    if verbosity > 1:
        print('no expiry_date given, generated expiry_date_ts is "{}" '.format(
            expiry_date_ts))

if verbosity > 1:
    print('')

if verbosity > 0:
    print('Authority is {}.'.format(authority))
    print('Number is {}'.format(number))
    print('Date of birth is {}'.format(birth_date_ts.date()))
    print('Date of expiry is {}'.format(expiry_date_ts.date()))
    print('Nationality is {}'.format(nationality))
    print('')

blocks = [
    authority + number,
    birth_date_ts.strftime('%y%m%d'),
    expiry_date_ts.strftime('%y%m%d'), nationality
]

#blocks = ['T220001293', '6408125', '2010315', 'D']

for i, block in enumerate(blocks):
    if len(block) > 3:
        blocks[i] += str(generateChecksum(block))
Code example #15
def ts2str(ts: pd.Timestamp):
    return str(ts.date())
Code example #16
def nyiso_cbl(meter, event_start, event_end, look_back, event_type = 'weekday'):
    '''
    calculates the NYISO customer baseline given the input parameters
    
    Parameters:
        meter (dataframe): A dataframe consisting of datetime and load values
        event_start (str) : A str coercible to a timestamp for the start of the event
        event_end (str) : A str coercible to a timestamp for the end of the event
        look_back (int) : An integer specifying the number of days to look back
        event_type (str) : A string specifying the type of event (weekday, sunday, saturday)
        
    Returns:
        tuple : A tuple of dataframes which give the baseline and the performance for the event hours
    '''
    start = Timestamp(event_start)
    end = Timestamp(event_end)
    event_hours = date_range(start, end, freq = 'H').hour.tolist()
    event_hours = event_hours[:-1] # accounting for hour ending
    # get max lookback days
    window_start = start.date() - Timedelta(look_back, unit = 'days')
    datelist = date_range(window_start, periods = look_back).date.tolist()
    data = meter[meter.date.isin(datelist)]
    #TODO: weekend cbl logic
    if event_type == 'weekday':
        days = list(range(1,6))
    
    if event_type == 'saturday':
        days = [6]
        
    if event_type == 'sunday':
        days = [7]
    
    #get the seed values
    seed_data = data[data.hour.isin(event_hours)]
    seed_data = seed_data[seed_data['date'] != start.date()]
    seed_data = seed_data.groupby(['date','hour']).mean().reset_index()
    seed_value = seed_data['kW'].max()*0.25
    
    # identify the low usage days
    low_usage = seed_data.groupby(['date']).mean()
    low_usage_dates = low_usage[low_usage.kW < seed_value].index.tolist()
    
    rm_day = [d for d in seed_data.date.to_list() if not d.isoweekday() in days]
    rm_day = list(set(rm_day))
    # get dates and holidays to exclude
    exclude = get_holidays(start.year)
    exclude.extend(low_usage_dates)
    exclude.extend([start.date()-Timedelta(1, unit = 'day')])
    exclude.extend(rm_day)
    
    # get cbl basis days 
    max_days = seed_data.date.unique().tolist()
    days_to_keep = [d for d in max_days if d not in exclude]
    days_to_keep.sort(reverse = True)
    
    if len(days_to_keep) > 10:
        cbl_basis = days_to_keep[:10]
    else:
        cbl_basis = days_to_keep
    
    #get averages and rank them, pick the top 5 of the averages
    averages = seed_data.groupby('date').mean()
    averages = averages[averages.index.isin(cbl_basis)]
    averages['rank'] = averages['kW'].rank(ascending = False)
    baseline_dates = averages[averages['rank'] <= 5].index.tolist()
    
    # calculate baseline as average of the hours for the selected days
    baseline = data[data.date.isin(baseline_dates)]
    baseline = baseline.groupby('hour').mean()
    # actual values during event day
    event_day = meter[meter.dttm >= start.floor('24H')]
    event_day = event_day[event_day.dttm < start.ceil('24H')]
    event_day = event_day.groupby(['id','hour']).mean().reset_index()
    event_day['baseline'] = baseline.kW
    
    #get adjustment factor
    gaf = weather_adjustment(start = start, end = end, meter = meter,
                             basis_dates = cbl_basis)
    # get the adjusted baseline
    event_day['adjustment'] = event_day.baseline * gaf.kW
    # calculate the event performance per hour
    perf = perf_calc(event_day, event_hours)
    
    return event_day, perf
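
The day-selection rule at the heart of the function (keep up to ten eligible basis days, then average the five with the highest usage) sketched standalone with made-up numbers:

import pandas as pd

# Made-up average event-hour loads (kW) for ten eligible basis days.
daily_avg = pd.Series(
    [95.0, 102.0, 88.0, 110.0, 97.0, 105.0, 92.0, 99.0, 101.0, 90.0],
    index=pd.date_range("2021-07-05", periods=10).date,
)
top5 = daily_avg.nlargest(5)   # the five highest-usage days
print(top5.mean())             # 103.4 -- simple average used as the baseline level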
Code example #17
File: data_be.py  Project: josebsalazar/RTGLOBAL
def get_data_BE(run_date: pandas.Timestamp) -> pandas.DataFrame:
    """
    Retrieve daily regional data for run_date and append national data (key 'all') to it
    Parameters
    ----------
    run_date : pandas.Timestamp
        date for which the data shall be downloaded
    
    Returns
    -------
    df : pandas.DataFrame
        table with columns as required by rtlive/data.py API
    """
    def redistribute(group: pandas.DataFrame, col: str) -> pandas.Series:
        gdata = group.groupby('REGION')[col].sum()
        gdata.loc['Brussels'] += gdata.loc['Nan'] * (
            gdata.loc['Brussels'] /
            (gdata.loc['Brussels'] + gdata.loc['Flanders'] +
             gdata.loc['Wallonia']))
        gdata.loc['Flanders'] += gdata.loc['Nan'] * (
            gdata.loc['Flanders'] /
            (gdata.loc['Brussels'] + gdata.loc['Flanders'] +
             gdata.loc['Wallonia']))
        gdata.loc['Wallonia'] += gdata.loc['Nan'] * (
            gdata.loc['Wallonia'] /
            (gdata.loc['Brussels'] + gdata.loc['Flanders'] +
             gdata.loc['Wallonia']))
        gdata.drop(index='Nan', inplace=True)
        gdata = gdata.fillna(0).round(0).astype(int)
        return gdata

    if run_date.date() > datetime.date.today():
        raise ValueError('Run date is in the future. Nice try.')
    if run_date.date() < datetime.date.today():
        # TODO: implement downloading of historic data
        raise NotImplementedError(
            'Downloading with a run_date is not yet supported. '
            f'Today: {datetime.date.today()}, run_date: {run_date}')

    # Download data from Sciensano
    content = requests.get(
        'https://epistat.sciensano.be/Data/COVID19BE_tests.csv',
        verify=False,
    ).content
    df_tests = pandas.read_csv(
        io.StringIO(content.decode('utf-8')),
        sep=',',
        parse_dates=['DATE'],
        usecols=['DATE', 'REGION', 'PROVINCE', 'TESTS_ALL_POS',
                 'TESTS_ALL']).rename(columns={'DATE': 'date'})
    # Reformat data into Rtlive.de format at country level all
    df_tests_per_all_day = (df_tests.assign(region='all').groupby(
        'date', as_index=True).agg(new_cases=('TESTS_ALL_POS', 'sum'),
                                   new_tests=('TESTS_ALL', 'sum'),
                                   region=('region', 'first')))
    df_tests_per_all_day = (df_tests_per_all_day.reset_index().set_index(
        ['region', "date"]).sort_index())
    # Redistribute the nan for the column TESTS_ALL_POS for regions Flanders, Wallonia and Brussels
    df_tests_positive = (df_tests.fillna('Nan').groupby(['date']).apply(
        redistribute,
        'TESTS_ALL_POS').stack().reset_index().rename(columns={
            'REGION': 'region',
            0: 'new_cases'
        }))
    # Redistribute the nan for the column TESTS_ALL for regions Flanders, Wallonia and Brussels
    df_tests_all = (df_tests.fillna('Nan').groupby(['date']).apply(
        redistribute, 'TESTS_ALL').stack().reset_index().rename(columns={
            'REGION': 'region',
            0: 'new_tests'
        }))

    # Combine the total number of tests and the number of positive tests into a basetable
    df_tests_per_region_day = pandas.concat(
        [df_tests_all, df_tests_positive['new_cases']],
        axis=1).set_index(['region', 'date'])

    # Test per province (Ignore the nan's for the moment)
    df_tests_per_province_day = (
        df_tests[df_tests['REGION'] != 'Brussels'].groupby(
            ['PROVINCE', 'date'],
            as_index=False).agg(new_cases=('TESTS_ALL_POS', 'sum'),
                                new_tests=('TESTS_ALL',
                                           'sum')).rename(columns={
                                               'PROVINCE': 'region'
                                           }).set_index(['region', 'date']))
    df_tests_per_province_day.index.name = ('region', 'date')

    # Combine the results at country level with region level
    data = pandas.concat([
        df_tests_per_all_day, df_tests_per_region_day,
        df_tests_per_province_day
    ],
                         axis=0).sort_index()

    data.index = data.index.set_levels(
        data.index.levels[0].map(BE_REGION_INPUT_ABBR.get), 'region')

    assert isinstance(data, pandas.DataFrame)
    assert data.index.names == ('region', 'date')
    assert 'new_cases' in data.columns, f'Columns were: {data.columns}'
    assert 'new_tests' in data.columns, f'Columns were: {data.columns}'
    for col in ['new_cases', 'new_tests']:
        if any(data[col] < 0):
            _log.warning(
                f'Column {col} has {sum(data[col] < 0)} negative entries!! Overriding with NaN...'
            )
            data.loc[data[col] < 0, col] = numpy.nan

    return data
Code example #18
def print_header_terminal(date: pd.Timestamp):
    text = "Calculando dia {}".format(date.date())
    print("="*len(text))
    print(text)
Code example #19
    def download(self,
                 start_date: pd.Timestamp = None,
                 end_date: pd.Timestamp = None) -> int:
        # refresh cache
        if start_date is not None:
            start_date = pd.Timestamp(start_date)
        start_date = start_date or self.min_date()
        cache = dict()
        #for symbol, config in self.config.items():
        for cfg in self.config:
            symbol = cfg.download_cfg.symbol
            params = dict(
                symbol=symbol,
                data="daily",
                maxrecords=np.busday_count(start_date.date(),
                                           pd.Timestamp.today().date()) + 1,
                volume="contract",
                order="asc",
                dividends="false",
                backadjust="false",
                daystoexpiration=1,
                contractroll="expiration")
            self.logger.info(
                f"Downloading {symbol} from {self.__class__.__name__} {self.name()}"
            )
            resp = self.http_get(
                "https://www.barchart.com/proxies/timeseries/queryeod.ashx",
                params=params)
            df_barchart = pd.read_csv(io.StringIO(resp.data.decode('utf-8')),
                                      header=None)
            df_barchart.columns = [
                "symbol", "as_of", "open", "high", "low",
                TypeColumn.close.value, "volume", "oi"
            ][:len(df_barchart.columns)]
            df_barchart.as_of = pd.to_datetime(df_barchart.as_of)
            df_barchart = df_barchart[df_barchart.as_of >= start_date]
            df = df_barchart.loc[:, ("open", "high", "low",
                                     TypeColumn.close.value,
                                     "as_of")]  # Store OHLC
            # pivot df so OHLC are split by row
            df_melt = df.melt(id_vars="as_of",
                              var_name="type",
                              value_name="price")
            df_melt['market'] = self.name()
            for column_name, column_value in cfg.commodity_cfg.__dict__.items(
            ):
                df_melt[column_name] = column_value
            product = cfg.download_cfg.product
            df_melt['product'] = product
            # df['type'] = TypeColumn.close.value
            expiry = cfg.download_cfg.expiry
            if expiry is not None:
                maturity = pd.to_datetime(expiry)
                df_melt['offset'] = product_to_date(
                    maturity, product) - product_to_date(
                        df_melt.as_of.dt, product)
            else:
                df_melt[
                    'offset'] = 0  # If no maturity, then it is supposed to be a stock or a spot value
            cache[symbol] = df_melt

        concat_df = pd.concat(cache.values(), axis=0)
        cache_df = pd.pivot_table(concat_df,
                                  values="price",
                                  index="as_of",
                                  columns=df_index_columns)
        self.cache = cache_df

        return super().download(start_date, end_date)
Code example #20
File: iex_price_manager.py  Project: quant-ops/azul
    def _minute_dataframe_for_date(
            self, ticker: str, start_timestamp: pd.Timestamp) -> pd.DataFrame:
        ret_df = pd.DataFrame()

        df = pyEX.chartDF(ticker, timeframe='1d', date=start_timestamp)

        if df.empty:
            return ret_df

        df = df.reset_index()
        df['volume'] = df['volume'].astype('int')
        df['date'] = df['date'].astype('str')
        df['minute'] = df['minute'].astype('str')
        df['datet'] = df['date'] + ' ' + df['minute']
        df['dividend'] = 0.0
        df['split'] = 1.0
        df.drop([
            'date', 'minute', 'average', 'changeOverTime', 'close', 'high',
            'label', 'low', 'marketAverage', 'marketChangeOverTime',
            'marketNotional', 'marketNumberOfTrades', 'notional',
            'numberOfTrades', 'open', 'volume'
        ],
                axis=1,
                level=None,
                inplace=True,
                errors='ignore')
        df.rename(columns={
            'datet': 'date',
            'marketClose': 'close',
            'marketHigh': 'high',
            'marketLow': 'low',
            'marketOpen': 'open',
            'marketVolume': 'volume'
        },
                  inplace=True)
        df.date = pd.to_datetime(df.date,
                                 errors='coerce',
                                 utc=False,
                                 infer_datetime_format=True)
        df = df[~df.date.isnull()]
        df.set_index('date',
                     drop=True,
                     append=False,
                     inplace=True,
                     verify_integrity=True)

        utc = pytz.utc
        nytz = pytz.timezone('US/Eastern')
        df = df.tz_localize(nytz,
                            axis=0,
                            level=None,
                            copy=False,
                            ambiguous='raise')
        df.index = df.index.tz_convert(utc)
        if not (pd.Series(['close', 'high', 'low', 'open']).isin(
                df.columns).all()):
            log.info(
                "Skipping {0} for {1}, not all columns ({2}) received".format(
                    ticker, start_timestamp.date(), str(df.columns)))
            return ret_df

        df = self._fixna(df, ticker)
        df.index = df.index.tz_convert(None)

        # Re-arrange them
        ret_df = df[self._cols]
        return ret_df