Code example #1
import os

import numpy as np
import xarray as xr
from pandas import DateOffset, date_range, to_datetime


def compress_netcfd(folder_path, start_date, out_folder, file_name,
                    num_of_rivids):
    """
    Takes the 52 individual ensembles and combines them into one compact NetCDF file, saving disk space in the process.

    Parameters
    ----------

    folder_path: str
        The path to the folder containing the 52 ensemble forecast files in NetCDF format

    start_date: str
        The start date in YYYYMMDD format.

    out_folder: str
        The path to the folder where the more compact NetCDF file will be written.

    file_name: str
        The name of the region. For example, if the files follow the pattern "Qout_africa_continental_1.nc",
        this argument would be "Qout_africa_continental".

    num_of_rivids: int
        The number of streams that are contained in the region.
    """

    # Based on 15 day forecast
    forecast_day_indices = np.array(
        [0, 8, 16, 24, 32, 40, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84],
        dtype=np.int8)

    # Based on 10 day forecast
    # Excluding the first day because we already have initialization from the normal forecasts
    high_res_forecast_day_indices = np.array(
        [24, 48, 72, 92, 100, 108, 112, 116, 120, 124])

    start_datetime = to_datetime(start_date, infer_datetime_format=True)
    dates = date_range(start_datetime + DateOffset(1), periods=15)
    high_res_dates = date_range(start_datetime + DateOffset(1), periods=10)

    # Ensemble Dimensions
    #  1) Rivid
    #  2) Number of forecast days (i.e. 15 in a 15 day forecast)
    #  3) Number of ensembles

    ensembles = np.zeros((num_of_rivids, 15, 51), dtype=np.float32)
    initialization = np.zeros((num_of_rivids, ), dtype=np.float32)

    for forecast_number in range(1, 52):
        file = os.path.join(folder_path,
                            "{}_{}.nc".format(file_name, forecast_number))

        tmp_dataset = xr.open_dataset(file)
        streamflow = tmp_dataset['Qout'].data
        streamflow = streamflow[:, forecast_day_indices]

        if forecast_number == 1:
            initialization[:] = streamflow[:, 0]
            rivids = tmp_dataset['rivid'].data
            lat = tmp_dataset['lat'].data
            lon = tmp_dataset['lon'].data
            z = tmp_dataset['z'].data

        ensembles[:, :, forecast_number - 1] = streamflow[:, 1:]

        tmp_dataset.close()

    # High Res Forecast
    file = os.path.join(folder_path, "{}_52.nc".format(file_name))

    tmp_dataset = xr.open_dataset(file)

    high_res_forecast_data = tmp_dataset["Qout"].data
    high_res_forecast_data = high_res_forecast_data[:,
                                                    high_res_forecast_day_indices]

    tmp_dataset.close()

    data_variables = {
        "Qout": (['rivid', 'date', 'ensemble_number'], ensembles),
        "Qout_high_res": (['rivid', 'date_high_res'], high_res_forecast_data)
    }

    coords = {
        'rivid': rivids,
        'date': dates,
        'date_high_res': high_res_dates,
        'ensemble_number': np.arange(1, 52, dtype=np.uint8),
        'initialization_values': ('rivid', initialization),
        'lat': ('rivid', lat),
        'lon': ('rivid', lon),
        'z': ('rivid', z),
        'start_date': start_datetime
    }

    xarray_dataset = xr.Dataset(data_variables, coords)
    xarray_dataset.to_netcdf(path=os.path.join(out_folder,
                                               '{}.nc'.format(start_date)),
                             format='NETCDF4')
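
A minimal usage sketch for the function above; the folder paths, region name, and rivid count are illustrative placeholders rather than values from the original project.

# Hypothetical call: merge the 52 per-ensemble files for 2020-01-01 into a single
# compact NetCDF file written as compact_forecasts/20200101.nc.
compress_netcfd(folder_path="forecasts/20200101",
                start_date="20200101",
                out_folder="compact_forecasts",
                file_name="Qout_africa_continental",
                num_of_rivids=40000)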
Code example #2
File: holiday.py Project: abhishek18620/yify_scraper
        ----------
        other : holiday calendar
        inplace : bool (default=False)
            If True, set this calendar's rules to the merged holidays; otherwise return the merged array of Holidays.
        """
        holidays = self.merge_class(self, other)
        if inplace:
            self.rules = holidays
        else:
            return holidays


USMemorialDay = Holiday('MemorialDay',
                        month=5,
                        day=31,
                        offset=DateOffset(weekday=MO(-1)))
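# The offset above, DateOffset(weekday=MO(-1)), rolls May 31 back to the previous
# Monday (or keeps it if May 31 already is one), i.e. the last Monday in May.
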
USLaborDay = Holiday('Labor Day',
                     month=9,
                     day=1,
                     offset=DateOffset(weekday=MO(1)))
USColumbusDay = Holiday('Columbus Day',
                        month=10,
                        day=1,
                        offset=DateOffset(weekday=MO(2)))
USThanksgivingDay = Holiday('Thanksgiving',
                            month=11,
                            day=1,
                            offset=DateOffset(weekday=TH(4)))
USMartinLutherKingJr = Holiday('Dr. Martin Luther King Jr.',
                               start_date=datetime(1986, 1, 1),
                               month=1,
Code example #3
    def processing_time_offsets(  # pylint: disable=too-many-locals
        self,
        df: pd.DataFrame,
        query_object: QueryObject,
    ) -> CachedTimeOffset:
        query_context = self._query_context
        # ensure query_object is immutable
        query_object_clone = copy.copy(query_object)
        queries: List[str] = []
        cache_keys: List[Optional[str]] = []
        rv_dfs: List[pd.DataFrame] = [df]

        time_offsets = query_object.time_offsets
        outer_from_dttm = query_object.from_dttm
        outer_to_dttm = query_object.to_dttm
        for offset in time_offsets:
            try:
                query_object_clone.from_dttm = get_past_or_future(
                    offset,
                    outer_from_dttm,
                )
                query_object_clone.to_dttm = get_past_or_future(
                    offset, outer_to_dttm)
            except ValueError as ex:
                raise QueryObjectValidationError(str(ex)) from ex
            # make sure the subquery uses the main query's WHERE clause
            query_object_clone.inner_from_dttm = outer_from_dttm
            query_object_clone.inner_to_dttm = outer_to_dttm
            query_object_clone.time_offsets = []
            query_object_clone.post_processing = []

            if not query_object.from_dttm or not query_object.to_dttm:
                raise QueryObjectValidationError(
                    _("An enclosed time range (both start and end) must be specified "
                      "when using a Time Comparison."))
            # `offset` is added to the hash function
            cache_key = self.query_cache_key(query_object_clone,
                                             time_offset=offset)
            cache = QueryCacheManager.get(cache_key, CacheRegion.DATA,
                                          query_context.force)
            # check whether the cache was hit
            if cache.is_loaded:
                rv_dfs.append(cache.df)
                queries.append(cache.query)
                cache_keys.append(cache_key)
                continue

            query_object_clone_dct = query_object_clone.to_dict()
            # rename metrics: SUM(value) => SUM(value) 1 year ago
            metrics_mapping = {
                metric: TIME_COMPARISON.join([metric, offset])
                for metric in get_metric_names(
                    query_object_clone_dct.get("metrics", []))
            }
            join_keys = [
                col for col in df.columns if col not in metrics_mapping.keys()
            ]

            result = self._qc_datasource.query(query_object_clone_dct)
            queries.append(result.query)
            cache_keys.append(None)

            offset_metrics_df = result.df
            if offset_metrics_df.empty:
                offset_metrics_df = pd.DataFrame({
                    col: [np.NaN]
                    for col in join_keys + list(metrics_mapping.values())
                })
            else:
                # 1. normalize df, set dttm column
                offset_metrics_df = self.normalize_df(offset_metrics_df,
                                                      query_object_clone)

                # 2. rename extra query columns
                offset_metrics_df = offset_metrics_df.rename(
                    columns=metrics_mapping)

                # 3. set time offset for index
                # TODO: add x-axis to QueryObject, potentially as an array for
                #  multi-dimensional charts
                granularity = query_object.granularity
                index = granularity if granularity in df.columns else DTTM_ALIAS
                offset_metrics_df[index] = offset_metrics_df[
                    index] - DateOffset(**normalize_time_delta(offset))

            # df left join `offset_metrics_df`
            offset_df = df_utils.left_join_df(
                left_df=df,
                right_df=offset_metrics_df,
                join_keys=join_keys,
            )
            offset_slice = offset_df[metrics_mapping.values()]

            # set offset_slice to cache and stack.
            value = {
                "df": offset_slice,
                "query": result.query,
            }
            cache.set(
                key=cache_key,
                value=value,
                timeout=self.get_cache_timeout(),
                datasource_uid=query_context.datasource.uid,
                region=CacheRegion.DATA,
            )
            rv_dfs.append(offset_slice)

        rv_df = pd.concat(rv_dfs, axis=1, copy=False) if time_offsets else df
        return CachedTimeOffset(df=rv_df,
                                queries=queries,
                                cache_keys=cache_keys)
Code example #4
from dateutil.relativedelta import TU
from pandas import DateOffset


def following_tuesday_every_four_years_observance(dt):
    return dt + DateOffset(years=(4 - (dt.year % 4)) % 4, weekday=TU(1))
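
A quick illustration of the observance above, using a date chosen only for demonstration: in a year divisible by four the year component of the offset is zero, so the result is simply the next Tuesday on or after the given date.

from pandas import Timestamp

# 2020 % 4 == 0, so no years are added and the date moves to the following Tuesday.
following_tuesday_every_four_years_observance(Timestamp('2020-11-01'))
# -> Timestamp('2020-11-03 00:00:00')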
Code example #5
# 2010 but that does not appear to have been the case previously.
# We'll assume that this will be the behavior from now on.
AnzacDayNonMondayized = anzac_day(end_date='2010')
AnzacDay = anzac_day(observance=sunday_to_monday, start_date='2010')

# When Easter Monday and Anzac Day coincided in 2011, Easter Tuesday was
# also observed as a public holiday. Note that this isn't defined as a
# rule, because it will happen next in 2095 (and then in  2163), and
# there isn't a great way to tell how this will be handled at that point.
EasterTuesday2011AdHoc = Timestamp('2011-04-26', tz='UTC')

QueensBirthday = Holiday(
    "Queen's Birthday",
    month=6,
    day=1,
    offset=[DateOffset(weekday=MO(2))],
)

LastTradingDayBeforeChristmas = Holiday(
    'Last Trading Day Before Christmas',
    month=12,
    day=24,
    start_date='2010',
    observance=previous_friday,
)
Christmas = christmas()
WeekendChristmas = weekend_christmas()
BoxingDay = boxing_day()
WeekendBoxingDay = weekend_boxing_day()

LastTradingDayOfCalendarYear = Holiday(
Code example #6
File: __init__.py Project: rubensmg/totvs_rm
def process_conciliacao_emprestimo(pfunc: DataFrame, ppessoa: DataFrame,
                                   psecao: DataFrame, pffinanc: DataFrame,
                                   pparam: DataFrame,
                                   pparamadicionais: DataFrame,
                                   emprestimo: DataFrame,
                                   conciliacao_emprestimo: DataFrame):
    """
        TODO: doc string
    """

    _pffinanc_valor_averbado = (pffinanc.merge(
        pparamadicionais,
        left_on=['codcoligada', 'anocomp', 'mescomp'],
        right_on=[
            'codcoligada', 'anocompcarolpffinanc', 'mescompcarolpffinanc'
        ],
        how='inner')[[
            'chapa', 'codcoligada', 'anocomp', 'mescomp', 'valor'
        ]].assign(valor=lambda df: df['valor'].astype(float)).groupby(
            by=['codcoligada', 'chapa', 'anocomp', 'mescomp'
                ])['valor'].sum().reset_index().rename(
                    {'valor': 'pffinac_valoraverbado'}, axis=1))

    _emprestimo_periodo = (emprestimo.assign(
        _vencimento_parcela=lambda df: to_datetime(
            df['vencimento_parcela'], format='%Y-%m-%dT%H:%M:%S.%f')).assign(
                anocomp=lambda df: df['_vencimento_parcela'].dt.year).assign(
                    mescomp=lambda df: df['_vencimento_parcela'].dt.month))

    _pparam_last_comp = (pparam.assign(_datelastcomp=lambda df: to_datetime(
        df['mescomp'].astype(str) + '-' + df['anocomp'].astype(str),
        format='%m-%Y') - DateOffset(months=1)).assign(
            anocomp=lambda df: df['_datelastcomp'].dt.year).assign(
                mescomp=lambda df: df['_datelastcomp'].dt.month))

    df = (pfunc.merge(
        ppessoa, left_on=['codpessoa'], right_on=['codigo'],
        how='inner').merge(
            psecao,
            left_on=['codcoligada', 'codsecao'],
            right_on=['codcoligada', 'codigo'],
            how='inner').merge(
                _pparam_last_comp,
                left_on=['codcoligada'],
                right_on=['codcoligada'],
                how='inner').merge(
                    _pffinanc_valor_averbado,
                    left_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'],
                    right_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'],
                    how='left').
          assign(cnpj=lambda df: df['cgc'].str.replace(r'\.|\/|\-', '')).merge(
              _emprestimo_periodo,
              left_on=['cpf', 'cnpj', 'anocomp', 'mescomp'],
              right_on=['cpf', 'cnpj', 'anocomp', 'mescomp'],
              how='inner').assign(
                  periodo=lambda df: df['anocomp'].astype(str) + df['mescomp'].
                  astype(str).str.pad(2, side='left', fillchar='0')).merge(
                      conciliacao_emprestimo,
                      left_on=[
                          'cpf', 'cnpj', 'codigo_emprestimo',
                          'numero_da_parcela', 'periodo'
                      ],
                      right_on=[
                          'cpffuncionario', 'cnpj', 'codigo_emprestimo',
                          'numero_da_parcela', 'periodo'
                      ],
                      how='inner').query('status_parcela == "Aberta"'))

    if len(df) > 0:
        df.loc[df['pffinac_valoraverbado'].notnull(),
               'valor_averbado'] = df['pffinac_valoraverbado']
        df.loc[df['valor_averbado'] > 0, 'status_parcela'] = 'Paga'
        df = df.assign(valor_nao_averbado=lambda df: df['valor_averbado'] - df[
            'valor_parcela'])

    return df[[
        'cnpj', 'cpffuncionario', 'periodo', 'valor_averbado',
        'valor_nao_averbado', 'motivo', 'status_parcela', 'numero_da_parcela',
        'codigo_emprestimo'
    ]]
Code example #7
 def regular_holidays(self):
     return AbstractHolidayCalendar(rules=[
         USNewYearsDay,
         Holiday(
             name="New Year's Day",
             month=1,
             day=2,
             observance=sunday_to_monday,
         ),
         Holiday(
             name="New Year's Day",
             month=1,
             day=3,
             observance=sunday_to_monday,
         ),
         Holiday(  # second monday of january
             name="Coming of Age Day",
             month=1,
             day=1,
             offset=DateOffset(weekday=MO(2)),
         ),
         Holiday(
             name="National foundation day",
             month=2,
             day=11,
             observance=sunday_to_monday,
         ),
         Holiday(name="Vernal Equinox",
                 month=3,
                 day=20,
                 observance=vernal_equinox),
         Holiday(
             name="Showa day",
             month=4,
             day=29,
             observance=sunday_to_monday,
         ),
         Holiday(
             name="Constitution memorial day",
             month=5,
             day=3,
             observance=sunday_to_monday,
         ),
         Holiday(
             name="Greenery day",
             month=5,
             day=4,
             observance=sunday_to_monday,
         ),
         Holiday(
             name="Children's day",
             month=5,
             day=5,
             observance=sunday_to_monday,
         ),
         Holiday(
             name="Marine day",
             month=7,
             day=1,
             offset=DateOffset(weekday=MO(3)),
         ),
         Holiday(
             name="Mountain day",
             month=8,
             day=11,
             observance=sunday_to_monday,
         ),
         Holiday(
             name="Respect for the aged day",
             month=9,
             day=1,
             offset=DateOffset(weekday=MO(3)),
         ),
         Holiday(
             name="Autumnal equinox",
             month=9,
             day=22,
             observance=autumnal_equinox,
         ),
         Holiday(
             name="Health and sports day",
             month=10,
             day=1,
             offset=DateOffset(weekday=MO(2)),
         ),
         Holiday(
             name="Culture day",
             month=11,
             day=3,
             observance=sunday_to_monday,
         ),
         Holiday(
             name="Labor Thanksgiving Day",
             month=11,
             day=23,
             observance=sunday_to_monday,
         ),
         Holiday(
             name="Emperor's Birthday",
             month=12,
             day=23,
             observance=sunday_to_monday,
         ),
         Holiday(
             name="Before New Year's Day",
             month=12,
             day=31,
             observance=sunday_to_monday,
         ),
     ])
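
A hedged usage sketch for the calendar above, assuming an instance of the enclosing (unshown) class is available as cal_obj; in the original code regular_holidays may well be a @property, in which case the call parentheses would be dropped.

# Hypothetical usage: list the observed dates of the rule-based holidays in 2019.
jp_rules = cal_obj.regular_holidays()
holidays_2019 = jp_rules.holidays(start='2019-01-01', end='2019-12-31')  # DatetimeIndex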
Code example #9
File: bot.py Project: snapbuy/kiwoom-1
    def history(
            self,
            code,
            period,
            unit=None,
            start=None,
            end=None,
            path=None,
            merge=True,
            warning=True,
            prev_next='0'
    ):
        """
        Download historical market data of given code and save it as csv to given path

        :param code: str
            unique code of stock or sector
        :param period: str
            one of tick, min, day, week, month and year
        :param unit: int
            1, 3, 5, 10, 30 etc.. (cf. 1 bar = unit * period)
        :param start: str
            string of start day in format 'YYYYMMDD'
        :param end: str
            string of end day in format 'YYYYMMDD'. if None, until now by default.
        :param path: str
            path to save downloaded data
        :param merge: bool
            whether to merge data with existing file or to overwrite it
        :param warning: bool
            turn on/off the warning message if any
        :param prev_next: str
            this param is given by the response from the server. default is '0'
        """
        # Wait for default request limit, 3600 ms
        QTest.qWait(history.REQUEST_LIMIT_TIME)

        ctype = history.get_code_type(code)  # ctype = 'stock' | 'sector'
        tr_code = history.get_tr_code(period, ctype)

        """
            Setting args just for once.
        """
        if prev_next == '0':
            # In case path is '' or None
            if not path:
                path = getcwd()

            # To share variables with Slot
            kwargs = effective_args(locals(), remove=['ctype', 'tr_code'])
            self.share.remove_single(name())
            self.share.update_single(name(), 'error', False)
            self.share.update_single(name(), 'restart', False)
            self.share.update_single(name(), 'complete', False)
            self.share.update_single(name(), 'impossible', False)

            # To check format of input dates
            if 'start' in kwargs:
                if not history.is_date(start):
                    raise ValueError(f"Given 'start' {start} is not a valid date.")
            if 'end' in kwargs:
                if not history.is_date(end):
                    raise ValueError(f"Given 'end' {end} is not a valid date.")

            """
                Check 'start' and 'end' points to save downloading time. 
            """
            if merge:
                try:
                    file = join(path, code + '.csv')
                    col = history.get_datetime_column(period)
                    df = read_csv(
                        file,
                        index_col=[col],
                        parse_dates=[col],
                        encoding=config.ENCODING
                    )

                    if period in ['tick', 'min']:
                        # Last tick for stock is 15:30 and for sector is 18:00
                        h, m = (15, 30) if ctype is history.STOCK else (18, 00)  # else for sector
                        last_day = date(df.index[-1])
                        last_tick_of_day = Timestamp(df.index[-1]).replace(hour=h, minute=m)
                        download_completed = last_tick_of_day <= df.index[-1]

                        # To push 'start' date further as much as possible. If None, set newly.
                        if 'start' not in kwargs or date(kwargs['start']) <= last_day:
                            if download_completed:
                                # Start from the day after last day
                                kwargs['start'] = str((last_day + DateOffset(1)).date()).replace('-', '')
                            else:
                                # Start from the last day
                                kwargs['start'] = str(last_day).replace('-', '')

                        # If downloading is not needed, just return
                        if 'end' in kwargs:
                            if download_completed:
                                if date(kwargs['end']) <= last_day:
                                    self.share.update_single(name(), 'complete', True)
                                    return

                    else:  # if period in ['day', 'week', 'year']
                        last_day = date(df.index[-1])
                        # To push 'start' date further as much as possible. If None, set newly.
                        if 'start' not in kwargs or date(kwargs['start']) <= last_day:
                            # Start from the last day
                            kwargs['start'] = str(last_day).replace('-', '')

                        # If downloading is not needed, just return
                        if 'end' in kwargs:
                            if date(kwargs['end']) < last_day:
                                self.share.update_single(name(), 'complete', True)
                                return

                    # Once read, use later in Server.history_to_csv() to increase efficiency
                    self.share.update_single(name(), 'file', df)

                # If any exception, just skip
                except Exception as err:
                    pass

            """
                Update and print arguments. 
            """
            # Done arg setting
            self.share.update_args(name(), kwargs)

            # Print args
            f = lambda key: f"'{kwargs[key]}'" if key in kwargs else None
            print(f"{{code={f('code')}, start={f('start')}, end={f('end')}, period={f('period')}}}")

        """
            Start downloading.
        """
        # Check requesting status
        self.share.single['histories']['nrq'] += 1
        if history.SPEEDING:
            if self.share.get_single('histories', 'nrq') >= history.REQUEST_LIMIT_TRY:
                # Set back to default configuration
                if self.share.get_single('histories', 'cnt') == 0:
                    self.share.update_single(name(), 'impossible', True)
                self.share.update_single(name(), 'restart', True)
                self.api.unloop()
                return

        # Finally request data to server
        for key, val in history.inputs(tr_code, code, unit, end):
            self.api.set_input_value(key, val)
        scr_no = self.scr.alloc(tr_code, code)

        # If comm_rq_data returns non-zero error code, restart downloading
        if self.api.comm_rq_data(name(), tr_code, prev_next, scr_no) != 0:
            self.share.update_single(name(), 'impossible', True)
            self.share.update_single(name(), 'restart', True)
            self.api.unloop()
            return

        # Wait response from the server
        self.api.loop()
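
A minimal hedged usage sketch for the method above; bot stands for an instance of the class that defines history(), and the stock code, dates, and path are placeholders.

# Hypothetical call: download daily bars for one code into ./data/<code>.csv,
# merging with any data already saved there.
bot.history(code='005930', period='day', start='20200102', end='20201230', path='./data')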
Code example #10
File: utils.py Project: jtallieu/jira-jupyter
from pandas import DateOffset, bdate_range


def business_hours(start, end):
    """Computes the number of working hours between two dates. (There's gotta be a better way to do this.)"""
    return len(bdate_range(start, end, freq=DateOffset(hours=1)))
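
For comparison, a hedged alternative sketch that counts only 09:00-17:00 weekday hours by using pandas' BusinessHour ('BH') frequency instead of a plain hourly DateOffset; this is an illustration, not the original project's implementation.

from pandas import date_range


def strict_business_hours(start, end):
    """Count only the 09:00-17:00 weekday hours between two dates."""
    return len(date_range(start, end, freq='BH'))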
Code example #11
class Command(BaseCommand):
    help = "Run multiple searches and log the cheapest and best value switches"
    requires_migrations_checks = True

    base_data = {
        'data_mining': True,
        'source_market': 'UK',
        'place_name': '',
        'latitude': '0',
        'longitude': '0',
        'occupants': '2',
        'currency': 'gbp',
        'county': '',
    }

    base_check_in = datetime.strptime('2017-05-05', '%Y-%m-%d')
    check_in_range = date_range(base_check_in,
                                base_check_in + DateOffset(days=60))

    cities = [
        {
            'city': 'New York',
            'state': 'NY',
            'country': 'US',
        },
        {
            'city': 'Paradise',
            'state': 'NV',
            'country': 'US',
        },
        {
            'city': 'Austin',
            'state': 'TX',
            'country': 'US',
        },
        {
            'city': 'London',
            'state': 'England',
            'country': 'GB',
        },
        {
            'city': 'Barcelona',
            'state': 'CT',
            'country': 'ES',
        },
        {
            'city': 'Milan',
            'state': 'Lombardy',
            'country': 'IT',
        },
        {
            'city': 'Shanghai',
            'state': 'Shanghai',
            'country': 'CN',
        },
        {
            'city': 'Bangkok',
            'state': '',
            'country': 'TH',
        },
        {
            'city': 'Singapore',
            'state': '',
            'country': 'SG',
        },
    ]

    stay_durations = [3, 4, 5, 6]

    def handle(self, *args, **options):
        try:
            os.rename('analysis_output.csv', 'check_file_access.csv')
            os.rename('check_file_access.csv', 'analysis_output.csv')
        except OSError:
            raise Exception(
                'Destination file is still open. Please close before running!')

        all_stays = []

        for city in self.cities:
            for check_in in self.check_in_range:
                for duration in self.stay_durations:
                    check_out = check_in + DateOffset(days=duration)
                    check_in_range = date_range(check_in,
                                                check_out - DateOffset(days=1))
                    data = self.base_data.copy()
                    data.update({
                        'checkIn': check_in,
                        'checkOut': check_out,
                        'check_in_range': check_in_range,
                        'country': city['country'],
                        'state': city['state'],
                        'city': city['city'],
                    })

                    stays = tasks.execute_search(data, '', None)
                    result_count = len(stays)
                    if result_count == 0:
                        continue

                    stays.query('hotel_2_id != -1', inplace=True)
                    grouping_columns = [
                        'primary_star_rating', 'min_review_tier'
                    ]

                    stays.sort_values('stay_cost', inplace=True)
                    unrestricted_low_cost_stays = stays.groupby(
                        grouping_columns).nth(0)
                    unrestricted_low_cost_stays['restricted'] = False

                    stays.sort_values('cost_per_quality_unit', inplace=True)
                    unrestricted_best_value_stays = stays.groupby(
                        grouping_columns).nth(0)
                    unrestricted_best_value_stays['restricted'] = False

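                    # NaN compares unequal to itself, so these self-equality checks keep
                    # only rows where both benchmark stay costs are present (non-NaN).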
                    switches_with_both_benchmarks = \
                        'entire_stay_cost_1 == entire_stay_cost_1 \
                        and entire_stay_cost_2 == entire_stay_cost_2'

                    stays.query(switches_with_both_benchmarks, inplace=True)

                    stays.sort_values('stay_cost', inplace=True)
                    restricted_low_cost_stays = stays.groupby(
                        grouping_columns).nth(0)
                    restricted_low_cost_stays['restricted'] = True

                    stays.sort_values('cost_per_quality_unit', inplace=True)
                    restricted_best_value_stays = stays.groupby(
                        grouping_columns).nth(0)
                    restricted_best_value_stays['restricted'] = True

                    scenarios = [
                        unrestricted_low_cost_stays,
                        unrestricted_best_value_stays,
                        restricted_low_cost_stays,
                        restricted_best_value_stays,
                    ]

                    stays = concat(scenarios)
                    stays.reset_index(inplace=True)
                    stays.drop_duplicates(inplace=True)

                    stays['city'] = city['city']
                    stays['check_in'] = check_in
                    stays['duration'] = duration
                    stays['result_count'] = result_count

                    all_stays.append(stays)
                    logger.warn('{}, {:%Y-%m-%d}, {}'.format(
                        city['city'], check_in, duration))

        stays = concat(all_stays).to_csv('analysis_output.csv', index=False)
Code example #12
 def tf(self, d1, d2):
     """Calculates time fraction (in year fraction) between two dates given
     day count convention"""
     d1 = self.adjust(d1)
     d2 = self.adjust(d2)
     # Save the adjustment state and set it to None, so we can safely use the
     # days and dib functions on the "date splits" we produce for some
     # day counts
     state = self.adj
     self.adj = None
     if self.dc == 'ACT/ACT ICMA':
         raise AttributeError('The time fraction function cannot be used '
                              'for the %s convention' % self.dc)
     if not (self.dc == 'ACT/ACT ISDA' or self.dc == 'ACT/ACT AFB' or
             self.dc == '1/1'):
         yf = self.days(d1, d2) / self.dib(d1, d2)
     elif self.dc == 'ACT/ACT ISDA':
         # We could treat everything as an array, we leave the dual
         # implementation because vectorizing is clumsy. So, we just
         # mimic the interface
         if isinstance(d1, Timestamp) and isinstance(d2, Timestamp):
             # We place the assertion here to save some thought in the
             # recursion (we check one by one or delegate)
             assert d1 <= d2, 'First date must be smaller or equal to ' \
                              'second date'
             if d1.year == d2.year:
                 yf = self.days(d1, d2) / self.dib(d1, d2)
             else:
                 ey1 = to_datetime(str(d1.year) + '-12-31')
                 ey2 = to_datetime(str(d2.year - 1) + '-12-31')
                 yf = (d2.year - d1.year - 1) + \
                      (self.days(d1, ey1) / self.dib(d1, d1)) + \
                      (self.days(ey2, d2) / self.dib(d2, d2))
         else:  # This is the dreaded vectorized case that, for now,
             # will be dealt by simulating the interface
             result = list()
             f = result.append
             for t1, t2 in broadcast(d1, d2):
                 f(self.tf(t1, t2))
             yf = asarray(result, dtype='float64')
     elif self.dc == '1/1':
         # See notes in the ACT/ACT sections about vectorization
         if isinstance(d1, Timestamp) and isinstance(d2, Timestamp):
             # We place the assertion here to save some thought in the
             # recursion (we check one by one or delegate)
             assert d1 <= d2, 'First date must be smaller or equal to ' \
                              'second date'
             if (d1.day == d2.day and d1.month == d2.month) \
                     or (d1.month == 2 and d2.month == 2 and
                         d1.day in [28, 29] and d2.day in [28, 29]):
                 yf = int(0.5 + self.days(d1, d2) / self.dib(d1, d2))
             else:
                 # This is the same as ACT/ACT. We tweak the DC and bring
                 # it back. This is computationally costly (as a parsing
                 # of the day count is involved at each step), but safer
                 # from an implementation perspective.
                 self.dc = 'act/act isda'
                 yf = self.tf(d1, d2)
                 self.dc = '1/1'
         else:  # This is the dreaded vectorized case that, for now,
             # will be dealt by simulating the interface
             result = list()
             f = result.append
             for t1, t2 in broadcast(d1, d2):
                 f(self.tf(t1, t2))
             yf = asarray(result, dtype='float64')
     elif self.dc == 'ACT/ACT AFB':
         if isinstance(d1, Timestamp) and isinstance(d2, Timestamp):
             # We place the assertion here to save some thought in the
             # recursion (we check one by one or delegate)
             assert d1 <= d2, 'First date must be smaller or equal to ' \
                              'second date'
             # We need to loop back from d2 counting the number of
             # years we can subtract until we close interval. Note that
             # every time we fall on a Feb 29th, a year offset will land
             # us on Feb 28th. In this cases, we need to add the missing
             # day fraction (1/366). Note that we add it only once,
             # and not the number of leap days in interval divided by
             # 366. Why? While the documents are not super clear about
             # this, it seems reasonable to infer that from the "counting
             # back" rule, where we are always subtracting entire years.
             #
             # 2004-02-28 to 2008-02-27 = 3 + 365/366
             # 2004-02-28 to 2008-02-28 = 4
             # 2004-02-28 to 2008-02-29 = 4 + 1/366
             # 2004-02-28 to 2012-02-28 = 8
             # 2004-02-28 to 2012-02-29 = 8 + 1/366 (and NOT 2/366)
             n = 0
             offset = 0
             while d2 - DateOffset(years=1) >= d1:
                 if d2.day == 29 and d2.month == 2:
                     offset += 1 / 366
                 n += 1
                 d2 = d2 - DateOffset(years=1)
             yf = n + offset + (self.days(d1, d2) / self.dib(d1, d2))
         else:  # This is the dreaded vectorized case that, for now,
             # will be dealt by simulating the interface
             result = list()
             f = result.append
             for t1, t2 in broadcast(d1, d2):
                 f(self.tf(t1, t2))
             yf = asarray(result, dtype='float64')
     else:
         raise NotImplementedError('Day count %s not supported' % self.dc)
     # Return state
     self.adj = state
     return yf
Code example #13
    def get_device_data(self,
                        start_date=None,
                        end_date=None,
                        frequency='1H',
                        clean_na=None):
        '''
        Based on code snippet from Marc Roig:
        # I2CAT RESEARCH CENTER - BARCELONA - MARC ROIG ([email protected])
        '''

        std_out(f'Requesting data from Dades Obertes API')
        std_out(f'Device ID: {self.id}')
        self.get_device_sensors()
        self.get_device_location()

        request = self.API_BASE_URL
        request += f'codi_eoi={self.id}'

        if start_date is not None and end_date is not None:
            request += "&$where=data between " + to_datetime(start_date).strftime("'%Y-%m-%dT%H:%M:%S'") \
                    + " and " + to_datetime(end_date).strftime("'%Y-%m-%dT%H:%M:%S'")
        elif start_date is not None:
            request += "&$where=data >= " + to_datetime(start_date).strftime(
                "'%Y-%m-%dT%H:%M:%S'")
        elif end_date is not None:
            request += "&$where=data < " + to_datetime(end_date).strftime(
                "'%Y-%m-%dT%H:%M:%S'")

        try:
            s = get(request)
        except:
            print_exc()
            std_out('Problem with sensor data from API', 'ERROR')
            pass
            return None

        if s.status_code == 200 or s.status_code == 201:
            df = read_csv(StringIO(s.content.decode('utf-8')))
        else:
            std_out('API reported {}'.format(s.status_code), 'ERROR')
            pass
            return None

        # Filter columns
        measures = ['h0' + str(i) for i in range(1, 10)]
        measures += ['h' + str(i) for i in range(10, 25)]
        # validations = ['v0' + str(i) for i in range(1,10)]
        # validations  += ['v' + str(i) for i in range(10,25)]
        new_measures_names = list(range(1, 25))

        columns = ['contaminant', 'data'] + measures  # + validations
        try:
            df_subset = df[columns]
            df_subset.columns = ['contaminant', 'date'] + new_measures_names
        except:
            print_exc()
            std_out('Problem while filtering columns', 'Error')
            return None
        else:
            std_out('Successful filtering', 'SUCCESS')

        # Pivot
        try:
            df = DataFrame([])
            for contaminant in self.sensors.keys():
                if contaminant not in df_subset['contaminant'].values:
                    std_out(f'{contaminant} not in columns. Skipping',
                            'WARNING')
                    continue
                df_temp = df_subset.loc[
                    df_subset['contaminant'] == contaminant].drop(
                        'contaminant',
                        1).set_index('date').unstack().reset_index()
                df_temp.columns = ['hours', 'date', contaminant]
                df_temp['date'] = to_datetime(df_temp['date'])
                timestamp_lambda = lambda x: x['date'] + DateOffset(hours=int(
                    x['hours']))
                df_temp['date'] = df_temp.apply(timestamp_lambda, axis=1)
                df_temp = df_temp.set_index('date')
                df[contaminant] = df_temp[contaminant]
        except:
            # print_exc()
            std_out('Problem while filtering columns', 'Error')
            pass
            return None
        else:
            std_out('Successful pivoting', 'SUCCESS')

        df.index = to_datetime(df.index).tz_localize('UTC').tz_convert(
            self.location)
        df.sort_index(inplace=True)

        # Rename
        try:
            df.rename(columns=self.sensors, inplace=True)
        except:
            # print_exc()
            std_out('Problem while renaming columns', 'Error')
            pass
            return None
        else:
            std_out('Successful renaming', 'SUCCESS')

        # Clean
        df = df[~df.index.duplicated(keep='first')]
        # Drop unnecessary columns
        df.drop([i for i in df.columns if 'Unnamed' in i],
                axis=1,
                inplace=True)
        # Check for weird things in the data
        df = df.apply(to_numeric, errors='coerce')
        # Resample
        df = df.resample(frequency).mean()

        try:
            df = df.reindex(df.index.rename('Time'))

            df = clean(df, clean_na, how='all')
            # if clean_na is not None:
            #     if clean_na == 'drop':
            #         # std_out('Cleaning na with drop')
            #         df.dropna(axis = 0, how='all', inplace=True)
            #     elif clean_na == 'fill':
            #         df = df.fillna(method='bfill').fillna(method='ffill')
            #         # std_out('Cleaning na with fill')
            self.data = df

        except:
            std_out('Problem closing up the API dataframe', 'ERROR')
            pass
            return None

        std_out(f'Device {self.id} loaded successfully from API', 'SUCCESS')
        return self.data
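
A minimal hedged usage sketch for the method above; device stands for an instance of the (unshown) device class and the dates are placeholders.

# Hypothetical call: pull hourly data for January 2021 and drop all-NaN rows.
data = device.get_device_data(start_date='2021-01-01',
                              end_date='2021-01-31',
                              frequency='1H',
                              clean_na='drop')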
Code example #14
    def test_both_offset_observance_raises(self):

        with self.assertRaises(NotImplementedError) as cm:
            h = Holiday("Cyber Monday", month=11, day=1,
                        offset=[DateOffset(weekday=SA(4))], observance=next_monday)
Code example #15
def scan_price(gdax,
               product,
               alert_method,
               low_notify=None,
               high_notify=None,
               interval=60,
               end_time=None,
               change_rate=.015,
               alert_addr=None):
    """
    Scans a GdaxProduct's orderbook regularly evaluating
    ask prices and sending notifications when price hits a high or low target.

    This function is designed to run for days, weeks, and months at a time as
    it makes adjustments to target high and low prices as they're reached.

    :param gdax: (gdax.api.Gdax)
        The Gdax API object to be used when calling.

    :param product: (str)
        ETH-USD, LTC-USD, BTC-USD

    :param alert_method: (callable)
        Should be a method that can be called like so:
            alert_method(alert_addr, text)

    :param low_notify: (numeric, default None)
        The lower trigger price to alert on.
        None will default to: price - (price * change_rate)
        This value is decreased by the change rate when reached.
        This value is increased by the change rate when the
        high_notify price is reached.

    :param high_notify: (numeric, default None)
        The upper trigger price to alert on.
        None will default to: price + (price * change_rate)
        This value is increased by the change rate when reached.
        This value is decreased by the change rate when
        the low_notify price is reached.

    :param interval: (int, default 60)
        Number of seconds between order book scans.

    :param end_time: (DateTime, default None)
        The date/time to stop scanning the books
        None will default to 99 weeks from function start time.

    :param change_rate: (float, default 0.015)
        The rate at which the high_notify & low_notify
        target values change as the price moves.

    :return: None
    """
    from time import sleep
    from datetime import datetime
    from pandas import DateOffset
    if alert_addr is None:
        alert_addr = '*****@*****.**'
    both_flags = all((low_notify, high_notify))

    if not both_flags:
        prod = gdax.get_product(product)
        p = prod.price

        if low_notify is None:
            low = p - (p * change_rate)
            low_notify = round(low, 2)

        if high_notify is None:
            high = p + (p * change_rate)
            high_notify = round(high, 2)

    if end_time is None:
        end_time = datetime.now() + DateOffset(weeks=99)

    ask_store, last_ping, i = list(), None, 0
    print("Initializing {} price scan "
          "looking for low {} "
          "and high {}".format(product, low_notify, high_notify))

    # This value is increased by ~30%
    # every time a notification is sent.
    # It represents how many minutes to wait before
    # re-sending the same notification
    wait_time = 5

    while True:
        i += 1
        now = datetime.now()

        try:
            book = gdax.get_book(product, level=1)
        except Exception as e:
            sleep(interval)
            print(e)
            continue

        asks = book['asks']
        lowest_ask = float(asks[0][0])

        diff_high = round(high_notify - lowest_ask, 2)
        diff_low = round(low_notify - lowest_ask, 2)
        pct_away_high = round(100 - ((lowest_ask / high_notify) * 100), 2)
        pct_away_low = round(100 - ((lowest_ask / low_notify) * 100), 2)
        meets_low = (low_notify and lowest_ask <= low_notify)
        meets_high = (high_notify and lowest_ask >= high_notify)
        meets_criteria = meets_low or meets_high
        # This information should probably somehow get stored?
        # NOTE: the program is threaded so it'd need to generate its own
        # SQL connection ....or open and write to a file every 50 lines or something
        #ask_store.append([now, lowest_ask, diff_high, pct_away_high, diff_low, pct_away_low])

        msg = 'Price: ${} -' \
              'Target: ${} - away: ${}/{}% -' \
              'Stop: ${}- away: ${}/%{}'.format(lowest_ask,
                                                high_notify,
                                                diff_high,
                                                pct_away_high,
                                                low_notify,
                                                diff_low,
                                                pct_away_low)
        msg = '{}: ({}-{})\n{}'.format(str(now)[:19], i, product, msg)
        print(msg)
        print("\n")

        if meets_criteria:
            send_msg = True
            m = int(wait_time)
            min_time = now - DateOffset(minutes=m)
            if last_ping and last_ping > min_time:
                send_msg = False

            if send_msg:
                alert_method(alert_addr, msg)
                last_ping = now

                # Increase/decrease the target price by change_rate
                if meets_low:
                    v = round(low_notify * change_rate, 2)
                    low_notify -= v
                    high_notify -= v

                if meets_high:
                    v = round(high_notify * change_rate, 2)
                    high_notify += v
                    low_notify += v

        if now >= end_time:
            break

        sleep(interval)

    return ask_store
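
A hedged usage sketch for the scanner above; gdax_api and send_sms are placeholders for the Gdax API object and the alert callable described in the docstring.

# Hypothetical call: watch ETH-USD every two minutes, alerting when the lowest ask
# crosses the automatically derived low/high targets (about 1.5% from the start price).
scan_price(gdax_api, 'ETH-USD', send_sms, interval=120, change_rate=0.015)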
Code example #16
def setup_management_options(folder, crop, config, gridpoint, gridLut,
                             shortcut, rulesList, soil):
    '''
    Setup management rules based on grid cell.

    REMEMBER: order matters!
    '''
    # reset water, nitrogen, and surfaceOM rule
    #manager.reset_on_sowing(folder, crop, soilmodule=soil.get('name'))

    # set dates from config file
    sowStart = config.sowStart
    sowEnd = config.sowEnd

    # if sowStart is 'auto', set by location from provided lookup table
    if sowStart == 'auto':
        sowStart = gridLut['sow_start'][gridpoint]

    # if sowEnd is 'auto', set by location from provided lookup table
    if sowEnd == 'auto':
        sowEnd = gridLut['sow_end'][gridpoint]

    # end crop on fixed date rule
    # removes any crop that may be left in the ground 2 days before sowing
    endDate = datetime.strptime(sowStart, '%d-%b') - DateOffset(2)
    endDate = datetime.strftime(endDate, '%d-%b')
    manager.end_crop_on_fixed_date_rule(folder, crop, endDate)
    #-------------------------------- Hamze added this: it applies the biochar module
    manager.BiocharApplication_rule(folder, AppDate=config.BAD, BAR=config.BAR)

    if crop == 'maize':
        # sowing rule
        if sowEnd == '':
            manager.sowOnFixedDate_rule(folder, crop,
                                                date=sowStart,\
                                                density=config.density,\
                                                depth=config.depth,\
                                                cultivar=config.cultivar,\
                                                gclass=config.gclass,\
                                                row_spacing=config.rowSpacing)
        else:
            manager.sowUsingAVariable_rule(folder, crop,
                                                   start_date=sowStart,\
                                                   end_date=sowEnd,\
                                                   density=config.density,\
                                                   depth=config.depth,\
                                                   cultivar=config.cultivar,\
                                                   gclass=config.gclass,\
                                                   row_spacing=config.rowSpacing)
    elif crop == 'cotton':
        # sowing rule
        if sowEnd == '':
            manager.cotton_fixed_date_sowing_rule(folder, crop, date=sowStart)
        else:
            manager.cotton_sowing_rule(folder, crop,
                                               start_date=sowStart,\
                                               end_date=sowEnd)

    # add each shared rule to grid point
    for rulename, ruleType in rulesList:
        apsim.new_management_rule(folder, rulename, ruleType, shortcut)
Code example #17
        self.lines.datetime[0] = d
        self.lines.openinterest[0] = 0.0
        self.lines.open[0] = o
        self.lines.high[0] = h
        self.lines.low[0] = l
        self.lines.close[0] = c
        self.lines.volume[0] = v

        return True


if __name__ == "__main__":
    from stocklook.utils.timetools import today
    from pandas import DateOffset

    start = today() - DateOffset(years=1)
    end = today()
    period = 2 * 60 * 60

    cerebro = bt.Cerebro()
    cerebro.addstrategy(SmaCross)

    data0 = PoloniexDataFeed(dataname='BTC_LTC',
                             fromdate=start,
                             todate=end,
                             period=period)
    cerebro.adddata(data0)

    cerebro.run()
    cerebro.plot()
Code example #18
from pandas import DataFrame, DateOffset

from weaverbird.backends.pandas_executor.types import DomainRetriever, PipelineExecutor
from weaverbird.exceptions import DuplicateError
from weaverbird.pipeline.steps import EvolutionStep

OFFSETS = {
    'vsLastYear': DateOffset(years=1),
    'vsLastMonth': DateOffset(months=1),
    'vsLastWeek': DateOffset(weeks=1),
    'vsLastDay': DateOffset(days=1),
}
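# Adding one of these offsets shifts a date forward by the corresponding calendar
# period, clamping to the month end when needed, e.g.
# Timestamp('2021-01-31') + DateOffset(months=1) == Timestamp('2021-02-28').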


def execute_evolution(
    step: EvolutionStep,
    df: DataFrame,
    domain_retriever: DomainRetriever = None,
    execute_pipeline: PipelineExecutor = None,
) -> DataFrame:
    new_column = step.new_column or f'{step.value_col}_EVOL_{step.evolution_format.upper()}'
    df = df.reset_index(drop=True)

    id_cols = [step.date_col] + step.index_columns
    if df.set_index(id_cols).index.duplicated().any():
        raise DuplicateError(
            'Multiple rows for the same date. Did you forget indexColumns?')

    date_col_offseted = df[step.date_col] + OFFSETS[step.evolution_type]
    df_offseted = df.assign(**{step.date_col: date_col_offseted})
    both = df.merge(df_offseted,
Code example #19
File: __init__.py Project: rubensmg/totvs_rm
def process_funcionario(pfunc: DataFrame, ppessoa: DataFrame,
                        psecao: DataFrame, pfperff: DataFrame,
                        pfemprt: DataFrame, pparam: DataFrame,
                        pffinanc: DataFrame) -> DataFrame:
    """
        TODO: doc string
    """

    _pfperff_group_by_codcoligada_chapa_ano_mes_comp = (pfperff.assign(
        liquido=lambda df: df['liquido'].astype(float)).groupby(
            by=['codcoligada', 'chapa', 'anocomp', 'mescomp'
                ])['liquido'].sum().reset_index())

    _pfemprt_group_by_codcoligada_chapa = (pfemprt.assign(
        saldodevedor=lambda df: df['saldodevedor'].astype(float)).groupby(
            by=['codcoligada', 'chapa'])['saldodevedor'].sum().reset_index())

    _pffinanc_group_by_ano_mes_comp = pffinanc.groupby(
        by=['anocomp', 'mescomp', 'chapa', 'codcoligada'
            ])['valor'].count().reset_index()

    _pparam_anterior = (pparam.assign(_datacompanterior=lambda df: to_datetime(
        df['mescomp'].astype(str) + '-' + df['anocomp'].astype(str),
        format='%m-%Y') - DateOffset(months=1)).assign(
            anocomp=lambda df: df['_datacompanterior'].dt.year).assign(
                mescomp=lambda df: df['_datacompanterior'].dt.month)[[
                    'anocomp', 'mescomp', 'codcoligada'
                ]])

    # Funcionários ativos devem utilizar a competência anterior
    _func_ativo = (pfunc.merge(
        ppessoa, left_on=['codpessoa'], right_on=['codigo'],
        how='inner').merge(psecao,
                           left_on=['codcoligada', 'codsecao'],
                           right_on=['codcoligada', 'codigo'],
                           how='inner').merge(
                               _pparam_anterior,
                               left_on=['codcoligada'],
                               right_on=['codcoligada'],
                               how='inner').query('codsituacao != "D"')[[
                                   'cgc', 'chapa', 'dataadmissao', 'cpf',
                                   'datademissao', 'nome', 'codsituacao',
                                   'telefone1', 'codcoligada', 'salario',
                                   'anocomp', 'mescomp'
                               ]])

    # Funcionários demitidos na competência atual devem utilizar a competência anterior
    _func_demitido_atual = (
        pfunc.merge(
            ppessoa, left_on=['codpessoa'], right_on=['codigo'],
            how='inner').merge(
                psecao,
                left_on=['codcoligada', 'codsecao'],
                right_on=['codcoligada', 'codigo'],
                how='inner').merge(
                    pparam,
                    left_on=['codcoligada'],
                    right_on=['codcoligada'],
                    how='inner').rename(
                        {
                            'anocomp': 'demissao_anocomp',
                            'mescomp': 'demissao_mescomp'
                        },
                        axis=1).reset_index().query('codsituacao == "D"').
        assign(_datademissao=lambda df: to_datetime(
            df['datademissao'], format='%Y-%m-%dT%H:%M:%S.%f'
        )).query(
            '_datademissao.dt.month == demissao_mescomp & _datademissao.dt.year == demissao_anocomp'
        ).merge(_pparam_anterior,
                left_on=['codcoligada'],
                right_on=['codcoligada'],
                how='inner')[[
                    'cgc', 'chapa', 'dataadmissao', 'cpf', 'datademissao',
                    'nome', 'codsituacao', 'telefone1', 'codcoligada',
                    'salario', 'anocomp', 'mescomp'
                ]])

    # Employees dismissed in the previous accounting period keep that same period
    _func_demitido = (pfunc.merge(
        ppessoa, left_on=['codpessoa'], right_on=['codigo'], how='inner'
    ).merge(
        psecao,
        left_on=['codcoligada', 'codsecao'],
        right_on=['codcoligada', 'codigo'],
        how='inner'
    ).query(
        'codsituacao == "D"'
    ).merge(
        _pparam_anterior,
        left_on=['codcoligada'],
        right_on=['codcoligada'],
        how='inner'
    ).assign(
        _datademissao=lambda df: to_datetime(
            df['datademissao'], format='%Y-%m-%dT%H:%M:%S.%f')
    ).query(
        '_datademissao.dt.month == mescomp & _datademissao.dt.year == anocomp'
    )[[
        'cgc', 'chapa', 'dataadmissao', 'cpf', 'datademissao', 'nome',
        'codsituacao', 'telefone1', 'codcoligada', 'salario', 'anocomp',
        'mescomp'
    ]])

    return (concat(
        [_func_ativo, _func_demitido, _func_demitido_atual],
        ignore_index=True).drop_duplicates().reset_index(drop=True).merge(
            _pffinanc_group_by_ano_mes_comp,
            left_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'],
            right_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'],
            how='inner').merge(
                _pfperff_group_by_codcoligada_chapa_ano_mes_comp,
                left_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'],
                right_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'],
                how='left').merge(
                    _pfemprt_group_by_codcoligada_chapa,
                    left_on=['codcoligada', 'chapa'],
                    right_on=['codcoligada', 'chapa'],
                    how='left').assign(
                        # ``&`` binds more tightly than ``>``, so the
                        # comparison must be parenthesized to build the
                        # intended boolean mask.
                        emprestimoexterno=lambda df: df['saldodevedor'].notna()
                        & (df['saldodevedor'] > 0.0)).
            assign(cnpj=lambda df: df['cgc'].str.replace(
                r'[./-]', '', regex=True)).
            assign(codrecisaorais=lambda df: None).assign(
                consignavel=lambda df: (df['liquido'] * 0.3).round(2))[[
                    'cnpj', 'chapa', 'dataadmissao', 'cpf', 'datademissao',
                    'consignavel', 'emprestimoexterno', 'nome', 'salario',
                    'codsituacao', 'telefone1', 'codrecisaorais'
                ]].rename(
                    {
                        'dataadmissao': 'admissao',
                        'datademissao': 'demissao',
                        'codpessoa': 'chavefuncionario',
                        'codsituacao': 'situacaofuncionario',
                        'telefone1': 'telefone',
                        'chapa': 'matriculafuncionario',
                        'cpf': 'cpffuncionario'
                    },
                    axis=1))
コード例 #20
0
#gw3 = gw2.reset_index()
#
#mon_gw1 = grp_ts_agg(gw3, 'site', 'time', 'M').median().reset_index()
#mon_gw1['mon'] = mon_gw1.time.dt.month
#mon_gw1['mtype'] = 'gw'

### Combine all mtypes

#mon_summ = concat([mon_flow1, mon_precip1, mon_gw1]).reset_index(drop=True)
mon_summ = concat([mon_flow1, mon_precip1]).reset_index(drop=True)

###############################################
#### Pull out recent monthly data from hydrotel

now1 = to_datetime(param.date_now)
start_date = now1 - DateOffset(months=param.n_previous_months) - DateOffset(
    days=now1.day - 1)
end_date = now1 - DateOffset(days=now1.day - 1)
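# Worked example (illustrative values, not from the config): if
# param.date_now is '2017-08-15' and param.n_previous_months is 3, then
# now1.day - 1 == 14, so start_date is 2017-05-01 and end_date is
# 2017-08-01; that is, the window runs from the first day of the month
# three months back up to the first day of the current month.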

### SW
print('Getting HydroTel Flow Data:')
sites2 = sites1.copy()
sites2.loc[sites2.site.isin([64610, 65104, 68526]),
           'site'] = [164610, 165104, 168526]

hy_sites = sites2.site.astype(str).tolist()

hy1 = get_ts_data(param.hydrotel_server,
                  param.hydrotel_database,
                  'flow',
                  hy_sites,
コード例 #21
0
    def test_time_offsets_accuracy(self):
        payload = get_query_context("birth_names")
        payload["queries"][0]["metrics"] = ["sum__num"]
        payload["queries"][0]["groupby"] = ["state"]
        payload["queries"][0]["is_timeseries"] = True
        payload["queries"][0]["timeseries_limit"] = 5
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1980 : 1991"
        payload["queries"][0]["granularity"] = "ds"
        payload["queries"][0]["extras"]["time_grain_sqla"] = "P1Y"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get main query dataframe
        df = query_result.df

        # set time_offsets to query_object
        payload["queries"][0]["time_offsets"] = ["3 years ago", "3 years later"]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        time_offsets_obj = query_context.processing_time_offsets(df, query_object)
        df_with_offsets = time_offsets_obj["df"]
        df_with_offsets = df_with_offsets.set_index(["__timestamp", "state"])

        # should get correct data when apply "3 years ago"
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1977 : 1988"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get df for "3 years ago"
        df_3_years_ago = query_result.df
        df_3_years_ago["__timestamp"] = df_3_years_ago["__timestamp"] + DateOffset(
            years=3
        )
        df_3_years_ago = df_3_years_ago.set_index(["__timestamp", "state"])
        for index, row in df_with_offsets.iterrows():
            if index in df_3_years_ago.index:
                assert (
                    row["sum__num__3 years ago"]
                    == df_3_years_ago.loc[index]["sum__num"]
                )

        # should get correct data when apply "3 years later"
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1983 : 1994"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get df for "3 years later"
        df_3_years_later = query_result.df
        df_3_years_later["__timestamp"] = df_3_years_later["__timestamp"] - DateOffset(
            years=3
        )
        df_3_years_later = df_3_years_later.set_index(["__timestamp", "state"])
        for index, row in df_with_offsets.iterrows():
            if index in df_3_years_later.index:
                assert (
                    row["sum__num__3 years later"]
                    == df_3_years_later.loc[index]["sum__num"]
                )
コード例 #22
0
def get_bacen_indices():

    import pandas as pd
    import pymysql as db
    import datetime
    import logging

    from pandas import DateOffset
    from dependencias.Metodos.funcoes_auxiliares import full_path_from_database
    from findt import FinDt
    from pandas import ExcelWriter

    logger = logging.getLogger(__name__)

    #---- Constants declaration
    logger.info("Conectando no Banco de dados")

    connection = db.connect('localhost',
                            user='******',
                            passwd='root',
                            db='projeto_inv')

    logger.info("Conexão com DB executada com sucesso")

    var_path = full_path_from_database(
        "feriados_nacionais") + "feriados_nacionais.csv"
    save_path = full_path_from_database("get_bacen_indices")

    #---- Incremental input downloaded by robos_diarios_bacen

    # Query what the robo_diario_bacen job downloaded
    query = 'SELECT * FROM projeto_inv.bacen_series;'
    bacen_series = pd.read_sql(query, con=connection)
    logger.info("Leitura do banco de dados executada com sucesso")

    logger.info("Tratando dados")

    # Drop duplicates, keeping the most recently loaded row per series/date
    bacen_series = bacen_series.sort_values(
        by=['codigo', 'data_referencia', 'data_bd'],
        ascending=[True, True, False])
    bacen_series = bacen_series.drop_duplicates(
        subset=['codigo', 'data_referencia'], keep='first')

    # Split into monthly and daily dataframes
    bacen_series_mensal = bacen_series[bacen_series.frequencia.isin(
        ['M'])].copy()
    bacen_series_diario = bacen_series[bacen_series.frequencia.isin(
        ['D'])].copy()

    # Drop duplicates, keeping the latest monthly value per series
    bacen_series_mensal = bacen_series_mensal.drop_duplicates(
        subset=['codigo'], keep='last')

    # Append the daily dataframe
    bacen_series = pd.concat([bacen_series_mensal, bacen_series_diario])

    # Keep only the TJLP, IPCA, IGPM, CDI and TR series
    bacen_series = bacen_series[bacen_series.codigo.isin(
        [256, 433, 189, 4389, 7811])]

    bacen_series['indice'] = None
    # IPCA - monthly periodicity; monthly compounding
    bacen_series.loc[bacen_series.codigo == 433, 'indice'] = 'IPCA'
    # IGPM - monthly periodicity; monthly compounding
    bacen_series.loc[bacen_series.codigo == 189, 'indice'] = 'IGP'
    # CDI - daily periodicity; annual compounding
    bacen_series.loc[bacen_series.codigo == 4389, 'indice'] = 'DI1'
    # TR - daily periodicity; monthly compounding
    bacen_series.loc[bacen_series.codigo == 7811, 'indice'] = 'TR'
    # TJLP - daily periodicity; monthly compounding
    bacen_series.loc[bacen_series.codigo == 256, 'indice'] = 'TJLP'

    bacen_series['data_referencia'] = bacen_series['data_referencia'].astype(
        str)

    bacen_series['ano'] = bacen_series['data_referencia'].str[0:4].astype(int)
    bacen_series['mes'] = bacen_series['data_referencia'].str[5:7].astype(int)
    bacen_series['dia'] = bacen_series['data_referencia'].str[8:10].astype(int)

    bacen_series['dt_ref'] = pd.to_datetime(
        bacen_series['data_referencia']).dt.date

    del bacen_series['codigo']
    del bacen_series['frequencia']
    del bacen_series['nome']
    del bacen_series['data_bd']
    del bacen_series['id_bacen_series']
    del bacen_series['data_referencia']

    #---- INDEX UPDATE - HISTORICAL LOAD
    logger.info("Atualizando índices")

    # Query what the robo_diario_anbima_projecoes job downloaded
    query = 'SELECT * FROM projeto_inv.bacen_series_hist;'
    bacen_series_hist = pd.read_sql(query, con=connection)
    logger.info("Leitura do banco de dados executada com sucesso")

    bacen_series_hist = bacen_series_hist.sort_values(
        by=['indice', 'dt_ref', 'data_bd'], ascending=[True, True, False])
    bacen_series_hist = bacen_series_hist.drop_duplicates(
        subset=['indice', 'dt_ref'], keep='first')

    del bacen_series_hist['id_bc_series_hist']
    del bacen_series_hist['data_bd']

    horario_bd = datetime.datetime.now()

    #---- INDEX UPDATE - APPEND THE NEWLY DOWNLOADED BACEN SERIES

    bacen_series_hist = pd.concat([bacen_series_hist, bacen_series])

    bacen_series_hist = bacen_series_hist.sort_values(by=['indice', 'dt_ref'],
                                                      ascending=[True, True])
    bacen_series_hist = bacen_series_hist.drop_duplicates(
        subset=['indice', 'dt_ref'], keep='first')

    #---------- DAILY SERIES CONSTRUCTION
    logger.info("Criando séries diária")

    # Pick the last day of the current month: the first day of the next
    # month minus one day, rolling the year over when the month is December
    hoje = datetime.date.today()
    if hoje.month == 12:
        anofim, mesfim = hoje.year + 1, 1
    else:
        anofim, mesfim = hoje.year, hoje.month + 1
    fim = datetime.date(anofim, mesfim, 1) - DateOffset(days=1)

    # Build the daily calendar once instead of calling date_range four times
    datas = pd.date_range(start='01/01/1996', end=fim, freq='D')
    serie_dias = pd.DataFrame({
        'dt_ref': datas.date,
        'ano': datas.year,
        'mes': datas.month,
        'dia': datas.day,
    })

    # Flag whether each date is a business day (dia útil)

    dt_max = max(serie_dias['dt_ref'])
    dt_min = min(serie_dias['dt_ref'])
    per = FinDt.DatasFinanceiras(dt_min, dt_max, path_arquivo=var_path)

    du = pd.DataFrame(columns=['dt_ref'])
    du['dt_ref'] = per.dias(3)
    du['du_1'] = 1

    serie_dias = serie_dias.merge(du, on=['dt_ref'], how='left')
    serie_dias['du_1'] = serie_dias['du_1'].fillna(0)
    serie_dias['dc_1'] = 1

    # Calendar days (dias corridos) per month
    serie_dias_group_count_filter = (serie_dias.groupby(
        ['ano', 'mes'])['dt_ref'].count().rename('dc').reset_index())

    serie_dias = serie_dias.merge(serie_dias_group_count_filter,
                                  on=['ano', 'mes'],
                                  how='left')

    # Business days (dias úteis) per month
    serie_dias_group_sum_filter = (serie_dias.groupby(
        ['ano', 'mes'])['du_1'].sum().rename('du').reset_index())

    serie_dias = serie_dias.merge(serie_dias_group_sum_filter,
                                  on=['ano', 'mes'],
                                  how='left')

    #---------- DAILY BASE CONSTRUCTION
    logger.info("Criando bases diárias")

    #----IPCA
    ipca = bacen_series_hist[['mes', 'ano', 'valor', 'indice'
                              ]][bacen_series_hist.indice == 'IPCA'].copy()
    serie_dias_ipca = serie_dias.merge(ipca, on=['mes', 'ano'], how='left')

    # Accumulated factors
    serie_dias_ipca['fator_dia_du'] = (
        1 + serie_dias_ipca['du_1'] * serie_dias_ipca['valor'] / 100)**(
            1 / serie_dias_ipca['du'])
    serie_dias_ipca['fator_dia_dc'] = (
        1 + serie_dias_ipca['dc_1'] * serie_dias_ipca['valor'] / 100)**(
            1 / serie_dias_ipca['dc'])
    serie_dias_ipca['fator_acum_du'] = serie_dias_ipca[[
        'indice', 'fator_dia_du'
    ]].groupby(['indice']).agg(['cumprod'])
    serie_dias_ipca['fator_acum_dc'] = serie_dias_ipca[[
        'indice', 'fator_dia_dc'
    ]].groupby(['indice']).agg(['cumprod'])
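    # Illustrative sanity check (numbers not from the source): for a month
    # with a 1% IPCA print and 30 calendar days, fator_dia_dc is
    # 1.01 ** (1 / 30), roughly 1.000332, and the cumulative product of
    # those 30 daily factors rebuilds the 1.01 monthly factor.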

    #----IGPM
    igpm = bacen_series_hist[['mes', 'ano', 'valor', 'indice'
                              ]][bacen_series_hist.indice == 'IGP'].copy()
    serie_dias_igpm = serie_dias.merge(igpm, on=['mes', 'ano'], how='left')

    # Accumulated factors
    serie_dias_igpm['fator_dia_du'] = (
        1 + serie_dias_igpm['du_1'] * serie_dias_igpm['valor'] / 100)**(
            1 / serie_dias_igpm['du'])
    serie_dias_igpm['fator_dia_dc'] = (
        1 + serie_dias_igpm['dc_1'] * serie_dias_igpm['valor'] / 100)**(
            1 / serie_dias_igpm['dc'])
    serie_dias_igpm['fator_acum_du'] = serie_dias_igpm[[
        'indice', 'fator_dia_du'
    ]].groupby(['indice']).agg(['cumprod'])
    serie_dias_igpm['fator_acum_dc'] = serie_dias_igpm[[
        'indice', 'fator_dia_dc'
    ]].groupby(['indice']).agg(['cumprod'])

    #----CDI
    cdi = bacen_series_hist[['dia', 'mes', 'ano', 'valor', 'indice'
                             ]][bacen_series_hist.indice == 'DI1'].copy()
    serie_dias_cdi = serie_dias.merge(cdi,
                                      on=['dia', 'mes', 'ano'],
                                      how='left')
    serie_dias_cdi['indice'] = serie_dias_cdi['indice'].fillna('DI1')
    serie_dias_cdi['valor'] = serie_dias_cdi['valor'].fillna(0)

    # Accumulated factors
    serie_dias_cdi['fator_dia_du'] = (
        1 + serie_dias_cdi['du_1'] * serie_dias_cdi['valor'] / 100)**(1 / 252)
    serie_dias_cdi['fator_dia_dc'] = None
    serie_dias_cdi['fator_acum_du'] = serie_dias_cdi[[
        'indice', 'fator_dia_du'
    ]].groupby(['indice']).agg(['cumprod'])
    serie_dias_cdi['fator_acum_dc'] = None

    #----TR
    tr = bacen_series_hist[['mes', 'ano', 'valor', 'indice'
                            ]][bacen_series_hist.indice == 'TR'].copy()
    serie_dias_tr = serie_dias.merge(tr, on=['mes', 'ano'], how='left')

    #----TJLP
    tjlp = bacen_series_hist[['mes', 'ano', 'valor', 'indice'
                              ]][bacen_series_hist.indice == 'TJLP'].copy()
    serie_dias_tjlp = serie_dias.merge(tjlp, on=['mes', 'ano'], how='left')

    # Accumulated factors
    serie_dias_tr['fator_dia_du'] = (
        1 + serie_dias_tr['du_1'] * serie_dias_tr['valor'] / 100)**(
            1 / serie_dias_tr['du'])
    serie_dias_tr['fator_dia_dc'] = (
        1 + serie_dias_tr['dc_1'] * serie_dias_tr['valor'] / 100)**(
            1 / serie_dias_tr['dc'])
    serie_dias_tr['fator_acum_du'] = serie_dias_tr[['indice',
                                                    'fator_dia_du']].groupby([
                                                        'indice'
                                                    ]).agg(['cumprod'])
    serie_dias_tr['fator_acum_dc'] = serie_dias_tr[['indice',
                                                    'fator_dia_dc']].groupby([
                                                        'indice'
                                                    ]).agg(['cumprod'])

    serie_dias_indices = pd.concat([
        serie_dias_ipca, serie_dias_igpm, serie_dias_cdi, serie_dias_tr,
        serie_dias_tjlp
    ])

    serie_dias_indices = serie_dias_indices[
        serie_dias_indices.fator_dia_du.notnull()]

    serie_dias_indices = serie_dias_indices.reset_index(drop=True)

    writer = ExcelWriter(save_path + 'serie_dias_indices.xlsx')
    serie_dias_indices.to_excel(writer, 'Todos')
    serie_dias_ipca.to_excel(writer, 'IPCA')
    serie_dias_igpm.to_excel(writer, 'IGPM')
    serie_dias_cdi.to_excel(writer, 'DI')
    serie_dias_tr.to_excel(writer, 'TR')
    serie_dias_tjlp.to_excel(writer, 'TJLP')
    writer.close()

    serie_dias_indices['data_bd'] = horario_bd

    #---------- CHECK WHICH ROWS ARE NEW BEFORE WRITING THE INCREMENTAL TABLE

    query = 'SELECT * FROM projeto_inv.bacen_series_fatores;'
    bc_series = pd.read_sql(query, con=connection)
    logger.info("Leitura do banco de dados executada com sucesso")

    bc_series = bc_series[['indice', 'dt_ref']].copy()
    bc_series['marker'] = 1

    serie_dias_indices = serie_dias_indices.merge(bc_series,
                                                  on=['indice', 'dt_ref'],
                                                  how='left')
    serie_dias_indices = serie_dias_indices[
        serie_dias_indices.marker.isnull()].copy()

    del serie_dias_indices['marker']

    logger.info("Salvando base de dados -  Tabela bacen_series_fatores")
    # Save to MySQL
    serie_dias_indices.to_sql(name='bacen_series_fatores',
                              con=connection,
                              if_exists='append',
                              index=False)

    # Close the connection
    connection.close()
コード例 #23
0
    def test_is_scalar_pandas_scalars(self):
        assert is_scalar(Timestamp("2014-01-01"))
        assert is_scalar(Timedelta(hours=1))
        assert is_scalar(Period("2014-01-01"))
        assert is_scalar(Interval(left=0, right=1))
        assert is_scalar(DateOffset(days=1))
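For contrast, a quick informal check (not part of the original test) showing that containers are not scalars:

import numpy as np
from pandas.api.types import is_scalar

assert is_scalar(None)               # None is treated as a scalar
assert not is_scalar([1, 2, 3])      # lists are containers, not scalars
assert not is_scalar(np.array([1]))  # numpy arrays are not scalars either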
コード例 #24
0
ファイル: holiday.py プロジェクト: Aniket130590/prodinfo
        Parameters
        ----------
        other : holiday calendar
        inplace : bool (default=False)
            If True set rule_table to holidays, else return array of Holidays
        """
        holidays = self.merge_class(self, other)
        if inplace:
            self.rules = holidays
        else:
            return holidays


USMemorialDay = Holiday(
    "Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1))
)
USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1)))
USColumbusDay = Holiday(
    "Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2))
)
USThanksgivingDay = Holiday(
    "Thanksgiving", month=11, day=1, offset=DateOffset(weekday=TH(4))
)
USMartinLutherKingJr = Holiday(
    "Martin Luther King Jr. Day",
    start_date=datetime(1986, 1, 1),
    month=1,
    day=1,
    offset=DateOffset(weekday=MO(3)),
)
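The Holiday rules above are normally bundled into a calendar. As a minimal usage sketch (the calendar class below is illustrative, not part of the source file), pandas' AbstractHolidayCalendar turns them into a DatetimeIndex of observed dates:

from pandas.tseries.holiday import AbstractHolidayCalendar

class ExampleUSCalendar(AbstractHolidayCalendar):
    # Reuses the Holiday rules defined above.
    rules = [USMemorialDay, USLaborDay, USThanksgivingDay]

# DatetimeIndex of observed holidays inside the requested window.
print(ExampleUSCalendar().holidays(start='2023-01-01', end='2023-12-31'))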
コード例 #25
0
ファイル: london.py プロジェクト: shasafoster/OTC
    observance=previous_friday,
)

# New Year's Day
LSENewYearsDay = Holiday(
    "New Year's Day",
    month=1,
    day=1,
    observance=weekend_to_monday,
)

# Early May bank holiday
MayBank = Holiday(
    "Early May Bank Holiday",
    month=5,
    offset=DateOffset(weekday=MO(1)),
    day=1,
)

# Spring bank holiday
SpringBank = Holiday(
    "Spring Bank Holiday",
    month=5,
    day=31,
    offset=DateOffset(weekday=MO(-1)),
)

# Summer bank holiday
SummerBank = Holiday(
    "Summer Bank Holiday",
    month=8,
コード例 #26
0
    "Australia Day",
    month=1,
    day=26,
    start_date=Timestamp("1994-01-01"),
    observance=weekend_to_monday,
)

# prior to 1993 the holiday was observed on the Monday
# following, or on, the 26th of January
AustraliaDayPre88 = Holiday(
    "Australia Day",
    month=1,
    day=26,
    start_date=Timestamp("1960-01-01"),
    end_date=Timestamp("1987-12-31"),
    offset=DateOffset(weekday=MO(1)),
)
# The 1988 Bi-Centennial celebrations saw an extra holiday
# and Australia Day observed on the actual date
AustraliaDay1988 = Holiday(
    "Australia Day",
    month=1,
    day=26,
    start_date=Timestamp("1988-01-01"),
    end_date=Timestamp("1988-12-31"),
)
# ASX did not close for Australia Day in 1993 since
# States observed different dates prior to 1994
AustraliaDayPost88Pre93 = Holiday(
    "Australia Day",
    month=1,
コード例 #27
0
NewYearsHolidayJan2 = Holiday(
    "New Year's Holiday (Jan 2)",
    month=1,
    day=2,
)
NewYearsHolidayJan3 = Holiday(
    "New Year's Holiday (Jan 3)",
    month=1,
    day=3,
)

ComingOfAgeDay = Holiday(
    "Coming of Age Day",
    month=1,
    day=1,
    offset=DateOffset(weekday=MO(2)),
)

NationalFoundationDay = Holiday(
    "National Foundation Day",
    month=2,
    day=11,
    observance=sunday_to_monday,
)

# The dates on which the vernal/autumnal equinox will be observed
# are announced on the first weekday of February of the previous
# year, so we treat them as ad-hoc holidays, even though they
# occur every year. For more info, see:
# https://en.wikipedia.org/wiki/Public_holidays_in_Japan#cite_note-3
# For the list of equinoxes going back to 2000, see:
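A minimal sketch of the approach the comment describes, combining rule-based holidays with hand-maintained ad-hoc dates (the calendar class and the two equinox dates below are placeholders for illustration, not the project's real list):

import pandas as pd
from pandas.tseries.holiday import AbstractHolidayCalendar

class RuleBasedJapanHolidays(AbstractHolidayCalendar):
    rules = [ComingOfAgeDay, NationalFoundationDay]

# Announced year by year, so kept as an explicit list rather than a rule.
adhoc_equinoxes = pd.to_datetime(['2024-03-20', '2024-09-22'])

closures = RuleBasedJapanHolidays().holidays('2024-01-01', '2024-12-31')
closures = closures.union(adhoc_equinoxes)
print(closures)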
コード例 #28
0
def execute_search(criteria, session_key, reply_channel):
    run_from_management_command = criteria.get('data_mining')

    # Check-in range pre-calculated when running analytics
    if not run_from_management_command:  # pragma: no cover
        check_in = datetime.strptime(criteria['checkIn'], '%Y-%m-%d')
        check_out = datetime.strptime(criteria['checkOut'], '%Y-%m-%d')
        criteria['check_in_range'] = date_range(check_in, check_out - DateOffset(days=1))

    night_count = len(criteria['check_in_range'])

    outbound_message = {
        'status': '200',
        'currency': criteria['currency'],
        'currency_symbol': settings.CURRENCY_SYMBOLS[criteria['currency']],
        'country': criteria['country'],  # Blank if not country search
        'night_count': night_count,
    }

    try:
        criteria['city'] = unquote(criteria['city'])
        criteria['county'] = unquote(criteria['county'])
        criteria['state'] = unquote(criteria['state'])
        criteria['country'] = unquote(criteria['country'])

        if criteria['country'] in settings.BLOCKED_COUNTRIES:
            # We no longer permit searches for certain high-risk countries due
            # to high levels of attempted fraud. We block them in the front-end
            # but have this additional safeguard in case they are smart enough
            # to edit the URL directly (and another later in case they figure
            # out to submit a search without country parameter, but better to
            # catch them as early as possible)
            logger.error(
                'Someone tried searching for a blocked country {} via results URL'
                .format(criteria['country']))
            raise Exception

        _, stays = execute.search(criteria)

        if not run_from_management_command:  # pragma: no cover
            search_key = utils.create_session_key(
                unquote(criteria['place_name']),
                criteria['checkIn'],
                criteria['checkOut'],
                criteria['occupants'],
                criteria['latitude'],
                criteria['longitude'],
                criteria['currency'],
            )

            # Store complete record (including lengthy rateKey information) for
            # later use in stay detail view
            http_session = SessionStore(session_key=session_key)

            http_session[search_key] = {
                'stays': stays.to_json(),
                'timestamp': datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
            }

            http_session.save()

        fields_required_on_results_page = [
            'default_sort',
            'hotel_1_id',
            'check_in_1',
            'night_count_1',
            'entire_stay_cost_1',
            'hotel_2_id',
            'night_count_2',
            'entire_stay_cost_2',
            'switch_count',
            'distance_in_km',
            'rounded_stay_cost',
            'rounded_nightly_cost',
            'benchmark_stay_cost',
            'primary_star_rating',
            'review_score',
            'min_review_tier',
            'primary_review_tier',
            'refundable',
        ]

        required_fields_only_present_in_multi_night_search = [
            'check_in_2',
            'cost_delta_vs_stay_benchmark',
            'percentage_cost_delta_vs_stay_benchmark',
            'switch_benefit',
        ]

        if stays['switch_count'].max() > 0:  # pragma: no cover
            fields_required_on_results_page = \
                fields_required_on_results_page + required_fields_only_present_in_multi_night_search

            max_saving = abs(stays['percentage_cost_delta_vs_stay_benchmark'].min())
            if max_saving >= 0.3:
                log_max_saving(criteria, max_saving)

        if run_from_management_command:  # pragma: no cover
            # Hotel info not required; pass back to calling command
            fields_required_for_data_mining = ['stay_cost', 'cost_per_quality_unit']
            fields_required_on_results_page = \
                fields_required_on_results_page + fields_required_for_data_mining
            return stays[fields_required_on_results_page]

        outbound_message['stays'] = \
            stays[fields_required_on_results_page].to_json(orient='records')

        hotel_id_columns = stays.columns.str.contains(r'hotel_[\d]_id')
        hotel_ids = melt(stays.loc[:, hotel_id_columns]).dropna()['value'].unique()

        hotels = Hotel.objects.filter(hotel_id__in=hotel_ids).select_related().iterator()

        hotels = [{
            'hotel_id': str(hotel.hotel_id),  # String required for use as key
            'name': hotel.name,
            'star_rating': hotel.star_rating,
            'main_image_url': hotel.main_image_url,
            'recommendations': hotel.trustyou.recommendations,
            'summary': hotel.trustyou.summary,
            'trust_score': hotel.trustyou.trust_score,
            'trust_score_description': hotel.trustyou.trust_score_description,
            'review_count': hotel.trustyou.review_count,
            'category_badge': hotel.trustyou.category_badge,
            'latitude': hotel.latitude,
            'longitude': hotel.longitude,
        } for hotel in hotels]

        hotels = DataFrame(hotels)
        hotels.set_index('hotel_id', inplace=True)

        outbound_message['hotels'] = hotels.to_dict('index')

        min_stay_cost = stays['stay_cost'].min()
        max_stay_cost = stays['stay_cost'].max()
        try:  # pragma: no cover
            min_switch_distance = int(stays['distance_in_km'].min())
            max_switch_distance = int(stays['distance_in_km'].max())
        except ValueError:
            min_switch_distance = 0
            max_switch_distance = 0
        min_nightly_cost = min_stay_cost / night_count
        max_nightly_cost = max_stay_cost / night_count

        outbound_message['cost_ranges'] = {
            'minStayCost': floor(min_stay_cost),
            'maxStayCost': ceil(max_stay_cost),
            'minNightlyCost': floor(min_nightly_cost),
            'maxNightlyCost': ceil(max_nightly_cost),
        }

        outbound_message['distance_ranges'] = {
            'minDistanceSwitch': min_switch_distance,
            'maxDistanceSwitch': max_switch_distance,
        }

    except (RequestError, NoResultsError):
        error = 'RequestError or NoResultsError when searching for {}'.format(
            unquote(criteria['place_name'])
        )
        client.captureMessage(error)
        outbound_message['status'] = '503'
        logger.error(error)

        if run_from_management_command:  # pragma: no cover
            return DataFrame()

    except Exception:  # pragma: no cover
        outbound_message['status'] = '500'

        exception_type, _, exception_traceback = sys.exc_info()
        logger.error(exception_type)
        logger.error(pprint.pformat(traceback.format_tb(exception_traceback, limit=4)))

        if run_from_management_command:
            return DataFrame()

    if reply_channel is not None:  # pragma: no cover
        # This is actually tested but coverage cant detect it
        Channel(reply_channel).send({
            "text": json.dumps(outbound_message)
        })

    if outbound_message['status'] == '200':
        return True
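A side note on the check-in range built near the top of this function: subtracting one day from the check-out date before calling date_range yields exactly one entry per night, which is what night_count relies on. A small self-contained check (dates invented for the example):

import pandas as pd
from pandas import DateOffset, date_range

check_in = pd.Timestamp('2024-03-01')
check_out = pd.Timestamp('2024-03-04')

# One entry per night: Mar 1, Mar 2 and Mar 3 (check-out day excluded).
check_in_range = date_range(check_in, check_out - DateOffset(days=1))
print(len(check_in_range))  # 3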
コード例 #29
0
    day=24,
    start_date=Timestamp('1993-01-01'),
    # When Christmas is a Saturday, the 24th is a full holiday.
    days_of_week=(MONDAY, TUESDAY, WEDNESDAY, THURSDAY),
)
USNewYearsDay = new_years_day(
    # When Jan 1 is a Sunday, US markets observe the subsequent Monday.
    # When Jan 1 is a Saturday (as in 2005 and 2011), no holiday is observed.
    observance=sunday_to_monday)
USMartinLutherKingJrAfter1998 = Holiday(
    'Dr. Martin Luther King Jr. Day',
    month=1,
    day=1,
    # The US markets didn't observe MLK day as a holiday until 1998.
    start_date=Timestamp('1998-01-01'),
    offset=DateOffset(weekday=MO(3)),
)
USMemorialDay = Holiday(
    # NOTE: The definition for Memorial Day is incorrect as of pandas 0.16.0.
    # See https://github.com/pydata/pandas/issues/9760.
    'Memorial Day',
    month=5,
    day=25,
    offset=DateOffset(weekday=MO(1)),
)
USIndependenceDay = Holiday(
    'July 4th',
    month=7,
    day=4,
    observance=nearest_workday,
)
コード例 #30
0
ファイル: holiday.py プロジェクト: MarceloDL-A/metodos_python
        Parameters
        ----------
        other : holiday calendar
        inplace : bool (default=False)
            If True set rule_table to holidays, else return array of Holidays
        """
        holidays = self.merge_class(self, other)
        if inplace:
            self.rules = holidays
        else:
            return holidays


USMemorialDay = Holiday(
    "Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1))
)
USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1)))
USColumbusDay = Holiday(
    "Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2))
)
USThanksgivingDay = Holiday(
    "Thanksgiving Day", month=11, day=1, offset=DateOffset(weekday=TH(4))
)
USMartinLutherKingJr = Holiday(
    "Birthday of Martin Luther King, Jr.",
    start_date=datetime(1986, 1, 1),
    month=1,
    day=1,
    offset=DateOffset(weekday=MO(3)),
)