def compress_netcfd(folder_path, start_date, out_folder, file_name, num_of_rivids): """ Takes the 52 individual ensembles and combines them into one compact NetCDF file, saving disk space in the process. Parameters ---------- folder_path: str The path to the folder containing the 52 ensemble forecast files in NetCDF format start_date: str The start date in YYYYMMDD format. out_folder: str The path to the folder that you want the more compact NetCDF file in. file_name: str The name of the region. For example, if the files followed the pattern of "Qout_africa_continental_1.nc, this argument would be "Qout_africa_continental" num_of_rivids: int The number of streams that are contained in the region. """ # Based on 15 day forecast forecast_day_indices = np.array( [0, 8, 16, 24, 32, 40, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84], dtype=np.int8) # Based on 10 day forecast # Excluding the first day because we already have initialization from the normal forecasts high_res_forecast_day_indices = np.array( [24, 48, 72, 92, 100, 108, 112, 116, 120, 124]) start_datetime = to_datetime(start_date, infer_datetime_format=True) dates = date_range(start_datetime + DateOffset(1), periods=15) high_res_dates = date_range(start_datetime + DateOffset(1), periods=10) # Ensemble Dimensions # 1) Rivid # 2) Number of forecast days (i.e. 15 in a 15 day forecast) # 3) Number of ensembles ensembles = np.zeros((num_of_rivids, 15, 51), dtype=np.float32) initialization = np.zeros((num_of_rivids, ), dtype=np.float32) for forecast_number in range(1, 52): file = os.path.join(folder_path, "{}_{}.nc".format(file_name, forecast_number)) tmp_dataset = xr.open_dataset(file) streamflow = tmp_dataset['Qout'].data streamflow = streamflow[:, forecast_day_indices] if forecast_number == 1: initialization[:] = streamflow[:, 0] rivids = tmp_dataset['rivid'].data lat = tmp_dataset['lat'].data lon = tmp_dataset['lon'].data z = tmp_dataset['z'].data ensembles[:, :, forecast_number - 1] = streamflow[:, 1:] tmp_dataset.close() # High Res Forecast file = os.path.join(folder_path, "{}_52.nc".format(file_name)) tmp_dataset = xr.open_dataset(file) high_res_forecast_data = tmp_dataset["Qout"].data high_res_forecast_data = high_res_forecast_data[:, high_res_forecast_day_indices] tmp_dataset.close() data_variables = { "Qout": (['rivid', 'date', 'ensemble_number'], ensembles), "Qout_high_res": (['rivid', 'date_high_res'], high_res_forecast_data) } coords = { 'rivid': rivids, 'date': dates, 'date_high_res': high_res_dates, 'ensemble_number': np.arange(1, 52, dtype=np.uint8), 'initialization_values': ('rivid', initialization), 'lat': ('rivid', lat), 'lon': ('rivid', lon), 'z': ('rivid', z), 'start_date': start_datetime } xarray_dataset = xr.Dataset(data_variables, coords) xarray_dataset.to_netcdf(path=os.path.join(out_folder, '{}.nc'.format(start_date)), format='NETCDF4')
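# Usage sketch for compress_netcfd() above. The paths, region prefix and rivid
# count are hypothetical placeholders, not values from the original project;
# only the argument shapes follow the docstring. The imports are the ones the
# function body above relies on.
import os
import numpy as np
import xarray as xr
from pandas import to_datetime, date_range, DateOffset

compress_netcfd(
    folder_path="/data/forecasts/20200101",    # holds Qout_africa_continental_1.nc ... _52.nc
    start_date="20200101",                     # forecast start date, YYYYMMDD
    out_folder="/data/forecasts/compact",      # destination folder for the combined file
    file_name="Qout_africa_continental",       # shared prefix of the 52 ensemble files
    num_of_rivids=40000,                       # number of stream reaches in the region
)
# Result: /data/forecasts/compact/20200101.nc with Qout (rivid, date, ensemble_number)
# and Qout_high_res (rivid, date_high_res) variables.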
        ----------
        other : holiday calendar
        inplace : bool (default=False)
            If True set rule_table to holidays, else return array of Holidays
        """
        holidays = self.merge_class(self, other)
        if inplace:
            self.rules = holidays
        else:
            return holidays


USMemorialDay = Holiday('MemorialDay', month=5, day=31,
                        offset=DateOffset(weekday=MO(-1)))
USLaborDay = Holiday('Labor Day', month=9, day=1,
                     offset=DateOffset(weekday=MO(1)))
USColumbusDay = Holiday('Columbus Day', month=10, day=1,
                        offset=DateOffset(weekday=MO(2)))
USThanksgivingDay = Holiday('Thanksgiving', month=11, day=1,
                            offset=DateOffset(weekday=TH(4)))
USMartinLutherKingJr = Holiday('Dr. Martin Luther King Jr.',
                               start_date=datetime(1986, 1, 1), month=1,
def processing_time_offsets( # pylint: disable=too-many-locals self, df: pd.DataFrame, query_object: QueryObject, ) -> CachedTimeOffset: query_context = self._query_context # ensure query_object is immutable query_object_clone = copy.copy(query_object) queries: List[str] = [] cache_keys: List[Optional[str]] = [] rv_dfs: List[pd.DataFrame] = [df] time_offsets = query_object.time_offsets outer_from_dttm = query_object.from_dttm outer_to_dttm = query_object.to_dttm for offset in time_offsets: try: query_object_clone.from_dttm = get_past_or_future( offset, outer_from_dttm, ) query_object_clone.to_dttm = get_past_or_future( offset, outer_to_dttm) except ValueError as ex: raise QueryObjectValidationError(str(ex)) from ex # make sure subquery use main query where clause query_object_clone.inner_from_dttm = outer_from_dttm query_object_clone.inner_to_dttm = outer_to_dttm query_object_clone.time_offsets = [] query_object_clone.post_processing = [] if not query_object.from_dttm or not query_object.to_dttm: raise QueryObjectValidationError( _("An enclosed time range (both start and end) must be specified " "when using a Time Comparison.")) # `offset` is added to the hash function cache_key = self.query_cache_key(query_object_clone, time_offset=offset) cache = QueryCacheManager.get(cache_key, CacheRegion.DATA, query_context.force) # whether hit on the cache if cache.is_loaded: rv_dfs.append(cache.df) queries.append(cache.query) cache_keys.append(cache_key) continue query_object_clone_dct = query_object_clone.to_dict() # rename metrics: SUM(value) => SUM(value) 1 year ago metrics_mapping = { metric: TIME_COMPARISON.join([metric, offset]) for metric in get_metric_names( query_object_clone_dct.get("metrics", [])) } join_keys = [ col for col in df.columns if col not in metrics_mapping.keys() ] result = self._qc_datasource.query(query_object_clone_dct) queries.append(result.query) cache_keys.append(None) offset_metrics_df = result.df if offset_metrics_df.empty: offset_metrics_df = pd.DataFrame({ col: [np.NaN] for col in join_keys + list(metrics_mapping.values()) }) else: # 1. normalize df, set dttm column offset_metrics_df = self.normalize_df(offset_metrics_df, query_object_clone) # 2. rename extra query columns offset_metrics_df = offset_metrics_df.rename( columns=metrics_mapping) # 3. set time offset for index # TODO: add x-axis to QueryObject, potentially as an array for # multi-dimensional charts granularity = query_object.granularity index = granularity if granularity in df.columns else DTTM_ALIAS offset_metrics_df[index] = offset_metrics_df[ index] - DateOffset(**normalize_time_delta(offset)) # df left join `offset_metrics_df` offset_df = df_utils.left_join_df( left_df=df, right_df=offset_metrics_df, join_keys=join_keys, ) offset_slice = offset_df[metrics_mapping.values()] # set offset_slice to cache and stack. value = { "df": offset_slice, "query": result.query, } cache.set( key=cache_key, value=value, timeout=self.get_cache_timeout(), datasource_uid=query_context.datasource.uid, region=CacheRegion.DATA, ) rv_dfs.append(offset_slice) rv_df = pd.concat(rv_dfs, axis=1, copy=False) if time_offsets else df return CachedTimeOffset(df=rv_df, queries=queries, cache_keys=cache_keys)
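# Standalone sketch of the shift-and-join step above: the rows returned for a
# "1 year ago" query are re-dated onto the main query's time axis with a
# DateOffset and then left-joined on the shared dimension columns (the real
# code derives the offset from normalize_time_delta; frame contents and column
# names below are invented for illustration).
import pandas as pd
from pandas import DateOffset

main = pd.DataFrame({
    "__timestamp": pd.date_range("2021-01-01", periods=3, freq="MS"),
    "state": ["CA"] * 3,
    "sum__num": [10.0, 12.0, 15.0],
})
last_year = pd.DataFrame({
    "__timestamp": pd.date_range("2020-01-01", periods=3, freq="MS"),
    "state": ["CA"] * 3,
    "sum__num__1 year ago": [8.0, 9.0, 11.0],
})
# line the offset frame up with the main frame's axis before joining
last_year["__timestamp"] = last_year["__timestamp"] + DateOffset(years=1)
joined = main.merge(last_year, on=["__timestamp", "state"], how="left")
print(joined)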
def following_tuesday_every_four_years_observance(dt):
    return dt + DateOffset(years=(4 - (dt.year % 4)) % 4, weekday=TU(1))
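# Quick standalone check of the observance above (assumed to be in scope along
# with pandas' DateOffset and dateutil's TU): dates are pushed into the next
# year divisible by four and then rolled forward to the first Tuesday on or
# after that anniversary.
from pandas import Timestamp

for probe in ["2021-11-01", "2022-11-01", "2024-11-01"]:
    dt = Timestamp(probe)
    print(dt.date(), "->", following_tuesday_every_four_years_observance(dt).date())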
# 2010 but that does not appear to have been the case previously.
# We'll assume that this will be the behavior from now on.
AnzacDayNonMondayized = anzac_day(end_date='2010')
AnzacDay = anzac_day(observance=sunday_to_monday, start_date='2010')

# When Easter Monday and Anzac Day coincided in 2011, Easter Tuesday was
# also observed as a public holiday. Note that this isn't defined as a
# rule, because it will happen next in 2095 (and then in 2163), and
# there isn't a great way to tell how this will be handled at that point.
EasterTuesday2011AdHoc = Timestamp('2011-04-26', tz='UTC')

QueensBirthday = Holiday(
    "Queen's Birthday",
    month=6,
    day=1,
    offset=[DateOffset(weekday=MO(2))],
)

LastTradingDayBeforeChristmas = Holiday(
    'Last Trading Day Before Christmas',
    month=12,
    day=24,
    start_date='2010',
    observance=previous_friday,
)
Christmas = christmas()
WeekendChristmas = weekend_christmas()
BoxingDay = boxing_day()
WeekendBoxingDay = weekend_boxing_day()

LastTradingDayOfCalendarYear = Holiday(
def process_conciliacao_emprestimo(pfunc: DataFrame, ppessoa: DataFrame, psecao: DataFrame, pffinanc: DataFrame, pparam: DataFrame, pparamadicionais: DataFrame, emprestimo: DataFrame, conciliacao_emprestimo: DataFrame): """ TODO: doc string """ _pffinanc_valor_averbado = (pffinanc.merge( pparamadicionais, left_on=['codcoligada', 'anocomp', 'mescomp'], right_on=[ 'codcoligada', 'anocompcarolpffinanc', 'mescompcarolpffinanc' ], how='inner')[[ 'chapa', 'codcoligada', 'anocomp', 'mescomp', 'valor' ]].assign(valor=lambda df: df['valor'].astype(float)).groupby( by=['codcoligada', 'chapa', 'anocomp', 'mescomp' ])['valor'].sum().reset_index().rename( {'valor': 'pffinac_valoraverbado'}, axis=1)) _emprestimo_periodo = (emprestimo.assign( _vencimento_parcela=lambda df: to_datetime( df['vencimento_parcela'], format='%Y-%m-%dT%H:%M:%S.%f')).assign( anocomp=lambda df: df['_vencimento_parcela'].dt.year).assign( mescomp=lambda df: df['_vencimento_parcela'].dt.month)) _pparam_last_comp = (pparam.assign(_datelastcomp=lambda df: to_datetime( df['mescomp'].astype(str) + '-' + df['anocomp'].astype(str), format='%m-%Y') - DateOffset(months=1)).assign( anocomp=lambda df: df['_datelastcomp'].dt.year).assign( mescomp=lambda df: df['_datelastcomp'].dt.month)) df = (pfunc.merge( ppessoa, left_on=['codpessoa'], right_on=['codigo'], how='inner').merge( psecao, left_on=['codcoligada', 'codsecao'], right_on=['codcoligada', 'codigo'], how='inner').merge( _pparam_last_comp, left_on=['codcoligada'], right_on=['codcoligada'], how='inner').merge( _pffinanc_valor_averbado, left_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'], right_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'], how='left'). assign(cnpj=lambda df: df['cgc'].str.replace(r'\.|\/|\-', '')).merge( _emprestimo_periodo, left_on=['cpf', 'cnpj', 'anocomp', 'mescomp'], right_on=['cpf', 'cnpj', 'anocomp', 'mescomp'], how='inner').assign( periodo=lambda df: df['anocomp'].astype(str) + df['mescomp']. astype(str).str.pad(2, side='left', fillchar='0')).merge( conciliacao_emprestimo, left_on=[ 'cpf', 'cnpj', 'codigo_emprestimo', 'numero_da_parcela', 'periodo' ], right_on=[ 'cpffuncionario', 'cnpj', 'codigo_emprestimo', 'numero_da_parcela', 'periodo' ], how='inner').query('status_parcela == "Aberta"')) if len(df) > 0: df.loc[df['pffinac_valoraverbado'].notnull(), 'valor_averbado'] = df['pffinac_valoraverbado'] df.loc[df['valor_averbado'] > 0, 'status_parcela'] = 'Paga' df = df.assign(valor_nao_averbado=lambda df: df['valor_averbado'] - df[ 'valor_parcela']) return df[[ 'cnpj', 'cpffuncionario', 'periodo', 'valor_averbado', 'valor_nao_averbado', 'motivo', 'status_parcela', 'numero_da_parcela', 'codigo_emprestimo' ]]
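# The _pparam_last_comp block above derives the previous accounting period
# (competência) by rebuilding a date from mescomp/anocomp, stepping back one
# month with DateOffset, and re-extracting year and month.  A standalone sketch
# with made-up data:
from pandas import DataFrame, DateOffset, to_datetime

pparam = DataFrame({"codcoligada": [1], "anocomp": [2021], "mescomp": [1]})
prev = (
    pparam
    .assign(_ref=lambda df: to_datetime(
        df["mescomp"].astype(str) + "-" + df["anocomp"].astype(str), format="%m-%Y")
        - DateOffset(months=1))
    .assign(anocomp=lambda df: df["_ref"].dt.year,
            mescomp=lambda df: df["_ref"].dt.month)
)
print(prev[["codcoligada", "anocomp", "mescomp"]])   # codcoligada=1, anocomp=2020, mescomp=12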
def regular_holidays(self): return AbstractHolidayCalendar(rules=[ USNewYearsDay, Holiday( name="New Year's Day", month=1, day=2, observance=sunday_to_monday, ), Holiday( name="New Year's Day", month=1, day=3, observance=sunday_to_monday, ), Holiday( # second monday of january name="Coming of Age Day", month=1, day=1, offset=DateOffset(weekday=MO(2)), ), Holiday( name="National foundation day", month=2, day=11, observance=sunday_to_monday, ), Holiday(name="Vernal Equinox", month=3, day=20, observance=vernal_equinox), Holiday( name="Showa day", month=4, day=29, observance=sunday_to_monday, ), Holiday( name="Constitution memorial day", month=5, day=3, observance=sunday_to_monday, ), Holiday( name="Greenery day", month=5, day=4, observance=sunday_to_monday, ), Holiday( name="Children's day", month=5, day=5, observance=sunday_to_monday, ), Holiday( name="Marine day", month=7, day=1, offset=DateOffset(weekday=MO(3)), ), Holiday( name="Mountain day", month=8, day=11, observance=sunday_to_monday, ), Holiday( name="Respect for the aged day", month=9, day=1, offset=DateOffset(weekday=MO(3)), ), Holiday( name="Autumnal equinox", month=9, day=22, observance=autumnal_equinox, ), Holiday( name="Health and sports day", month=10, day=1, offset=DateOffset(weekday=MO(2)), ), Holiday( name="Culture day", month=11, day=3, observance=sunday_to_monday, ), Holiday( name="Labor Thanksgiving Day", month=11, day=23, observance=sunday_to_monday, ), Holiday( name="Emperor's Birthday", month=12, day=23, observance=sunday_to_monday, ), Holiday( name="Before New Year's Day", month=12, day=31, observance=sunday_to_monday, ), ])
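# To turn a rule table like the one above into concrete dates, the calendar's
# holidays() method is queried over a date range.  The sketch below builds a
# tiny analogous calendar from two of the fixed rules (the equinox observances
# above rely on helpers not shown here) and lists its dates for one year.
from pandas import DateOffset
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, sunday_to_monday
from dateutil.relativedelta import MO

class MiniJPCalendar(AbstractHolidayCalendar):
    rules = [
        Holiday("Coming of Age Day", month=1, day=1, offset=DateOffset(weekday=MO(2))),
        Holiday("National foundation day", month=2, day=11, observance=sunday_to_monday),
    ]

print(MiniJPCalendar().holidays(start="2024-01-01", end="2024-12-31"))
# -> 2024-01-08 (second Monday of January) and 2024-02-12 (Feb 11 falls on a Sunday)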
def history( self, code, period, unit=None, start=None, end=None, path=None, merge=True, warning=True, prev_next='0' ): """ Download historical market data of given code and save it as csv to given path :param code: str unique code of stock or sector :param period: str one of tick, min, day, week, month and year :param unit: int 1, 3, 5, 10, 30 etc.. (cf. 1 bar = unit * period) :param start: str string of start day in format 'YYYYMMDD' :param end: str string of end day in format 'YYYYMMDD'. if None, until now by default. :param path: str path to save downloaded data :param merge: bool whether to merge data with existing file or to overwrite it :param warning: bool turn on/off the warning message if any :param prev_next: str this param is given by the response from the server. default is '0' """ # Wait for default request limit, 3600 ms QTest.qWait(history.REQUEST_LIMIT_TIME) ctype = history.get_code_type(code) # ctype = 'stock' | 'sector' tr_code = history.get_tr_code(period, ctype) """ Setting args just for once. """ if prev_next == '0': # In case path is '' or None if not path: path = getcwd() # To share variables with Slot kwargs = effective_args(locals(), remove=['ctype', 'tr_code']) self.share.remove_single(name()) self.share.update_single(name(), 'error', False) self.share.update_single(name(), 'restart', False) self.share.update_single(name(), 'complete', False) self.share.update_single(name(), 'impossible', False) # To check format of input dates if 'start' in kwargs: if not history.is_date(start): raise ValueError(f"Given 'start' {start} is not a valid date.") if 'end' in kwargs: if not history.is_date(end): raise ValueError(f"Given 'end' {end} is not a valid date.") """ Check 'start' and 'end' points to save downloading time. """ if merge: try: file = join(path, code + '.csv') col = history.get_datetime_column(period) df = read_csv( file, index_col=[col], parse_dates=[col], encoding=config.ENCODING ) if period in ['tick', 'min']: # Last tick for stock is 15:30 and for sector is 18:00 h, m = (15, 30) if ctype is history.STOCK else (18, 00) # else for sector last_day = date(df.index[-1]) last_tick_of_day = Timestamp(df.index[-1]).replace(hour=h, minute=m) download_completed = last_tick_of_day <= df.index[-1] # To push 'start' date further as much as possible. If None, set newly. if 'start' not in kwargs or date(kwargs['start']) <= last_day: if download_completed: # Start from the day after last day kwargs['start'] = str((last_day + DateOffset(1)).date()).replace('-', '') else: # Start from the last day kwargs['start'] = str(last_day).replace('-', '') # If downloading is not needed, just return if 'end' in kwargs: if download_completed: if date(kwargs['end']) <= last_day: self.share.update_single(name(), 'complete', True) return else: # if period in ['day', 'week', 'year'] last_day = date(df.index[-1]) # To push 'start' date further as much as possible. If None, set newly. if 'start' not in kwargs or date(kwargs['start']) <= last_day: # Start from the last day kwargs['start'] = str(last_day).replace('-', '') # If downloading is not needed, just return if 'end' in kwargs: if date(kwargs['end']) < last_day: self.share.update_single(name(), 'complete', True) return # Once read, use later in Server.history_to_csv() to increase efficiency self.share.update_single(name(), 'file', df) # If any exception, just skip except Exception as err: pass """ Update and print arguments. 
""" # Done arg setting self.share.update_args(name(), kwargs) # Print args f = lambda key: f"'{kwargs[key]}'" if key in kwargs else None print(f"{{code={f('code')}, start={f('start')}, end={f('end')}, period={f('period')}}}") """ Start downloading. """ # Check requesting status self.share.single['histories']['nrq'] += 1 if history.SPEEDING: if self.share.get_single('histories', 'nrq') >= history.REQUEST_LIMIT_TRY: # Set back to default configuration if self.share.get_single('histories', 'cnt') == 0: self.share.update_single(name(), 'impossible', True) self.share.update_single(name(), 'restart', True) self.api.unloop() return # Finally request data to server for key, val in history.inputs(tr_code, code, unit, end): self.api.set_input_value(key, val) scr_no = self.scr.alloc(tr_code, code) # If comm_rq_data returns non-zero error code, restart downloading if self.api.comm_rq_data(name(), tr_code, prev_next, scr_no) != 0: self.share.update_single(name(), 'impossible', True) self.share.update_single(name(), 'restart', True) self.api.unloop() return # Wait response from the server self.api.loop()
def business_hours(start, end):
    """Computes the number of working hours between two dates.
    (There's gotta be a better way to do this.)"""
    return len(bdate_range(start, end, freq=DateOffset(hours=1)))
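# Usage sketch for business_hours() above.  Note that bdate_range normalizes its
# endpoints to midnight by default, so the count effectively runs between the
# two calendar dates (both midnights included) rather than the exact clock
# times passed in, and an hourly DateOffset does not itself skip weekends.
from pandas import bdate_range, DateOffset

print(business_hours("2021-03-15", "2021-03-17"))   # 49 hourly points, midnight to midnight inclusive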
class Command(BaseCommand): help = "Run multiple searches and log the cheapest and best value switches" requires_migrations_checks = True base_data = { 'data_mining': True, 'source_market': 'UK', 'place_name': '', 'latitude': '0', 'longitude': '0', 'occupants': '2', 'currency': 'gbp', 'county': '', } base_check_in = datetime.strptime('2017-05-05', '%Y-%m-%d') check_in_range = date_range(base_check_in, base_check_in + DateOffset(days=60)) cities = [ { 'city': 'New York', 'state': 'NY', 'country': 'US', }, { 'city': 'Paradise', 'state': 'NV', 'country': 'US', }, { 'city': 'Austin', 'state': 'TX', 'country': 'US', }, { 'city': 'London', 'state': 'England', 'country': 'GB', }, { 'city': 'Barcelona', 'state': 'CT', 'country': 'ES', }, { 'city': 'Milan', 'state': 'Lombardy', 'country': 'IT', }, { 'city': 'Shanghai', 'state': 'Shanghai', 'country': 'CN', }, { 'city': 'Bangkok', 'state': '', 'country': 'TH', }, { 'city': 'Singapore', 'state': '', 'country': 'SG', }, ] stay_durations = [3, 4, 5, 6] def handle(self, *args, **options): try: os.rename('analysis_output.csv', 'check_file_access.csv') os.rename('check_file_access.csv', 'analysis_output.csv') except OSError: raise Exception( 'Destination file is still open. Please close before running!') all_stays = [] for city in self.cities: for check_in in self.check_in_range: for duration in self.stay_durations: check_out = check_in + DateOffset(days=duration) check_in_range = date_range(check_in, check_out - DateOffset(days=1)) data = self.base_data.copy() data.update({ 'checkIn': check_in, 'checkOut': check_out, 'check_in_range': check_in_range, 'country': city['country'], 'state': city['state'], 'city': city['city'], }) stays = tasks.execute_search(data, '', None) result_count = len(stays) if result_count == 0: continue stays.query('hotel_2_id != -1', inplace=True) grouping_columns = [ 'primary_star_rating', 'min_review_tier' ] stays.sort_values('stay_cost', inplace=True) unrestricted_low_cost_stays = stays.groupby( grouping_columns).nth(0) unrestricted_low_cost_stays['restricted'] = False stays.sort_values('cost_per_quality_unit', inplace=True) unrestricted_best_value_stays = stays.groupby( grouping_columns).nth(0) unrestricted_best_value_stays['restricted'] = False switches_with_both_benchmarks = \ 'entire_stay_cost_1 == entire_stay_cost_1 \ and entire_stay_cost_2 == entire_stay_cost_2' stays.query(switches_with_both_benchmarks, inplace=True) stays.sort_values('stay_cost', inplace=True) restricted_low_cost_stays = stays.groupby( grouping_columns).nth(0) restricted_low_cost_stays['restricted'] = True stays.sort_values('cost_per_quality_unit', inplace=True) restricted_best_value_stays = stays.groupby( grouping_columns).nth(0) restricted_best_value_stays['restricted'] = True scenarios = [ unrestricted_low_cost_stays, unrestricted_best_value_stays, restricted_low_cost_stays, restricted_best_value_stays, ] stays = concat(scenarios) stays.reset_index(inplace=True) stays.drop_duplicates(inplace=True) stays['city'] = city['city'] stays['check_in'] = check_in stays['duration'] = duration stays['result_count'] = result_count all_stays.append(stays) logger.warn('{}, {:%Y-%m-%d}, {}'.format( city['city'], check_in, duration)) stays = concat(all_stays).to_csv('analysis_output.csv', index=False)
def tf(self, d1, d2): """Calculates time fraction (in year fraction) between two dates given day count convention""" d1 = self.adjust(d1) d2 = self.adjust(d2) # Save adjustment state and set it to none, so we can safely use the # days and dib functions of "date splits" we produce in for some # day counts state = self.adj self.adj = None if self.dc == 'ACT/ACT ICMA': raise AttributeError('The time fraction function cannot be used ' 'for the %s convention' % self.dc) if not (self.dc == 'ACT/ACT ISDA' or self.dc == 'ACT/ACT AFB' or self.dc == '1/1'): yf = self.days(d1, d2) / self.dib(d1, d2) elif self.dc == 'ACT/ACT ISDA': # We could treat everything as an array, we leave the dual # implementation because vectorizing is clumsy. So, we just # mimic the interface if isinstance(d1, Timestamp) and isinstance(d2, Timestamp): # We place the assertion here to save some thought in the # recursion (we check one by one or delegate) assert d1 <= d2, 'First date must be smaller or equal to ' \ 'second date' if d1.year == d2.year: yf = self.days(d1, d2) / self.dib(d1, d2) else: ey1 = to_datetime(str(d1.year) + '-12-31') ey2 = to_datetime(str(d2.year - 1) + '-12-31') yf = (d2.year - d1.year - 1) + \ (self.days(d1, ey1) / self.dib(d1, d1)) + \ (self.days(ey2, d2) / self.dib(d2, d2)) else: # This is the dreaded vectorized case that, for now, # will be dealt by simulating the interface result = list() f = result.append for t1, t2 in broadcast(d1, d2): f(self.tf(t1, t2)) yf = asarray(result, dtype='float64') elif self.dc == '1/1': # See notes in the ACT/ACT sections about vectorization if isinstance(d1, Timestamp) and isinstance(d2, Timestamp): # We place the assertion here to save some thought in the # recursion (we check one by one or delegate) assert d1 <= d2, 'First date must be smaller or equal to ' \ 'second date' if (d1.day == d2.day and d1.month == d2.month) \ or (d1.month == 2 and d2.month == 2 and d1.day in [28, 29] and d2.day in [28, 29]): yf = int(0.5 + self.days(d1, d2) / self.dib(d1, d2)) else: # This is the same as ACT/ACT. We tweak the DC and bring # it back. This is computationally costly (as a parsing # of the day count is involved at each step), but safer # from an implementation perspective. self.dc = 'act/act isda' yf = self.tf(d1, d2) self.dc = '1/1' else: # This is the dreaded vectorized case that, for now, # will be dealt by simulating the interface result = list() f = result.append for t1, t2 in broadcast(d1, d2): f(self.tf(t1, t2)) yf = asarray(result, dtype='float64') elif self.dc == 'ACT/ACT AFB': if isinstance(d1, Timestamp) and isinstance(d2, Timestamp): # We place the assertion here to save some thought in the # recursion (we check one by one or delegate) assert d1 <= d2, 'First date must be smaller or equal to ' \ 'second date' # We need to loop back from d2 counting the number of # years we can subtract until we close interval. Note that # every time we fall on a Feb 29th, a year offset will land # us on Feb 28th. In this cases, we need to add the missing # day fraction (1/366). Note that we add it only once, # and not the number of leap days in interval divided by # 366. Why? While the documents are not super clear about # this, it seems reasonable to infer that from the "counting # back" rule, where we are always subtracting entire years. 
# # 2004-02-28 to 2008-02-27 = 3 + 365/366 # 2004-02-28 to 2008-02-28 = 4 # 2004-02-28 to 2008-02-29 = 4 + 1/366 # 2004-02-28 to 2012-02-28 = 8 # 2004-02-28 to 2012-02-29 = 8 + 1/366 (and NOT 2/366) n = 0 offset = 0 while d2 - DateOffset(years=1) >= d1: if d2.day == 29 and d2.month == 2: offset += 1 / 366 n += 1 d2 = d2 - DateOffset(years=1) yf = n + offset + (self.days(d1, d2) / self.dib(d1, d2)) else: # This is the dreaded vectorized case that, for now, # will be dealt by simulating the interface result = list() f = result.append for t1, t2 in broadcast(d1, d2): f(self.tf(t1, t2)) yf = asarray(result, dtype='float64') else: raise NotImplementedError('Day count %s not supported' % self.dc) # Return state self.adj = state return yf
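# Standalone sketch of the ACT/ACT AFB "count back whole years" loop above
# (stripped of the class machinery).  The stub fraction at the end is left out;
# in the real implementation it comes from days()/dib(), with a 366-day basis
# only when the stub period contains Feb 29.
from pandas import Timestamp, DateOffset

d1, d2 = Timestamp("2004-02-28"), Timestamp("2012-02-29")
n, leap_correction = 0, 0.0
while d2 - DateOffset(years=1) >= d1:
    if d2.month == 2 and d2.day == 29:
        leap_correction += 1 / 366   # added once per Feb-29 landing, as noted above
    n += 1
    d2 = d2 - DateOffset(years=1)
print(n, leap_correction)   # 8 whole years plus a single 1/366 correction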
def get_device_data(self, start_date=None, end_date=None, frequency='1H', clean_na=None): ''' Based on code snippet from Marc Roig: # I2CAT RESEARCH CENTER - BARCELONA - MARC ROIG ([email protected]) ''' std_out(f'Requesting data from Dades Obertes API') std_out(f'Device ID: {self.id}') self.get_device_sensors() self.get_device_location() request = self.API_BASE_URL request += f'codi_eoi={self.id}' if start_date is not None and end_date is not None: request += "&$where=data between " + to_datetime(start_date).strftime("'%Y-%m-%dT%H:%M:%S'") \ + " and " + to_datetime(end_date).strftime("'%Y-%m-%dT%H:%M:%S'") elif start_date is not None: request += "&$where=data >= " + to_datetime(start_date).strftime( "'%Y-%m-%dT%H:%M:%S'") elif end_date is not None: request += "&$where=data < " + to_datetime(end_date).strftime( "'%Y-%m-%dT%H:%M:%S'") try: s = get(request) except: print_exc() std_out('Problem with sensor data from API', 'ERROR') pass return None if s.status_code == 200 or s.status_code == 201: df = read_csv(StringIO(s.content.decode('utf-8'))) else: std_out('API reported {}'.format(s.status_code), 'ERROR') pass return None # Filter columns measures = ['h0' + str(i) for i in range(1, 10)] measures += ['h' + str(i) for i in range(10, 25)] # validations = ['v0' + str(i) for i in range(1,10)] # validations += ['v' + str(i) for i in range(10,25)] new_measures_names = list(range(1, 25)) columns = ['contaminant', 'data'] + measures # + validations try: df_subset = df[columns] df_subset.columns = ['contaminant', 'date'] + new_measures_names except: print_exc() std_out('Problem while filtering columns', 'Error') return None else: std_out('Successful filtering', 'SUCCESS') # Pivot try: df = DataFrame([]) for contaminant in self.sensors.keys(): if contaminant not in df_subset['contaminant'].values: std_out(f'{contaminant} not in columns. 
Skipping', 'WARNING') continue df_temp = df_subset.loc[ df_subset['contaminant'] == contaminant].drop( 'contaminant', 1).set_index('date').unstack().reset_index() df_temp.columns = ['hours', 'date', contaminant] df_temp['date'] = to_datetime(df_temp['date']) timestamp_lambda = lambda x: x['date'] + DateOffset(hours=int( x['hours'])) df_temp['date'] = df_temp.apply(timestamp_lambda, axis=1) df_temp = df_temp.set_index('date') df[contaminant] = df_temp[contaminant] except: # print_exc() std_out('Problem while filtering columns', 'Error') pass return None else: std_out('Successful pivoting', 'SUCCESS') df.index = to_datetime(df.index).tz_localize('UTC').tz_convert( self.location) df.sort_index(inplace=True) # Rename try: df.rename(columns=self.sensors, inplace=True) except: # print_exc() std_out('Problem while renaming columns', 'Error') pass return None else: std_out('Successful renaming', 'SUCCESS') # Clean df = df[~df.index.duplicated(keep='first')] # Drop unnecessary columns df.drop([i for i in df.columns if 'Unnamed' in i], axis=1, inplace=True) # Check for weird things in the data df = df.apply(to_numeric, errors='coerce') # Resample df = df.resample(frequency).mean() try: df = df.reindex(df.index.rename('Time')) df = clean(df, clean_na, how='all') # if clean_na is not None: # if clean_na == 'drop': # # std_out('Cleaning na with drop') # df.dropna(axis = 0, how='all', inplace=True) # elif clean_na == 'fill': # df = df.fillna(method='bfill').fillna(method='ffill') # # std_out('Cleaning na with fill') self.data = df except: std_out('Problem closing up the API dataframe', 'ERROR') pass return None std_out(f'Device {self.id} loaded successfully from API', 'SUCCESS') return self.data
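# The pivot above turns the API's wide "one row per pollutant per day, one
# column per hour" layout into a timestamped series by unstacking the hour
# columns and adding the hour back onto the date with DateOffset.  A toy
# reproduction with two hours of made-up data:
from pandas import DataFrame, DateOffset, to_datetime

wide = DataFrame({"date": ["2021-06-01"], 1: [12.0], 2: [15.0]})
long = wide.set_index("date").unstack().reset_index()
long.columns = ["hours", "date", "NO2"]
long["date"] = long.apply(
    lambda row: to_datetime(row["date"]) + DateOffset(hours=int(row["hours"])), axis=1)
print(long.set_index("date")["NO2"])   # 2021-06-01 01:00 -> 12.0, 02:00 -> 15.0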
def test_both_offset_observance_raises(self):
    with self.assertRaises(NotImplementedError) as cm:
        h = Holiday("Cyber Monday", month=11, day=1,
                    offset=[DateOffset(weekday=SA(4))],
                    observance=next_monday)
def scan_price(gdax, product, alert_method, low_notify=None, high_notify=None, interval=60, end_time=None, change_rate=.015, alert_addr=None): """ Scans a GdaxProduct's orderbook regularly evaluating ask prices and sending notifications when price hits a high or low target. This function is designed to run for days, weeks, and months at a time as it makes adjustments to target high and low prices as they're reached. :param gdax: (gdax.api.Gdax) The Gdax API object to be used when calling. :param product: (str) ETH-USD, LTC-USD, BTC-USD :param alert_method: (callable) Should be a method that can be called like so: alert_method(alert_addr, text) :param low_notify: (numeric, default None) The lower trigger price to alert on. None will default to: price - (price * change_rate) This value is decreased by the change rate when reached. This value is increased by the change rate when the high_notify price is reached. :param high_notify: (numeric, default None) The upper trigger price to alert on. None will default to: price + (price * change_rate) This value is increased by the change rate when reached. This value is decreased by the change rate when the low_notify price is reached. :param interval: (int, default 60) Number of seconds between order book scans. :param end_time: (DateTime, default None) The date/time to stop scanning the books None will default to 99 weeks from function start time. :param change_rate: (float, default 0.015) The rate at which the high_notify & low_notify target values change as the price moves. :return: None """ from time import sleep from datetime import datetime from pandas import DateOffset if alert_addr is None: alert_addr = '*****@*****.**' both_flags = all((low_notify, high_notify)) if not both_flags: prod = gdax.get_product(product) p = prod.price if low_notify is None: low = p - (p * change_rate) low_notify = round(low, 2) if high_notify is None: high = p + (p * change_rate) high_notify = round(high, 2) if end_time is None: end_time = datetime.now() + DateOffset(weeks=99) ask_store, last_ping, i = list(), None, 0 print("Initializing {} price scan " "looking for low {} " "and high {}".format(product, low_notify, high_notify)) # This value is increased by ~30% # every time a notification is sent. # It represents how many minutes to wait before # re-sending the same notification wait_time = 5 while True: i += 1 now = datetime.now() try: book = gdax.get_book(product, level=1) except Exception as e: sleep(interval) print(e) continue asks = book['asks'] lowest_ask = float(asks[0][0]) diff_high = round(high_notify - lowest_ask, 2) diff_low = round(low_notify - lowest_ask, 2) pct_away_high = round(100 - ((lowest_ask / high_notify) * 100), 2) pct_away_low = round(100 - ((lowest_ask / low_notify) * 100), 2) meets_low = (low_notify and lowest_ask <= low_notify) meets_high = (high_notify and lowest_ask >= high_notify) meets_criteria = meets_low or meets_high # This information should probably somehow get stored? 
# NOTE: the program is threaded so it'd need to generate it's own # SQL connection ....or open and write to a file every 50 lines or something #ask_store.append([now, lowest_ask, diff_high, pct_away_high, diff_low, pct_away_low]) msg = 'Price: ${} -' \ 'Target: ${} - away: ${}/{}% -' \ 'Stop: ${}- away: ${}/%{}'.format(lowest_ask, high_notify, diff_high, pct_away_high, low_notify, diff_low, pct_away_low) msg = '{}: ({}-{})\n{}'.format(str(now)[:19], i, product, msg) print(msg) print("\n") if meets_criteria: send_msg = True m = int(wait_time) min_time = now - DateOffset(minutes=m) if last_ping and last_ping > min_time: send_msg = False if send_msg: alert_method(alert_addr, msg) last_ping = now # Increase/decrease the target price by change_rate if meets_low: v = round(low_notify * change_rate, 2) low_notify -= v high_notify -= v if meets_high: v = round(high_notify * change_rate, 2) high_notify += v low_notify += v if now >= end_time: break sleep(interval) return ask_store
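# A hedged usage sketch for scan_price() above.  The Gdax wrapper object, the
# alert function and the address are placeholders, not part of the original
# project; only the keyword shapes follow the docstring.
def sms_alert(address, text):
    print(f"ALERT to {address}: {text}")          # stand-in for a real notifier

# gdax = Gdax(...)                                # hypothetical client setup
# scan_price(gdax, 'ETH-USD', sms_alert,
#            interval=60,                         # check the order book once a minute
#            change_rate=0.015,                   # move targets 1.5% when a target is hit
#            alert_addr='[email protected]')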
def setup_management_options(folder, crop, config, gridpoint, gridLut, shortcut, rulesList, soil): ''' Setup management rules based on grid cell. REMEMBER: order matters! ''' # reset water, nitrogen, and surfaceOM rule #manager.reset_on_sowing(folder, crop, soilmodule=soil.get('name')) # set dates from config file sowStart = config.sowStart sowEnd = config.sowEnd # if sowStart is 'auto', set by location from provided lookup table if sowStart == 'auto': sowStart = gridLut['sow_start'][gridpoint] # if sowEnd is 'auto', set by location from provided lookup table if sowEnd == 'auto': sowEnd = gridLut['sow_end'][gridpoint] # end crop on fixed date rule # removes any crop that may be left in the groud 2 days before sowing endDate = datetime.strptime(sowStart, '%d-%b') - DateOffset(2) endDate = datetime.strftime(endDate, '%d-%b') manager.end_crop_on_fixed_date_rule(folder, crop, endDate) #--------------------------------Hamze added this and this adds the biochar module manager.BiocharApplication_rule(folder, AppDate=config.BAD, BAR=config.BAR) if crop == 'maize': # sowing rule if sowEnd == '': manager.sowOnFixedDate_rule(folder, crop, date=sowStart,\ density=config.density,\ depth=config.depth,\ cultivar=config.cultivar,\ gclass=config.gclass,\ row_spacing=config.rowSpacing) else: manager.sowUsingAVariable_rule(folder, crop, start_date=sowStart,\ end_date=sowEnd,\ density=config.density,\ depth=config.depth,\ cultivar=config.cultivar,\ gclass=config.gclass,\ row_spacing=config.rowSpacing) elif crop == 'cotton': # sowing rule if sowEnd == '': manager.cotton_fixed_date_sowing_rule(folder, crop, date=sowStart) else: manager.cotton_sowing_rule(folder, crop, start_date=sowStart,\ end_date=sowEnd) # add each shared rule to grid point for rulename, ruleType in rulesList: apsim.new_management_rule(folder, rulename, ruleType, shortcut)
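# The end-of-crop date above is derived by parsing the day-month sowing string,
# stepping back two days with DateOffset, and re-serializing it in the same
# '%d-%b' form APSIM expects.  Standalone check (sowStart value invented):
from datetime import datetime
from pandas import DateOffset

sowStart = '15-Nov'
endDate = datetime.strptime(sowStart, '%d-%b') - DateOffset(2)
print(datetime.strftime(endDate, '%d-%b'))   # '13-Nov'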
        self.lines.datetime[0] = d
        self.lines.openinterest[0] = 0.0
        self.lines.open[0] = o
        self.lines.high[0] = h
        self.lines.low[0] = l
        self.lines.close[0] = c
        self.lines.volume[0] = v
        return True


if __name__ == "__main__":
    from stocklook.utils.timetools import today
    from pandas import DateOffset

    start = today() - DateOffset(years=1)
    end = today()
    period = 2 * 60 * 60

    cerebro = bt.Cerebro()
    cerebro.addstrategy(SmaCross)
    data0 = PoloniexDataFeed(dataname='BTC_LTC',
                             fromdate=start,
                             todate=end,
                             period=period)
    cerebro.adddata(data0)
    cerebro.run()
    cerebro.plot()
from pandas import DataFrame, DateOffset

from weaverbird.backends.pandas_executor.types import DomainRetriever, PipelineExecutor
from weaverbird.exceptions import DuplicateError
from weaverbird.pipeline.steps import EvolutionStep

OFFSETS = {
    'vsLastYear': DateOffset(years=1),
    'vsLastMonth': DateOffset(months=1),
    'vsLastWeek': DateOffset(weeks=1),
    'vsLastDay': DateOffset(days=1),
}


def execute_evolution(
    step: EvolutionStep,
    df: DataFrame,
    domain_retriever: DomainRetriever = None,
    execute_pipeline: PipelineExecutor = None,
) -> DataFrame:
    new_column = step.new_column or f'{step.value_col}_EVOL_{step.evolution_format.upper()}'
    df = df.reset_index(drop=True)
    id_cols = [step.date_col] + step.index_columns
    if df.set_index(id_cols).index.duplicated().any():
        raise DuplicateError(
            'Multiple rows for the same date. Did you forget indexColumns?')
    date_col_offseted = df[step.date_col] + OFFSETS[step.evolution_type]
    df_offseted = df.assign(**{step.date_col: date_col_offseted})
    both = df.merge(df_offseted,
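# (The snippet above is truncated at the self-merge.)  The OFFSETS table maps an
# evolution type to the DateOffset used to shift each row's date forward before
# that merge, so every row lines up with the period it is compared against.  A
# toy check of the shift itself, with invented frame contents:
from pandas import DataFrame, DateOffset, to_datetime

df = DataFrame({"DATE": to_datetime(["2020-01-31", "2020-02-29"]), "VALUE": [100, 110]})
shifted = df.assign(DATE=df["DATE"] + DateOffset(months=1))
print(shifted)
# 2020-01-31 becomes 2020-02-29 and 2020-02-29 becomes 2020-03-29, i.e. each
# value is re-dated onto the month it will be compared with.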
def process_funcionario(pfunc: DataFrame, ppessoa: DataFrame, psecao: DataFrame, pfperff: DataFrame, pfemprt: DataFrame, pparam: DataFrame, pffinanc: DataFrame) -> DataFrame: """ TODO: doc string """ _pfperff_group_by_codcoligada_chapa_ano_mes_comp = (pfperff.assign( liquido=lambda df: df['liquido'].astype(float)).groupby( by=['codcoligada', 'chapa', 'anocomp', 'mescomp' ])['liquido'].sum().reset_index()) _pfemprt_group_by_codcoligada_chapa = (pfemprt.assign( saldodevedor=lambda df: df['saldodevedor'].astype(float)).groupby( by=['codcoligada', 'chapa'])['saldodevedor'].sum().reset_index()) _pffinanc_group_by_ano_mes_comp = pffinanc.groupby( by=['anocomp', 'mescomp', 'chapa', 'codcoligada' ])['valor'].count().reset_index() _pparam_anterior = (pparam.assign(_datacompanterior=lambda df: to_datetime( df['mescomp'].astype(str) + '-' + df['anocomp'].astype(str), format='%m-%Y') - DateOffset(months=1)).assign( anocomp=lambda df: df['_datacompanterior'].dt.year).assign( mescomp=lambda df: df['_datacompanterior'].dt.month)[[ 'anocomp', 'mescomp', 'codcoligada' ]]) # Funcionários ativos devem utilizar a competência anterior _func_ativo = (pfunc.merge( ppessoa, left_on=['codpessoa'], right_on=['codigo'], how='inner').merge(psecao, left_on=['codcoligada', 'codsecao'], right_on=['codcoligada', 'codigo'], how='inner').merge( _pparam_anterior, left_on=['codcoligada'], right_on=['codcoligada'], how='inner').query('codsituacao != "D"')[[ 'cgc', 'chapa', 'dataadmissao', 'cpf', 'datademissao', 'nome', 'codsituacao', 'telefone1', 'codcoligada', 'salario', 'anocomp', 'mescomp' ]]) # Funcionários demitidos na competência atual devem utilizar a competência anterior _func_demitido_atual = ( pfunc.merge( ppessoa, left_on=['codpessoa'], right_on=['codigo'], how='inner').merge( psecao, left_on=['codcoligada', 'codsecao'], right_on=['codcoligada', 'codigo'], how='inner').merge( pparam, left_on=['codcoligada'], right_on=['codcoligada'], how='inner').rename( { 'anocomp': 'demissao_anocomp', 'mescomp': 'demissao_mescomp' }, axis=1).reset_index().query('codsituacao == "D"'). 
assign(_datademissao=lambda df: to_datetime( df['datademissao'], format='%Y-%m-%dT%H:%M:%S.%f' )).query( '_datademissao.dt.month == demissao_mescomp & _datademissao.dt.year == demissao_anocomp' ).merge(_pparam_anterior, left_on=['codcoligada'], right_on=['codcoligada'], how='inner')[[ 'cgc', 'chapa', 'dataadmissao', 'cpf', 'datademissao', 'nome', 'codsituacao', 'telefone1', 'codcoligada', 'salario', 'anocomp', 'mescomp' ]]) # Funcionários demitidos na competência anterior devem utilizar a mesma _func_demitido = (pfunc.merge( ppessoa, left_on=['codpessoa'], right_on=['codigo'], how='inner' ).merge( psecao, left_on=['codcoligada', 'codsecao'], right_on=['codcoligada', 'codigo'], how='inner' ).query( 'codsituacao == "D"' ).merge( _pparam_anterior, left_on=['codcoligada' ], right_on=[ 'codcoligada' ], how='inner' ).assign( _datademissao=lambda df: to_datetime(df['datademissao'], format='%Y-%m-%dT%H:%M:%S.%f') ).query( '_datademissao.dt.month == mescomp & _datademissao.dt.year == anocomp') [[ 'cgc', 'chapa', 'dataadmissao', 'cpf', 'datademissao', 'nome', 'codsituacao', 'telefone1', 'codcoligada', 'salario', 'anocomp', 'mescomp' ]]) return (concat( [_func_ativo, _func_demitido, _func_demitido_atual], ignore_index=True).drop_duplicates().reset_index(drop=True).merge( _pffinanc_group_by_ano_mes_comp, left_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'], right_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'], how='inner').merge( _pfperff_group_by_codcoligada_chapa_ano_mes_comp, left_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'], right_on=['codcoligada', 'chapa', 'anocomp', 'mescomp'], how='left').merge( _pfemprt_group_by_codcoligada_chapa, left_on=['codcoligada', 'chapa'], right_on=['codcoligada', 'chapa'], how='left').assign(emprestimoexterno=lambda df: ~df[ 'saldodevedor'].isna() & df['saldodevedor'] > 0.0). assign(cnpj=lambda df: df['cgc'].str.replace(r'\.|\/|\-', '')). assign(codrecisaorais=lambda df: None).assign( consignavel=lambda df: (df['liquido'] * 0.3).round(2))[[ 'cnpj', 'chapa', 'dataadmissao', 'cpf', 'datademissao', 'consignavel', 'emprestimoexterno', 'nome', 'salario', 'codsituacao', 'telefone1', 'codrecisaorais' ]].rename( { 'dataadmissao': 'admissao', 'datademissao': 'demissao', 'codpessoa': 'chavefuncionario', 'codsituacao': 'situacaofuncionario', 'telefone1': 'telefone', 'chapa': 'matriculafuncionario', 'cpf': 'cpffuncionario' }, axis=1))
#gw3 = gw2.reset_index() # #mon_gw1 = grp_ts_agg(gw3, 'site', 'time', 'M').median().reset_index() #mon_gw1['mon'] = mon_gw1.time.dt.month #mon_gw1['mtype'] = 'gw' ### Combine all mtypes #mon_summ = concat([mon_flow1, mon_precip1, mon_gw1]).reset_index(drop=True) mon_summ = concat([mon_flow1, mon_precip1]).reset_index(drop=True) ############################################### #### Pull out recent monthly data from hydrotel now1 = to_datetime(param.date_now) start_date = now1 - DateOffset(months=param.n_previous_months) - DateOffset( days=now1.day - 1) end_date = now1 - DateOffset(days=now1.day - 1) ### SW print('Getting HydroTel Flow Data:') sites2 = sites1.copy() sites2.loc[sites2.site.isin([64610, 65104, 68526]), 'site'] = [164610, 165104, 168526] hy_sites = sites2.site.astype(str).tolist() hy1 = get_ts_data(param.hydrotel_server, param.hydrotel_database, 'flow', hy_sites,
def test_time_offsets_accuracy(self): payload = get_query_context("birth_names") payload["queries"][0]["metrics"] = ["sum__num"] payload["queries"][0]["groupby"] = ["state"] payload["queries"][0]["is_timeseries"] = True payload["queries"][0]["timeseries_limit"] = 5 payload["queries"][0]["time_offsets"] = [] payload["queries"][0]["time_range"] = "1980 : 1991" payload["queries"][0]["granularity"] = "ds" payload["queries"][0]["extras"]["time_grain_sqla"] = "P1Y" query_context = ChartDataQueryContextSchema().load(payload) query_object = query_context.queries[0] query_result = query_context.get_query_result(query_object) # get main query dataframe df = query_result.df # set time_offsets to query_object payload["queries"][0]["time_offsets"] = ["3 years ago", "3 years later"] query_context = ChartDataQueryContextSchema().load(payload) query_object = query_context.queries[0] time_offsets_obj = query_context.processing_time_offsets(df, query_object) df_with_offsets = time_offsets_obj["df"] df_with_offsets = df_with_offsets.set_index(["__timestamp", "state"]) # should get correct data when apply "3 years ago" payload["queries"][0]["time_offsets"] = [] payload["queries"][0]["time_range"] = "1977 : 1988" query_context = ChartDataQueryContextSchema().load(payload) query_object = query_context.queries[0] query_result = query_context.get_query_result(query_object) # get df for "3 years ago" df_3_years_ago = query_result.df df_3_years_ago["__timestamp"] = df_3_years_ago["__timestamp"] + DateOffset( years=3 ) df_3_years_ago = df_3_years_ago.set_index(["__timestamp", "state"]) for index, row in df_with_offsets.iterrows(): if index in df_3_years_ago.index: assert ( row["sum__num__3 years ago"] == df_3_years_ago.loc[index]["sum__num"] ) # should get correct data when apply "3 years later" payload["queries"][0]["time_offsets"] = [] payload["queries"][0]["time_range"] = "1983 : 1994" query_context = ChartDataQueryContextSchema().load(payload) query_object = query_context.queries[0] query_result = query_context.get_query_result(query_object) # get df for "3 years later" df_3_years_later = query_result.df df_3_years_later["__timestamp"] = df_3_years_later["__timestamp"] - DateOffset( years=3 ) df_3_years_later = df_3_years_later.set_index(["__timestamp", "state"]) for index, row in df_with_offsets.iterrows(): if index in df_3_years_later.index: assert ( row["sum__num__3 years later"] == df_3_years_later.loc[index]["sum__num"] )
def get_bacen_indices(): import pandas as pd import pymysql as db import datetime import logging from pandas import DateOffset from dependencias.Metodos.funcoes_auxiliares import full_path_from_database from findt import FinDt from pandas import ExcelWriter logger = logging.getLogger(__name__) #----Declaração de constantes logger.info("Conectando no Banco de dados") connection = db.connect('localhost', user='******', passwd='root', db='projeto_inv') logger.info("Conexão com DB executada com sucesso") var_path = full_path_from_database( "feriados_nacionais") + "feriados_nacionais.csv" save_path = full_path_from_database("get_bacen_indices") #----Input incremental baixado robos_diarios_bacen # Fazer a query do que foi baixado pelo robo_diario_bacen query = 'SELECT * FROM projeto_inv.bacen_series;' bacen_series = pd.read_sql(query, con=connection) logger.info("Leitura do banco de dados executada com sucesso") logger.info("Tratando dados") # Retira duplicatas bacen_series = bacen_series.sort(['codigo', 'data_referencia', 'data_bd'], ascending=[True, True, False]) bacen_series = bacen_series.drop_duplicates( subset=['codigo', 'data_referencia'], take_last=False) # Separa em dataframes de dias e meses bacen_series_mensal = bacen_series[bacen_series.frequencia.isin( ['M'])].copy() bacen_series_diario = bacen_series[bacen_series.frequencia.isin( ['D'])].copy() # Retira duplicatas bacen_series_mensal = bacen_series_mensal.drop_duplicates( subset=['codigo'], take_last=True) # Apenda com o dataframe diário bacen_series = bacen_series_mensal.append(bacen_series_diario) #Seleciona apenas IPCA, IGPM, CDI e TR bacen_series = bacen_series[(bacen_series.codigo == 256) | (bacen_series.codigo == 433) | (bacen_series.codigo == 189) | (bacen_series.codigo == 4389) | (bacen_series.codigo == 7811)] bacen_series['indice'] = None #IPCA - Periodocidade Mensal; Composição Mensal bacen_series['indice'][bacen_series.codigo == 433] = 'IPCA' #IGPM - - Periodocidade Mensal; Composição Mensal bacen_series['indice'][bacen_series.codigo == 189] = 'IGP' #CDI - - Periodocidade Diária; Composição Anual bacen_series['indice'][bacen_series.codigo == 4389] = 'DI1' #TR - - Periodocidade Diária; Composição Mensal bacen_series['indice'][bacen_series.codigo == 7811] = 'TR' #TJLP - - Periodocidade Diária; Composição Mensal bacen_series['indice'][bacen_series.codigo == 256] = 'TJLP' bacen_series['data_referencia'] = bacen_series['data_referencia'].astype( str) bacen_series['ano'] = bacen_series['data_referencia'].str[0:4].astype(int) bacen_series['mes'] = bacen_series['data_referencia'].str[5:7].astype(int) bacen_series['dia'] = bacen_series['data_referencia'].str[8:10].astype(int) bacen_series['dt_ref'] = pd.to_datetime( bacen_series['data_referencia']).dt.date del bacen_series['codigo'] del bacen_series['frequencia'] del bacen_series['nome'] del bacen_series['data_bd'] del bacen_series['id_bacen_series'] del bacen_series['data_referencia'] #----ATUALIZAÇÃO ÍNDICES - CARREGAMENTO HISTÓRICO logger.info("Atualizando índices") #Fazer a query do que foi baixado pelo robo_diario_anbima_projecoes query = 'SELECT * FROM projeto_inv.bacen_series_hist;' bacen_series_hist = pd.read_sql(query, con=connection) logger.info("Leitura do banco de dados executada com sucesso") bacen_series_hist = bacen_series_hist.sort(['indice', 'dt_ref', 'data_bd'], ascending=[True, True, False]) bacen_series_hist = bacen_series_hist.drop_duplicates( subset=['indice', 'dt_ref'], take_last=False) del bacen_series_hist['id_bc_series_hist'] del 
bacen_series_hist['data_bd'] horario_bd = datetime.datetime.now() #----ATUALIZAÇÃO ÍNDICES - APPEND DAS INFO NOVAS DAS SERIES BACEN bacen_series_hist = bacen_series_hist.append(bacen_series) bacen_series_hist = bacen_series_hist.sort(['indice', 'dt_ref'], ascending=[True, True]) bacen_series_hist = bacen_series_hist.drop_duplicates( subset=['indice', 'dt_ref'], take_last=False) #----------CRIAÇÃO SÉRIE DIÁRIA logger.info("Criando séries diária") #Seleciona o última dia do mês vigente mesfim = datetime.date.today().month + 1 fim = datetime.date(datetime.date.today().year, mesfim, 1) - DateOffset( months=0, days=1) dt_ref = pd.date_range(start='01/01/1996', end=fim, freq='D').date ano = pd.date_range(start='01/01/1996', end=fim, freq='D').year mes = pd.date_range(start='01/01/1996', end=fim, freq='D').month dias = pd.date_range(start='01/01/1996', end=fim, freq='D').day serie_dias = pd.DataFrame(columns=['dt_ref', 'ano', 'mes', 'dia']) serie_dias['dt_ref'] = dt_ref serie_dias['ano'] = ano serie_dias['mes'] = mes serie_dias['dia'] = dias #identificar se é dia útil dt_max = max(serie_dias['dt_ref']) dt_min = min(serie_dias['dt_ref']) per = FinDt.DatasFinanceiras(dt_min, dt_max, path_arquivo=var_path) du = pd.DataFrame(columns=['dt_ref']) du['dt_ref'] = per.dias(3) du['du_1'] = 1 serie_dias = serie_dias.merge(du, on=['dt_ref'], how='left') serie_dias['du_1'] = serie_dias['du_1'].fillna(0) serie_dias['dc_1'] = 1 #calculo de dias corridos por mes serie_dias_group_count = serie_dias[['dt_ref', 'ano', 'mes']].groupby(['ano', 'mes' ]).agg(['count']) serie_dias_group_count = serie_dias_group_count.reset_index(level=None, drop=False, inplace=False, col_level=0, col_fill='') serie_dias_group_count_filter = pd.DataFrame(columns=['ano', 'mes', 'dc']) serie_dias_group_count_filter['ano'] = serie_dias_group_count['ano'] serie_dias_group_count_filter['mes'] = serie_dias_group_count['mes'] serie_dias_group_count_filter['dc'] = serie_dias_group_count['dt_ref'] serie_dias = serie_dias.merge(serie_dias_group_count_filter, on=['ano', 'mes'], how='left') #calculo de dias uteis por mes serie_dias_group_sum = serie_dias[['du_1', 'ano', 'mes']].groupby(['ano', 'mes']).agg(['sum']) serie_dias_group_sum = serie_dias_group_sum.reset_index(level=None, drop=False, inplace=False, col_level=0, col_fill='') serie_dias_group_sum_filter = pd.DataFrame(columns=['ano', 'mes', 'du']) serie_dias_group_sum_filter['ano'] = serie_dias_group_sum['ano'] serie_dias_group_sum_filter['mes'] = serie_dias_group_sum['mes'] serie_dias_group_sum_filter['du'] = serie_dias_group_sum['du_1'] serie_dias = serie_dias.merge(serie_dias_group_sum_filter, on=['ano', 'mes'], how='left') #----------CRIAÇÃO BASE DIÁRIA logger.info("Criando bases diárias") #----IPCA ipca = bacen_series_hist[['mes', 'ano', 'valor', 'indice' ]][bacen_series_hist.indice == 'IPCA'].copy() serie_dias_ipca = serie_dias.merge(ipca, on=['mes', 'ano'], how='left') #Taxas acumuladas serie_dias_ipca['fator_dia_du'] = ( 1 + serie_dias_ipca['du_1'] * serie_dias_ipca['valor'] / 100)**( 1 / serie_dias_ipca['du']) serie_dias_ipca['fator_dia_dc'] = ( 1 + serie_dias_ipca['dc_1'] * serie_dias_ipca['valor'] / 100)**( 1 / serie_dias_ipca['dc']) serie_dias_ipca['fator_acum_du'] = serie_dias_ipca[[ 'indice', 'fator_dia_du' ]].groupby(['indice']).agg(['cumprod']) serie_dias_ipca['fator_acum_dc'] = serie_dias_ipca[[ 'indice', 'fator_dia_dc' ]].groupby(['indice']).agg(['cumprod']) #----IGPM igpm = bacen_series_hist[['mes', 'ano', 'valor', 'indice' ]][bacen_series_hist.indice == 'IGP'].copy() 
serie_dias_igpm = serie_dias.merge(igpm, on=['mes', 'ano'], how='left') #Taxas acumuladas serie_dias_igpm['fator_dia_du'] = ( 1 + serie_dias_igpm['du_1'] * serie_dias_igpm['valor'] / 100)**( 1 / serie_dias_igpm['du']) serie_dias_igpm['fator_dia_dc'] = ( 1 + serie_dias_igpm['dc_1'] * serie_dias_igpm['valor'] / 100)**( 1 / serie_dias_igpm['dc']) serie_dias_igpm['fator_acum_du'] = serie_dias_igpm[[ 'indice', 'fator_dia_du' ]].groupby(['indice']).agg(['cumprod']) serie_dias_igpm['fator_acum_dc'] = serie_dias_igpm[[ 'indice', 'fator_dia_dc' ]].groupby(['indice']).agg(['cumprod']) #----CDI cdi = bacen_series_hist[['dia', 'mes', 'ano', 'valor', 'indice' ]][bacen_series_hist.indice == 'DI1'].copy() serie_dias_cdi = serie_dias.merge(cdi, on=['dia', 'mes', 'ano'], how='left') serie_dias_cdi['indice'] = serie_dias_cdi['indice'].fillna('DI1') serie_dias_cdi['valor'] = serie_dias_cdi['valor'].fillna(0) #Taxas acumuladas serie_dias_cdi['fator_dia_du'] = ( 1 + serie_dias_cdi['du_1'] * serie_dias_cdi['valor'] / 100)**(1 / 252) serie_dias_cdi['fator_dia_dc'] = None serie_dias_cdi['fator_acum_du'] = serie_dias_cdi[[ 'indice', 'fator_dia_du' ]].groupby(['indice']).agg(['cumprod']) serie_dias_cdi['fator_acum_dc'] = None #----TR tr = bacen_series_hist[['mes', 'ano', 'valor', 'indice' ]][bacen_series_hist.indice == 'TR'].copy() serie_dias_tr = serie_dias.merge(tr, on=['mes', 'ano'], how='left') #----TJLP tjlp = bacen_series_hist[['mes', 'ano', 'valor', 'indice' ]][bacen_series_hist.indice == 'TJLP'].copy() serie_dias_tjlp = serie_dias.merge(tjlp, on=['mes', 'ano'], how='left') #Taxas acumuladas serie_dias_tr['fator_dia_du'] = ( 1 + serie_dias_tr['du_1'] * serie_dias_tr['valor'] / 100)**( 1 / serie_dias_tr['du']) serie_dias_tr['fator_dia_dc'] = ( 1 + serie_dias_tr['dc_1'] * serie_dias_tr['valor'] / 100)**( 1 / serie_dias_tr['dc']) serie_dias_tr['fator_acum_du'] = serie_dias_tr[['indice', 'fator_dia_du']].groupby([ 'indice' ]).agg(['cumprod']) serie_dias_tr['fator_acum_dc'] = serie_dias_tr[['indice', 'fator_dia_dc']].groupby([ 'indice' ]).agg(['cumprod']) serie_dias_indices = serie_dias_ipca.copy() serie_dias_indices = serie_dias_indices.append(serie_dias_igpm) serie_dias_indices = serie_dias_indices.append(serie_dias_cdi) serie_dias_indices = serie_dias_indices.append(serie_dias_tr) serie_dias_indices = serie_dias_indices.append(serie_dias_tjlp) serie_dias_indices = serie_dias_indices[ serie_dias_indices.fator_dia_du.notnull()] serie_dias_indices = serie_dias_indices.reset_index(level=None, drop=True, inplace=False, col_level=0, col_fill='') writer = ExcelWriter(save_path + 'serie_dias_indices.xlsx') serie_dias_indices.to_excel(writer, 'Todos') serie_dias_ipca.to_excel(writer, 'IPCA') serie_dias_igpm.to_excel(writer, 'IGPM') serie_dias_cdi.to_excel(writer, 'DI') serie_dias_tr.to_excel(writer, 'TR') serie_dias_tjlp.to_excel(writer, 'TJLP') writer.save() serie_dias_indices['data_bd'] = horario_bd #----------VERIFICAÇÃO PARA CRIAR A TABELA INCREMENTAL query = 'SELECT * FROM projeto_inv.bacen_series_fatores;' bc_series = pd.read_sql(query, con=connection) logger.info("Leitura do banco de dados executada com sucesso") bc_series = bc_series[['indice', 'dt_ref']].copy() bc_series['marker'] = 1 serie_dias_indices = serie_dias_indices.merge(bc_series, on=['indice', 'dt_ref'], how='left') serie_dias_indices = serie_dias_indices[ serie_dias_indices.marker.isnull()].copy() del serie_dias_indices['marker'] logger.info("Salvando base de dados - Tabela bacen_series_fatores") #Salvar no MySQL 
pd.io.sql.to_sql(serie_dias_indices, name='bacen_series_fatores', con=connection, if_exists='append', flavor='mysql', index=0) #Fecha conexao connection.close()
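# The daily factor/accumulation pattern used throughout get_bacen_indices()
# boils down to: spread a monthly rate over the business (or calendar) days of
# its month and take a cumulative product.  A minimal reproduction with
# invented numbers (the real code does the cumprod per 'indice' group):
import pandas as pd

serie = pd.DataFrame({
    "du_1": [1, 1, 0, 1],              # business-day flags
    "du": [3, 3, 3, 3],                # business days in the month
    "valor": [1.2, 1.2, 1.2, 1.2],     # monthly rate in percent
})
serie["fator_dia_du"] = (1 + serie["du_1"] * serie["valor"] / 100) ** (1 / serie["du"])
serie["fator_acum_du"] = serie["fator_dia_du"].cumprod()
print(serie)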
def test_is_scalar_pandas_scalars(self):
    assert is_scalar(Timestamp("2014-01-01"))
    assert is_scalar(Timedelta(hours=1))
    assert is_scalar(Period("2014-01-01"))
    assert is_scalar(Interval(left=0, right=1))
    assert is_scalar(DateOffset(days=1))
Parameters ---------- other : holiday calendar inplace : bool (default=False) If True set rule_table to holidays, else return array of Holidays """ holidays = self.merge_class(self, other) if inplace: self.rules = holidays else: return holidays USMemorialDay = Holiday( "Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1)) ) USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1))) USColumbusDay = Holiday( "Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2)) ) USThanksgivingDay = Holiday( "Thanksgiving", month=11, day=1, offset=DateOffset(weekday=TH(4)) ) USMartinLutherKingJr = Holiday( "Martin Luther King Jr. Day", start_date=datetime(1986, 1, 1), month=1, day=1, offset=DateOffset(weekday=MO(3)), )
    observance=previous_friday,
)

# New Year's Day
LSENewYearsDay = Holiday(
    "New Year's Day",
    month=1,
    day=1,
    observance=weekend_to_monday,
)

# Early May bank holiday
MayBank = Holiday(
    "Early May Bank Holiday",
    month=5,
    day=1,
    offset=DateOffset(weekday=MO(1)),
)

# Spring bank holiday
SpringBank = Holiday(
    "Spring Bank Holiday",
    month=5,
    day=31,
    offset=DateOffset(weekday=MO(-1)),
)

# Summer bank holiday
SummerBank = Holiday(
    "Summer Bank Holiday",
    month=8,
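# Quick illustration (not part of the original calendar code) of how an
# anchor day plus a MO(-1)/MO(1) offset selects the last/first Monday of the
# month, which is what the Spring and Early May bank holiday rules rely on.
from dateutil.relativedelta import MO
from pandas import DateOffset, Timestamp

print(Timestamp("2024-05-31") + DateOffset(weekday=MO(-1)))  # 2024-05-27, last Monday of May
print(Timestamp("2024-05-01") + DateOffset(weekday=MO(1)))   # 2024-05-06, first Monday of May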
"Australia Day", month=1, day=26, start_date=Timestamp("1994-01-01"), observance=weekend_to_monday, ) # prior to 1993 the holiday was observed on the Monday # following, or on, the 26th of January AustraliaDayPre88 = Holiday( "Australia Day", month=1, day=26, start_date=Timestamp("1960-01-01"), end_date=Timestamp("1987-12-31"), offset=DateOffset(weekday=MO(1)), ) # The 1988 Bi-Centennial celebrations saw an extra holiday # and Australia Day observed on the actual date AustraliaDay1988 = Holiday( "Australia Day", month=1, day=26, start_date=Timestamp("1988-01-01"), end_date=Timestamp("1988-12-31"), ) # ASX did not close for Australia Day in 1993 since # States observed different dates prior to 1994 AustraliaDayPost88Pre93 = Holiday( "Australia Day", month=1,
NewYearsHolidayJan2 = Holiday(
    "New Year's Holiday (Jan 2)",
    month=1,
    day=2,
)
NewYearsHolidayJan3 = Holiday(
    "New Year's Holiday (Jan 3)",
    month=1,
    day=3,
)
ComingOfAgeDay = Holiday(
    "Coming of Age Day",
    month=1,
    day=1,
    offset=DateOffset(weekday=MO(2)),
)
NationalFoundationDay = Holiday(
    "National Foundation Day",
    month=2,
    day=11,
    observance=sunday_to_monday,
)
# The dates on which the vernal/autumnal equinox will be observed
# are announced on the first weekday of February of the previous
# year, so we treat them as ad-hoc holidays, even though they
# occur every year. For more info, see:
# https://en.wikipedia.org/wiki/Public_holidays_in_Japan#cite_note-3
# For the list of equinoxes going back to 2000, see:
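# The observance callables shift a holiday only when needed; a small check
# (dates chosen for illustration) of the sunday_to_monday rule used for
# National Foundation Day above:
from datetime import datetime

from pandas.tseries.holiday import sunday_to_monday

print(sunday_to_monday(datetime(2024, 2, 11)))  # Sunday   -> 2024-02-12 (Monday)
print(sunday_to_monday(datetime(2023, 2, 11)))  # Saturday -> unchanged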
def execute_search(criteria, session_key, reply_channel):
    run_from_management_command = criteria.get('data_mining')
    # Check-in range pre-calculated when running analytics
    if not run_from_management_command:  # pragma: no cover
        check_in = datetime.strptime(criteria['checkIn'], '%Y-%m-%d')
        check_out = datetime.strptime(criteria['checkOut'], '%Y-%m-%d')
        criteria['check_in_range'] = date_range(check_in, check_out - DateOffset(days=1))
    night_count = len(criteria['check_in_range'])
    outbound_message = {
        'status': '200',
        'currency': criteria['currency'],
        'currency_symbol': settings.CURRENCY_SYMBOLS[criteria['currency']],
        'country': criteria['country'],  # Blank if not a country search
        'night_count': night_count,
    }
    try:
        criteria['city'] = unquote(criteria['city'])
        criteria['county'] = unquote(criteria['county'])
        criteria['state'] = unquote(criteria['state'])
        criteria['country'] = unquote(criteria['country'])
        if criteria['country'] in settings.BLOCKED_COUNTRIES:
            # We no longer permit searches for certain high-risk countries due
            # to high levels of attempted fraud. We block them in the front-end
            # but keep this additional safeguard in case someone edits the URL
            # directly (and another later in case they figure out how to submit
            # a search without a country parameter; better to catch them as
            # early as possible).
            logger.error(
                'Someone tried searching for a blocked country {} via results URL'
                .format(criteria['country']))
            raise Exception
        _, stays = execute.search(criteria)
        if not run_from_management_command:  # pragma: no cover
            search_key = utils.create_session_key(
                unquote(criteria['place_name']),
                criteria['checkIn'],
                criteria['checkOut'],
                criteria['occupants'],
                criteria['latitude'],
                criteria['longitude'],
                criteria['currency'],
            )
            # Store complete record (including lengthy rateKey information) for
            # later use in the stay detail view
            http_session = SessionStore(session_key=session_key)
            http_session[search_key] = {
                'stays': stays.to_json(),
                'timestamp': datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
            }
            http_session.save()
        fields_required_on_results_page = [
            'default_sort', 'hotel_1_id', 'check_in_1', 'night_count_1',
            'entire_stay_cost_1', 'hotel_2_id', 'night_count_2',
            'entire_stay_cost_2', 'switch_count', 'distance_in_km',
            'rounded_stay_cost', 'rounded_nightly_cost', 'benchmark_stay_cost',
            'primary_star_rating', 'review_score', 'min_review_tier',
            'primary_review_tier', 'refundable',
        ]
        required_fields_only_present_in_multi_night_search = [
            'check_in_2',
            'cost_delta_vs_stay_benchmark',
            'percentage_cost_delta_vs_stay_benchmark',
            'switch_benefit',
        ]
        if stays['switch_count'].max() > 0:  # pragma: no cover
            fields_required_on_results_page = \
                fields_required_on_results_page + required_fields_only_present_in_multi_night_search
            max_saving = abs(stays['percentage_cost_delta_vs_stay_benchmark'].min())
            if max_saving >= 0.3:
                log_max_saving(criteria, max_saving)
        if run_from_management_command:  # pragma: no cover
            # Hotel info not required; pass back to the calling command
            fields_required_for_data_mining = ['stay_cost', 'cost_per_quality_unit']
            fields_required_on_results_page = \
                fields_required_on_results_page + fields_required_for_data_mining
            return stays[fields_required_on_results_page]
        outbound_message['stays'] = \
            stays[fields_required_on_results_page].to_json(orient='records')
        hotel_id_columns = stays.columns.str.contains(r'hotel_[\d]_id')
        hotel_ids = melt(stays.loc[:, hotel_id_columns]).dropna()['value'].unique()
        hotels = Hotel.objects.filter(hotel_id__in=hotel_ids).select_related().iterator()
        hotels = [{
            'hotel_id': str(hotel.hotel_id),  # String required for use as key
            'name': hotel.name,
            'star_rating': hotel.star_rating,
            'main_image_url': hotel.main_image_url,
            'recommendations': hotel.trustyou.recommendations,
            'summary': hotel.trustyou.summary,
            'trust_score': hotel.trustyou.trust_score,
            'trust_score_description': hotel.trustyou.trust_score_description,
            'review_count': hotel.trustyou.review_count,
            'category_badge': hotel.trustyou.category_badge,
            'latitude': hotel.latitude,
            'longitude': hotel.longitude,
        } for hotel in hotels]
        hotels = DataFrame(hotels)
        hotels.set_index('hotel_id', inplace=True)
        outbound_message['hotels'] = hotels.to_dict('index')
        min_stay_cost = stays['stay_cost'].min()
        max_stay_cost = stays['stay_cost'].max()
        try:  # pragma: no cover
            min_switch_distance = int(stays['distance_in_km'].min())
            max_switch_distance = int(stays['distance_in_km'].max())
        except ValueError:
            min_switch_distance = 0
            max_switch_distance = 0
        min_nightly_cost = min_stay_cost / night_count
        max_nightly_cost = max_stay_cost / night_count
        outbound_message['cost_ranges'] = {
            'minStayCost': floor(min_stay_cost),
            'maxStayCost': ceil(max_stay_cost),
            'minNightlyCost': floor(min_nightly_cost),
            'maxNightlyCost': ceil(max_nightly_cost),
        }
        outbound_message['distance_ranges'] = {
            'minDistanceSwitch': min_switch_distance,
            'maxDistanceSwitch': max_switch_distance,
        }
    except (RequestError, NoResultsError):
        error = 'RequestError or NoResultsError when searching for {}'.format(
            unquote(criteria['place_name'])
        )
        client.captureMessage(error)
        outbound_message['status'] = '503'
        logger.error(error)
        if run_from_management_command:  # pragma: no cover
            return DataFrame()
    except Exception:  # pragma: no cover
        outbound_message['status'] = '500'
        exception_type, _, exception_traceback = sys.exc_info()
        logger.error(exception_type)
        logger.error(pprint.pformat(traceback.format_tb(exception_traceback, limit=4)))
        if run_from_management_command:
            return DataFrame()
    if reply_channel is not None:  # pragma: no cover
        # This is actually tested, but coverage can't detect it
        Channel(reply_channel).send({
            "text": json.dumps(outbound_message)
        })
    if outbound_message['status'] == '200':
        return True
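# Stand-alone illustration (dates are arbitrary) of the check-in range
# calculation at the top of execute_search: the range runs from check-in up
# to the night before check-out, so its length is the number of nights.
from datetime import datetime

from pandas import DateOffset, date_range

check_in = datetime.strptime('2024-03-01', '%Y-%m-%d')
check_out = datetime.strptime('2024-03-04', '%Y-%m-%d')
check_in_range = date_range(check_in, check_out - DateOffset(days=1))
print(len(check_in_range))  # 3 nights
print(check_in_range)       # 2024-03-01, 2024-03-02 and 2024-03-03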
    day=24,
    start_date=Timestamp('1993-01-01'),
    # When Christmas is a Saturday, the 24th is a full holiday.
    days_of_week=(MONDAY, TUESDAY, WEDNESDAY, THURSDAY),
)

USNewYearsDay = new_years_day(
    # When Jan 1 is a Sunday, US markets observe the subsequent Monday.
    # When Jan 1 is a Saturday (as in 2005 and 2011), no holiday is observed.
    observance=sunday_to_monday,
)

USMartinLutherKingJrAfter1998 = Holiday(
    'Dr. Martin Luther King Jr. Day',
    month=1,
    day=1,
    # The US markets didn't observe MLK day as a holiday until 1998.
    start_date=Timestamp('1998-01-01'),
    offset=DateOffset(weekday=MO(3)),
)

USMemorialDay = Holiday(
    # NOTE: The definition for Memorial Day is incorrect as of pandas 0.16.0.
    # See https://github.com/pydata/pandas/issues/9760.
    'Memorial Day',
    month=5,
    day=25,
    offset=DateOffset(weekday=MO(1)),
)

USIndependenceDay = Holiday(
    'July 4th',
    month=7,
    day=4,
    observance=nearest_workday,
)
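# A quick check (years chosen for illustration) of the nearest_workday
# observance used for USIndependenceDay above: a Saturday holiday is observed
# on Friday, a Sunday holiday on Monday.
from datetime import datetime

from pandas.tseries.holiday import nearest_workday

print(nearest_workday(datetime(2020, 7, 4)))  # Saturday -> 2020-07-03 (Friday)
print(nearest_workday(datetime(2021, 7, 4)))  # Sunday   -> 2021-07-05 (Monday)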
    Parameters
    ----------
    other : holiday calendar
    inplace : bool (default=False)
        If True, set the calendar's rules to the merged holidays;
        otherwise return the array of Holidays.
    """
    holidays = self.merge_class(self, other)
    if inplace:
        self.rules = holidays
    else:
        return holidays


USMemorialDay = Holiday(
    "Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1))
)
USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1)))
USColumbusDay = Holiday(
    "Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2))
)
USThanksgivingDay = Holiday(
    "Thanksgiving Day", month=11, day=1, offset=DateOffset(weekday=TH(4))
)
USMartinLutherKingJr = Holiday(
    "Birthday of Martin Luther King, Jr.",
    start_date=datetime(1986, 1, 1),
    month=1,
    day=1,
    offset=DateOffset(weekday=MO(3)),
)