def get_data(
    self,
    symbol: str,
    interval: str,
    start_datetime: pd.Timestamp,
    end_datetime: pd.Timestamp,
) -> pd.DataFrame:
    if interval != self.INTERVAL_QUARTERLY:
        raise ValueError(f"cannot support {interval}")
    symbol, exchange = symbol.split(".")
    exchange = "sh" if exchange == "ss" else "sz"
    code = f"{exchange}.{symbol}"
    start_date = start_datetime.strftime("%Y-%m-%d")
    end_date = end_datetime.strftime("%Y-%m-%d")
    performance_express_report_df = self.get_performance_express_report_df(code, start_date, end_date)
    profit_df = self.get_profit_df(code, start_date, end_date)
    forecast_report_df = self.get_forecast_report_df(code, start_date, end_date)
    growth_df = self.get_growth_df(code, start_date, end_date)
    df = pd.concat(
        [performance_express_report_df, profit_df, forecast_report_df, growth_df],
        axis=0,
    )
    return df
def craft_and_send_email(t1, t2, config, volcano, d_Azimuth, velocity, mx_pressure, filename):
    from pandas import Timestamp

    # create the subject line
    subject = '{} Airwave Detection'.format(volcano['volcano'])

    # create the text for the message you want to send
    message = '{} alarm:\n'.format(config.alarm_name)
    message = '{}{} detection!\n\n'.format(message, volcano['volcano'])
    message = '{}Start: {} (UTC)\nEnd: {} (UTC)\n\n'.format(
        message, t1.strftime('%Y-%m-%d %H:%M'), t2.strftime('%Y-%m-%d %H:%M'))
    t1_local = Timestamp(t1.datetime, tz='UTC')
    t2_local = Timestamp(t2.datetime, tz='UTC')
    t1_local = t1_local.tz_convert('US/Alaska')
    t2_local = t2_local.tz_convert('US/Alaska')
    message = '{}Start: {} ({})'.format(message,
                                        t1_local.strftime('%Y-%m-%d %H:%M'),
                                        t1_local.tzname())
    message = '{}\nEnd: {} ({})\n\n'.format(
        message, t2_local.strftime('%Y-%m-%d %H:%M'), t2_local.tzname())
    message = '{}d_Azimuth: {:+.1f} degrees\n'.format(message, d_Azimuth)
    message = '{}Velocity: {:.0f} m/s\n'.format(message, velocity * 1000)
    message = '{}Max Pressure: {:.1f} Pa'.format(message, mx_pressure)

    utils.send_alert(config.alarm_name, subject, message, filename)
    utils.post_mattermost(subject, message, config.alarm_name, filename)
    # delete the file you just sent
    if filename:
        remove(filename)
def date_to_url(
        datetime: pd.Timestamp,
        already_downloaded: Set[str]) -> Union[str, None]:
    """convert a datetime to its corresponding wiki dump url

    Arguments:
        datetime {Timestamp} -- Timestamp whose corresponding wiki pageviews
            dump will be downloaded
        already_downloaded {Set[str]} -- set of filenames for datetimes whose
            pageviews have already been downloaded and processed

    Returns:
        string, None -- url to download, or None if data already downloaded
            and processed
    """
    year = datetime.strftime('%Y')
    year_month = datetime.strftime('%Y-%m')
    pageviews = datetime.strftime('pageviews-%Y%m%d-%H0000.gz')

    # check to see if the file has already been downloaded and processed
    # we do "pageviews[:-3]" because the string ends in ".gz"
    if pageviews[:-3] in already_downloaded:
        print(f'already downloaded {pageviews[:-3]}')
        return None

    # url looks like:
    # https://dumps.wikimedia.org/other/pageviews/2020/2020-05/pageviews-20200501-100000.gz
    url = os.path.join(ROOT_URL, year, year_month, pageviews)
    return url
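# A minimal usage sketch for date_to_url, assuming ROOT_URL is the module-level
# constant for the Wikimedia pageviews mirror implied by the comment above.
import pandas as pd

dt = pd.Timestamp('2020-05-01 10:00')
print(date_to_url(dt, already_downloaded=set()))
# -> https://dumps.wikimedia.org/other/pageviews/2020/2020-05/pageviews-20200501-100000.gz
print(date_to_url(dt, already_downloaded={'pageviews-20200501-100000'}))
# -> None (already downloaded and processed)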
def get_ts_traces(self, site_list, start=0, end=0, varfrom=100, varto=140,
                  interval='day', multiplier=1, datasource='A',
                  data_type='mean', qual_codes=[30, 20, 10, 11, 21, 18],
                  report_time=None):
    """
    Request time series traces for a list of sites over the given period.
    """
    # Convert the site list to a comma delimited string of sites
    sites = select_sites(site_list).astype(str)
    site_list_str = ','.join([str(site) for site in sites])

    ### Datetime conversion - with dates < 1900
    c1900 = Timestamp('1900-01-01')
    if start != 0:
        start1 = Timestamp(start)
        if start1 > c1900:
            start = start1.strftime('%Y%m%d%H%M%S')
        else:
            start = start1.isoformat(' ').replace('-', '').replace(' ', '').replace(':', '')
    if end != 0:
        end1 = Timestamp(end)
        if end1 > c1900:
            end = end1.strftime('%Y%m%d%H%M%S')
        else:
            end = end1.isoformat(' ').replace('-', '').replace(' ', '').replace(':', '')

    ts_traces_request = {'function': 'get_ts_traces',
                         'version': 2,
                         'params': {'site_list': site_list_str,
                                    'start_time': start,
                                    'end_time': end,
                                    'varfrom': varfrom,
                                    'varto': varto,
                                    'interval': interval,
                                    'datasource': datasource,
                                    'data_type': data_type,
                                    'multiplier': multiplier,
                                    'report_time': report_time}}

    ts_traces_request = self.query_by_dict(ts_traces_request)
    j1 = ts_traces_request['return']['traces']

    ### Convert json to a dataframe
    sites = [str(f['site']) for f in j1]
    out1 = DataFrame()
    for i in range(len(j1)):
        df1 = DataFrame(j1[i]['trace'])
        if not df1.empty:
            df1.rename(columns={'v': 'data', 't': 'time', 'q': 'qual_code'}, inplace=True)
            df1['data'] = to_numeric(df1['data'], errors='coerce')
            df1['time'] = to_datetime(df1['time'], format='%Y%m%d%H%M%S')
            df1['qual_code'] = to_numeric(df1['qual_code'], errors='coerce', downcast='integer')
            df1['site'] = sites[i]
            df2 = df1[df1.qual_code.isin(qual_codes)]
            out1 = concat([out1, df2])

    out2 = out1.set_index(['site', 'time'])[['data', 'qual_code']]
    return out2
class TickEvent(Event):
    """
    Tick event
    """

    def __init__(self):
        """
        Initialises Tick
        """
        self.event_type = EventType.TICK
        self.tick_type = TickType.TRADE
        self.timestamp = Timestamp('1970-01-01', tz='UTC')
        self.full_symbol = ''
        self.price = 0.0
        self.size = 0
        self.depth = 1
        self.bid_price_L1 = 0.0
        self.bid_size_L1 = 0
        self.ask_price_L1 = 0.0
        self.ask_size_L1 = 0
        self.open_interest = 0
        self.open = 0.0
        self.high = 0.0
        self.low = 0.0
        self.pre_close = 0.0
        self.upper_limit_price = 0.0
        self.lower_limit_price = 0.0

    def __str__(self):
        return "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s" % (
            str(self.timestamp.strftime("%H:%M:%S.%f")),
            str(datetime.now().strftime("%H:%M:%S.%f")),
            str(self.full_symbol), str(self.tick_type),
            str(self.bid_size_L1), str(self.bid_price_L1),
            str(self.ask_price_L1), str(self.ask_size_L1),
            str(self.price), str(self.size))
def get_next_trading_day_str(time: pd.Timestamp, shift_days: int = 1):
    """Walk day by day from `time` and return the date string of the
    `shift_days`-th trading day encountered (counting `time` itself if it
    is open)."""
    # TODO:
    # Shifting forward needs the detailed calendar data to decide whether a
    # day is a trading day (based on the domestic exchange calendar).
    # Shifting backward does not, since special cases only ever add new
    # holidays going forward.
    check_detail = False
    if shift_days > 0:
        check_detail = True

    idx = 0
    shift_times = abs(shift_days)
    shift_delta = shift_days // shift_times
    while True:
        day_str = time.strftime(DATE_FORMAT)
        if calendar_manager.check_open(day_str, check_detail):
            idx += 1
            if idx >= shift_times:
                return day_str
        if shift_delta > 0 and time.day_of_week == 4:
            # friday to monday directly
            delta = 3
        elif shift_delta < 0 and time.day_of_week == 0:
            # monday to friday directly
            delta = -3
        else:
            delta = 1 * shift_delta
        time += datetime.timedelta(days=delta)
def _req_history_ticks(self, code: str, start: Timestamp, end: Timestamp,
                       nums: int, what_to_show: str, use_rth: int,
                       ignore_size: bool, misc_options) -> List[HistoricalTickLast]:
    req = Request.new_request()
    contract = self.code_to_contract(code)
    self.cli.reqHistoricalTicks(req.req_id, contract,
                                start.strftime("%Y%m%d %H:%M:%S") if start is not None else "",
                                end.strftime("%Y%m%d %H:%M:%S"),
                                nums, what_to_show, use_rth, ignore_size,
                                misc_options)
    if req.condition.acquire():
        req.condition.wait(10)
    if not req.resp:
        raise RuntimeError("Request timed out or returned no data")
    resp = req.resp
    Request.clear(req.req_id)
    return resp
def process_one(self, time: pd.Timestamp, proc_funcs: list = [], save=True,
                **proc_funcs_kwargs):
    """This method defines a processing pipeline consisting of opening the
    file using the `open` method, applying each of the `proc_funcs` to the
    output of the previous one, and saving the processed data using the
    `save` method."""
    tstr = time.strftime('%Y%m%d')
    files = self.list_files(time)
    try:
        if len(files) > 0:
            data = self.open(files)
            proc_funcs = [BandsAssertShape()] + proc_funcs
            kwargs = {'cls': self, **proc_funcs_kwargs}
            for f in proc_funcs:
                data = f(data, time, **kwargs)
            if save:
                self.save(time, data)
            else:
                return data
        else:
            warn(f'No files for {time}. Skipping to the next time.')
    except Exception:
        msg = (f'Unable to process files for {time}. Check if files are '
               f'corrupted. Skipping to the next time. {sys.exc_info()[0]}')
        warn(msg, UserWarning)
def load_to_cache(self, start_time: pd.Timestamp, end_time: pd.Timestamp):
    """
    Download the (end_time - start_time) range of daily data from the server
    into CSV files.

    start_time - start of the range (pd.Timestamp)
    end_time - end of the range (pd.Timestamp)
    """
    # TODO: rework this; bitmex publishes incomplete data for the most recent day
    # assert end_time < (pd.Timestamp.today() - dt.timedelta(days=1))
    assert start_time < end_time
    if start_time.date() == end_time.date():
        if not self.check_cache(start_time):
            self.load_bar_day(start_time).to_csv(
                path.join(self.path_cash, self.symbol, self.data_frequency,
                          start_time.strftime("%Y-%m-%d") + '.csv'))
    else:
        for day in pd.date_range(start_time, end_time, freq='D', closed='left'):
            if not self.check_cache(day):
                self.load_bar_day(day).to_csv(
                    path.join(self.path_cash, self.symbol, self.data_frequency,
                              day.strftime("%Y-%m-%d") + '.csv'))
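# A hedged usage sketch for load_to_cache; `loader` stands in for an instance
# configured with path_cash, symbol, and data_frequency.
import pandas as pd

# backfills 2020-03-01 through 2020-03-07 (closed='left' excludes the end date)
loader.load_to_cache(pd.Timestamp('2020-03-01'), pd.Timestamp('2020-03-08'))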
def insert_data(dt: pd.Timestamp, df: pd.DataFrame, db_conn: sqlite3.Connection):
    db_cursor = db_conn.cursor()
    bases = ["base252", "base360"]
    date = dt.strftime("%Y-%m-%d")
    for row in df.itertuples():
        doc = {
            "date": f"'{date}'",
            "duration": str(getattr(row, "duration")),
        }
        for base in bases:
            val = getattr(row, base)
            if math.isnan(val):
                continue
            doc[base] = str(val)
        if any([d in doc for d in bases]):
            columns = ", ".join(doc.keys())
            values = ", ".join(doc.values())
            query = f"INSERT INTO cdi ({columns}) VALUES ({values});"
            db_cursor.execute(query)
    db_conn.commit()
    db_cursor.close()
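# A hedged usage sketch for insert_data; the `cdi` table layout below is
# inferred from the column names in the code and may not match the real schema.
import math
import sqlite3
import pandas as pd

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE cdi (date TEXT, duration REAL, base252 REAL, base360 REAL);")

rates = pd.DataFrame({
    "duration": [21, 42],
    "base252": [0.1315, float("nan")],  # NaN columns are skipped per row
    "base360": [0.1290, 0.1302],
})
insert_data(pd.Timestamp("2024-01-02"), rates, conn)
print(conn.execute("SELECT * FROM cdi;").fetchall())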
def list_files(self, time: pd.Timestamp) -> list:
    if time.year in self.times.year:
        time = pd.Timestamp(f'{time.year}-01-01')
    time_pattern = time.strftime('_%Y_')
    files = self.paths.src.ls(recursive=True, include=['.tif', time_pattern],
                              exclude=['.xml'])
    return files
def query(self):
    try:
        start = Timestamp(self.q[1].get())
        end = Timestamp(self.q[2].get())
        assert not isinstance(start, NaTType) and not isinstance(end, NaTType)
    except Exception:
        messagebox.showinfo('Wrong Format', 'Fail to convert into Timestamp')
    else:
        sDate = start.strftime('%Y-%m-%d')
        eDate = end.strftime('%Y-%m-%d')
        sDatetime = start.strftime('%Y-%m-%d %H:%M:%S')
        eDatetime = end.strftime('%Y-%m-%d %H:%M:%S')
        if self.q[3].get() == 'day':
            return self.read(sDate, eDate, True)
        else:
            return self.read(sDatetime, eDatetime, False)
def _log_and_update_pnl(self, pnl: float, symbol: Symbol, timestamp: pd.Timestamp):
    self.pnl_history[symbol].append(pnl)
    self.cum_pnl[symbol] += pnl
    self.pnl_file.write(','.join([
        timestamp.strftime('%Y-%m-%dT%H:%M:%S'),
        symbol.name,
        str(pnl),
        str(self.cum_pnl[symbol])
    ]) + '\n')
def get_communication_subgraph(self, owner: str, dt: pd.Timestamp) -> pd.DataFrame:
    """
    Queries a subgraph from the timetree consisting of communication links
    between users in a specific timeframe. The timeframe length can be
    configured in the conf module.

    :param owner: repository owner
    :param dt: timestamp indicating the end of the desired period for the
        subgraph query
    :return: pd.DataFrame containing links between nodes in the subgraph
    """
    q_subgraph_time = '''
        MATCH (o:OWNER{login:$l_owner})
        WITH o, apoc.date.parse($l_dt, 'ms', 'yyyy-MM-dd') as end
        WITH o, end, apoc.date.add(end, 'ms', $l_tf_length, 'd') as start
        MATCH (node:COMMENT) -[:to]-> () -[:to]-> (r) -[:belongs_to]-> (o)
        WHERE node.event_time >= start AND node.event_time <= end
        WITH node as comment
        MATCH (source:USER) -[:makes]-> (comment) -[x]-> (target:USER)
        WHERE id(source) <> id(target)
        WITH DISTINCT source, target
        RETURN id(source) as source, id(target) as target
        '''
    links = pd.DataFrame(
        self.graph.data(q_subgraph_time,
                        parameters={
                            "l_owner": owner,
                            "l_tf_length": (-1 * conf.a_length_timeframe),
                            "l_dt": dt.strftime("%Y-%m-%d")
                        }))
    if conf.a_filter_core and not links.empty:
        dev_core = self.get_dev_core(owner)
        links = links[(links['source'].isin(dev_core['u_id'])
                       & links['target'].isin(dev_core['u_id']))]
    return links
def map_platform_to_schema(self, event, band, mbid, other):
    concertdate = Timestamp(dateparse(event["start"]["date"]).date())
    return {
        "titel": event["displayName"].strip().rstrip(
            concertdate.strftime("%B %d, %Y")),
        "titel_generated": event["displayName"].strip().rstrip(
            concertdate.strftime("%B %d, %Y")),
        "datum": concertdate,
        "einddatum": Timestamp(dateparse(event["end"]["date"]).date())
            if "end" in event else None,
        "artiest": other["artist_name"],
        "artiest_id": "songkick_" + str(other["artist_id"]),
        "artiest_mb_naam": band,
        "artiest_mb_id": mbid,
        "stad": ",".join([
            i.strip() for i in event["location"]["city"].split(",")[0:-1]
        ]),
        "land": event["location"]["city"].split(",")[-1].strip(),
        "venue": event["displayName"].strip()
            if event["type"] == "Festival"
            else event["venue"]["displayName"].strip(),
        "latitude": event["venue"]["lat"],
        "longitude": event["venue"]["lng"],
        "source": self.platform,
        "event_id": "songkick_" + str(event["id"]),
        "event_type": event["type"].lower()
    }
def download_data(date, resolution="high"): """Short summary. Parameters ---------- date : type Description of parameter `date`. resolution : type Description of parameter `resolution`. Returns ------- type Description of returned object. """ import ftplib from datetime import datetime from pandas import DatetimeIndex if isinstance(date, datetime) or isinstance(date, DatetimeIndex): year = date.strftime("%Y") yyyymmdd = date.strftime("%Y%m%d") else: from pandas import Timestamp date = Timestamp(date) year = date.strftime("%Y") yyyymmdd = date.strftime("%Y%m%d") # npp_eaot_ip_gridded_0.25_20181222.high.nc # print(year, yyyymmdd) file = f"npp_eaot_ip_gridded_0.25_{yyyymmdd}.high.nc" exists = os.path.isfile(file) if ~exists: ftp = ftplib.FTP(server) ftp.login() ftp.cwd(base_dir + year) ftp.retrbinary("RETR " + file, open(file, "wb").write) else: print(f"File Already Exists! Reading: {file}") return file, date
def check_cache(self, day: pd.Timestamp):
    """
    Check whether cached data already exists for a given day.

    day - date to check

    TODO: completeness of the data in the file is not verified.
    """
    pt = path.join(self.path_cash, self.symbol, self.data_frequency,
                   day.strftime("%Y-%m-%d") + '.csv')
    return path.exists(pt)
def show(self, date: pd.Timestamp):
    d = self.data.loc[date]
    if isinstance(d, pd.DataFrame):
        if len(d) > 1:
            raise ValueError(f'"{date}" matches more than one date')
        d = d.stack("age_group")
    for bar, h in zip(self.bars, d):
        bar.set_height(h)
    formatted_date = date.strftime("%d %B %Y")
    self.date_text.set_text(formatted_date)
    self.update_labels()
    return self.artists
def send_data(self, timestamp: pd.Timestamp, power):
    msg = json.dumps({
        'topic': 'data',
        'payload': {
            'timestamp': timestamp.strftime(_TIMESTAMP_FORMAT),
            'power': power
        }
    })
    self.channel.basic_publish(exchange='', routing_key='tmhchallenge', body=msg)
    print(f"[x] Sent '{msg}'")
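# self.channel and _TIMESTAMP_FORMAT come from elsewhere in the module; below is
# a minimal, hypothetical pika setup consistent with the publish call above
# (the real queue/exchange configuration may differ).
import pika

_TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S'  # assumed; the real constant may differ

connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
channel = connection.channel()
channel.queue_declare(queue='tmhchallenge')  # the routing_key above targets this queue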
def list_files(self, time: pd.Timestamp) -> list:
    out = []
    if time in self.times:
        time = pd.Timestamp(f'{time.year}-{time.month}-01')
    time_pattern = time.strftime('%Y%m%d')
    files = self.paths.src.ls(recursive=True, include=['JD.tif', time_pattern],
                              exclude=['.xml'])
    # keep only files whose windows intersect the region bounding box
    for f in files:
        data = open_tif(f)
        if not disjoint_bounds(data.bounds, self.region.bbox):
            out.append(f)
    return out
def download_data(date, ftype='meanFRP'):
    import requests as rq
    from datetime import datetime

    from numpy import arange

    if isinstance(date, datetime):
        year = date.strftime('%Y')
        yyyymmdd = date.strftime('%Y%m%d')
    else:
        from pandas import Timestamp

        date = Timestamp(date)
        year = date.strftime('%Y')
        yyyymmdd = date.strftime('%Y%m%d')
    url_ftype = "&files={}.".format(ftype)
    for i in arange(1, 7, dtype=int).astype(str):
        tile = ".FV3C384Grid.tile{}.bin".format(i)
        url = "{}{}{}{}{}".format(base_dir, yyyymmdd, url_ftype, yyyymmdd, tile)
        fname = "{}.{}.FV3.C384Grid.tile{}.bin".format(ftype, yyyymmdd, i)
        print('Retrieving file:', fname)
        r = rq.get(url)
        with open(fname, 'wb') as f:
            f.write(r.content)
def process_quality_control(rid: int, date: pd.Timestamp, outpath: str) -> None:
    """
    Driver for processing the quality control. Find the data for the given
    radar ID and date on GADI, unzip it, and send it to the radar_qcchecks
    function. Also takes care of saving the output results into a daily
    csv file.

    Parameters:
    ===========
    rid: int
        Radar rapic ID.
    date: pd.Timestamp
        Date to process.
    outpath: str
        Output path directory.
    """
    datestr = date.strftime("%Y%m%d")
    fname = f"{rid}_stats_{datestr}.csv"
    fname = os.path.join(outpath, fname)
    if os.path.isfile(fname):
        print("Output file already exists. Doing nothing.")
        return None

    inzip = get_radar_archive_file(date, rid)
    if inzip is None:
        print(f"Couldn't get zip archive for {date} and radar {rid}.")
        return None

    flist = extract_zip(inzip)
    print(f"Found {len(flist)} files for radar {rid} on the {datestr}.")
    try:
        bag = db.from_sequence(flist).map(radar_qcchecks.qccheck_radar_odim)
        rslt = bag.compute()
        rslt = [r for r in rslt if r is not None]
        if len(rslt) == 0:
            raise ValueError("No rain today.")
        df = pd.DataFrame(rslt)
        df.set_index("time").to_csv(fname, float_format="%g")
        print(f"{fname} saved.")
    except Exception:
        traceback.print_exc()
    finally:
        remove(flist)
        gc.collect()

    return None
def craft_and_send_email(t1, t2, stations, rms, lvlv, alarm_name, filename):
    from pandas import Timestamp

    # create the subject line
    subject = '--- {} ---'.format(alarm_name)

    # create the text for the message you want to send
    message = 'Start: {} (UTC)\nEnd: {} (UTC)\n\n'.format(
        t1.strftime('%Y-%m-%d %H:%M'), t2.strftime('%Y-%m-%d %H:%M'))
    t1_local = Timestamp(t1.datetime, tz='UTC')
    t2_local = Timestamp(t2.datetime, tz='UTC')
    t1_local = t1_local.tz_convert('US/Alaska')
    t2_local = t2_local.tz_convert('US/Alaska')
    message = '{}Start: {} ({})'.format(message,
                                        t1_local.strftime('%Y-%m-%d %H:%M'),
                                        t1_local.tzname())
    message = '{}\nEnd: {} ({})\n\n'.format(
        message, t2_local.strftime('%Y-%m-%d %H:%M'), t2_local.tzname())

    a = np.array([''] * len(rms[:-1]))
    a[np.where(rms > lvlv)] = '*'
    sta_message = ''.join(
        '{}{}: {:.0f}/{}\n'.format(sta, a[i], rms[i], lvlv[i])
        for i, sta in enumerate(stations[:-1]))
    sta_message = ''.join([
        sta_message,
        '\nArrestor: {} {:.0f}/{}'.format(stations[-1], rms[-1], lvlv[-1])
    ])
    message = ''.join([message, sta_message])

    utils.send_alert(alarm_name, subject, message, filename)
    utils.post_mattermost(subject, message, alarm_name, filename)
    # delete the file you just sent
    if filename:
        remove(filename)
def download_data(date, resolution='high'):
    import ftplib
    from datetime import datetime

    if isinstance(date, datetime):
        year = date.strftime('%Y')
        yyyymmdd = date.strftime('%Y%m%d')
    else:
        from pandas import Timestamp

        date = Timestamp(date)
        year = date.strftime('%Y')
        yyyymmdd = date.strftime('%Y%m%d')
    if resolution == 'high':
        file = 'npp_aot550_edr_gridded_0.10_{}.high.bin.gz'.format(yyyymmdd)
    else:
        file = 'npp_aot550_edr_gridded_0.25_{}.high.bin.gz'.format(yyyymmdd)
    ftp = ftplib.FTP(server)
    ftp.login()
    ftp.cwd(base_dir + year)
    ftp.retrbinary("RETR " + file, open(file, 'wb').write)
    return file, date
def process_one(self, time: pd.Timestamp, proc_funcs: list = [], save=True,
                **proc_funcs_kwargs):
    tstr = time.strftime('%Y%m%d')
    files = self.list_files(time)
    try:
        if len(files) > 0:
            data = self.open(files)
            proc_funcs = [BandsAssertShape()] + proc_funcs
            kwargs = {'cls': self, **proc_funcs_kwargs}
            for f in proc_funcs:
                data = f(data, time, **kwargs)
            if save:
                self.save(time, data)
            else:
                return data
        else:
            warn(f'No files for {time}. Skipping to the next time.')
    except Exception:
        warn(f'Unable to process files for {time}. Check if files are corrupted. Skipping to the next time.')
def _request_history_companies(self, trade_date: pd.Timestamp,
                               use_cache: bool = True) -> pd.DataFrame:
    trade_date = trade_date.strftime("%Y-%m-%d")
    cache_path = self.cache_dir.joinpath(f"{trade_date}_history_companies.pkl")
    if cache_path.exists() and use_cache:
        df = pd.read_pickle(cache_path)
    else:
        url = self.HISTORY_COMPANIES_URL.format(trade_date=trade_date)
        resp = requests.post(url)
        if resp.status_code != 200:
            raise ValueError(f"request error: {url}")
        df = pd.DataFrame(resp.json()["aaData"])
        df[self.DATE_FIELD_NAME] = trade_date
        df.rename(columns={"Name": "name", "Symbol": self.SYMBOL_FIELD_NAME},
                  inplace=True)
        if not df.empty:
            df.to_pickle(cache_path)
    return df
def generate_date_feature(date: pd.Timestamp):
    """
    Generate date features.
    """
    result = pd.Series()
    result['timestamp'] = date.timestamp()
    format_string = '%Y-%m-%d'
    dt_string = date.strftime(format_string)
    result['holiday'] = int(
        (dt_string in holiday)
        or (date.weekday() in [5, 6] and dt_string not in work))
    result = pd.concat(
        [result, get_onehot(date.weekday(), 0, 6, name='weekday')])
    return result
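# holiday, work, and get_onehot are module-level names here; below is a
# plausible sketch of get_onehot consistent with the call above (the real
# helper, including its index naming, may differ).
import pandas as pd

def get_onehot(value: int, low: int, high: int, name: str = 'onehot') -> pd.Series:
    # one-hot encode `value` over the inclusive range [low, high]
    index = [f'{name}_{i}' for i in range(low, high + 1)]
    return pd.Series([int(i == value) for i in range(low, high + 1)], index=index)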
def test_plot_cumulative_panel_date_index_no_freq(date_rand_data, pre_str_period,
                                                  post_str_period, monkeypatch):
    ci = CausalImpact(date_rand_data, pre_str_period, post_str_period)
    dd = date_rand_data.copy()
    dd.drop(dd.index[10:20])
    ax_mock = mock.Mock()
    plotter_mock = mock.Mock()
    plotter_mock.subplot.return_value = ax_mock
    plot_mock = mock.Mock(return_value=plotter_mock)
    monkeypatch.setattr(plot.Plot, '_get_plotter', plot_mock)
    ci.plot(panels=['cumulative'])
    plot_mock.assert_called_once()
    plotter_mock.figure.assert_called_with(figsize=(15, 12))
    plotter_mock.subplot.assert_any_call(1, 1, 1, sharex=ax_mock)
    ax_args = ax_mock.plot.call_args
    inferences = ci.inferences.iloc[1:, :]
    assert_array_equal(inferences['post_cum_effects'], ax_args[0][0])
    assert ax_args[0][1] == 'b--'
    assert ax_args[1] == {'label': 'Cumulative Effect'}
    date_ = datetime.strptime(ci.post_period[0], "%Y%m%d")
    date_ = date_ + timedelta(days=-1)
    date_ = Timestamp(date_.strftime("%Y-%m-%d %H:%M:%S"))
    ax_mock.axvline.assert_called_with(date_, c='k', linestyle='--')
    ax_args = ax_mock.fill_between.call_args_list[0]
    assert_array_equal(ax_args[0][0], inferences['post_cum_effects'].index)
    assert_array_equal(ax_args[0][1], inferences['post_cum_effects_lower'])
    assert_array_equal(ax_args[0][2], inferences['post_cum_effects_upper'])
    assert ax_args[1] == {
        'facecolor': 'blue',
        'interpolate': True,
        'alpha': 0.25
    }
    ax_mock.axhline.assert_called_with(y=0, color='k', linestyle='--')
    ax_mock.grid.assert_called_with(True, linestyle='--')
    ax_mock.legend.assert_called()
    plotter_mock.show.assert_called_once()
def get_df_from_epex(start: pd.Timestamp, country='FR'):
    """Take the start date timestamp and return a dataframe of the week that
    ends on that day, along with the timestamp for the next day to fetch
    further in the past (one day before the earliest day in the returned
    week)."""
    # Build the url
    url_init = 'https://www.epexspot.com/en/market-data/dayaheadauction/auction-table/'
    url_var = start.strftime('%Y-%m-%d')
    # url_end = '/FR'
    url = url_init + url_var
    # Get the response
    page = requests.get(url)
    # BeautifulSoup object
    soup = BeautifulSoup(page.content, 'html.parser')
    # Use the selector
    soup_list_1 = soup.select('table.list.hours.responsive tr.no-border')
    # We originally expected only 24 'tr'-type rows (one per hour of the day),
    # but the page returns 72: it also includes data for two additional
    # countries (Germany/Austria and Switzerland) in separate tabs, so all
    # three countries' rows are present in the markup.
    if country == 'FR':
        country_tag = soup_list_1[:24]
    elif country == 'DE/AT':
        country_tag = soup_list_1[24:24 * 2]
    elif country == 'CH':
        country_tag = soup_list_1[24 * 2:]
    else:
        raise ValueError("The country code is not recognized: must be 'FR', 'DE/AT' or 'CH'")

    country_dict = {}
    for hour in country_tag:
        ex = hour.find_all('td')
        ex_list = [elem.text for elem in ex]
        # strip the whitespace and take the first number as the hour column
        name_hour = ex_list[0].replace(" ", "")[1:3]
        # be careful with the '\n' characters
        country_dict[name_hour] = ex_list[:-8:-1]

    week_interval = pd.date_range(start=start, freq='-1D', periods=7)
    epex_tempo = pd.DataFrame(index=week_interval, data=country_dict)
    next_day_stamp = (week_interval[-1] - pd.to_timedelta(1, unit='D'))
    return epex_tempo, next_day_stamp
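# A hedged usage sketch: because get_df_from_epex returns the stamp one day
# before the earliest day it fetched, it can be chained to walk backwards
# through the archive (assuming the page layout still matches the selector above).
import pandas as pd

stamp = pd.Timestamp('2019-03-31')
frames = []
for _ in range(4):  # four consecutive weeks, newest first
    week_df, stamp = get_df_from_epex(stamp, country='FR')
    frames.append(week_df)
history = pd.concat(frames)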
def req_history_data(self, code: str, end_date_time: Timestamp, duration_str,
                     bar_size, what_to_show, use_rth: int, format_date: int,
                     keep_up_to_date, char_options) -> List[BarData]:
    req = Request.new_request()
    contract = self.code_to_contract(code)
    self.cli.reqHistoricalData(req.req_id, contract,
                               end_date_time.strftime("%Y%m%d %H:%M:%S") if end_date_time else "",
                               duration_str, bar_size, what_to_show, use_rth,
                               format_date, keep_up_to_date, char_options)
    if req.condition.acquire():
        req.condition.wait(20)
    if not req.resp:
        self.cli.cancelHistoricalData(req.req_id)
        raise RuntimeError("Request timed out or returned no data")
    resp = req.resp
    # clean up the request state
    Request.clear(req.req_id)
    # return the bars sorted by date
    return sorted(resp, key=lambda bar: bar.date)
class Station(object):
    """
    USGS Station and download helpers

    Parameters
    ----------
    site : int, string, or sequence
        Site ID number from NWIS. E.g., site = 14211500 or site = '14211500'
        for Johnson Creek in Portland, OR. This can also be a list-like
        object for multiple sites (experimental).
    start, end : string or date-like
        Start and end dates for the period of interest.
    savepath : path-like
        Path to where data will be saved when using the `get_data` method.
    """

    def __init__(self, site, start, end, savepath='data'):
        self.site = site
        self.start = Timestamp(start)
        self.end = Timestamp(end)
        self.savepath = Path(savepath or '.')
        self._daily_json = None
        self._insta_json = None
        self._daily_data = None
        self._insta_data = None

    def _make_fpath(self, daily):
        datefmt = '%Y%m%d'
        suffix = 'daily' if daily else 'insta'
        fname = "_".join([
            f"{self.site}",
            self.start.strftime(datefmt),
            'thru',
            self.end.strftime(datefmt),
            suffix,
        ])
        return self.savepath / (fname + '.csv')

    @property
    def daily_json(self):
        if self._daily_json is None:
            self._daily_json = fetch_nwis(self.site, self.start, self.end,
                                          daily=True).json()
        return self._daily_json

    @property
    def insta_json(self):
        if self._insta_json is None:
            self._insta_json = fetch_nwis(self.site, self.start, self.end,
                                          daily=False).json()
        return self._insta_json

    @property
    def daily_data(self):
        if self._daily_data is None:
            self._daily_data = read_nwis(self.daily_json, daily=True)
        return self._daily_data

    @property
    def insta_data(self):
        if self._insta_data is None:
            self._insta_data = read_nwis(self.insta_json, daily=False)
        return self._insta_data

    def get_data(self, daily=False, save=False, force=False):
        """
        Fetch and save data for the site.

        Parameters
        ----------
        daily : bool (default False)
            Toggles fetching either instantaneous (False) or daily (True)
            values.
        save : bool (default False)
            Toggles saving the downloaded data to `site.savepath`.
        force : bool (default False)
            If True and the data has already been downloaded and saved,
            this will force the redownloading of the data.

        Returns
        -------
        pandas.DataFrame

        Note
        ----
        Unless reading from a cache, this method *always* redownloads the
        data, even with multiple calls, instead of relying on the
        `daily_data` and `insta_data` properties of the class.
        """
        fpath = self._make_fpath(daily=daily)
        if not fpath.exists() or force:
            df = read_nwis(fetch_nwis(self.site, self.start, self.end,
                                      daily=daily).json(), daily=daily)
            if save:
                df.to_csv(fpath, encoding='utf-8')
        else:
            df = read_cache(fpath, daily=daily)
        return df
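# A hedged usage sketch for Station; fetch_nwis, read_nwis, and read_cache are
# module-level helpers assumed by the class.
station = Station(14211500, start='2018-01-01', end='2018-12-31', savepath='data')
daily = station.get_data(daily=True, save=True)
# cached at data/14211500_20180101_thru_20181231_daily.csv; later calls read the cache
insta = station.insta_data  # lazily fetched instantaneous values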