Code Example #1
    def get_data(
        self,
        symbol: str,
        interval: str,
        start_datetime: pd.Timestamp,
        end_datetime: pd.Timestamp,
    ) -> pd.DataFrame:
        if interval != self.INTERVAL_QUARTERLY:
            raise ValueError(f"cannot support {interval}")
        symbol, exchange = symbol.split(".")
        exchange = "sh" if exchange == "ss" else "sz"
        code = f"{exchange}.{symbol}"
        start_date = start_datetime.strftime("%Y-%m-%d")
        end_date = end_datetime.strftime("%Y-%m-%d")

        performance_express_report_df = self.get_performance_express_report_df(code, start_date, end_date)
        profit_df = self.get_profit_df(code, start_date, end_date)
        forecast_report_df = self.get_forecast_report_df(code, start_date, end_date)
        growth_df = self.get_growth_df(code, start_date, end_date)

        df = pd.concat(
            [performance_express_report_df, profit_df, forecast_report_df, growth_df],
            axis=0,
        )
        return df
Code Example #2
def craft_and_send_email(t1, t2, config, volcano, d_Azimuth, velocity,
                         mx_pressure, filename):
    from pandas import Timestamp
    # create the subject line
    subject = '{} Airwave Detection'.format(volcano['volcano'])

    # create the text for the message you want to send
    message = '{} alarm:\n'.format(config.alarm_name)
    message = '{}{} detection!\n\n'.format(message, volcano['volcano'])
    message = '{}Start: {} (UTC)\nEnd: {} (UTC)\n\n'.format(
        message, t1.strftime('%Y-%m-%d %H:%M'), t2.strftime('%Y-%m-%d %H:%M'))
    t1_local = Timestamp(t1.datetime, tz='UTC')
    t2_local = Timestamp(t2.datetime, tz='UTC')
    t1_local = t1_local.tz_convert('US/Alaska')
    t2_local = t2_local.tz_convert('US/Alaska')
    message = '{}Start: {} ({})'.format(message,
                                        t1_local.strftime('%Y-%m-%d %H:%M'),
                                        t1_local.tzname())
    message = '{}\nEnd: {} ({})\n\n'.format(
        message, t2_local.strftime('%Y-%m-%d %H:%M'), t2_local.tzname())

    message = '{}d_Azimuth: {:+.1f} degrees\n'.format(message, d_Azimuth)
    message = '{}Velocity: {:.0f} m/s\n'.format(message, velocity * 1000)
    message = '{}Max Pressure: {:.1f} Pa'.format(message, mx_pressure)

    utils.send_alert(config.alarm_name, subject, message, filename)
    utils.post_mattermost(subject, message, config.alarm_name, filename)
    # delete the file you just sent
    if filename:
        remove(filename)
Code Example #3
def date_to_url(
        datetime: pd.Timestamp,
        already_downloaded: Set[str]) -> Union[str, None]:
    """convert a datetime to its corresponding wiki dump url

    Arguments:
        datetime {Timestamp} -- Timestamp whose corresponding wiki pageviews dump will be downloaded
        already_downloaded {Set[str]} -- set of filenames for datetimes whose pageviews have already been downloaded and processed

    Returns:
        string, None -- url to download, or None if data already downloaded and processed
    """
    year = datetime.strftime('%Y')
    year_month = datetime.strftime('%Y-%m')
    pageviews = datetime.strftime('pageviews-%Y%m%d-%H0000.gz')

    # check to see if the file has already been downloaded and processed
    # we do "pageviews[:-3]" because the string ends in ".gz"
    if pageviews[:-3] in already_downloaded:
        print(f'already downloaded {pageviews[:-3]}')
        return None

    # url looks like:
    # https://dumps.wikimedia.org/other/pageviews/2020/2020-05/pageviews-20200501-100000.gz
    url = os.path.join(ROOT_URL, year, year_month, pageviews)
    return url
Code Example #4
File: hydllp.py Project: mullenkamp/HydroPandas
    def get_ts_traces(self, site_list, start=0, end=0, varfrom=100, varto=140, interval='day', multiplier=1, datasource='A', data_type='mean', qual_codes=[30, 20, 10, 11, 21, 18], report_time=None):
        """

        """

        # Convert the site list to a comma delimited string of sites
        sites = select_sites(site_list).astype(str)
        site_list_str = ','.join([str(site) for site in sites])

        ### Datetime conversion - with dates < 1900
        c1900 = Timestamp('1900-01-01')
        if start != 0:
            start1 = Timestamp(start)
            if start1 > c1900:
                start = start1.strftime('%Y%m%d%H%M%S')
            else:
                start = start1.isoformat(' ').replace('-', '').replace(' ', '').replace(':', '')
        if end != 0:
            end1 = Timestamp(end)
            if end1 > c1900:
                end = end1.strftime('%Y%m%d%H%M%S')
            else:
                end = end1.isoformat(' ').replace('-', '').replace(' ', '').replace(':', '')

        ts_traces_request = {'function': 'get_ts_traces',
                             'version': 2,
                             'params': {'site_list': site_list_str,
                                        'start_time': start,
                                        'end_time': end,
                                        'varfrom': varfrom,
                                        'varto': varto,
                                        'interval': interval,
                                        'datasource': datasource,
                                        'data_type': data_type,
                                        'multiplier': multiplier,
                                        'report_time': report_time}}

        ts_traces_request = self.query_by_dict(ts_traces_request)
        j1 = ts_traces_request['return']['traces']

        ### Convert json to a dataframe
        sites = [str(f['site']) for f in j1]

        out1 = DataFrame()
        for i in range(len(j1)):
            df1 = DataFrame(j1[i]['trace'])
            if not df1.empty:
                df1.rename(columns={'v': 'data', 't': 'time', 'q': 'qual_code'}, inplace=True)
                df1['data'] = to_numeric(df1['data'], errors='coerce')
                df1['time'] = to_datetime(df1['time'], format='%Y%m%d%H%M%S')
                df1['qual_code'] = to_numeric(df1['qual_code'], errors='coerce', downcast='integer')
                df1['site'] = sites[i]
                df2 = df1[df1.qual_code.isin(qual_codes)]
                out1 = concat([out1, df2])

        out2 = out1.set_index(['site', 'time'])[['data', 'qual_code']]

        return out2
Code Example #5
class TickEvent(Event):
    """
    Tick event
    """
    def __init__(self):
        """
        Initialises Tick
        """
        self.event_type = EventType.TICK
        self.tick_type = TickType.TRADE
        self.timestamp = Timestamp('1970-01-01', tz='UTC')
        self.full_symbol = ''
        self.price = 0.0
        self.size = 0
        self.depth = 1
        self.bid_price_L1 = 0.0
        self.bid_size_L1 = 0
        self.ask_price_L1 = 0.0
        self.ask_size_L1 = 0
        self.open_interest = 0
        self.open = 0.0
        self.high = 0.0
        self.low = 0.0
        self.pre_close = 0.0
        self.upper_limit_price = 0.0
        self.lower_limit_price = 0.0

    def __str__(self):
        return "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s" % (
            str(self.timestamp.strftime("%H:%M:%S.%f")),
            str(datetime.now().strftime("%H:%M:%S.%f")), str(self.full_symbol),
            (self.tick_type), str(self.bid_size_L1), str(self.bid_price_L1),
            str(self.ask_price_L1), str(self.ask_size_L1), str(
                self.price), str(self.size))
Code Example #6
def get_next_trading_day_str(time: pd.Timestamp, shift_days: int = 1):
    """"""
    # TODO:
    # Shifting forward must check the detailed trading-day data (per the domestic trading calendar).
    # Shifting backward does not, since ad-hoc holidays are occasionally added.
    check_detail = False
    if shift_days > 0:
        check_detail = True
    idx = 0
    shift_times = abs(shift_days)
    shift_delta = shift_days // shift_times
    while True:
        day_str = time.strftime(DATE_FORMAT)
        if calendar_manager.check_open(day_str, check_detail):
            idx += 1
        if idx >= shift_times:
            return day_str
        if shift_delta > 0 and time.day_of_week == 4:
            # friday to monday directly
            delta = 3
        elif shift_delta < 0 and time.day_of_week == 0:
            # monday to friday directly
            delta = -3
        else:
            delta = 1 * shift_delta
        time += datetime.timedelta(days=delta)
Code Example #7
File: ib.py Project: weiguang-zz/stratege_engine
 def _req_history_ticks(self, code: str, start: Timestamp, end: Timestamp, nums: int, what_to_show: str,
                        use_rth: int,
                        ignore_size: bool, misc_options) -> List[HistoricalTickLast]:
     req = Request.new_request()
     contract = self.code_to_contract(code)
     self.cli.reqHistoricalTicks(req.req_id, contract,
                                 start.strftime("%Y%m%d %H:%M:%S") if start is not None else "",
                                 end.strftime("%Y%m%d %H:%M:%S"), nums, what_to_show,
                                 use_rth, ignore_size, misc_options)
     if req.condition.acquire():
         req.condition.wait(10)
     if not req.resp:
         raise RuntimeError("获取数据超时或者没有获取到数据")
     resp = req.resp
     Request.clear(req.req_id)
     return resp
Code Example #8
File: data.py Project: aashish24/banet-1
 def process_one(self,
                 time: pd.Timestamp,
                 proc_funcs: list = [],
                 save=True,
                 **proc_funcs_kwargs):
     """This method defines a processing pipeline consisting of opening the file
     using the `open` method, applying each of the `proc_funcs` to the output of the previous
     and `save` the processed data using save method."""
     tstr = time.strftime('%Y%m%d')
     files = self.list_files(time)
     try:
         if len(files) > 0:
             data = self.open(files)
             proc_funcs = [BandsAssertShape()] + proc_funcs
             kwargs = {'cls': self, **proc_funcs_kwargs}
             for f in proc_funcs:
                 data = f(data, time, **kwargs)
             if save:
                 self.save(time, data)
             else:
                 return data
         else:
             warn(f'No files for {time}. Skipping to the next time.')
      except Exception:
         msg = f'Unable to process files for {time}. Check if files are corrupted. Skipping to the next time. { sys.exc_info()[0]}'
         warn(msg, UserWarning)
Code Example #9
    def load_to_cache(self, start_time: pd.Timestamp, end_time: pd.Timestamp):
        """
        Load data for the (end_time - start_time) day range from the server
        into CSV files.
        start_time - start time (type: pd.Timestamp())
        end_time   - end time (type: pd.Timestamp())
        """
        # TODO: Needs rework, since bitmex does not publish the last day's data completely
        #assert end_time < (pd.Timestamp.today() - dt.timedelta(days=1))

        assert start_time < end_time

        if start_time.date() == end_time.date():
            if not self.check_cache(start_time):
                self.load_bar_day(start_time).to_csv(
                    path.join(self.path_cash, self.symbol, self.data_frequency,
                              start_time.strftime("%Y-%m-%d") + '.csv'))
        else:
            for day in pd.date_range(start_time,
                                     end_time,
                                     freq='D',
                                     closed='left'):
                if not self.check_cache(day):
                    self.load_bar_day(day).to_csv(
                        path.join(self.path_cash, self.symbol,
                                  self.data_frequency,
                                  day.strftime("%Y-%m-%d") + '.csv'))
Code Example #10
def insert_data(dt: pd.Timestamp, df: pd.DataFrame,
                db_conn: sqlite3.Connection):
    db_cursor = db_conn.cursor()

    bases = ["base252", "base360"]
    date = dt.strftime("%Y-%m-%d")

    for row in df.itertuples():
        doc = {
            "date": f"'{date}'",
            "duration": str(getattr(row, "duration")),
        }

        for base in bases:
            val = getattr(row, base)
            if math.isnan(val):
                continue
            doc[base] = str(val)

        if any([d in doc for d in bases]):
            columns = ", ".join(doc.keys())
            values = ", ".join(doc.values())
            query = f"INSERT INTO cdi ({columns}) VALUES ({values});"
            db_cursor.execute(query)

    db_conn.commit()
    db_cursor.close()
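A self-contained sketch exercising insert_data against an in-memory SQLite database; the cdi table schema below is an assumption inferred from the column names the function uses:

import sqlite3
import pandas as pd

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE cdi (date TEXT, duration REAL, base252 REAL, base360 REAL)')

# One row has a NaN base360: that column is skipped for the row, the rest is inserted.
rates = pd.DataFrame({'duration': [1, 21],
                      'base252': [0.1315, 0.1321],
                      'base360': [float('nan'), 0.1299]})
insert_data(pd.Timestamp('2024-01-02'), rates, conn)

print(conn.execute('SELECT * FROM cdi').fetchall())
conn.close()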
Code Example #11
 def list_files(self, time:pd.Timestamp) -> list:
     files = []
     if time.year in self.times.year:
         time = pd.Timestamp(f'{time.year}-01-01')
         time_pattern = time.strftime('_%Y_')
         files = self.paths.src.ls(recursive=True, include=['.tif', time_pattern],
                             exclude=['.xml'])
     return files
Code Example #12
File: GUI.py Project: ArlenPP/Futures_Database
 def query(self):
     try:
         start = Timestamp(self.q[1].get())
         end = Timestamp(self.q[2].get())
         assert not isinstance(start, NaTType) and not isinstance(
             end, NaTType)
     except Exception as e:
         messagebox.showinfo('Wrong Format',
                             'Fail to convert into Timestamp')
     else:
         sDate = start.strftime('%Y-%m-%d')
         eDate = end.strftime('%Y-%m-%d')
         sDatetime = start.strftime('%Y-%m-%d %H:%M:%S')
         eDatetime = end.strftime('%Y-%m-%d %H:%M:%S')
         if self.q[3].get() == 'day':
             return self.read(sDate, eDate, True)
         else:
             return self.read(sDatetime, eDatetime, False)
Code Example #13
 def _log_and_update_pnl(self, pnl: float, symbol: Symbol,
                         timestamp: pd.Timestamp):
     self.pnl_history[symbol].append(pnl)
     self.cum_pnl[symbol] += pnl
     self.pnl_file.write(','.join([
         str(timestamp.strftime('%Y-%m-%dT%H:%M:%S')), symbol.name,
         str(pnl),
         str(self.cum_pnl[symbol])
     ]) + '\n')
Code Example #14
    def get_communication_subgraph(self, owner: str,
                                   dt: pd.Timestamp) -> pd.DataFrame:
        """
        Queries a subgraph from the timetree consisting of communication links between users in a specific timeframe.

        The timeframe length can be configured in the conf module.

        :param owner:   repository owner
        :param dt:      timestamp indicating the end of the desired period for the subgraph query
        :return:        pd.DataFrame containing links between nodes in the subgraph
        """

        q_subgraph_time = '''            
            MATCH (o:OWNER{login:$l_owner})

            WITH
                o,
                apoc.date.parse($l_dt, 'ms', 'yyyy-MM-dd') as end
            WITH
                o, 
                end, 
                apoc.date.add(end, 'ms', $l_tf_length , 'd') as start


            MATCH (node:COMMENT) -[:to]-> () -[:to]-> (r) -[:belongs_to]-> (o)
            WHERE node.event_time >= start AND node.event_time <= end
            WITH node as comment 

            MATCH (source:USER) -[:makes]-> (comment) -[x]-> (target:USER)
            WHERE id(source) <> id(target)
            
            WITH DISTINCT 
                source, 
                target          

            RETURN 
            id(source) as source,
            id(target) as target
        '''

        links = pd.DataFrame(
            self.graph.data(q_subgraph_time,
                            parameters={
                                "l_owner": owner,
                                "l_tf_length": (-1 * conf.a_length_timeframe),
                                "l_dt": dt.strftime("%Y-%m-%d")
                            }))

        if conf.a_filter_core and not links.empty:
            dev_core = self.get_dev_core(owner)

            links = links[(links['source'].isin(dev_core['u_id'])
                           & links['target'].isin(dev_core['u_id']))]

        return links
Code Example #15
 def map_platform_to_schema(self, event, band, mbid, other):
     concertdate = Timestamp(dateparse(event["start"]["date"]).date())
     return {
         "titel":
         event["displayName"].strip().rstrip(
             concertdate.strftime("%B %d, %Y")),
         "titel_generated":
         event["displayName"].strip().rstrip(
             concertdate.strftime("%B %d, %Y")),
         "datum":
         concertdate,
         "einddatum":
         Timestamp(dateparse(event["end"]["date"]).date())
         if "end" in event else None,
         "artiest":
         other["artist_name"],
         "artiest_id":
         "songkick_" + str(other["artist_id"]),
         "artiest_mb_naam":
         band,
         "artiest_mb_id":
         mbid,
         "stad":
         ",".join([
             i.strip() for i in event["location"]["city"].split(",")[0:-1]
         ]),
         "land":
         event["location"]["city"].split(",")[-1].strip(),
         "venue":
         event["displayName"].strip() if event["type"] == "Festival" else
         event["venue"]["displayName"].strip(),
         "latitude":
         event["venue"]["lat"],
         "longitude":
         event["venue"]["lng"],
         "source":
         self.platform,
         "event_id":
         "songkick_" + str(event["id"]),
         "event_type":
         event["type"].lower()
     }
Code Example #16
def download_data(date, resolution="high"):
    """Short summary.

    Parameters
    ----------
    date : type
        Description of parameter `date`.
    resolution : type
        Description of parameter `resolution`.

    Returns
    -------
    type
        Description of returned object.

    """
    import ftplib
    from datetime import datetime

    from pandas import DatetimeIndex

    if isinstance(date, datetime) or isinstance(date, DatetimeIndex):
        year = date.strftime("%Y")
        yyyymmdd = date.strftime("%Y%m%d")
    else:
        from pandas import Timestamp

        date = Timestamp(date)
        year = date.strftime("%Y")
        yyyymmdd = date.strftime("%Y%m%d")
        # npp_eaot_ip_gridded_0.25_20181222.high.nc
    # print(year, yyyymmdd)
    file = f"npp_eaot_ip_gridded_0.25_{yyyymmdd}.high.nc"
    exists = os.path.isfile(file)
    if not exists:
        ftp = ftplib.FTP(server)
        ftp.login()
        ftp.cwd(base_dir + year)
        ftp.retrbinary("RETR " + file, open(file, "wb").write)
    else:
        print(f"File Already Exists! Reading: {file}")
    return file, date
Code Example #17
    def check_cache(self, day: pd.Timestamp):
        """
        Check whether data for the given day has already been cached.
        day - date
        TODO: completeness of the data in the cached file is not verified.
        """

        pt = path.join(self.path_cash, self.symbol, self.data_frequency,
                       day.strftime("%Y-%m-%d") + '.csv')

        return path.exists(pt)
Code Example #18
 def show(self, date: pd.Timestamp):
     d = self.data.loc[date]
     if isinstance(d, pd.DataFrame):
         if len(d) > 1:
             raise ValueError(f'"{date}" matches more than one date')
         d = d.stack("age_group")
     for bar, h in zip(self.bars, d):
         bar.set_height(h)
     formatted_date = date.strftime("%d %B %Y")
     self.date_text.set_text(formatted_date)
     self.update_labels()
     return self.artists
Code Example #19
 def send_data(self, timestamp: pd.Timestamp, power):
     msg = json.dumps({
         'topic': 'data',
         'payload': {
             'timestamp': timestamp.strftime(_TIMESTAMP_FORMAT),
             'power': power
         }
     })
     self.channel.basic_publish(exchange='',
                                routing_key='tmhchallenge',
                                body=msg)
     print(f"[x] Sent '{msg}'")
Code Example #20
 def list_files(self, time:pd.Timestamp) -> list:
     out = []
     if time in self.times:
         time = pd.Timestamp(f'{time.year}-{time.month}-01')
         time_pattern = time.strftime('%Y%m%d')
         files = self.paths.src.ls(recursive=True, include=['JD.tif', time_pattern],
                             exclude=['.xml'])
         # Find windows joint with region bounding box
         for f in files:
             data = open_tif(f)
             if not disjoint_bounds(data.bounds, self.region.bbox):
                 out.append(f)
     return out
Code Example #21
def download_data(date, ftype='meanFRP'):
    import requests as rq
    from datetime import datetime
    from numpy import arange
    if isinstance(date, datetime):
        year = date.strftime('%Y')
        yyyymmdd = date.strftime('%Y%m%d')
    else:
        from pandas import Timestamp
        date = Timestamp(date)
        year = date.strftime('%Y')
        yyyymmdd = date.strftime('%Y%m%d')
    url_ftype = "&files={}.".format(ftype)
    for i in arange(1, 7, dtype=int).astype(str):
        tile = ".FV3C384Grid.tile{}.bin".format(i)
        url = "{}{}{}{}{}".format(base_dir, yyyymmdd, url_ftype, yyyymmdd,
                                  tile)
        fname = "{}.{}.FV3.C384Grid.tile{}.bin".format(ftype, yyyymmdd, i)
        print('Retrieving file:', fname)
        r = rq.get(url)
        with open(fname, 'wb') as f:
            f.write(r.content)
Code Example #22
def process_quality_control(rid: int, date: pd.Timestamp,
                            outpath: str) -> None:
    """
    Driver for processing the quality control. Find the data for given radar ID
    and dates on GADI. Unzip it and send it to the radar_qcchecks function.
    Also takes care of saving the output results into a daily csv file.

    Parameters:
    ===========
    rid: int
        Radar rapic ID.
    date: pd.Timestamp
        Date to process.
    outpath: str
        Output path directory.
    """
    datestr = date.strftime("%Y%m%d")
    fname = f"{rid}_stats_{datestr}.csv"
    fname = os.path.join(outpath, fname)
    if os.path.isfile(fname):
        print("Output file already exists. Doing nothing.")
        return None

    inzip = get_radar_archive_file(date, rid)
    if inzip is None:
        print(f"Couldn't get zip archive for {date} and radar {rid}.")
        return None

    flist = extract_zip(inzip)
    print(f"Found {len(flist)} files for radar {rid} on the {datestr}.")
    try:
        bag = db.from_sequence(flist).map(radar_qcchecks.qccheck_radar_odim)
        rslt = bag.compute()

        rslt = [r for r in rslt if r is not None]
        if len(rslt) == 0:
            raise ValueError("No rain today.")

        df = pd.DataFrame(rslt[0], index=[0])
        for r in rslt[1:]:
            df = df.append(r, ignore_index=True)

        df.set_index("time").to_csv(fname, float_format="%g")
        print(f"{fname} saved.")
    except Exception:
        traceback.print_exc()
    finally:
        remove(flist)

    gc.collect()
    return None
Code Example #23
def craft_and_send_email(t1, t2, stations, rms, lvlv, alarm_name, filename):
    from pandas import Timestamp

    # create the subject line
    subject = '--- {} ---'.format(alarm_name)

    # create the text for the message you want to send
    message = 'Start: {} (UTC)\nEnd: {} (UTC)\n\n'.format(
        t1.strftime('%Y-%m-%d %H:%M'), t2.strftime('%Y-%m-%d %H:%M'))
    t1_local = Timestamp(t1.datetime, tz='UTC')
    t2_local = Timestamp(t2.datetime, tz='UTC')
    t1_local = t1_local.tz_convert('US/Alaska')
    t2_local = t2_local.tz_convert('US/Alaska')
    message = '{}Start: {} ({})'.format(message,
                                        t1_local.strftime('%Y-%m-%d %H:%M'),
                                        t1_local.tzname())
    message = '{}\nEnd: {} ({})\n\n'.format(
        message, t2_local.strftime('%Y-%m-%d %H:%M'), t2_local.tzname())

    a = np.array([''] * len(rms[:-1]))
    a[np.where(rms > lvlv)] = '*'
    sta_message = ''.join(
        '{}{}: {:.0f}/{}\n'.format(sta, a[i], rms[i], lvlv[i])
        for i, sta in enumerate(stations[:-1]))
    sta_message = ''.join([
        sta_message, '\nArrestor: {} {:.0f}/{}'.format(stations[-1], rms[-1],
                                                       lvlv[-1])
    ])
    message = ''.join([message, sta_message])

    utils.send_alert(alarm_name, subject, message, filename)
    # utils.post_mattermost(subject,message,filename)
    utils.post_mattermost(subject, message, alarm_name, filename)
    # delete the file you just sent
    if filename:
        remove(filename)
Code Example #24
def download_data(date, resolution='high'):
    import ftplib
    from datetime import datetime
    if isinstance(date, datetime):
        year = date.strftime('%Y')
        yyyymmdd = date.strftime('%Y%m%d')
    else:
        from pandas import Timestamp
        date = Timestamp(date)
        year = date.strftime('%Y')
        yyyymmdd = date.strftime('%Y%m%d')
    if resolution == 'high':
        file = 'npp_aot550_edr_gridded_0.10_{}.high.bin.gz'.format(yyyymmdd)
    else:
        file = 'npp_aot550_edr_gridded_0.25_{}.high.bin.gz'.format(yyyymmdd)
    ftp = ftplib.FTP(server)
    ftp.login()
    # print(base_dir)
    # print(year)
    # print(base_dir + year)
    ftp.cwd(base_dir + year)
    # print(file)
    ftp.retrbinary("RETR " + file, open(file, 'wb').write)
    return file, date
Code Example #25
File: dataset.py Project: Alex-AH/banet
 def process_one(self, time:pd.Timestamp, proc_funcs:list=[], save=True, **proc_funcs_kwargs):
     tstr = time.strftime('%Y%m%d')
     files = self.list_files(time)
     try:
         if len(files) > 0:
             data = self.open(files)
             proc_funcs = [BandsAssertShape()] + proc_funcs
             kwargs = {'cls': self, **proc_funcs_kwargs}
             for f in proc_funcs:
                 data = f(data, time, **kwargs)
             if save: 
                 self.save(time, data)   
             else: return data
         else: 
             warn(f'No files for {time}. Skipping to the next time.')
      except Exception: warn(f'Unable to process files for {time}. Check if files are corrupted. Skipping to the next time.')
Code Example #26
 def _request_history_companies(self, trade_date: pd.Timestamp, use_cache: bool = True) -> pd.DataFrame:
     trade_date = trade_date.strftime("%Y-%m-%d")
     cache_path = self.cache_dir.joinpath(f"{trade_date}_history_companies.pkl")
     if cache_path.exists() and use_cache:
         df = pd.read_pickle(cache_path)
     else:
         url = self.HISTORY_COMPANIES_URL.format(trade_date=trade_date)
         resp = requests.post(url)
         if resp.status_code != 200:
             raise ValueError(f"request error: {url}")
         df = pd.DataFrame(resp.json()["aaData"])
         df[self.DATE_FIELD_NAME] = trade_date
         df.rename(columns={"Name": "name", "Symbol": self.SYMBOL_FIELD_NAME}, inplace=True)
         if not df.empty:
             df.to_pickle(cache_path)
     return df
Code Example #27
def generate_date_feature(date: pd.Timestamp):
    """
    Generate date features.
    """
    result = pd.Series()
    result['timestamp'] = date.timestamp()

    format_string = '%Y-%m-%d'
    dt_string = date.strftime(format_string)
    result['holiday'] = int(
        (dt_string in holiday)
        or (date.weekday() in [5, 6] and dt_string not in work))

    result = pd.concat(
        [result, get_onehot(date.weekday(), 0, 6, name='weekday')])
    return result
Code Example #28
File: test_plot.py Project: mac-kim/causalimpact
def test_plot_cumulative_panel_date_index_no_freq(date_rand_data,
                                                  pre_str_period,
                                                  post_str_period,
                                                  monkeypatch):
    ci = CausalImpact(date_rand_data, pre_str_period, post_str_period)
    dd = date_rand_data.copy()
    dd.drop(dd.index[10:20])
    ax_mock = mock.Mock()
    plotter_mock = mock.Mock()
    plotter_mock.subplot.return_value = ax_mock
    plot_mock = mock.Mock(return_value=plotter_mock)
    monkeypatch.setattr(plot.Plot, '_get_plotter', plot_mock)

    ci.plot(panels=['cumulative'])
    plot_mock.assert_called_once()
    plotter_mock.figure.assert_called_with(figsize=(15, 12))
    plotter_mock.subplot.assert_any_call(1, 1, 1, sharex=ax_mock)
    ax_args = ax_mock.plot.call_args

    inferences = ci.inferences.iloc[1:, :]

    assert_array_equal(inferences['post_cum_effects'], ax_args[0][0])
    assert ax_args[0][1] == 'b--'
    assert ax_args[1] == {'label': 'Cumulative Effect'}

    date_ = datetime.strptime(ci.post_period[0], "%Y%m%d")
    date_ = date_ + timedelta(days=-1)
    date_ = Timestamp(date_.strftime("%Y-%m-%d %H:%M:%S"))
    ax_mock.axvline.assert_called_with(date_, c='k', linestyle='--')

    ax_args = ax_mock.fill_between.call_args_list[0]
    assert_array_equal(ax_args[0][0], inferences['post_cum_effects'].index)
    assert_array_equal(ax_args[0][1], inferences['post_cum_effects_lower'])
    assert_array_equal(ax_args[0][2], inferences['post_cum_effects_upper'])
    assert ax_args[1] == {
        'facecolor': 'blue',
        'interpolate': True,
        'alpha': 0.25
    }

    ax_mock.axhline.assert_called_with(y=0, color='k', linestyle='--')

    ax_mock.grid.assert_called_with(True, linestyle='--')
    ax_mock.legend.assert_called()

    plotter_mock.show.assert_called_once()
Code Example #29
def get_df_from_epex(start: pd.Timestamp, country='FR'):
    """This will take the start date timestamp, returns a dataframe of the week that ends in that day along with the
    time stamp for the next day in the past (the day start - 8)."""

    # Get the url
    url_init = 'https://www.epexspot.com/en/market-data/dayaheadauction/auction-table/'
    url_var = start.strftime('%Y-%m-%d')
    # url_end = '/FR'
    url = url_init + url_var

    # Get the response :
    page = requests.get(url)

    # BeautifulSoup object :
    soup = BeautifulSoup(page.content, 'html.parser')

    # Use the selector
    soup_list_1 = soup.select('table.list.hours.responsive tr.no-border')

    # Data for France. Originally we expected to find only 24 'tr'-type objects, representing 24 rows of data per day,
    # but we got 72 instead: the page also includes data for 2 additional countries (Germany & Switzerland).
    # All 3 countries' data sit in different tabs, so at first glance it is easy to miss.

    if country == 'FR':
        country_tag = soup_list_1[:24]
    elif country == 'DE/AT':
        country_tag = soup_list_1[24:24*2]
    elif country == 'CH':
        country_tag = soup_list_1[24*2:]
    else:
        raise ValueError('The country code is not recognized: must be \'FR\', \'DE/AT\' or \'CH\' ')

    country_dict = {}

    for hour in country_tag:
        ex = hour.find_all('td')
        ex_list = [elem.text for elem in ex]
        name_hour = ex_list[0].replace(" ", "")[1:3]  # strip the whitespaces, take the first number as the hour column
        country_dict[name_hour] = ex_list[:-8:-1]  # Be careful with the \n commands

    week_interval = pd.date_range(start=start, freq='-1D', periods=7)
    epex_tempo = pd.DataFrame(index=week_interval, data=country_dict)
    next_day_stamp = (week_interval[-1] - pd.to_timedelta(1, unit='D'))

    return epex_tempo, next_day_stamp
Code Example #30
File: ib.py Project: weiguang-zz/stratege_engine
 def req_history_data(self, code: str, end_date_time: Timestamp, duration_str, bar_size, what_to_show,
                      use_rth: int, format_date: int, keep_up_to_date, char_options) -> List[BarData]:
     req = Request.new_request()
     contract = self.code_to_contract(code)
     self.cli.reqHistoricalData(req.req_id, contract,
                                end_date_time.strftime("%Y%m%d %H:%M:%S") if end_date_time else "",
                                duration_str, bar_size,
                                what_to_show, use_rth, format_date, keep_up_to_date, char_options)
     if req.condition.acquire():
         req.condition.wait(20)
     if not req.resp:
         self.cli.cancelHistoricalData(req.req_id)
         raise RuntimeError("获取数据超时或者没有获取到数据")
     resp = req.resp
     # 清理数据
     Request.clear(req.req_id)
     # 返回排好序的数据
     return sorted(resp, key=lambda bar: bar.date)
Code Example #31
File: nwis.py Project: Geosyntec/dockside
class Station(object):
    """ USGS Station and download helpers

    Parameters
    ----------
    site : int, string, or sequence
        Site ID number from NWIS. E.g., site = 14211500 or site = '14211500' for
        Johnson Creek in Portland, OR. This can also be a list-like object for
        multiple sites (experimental).
    start, end : string or date-like
        Start and end dates for the period of interest.
    savepath : path-like
        Path to where data would be saved when using the `get_data` method.

    """

    def __init__(self, site, start, end, savepath='data'):
        self.site = site
        self.start = Timestamp(start)
        self.end = Timestamp(end)
        self.savepath = Path(savepath or '.')

        self._daily_json = None
        self._insta_json = None
        self._daily_data = None
        self._insta_data = None

    def _make_fpath(self, daily):
        datefmt = '%Y%m%d'
        suffix = 'daily' if daily else 'insta'
        fname = "_".join([
            f"{self.site}",
            self.start.strftime(datefmt),
            'thru',
            self.end.strftime(datefmt),
            suffix,
        ])
        return self.savepath / (fname + '.csv')

    @property
    def daily_json(self):
        if self._daily_json is None:
            self._daily_json = fetch_nwis(self.site, self.start, self.end,
                                          daily=True).json()
        return self._daily_json

    @property
    def insta_json(self):
        if self._insta_json is None:
            self._insta_json = fetch_nwis(self.site, self.start, self.end,
                                          daily=False).json()
        return self._insta_json

    @property
    def daily_data(self):
        if self._daily_data is None:
            self._daily_data = read_nwis(self.daily_json, daily=True)
        return self._daily_data

    @property
    def insta_data(self):
        if self._insta_data is None:
            self._insta_data = read_nwis(self.insta_json, daily=False)
        return self._insta_data

    def get_data(self, daily=False, save=False, force=False):
        """
        Fetch and save data for the site.

        Parameters
        ----------
        daily : bool (default False)
            Toggles fetching either instantaneous (False) or daily values (True).
        save : bool (default False)
            Toggles saving the downloaded data to `site.savepath`.
        force : bool (default False)
            If True and the data has already been downloaded and saved, this
            will force the data to be redownloaded.

        Returns
        -------
        pandas.DataFrame

        Note
        ----
        Unless reading from a cache, this method *always* redownloads the data,
        even with multiple calls, instead of relying on the `daily_data` and
        `insta_data` properties of the class.

        """

        fpath = self._make_fpath(daily=daily)

        if not fpath.exists() or force:
            df = read_nwis(self.site, self.start, self.end, daily=daily)
            if save:
                df.to_csv(fpath, encoding='utf-8')
        else:
            df = read_cache(fpath, daily=daily)
        return df
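A short usage sketch based on the class docstring; the site number comes from the docstring example, and fetch_nwis, read_nwis, and read_cache are assumed to be available in the same module:

station = Station(14211500, start='2019-01-01', end='2019-12-31', savepath='data')
daily_df = station.get_data(daily=True, save=True)   # daily values, cached to data/ as CSV
insta_df = station.insta_data                        # instantaneous values via the lazy property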