def get_data(self, state: Union[datetime.date, datetime.datetime] = None):
    """Return the value column for this asset, loading the dataset lazily.

    The dataset query result is cached on ``self._loaded_data``; subsequent
    calls reuse the cache without querying again.
    """
    if self._loaded_data is None:
        dataset = Dataset(self._data_set)
        # Fall back to the evaluation state when no explicit bound is set.
        start = self._min_date or state
        end = self._max_date or state
        self._loaded_data = dataset.get_data(start, end, assetId=(self._asset_id, ))
    return self._loaded_data[self._value_header]
def get_data(self, state):
    """Return the value column for this asset, loading the dataset lazily.

    :param state: evaluation date/datetime used as the query bound when no
        explicit min/max date is configured.
    """
    if self._loaded_data is None:
        ds = Dataset(self._data_set)
        self._loaded_data = ds.get_data(self._min_date or state,
                                        self._max_date or state,
                                        assetId=(self._asset_id, ))
    # BUG FIX: the original returned self._loaded_data[self] — indexing the
    # frame with the object itself, which raises KeyError.  The sibling
    # implementations of this method index by the configured value column.
    return self._loaded_data[self._value_header]
def populate_values(self, dataset, value_column, underlier_column):
    """Recursively copy ``value_column`` from ``dataset`` onto each direct
    underlier node of this tree, then descend into those nodes."""
    frame = Dataset(dataset).get_data(start=self.date, end=self.date, assetId=[self.id])
    if len(frame) == 0:
        return
    for child in self.direct_underlier_assets_as_nodes:
        match = frame.loc[frame[underlier_column] == child.id]
        child.data[value_column] = match[value_column].iloc[0]
        child.populate_values(dataset, value_column, underlier_column)
def test_get_coverage(mocker):
    """get_coverage should return a typed dataframe built from the API payload."""
    mocker.patch("gs_quant.api.gs.data.GsDataApi.get_coverage",
                 return_value=test_coverage_data)
    mocker.patch("gs_quant.api.gs.data.GsDataApi.get_types",
                 return_value={'gsid': 'string'})
    actual = Dataset(Dataset.TR.TREOD).get_coverage()
    expected = GsDataApi.construct_dataframe_with_types(str(Dataset.TR.TREOD),
                                                        test_coverage_data)
    assert actual.equals(expected)
def holidays(self) -> set:
    """Lazily load exchange holidays for the configured calendars.

    The dataset is queried at most once per instance; afterwards the cached
    set is returned directly.
    """
    if self.__calendars and not self.__holidays_loaded:
        data = Dataset(Dataset.GS.HOLIDAY).get_data(exchange=self.__calendars,
                                                    start=self.DATE_LOW_LIMIT,
                                                    end=self.DATE_HIGH_LIMIT)
        if not data.empty:
            self.__holidays.update(data.index.values.astype('datetime64[D]'))
        self.__holidays_loaded = True
    return self.__holidays
def get_fx_spot_series(self) -> pd.Series:
    """Build the full WM FX spot mid-price history for this asset's cross.

    Returns a series named after the cross (e.g. 'EUR/USD'), de-duplicated so
    each timestamp keeps its last observation.
    """
    dataset = Dataset('WMFXSPOT')
    coverage = dataset.get_coverage()
    cross_name = self.currency + '/' + self.source_asset.currency
    asset_id = coverage[coverage['name'] == cross_name]['assetId'].values[0]
    series = dataset.get_data(assetId=asset_id, start=dt.date(1970, 1, 1))['midPrice']
    series = series[~series.index.duplicated(keep='last')]
    series.name = cross_name
    return series
def test_last_data(mocker):
    """get_data_last should wrap the API's final record in a typed dataframe."""
    last_record = test_data[-1]
    mocker.patch("gs_quant.api.gs.data.GsDataApi.last_data", return_value=[last_record])
    mocker.patch("gs_quant.api.gs.data.GsDataApi.get_types", return_value=test_types)
    actual = Dataset(Dataset.TR.TREOD).get_data_last(dt.date(2019, 1, 9),
                                                     assetId='MA4B66MW5E27U8P32SB')
    expected = GsDataApi.construct_dataframe_with_types(str(Dataset.TR.TREOD),
                                                        [last_record])
    assert actual.equals(expected)
def _remote_load(self) -> DataFrame:
    """Fetch US daily WHO COVID-19 data as a dataframe.

    A request failure degrades to an empty frame instead of raising, so
    callers always receive a DataFrame.
    """
    dataset = Dataset('COVID19_COUNTRY_DAILY_WHO')  # initialize the dataset
    try:
        # pull the US data into a Pandas dataframe
        frame = dataset.get_data(countryId='US', start=date(2019, 1, 1))
    except MqRequestError:
        # Best effort: swallow the API error and return an empty frame.
        frame = DataFrame()
    frame.reset_index(inplace=True)
    return frame
def get_data(self, state: Union[datetime.date, datetime.datetime] = None):
    """Return the value at ``state``.

    When a minimum date is configured, the whole [min, max] window is loaded
    once and cached, and the value at ``state`` is looked up from the cache.
    Otherwise a point query for ``state`` alone is issued (not cached) and the
    full value column of that result is returned.
    """
    if self._loaded_data is None:
        ds = Dataset(self._data_set)
        if not self._min_date:
            # No window configured: one-off point query, bypasses the cache.
            return ds.get_data(state, state,
                               assetId=(self._asset_id, ))[self._value_header]
        self._loaded_data = ds.get_data(self._min_date, self._max_date,
                                        assetId=(self._asset_id, ))
    return self._loaded_data[self._value_header].at[pd.to_datetime(state)]
def __get_direct_underliers(self, asset_id, dataset) -> pd.DataFrame:
    """ Queries the dataset for the date passed during initialisation.
    If date isn't passed, returns the data of the latest available date. """
    ds = Dataset(dataset)
    if self.date:
        result = ds.get_data(start=self.date, end=self.date,
                             assetId=[asset_id]).drop_duplicates()
    else:
        result = ds.get_data(assetId=[asset_id]).drop_duplicates()
    if len(result) > 0:
        # Pin self.date to the latest available observation and keep only
        # the rows for that date.
        latest = result.index.max()
        self.date = latest.date()
        result = result[result.index == latest].reset_index()
    return result
def test_get_data_series(mocker):
    """get_data_series should return a date-indexed series of the field."""
    mocker.patch("gs_quant.data.utils.get_types", return_value=test_types)
    mocker.patch.object(GsDataApi, 'query_data', return_value=test_data)
    mocker.patch.object(GsDataApi, 'symbol_dimensions', return_value=('assetId',))
    series = Dataset(Dataset.TR.TREOD).get_data_series(
        'tradePrice', dt.date(2019, 1, 2), dt.date(2019, 1, 9),
        assetId='MA4B66MW5E27U8P32SB')
    frame = pd.DataFrame(test_data)
    expected = pd.Series(data=frame.loc[:, 'tradePrice'].values,
                         index=pd.to_datetime(frame.loc[:, 'date'].values))
    expected = expected.rename_axis('date')
    pd.testing.assert_series_equal(series, expected)
def test_get_coverage(mocker):
    """Coverage results should round-trip the gsid through the typed frame."""
    mocker.patch("gs_quant.api.gs.data.GsDataApi.get_coverage",
                 return_value=test_coverage_data)
    mocker.patch("gs_quant.api.gs.data.GsDataApi.get_types",
                 return_value={'gsid': 'string'})
    data = Dataset(Dataset.TR.TREOD).get_coverage()
    typed = GsDataApi.construct_dataframe_with_types(str(Dataset.TR.TREOD),
                                                     test_coverage_data["results"])
    expected_gsid = typed.get('gsid').get(0)
    assert data["results"][0]["gsid"] == expected_gsid
def get_datasets(datasets):
    """Fetch each dataset since 2020-06-24 and append cumulative totals.

    For every dataset, daily 'new*' columns are cumulated per country (and
    subdivision, where present) into 'total*' columns and merged back onto
    the daily frame.  Datasets that fail to load are reported and skipped.
    """
    ds_dict = {}
    val_map = {'newConfirmed': 'totalConfirmed', 'newFatalities': 'totalFatalities'}
    for dataset in datasets:
        try:
            df = Dataset(dataset).get_data(datetime.date(2020, 6, 24),
                                           datetime.datetime.today().date())
            keys = [c for c in ['countryId', 'subdivisionId'] if c in df.columns] + ['date']
            vals = [c for c in list(val_map.keys()) if c in df.columns]
            totals = (df.groupby(keys).sum().groupby(level=0).cumsum()
                      .reset_index()[keys + vals].rename(columns=val_map))
            ds_dict[dataset] = (df.reset_index()
                                  .merge(totals, on=keys, suffixes=('', '_y'))
                                  .set_index('date'))
        except Exception as err:
            # Deliberate best-effort: report the failure and continue.
            print(f'Failed to obtain {dataset} with {getattr(err,"message",repr(err))}')
    return ds_dict
def volatility_screen(crosses, start_date, end_date, tenor='3m', plot=True):
    """Render an FX volatility screen in Streamlit.

    For each cross: pulls spot history (FXSPOT) and delta-neutral NYC implied
    vol for ``tenor`` (FXIMPLIEDVOL), then tabulates spot, implied vol,
    realized vol, the implied-realized spread, the implied high/low and the
    implied-vol percentile.  Optionally scatter-plots the spread ("richness")
    against the percentile ("entry point").

    :param crosses: iterable of bbid cross strings (e.g. 'EURUSD')
    :param start_date: history window start
    :param end_date: history window end
    :param tenor: implied-vol tenor, default '3m'
    :param plot: when True, also draw the scatter plot via matplotlib/Streamlit
    """
    fxspot_dataset, fxvol_dataset = Dataset('FXSPOT'), Dataset('FXIMPLIEDVOL')
    spot_data, impvol_data, spot_fx, data = {}, {}, {}, {}
    for cross in crosses:
        # Spot history, keeping the last print per duplicated row.
        spot = fxspot_dataset.get_data(start_date, end_date, bbid=cross)[[
            'spot'
        ]].drop_duplicates(keep='last')
        spot_fx[cross] = spot['spot']
        spot_data[cross] = volatility(spot['spot'], tenor)  # realized vol
        # Delta-neutral implied vol for the tenor; scaled to percentage points.
        vol = fxvol_dataset.get_data(start_date, end_date, bbid=cross, tenor=tenor,
                                     deltaStrike='DN',
                                     location='NYC')[['impliedVolatility']]
        impvol_data[cross] = vol.drop_duplicates(keep='last') * 100
    spdata, ivdata = format_df(spot_data), format_df(impvol_data)
    # Implied minus realized, aligned and with missing rows dropped.
    diff = ivdata.subtract(spdata).dropna()
    for cross in crosses:
        data[cross] = {
            'Spot': last_value(spot_fx[cross]),
            '{} Implied'.format(tenor): last_value(ivdata[cross]),
            '{} Realized'.format(tenor): last_value(spdata[cross]),
            'Diff': last_value(diff[cross]),
            'Historical Implied Low': min(ivdata[cross]),
            'Historical Implied High': max(ivdata[cross]),
            '%-ile': last_value(percentiles(ivdata[cross]))
        }
    df = pd.DataFrame(data)
    vol_screen = df.transpose()
    # Highlight per-column maxima in the Streamlit table.
    st.write(st.dataframe(vol_screen.style.highlight_max(axis=0)))
    if plot:
        for fx in vol_screen.index:
            plt.scatter(vol_screen.loc[fx]['%-ile'], vol_screen.loc[fx]['Diff'])
        plt.legend(vol_screen.index,
                   loc='best', bbox_to_anchor=(0.9, -0.13), ncol=3)
        plt.xlabel('Percentile of Current Implied Vol')
        plt.ylabel('Implied vs Realized Vol')
        plt.title('Entry Point vs Richness')
        st.pyplot(plt)
    return
def test_data_series_format(mocker):
    """get_data_series should default to MessagePack transport and honour an
    explicit ``format=Format.Json`` override (which also drops the msgpack
    Accept header)."""
    start = dt.date(2019, 1, 2)
    end = dt.datetime(2019, 1, 9)
    df = pd.DataFrame(test_data)
    index = pd.to_datetime(df.loc[:, 'date'].values)
    expected = pd.Series(index=index, data=df.loc[:, 'tradePrice'].values)
    expected = expected.rename_axis('date')
    # mock GsSession and data response
    mocker.patch.object(GsSession.__class__, 'default_value',
                        return_value=GsSession.get(Environment.QA, 'client_id',
                                                   'secret'))
    mock_response = {'requestId': 'qwerty', 'data': test_data}
    mocker.patch.object(GsSession.current, '_post',
                        side_effect=lambda *args, **kwargs: mock_response)
    mocker.patch.object(GsDataApi, 'symbol_dimensions', return_value=('assetId', ))
    mocker.patch("gs_quant.api.gs.data.GsDataApi.get_types",
                 return_value=test_types)
    # Default path: MessagePack payload + msgpack Accept header.
    actual = Dataset('TREOD').get_data_series(field='tradePrice', start=start,
                                              end=end,
                                              assetId='MA4B66MW5E27U8P32SB')
    pd.testing.assert_series_equal(actual, expected)
    assert len(GsSession.current._post.mock_calls) == 1
    name, args, kwargs = GsSession.current._post.mock_calls[0]
    assert kwargs['payload'].format == Format.MessagePack
    assert kwargs['request_headers'] == {'Accept': 'application/msgpack'}
    assert args[0] == '/data/TREOD/query'
    GsSession.current._post.reset_mock()
    # Explicit JSON override: payload format changes, no custom headers sent.
    actual = Dataset('TREOD').get_data_series(field='tradePrice', start=start,
                                              end=end,
                                              assetId='MA4B66MW5E27U8P32SB',
                                              format=Format.Json)
    pd.testing.assert_series_equal(actual, expected)
    assert len(GsSession.current._post.mock_calls) == 1
    name, args, kwargs = GsSession.current._post.mock_calls[0]
    assert kwargs['payload'].format == Format.Json
    assert 'request_headers' not in kwargs
    assert args[0] == '/data/TREOD/query'
def build_eq_vol_scenario_eod(
        asset_name: str,
        source_dataset: str,
        ref_spot: float = None,
        asset_name_type: AssetIdentifier = AssetIdentifier.REUTERS_ID,
        vol_date: date = None
) -> MarketDataVolShockScenario:
    """Build an EOD equity vol shock scenario from a Marquee vol dataset.

    :param asset_name: asset identifier value (e.g. a RIC)
    :param source_dataset: Marquee dataset id holding the vol surface
    :param ref_spot: optional reference spot passed through to the scenario
    :param asset_name_type: identifier type of ``asset_name``
    :param vol_date: surface date; defaults to today's date *at call time*.
        BUG FIX: the original default was ``date.today()``, which is evaluated
        once at import time and freezes the default date for the whole process.
    """
    if vol_date is None:
        vol_date = date.today()
    asset = SecurityMaster.get_asset(asset_name, asset_name_type)
    vol_dataset = Dataset(source_dataset)
    vol_data = vol_dataset.get_data(assetId=[asset.get_marquee_id()],
                                    strikeReference='forward',
                                    startDate=vol_date, endDate=vol_date)
    asset_ric = asset.get_identifier(AssetIdentifier.REUTERS_ID)
    return MarketDataVolShockScenario.from_dataframe(asset_ric, vol_data, ref_spot)
def build_eq_vol_scenario_intraday(
        asset_name: str,
        source_dataset: str,
        ref_spot: float = None,
        asset_name_type: AssetIdentifier = AssetIdentifier.REUTERS_ID,
        start_time: datetime = None,
        end_time: datetime = None
) -> MarketDataVolShockScenario:
    """Build an intraday equity vol shock scenario from a Marquee vol dataset.

    :param asset_name: asset identifier value (e.g. a RIC)
    :param source_dataset: Marquee dataset id holding the vol surface
    :param ref_spot: optional reference spot passed through to the scenario
    :param asset_name_type: identifier type of ``asset_name``
    :param start_time: window start; defaults to one hour before ``end_time``.
    :param end_time: window end; defaults to now *at call time*.
        BUG FIX: the original defaults were ``datetime.now() - timedelta(hours=1)``
        and ``datetime.now()``, evaluated once at import time, so every later
        call silently reused the import-time window.
    """
    if end_time is None:
        end_time = datetime.now()
    if start_time is None:
        start_time = end_time - timedelta(hours=1)
    asset = SecurityMaster.get_asset(asset_name, asset_name_type)
    vol_dataset = Dataset(source_dataset)
    vol_data = vol_dataset.get_data(assetId=[asset.get_marquee_id()],
                                    strikeReference='forward',
                                    startTime=start_time, endTime=end_time)
    asset_ric = asset.get_identifier(AssetIdentifier.REUTERS_ID)
    return MarketDataVolShockScenario.from_dataframe(asset_ric, vol_data, ref_spot)
def get_time_series(self):
    """Return the cached STSLEVELS close-price series, loading it on first use.

    BUG FIX: the original guard ``if not self._time_series:`` raises
    ``ValueError`` on any call after the cache holds a pandas Series (Series
    truthiness is ambiguous); the guard now tests for None/empty explicitly.
    Also gives the empty fallback series an explicit dtype.
    """
    if self._time_series is None or len(self._time_series) == 0:
        data = Dataset('STSLEVELS').get_data(assetId=self.asset_id,
                                             start=dt.date(1970, 1, 1))
        self._time_series = pd.Series(dtype=float)
        if data.size > 0:
            self._time_series = data['closePrice']
            # Keep the last observation for any duplicated dates.
            self._time_series = self._time_series[
                ~self._time_series.index.duplicated(keep='last')]
        self._time_series.name = self.asset_name
    return self._time_series
def main() -> None:
    """Entry point: pull WHO and ECDC COVID-19 country dailies and serve the
    dash app on port 2000."""
    # df = pd.read_json(filename)
    # SECURITY NOTE(review): client id/secret are hard-coded below; rotate
    # them and load from the environment or a secrets store before sharing.
    GsSession.use(
        Environment.PROD, '77d7c80dec0b44e9868dfaa3a7e2cb36',
        '4edbc70b2249de3ddc9f303bb373575cb06839fb6857570648fdb772ccf8e377',
        ('read_product_data', ))
    ds_ecdc = Dataset('COVID19_COUNTRY_DAILY_ECDC')
    ds_who = Dataset('COVID19_COUNTRY_DAILY_WHO')
    # WHO data from 2020-01-21 onwards for the selected countries.
    data_who = ds_who.get_data(datetime.date(2020, 1, 21),
                               countryId=["US", "GB", "IN", "BR", "NG", "NZ"])
    df_ecdc = ds_ecdc.get_data(start=datetime.date(2019, 12, 31),
                               end=datetime.date(2020, 6, 18),
                               countryId=["US", "GB", "BR", "NZ", "IN", "NG"])
    # Per-capita new cases, plus day-over-day growth rate of new cases.
    df_ecdc["casePopulation"] = df_ecdc["newConfirmed"] / df_ecdc["population"]
    df_ecdc['rateOfChange'] = (
        df_ecdc['newConfirmed'] -
        df_ecdc['newConfirmed'].shift()) / df_ecdc['newConfirmed'].shift()
    df_ecdc['rateOfChange'] = df_ecdc['rateOfChange'].fillna(0)
    print(data_who)
    app = render_app(df_ecdc, data_who)
    app.run_server(port=2000)
def fci(country_id: str, measure: _FCI_MEASURE = _FCI_MEASURE.FCI, *, source: str = None,
        real_time: bool = False, request_id: Optional[str] = None) -> pd.Series:
    """
    Daily Financial Conditions Index (FCI) for each of the world's large economies
    and many smaller ones, as well as aggregate FCIs for regions.

    :param country_id: id of country/region
    :param measure: FCI metric to retrieve
    :param source: name of function caller
    :param real_time: whether to retrieve intraday data instead of EOD
    :param request_id: server request id
    :return: FCI metric value
    """
    if real_time:
        raise NotImplementedError('real-time FCI data is not available')

    type_ = QueryType(inflection.titleize(measure.value))

    # The "real" measures come straight from the FCI dataset rather than the
    # market-data query service.
    real_columns = {_FCI_MEASURE.REAL_FCI: 'realFCI',
                    _FCI_MEASURE.REAL_TWI_CONTRIBUTION: 'realTWIContribution'}
    if measure in real_columns:
        df = Dataset('FCI').get_data(geographyId=country_id)
        column = real_columns[measure]
        if column in df.columns:
            series = ExtendedSeries(df[column])
        else:
            series = ExtendedSeries(dtype=float)
        series.dataset_ids = ('FCI', )
        return series

    q = GsDataApi.build_market_data_query([country_id], query_type=type_,
                                          source=source, real_time=real_time)
    df = _market_data_timed(q, request_id)
    return _extract_series_from_df(df, type_, True)
def plottovix(countryId):
    """Plot VIX adjusted close against daily new confirmed COVID-19 cases.

    BUG FIX: ``Dataset.get_data`` takes its date window as ``start``/``end``
    (as every other call site in this codebase does); the original passed
    ``start_date=``, which is not the date filter, so the window was ignored.

    :param countryId: WHO country id (e.g. 'US') to plot new cases for
    """
    dataset = 'COVID19_COUNTRY_DAILY_WHO'
    frame = Dataset(dataset).get_data(start=datetime.date(2020, 1, 1),
                                      countryId=countryId)
    vix = pd.read_csv('./vix.csv', index_col='Date')
    vix.index = pd.to_datetime(vix.index)
    fig, ax = plt.subplots()
    fig.set_size_inches(18.5, 10.5)
    # VIX on the left axis (red), new cases on a twinned right axis (blue).
    ax.plot(vix['Adj Close'], color="red", marker="o")
    ax.set_xlabel("Date", fontsize=14)
    ax.set_ylabel("VIX", color="red", fontsize=14)
    ax2 = ax.twinx()
    ax2.plot(frame['newConfirmed'], color="blue", marker="o")
    ax2.set_ylabel("New Cases", color="blue", fontsize=14)
    plt.show()
# In[42]: ind20 = frame[frame['% change'] > .25].index vixcop = vix.copy() vix.reindex(ind)['% change'].mean() # In[49]: coverage = Dataset(dataset).get_coverage() # In[50]: print(coverage) # In[77]: print(coverage['countryId']) print(Dataset(dataset).get_data(start_date=datetime.date(2020, 1, 1), countryId='NI'))
import plotly.express as px
from dash import dash, dash_table
from gs_quant.session import GsSession, Environment
from gs_quant.data import Dataset
from datetime import date
from credentials.config import GSConfig
import numpy as np
import pandas as pd

# Authenticate against Marquee with read-only product-data scope.
GsSession.use(client_id=GSConfig.client_id,
              client_secret=GSConfig.client_secret,
              scopes=('read_product_data', ))

# US daily WHO COVID-19 data rendered as a Dash data table.
dataset = Dataset('COVID19_COUNTRY_DAILY_WHO')
df = dataset.get_data(countryId='US', start=date(2019, 1, 1))

app = dash.Dash(__name__)
# BUG FIX: `dash_table` was used without being imported (NameError at
# startup); it is importable from the `dash` package (dash >= 2).
app.layout = dash_table.DataTable(
    id='table',
    columns=[{
        "name": i,
        "id": i
    } for i in df.columns],
    data=df.to_dict('records'),
)

if __name__ == '__main__':
    app.run_server(debug=True)
""" BROKEN """ import datetime from credentials.config import GSConfig from gs_quant.data import Dataset from gs_quant.session import GsSession, Environment GsSession.use(Environment.PROD, GSConfig.client_id, GSConfig.client_secret, ('read_product_data',)) ds = Dataset('COVID19_COUNTRY_DAILY_CDC') data = ds.get_data(start=datetime.date(2020, 1, 21), countryId="US") print(data.head()) # peek at first few rows of data
import datetime

from gs_quant.data import Dataset
from gs_quant.session import GsSession, Environment

# SECURITY NOTE(review): hard-coded client credentials below — rotate them
# and load from the environment or a secrets manager, not source control.
GsSession.use(
    Environment.PROD, '77d7c80dec0b44e9868dfaa3a7e2cb36',
    '4edbc70b2249de3ddc9f303bb373575cb06839fb6857570648fdb772ccf8e377',
    ('read_product_data', ))

ds = Dataset('COVID19_COUNTRY_DAILY_WIKI')
# data = ds.get_data()
# print(data)
# peek at first few rows of data
data = ds.get_data(start=datetime.date(2019, 1, 20),
                   countryId=["US", "GB", "BR", "NZ", "IN"])
print(data)
# data.reset_index(inplace=True)
# data.to_json(r'wiki.json')
import datetime from datetime import date import json import os import pandas as pd import heapq from gs_quant.data import Dataset from gs_quant.session import GsSession, Environment GsSession.use( Environment.PROD, 'b16a94fab7714a61b29065f6d6bda51b', '2179ad8fec38bbe8995f4d07293f9b476476dbef67b99f3a4074099de3fff049', ('read_product_data', )) ds = Dataset('COVID19_COUNTRY_DAILY_WHO') today = date.today() countries = ds.get_data(today)[['countryName', 'countryId']].drop_duplicates() country_id_name_dict = {} for index, row in countries.iterrows(): country_id_name_dict[row['countryId']] = row['countryName'] def get_new_daily_record_confirmed(queryType): return_countries = [] country_ids = list(country_id_name_dict.keys()) for country in country_ids: highest_confirmed = ds.get_data(datetime.date(2020, 1, 21), countryId=[country])[queryType].max() today_confirmed = ds.get_data(today, countryId=[country])[[queryType]] if highest_confirmed == today_confirmed[queryType].values[0]:
def get_time_series(self):
    """Build (and cache) the basket's excess-return index level series.

    Steps: build a business-day calendar net of the configured exchange
    holidays; load an excess-return, currency-converted price series for each
    underlier; then run a daily backtest from ``self._start_date`` that asks
    the units calculator (with a 1-business-day lag) for target units, trades
    into them, accrues servicing cost (ACT/360) and charges rebalance cost,
    and records the resulting index level.

    Fixes vs the original:
    * ``if self._time_series:`` raised ``ValueError`` on a second call once
      the cache held a multi-element pandas Series (ambiguous truthiness);
      the guard is now explicit.
    * ``assert '<message>'`` is a no-op (a non-empty string is always truthy),
      so neither guard ever fired; they now actually raise.
    """
    if self._time_series is not None and len(self._time_series) > 0:
        return self._time_series
    if self._units_calculator is None:
        raise ValueError('Undefined units calculator.')

    # get holidays & dates
    holidays = pd.DatetimeIndex([])
    for holiday in self._holidays:
        holidays = holidays.union(
            Dataset(Dataset.GS.HOLIDAY).get_data(exchange=holiday,
                                                 start=dt.date(1952, 1, 1),
                                                 end=dt.date(2052, 12, 31)).index)
    cal_dates = pd.bdate_range(start=dt.date(1952, 1, 1),
                               end=dt.date(2052, 12, 31)).difference(holidays)

    # get price data for underliers
    assets = []
    prices = []
    servicing_cost = []
    rebalance_cost = []
    for row in self._asset_table:
        # only accept built assets not strings
        asset = row['asset']
        assets.append(asset)
        if not self.excess_return:
            raise NotImplementedError('Total return basket is not yet implemented.')
        prices.append(
            asset.convert_er(row['money_market_asset']).convert_currency(
                self.currency).get_time_series())
        servicing_cost.append(row.get('servicing_cost', 0.))
        rebalance_cost.append(row.get('rebalance_cost', 0.))

    prices = pd.DataFrame(prices).transpose()
    last_prices = prices.reindex(cal_dates).fillna(method='ffill')
    servicing_cost = np.array(servicing_cost)
    rebalance_cost = np.array(rebalance_cost)

    # index dates: from the configured start to the last available price
    dates = cal_dates[np.logical_and(cal_dates >= self._start_date,
                                     cal_dates <= prices.index[-1])]

    # initialize backtest
    cash = 100
    self._time_series = dict()
    units = np.zeros(len(assets))

    # setup data for units calculator
    self._units_calculator.reset()
    data = {
        'time_series': self._time_series,
        'cal_dates': cal_dates,
        'prices': prices,
        'last_prices': last_prices
    }

    # compute index
    self._time_series[dates[0]] = cash
    for idx in np.arange(1, len(dates)):
        date = dates[idx]
        prices_vec = prices.loc[date].values
        last_prices_vec = last_prices.loc[date].values

        # remove servicing costs (ACT/360 accrual between index dates)
        year_frac = (dates[idx] - dates[idx - 1]) / np.timedelta64(1, 'D') / 360
        mask = ~np.isnan(last_prices_vec)
        cash -= np.sum(
            np.abs(units[mask]) * last_prices_vec[mask] *
            servicing_cost[mask]) * year_frac

        # run units calculator
        # use 1b lag, need to generalize
        if idx >= 1:
            obs_date = dates[idx - 1]
            target_units = self._units_calculator.get_units(obs_date, data)
        else:
            target_units = units

        # generate trades for assets with price available
        trades = np.zeros(len(assets))
        mask = ~np.any(np.isnan([prices_vec, target_units]), axis=0)
        trades[mask] = target_units[mask] - units[mask]

        # remove cash for trades, add units
        units[mask] += trades[mask]
        cash -= np.sum(trades[mask] * prices_vec[mask])
        cash -= np.sum(
            np.abs(trades[mask]) * prices_vec[mask] * rebalance_cost[mask])

        # calculate index level
        mask = ~np.isnan(last_prices_vec)
        self._time_series[date] = cash + np.sum(
            units[mask] * last_prices_vec[mask])

    self._time_series = pd.Series(self._time_series)
    self._time_series.name = self.asset_name
    return self._time_series
access_token = access_token_dict['access_token'] # update session headers session.headers.update({'Authorization':'Bearer '+ access_token}) # test API connectivity request_url = 'https://api.marquee.gs.com/v1/users/self' request = session.get(url=request_url) GsSession.use(Environment.PROD, client_id, client_secret, ('read_product_data',)) ################################################################## ################################################################## #Extract Data ds_who = Dataset('COVID19_COUNTRY_DAILY_WHO') data_who = ds_who.get_data(datetime.date(2020, 1, 21), countryId=["US", "GB", "IN", "BR", "NG", "NZ"]) # In[37]: ################################################################## ################################################################## #Line graph for percentages countries = set(data_who['countryName']) line_p = go.Figure() x = data_who.index
# test API connectivity request_url = 'https://api.marquee.gs.com/v1/users/self' request = session.get(url=request_url) print(request.text) ''' from datetime import date from gs_quant.data import Dataset from gs_quant.markets.securities import SecurityMaster, AssetIdentifier from gs_quant.session import GsSession client_id = '' client_secret = '' scopes = GsSession.Scopes.get_default() GsSession.use(client_id=client_id, client_secret=client_secret, scopes=scopes) ds = Dataset('USCANFPP_MINI') print (ds) gsids = ds.get_coverage()['gsid'].values.tolist() df = ds.get_data(date(2012, 7, 2), date(2017, 6, 30), gsid=gsids[0:5]) print (df) for idx, row in df.iterrows(): marqueeAssetId = row['assetId'] asset = SecurityMaster.get_asset(marqueeAssetId, AssetIdentifier.MARQUEE_ID) df.loc[df['assetId'] == marqueeAssetId, 'assetName'] = asset.name print (df)
This product uses the FRED® API but is not endorsed or certified by the Federal Reserve Bank of St. Louis. FRED terms of use available at https://research.stlouisfed.org/docs/api/terms_of_use.html """ import pandas as pd import pytest from pandas.testing import assert_frame_equal, assert_series_equal from gs_quant.data import Dataset from gs_quant.api.fred.data import FredDataApi from unittest.mock import Mock fredAPI = FredDataApi(api_key='') fred_data = Dataset('GDP', fredAPI) GDP_data = { 'realtime_start': '2019-10-25', 'realtime_end': '2019-10-25', 'observation_start': '1600-01-01', 'observation_end': '9999-12-31', 'units': 'lin', 'output_type': 1, 'file_type':