def get_data_last(self,
                  as_of: Optional[Union[dt.date, dt.datetime]],
                  start: Optional[Union[dt.date, dt.datetime]] = None,
                  fields: Optional[Iterable[str]] = None,
                  **kwargs) -> pd.DataFrame:
    """
    Fetch the last data point of this DataSet, at or before as_of

    The query is built with ``as_of`` as the end of the range and is passed, together with this
    dataset's id, to the provider's ``last_data`` call.

    :param as_of: The date or time as of which to query (used as the end of the query range)
    :param start: The start of the range to query
    :param fields: The fields for which to query
    :param kwargs: Additional query parameters, e.g., city='Boston'
    :return: A Dataframe of values

    **Examples**

    >>> from gs_quant.data import Dataset
    >>> import datetime as dt
    >>>
    >>> weather = Dataset('WEATHER')
    >>> last = weather.get_data_last(dt.datetime.now())
    """
    last_point_query = self.provider.build_query(
        start=start,
        end=as_of,
        fields=fields,
        **kwargs
    )
    raw_rows = self.provider.last_data(last_point_query, self.id)

    # Convert the provider's raw rows into a typed DataFrame for this dataset id.
    return construct_dataframe_with_types(self.id, raw_rows)
def test_construct_dataframe_with_types(get_types):
    """
    Check that construct_dataframe_with_types yields the expected dtype for the index and columns.

    :param get_types: stand-in for the type lookup whose ``return_value`` is set here
        (presumably a mock injected by a patch decorator that is not visible in this block)
    """
    get_types.return_value = test_types

    dataset_id = str(Dataset.TR.TREOD)
    frame = construct_dataframe_with_types(dataset_id, test_data)

    assert np.issubdtype(frame.index.dtype, datetime64)
    assert frame['adjustedAskPrice'].dtype == int64
    assert frame['adjustedBidPrice'].dtype == float64
    # https://pbpython.com/pandas_dtypes.html python str == dtype object
    assert frame['assetId'].dtype == object
    assert np.issubdtype(frame['updateTime'].dtype, datetime64)
def get_data_series(self,
                    field: Union[str, Fields],
                    start: Optional[Union[dt.date, dt.datetime]] = None,
                    end: Optional[Union[dt.date, dt.datetime]] = None,
                    as_of: Optional[dt.datetime] = None,
                    since: Optional[dt.datetime] = None,
                    **kwargs) -> pd.Series:
    """
    Get a time series of data for a field of a dataset

    :param field: The DataSet field to use
    :param start: Requested start date/datetime for data
    :param end: Requested end date/datetime for data
    :param as_of: Request data as_of
    :param since: Request data since
    :param kwargs: Extra query arguments, e.g. ticker='EDZ19'
    :return: A Series of the requested data, indexed by date or time, depending on the DataSet.
        An empty Series is returned when the query yields no rows
    :raises MqValueError: if the dataset does not have exactly one symbol dimension, or if the
        returned rows contain more than one distinct value of that symbol dimension

    **Examples**

    >>> from gs_quant.data import Dataset
    >>> import datetime as dt
    >>>
    >>> weather = Dataset('WEATHER')
    >>> dew_point = weather.get_data_series('dewPoint', dt.date(2016, 1, 15), dt.date(2016, 1, 16),
    ...                                     city=('Boston', 'Austin'))
    """
    # Accept either a plain field name or an enum-like object exposing ``.value``.
    field_value = field if isinstance(field, str) else field.value

    query = self.provider.build_query(
        start=start,
        end=end,
        as_of=as_of,
        since=since,
        fields=(field_value,),
        **kwargs
    )

    symbol_dimensions = self.provider.symbol_dimensions(self.id)
    if len(symbol_dimensions) != 1:
        raise MqValueError('get_data_series only valid for symbol_dimensions of length 1')
    symbol_dimension = symbol_dimensions[0]

    data = self.provider.query_data(query, self.id)
    df = construct_dataframe_with_types(self.id, data)

    if df.empty:
        # No rows came back, so there is nothing to group or select. The symbol and field
        # columns may not even exist on an empty frame, in which case the lookups below would
        # raise KeyError; return an empty series (keeping the frame's index) instead.
        return pd.Series(index=df.index, dtype=float)

    # A series is only well defined for one symbol; more than one group means the query
    # matched several values of the symbol dimension.
    gb = df.groupby(symbol_dimension)
    if len(gb.groups) > 1:
        raise MqValueError('Not a series for a single {}'.format(symbol_dimension))

    return pd.Series(index=df.index, data=df.loc[:, field_value].values)
def get_data(self,
             start: Optional[Union[dt.date, dt.datetime]] = None,
             end: Optional[Union[dt.date, dt.datetime]] = None,
             as_of: Optional[dt.datetime] = None,
             since: Optional[dt.datetime] = None,
             fields: Optional[Iterable[Union[str, Fields]]] = None,
             asset_id_type: str = None,
             **kwargs) -> pd.DataFrame:
    """
    Query this DataSet for the given range and parameters

    :param start: Requested start date/datetime for data
    :param end: Requested end date/datetime for data
    :param as_of: Request data as_of
    :param since: Request data since
    :param fields: DataSet fields to include; entries that are not strings are replaced by their ``.value``
    :param asset_id_type: Passed through unchanged as ``asset_id_type`` to the provider's ``query_data`` call
    :param kwargs: Extra query arguments, e.g. ticker='EDZ19'
    :return: A Dataframe of the requested data

    **Examples**

    >>> from gs_quant.data import Dataset
    >>> import datetime as dt
    >>>
    >>> weather = Dataset('WEATHER')
    >>> weather_data = weather.get_data(dt.date(2016, 1, 15), dt.date(2016, 1, 16), city=('Boston', 'Austin'))
    """
    # Normalise the requested fields to plain names; None means "no explicit field selection".
    if fields is None:
        field_names = None
    else:
        field_names = [entry if isinstance(entry, str) else entry.value for entry in fields]

    query = self.provider.build_query(
        start=start,
        end=end,
        as_of=as_of,
        since=since,
        fields=field_names,
        **kwargs
    )
    raw_rows = self.provider.query_data(query, self.id, asset_id_type=asset_id_type)

    return construct_dataframe_with_types(self.id, raw_rows)
def test_get_coverage(get_coverage, get_types):
    """
    Check that Dataset.get_coverage returns the typed DataFrame built from the coverage rows.

    :param get_coverage: stand-in whose ``return_value`` supplies the coverage rows
    :param get_types: stand-in whose ``return_value`` supplies the column types
        (both are presumably mocks injected by patch decorators not visible in this block)
    """
    get_coverage.return_value = test_coverage_data
    get_types.return_value = {'gsid': 'string'}

    coverage = Dataset(Dataset.TR.TREOD).get_coverage()

    expected = construct_dataframe_with_types(str(Dataset.TR.TREOD), test_coverage_data)
    assert coverage.equals(expected)
def test_last_data(query_data, get_types):
    """
    Check that Dataset.get_data_last returns the typed DataFrame built from the final test row.

    :param query_data: stand-in whose ``return_value`` is set to a one-element list holding the last test row
    :param get_types: stand-in whose ``return_value`` supplies the column types
        (both are presumably mocks injected by patch decorators not visible in this block)
    """
    query_data.return_value = [test_data[-1]]
    get_types.return_value = test_types

    treod = Dataset(Dataset.TR.TREOD)
    latest = treod.get_data_last(dt.date(2019, 1, 9), assetId='MA4B66MW5E27U8P32SB')

    expected = construct_dataframe_with_types(str(Dataset.TR.TREOD), [test_data[-1]])
    assert latest.equals(expected)